archicore 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/code-index/ast-parser.js +21 -6
- package/dist/code-index/file-chunker.d.ts +61 -0
- package/dist/code-index/file-chunker.js +431 -0
- package/dist/code-index/index.d.ts +2 -0
- package/dist/code-index/index.js +1 -0
- package/dist/code-index/symbol-extractor.js +25 -1
- package/dist/orchestrator/index.js +13 -2
- package/dist/semantic-memory/index.d.ts +8 -0
- package/dist/semantic-memory/index.js +73 -0
- package/dist/semantic-memory/vector-store.js +1 -1
- package/dist/server/services/project-service.js +29 -5
- package/package.json +1 -1
package/dist/code-index/ast-parser.js
CHANGED
@@ -162,10 +162,12 @@ export class ASTParser {
             /^(?:public|private|protected)?\s*(?:static\s+)?(?:final\s+)?(?:\w+\s+)+(\w+)\s*\(/
         ],
         php: [
-
-
-
-
+            /^\s*namespace\s+([\w\\]+)/, // namespace App\Controllers
+            /^\s*(?:final\s+|abstract\s+)?class\s+(\w+)/, // class, final class, abstract class
+            /^\s*interface\s+(\w+)/, // interface
+            /^\s*trait\s+(\w+)/, // trait
+            /^\s*(?:public|private|protected)?\s*(?:static\s+)?function\s+(\w+)/, // function
+            /^\s*const\s+(\w+)\s*=/, // const
         ],
         ruby: [
             /^def\s+(\w+)/,
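
A minimal sketch, not from the package, of what the new PHP patterns capture; the sample source lines are hypothetical:

// Sketch: three of the new PHP patterns from the hunk above, run over sample lines.
const phpPatterns = [
    /^\s*namespace\s+([\w\\]+)/,
    /^\s*(?:final\s+|abstract\s+)?class\s+(\w+)/,
    /^\s*(?:public|private|protected)?\s*(?:static\s+)?function\s+(\w+)/,
];
const samples = [
    'namespace App\\Controllers;',    // -> "App\Controllers"
    'final class CommentController',  // -> "CommentController"
    'public static function index()', // -> "index"
];
for (const line of samples) {
    for (const p of phpPatterns) {
        const m = line.match(p);
        if (m) { console.log(m[1]); break; }
    }
}
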
@@ -400,16 +402,29 @@ export class ASTParser {
                 continue;
             }
             const type = this.inferTypeFromPattern(pattern, language);
+            // Extract more context - the following lines up to the closing brace
+            let codeContext = trimmedLine;
+            let endLineIndex = index;
+            // For functions/classes, try to capture the body (up to 50 lines)
+            if (type.includes('function') || type.includes('class') || type.includes('method')) {
+                let braceCount = (trimmedLine.match(/\{/g) || []).length - (trimmedLine.match(/\}/g) || []).length;
+                for (let j = index + 1; j < Math.min(index + 50, lines.length) && braceCount > 0; j++) {
+                    const nextLine = lines[j];
+                    codeContext += '\n' + nextLine;
+                    braceCount += (nextLine.match(/\{/g) || []).length - (nextLine.match(/\}/g) || []).length;
+                    endLineIndex = j;
+                }
+            }
             children.push({
                 id: `${filePath}:${name}:${index}`,
                 type,
                 name,
                 filePath,
                 startLine: index,
-                endLine:
+                endLine: endLineIndex,
                 children: [],
                 metadata: {
-                    text:
+                    text: codeContext.substring(0, 2000), // Increased from 200 to 2000
                     hasErrors: false,
                     regexParsed: true
                 }
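
The body capture above relies on a simple brace balance. A minimal sketch of the same idea with a hypothetical three-line input (note the limitation the chunker below also acknowledges: braces inside strings or comments are counted too):

// Sketch: append lines until the brace balance returns to zero (capped at 50 lines).
const lines = ['function sum(a, b) {', '    return a + b;', '}'];
let body = lines[0];
let balance = (lines[0].match(/\{/g) || []).length - (lines[0].match(/\}/g) || []).length;
for (let j = 1; j < Math.min(50, lines.length) && balance > 0; j++) {
    body += '\n' + lines[j];
    balance += (lines[j].match(/\{/g) || []).length - (lines[j].match(/\}/g) || []).length;
}
console.log(body); // prints all three lines; the balance reaches 0 at the '}'
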
package/dist/code-index/file-chunker.d.ts
ADDED
@@ -0,0 +1,61 @@
+/**
+ * File Chunker
+ *
+ * Smart chunking of large files into logical parts:
+ * - By class/function/namespace
+ * - Preserving context (imports, namespace)
+ * - For effective semantic search
+ */
+export interface FileChunk {
+    id: string;
+    filePath: string;
+    chunkIndex: number;
+    totalChunks: number;
+    startLine: number;
+    endLine: number;
+    type: 'header' | 'namespace' | 'class' | 'function' | 'trait' | 'interface' | 'code';
+    name: string;
+    content: string;
+    context: string;
+}
+export interface ChunkingOptions {
+    maxChunkSize: number;
+    minChunkSize: number;
+    includeContext: boolean;
+    language: string;
+}
+export declare class FileChunker {
+    /**
+     * Split a file into logical chunks
+     */
+    chunkFile(content: string, filePath: string, options?: Partial<ChunkingOptions>): FileChunk[];
+    /**
+     * Extract the file's context (imports, namespace)
+     */
+    private extractContext;
+    /**
+     * Find logical boundaries in the file
+     */
+    private findLogicalBoundaries;
+    /**
+     * Find the end of a code block
+     */
+    private findBlockEnd;
+    /**
+     * Get a line's indentation level
+     */
+    private getIndent;
+    /**
+     * Create chunks from the boundaries found
+     */
+    private createChunks;
+    /**
+     * Split a large block into sub-chunks
+     */
+    private splitLargeBlock;
+    /**
+     * Split the file by size (fallback)
+     */
+    private chunkBySize;
+}
+//# sourceMappingURL=file-chunker.d.ts.map
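
A usage sketch for the declared API. The deep-import path and the sample PHP source are assumptions for illustration; a file this small fits under the default maxChunkSize, so it comes back as a single 'code' chunk:

// Sketch: the import path below is an assumption, not documented by the package.
import { FileChunker } from 'archicore/dist/code-index/file-chunker.js';

const phpSource = [
    '<?php',
    'namespace App;',
    'class UserService {',
    '    public function find($id) { /* ... */ }',
    '}',
].join('\n');

const chunker = new FileChunker();
const chunks = chunker.chunkFile(phpSource, 'src/UserService.php', { language: 'php' });
for (const c of chunks) {
    console.log(`${c.type} ${c.name}: lines ${c.startLine}-${c.endLine} (${c.chunkIndex + 1}/${c.totalChunks})`);
}
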
package/dist/code-index/file-chunker.js
ADDED
@@ -0,0 +1,431 @@
+/**
+ * File Chunker
+ *
+ * Smart chunking of large files into logical parts:
+ * - By class/function/namespace
+ * - Preserving context (imports, namespace)
+ * - For effective semantic search
+ */
+import { Logger } from '../utils/logger.js';
+const DEFAULT_OPTIONS = {
+    maxChunkSize: 4000,
+    minChunkSize: 500,
+    includeContext: true,
+    language: 'unknown'
+};
+export class FileChunker {
+    /**
+     * Split a file into logical chunks
+     */
+    chunkFile(content, filePath, options = {}) {
+        const opts = { ...DEFAULT_OPTIONS, ...options };
+        const lines = content.split('\n');
+        // If the file is small, return it as-is
+        if (content.length <= opts.maxChunkSize) {
+            return [{
+                    id: `${filePath}:chunk:0`,
+                    filePath,
+                    chunkIndex: 0,
+                    totalChunks: 1,
+                    startLine: 0,
+                    endLine: lines.length - 1,
+                    type: 'code',
+                    name: filePath.split(/[/\\]/).pop() || 'file',
+                    content,
+                    context: ''
+                }];
+        }
+        // Extract the context (imports, namespace, use statements)
+        const contextInfo = this.extractContext(lines, opts.language);
+        // Find logical boundaries (classes, functions, etc.)
+        const boundaries = this.findLogicalBoundaries(lines, opts.language);
+        // Create the chunks
+        const chunks = this.createChunks(lines, boundaries, contextInfo, filePath, opts);
+        Logger.debug(`Chunked ${filePath}: ${content.length} chars -> ${chunks.length} chunks`);
+        return chunks;
+    }
+    /**
+     * Extract the file's context (imports, namespace)
+     */
+    extractContext(lines, language) {
+        const contextLines = [];
+        let endLine = 0;
+        const contextPatterns = {
+            php: [
+                /^\s*<\?php/,
+                /^\s*namespace\s+/,
+                /^\s*use\s+/,
+                /^\s*require(_once)?\s+/,
+                /^\s*include(_once)?\s+/,
+            ],
+            typescript: [
+                /^\s*import\s+/,
+                /^\s*export\s+\{[^}]*\}\s+from/,
+                /^\s*\/\/\s*@ts-/,
+            ],
+            javascript: [
+                /^\s*import\s+/,
+                /^\s*const\s+\{[^}]*\}\s*=\s*require/,
+                /^\s*require\s*\(/,
+            ],
+            python: [
+                /^\s*import\s+/,
+                /^\s*from\s+\w+\s+import/,
+                /^\s*#.*coding[:=]/,
+            ],
+            java: [
+                /^\s*package\s+/,
+                /^\s*import\s+/,
+            ],
+            csharp: [
+                /^\s*using\s+/,
+                /^\s*namespace\s+/,
+            ],
+            go: [
+                /^\s*package\s+/,
+                /^\s*import\s+/,
+            ],
+            rust: [
+                /^\s*use\s+/,
+                /^\s*mod\s+/,
+                /^\s*extern\s+crate/,
+            ],
+            ruby: [
+                /^\s*require\s+/,
+                /^\s*require_relative\s+/,
+                /^\s*include\s+/,
+            ]
+        };
+        const patterns = contextPatterns[language] || [];
+        // Collect all import/namespace lines at the top of the file
+        for (let i = 0; i < Math.min(lines.length, 100); i++) {
+            const line = lines[i];
+            const trimmed = line.trim();
+            // Blank lines and leading comments - skip but keep
+            if (!trimmed || trimmed.startsWith('//') || trimmed.startsWith('#') || trimmed.startsWith('/*') || trimmed.startsWith('*')) {
+                if (contextLines.length > 0 || patterns.some(p => p.test(trimmed))) {
+                    contextLines.push(line);
+                    endLine = i;
+                }
+                continue;
+            }
+            // Check the patterns
+            const isContext = patterns.some(p => p.test(trimmed));
+            if (isContext) {
+                contextLines.push(line);
+                endLine = i;
+            }
+            else if (contextLines.length > 0) {
+                // Reached the end of the context
+                break;
+            }
+        }
+        return {
+            context: contextLines.join('\n'),
+            endLine
+        };
+    }
+    /**
+     * Find logical boundaries in the file
+     */
+    findLogicalBoundaries(lines, language) {
+        const boundaries = [];
+        // Patterns for block starts
+        const blockPatterns = {
+            php: [
+                { pattern: /^\s*namespace\s+([\w\\]+)/, type: 'namespace' },
+                { pattern: /^\s*(?:final\s+|abstract\s+)?class\s+(\w+)/, type: 'class' },
+                { pattern: /^\s*interface\s+(\w+)/, type: 'interface' },
+                { pattern: /^\s*trait\s+(\w+)/, type: 'trait' },
+                { pattern: /^\s*(?:public|private|protected)?\s*(?:static\s+)?function\s+(\w+)/, type: 'function' },
+            ],
+            typescript: [
+                { pattern: /^\s*(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/, type: 'class' },
+                { pattern: /^\s*(?:export\s+)?interface\s+(\w+)/, type: 'interface' },
+                { pattern: /^\s*(?:export\s+)?(?:async\s+)?function\s+(\w+)/, type: 'function' },
+                { pattern: /^\s*(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s+)?\(/, type: 'function' },
+            ],
+            javascript: [
+                { pattern: /^\s*(?:export\s+)?class\s+(\w+)/, type: 'class' },
+                { pattern: /^\s*(?:export\s+)?(?:async\s+)?function\s+(\w+)/, type: 'function' },
+                { pattern: /^\s*(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s+)?\(/, type: 'function' },
+            ],
+            python: [
+                { pattern: /^class\s+(\w+)/, type: 'class' },
+                { pattern: /^(?:async\s+)?def\s+(\w+)/, type: 'function' },
+            ],
+            java: [
+                { pattern: /^\s*(?:public|private|protected)?\s*(?:abstract\s+)?class\s+(\w+)/, type: 'class' },
+                { pattern: /^\s*(?:public|private|protected)?\s*interface\s+(\w+)/, type: 'interface' },
+                { pattern: /^\s*(?:public|private|protected)?\s*(?:static\s+)?(?:\w+\s+)+(\w+)\s*\(/, type: 'function' },
+            ],
+            csharp: [
+                { pattern: /^\s*(?:public|private|protected|internal)?\s*(?:partial\s+)?class\s+(\w+)/, type: 'class' },
+                { pattern: /^\s*(?:public|private|protected|internal)?\s*interface\s+(\w+)/, type: 'interface' },
+            ],
+            go: [
+                { pattern: /^func\s+(?:\([^)]+\)\s+)?(\w+)/, type: 'function' },
+                { pattern: /^type\s+(\w+)\s+struct/, type: 'class' },
+                { pattern: /^type\s+(\w+)\s+interface/, type: 'interface' },
+            ],
+            rust: [
+                { pattern: /^(?:pub\s+)?(?:async\s+)?fn\s+(\w+)/, type: 'function' },
+                { pattern: /^(?:pub\s+)?struct\s+(\w+)/, type: 'class' },
+                { pattern: /^(?:pub\s+)?trait\s+(\w+)/, type: 'interface' },
+                { pattern: /^impl(?:<[^>]+>)?\s+(\w+)/, type: 'class' },
+            ],
+            ruby: [
+                { pattern: /^\s*class\s+(\w+)/, type: 'class' },
+                { pattern: /^\s*module\s+(\w+)/, type: 'namespace' },
+                { pattern: /^\s*def\s+(\w+)/, type: 'function' },
+            ]
+        };
+        const patterns = blockPatterns[language] || blockPatterns.javascript || [];
+        // Find all blocks
+        for (let i = 0; i < lines.length; i++) {
+            const line = lines[i];
+            for (const { pattern, type } of patterns) {
+                const match = line.match(pattern);
+                if (match) {
+                    const name = match[1] || 'anonymous';
+                    // Find the end of the block (by braces or indentation)
+                    const endLine = this.findBlockEnd(lines, i, language);
+                    boundaries.push({
+                        startLine: i,
+                        endLine,
+                        type,
+                        name
+                    });
+                    // Skip nested definitions for classes
+                    if (type === 'class' || type === 'interface' || type === 'trait') {
+                        i = endLine;
+                    }
+                    break;
+                }
+            }
+        }
+        return boundaries;
+    }
+    /**
+     * Find the end of a code block
+     */
+    findBlockEnd(lines, startLine, language) {
+        // For brace-delimited languages
+        const braceLanguages = ['php', 'typescript', 'javascript', 'java', 'csharp', 'go', 'rust', 'c', 'cpp'];
+        if (braceLanguages.includes(language)) {
+            let braceCount = 0;
+            let foundFirstBrace = false;
+            for (let i = startLine; i < lines.length; i++) {
+                const line = lines[i];
+                // Count braces (simplified; strings/comments are not accounted for)
+                for (const char of line) {
+                    if (char === '{') {
+                        braceCount++;
+                        foundFirstBrace = true;
+                    }
+                    else if (char === '}') {
+                        braceCount--;
+                    }
+                }
+                // Block closed
+                if (foundFirstBrace && braceCount === 0) {
+                    return i;
+                }
+            }
+        }
+        // For Python - by indentation
+        if (language === 'python') {
+            const startIndent = this.getIndent(lines[startLine]);
+            for (let i = startLine + 1; i < lines.length; i++) {
+                const line = lines[i];
+                if (line.trim() === '')
+                    continue;
+                const indent = this.getIndent(line);
+                if (indent <= startIndent && line.trim() !== '') {
+                    return i - 1;
+                }
+            }
+        }
+        // Fallback: look for the next definition at the same level or the end of the file
+        return Math.min(startLine + 100, lines.length - 1);
+    }
+    /**
+     * Get a line's indentation level
+     */
+    getIndent(line) {
+        const match = line.match(/^(\s*)/);
+        return match ? match[1].length : 0;
+    }
+    /**
+     * Create chunks from the boundaries found
+     */
+    createChunks(lines, boundaries, contextInfo, filePath, opts) {
+        const chunks = [];
+        const fileName = filePath.split(/[/\\]/).pop() || 'file';
+        // No logical boundaries - split by size
+        if (boundaries.length === 0) {
+            return this.chunkBySize(lines, filePath, contextInfo, opts);
+        }
+        // Add a header chunk if there is context
+        if (contextInfo.context && contextInfo.context.length > opts.minChunkSize) {
+            chunks.push({
+                id: `${filePath}:chunk:header`,
+                filePath,
+                chunkIndex: 0,
+                totalChunks: 0, // Updated later
+                startLine: 0,
+                endLine: contextInfo.endLine,
+                type: 'header',
+                name: `${fileName} imports`,
+                content: contextInfo.context,
+                context: ''
+            });
+        }
+        // Create a chunk for each logical boundary
+        for (const boundary of boundaries) {
+            const chunkLines = lines.slice(boundary.startLine, boundary.endLine + 1);
+            let content = chunkLines.join('\n');
+            // If the chunk is too big, split it further
+            if (content.length > opts.maxChunkSize) {
+                const subChunks = this.splitLargeBlock(chunkLines, boundary, filePath, contextInfo, opts);
+                chunks.push(...subChunks);
+            }
+            else {
+                // Add the context if needed
+                const contextPrefix = opts.includeContext && contextInfo.context
+                    ? `// Context from ${fileName}:\n${contextInfo.context}\n\n// ${boundary.type}: ${boundary.name}\n`
+                    : '';
+                chunks.push({
+                    id: `${filePath}:chunk:${boundary.type}:${boundary.name}:${boundary.startLine}`,
+                    filePath,
+                    chunkIndex: chunks.length,
+                    totalChunks: 0,
+                    startLine: boundary.startLine,
+                    endLine: boundary.endLine,
+                    type: boundary.type,
+                    name: boundary.name,
+                    content,
+                    context: contextPrefix
+                });
+            }
+        }
+        // Update totalChunks
+        const total = chunks.length;
+        chunks.forEach((chunk, i) => {
+            chunk.totalChunks = total;
+            chunk.chunkIndex = i;
+        });
+        return chunks;
+    }
+    /**
+     * Split a large block into sub-chunks
+     */
+    splitLargeBlock(lines, boundary, filePath, contextInfo, opts) {
+        const chunks = [];
+        let currentChunk = [];
+        let currentSize = 0;
+        let chunkStartLine = boundary.startLine;
+        let partNum = 1;
+        for (let i = 0; i < lines.length; i++) {
+            const line = lines[i];
+            const lineSize = line.length + 1; // +1 for newline
+            if (currentSize + lineSize > opts.maxChunkSize && currentChunk.length > 0) {
+                // Save the current chunk
+                const contextPrefix = opts.includeContext && contextInfo.context
+                    ? `// Context: ${boundary.type} ${boundary.name} (part ${partNum})\n`
+                    : '';
+                chunks.push({
+                    id: `${filePath}:chunk:${boundary.type}:${boundary.name}:${chunkStartLine}:part${partNum}`,
+                    filePath,
+                    chunkIndex: chunks.length,
+                    totalChunks: 0,
+                    startLine: chunkStartLine,
+                    endLine: boundary.startLine + i - 1,
+                    type: boundary.type,
+                    name: `${boundary.name} (part ${partNum})`,
+                    content: currentChunk.join('\n'),
+                    context: contextPrefix
+                });
+                currentChunk = [];
+                currentSize = 0;
+                chunkStartLine = boundary.startLine + i;
+                partNum++;
+            }
+            currentChunk.push(line);
+            currentSize += lineSize;
+        }
+        // The last chunk
+        if (currentChunk.length > 0) {
+            chunks.push({
+                id: `${filePath}:chunk:${boundary.type}:${boundary.name}:${chunkStartLine}:part${partNum}`,
+                filePath,
+                chunkIndex: chunks.length,
+                totalChunks: 0,
+                startLine: chunkStartLine,
+                endLine: boundary.endLine,
+                type: boundary.type,
+                name: `${boundary.name} (part ${partNum})`,
+                content: currentChunk.join('\n'),
+                context: opts.includeContext ? `// Context: ${boundary.type} ${boundary.name} (part ${partNum})\n` : ''
+            });
+        }
+        return chunks;
+    }
+    /**
+     * Split the file by size (fallback)
+     */
+    chunkBySize(lines, filePath, contextInfo, opts) {
+        const chunks = [];
+        const fileName = filePath.split(/[/\\]/).pop() || 'file';
+        let currentChunk = [];
+        let currentSize = 0;
+        let chunkStartLine = 0;
+        let partNum = 1;
+        for (let i = 0; i < lines.length; i++) {
+            const line = lines[i];
+            const lineSize = line.length + 1;
+            if (currentSize + lineSize > opts.maxChunkSize && currentChunk.length > 0) {
+                chunks.push({
+                    id: `${filePath}:chunk:${chunkStartLine}:part${partNum}`,
+                    filePath,
+                    chunkIndex: chunks.length,
+                    totalChunks: 0,
+                    startLine: chunkStartLine,
+                    endLine: i - 1,
+                    type: 'code',
+                    name: `${fileName} (part ${partNum})`,
+                    content: currentChunk.join('\n'),
+                    context: opts.includeContext && partNum === 1 ? contextInfo.context : ''
+                });
+                currentChunk = [];
+                currentSize = 0;
+                chunkStartLine = i;
+                partNum++;
+            }
+            currentChunk.push(line);
+            currentSize += lineSize;
+        }
+        if (currentChunk.length > 0) {
+            chunks.push({
+                id: `${filePath}:chunk:${chunkStartLine}:part${partNum}`,
+                filePath,
+                chunkIndex: chunks.length,
+                totalChunks: 0,
+                startLine: chunkStartLine,
+                endLine: lines.length - 1,
+                type: 'code',
+                name: `${fileName} (part ${partNum})`,
+                content: currentChunk.join('\n'),
+                context: ''
+            });
+        }
+        // Update totalChunks
+        const total = chunks.length;
+        chunks.forEach((chunk, i) => {
+            chunk.totalChunks = total;
+            chunk.chunkIndex = i;
+        });
+        return chunks;
+    }
+}
+//# sourceMappingURL=file-chunker.js.map
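
A rough worked example of the size fallback (chunkBySize): with the default maxChunkSize of 4000 characters, 125 lines of 80 characters each (79 plus newline) pack into parts of 50, 50 and 25 lines. A sketch of the same accumulation loop, with hypothetical filler input:

// Sketch: pack whole lines until adding the next one would exceed maxChunkSize.
const maxChunkSize = 4000;
const lines = Array.from({ length: 125 }, (_, i) => `// filler ${i}`.padEnd(79));
const parts: string[][] = [];
let current: string[] = [];
let size = 0;
for (const line of lines) {
    if (size + line.length + 1 > maxChunkSize && current.length > 0) {
        parts.push(current);
        current = [];
        size = 0;
    }
    current.push(line);
    size += line.length + 1; // +1 for the newline, as in the diff
}
if (current.length > 0) parts.push(current);
console.log(parts.map(p => p.length)); // [ 50, 50, 25 ]
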
package/dist/code-index/index.d.ts
CHANGED
@@ -60,5 +60,7 @@ export declare class CodeIndex {
     private getSymbolsByKind;
 }
 export { ASTParser, SymbolExtractor, DependencyGraphBuilder, SourceMapExtractor };
+export { FileChunker } from './file-chunker.js';
 export type { VirtualFile, ExtractionResult } from './source-map-extractor.js';
+export type { FileChunk, ChunkingOptions } from './file-chunker.js';
 //# sourceMappingURL=index.d.ts.map
package/dist/code-index/index.js
CHANGED
package/dist/code-index/symbol-extractor.js
CHANGED
@@ -59,7 +59,31 @@ export class SymbolExtractor {
             'impl_item': SymbolKind.Class,
             'struct_item': SymbolKind.Class,
             'enum_item': SymbolKind.Type,
-            'trait_item': SymbolKind.Interface
+            'trait_item': SymbolKind.Interface,
+            // PHP and general
+            'namespace_definition': SymbolKind.Namespace,
+            'namespace_use_declaration': SymbolKind.Variable,
+            'trait_declaration': SymbolKind.Interface,
+            'property_declaration': SymbolKind.Variable,
+            'enum_declaration': SymbolKind.Type,
+            'enum_declaration_list': SymbolKind.Type,
+            'module_declaration': SymbolKind.Namespace, // For regex-parsed namespaces
+            'struct_declaration': SymbolKind.Class, // For regex-parsed structs
+            'impl_declaration': SymbolKind.Class, // For regex-parsed impl blocks
+            // Java
+            'constructor_declaration': SymbolKind.Function,
+            'field_declaration': SymbolKind.Variable,
+            'annotation_type_declaration': SymbolKind.Interface,
+            // C/C++
+            'struct_specifier': SymbolKind.Class,
+            'union_specifier': SymbolKind.Class,
+            'enum_specifier': SymbolKind.Type,
+            'preproc_function_def': SymbolKind.Function,
+            // Ruby
+            'method': SymbolKind.Function,
+            'singleton_method': SymbolKind.Function,
+            'module': SymbolKind.Namespace,
+            'class': SymbolKind.Class
         };
         return mapping[nodeType] || null;
     }
package/dist/orchestrator/index.js
CHANGED
@@ -403,8 +403,19 @@ A: "✅ Компонент Comments найден в 3 файлах:
             prompt += '\n';
             for (const result of context.semanticMemory.slice(0, maxResults)) {
                 const cleanPath = sanitizePath(result.chunk.metadata.filePath);
-
-
+                const lineInfo = result.chunk.metadata.startLine > 0
+                    ? `:${result.chunk.metadata.startLine}-${result.chunk.metadata.endLine}`
+                    : '';
+                prompt += `\n### Файл: ${cleanPath}${lineInfo}\n`;
+                prompt += `Символы: ${result.chunk.metadata.symbols.join(', ') || 'N/A'}\n`;
+                prompt += `Тип: ${result.chunk.metadata.type}\n`;
+                // Use the full content from chunk.content, not the truncated context
+                const codeContent = result.chunk.content || result.context;
+                // Cap at 3000 characters per file so the context is not overflowed
+                const truncatedCode = codeContent.length > 3000
+                    ? codeContent.substring(0, 3000) + '\n... (truncated)'
+                    : codeContent;
+                prompt += `\`\`\`\n${truncatedCode}\n\`\`\`\n`;
             }
             prompt += '\n###END PROJECT FILES###';
         }
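
For orientation: given a hit whose metadata points at a hypothetical src/controllers/CommentController.php, the format strings above append a block of roughly this shape to the prompt, with the code body capped at 3000 characters:

### Файл: src/controllers/CommentController.php:12-84
Символы: CommentController
Тип: class
```
<?php
namespace App\Controllers;
class CommentController {
    ...
... (truncated)
```
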
package/dist/semantic-memory/index.d.ts
CHANGED
@@ -16,6 +16,14 @@ export declare class SemanticMemory {
     constructor(embeddingConfig: EmbeddingConfig, vectorStoreConfig: VectorStoreConfig);
     initialize(): Promise<void>;
     indexSymbols(symbols: Map<string, Symbol>, asts: Map<string, ASTNode>, progressCallback?: (current: number, total: number) => void): Promise<void>;
+    /**
+     * Index files with smart chunking
+     * For large files, creates multiple chunks along logical boundaries
+     */
+    indexFileChunks(fileContents: Map<string, string>, progressCallback?: (current: number, total: number) => void): Promise<{
+        indexed: number;
+        chunks: number;
+    }>;
     indexModules(asts: Map<string, ASTNode>): Promise<void>;
     searchByQuery(query: string, limit?: number): Promise<SemanticSearchResult[]>;
     searchSimilarCode(code: string, metadata?: {
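
A call sketch for the new method. The config objects are placeholders (their concrete shapes are EmbeddingConfig and VectorStoreConfig, which depend on your setup) and the import path is an assumption:

import { SemanticMemory } from 'archicore/dist/semantic-memory/index.js';
declare const embeddingConfig: any;   // placeholder: EmbeddingConfig
declare const vectorStoreConfig: any; // placeholder: VectorStoreConfig

async function reindex(files: Map<string, string>) {
    const memory = new SemanticMemory(embeddingConfig, vectorStoreConfig);
    await memory.initialize();
    const { indexed, chunks } = await memory.indexFileChunks(files,
        (current, total) => console.log(`embeddings: ${current}/${total}`));
    console.log(`${chunks} chunks indexed from ${indexed} files`);
}
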
package/dist/semantic-memory/index.js
CHANGED
@@ -10,6 +10,8 @@
 import { EmbeddingService } from './embedding-service.js';
 import { VectorStore } from './vector-store.js';
 import { Logger } from '../utils/logger.js';
+import { FileChunker } from '../code-index/file-chunker.js';
+import { FileUtils } from '../utils/file-utils.js';
 export class SemanticMemory {
     embeddingService;
     vectorStore;
@@ -85,6 +87,77 @@ export class SemanticMemory {
         }
         Logger.success(`Indexed ${symbolData.length} symbols (batch mode)`);
     }
+    /**
+     * Index files with smart chunking
+     * For large files, creates multiple chunks along logical boundaries
+     */
+    async indexFileChunks(fileContents, progressCallback) {
+        Logger.progress('Indexing file chunks into semantic memory...');
+        const chunker = new FileChunker();
+        const allChunks = [];
+        const files = Array.from(fileContents.entries());
+        // Step 1: split all files into chunks
+        for (const [filePath, content] of files) {
+            const language = FileUtils.getLanguageFromExtension(filePath);
+            const chunks = chunker.chunkFile(content, filePath, {
+                language,
+                maxChunkSize: 4000,
+                minChunkSize: 200,
+                includeContext: true
+            });
+            allChunks.push(...chunks);
+        }
+        if (allChunks.length === 0) {
+            Logger.warn('No chunks to index');
+            return { indexed: 0, chunks: 0 };
+        }
+        Logger.progress(`Prepared ${allChunks.length} chunks from ${files.length} files`);
+        // Step 2: generate embeddings in batches
+        const textsToEmbed = [];
+        for (const chunk of allChunks) {
+            // Prepend the context for better understanding
+            const textWithContext = chunk.context
+                ? `${chunk.context}\n${chunk.content}`
+                : chunk.content;
+            textsToEmbed.push(this.embeddingService.prepareCodeForEmbedding(textWithContext, `File: ${chunk.filePath}\nType: ${chunk.type}\nName: ${chunk.name}`));
+        }
+        Logger.progress(`Generating embeddings for ${textsToEmbed.length} chunks...`);
+        const embeddings = await this.embeddingService.generateBatchEmbeddings(textsToEmbed, progressCallback);
+        // Step 3: build SemanticChunks and store them in the vector store
+        Logger.progress('Storing chunks in vector DB...');
+        const semanticChunks = [];
+        for (let i = 0; i < allChunks.length; i++) {
+            const chunk = allChunks[i];
+            const embedding = embeddings[i];
+            const metadata = {
+                filePath: chunk.filePath,
+                startLine: chunk.startLine,
+                endLine: chunk.endLine,
+                type: chunk.type === 'class' ? 'class' :
+                    chunk.type === 'function' ? 'function' :
+                        'module',
+                symbols: [chunk.name],
+                tags: [chunk.type, chunk.name, this.extractDomain(chunk.filePath)]
+            };
+            semanticChunks.push({
+                id: chunk.id,
+                content: chunk.content,
+                embedding,
+                metadata
+            });
+            // Batch upsert
+            if (semanticChunks.length >= 500) {
+                await this.vectorStore.upsertChunks(semanticChunks);
+                semanticChunks.length = 0;
+            }
+        }
+        // Remaining chunks
+        if (semanticChunks.length > 0) {
+            await this.vectorStore.upsertChunks(semanticChunks);
+        }
+        Logger.success(`Indexed ${allChunks.length} chunks from ${files.length} files`);
+        return { indexed: files.length, chunks: allChunks.length };
+    }
     async indexModules(asts) {
         Logger.progress('Indexing modules into semantic memory...');
         const chunks = [];
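
One detail in the batch loop above: after each 500-chunk upsert, `semanticChunks.length = 0` empties the accumulator array in place, so only the tail remains for the final flush. Minimal illustration:

const buf: number[] = [1, 2, 3];
buf.length = 0;   // truncates in place: same array object, now empty
console.log(buf); // []
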
package/dist/server/services/project-service.js
CHANGED
@@ -428,6 +428,8 @@ export class ProjectService {
         // Analyze the project's technology stack
         const { analyzeProjectStack } = await import('../../utils/project-analyzer.js');
         const projectMetadata = analyzeProjectStack(project.path);
+        // Load file contents for full context
+        const fileContents = await this.getFileContents(projectId);
         // Search for relevant context (if semantic memory is available)
         let searchResults = [];
         if (data.semanticMemory) {
@@ -435,20 +437,34 @@ export class ProjectService {
                 // Raise the search result limit from 5 to 30
                 searchResults = await data.semanticMemory.searchByQuery(question, 30);
                 Logger.debug(`Semantic search returned ${searchResults.length} results`);
+                // Enrich the results with full file contents
+                for (const result of searchResults) {
+                    const fullContent = fileContents.get(result.chunk.metadata.filePath);
+                    if (fullContent && fullContent.length > result.chunk.content.length) {
+                        // Replace the short fragment with the full contents (up to 5000 characters)
+                        result.chunk.content = fullContent.length > 5000
+                            ? fullContent.substring(0, 5000) + '\n... (file truncated)'
+                            : fullContent;
+                    }
+                }
             }
             catch (err) {
                 Logger.warn('Semantic search failed, using fallback');
             }
         }
-        // Fallback: if semantic search is not working, provide context from the graph
+        // Fallback: if semantic search is not working, provide context from the graph + file contents
         if (searchResults.length === 0 && data.graph) {
             Logger.info('Using graph fallback for context');
-            const files = Array.from(data.graph.nodes.values()).slice(0, 30);
+            const files = Array.from(data.graph.nodes.values()).slice(0, 30);
             for (const file of files) {
+                const fullContent = fileContents.get(file.filePath) || `File: ${file.filePath}\nType: ${file.type}`;
+                const truncatedContent = fullContent.length > 5000
+                    ? fullContent.substring(0, 5000) + '\n... (file truncated)'
+                    : fullContent;
                 searchResults.push({
                     chunk: {
                         id: file.id,
-                        content:
+                        content: truncatedContent,
                         embedding: [],
                         metadata: {
                             filePath: file.filePath,
@@ -460,13 +476,13 @@ export class ProjectService {
                         }
                     },
                     score: 1,
-                    context:
+                    context: truncatedContent
                 });
             }
         }
         // Add symbol information if present
         if (data.symbols && data.symbols.size > 0) {
-            const symbolsList = Array.from(data.symbols.values()).slice(0, 50);
+            const symbolsList = Array.from(data.symbols.values()).slice(0, 50);
             for (const sym of symbolsList) {
                 const existing = searchResults.find(r => r.chunk.metadata.filePath === sym.filePath);
                 if (existing) {
@@ -580,7 +596,15 @@ export class ProjectService {
         // Indexing into semantic memory (if available)
         if (data.semanticMemory) {
             await data.semanticMemory.initialize();
+            // Index symbols (functions, classes)
             await data.semanticMemory.indexSymbols(symbols, asts);
+            // Also index files with smart chunking for large files
+            if (indexedData.fileContents && indexedData.fileContents.length > 0) {
+                const fileContentsMap = new Map(indexedData.fileContents);
+                Logger.progress(`Indexing ${fileContentsMap.size} files with smart chunking...`);
+                const chunkResult = await data.semanticMemory.indexFileChunks(fileContentsMap);
+                Logger.success(`Indexed ${chunkResult.chunks} chunks from ${chunkResult.indexed} files`);
+            }
         }
         // Persist the index data to disk
         const projectDataDir = path.join(this.dataDir, projectId);