archicore 0.3.9 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/interactive.js +3 -1
- package/dist/cli/utils/upload-utils.js +33 -26
- package/dist/code-index/ast-parser.js +21 -6
- package/dist/code-index/file-chunker.d.ts +61 -0
- package/dist/code-index/file-chunker.js +431 -0
- package/dist/code-index/index.d.ts +2 -0
- package/dist/code-index/index.js +1 -0
- package/dist/code-index/symbol-extractor.js +25 -1
- package/dist/orchestrator/index.js +13 -2
- package/dist/semantic-memory/index.d.ts +8 -0
- package/dist/semantic-memory/index.js +73 -0
- package/dist/semantic-memory/vector-store.js +1 -1
- package/dist/server/services/project-service.js +29 -5
- package/dist/utils/file-utils.js +15 -9
- package/dist/utils/logger.js +2 -1
- package/package.json +1 -1
package/dist/cli/commands/interactive.js
```diff
@@ -710,7 +710,9 @@ async function handleIndexCommand() {
             // Detailed error handling
             indexSpinner.fail('Indexing failed');
             // Debug output to see what's in the result
-
+            if (process.env.DEBUG) {
+                console.log(colors.dim(`  [DEBUG] uploadResult: ${JSON.stringify(uploadResult, null, 2)}`));
+            }
             if (uploadResult.errorDetails) {
                 const { code, message, suggestion, technicalDetails } = uploadResult.errorDetails;
                 console.log();
```
package/dist/cli/utils/upload-utils.js
```diff
@@ -7,6 +7,13 @@
  * - Detailed error handling
  */
 import { loadConfig } from './config.js';
+// Debug logging (only when DEBUG env var is set)
+const DEBUG = process.env.DEBUG === 'true' || process.env.DEBUG === '1';
+function debugLog(message) {
+    if (DEBUG) {
+        debugLog(`  ${message}`);
+    }
+}
 // Limits for chunked upload (tuned for very large projects and unstable connections)
 const MAX_PAYLOAD_SIZE = 3 * 1024 * 1024; // 3MB per chunk (reduced for reliability on slow connections)
 const MAX_SYMBOLS_PER_CHUNK = 1500; // Fewer symbols per chunk for stability
```
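Note an apparent bug in the helper as published: `debugLog` calls itself inside its own `DEBUG` guard, so the first debug call with `DEBUG` set would recurse until the stack overflows. A minimal corrected sketch, assuming the intent was simply to print (the use of `console.error` is a guess, not confirmed by the diff):

```js
// Corrected sketch of the helper above; as shipped, the body re-invokes
// debugLog() instead of actually printing anything.
const DEBUG = process.env.DEBUG === 'true' || process.env.DEBUG === '1';
function debugLog(message) {
    if (DEBUG) {
        console.error(`  ${message}`); // assumption: emit to stderr, don't recurse
    }
}
```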
```diff
@@ -190,7 +197,7 @@ async function fetchWithRetry(url, options, timeout = UPLOAD_TIMEOUT, maxRetries
         if (attempt < maxRetries) {
             // 2s, 4s, 8s, 16s, 32s, up to 60s max
             const delay = Math.min(2000 * Math.pow(2, attempt - 1), 60000);
-
+            debugLog(`  fetchWithRetry: waiting ${delay / 1000}s before attempt ${attempt + 1}...`);
             await new Promise(resolve => setTimeout(resolve, delay));
         }
     }
```
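For reference, the delay formula above produces exactly the schedule the comment names; a quick sketch of the values:

```js
// Math.min(2000 * 2^(attempt - 1), 60000) for attempts 1..7:
// 2s, 4s, 8s, 16s, 32s, 60s (capped), 60s (capped)
for (let attempt = 1; attempt <= 7; attempt++) {
    const delay = Math.min(2000 * Math.pow(2, attempt - 1), 60000);
    console.log(`attempt ${attempt + 1} after ${delay / 1000}s`);
}
```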
```diff
@@ -215,11 +222,11 @@ export async function uploadIndexData(projectId, data, onProgress) {
     });
     // For small projects, use a regular single-request upload
     if (!isLargeProject) {
-
+        debugLog(`  Using single request upload for ${data.symbols.length} symbols`);
         return uploadSingleRequest(url, projectId, data, config.accessToken || '', onProgress);
     }
     // For large projects, use a chunked upload
-
+    debugLog(`  Using chunked upload for large project (${data.symbols.length} symbols)`);
     return uploadChunked(url, projectId, data, config.accessToken || '', onProgress);
 }
 /**
@@ -243,11 +250,11 @@ async function uploadSingleRequest(url, projectId, data, accessToken, onProgress
     });
     if (!response.ok) {
         const errorBody = await response.json().catch(() => ({}));
-
+        debugLog(`  HTTP error ${response.status}: ${JSON.stringify(errorBody)}`);
         const errorDetails = analyzeHttpError(response.status, errorBody);
         // If the payload is too large, try a chunked upload
         if (response.status === 413) {
-
+            debugLog(`  Payload too large, trying chunked upload`);
             return uploadChunked(url, projectId, data, accessToken, onProgress);
         }
         return {
@@ -269,13 +276,13 @@ async function uploadSingleRequest(url, projectId, data, accessToken, onProgress
         };
     }
     catch (error) {
-
+        debugLog(`  uploadSingleRequest caught error: ${error}`);
         const errorDetails = analyzeNetworkError(error);
-
+        debugLog(`  Analyzed error: ${JSON.stringify(errorDetails)}`);
         // If the error is size-related, try a chunked upload
         if (errorDetails.code === 'CONNECTION_RESET' ||
             errorDetails.code === 'PAYLOAD_TOO_LARGE') {
-
+            debugLog(`  Trying chunked upload due to ${errorDetails.code}`);
             return uploadChunked(url, projectId, data, accessToken, onProgress);
         }
         return {
@@ -293,7 +300,7 @@ async function uploadChunked(baseUrl, projectId, data, accessToken, onProgress)
     // For very large projects, skip fileContents (saves traffic and time)
     const isVeryLargeProject = data.symbols.length > VERY_LARGE_PROJECT_SYMBOLS;
     if (isVeryLargeProject) {
-
+        debugLog(`  Very large project (${data.symbols.length} symbols), skipping fileContents upload`);
     }
     // Split the data into chunks
     const symbolChunks = chunkArray(data.symbols, MAX_SYMBOLS_PER_CHUNK);
@@ -301,7 +308,7 @@ async function uploadChunked(baseUrl, projectId, data, accessToken, onProgress)
     const fileChunks = isVeryLargeProject ? [] : chunkArray(data.fileContents, MAX_FILES_PER_CHUNK);
     const totalChunks = symbolChunks.length + astChunks.length + fileChunks.length + 1; // +1 for graph
     let completedChunks = 0;
-
+    debugLog(`  Chunked upload: ${symbolChunks.length} symbol chunks, ${astChunks.length} AST chunks, ${fileChunks.length} file chunks`);
     onProgress?.({
         phase: 'uploading',
         current: 0,
@@ -328,13 +335,13 @@ async function uploadChunked(baseUrl, projectId, data, accessToken, onProgress)
     }
     catch (initError) {
         // The server doesn't support chunked upload, or is unreachable
-
+        debugLog(`  Chunked upload not supported, falling back to minimal data`);
         return uploadMinimalData(baseUrl, projectId, data, accessToken, onProgress);
     }
     if (!initResponse.ok) {
         // Fallback: if the server doesn't support chunked upload,
         // send only the minimal data
-
+        debugLog(`  Init returned ${initResponse.status}, falling back to minimal data`);
         return uploadMinimalData(baseUrl, projectId, data, accessToken, onProgress);
     }
     const initResult = await initResponse.json();
@@ -343,13 +350,13 @@ async function uploadChunked(baseUrl, projectId, data, accessToken, onProgress)
     const PARALLEL_UPLOADS = 3;
     // Helper for parallel uploads with per-chunk retry
     async function uploadChunksParallel(chunks, chunkType, label) {
-
+        debugLog(`  Starting parallel upload of ${chunks.length} ${chunkType} chunks`);
         const failedChunks = [];
         const MAX_CHUNK_RETRIES = 5; // Raised for unstable connections (Debian etc.)
         for (let batch = 0; batch < chunks.length; batch += PARALLEL_UPLOADS) {
             const batchChunks = chunks.slice(batch, batch + PARALLEL_UPLOADS);
             const batchNum = Math.floor(batch / PARALLEL_UPLOADS) + 1;
-
+            debugLog(`  Uploading batch ${batchNum} (${batchChunks.length} chunks)`);
             // Upload each chunk with individual retry logic
             const results = await Promise.allSettled(batchChunks.map(async (chunk, idx) => {
                 const chunkIndex = batch + idx;
@@ -357,17 +364,17 @@ async function uploadChunked(baseUrl, projectId, data, accessToken, onProgress)
                 for (let retry = 0; retry < MAX_CHUNK_RETRIES; retry++) {
                     try {
                         await uploadChunk(config.serverUrl, projectId, uploadId, chunkType, chunkIndex, chunk, accessToken);
-
+                        debugLog(`  Chunk ${chunkType}[${chunkIndex}] uploaded`);
                         return { success: true, chunkIndex };
                     }
                     catch (error) {
                         lastError = error instanceof Error ? error : new Error(String(error));
-
+                        debugLog(`  Chunk ${chunkType}[${chunkIndex}] failed (attempt ${retry + 1}/${MAX_CHUNK_RETRIES}): ${lastError.message}`);
                         // Exponential backoff before retry (raised for unstable connections)
                         if (retry < MAX_CHUNK_RETRIES - 1) {
                             // More aggressive backoff: 2s, 4s, 8s, 16s, up to 30s max
                             const delay = Math.min(2000 * Math.pow(2, retry), 30000);
-
+                            debugLog(`  Waiting ${delay / 1000}s before retry ${retry + 2}...`);
                             await new Promise(resolve => setTimeout(resolve, delay));
                         }
                     }
@@ -385,7 +392,7 @@ async function uploadChunked(baseUrl, projectId, data, accessToken, onProgress)
                 else {
                     const chunkIndex = batch + i;
                     failedChunks.push(chunkIndex);
-
+                    debugLog(`  Chunk ${chunkType}[${chunkIndex}] failed permanently: ${result.reason}`);
                 }
             }
             completedChunks += batchSuccesses;
@@ -400,10 +407,10 @@ async function uploadChunked(baseUrl, projectId, data, accessToken, onProgress)
                 throw new Error(`Too many chunks failed (${failedChunks.length}/${chunks.length}). Network may be unstable. Try again or check your connection.`);
             }
         }
-
+        debugLog(`  Finished uploading ${chunks.length} ${chunkType} chunks (${failedChunks.length} failed)`);
         // If any chunks failed, warn but continue if under threshold
         if (failedChunks.length > 0) {
-
+            debugLog(`  Warning: ${failedChunks.length} ${chunkType} chunks failed to upload`);
         }
     }
     // 2. Upload the ASTs in parallel
@@ -475,7 +482,7 @@ async function uploadChunked(baseUrl, projectId, data, accessToken, onProgress)
 async function uploadMinimalData(url, _projectId, data, accessToken, onProgress) {
     const symbolCount = data.symbols.length;
     const fileCount = data.asts.length;
-
+    debugLog(`  uploadMinimalData: ${symbolCount} symbols, ${fileCount} files`);
     onProgress?.({
         phase: 'uploading',
         current: 50,
@@ -491,9 +498,9 @@ async function uploadMinimalData(url, _projectId, data, accessToken, onProgress)
         statistics: data.statistics,
         // Without fileContents, which is the largest part
     };
-
+    debugLog(`  Minimal payload: ${minimalData.asts.length} ASTs, ${minimalData.symbols.length} symbols`);
     try {
-
+        debugLog(`  Sending minimal data to ${url}`);
         const response = await fetchWithRetry(url, {
             method: 'POST',
             headers: {
@@ -502,10 +509,10 @@ async function uploadMinimalData(url, _projectId, data, accessToken, onProgress)
             },
             body: JSON.stringify(minimalData),
         }, 180000, 2); // 3 minutes, 2 attempts
-
+        debugLog(`  Response status: ${response.status}`);
         if (!response.ok) {
             const errorBody = await response.json().catch(() => ({}));
-
+            debugLog(`  Error body: ${JSON.stringify(errorBody)}`);
             const errorDetails = analyzeHttpError(response.status, errorBody);
             return {
                 success: false,
@@ -529,7 +536,7 @@ async function uploadMinimalData(url, _projectId, data, accessToken, onProgress)
         };
     }
     catch (error) {
-
+        debugLog(`  uploadMinimalData error: ${error}`);
         const errorDetails = analyzeNetworkError(error);
         return {
             success: false,
```
package/dist/code-index/ast-parser.js
```diff
@@ -162,10 +162,12 @@ export class ASTParser {
             /^(?:public|private|protected)?\s*(?:static\s+)?(?:final\s+)?(?:\w+\s+)+(\w+)\s*\(/
         ],
         php: [
-
-
-
-
+            /^\s*namespace\s+([\w\\]+)/, // namespace App\Controllers
+            /^\s*(?:final\s+|abstract\s+)?class\s+(\w+)/, // class, final class, abstract class
+            /^\s*interface\s+(\w+)/, // interface
+            /^\s*trait\s+(\w+)/, // trait
+            /^\s*(?:public|private|protected)?\s*(?:static\s+)?function\s+(\w+)/, // function
+            /^\s*const\s+(\w+)\s*=/, // const
         ],
         ruby: [
             /^def\s+(\w+)/,
```
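The new PHP patterns can be sanity-checked in isolation; the sample lines below are illustrative, not taken from the package's tests:

```js
// Illustrative check of the new PHP regex fallback patterns.
const phpPatterns = [
    /^\s*namespace\s+([\w\\]+)/,
    /^\s*(?:final\s+|abstract\s+)?class\s+(\w+)/,
    /^\s*interface\s+(\w+)/,
    /^\s*trait\s+(\w+)/,
    /^\s*(?:public|private|protected)?\s*(?:static\s+)?function\s+(\w+)/,
    /^\s*const\s+(\w+)\s*=/,
];
const samples = [
    'namespace App\\Controllers;',
    'final class CommentController',
    'interface Renderable',
    'trait HasTimestamps',
    '    public static function fromArray($data)',
    "const STATUS_ACTIVE = 'active';",
];
for (const line of samples) {
    const hit = phpPatterns.map(p => line.match(p)).find(Boolean);
    console.log(line, '->', hit ? hit[1] : 'no match');
}
```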
```diff
@@ -400,16 +402,29 @@ export class ASTParser {
                     continue;
                 }
                 const type = this.inferTypeFromPattern(pattern, language);
+                // Capture more context: the following lines, up to the closing brace
+                let codeContext = trimmedLine;
+                let endLineIndex = index;
+                // For functions/classes, try to capture the body (up to 50 lines)
+                if (type.includes('function') || type.includes('class') || type.includes('method')) {
+                    let braceCount = (trimmedLine.match(/\{/g) || []).length - (trimmedLine.match(/\}/g) || []).length;
+                    for (let j = index + 1; j < Math.min(index + 50, lines.length) && braceCount > 0; j++) {
+                        const nextLine = lines[j];
+                        codeContext += '\n' + nextLine;
+                        braceCount += (nextLine.match(/\{/g) || []).length - (nextLine.match(/\}/g) || []).length;
+                        endLineIndex = j;
+                    }
+                }
                 children.push({
                     id: `${filePath}:${name}:${index}`,
                     type,
                     name,
                     filePath,
                     startLine: index,
-                    endLine:
+                    endLine: endLineIndex,
                     children: [],
                     metadata: {
-                        text:
+                        text: codeContext.substring(0, 2000), // Raised from 200 to 2000
                         hasErrors: false,
                         regexParsed: true
                     }
```
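The body capture above keeps a running brace balance over at most 50 lines. A standalone sketch of the same logic, with the caveat that braces inside strings or comments are counted too, since this is the regex fallback path:

```js
// Minimal sketch of the brace-balance capture used above.
function captureBody(lines, index) {
    let codeContext = lines[index];
    let braceCount = (lines[index].match(/\{/g) || []).length - (lines[index].match(/\}/g) || []).length;
    for (let j = index + 1; j < Math.min(index + 50, lines.length) && braceCount > 0; j++) {
        codeContext += '\n' + lines[j];
        braceCount += (lines[j].match(/\{/g) || []).length - (lines[j].match(/\}/g) || []).length;
    }
    return codeContext;
}
console.log(captureBody(['function f() {', '  return 1;', '}'], 0));
// Note: a "}" inside a string literal would end the capture early,
// which is an accepted trade-off for a regex fallback parser.
```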
package/dist/code-index/file-chunker.d.ts (new file, 61 lines)
```ts
/**
 * File Chunker
 *
 * Smart splitting of large files into logical parts:
 * - by classes/functions/namespaces
 * - keeping context (imports, namespace)
 * - for effective semantic search
 */
export interface FileChunk {
    id: string;
    filePath: string;
    chunkIndex: number;
    totalChunks: number;
    startLine: number;
    endLine: number;
    type: 'header' | 'namespace' | 'class' | 'function' | 'trait' | 'interface' | 'code';
    name: string;
    content: string;
    context: string;
}
export interface ChunkingOptions {
    maxChunkSize: number;
    minChunkSize: number;
    includeContext: boolean;
    language: string;
}
export declare class FileChunker {
    /**
     * Split a file into logical chunks
     */
    chunkFile(content: string, filePath: string, options?: Partial<ChunkingOptions>): FileChunk[];
    /**
     * Extract the file context (imports, namespace)
     */
    private extractContext;
    /**
     * Find logical boundaries in a file
     */
    private findLogicalBoundaries;
    /**
     * Find the end of a code block
     */
    private findBlockEnd;
    /**
     * Get a line's indentation level
     */
    private getIndent;
    /**
     * Build chunks from the boundaries found
     */
    private createChunks;
    /**
     * Split a large block into sub-chunks
     */
    private splitLargeBlock;
    /**
     * Split a file by size (fallback)
     */
    private chunkBySize;
}
//# sourceMappingURL=file-chunker.d.ts.map
```
package/dist/code-index/file-chunker.js (new file, 431 lines)
```js
/**
 * File Chunker
 *
 * Smart splitting of large files into logical parts:
 * - by classes/functions/namespaces
 * - keeping context (imports, namespace)
 * - for effective semantic search
 */
import { Logger } from '../utils/logger.js';
const DEFAULT_OPTIONS = {
    maxChunkSize: 4000,
    minChunkSize: 500,
    includeContext: true,
    language: 'unknown'
};
export class FileChunker {
    /**
     * Split a file into logical chunks
     */
    chunkFile(content, filePath, options = {}) {
        const opts = { ...DEFAULT_OPTIONS, ...options };
        const lines = content.split('\n');
        // If the file is small, return it as a single chunk
        if (content.length <= opts.maxChunkSize) {
            return [{
                    id: `${filePath}:chunk:0`,
                    filePath,
                    chunkIndex: 0,
                    totalChunks: 1,
                    startLine: 0,
                    endLine: lines.length - 1,
                    type: 'code',
                    name: filePath.split(/[/\\]/).pop() || 'file',
                    content,
                    context: ''
                }];
        }
        // Extract the context (imports, namespace, use statements)
        const contextInfo = this.extractContext(lines, opts.language);
        // Find the logical boundaries (classes, functions, etc.)
        const boundaries = this.findLogicalBoundaries(lines, opts.language);
        // Build the chunks
        const chunks = this.createChunks(lines, boundaries, contextInfo, filePath, opts);
        Logger.debug(`Chunked ${filePath}: ${content.length} chars -> ${chunks.length} chunks`);
        return chunks;
    }
    /**
     * Extract the file context (imports, namespace)
     */
    extractContext(lines, language) {
        const contextLines = [];
        let endLine = 0;
        const contextPatterns = {
            php: [
                /^\s*<\?php/,
                /^\s*namespace\s+/,
                /^\s*use\s+/,
                /^\s*require(_once)?\s+/,
                /^\s*include(_once)?\s+/,
            ],
            typescript: [
                /^\s*import\s+/,
                /^\s*export\s+\{[^}]*\}\s+from/,
                /^\s*\/\/\s*@ts-/,
            ],
            javascript: [
                /^\s*import\s+/,
                /^\s*const\s+\{[^}]*\}\s*=\s*require/,
                /^\s*require\s*\(/,
            ],
            python: [
                /^\s*import\s+/,
                /^\s*from\s+\w+\s+import/,
                /^\s*#.*coding[:=]/,
            ],
            java: [
                /^\s*package\s+/,
                /^\s*import\s+/,
            ],
            csharp: [
                /^\s*using\s+/,
                /^\s*namespace\s+/,
            ],
            go: [
                /^\s*package\s+/,
                /^\s*import\s+/,
            ],
            rust: [
                /^\s*use\s+/,
                /^\s*mod\s+/,
                /^\s*extern\s+crate/,
            ],
            ruby: [
                /^\s*require\s+/,
                /^\s*require_relative\s+/,
                /^\s*include\s+/,
            ]
        };
        const patterns = contextPatterns[language] || [];
        // Collect all import/namespace lines at the top of the file
        for (let i = 0; i < Math.min(lines.length, 100); i++) {
            const line = lines[i];
            const trimmed = line.trim();
            // Blank lines and leading comments: skip, but keep them in the context
            if (!trimmed || trimmed.startsWith('//') || trimmed.startsWith('#') || trimmed.startsWith('/*') || trimmed.startsWith('*')) {
                if (contextLines.length > 0 || patterns.some(p => p.test(trimmed))) {
                    contextLines.push(line);
                    endLine = i;
                }
                continue;
            }
            // Check the patterns
            const isContext = patterns.some(p => p.test(trimmed));
            if (isContext) {
                contextLines.push(line);
                endLine = i;
            }
            else if (contextLines.length > 0) {
                // Reached the end of the context
                break;
            }
        }
        return {
            context: contextLines.join('\n'),
            endLine
        };
    }
    /**
     * Find logical boundaries in a file
     */
    findLogicalBoundaries(lines, language) {
        const boundaries = [];
        // Patterns for block starts
        const blockPatterns = {
            php: [
                { pattern: /^\s*namespace\s+([\w\\]+)/, type: 'namespace' },
                { pattern: /^\s*(?:final\s+|abstract\s+)?class\s+(\w+)/, type: 'class' },
                { pattern: /^\s*interface\s+(\w+)/, type: 'interface' },
                { pattern: /^\s*trait\s+(\w+)/, type: 'trait' },
                { pattern: /^\s*(?:public|private|protected)?\s*(?:static\s+)?function\s+(\w+)/, type: 'function' },
            ],
            typescript: [
                { pattern: /^\s*(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/, type: 'class' },
                { pattern: /^\s*(?:export\s+)?interface\s+(\w+)/, type: 'interface' },
                { pattern: /^\s*(?:export\s+)?(?:async\s+)?function\s+(\w+)/, type: 'function' },
                { pattern: /^\s*(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s+)?\(/, type: 'function' },
            ],
            javascript: [
                { pattern: /^\s*(?:export\s+)?class\s+(\w+)/, type: 'class' },
                { pattern: /^\s*(?:export\s+)?(?:async\s+)?function\s+(\w+)/, type: 'function' },
                { pattern: /^\s*(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s+)?\(/, type: 'function' },
            ],
            python: [
                { pattern: /^class\s+(\w+)/, type: 'class' },
                { pattern: /^(?:async\s+)?def\s+(\w+)/, type: 'function' },
            ],
            java: [
                { pattern: /^\s*(?:public|private|protected)?\s*(?:abstract\s+)?class\s+(\w+)/, type: 'class' },
                { pattern: /^\s*(?:public|private|protected)?\s*interface\s+(\w+)/, type: 'interface' },
                { pattern: /^\s*(?:public|private|protected)?\s*(?:static\s+)?(?:\w+\s+)+(\w+)\s*\(/, type: 'function' },
            ],
            csharp: [
                { pattern: /^\s*(?:public|private|protected|internal)?\s*(?:partial\s+)?class\s+(\w+)/, type: 'class' },
                { pattern: /^\s*(?:public|private|protected|internal)?\s*interface\s+(\w+)/, type: 'interface' },
            ],
            go: [
                { pattern: /^func\s+(?:\([^)]+\)\s+)?(\w+)/, type: 'function' },
                { pattern: /^type\s+(\w+)\s+struct/, type: 'class' },
                { pattern: /^type\s+(\w+)\s+interface/, type: 'interface' },
            ],
            rust: [
                { pattern: /^(?:pub\s+)?(?:async\s+)?fn\s+(\w+)/, type: 'function' },
                { pattern: /^(?:pub\s+)?struct\s+(\w+)/, type: 'class' },
                { pattern: /^(?:pub\s+)?trait\s+(\w+)/, type: 'interface' },
                { pattern: /^impl(?:<[^>]+>)?\s+(\w+)/, type: 'class' },
            ],
            ruby: [
                { pattern: /^\s*class\s+(\w+)/, type: 'class' },
                { pattern: /^\s*module\s+(\w+)/, type: 'namespace' },
                { pattern: /^\s*def\s+(\w+)/, type: 'function' },
            ]
        };
        const patterns = blockPatterns[language] || blockPatterns.javascript || [];
        // Find all blocks
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i];
            for (const { pattern, type } of patterns) {
                const match = line.match(pattern);
                if (match) {
                    const name = match[1] || 'anonymous';
                    // Find the end of the block (by braces or indentation)
                    const endLine = this.findBlockEnd(lines, i, language);
                    boundaries.push({
                        startLine: i,
                        endLine,
                        type,
                        name
                    });
                    // Skip nested definitions for classes
                    if (type === 'class' || type === 'interface' || type === 'trait') {
                        i = endLine;
                    }
                    break;
                }
            }
        }
        return boundaries;
    }
    /**
     * Find the end of a code block
     */
    findBlockEnd(lines, startLine, language) {
        // For brace-delimited languages
        const braceLanguages = ['php', 'typescript', 'javascript', 'java', 'csharp', 'go', 'rust', 'c', 'cpp'];
        if (braceLanguages.includes(language)) {
            let braceCount = 0;
            let foundFirstBrace = false;
            for (let i = startLine; i < lines.length; i++) {
                const line = lines[i];
                // Count the braces (simplified; strings/comments are not handled)
                for (const char of line) {
                    if (char === '{') {
                        braceCount++;
                        foundFirstBrace = true;
                    }
                    else if (char === '}') {
                        braceCount--;
                    }
                }
                // Block closed
                if (foundFirstBrace && braceCount === 0) {
                    return i;
                }
            }
        }
        // For Python: by indentation
        if (language === 'python') {
            const startIndent = this.getIndent(lines[startLine]);
            for (let i = startLine + 1; i < lines.length; i++) {
                const line = lines[i];
                if (line.trim() === '')
                    continue;
                const indent = this.getIndent(line);
                if (indent <= startIndent && line.trim() !== '') {
                    return i - 1;
                }
            }
        }
        // Fallback: look for the next same-level definition, or the end of the file
        return Math.min(startLine + 100, lines.length - 1);
    }
    /**
     * Get a line's indentation level
     */
    getIndent(line) {
        const match = line.match(/^(\s*)/);
        return match ? match[1].length : 0;
    }
    /**
     * Build chunks from the boundaries found
     */
    createChunks(lines, boundaries, contextInfo, filePath, opts) {
        const chunks = [];
        const fileName = filePath.split(/[/\\]/).pop() || 'file';
        // If there are no logical boundaries, split by size
        if (boundaries.length === 0) {
            return this.chunkBySize(lines, filePath, contextInfo, opts);
        }
        // Add a header chunk if there is context
        if (contextInfo.context && contextInfo.context.length > opts.minChunkSize) {
            chunks.push({
                id: `${filePath}:chunk:header`,
                filePath,
                chunkIndex: 0,
                totalChunks: 0, // Updated later
                startLine: 0,
                endLine: contextInfo.endLine,
                type: 'header',
                name: `${fileName} imports`,
                content: contextInfo.context,
                context: ''
            });
        }
        // Create a chunk for each logical boundary
        for (const boundary of boundaries) {
            const chunkLines = lines.slice(boundary.startLine, boundary.endLine + 1);
            let content = chunkLines.join('\n');
            // If the chunk is too big, split it further
            if (content.length > opts.maxChunkSize) {
                const subChunks = this.splitLargeBlock(chunkLines, boundary, filePath, contextInfo, opts);
                chunks.push(...subChunks);
            }
            else {
                // Add the context if needed
                const contextPrefix = opts.includeContext && contextInfo.context
                    ? `// Context from ${fileName}:\n${contextInfo.context}\n\n// ${boundary.type}: ${boundary.name}\n`
                    : '';
                chunks.push({
                    id: `${filePath}:chunk:${boundary.type}:${boundary.name}:${boundary.startLine}`,
                    filePath,
                    chunkIndex: chunks.length,
                    totalChunks: 0,
                    startLine: boundary.startLine,
                    endLine: boundary.endLine,
                    type: boundary.type,
                    name: boundary.name,
                    content,
                    context: contextPrefix
                });
            }
        }
        // Update totalChunks
        const total = chunks.length;
        chunks.forEach((chunk, i) => {
            chunk.totalChunks = total;
            chunk.chunkIndex = i;
        });
        return chunks;
    }
    /**
     * Split a large block into sub-chunks
     */
    splitLargeBlock(lines, boundary, filePath, contextInfo, opts) {
        const chunks = [];
        let currentChunk = [];
        let currentSize = 0;
        let chunkStartLine = boundary.startLine;
        let partNum = 1;
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i];
            const lineSize = line.length + 1; // +1 for newline
            if (currentSize + lineSize > opts.maxChunkSize && currentChunk.length > 0) {
                // Save the current chunk
                const contextPrefix = opts.includeContext && contextInfo.context
                    ? `// Context: ${boundary.type} ${boundary.name} (part ${partNum})\n`
                    : '';
                chunks.push({
                    id: `${filePath}:chunk:${boundary.type}:${boundary.name}:${chunkStartLine}:part${partNum}`,
                    filePath,
                    chunkIndex: chunks.length,
                    totalChunks: 0,
                    startLine: chunkStartLine,
                    endLine: boundary.startLine + i - 1,
                    type: boundary.type,
                    name: `${boundary.name} (part ${partNum})`,
                    content: currentChunk.join('\n'),
                    context: contextPrefix
                });
                currentChunk = [];
                currentSize = 0;
                chunkStartLine = boundary.startLine + i;
                partNum++;
            }
            currentChunk.push(line);
            currentSize += lineSize;
        }
        // The last chunk
        if (currentChunk.length > 0) {
            chunks.push({
                id: `${filePath}:chunk:${boundary.type}:${boundary.name}:${chunkStartLine}:part${partNum}`,
                filePath,
                chunkIndex: chunks.length,
                totalChunks: 0,
                startLine: chunkStartLine,
                endLine: boundary.endLine,
                type: boundary.type,
                name: `${boundary.name} (part ${partNum})`,
                content: currentChunk.join('\n'),
                context: opts.includeContext ? `// Context: ${boundary.type} ${boundary.name} (part ${partNum})\n` : ''
            });
        }
        return chunks;
    }
    /**
     * Split a file by size (fallback)
     */
    chunkBySize(lines, filePath, contextInfo, opts) {
        const chunks = [];
        const fileName = filePath.split(/[/\\]/).pop() || 'file';
        let currentChunk = [];
        let currentSize = 0;
        let chunkStartLine = 0;
        let partNum = 1;
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i];
            const lineSize = line.length + 1;
            if (currentSize + lineSize > opts.maxChunkSize && currentChunk.length > 0) {
                chunks.push({
                    id: `${filePath}:chunk:${chunkStartLine}:part${partNum}`,
                    filePath,
                    chunkIndex: chunks.length,
                    totalChunks: 0,
                    startLine: chunkStartLine,
                    endLine: i - 1,
                    type: 'code',
                    name: `${fileName} (part ${partNum})`,
                    content: currentChunk.join('\n'),
                    context: opts.includeContext && partNum === 1 ? contextInfo.context : ''
                });
                currentChunk = [];
                currentSize = 0;
                chunkStartLine = i;
                partNum++;
            }
            currentChunk.push(line);
            currentSize += lineSize;
        }
        if (currentChunk.length > 0) {
            chunks.push({
                id: `${filePath}:chunk:${chunkStartLine}:part${partNum}`,
                filePath,
                chunkIndex: chunks.length,
                totalChunks: 0,
                startLine: chunkStartLine,
                endLine: lines.length - 1,
                type: 'code',
                name: `${fileName} (part ${partNum})`,
                content: currentChunk.join('\n'),
                context: ''
            });
        }
        // Update totalChunks
        const total = chunks.length;
        chunks.forEach((chunk, i) => {
            chunk.totalChunks = total;
            chunk.chunkIndex = i;
        });
        return chunks;
    }
}
//# sourceMappingURL=file-chunker.js.map
```
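A usage sketch for the new chunker, matching the options the semantic indexer passes later in this diff; the file path and content here are hypothetical:

```js
import { FileChunker } from './code-index/file-chunker.js'; // path relative to dist/

// Hypothetical input: a PHP file large enough to exceed maxChunkSize.
const content = ['<?php', 'namespace App;', '/* ...hundreds of lines... */'].join('\n');

const chunker = new FileChunker();
const chunks = chunker.chunkFile(content, 'src/App/CommentService.php', {
    language: 'php',
    maxChunkSize: 4000,   // same values SemanticMemory.indexFileChunks uses
    minChunkSize: 200,
    includeContext: true, // prepend imports/namespace to each chunk
});
for (const c of chunks) {
    console.log(`${c.chunkIndex + 1}/${c.totalChunks} ${c.type} ${c.name} (lines ${c.startLine}-${c.endLine})`);
}
```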
package/dist/code-index/index.d.ts
```diff
@@ -60,5 +60,7 @@ export declare class CodeIndex {
     private getSymbolsByKind;
 }
 export { ASTParser, SymbolExtractor, DependencyGraphBuilder, SourceMapExtractor };
+export { FileChunker } from './file-chunker.js';
 export type { VirtualFile, ExtractionResult } from './source-map-extractor.js';
+export type { FileChunk, ChunkingOptions } from './file-chunker.js';
 //# sourceMappingURL=index.d.ts.map
```
package/dist/code-index/index.js
CHANGED
package/dist/code-index/symbol-extractor.js
```diff
@@ -59,7 +59,31 @@ export class SymbolExtractor {
             'impl_item': SymbolKind.Class,
             'struct_item': SymbolKind.Class,
             'enum_item': SymbolKind.Type,
-            'trait_item': SymbolKind.Interface
+            'trait_item': SymbolKind.Interface,
+            // PHP and general
+            'namespace_definition': SymbolKind.Namespace,
+            'namespace_use_declaration': SymbolKind.Variable,
+            'trait_declaration': SymbolKind.Interface,
+            'property_declaration': SymbolKind.Variable,
+            'enum_declaration': SymbolKind.Type,
+            'enum_declaration_list': SymbolKind.Type,
+            'module_declaration': SymbolKind.Namespace, // For regex-parsed namespaces
+            'struct_declaration': SymbolKind.Class, // For regex-parsed structs
+            'impl_declaration': SymbolKind.Class, // For regex-parsed impl blocks
+            // Java
+            'constructor_declaration': SymbolKind.Function,
+            'field_declaration': SymbolKind.Variable,
+            'annotation_type_declaration': SymbolKind.Interface,
+            // C/C++
+            'struct_specifier': SymbolKind.Class,
+            'union_specifier': SymbolKind.Class,
+            'enum_specifier': SymbolKind.Type,
+            'preproc_function_def': SymbolKind.Function,
+            // Ruby
+            'method': SymbolKind.Function,
+            'singleton_method': SymbolKind.Function,
+            'module': SymbolKind.Namespace,
+            'class': SymbolKind.Class
         };
         return mapping[nodeType] || null;
     }
```
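The practical effect of the widened table: node types produced by the PHP, Java, C/C++ and Ruby grammars (and by the regex fallback) now resolve to a symbol kind instead of falling through to null. Illustrative lookups, with placeholder SymbolKind values:

```js
const SymbolKind = { Function: 'function', Class: 'class', Interface: 'interface', Namespace: 'namespace', Type: 'type', Variable: 'variable' }; // placeholder enum
const mapping = {
    'trait_declaration': SymbolKind.Interface,      // PHP
    'constructor_declaration': SymbolKind.Function, // Java
    'struct_specifier': SymbolKind.Class,           // C/C++
    'singleton_method': SymbolKind.Function,        // Ruby
};
console.log(mapping['trait_declaration'] || null); // 'interface' (was null in 0.3.9)
console.log(mapping['unknown_node'] || null);      // null: unmapped types still fall through
```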
package/dist/orchestrator/index.js
```diff
@@ -403,8 +403,19 @@ A: "✅ Компонент Comments найден в 3 файлах:
     prompt += '\n';
     for (const result of context.semanticMemory.slice(0, maxResults)) {
         const cleanPath = sanitizePath(result.chunk.metadata.filePath);
-
-
+        const lineInfo = result.chunk.metadata.startLine > 0
+            ? `:${result.chunk.metadata.startLine}-${result.chunk.metadata.endLine}`
+            : '';
+        prompt += `\n### Файл: ${cleanPath}${lineInfo}\n`;
+        prompt += `Символы: ${result.chunk.metadata.symbols.join(', ') || 'N/A'}\n`;
+        prompt += `Тип: ${result.chunk.metadata.type}\n`;
+        // Use the full content from chunk.content, not the trimmed context
+        const codeContent = result.chunk.content || result.context;
+        // Cap at 3000 characters per file so the model context doesn't overflow
+        const truncatedCode = codeContent.length > 3000
+            ? codeContent.substring(0, 3000) + '\n... (truncated)'
+            : codeContent;
+        prompt += `\`\`\`\n${truncatedCode}\n\`\`\`\n`;
     }
     prompt += '\n###END PROJECT FILES###';
 }
```
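With these additions, each semantic-memory hit is rendered into the model prompt with a path and line-range header, a symbol list, a type, and a fenced code body capped at 3000 characters. Roughly (values illustrative; the header labels are the package's Russian string literals):

```text
### Файл: src/components/Comments.tsx:12-84
Символы: Comments
Тип: class
<fenced code block: chunk content, cut at 3000 chars with "... (truncated)">
```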
package/dist/semantic-memory/index.d.ts
```diff
@@ -16,6 +16,14 @@ export declare class SemanticMemory {
     constructor(embeddingConfig: EmbeddingConfig, vectorStoreConfig: VectorStoreConfig);
     initialize(): Promise<void>;
     indexSymbols(symbols: Map<string, Symbol>, asts: Map<string, ASTNode>, progressCallback?: (current: number, total: number) => void): Promise<void>;
+    /**
+     * Index files with smart chunking.
+     * For large files, several chunks are created along logical boundaries.
+     */
+    indexFileChunks(fileContents: Map<string, string>, progressCallback?: (current: number, total: number) => void): Promise<{
+        indexed: number;
+        chunks: number;
+    }>;
     indexModules(asts: Map<string, ASTNode>): Promise<void>;
     searchByQuery(query: string, limit?: number): Promise<SemanticSearchResult[]>;
     searchSimilarCode(code: string, metadata?: {
```
package/dist/semantic-memory/index.js
```diff
@@ -10,6 +10,8 @@
 import { EmbeddingService } from './embedding-service.js';
 import { VectorStore } from './vector-store.js';
 import { Logger } from '../utils/logger.js';
+import { FileChunker } from '../code-index/file-chunker.js';
+import { FileUtils } from '../utils/file-utils.js';
 export class SemanticMemory {
     embeddingService;
     vectorStore;
```
```diff
@@ -85,6 +87,77 @@ export class SemanticMemory {
     }
     Logger.success(`Indexed ${symbolData.length} symbols (batch mode)`);
 }
+/**
+ * Index files with smart chunking.
+ * For large files, several chunks are created along logical boundaries.
+ */
+async indexFileChunks(fileContents, progressCallback) {
+    Logger.progress('Indexing file chunks into semantic memory...');
+    const chunker = new FileChunker();
+    const allChunks = [];
+    const files = Array.from(fileContents.entries());
+    // Step 1: split all files into chunks
+    for (const [filePath, content] of files) {
+        const language = FileUtils.getLanguageFromExtension(filePath);
+        const chunks = chunker.chunkFile(content, filePath, {
+            language,
+            maxChunkSize: 4000,
+            minChunkSize: 200,
+            includeContext: true
+        });
+        allChunks.push(...chunks);
+    }
+    if (allChunks.length === 0) {
+        Logger.warn('No chunks to index');
+        return { indexed: 0, chunks: 0 };
+    }
+    Logger.progress(`Prepared ${allChunks.length} chunks from ${files.length} files`);
+    // Step 2: generate embeddings in batches
+    const textsToEmbed = [];
+    for (const chunk of allChunks) {
+        // Add the context for better understanding
+        const textWithContext = chunk.context
+            ? `${chunk.context}\n${chunk.content}`
+            : chunk.content;
+        textsToEmbed.push(this.embeddingService.prepareCodeForEmbedding(textWithContext, `File: ${chunk.filePath}\nType: ${chunk.type}\nName: ${chunk.name}`));
+    }
+    Logger.progress(`Generating embeddings for ${textsToEmbed.length} chunks...`);
+    const embeddings = await this.embeddingService.generateBatchEmbeddings(textsToEmbed, progressCallback);
+    // Step 3: build SemanticChunks and store them in the vector store
+    Logger.progress('Storing chunks in vector DB...');
+    const semanticChunks = [];
+    for (let i = 0; i < allChunks.length; i++) {
+        const chunk = allChunks[i];
+        const embedding = embeddings[i];
+        const metadata = {
+            filePath: chunk.filePath,
+            startLine: chunk.startLine,
+            endLine: chunk.endLine,
+            type: chunk.type === 'class' ? 'class' :
+                chunk.type === 'function' ? 'function' :
+                    'module',
+            symbols: [chunk.name],
+            tags: [chunk.type, chunk.name, this.extractDomain(chunk.filePath)]
+        };
+        semanticChunks.push({
+            id: chunk.id,
+            content: chunk.content,
+            embedding,
+            metadata
+        });
+        // Batch upsert
+        if (semanticChunks.length >= 500) {
+            await this.vectorStore.upsertChunks(semanticChunks);
+            semanticChunks.length = 0;
+        }
+    }
+    // The remaining chunks
+    if (semanticChunks.length > 0) {
+        await this.vectorStore.upsertChunks(semanticChunks);
+    }
+    Logger.success(`Indexed ${allChunks.length} chunks from ${files.length} files`);
+    return { indexed: files.length, chunks: allChunks.length };
+}
 async indexModules(asts) {
     Logger.progress('Indexing modules into semantic memory...');
     const chunks = [];
```
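A sketch of driving the new method, assuming an already-initialized `SemanticMemory` instance (its construction is elided; the map entries are illustrative):

```js
// Feed file contents into the new chunk indexer; the progress callback
// reports embedding-generation progress via generateBatchEmbeddings.
const fileContents = new Map([
    ['src/app.js', 'export const app = () => {};'],   // illustrative contents
    ['src/Service.php', '<?php class Service {}'],
]);
const { indexed, chunks } = await semanticMemory.indexFileChunks(
    fileContents,
    (current, total) => console.log(`embeddings ${current}/${total}`)
);
console.log(`indexed ${indexed} files as ${chunks} chunks`);
```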
package/dist/server/services/project-service.js
```diff
@@ -428,6 +428,8 @@ export class ProjectService {
     // Analyze the project's technology stack
     const { analyzeProjectStack } = await import('../../utils/project-analyzer.js');
     const projectMetadata = analyzeProjectStack(project.path);
+    // Load the file contents for full context
+    const fileContents = await this.getFileContents(projectId);
     // Look up relevant context (if semantic memory is available)
     let searchResults = [];
     if (data.semanticMemory) {
@@ -435,20 +437,34 @@ export class ProjectService {
         // Raise the search result limit from 5 to 30
         searchResults = await data.semanticMemory.searchByQuery(question, 30);
         Logger.debug(`Semantic search returned ${searchResults.length} results`);
+        // Enrich the results with full file contents
+        for (const result of searchResults) {
+            const fullContent = fileContents.get(result.chunk.metadata.filePath);
+            if (fullContent && fullContent.length > result.chunk.content.length) {
+                // Replace the short fragment with the full content (up to 5000 characters)
+                result.chunk.content = fullContent.length > 5000
+                    ? fullContent.substring(0, 5000) + '\n... (file truncated)'
+                    : fullContent;
+            }
+        }
     }
     catch (err) {
         Logger.warn('Semantic search failed, using fallback');
     }
 }
-// Fallback: if semantic search isn't working, provide context from the graph
+// Fallback: if semantic search isn't working, provide context from the graph + file contents
 if (searchResults.length === 0 && data.graph) {
     Logger.info('Using graph fallback for context');
-    const files = Array.from(data.graph.nodes.values()).slice(0, 30);
+    const files = Array.from(data.graph.nodes.values()).slice(0, 30);
     for (const file of files) {
+        const fullContent = fileContents.get(file.filePath) || `File: ${file.filePath}\nType: ${file.type}`;
+        const truncatedContent = fullContent.length > 5000
+            ? fullContent.substring(0, 5000) + '\n... (file truncated)'
+            : fullContent;
         searchResults.push({
             chunk: {
                 id: file.id,
-                content:
+                content: truncatedContent,
                 embedding: [],
                 metadata: {
                     filePath: file.filePath,
@@ -460,13 +476,13 @@ export class ProjectService {
                 }
             },
             score: 1,
-            context:
+            context: truncatedContent
         });
     }
 }
 // Add symbol information when available
 if (data.symbols && data.symbols.size > 0) {
-    const symbolsList = Array.from(data.symbols.values()).slice(0, 50);
+    const symbolsList = Array.from(data.symbols.values()).slice(0, 50);
     for (const sym of symbolsList) {
         const existing = searchResults.find(r => r.chunk.metadata.filePath === sym.filePath);
         if (existing) {
```
```diff
@@ -580,7 +596,15 @@ export class ProjectService {
     // Index into semantic memory (if available)
     if (data.semanticMemory) {
         await data.semanticMemory.initialize();
+        // Index the symbols (functions, classes)
         await data.semanticMemory.indexSymbols(symbols, asts);
+        // Also index the files with smart chunking, for large files
+        if (indexedData.fileContents && indexedData.fileContents.length > 0) {
+            const fileContentsMap = new Map(indexedData.fileContents);
+            Logger.progress(`Indexing ${fileContentsMap.size} files with smart chunking...`);
+            const chunkResult = await data.semanticMemory.indexFileChunks(fileContentsMap);
+            Logger.success(`Indexed ${chunkResult.chunks} chunks from ${chunkResult.indexed} files`);
+        }
     }
     // Save the index data to disk
     const projectDataDir = path.join(this.dataDir, projectId);
```
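One detail worth noting in the hunk above: `new Map(indexedData.fileContents)` implies that `fileContents` is serialized as an array of `[filePath, content]` pairs, which is exactly what `Map` accepts. A tiny sketch of that assumption:

```js
// Assumption drawn from the code above: fileContents is an array of pairs.
const indexedData = { fileContents: [['src/a.js', 'export const a = 1;']] };
const fileContentsMap = new Map(indexedData.fileContents);
console.log(fileContentsMap.size, fileContentsMap.get('src/a.js'));
// -> 1 'export const a = 1;'
```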
package/dist/utils/file-utils.js
CHANGED
```diff
@@ -80,7 +80,10 @@ export class FileUtils {
     const resolvedPath = resolve(rootDir);
     // Normalize for glob (use forward slashes on all platforms)
     const absoluteRootDir = resolvedPath.replace(/\\/g, '/');
-    Logger.
+    // Logger.debug for verbose output (only in DEBUG mode)
+    if (process.env.DEBUG) {
+        Logger.info(`getAllFiles: rootDir=${rootDir}, resolvedPath=${resolvedPath}`);
+    }
     // Check if directory exists and is accessible
     try {
         const dirStats = await stat(resolvedPath);
@@ -88,7 +91,7 @@ export class FileUtils {
         Logger.error(`getAllFiles: ${resolvedPath} is not a directory`);
         return [];
     }
-
+    // Directory exists - no need to log in production
 }
 catch (err) {
     Logger.error(`getAllFiles: Cannot access directory ${resolvedPath}: ${err}`);
@@ -107,6 +110,9 @@ export class FileUtils {
     'bower_components/**',
     '**/bower_components/**',
     'jspm_packages/**',
+    // PHP Composer dependencies
+    'vendor/**',
+    '**/vendor/**',
     // Build/Generated output
     'dist/**',
     '**/dist/**',
@@ -168,7 +174,10 @@ export class FileUtils {
     });
     files.push(...matches);
 }
-
+// Verbose logging only in DEBUG mode
+if (process.env.DEBUG) {
+    Logger.info(`getAllFiles: found ${files.length} files`);
+}
 // If no files found, list directory contents for debugging
 if (files.length === 0) {
     try {
@@ -207,12 +216,9 @@ export class FileUtils {
         // Skip files we can't stat
     }
 }
-
-if (
-    Logger.info(`
-}
-else if (filteredFiles.length > 20) {
-    Logger.info(`First 20 files: ${filteredFiles.slice(0, 20).join(', ')}`);
+// Only log the file list in DEBUG mode
+if (process.env.DEBUG && filteredFiles.length > 0) {
+    Logger.info(`getAllFiles: returning ${filteredFiles.length} files`);
 }
 return filteredFiles;
 }
```
package/dist/utils/logger.js
CHANGED
```diff
@@ -7,7 +7,8 @@ export var LogLevel;
     LogLevel[LogLevel["ERROR"] = 3] = "ERROR";
 })(LogLevel || (LogLevel = {}));
 export class Logger {
-
+    // Default to WARN in production, INFO/DEBUG only when DEBUG env var is set
+    static level = process.env.DEBUG ? LogLevel.DEBUG : LogLevel.WARN;
     static setLevel(level) {
         this.level = level;
     }
```
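Net effect of the last hunk: without `DEBUG` in the environment the CLI now logs only warnings and errors by default. Note that the check here is plain truthiness (`process.env.DEBUG ? ...`), so even `DEBUG=0` would enable debug logging, unlike the stricter `=== 'true' || === '1'` check added in upload-utils.js. A small sketch:

```js
import { Logger, LogLevel } from './utils/logger.js'; // path relative to dist/

// DEBUG unset -> Logger.level === LogLevel.WARN (info/debug suppressed)
// DEBUG=1     -> Logger.level === LogLevel.DEBUG
// DEBUG=0     -> also DEBUG level, since any non-empty string is truthy
Logger.info('visible only when DEBUG is set');
Logger.setLevel(LogLevel.INFO); // explicit override is still available
```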