codecritique 1.2.2 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/content-retrieval.js +93 -153
- package/src/content-retrieval.test.js +49 -9
- package/src/custom-documents.js +17 -17
- package/src/feedback-loader.js +31 -31
- package/src/index.js +71 -94
- package/src/llm.js +4 -3
- package/src/project-analyzer.js +73 -41
- package/src/project-analyzer.test.js +3 -5
- package/src/rag-analyzer.js +189 -169
- package/src/rag-analyzer.test.js +55 -0
- package/src/rag-review.js +105 -74
- package/src/rag-review.test.js +115 -3
- package/src/zero-shot-classifier-open.js +3 -2
package/package.json
CHANGED
package/src/content-retrieval.js
CHANGED
|
@@ -23,7 +23,9 @@ import { calculateCosineSimilarity, calculatePathSimilarity } from './embeddings
|
|
|
23
23
|
import { inferContextFromDocumentContent } from './utils/context-inference.js';
|
|
24
24
|
import { isGenericDocument, getGenericDocumentContext } from './utils/document-detection.js';
|
|
25
25
|
import { isDocumentationFile } from './utils/file-validation.js';
|
|
26
|
-
import { debug } from './utils/logging.js';
|
|
26
|
+
import { debug, verboseLog } from './utils/logging.js';
|
|
27
|
+
import { isPathWithinProject } from './utils/path-utils.js';
|
|
28
|
+
import { escapeSqlString } from './utils/string-utils.js';
|
|
27
29
|
|
|
28
30
|
const FILE_EMBEDDINGS_TABLE = TABLE_NAMES.FILE_EMBEDDINGS;
|
|
29
31
|
const DOCUMENT_CHUNK_TABLE = TABLE_NAMES.DOCUMENT_CHUNK;
|
|
@@ -54,6 +56,58 @@ export class ContentRetriever {
|
|
|
54
56
|
this.cleaningUp = false;
|
|
55
57
|
}
|
|
56
58
|
|
|
59
|
+
resolveProjectResultPath(filePath, resolvedProjectPath) {
|
|
60
|
+
if (!filePath) {
|
|
61
|
+
return null;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
const absolutePath = path.isAbsolute(filePath) ? path.resolve(filePath) : path.resolve(resolvedProjectPath, filePath);
|
|
65
|
+
return isPathWithinProject(absolutePath, resolvedProjectPath) ? absolutePath : null;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
async filterResultsForProject(results, resolvedProjectPath, getPath) {
|
|
69
|
+
const resultsToCheck = [];
|
|
70
|
+
const projectMatchMap = new Map();
|
|
71
|
+
|
|
72
|
+
for (let i = 0; i < results.length; i++) {
|
|
73
|
+
const result = results[i];
|
|
74
|
+
const resultPath = getPath(result);
|
|
75
|
+
|
|
76
|
+
if (result.project_path && result.project_path !== resolvedProjectPath) {
|
|
77
|
+
projectMatchMap.set(i, false);
|
|
78
|
+
continue;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
const absolutePath = this.resolveProjectResultPath(resultPath, resolvedProjectPath);
|
|
82
|
+
if (!absolutePath) {
|
|
83
|
+
projectMatchMap.set(i, false);
|
|
84
|
+
continue;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
resultsToCheck.push({ index: i, absolutePath, resultPath });
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
if (resultsToCheck.length > 0) {
|
|
91
|
+
const existenceResults = await Promise.all(
|
|
92
|
+
resultsToCheck.map(async ({ index, absolutePath, resultPath }) => {
|
|
93
|
+
try {
|
|
94
|
+
await fs.promises.access(absolutePath, fs.constants.F_OK);
|
|
95
|
+
return { index, exists: true };
|
|
96
|
+
} catch {
|
|
97
|
+
debug(`Filtering out non-existent project file: ${resultPath}`);
|
|
98
|
+
return { index, exists: false };
|
|
99
|
+
}
|
|
100
|
+
})
|
|
101
|
+
);
|
|
102
|
+
|
|
103
|
+
for (const { index, exists } of existenceResults) {
|
|
104
|
+
projectMatchMap.set(index, exists);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
return results.filter((result, index) => projectMatchMap.get(index) === true);
|
|
109
|
+
}
|
|
110
|
+
|
|
57
111
|
/**
|
|
58
112
|
* Find relevant documentation with sophisticated reranking
|
|
59
113
|
* @param {string} queryText - The search query
|
|
@@ -79,7 +133,8 @@ export class ContentRetriever {
|
|
|
79
133
|
return [];
|
|
80
134
|
}
|
|
81
135
|
|
|
82
|
-
|
|
136
|
+
verboseLog(
|
|
137
|
+
options,
|
|
83
138
|
chalk.cyan(`Native hybrid documentation search - limit: ${limit}, threshold: ${similarityThreshold}, reranking: ${useReranking}`)
|
|
84
139
|
);
|
|
85
140
|
|
|
@@ -91,14 +146,14 @@ export class ContentRetriever {
|
|
|
91
146
|
return [];
|
|
92
147
|
}
|
|
93
148
|
|
|
94
|
-
|
|
149
|
+
verboseLog(options, chalk.cyan('Performing native hybrid search for documentation...'));
|
|
95
150
|
let query = table.search(queryText).nearestToText(queryText);
|
|
96
151
|
|
|
97
152
|
const resolvedProjectPath = path.resolve(projectPath);
|
|
98
153
|
try {
|
|
99
154
|
const tableSchema = await table.schema;
|
|
100
155
|
if (tableSchema?.fields?.some((field) => field.name === 'project_path')) {
|
|
101
|
-
query = query.where(`project_path = '${resolvedProjectPath
|
|
156
|
+
query = query.where(`project_path = '${escapeSqlString(resolvedProjectPath)}'`);
|
|
102
157
|
debug(`Filtering documentation by project_path: ${resolvedProjectPath}`);
|
|
103
158
|
}
|
|
104
159
|
} catch (schemaError) {
|
|
@@ -106,69 +161,15 @@ export class ContentRetriever {
|
|
|
106
161
|
}
|
|
107
162
|
|
|
108
163
|
const results = await query.limit(Math.max(limit * 3, 20)).toArray();
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
// OPTIMIZATION: Enhanced batch file existence checks with parallel processing
|
|
112
|
-
const docsToCheck = [];
|
|
113
|
-
const docProjectMatchMap = new Map();
|
|
114
|
-
|
|
115
|
-
// First pass: collect files that need existence checking
|
|
116
|
-
for (let i = 0; i < results.length; i++) {
|
|
117
|
-
const result = results[i];
|
|
118
|
-
|
|
119
|
-
if (result.project_path) {
|
|
120
|
-
docProjectMatchMap.set(i, result.project_path === resolvedProjectPath);
|
|
121
|
-
continue;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
if (!result.original_document_path) {
|
|
125
|
-
docProjectMatchMap.set(i, false);
|
|
126
|
-
continue;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
const filePath = result.original_document_path;
|
|
130
|
-
try {
|
|
131
|
-
if (path.isAbsolute(filePath)) {
|
|
132
|
-
docProjectMatchMap.set(i, filePath.startsWith(resolvedProjectPath));
|
|
133
|
-
continue;
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
const absolutePath = path.resolve(resolvedProjectPath, filePath);
|
|
137
|
-
if (absolutePath.startsWith(resolvedProjectPath)) {
|
|
138
|
-
// Mark for batch existence check
|
|
139
|
-
docsToCheck.push({ result, index: i, absolutePath, filePath });
|
|
140
|
-
} else {
|
|
141
|
-
docProjectMatchMap.set(i, false);
|
|
142
|
-
}
|
|
143
|
-
} catch (error) {
|
|
144
|
-
debug(`Error filtering result for project: ${error.message}`);
|
|
145
|
-
docProjectMatchMap.set(i, false);
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
// Enhanced batch check file existence with improved error handling
|
|
150
|
-
if (docsToCheck.length > 0) {
|
|
151
|
-
debug(`[OPTIMIZATION] Batch checking existence of ${docsToCheck.length} documentation files`);
|
|
152
|
-
const existencePromises = docsToCheck.map(async ({ index, absolutePath, filePath }) => {
|
|
153
|
-
try {
|
|
154
|
-
await fs.promises.access(absolutePath, fs.constants.F_OK);
|
|
155
|
-
return { index, exists: true };
|
|
156
|
-
} catch {
|
|
157
|
-
debug(`Filtering out non-existent documentation file: ${filePath}`);
|
|
158
|
-
return { index, exists: false };
|
|
159
|
-
}
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
const existenceResults = await Promise.all(existencePromises);
|
|
163
|
-
for (const { index, exists } of existenceResults) {
|
|
164
|
-
docProjectMatchMap.set(index, exists);
|
|
165
|
-
}
|
|
166
|
-
}
|
|
164
|
+
verboseLog(options, chalk.green(`Native hybrid search returned ${results.length} documentation results`));
|
|
167
165
|
|
|
168
|
-
|
|
169
|
-
|
|
166
|
+
const projectFilteredResults = await this.filterResultsForProject(
|
|
167
|
+
results,
|
|
168
|
+
resolvedProjectPath,
|
|
169
|
+
(result) => result.original_document_path
|
|
170
|
+
);
|
|
170
171
|
|
|
171
|
-
|
|
172
|
+
verboseLog(options, chalk.blue(`Filtered to ${projectFilteredResults.length} documentation results from current project`));
|
|
172
173
|
let finalResults = projectFilteredResults.map((result) => {
|
|
173
174
|
let similarity;
|
|
174
175
|
if (result._distance !== undefined) {
|
|
@@ -197,7 +198,7 @@ export class ContentRetriever {
|
|
|
197
198
|
|
|
198
199
|
let queryEmbedding = null;
|
|
199
200
|
if (useReranking && queryContextForReranking && finalResults.length >= 3) {
|
|
200
|
-
|
|
201
|
+
verboseLog(options, chalk.cyan('Applying sophisticated contextual reranking to documentation...'));
|
|
201
202
|
const WEIGHT_INITIAL_SIM = 0.3;
|
|
202
203
|
const WEIGHT_H1_CHUNK_RERANK = 0.15;
|
|
203
204
|
const HEAVY_BOOST_SAME_AREA = 0.4;
|
|
@@ -416,7 +417,7 @@ export class ContentRetriever {
|
|
|
416
417
|
finalResults = finalResults.slice(0, limit);
|
|
417
418
|
}
|
|
418
419
|
|
|
419
|
-
|
|
420
|
+
verboseLog(options, chalk.green(`Returning ${finalResults.length} documentation results`));
|
|
420
421
|
|
|
421
422
|
return finalResults;
|
|
422
423
|
} catch (error) {
|
|
@@ -443,7 +444,10 @@ export class ContentRetriever {
|
|
|
443
444
|
precomputedQueryEmbedding = null,
|
|
444
445
|
} = options;
|
|
445
446
|
|
|
446
|
-
|
|
447
|
+
verboseLog(
|
|
448
|
+
options,
|
|
449
|
+
chalk.cyan(`Native hybrid code search - limit: ${limit}, threshold: ${similarityThreshold}, isTestFile: ${isTestFile}`)
|
|
450
|
+
);
|
|
447
451
|
|
|
448
452
|
try {
|
|
449
453
|
if (!queryText?.trim()) {
|
|
@@ -460,7 +464,7 @@ export class ContentRetriever {
|
|
|
460
464
|
}
|
|
461
465
|
|
|
462
466
|
// Native hybrid search with automatic vector + FTS + RRF
|
|
463
|
-
|
|
467
|
+
verboseLog(options, chalk.cyan('Performing native hybrid search for code...'));
|
|
464
468
|
let query = table.search(queryText).nearestToText(queryText);
|
|
465
469
|
|
|
466
470
|
// Add filtering conditions
|
|
@@ -472,13 +476,13 @@ export class ContentRetriever {
|
|
|
472
476
|
if (isTestFile) {
|
|
473
477
|
// Only include test files
|
|
474
478
|
conditions.push(`(path LIKE '%.test.%' OR path LIKE '%.spec.%' OR path LIKE '%_test.py' OR path LIKE 'test_%.py')`);
|
|
475
|
-
|
|
479
|
+
verboseLog(options, chalk.blue(`Filtering to include only test files.`));
|
|
476
480
|
} else {
|
|
477
481
|
// Exclude test files
|
|
478
482
|
conditions.push(
|
|
479
483
|
`(path NOT LIKE '%.test.%' AND path NOT LIKE '%.spec.%' AND path NOT LIKE '%_test.py' AND path NOT LIKE 'test_%.py')`
|
|
480
484
|
);
|
|
481
|
-
|
|
485
|
+
verboseLog(options, chalk.blue(`Filtering to exclude test files.`));
|
|
482
486
|
}
|
|
483
487
|
}
|
|
484
488
|
|
|
@@ -489,13 +493,13 @@ export class ContentRetriever {
|
|
|
489
493
|
if (queryFilePath) {
|
|
490
494
|
const normalizedQueryPath = path.resolve(resolvedProjectPath, queryFilePath);
|
|
491
495
|
// Add condition to exclude the file being reviewed
|
|
492
|
-
const escapedPath = normalizedQueryPath
|
|
496
|
+
const escapedPath = escapeSqlString(normalizedQueryPath);
|
|
493
497
|
conditions.push(`path != '${escapedPath}'`);
|
|
494
498
|
|
|
495
499
|
// Also check for relative path variants to be thorough
|
|
496
500
|
const relativePath = path.relative(resolvedProjectPath, normalizedQueryPath);
|
|
497
501
|
if (relativePath && !relativePath.startsWith('..')) {
|
|
498
|
-
const escapedRelativePath = relativePath
|
|
502
|
+
const escapedRelativePath = escapeSqlString(relativePath);
|
|
499
503
|
conditions.push(`path != '${escapedRelativePath}'`);
|
|
500
504
|
}
|
|
501
505
|
|
|
@@ -511,7 +515,7 @@ export class ContentRetriever {
|
|
|
511
515
|
|
|
512
516
|
if (hasProjectPathField) {
|
|
513
517
|
// Use exact match for project path
|
|
514
|
-
conditions.push(`project_path = '${resolvedProjectPath
|
|
518
|
+
conditions.push(`project_path = '${escapeSqlString(resolvedProjectPath)}'`);
|
|
515
519
|
debug(`Filtering by project_path: ${resolvedProjectPath}`);
|
|
516
520
|
}
|
|
517
521
|
}
|
|
@@ -526,76 +530,15 @@ export class ContentRetriever {
|
|
|
526
530
|
|
|
527
531
|
const results = await query.limit(Math.max(limit * 3, 20)).toArray();
|
|
528
532
|
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
// OPTIMIZATION: Batch file existence checks for better performance
|
|
532
|
-
const resultsToCheck = [];
|
|
533
|
-
const projectMatchMap = new Map();
|
|
534
|
-
|
|
535
|
-
// First pass: collect files that need existence checking
|
|
536
|
-
for (let i = 0; i < results.length; i++) {
|
|
537
|
-
const result = results[i];
|
|
538
|
-
|
|
539
|
-
// Use project_path field if available (new schema)
|
|
540
|
-
if (result.project_path) {
|
|
541
|
-
projectMatchMap.set(i, result.project_path === resolvedProjectPath);
|
|
542
|
-
continue;
|
|
543
|
-
}
|
|
544
|
-
|
|
545
|
-
// Fallback for old embeddings without project_path field
|
|
546
|
-
if (!result.path && !result.original_document_path) {
|
|
547
|
-
projectMatchMap.set(i, false);
|
|
548
|
-
continue;
|
|
549
|
-
}
|
|
550
|
-
|
|
551
|
-
const filePath = result.original_document_path || result.path;
|
|
552
|
-
try {
|
|
553
|
-
// Check if this result belongs to the current project
|
|
554
|
-
// First try as absolute path
|
|
555
|
-
if (path.isAbsolute(filePath)) {
|
|
556
|
-
projectMatchMap.set(i, filePath.startsWith(resolvedProjectPath));
|
|
557
|
-
continue;
|
|
558
|
-
}
|
|
559
|
-
|
|
560
|
-
// For relative paths, check if the file actually exists in the project
|
|
561
|
-
const absolutePath = path.resolve(resolvedProjectPath, filePath);
|
|
562
|
-
|
|
563
|
-
// Verify the path is within project bounds
|
|
564
|
-
if (absolutePath.startsWith(resolvedProjectPath)) {
|
|
565
|
-
// Mark for batch existence check
|
|
566
|
-
resultsToCheck.push({ result, index: i, absolutePath });
|
|
567
|
-
} else {
|
|
568
|
-
projectMatchMap.set(i, false);
|
|
569
|
-
}
|
|
570
|
-
} catch (error) {
|
|
571
|
-
debug(`Error filtering result for project: ${error.message}`);
|
|
572
|
-
projectMatchMap.set(i, false);
|
|
573
|
-
}
|
|
574
|
-
}
|
|
575
|
-
|
|
576
|
-
// Batch check file existence for better performance
|
|
577
|
-
if (resultsToCheck.length > 0) {
|
|
578
|
-
debug(`[OPTIMIZATION] Batch checking existence of ${resultsToCheck.length} files`);
|
|
579
|
-
const existencePromises = resultsToCheck.map(async ({ result, index, absolutePath }) => {
|
|
580
|
-
try {
|
|
581
|
-
await fs.promises.access(absolutePath, fs.constants.F_OK);
|
|
582
|
-
return { index, exists: true };
|
|
583
|
-
} catch {
|
|
584
|
-
debug(`Filtering out non-existent file: ${result.original_document_path || result.path}`);
|
|
585
|
-
return { index, exists: false };
|
|
586
|
-
}
|
|
587
|
-
});
|
|
588
|
-
|
|
589
|
-
const existenceResults = await Promise.all(existencePromises);
|
|
590
|
-
for (const { index, exists } of existenceResults) {
|
|
591
|
-
projectMatchMap.set(index, exists);
|
|
592
|
-
}
|
|
593
|
-
}
|
|
533
|
+
verboseLog(options, chalk.green(`Native hybrid search returned ${results.length} results`));
|
|
594
534
|
|
|
595
|
-
|
|
596
|
-
|
|
535
|
+
const projectFilteredResults = await this.filterResultsForProject(
|
|
536
|
+
results,
|
|
537
|
+
resolvedProjectPath,
|
|
538
|
+
(result) => result.original_document_path || result.path
|
|
539
|
+
);
|
|
597
540
|
|
|
598
|
-
|
|
541
|
+
verboseLog(options, chalk.blue(`Filtered to ${projectFilteredResults.length} results from current project`));
|
|
599
542
|
|
|
600
543
|
// Map results to expected format
|
|
601
544
|
let finalResults = projectFilteredResults.map((result) => {
|
|
@@ -639,14 +582,11 @@ export class ContentRetriever {
|
|
|
639
582
|
try {
|
|
640
583
|
const fileTable = await this.database.getTable(FILE_EMBEDDINGS_TABLE);
|
|
641
584
|
if (fileTable) {
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
if (structureResults.length === 0) {
|
|
648
|
-
structureResults = await fileTable.query().where("id = '__project_structure__'").limit(1).toArray();
|
|
649
|
-
}
|
|
585
|
+
const structureResults = await fileTable
|
|
586
|
+
.query()
|
|
587
|
+
.where(`project_path = '${escapeSqlString(resolvedProjectPath)}' AND type = 'directory-structure'`)
|
|
588
|
+
.limit(1)
|
|
589
|
+
.toArray();
|
|
650
590
|
|
|
651
591
|
if (structureResults.length > 0) {
|
|
652
592
|
const structureRecord = structureResults[0];
|
|
@@ -683,7 +623,7 @@ export class ContentRetriever {
|
|
|
683
623
|
finalResults = finalResults.slice(0, limit);
|
|
684
624
|
}
|
|
685
625
|
|
|
686
|
-
|
|
626
|
+
verboseLog(options, chalk.green(`Returning ${finalResults.length} optimized hybrid search results`));
|
|
687
627
|
return finalResults;
|
|
688
628
|
} catch (error) {
|
|
689
629
|
console.error(chalk.red(`Error in optimized findSimilarCode: ${error.message}`), error);
|
|
@@ -712,7 +652,7 @@ export class ContentRetriever {
|
|
|
712
652
|
this.h1EmbeddingCache.clear();
|
|
713
653
|
this.documentContextCache.clear();
|
|
714
654
|
this.documentContextPromiseCache.clear();
|
|
715
|
-
|
|
655
|
+
verboseLog({}, chalk.green('ContentRetriever caches cleared'));
|
|
716
656
|
}
|
|
717
657
|
|
|
718
658
|
/**
|
|
@@ -739,7 +679,7 @@ export class ContentRetriever {
|
|
|
739
679
|
parallelRerankingTime: 0,
|
|
740
680
|
};
|
|
741
681
|
|
|
742
|
-
|
|
682
|
+
verboseLog({}, chalk.green('ContentRetriever cleanup complete'));
|
|
743
683
|
} finally {
|
|
744
684
|
this.cleaningUp = false;
|
|
745
685
|
}
|
|
@@ -459,6 +459,14 @@ describe('ContentRetriever', () => {
|
|
|
459
459
|
const results = await retriever.findRelevantDocs('query', { projectPath: '/project' });
|
|
460
460
|
expect(results.length).toBe(0);
|
|
461
461
|
});
|
|
462
|
+
|
|
463
|
+
it('should reject sibling project absolute paths for documentation', async () => {
|
|
464
|
+
mockTable.toArray.mockResolvedValue([
|
|
465
|
+
createMockDocResult({ project_path: null, original_document_path: '/project-old/docs/readme.md' }),
|
|
466
|
+
]);
|
|
467
|
+
const results = await retriever.findRelevantDocs('query', { projectPath: '/project' });
|
|
468
|
+
expect(results).toHaveLength(0);
|
|
469
|
+
});
|
|
462
470
|
});
|
|
463
471
|
|
|
464
472
|
// ==========================================================================
|
|
@@ -498,6 +506,12 @@ describe('ContentRetriever', () => {
|
|
|
498
506
|
expect(results.length).toBe(0);
|
|
499
507
|
});
|
|
500
508
|
|
|
509
|
+
it('should reject sibling project absolute paths for code results', async () => {
|
|
510
|
+
mockTable.toArray.mockResolvedValue([createMockCodeResult({ project_path: null, path: '/project-old/src/file.js' })]);
|
|
511
|
+
const results = await retriever.findSimilarCode('query', { projectPath: '/project', similarityThreshold: 0 });
|
|
512
|
+
expect(results).toHaveLength(0);
|
|
513
|
+
});
|
|
514
|
+
|
|
501
515
|
it('should handle schema check errors', async () => {
|
|
502
516
|
mockTable.schema = null;
|
|
503
517
|
mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
|
|
@@ -511,20 +525,31 @@ describe('ContentRetriever', () => {
|
|
|
511
525
|
// ==========================================================================
|
|
512
526
|
|
|
513
527
|
describe('project structure inclusion', () => {
|
|
514
|
-
it('should
|
|
528
|
+
it('should include only project-scoped structure rows', async () => {
|
|
515
529
|
mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
|
|
516
|
-
|
|
530
|
+
const queryChain = {
|
|
517
531
|
where: vi.fn().mockReturnThis(),
|
|
518
532
|
limit: vi.fn().mockReturnThis(),
|
|
519
|
-
toArray: vi
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
533
|
+
toArray: vi.fn().mockResolvedValue([
|
|
534
|
+
{
|
|
535
|
+
id: '__project_structure__#abc12345',
|
|
536
|
+
content: 'Project structure',
|
|
537
|
+
path: '.',
|
|
538
|
+
project_path: '/project',
|
|
539
|
+
type: 'directory-structure',
|
|
540
|
+
vector: new Float32Array(384).fill(0.1),
|
|
541
|
+
},
|
|
542
|
+
]),
|
|
543
|
+
};
|
|
544
|
+
mockTable.query.mockReturnValue(queryChain);
|
|
545
|
+
const results = await retriever.findSimilarCode('query', {
|
|
546
|
+
includeProjectStructure: true,
|
|
547
|
+
similarityThreshold: 0,
|
|
548
|
+
projectPath: '/project',
|
|
525
549
|
});
|
|
526
|
-
const results = await retriever.findSimilarCode('query', { includeProjectStructure: true, similarityThreshold: 0 });
|
|
527
550
|
expect(results.some((r) => r.type === 'project-structure')).toBe(true);
|
|
551
|
+
expect(queryChain.where).toHaveBeenCalledWith(expect.stringContaining("type = 'directory-structure'"));
|
|
552
|
+
expect(queryChain.where).toHaveBeenCalledWith(expect.stringContaining("project_path = '/project'"));
|
|
528
553
|
});
|
|
529
554
|
|
|
530
555
|
it('should handle project structure inclusion errors', async () => {
|
|
@@ -539,6 +564,21 @@ describe('ContentRetriever', () => {
|
|
|
539
564
|
expect(console.warn).toHaveBeenCalledWith(expect.stringContaining('Project structure inclusion failed'));
|
|
540
565
|
});
|
|
541
566
|
|
|
567
|
+
it('should skip project structure rows from another project', async () => {
|
|
568
|
+
mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
|
|
569
|
+
mockTable.query.mockReturnValue({
|
|
570
|
+
where: vi.fn().mockReturnThis(),
|
|
571
|
+
limit: vi.fn().mockReturnThis(),
|
|
572
|
+
toArray: vi.fn().mockResolvedValue([]),
|
|
573
|
+
});
|
|
574
|
+
const results = await retriever.findSimilarCode('query', {
|
|
575
|
+
includeProjectStructure: true,
|
|
576
|
+
similarityThreshold: 0,
|
|
577
|
+
projectPath: '/project',
|
|
578
|
+
});
|
|
579
|
+
expect(results.some((r) => r.type === 'project-structure')).toBe(false);
|
|
580
|
+
});
|
|
581
|
+
|
|
542
582
|
it('should skip structure when similarity is too low', async () => {
|
|
543
583
|
mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
|
|
544
584
|
mockTable.query.mockReturnValue({
|
package/src/custom-documents.js
CHANGED
|
@@ -18,7 +18,7 @@ import { CacheManager } from './embeddings/cache-manager.js';
|
|
|
18
18
|
import { EmbeddingError, ValidationError } from './embeddings/errors.js';
|
|
19
19
|
import { ModelManager } from './embeddings/model-manager.js';
|
|
20
20
|
import { calculateCosineSimilarity, calculatePathSimilarity } from './embeddings/similarity-calculator.js';
|
|
21
|
-
import { debug } from './utils/logging.js';
|
|
21
|
+
import { debug, verboseLog } from './utils/logging.js';
|
|
22
22
|
import { slugify } from './utils/string-utils.js';
|
|
23
23
|
|
|
24
24
|
/**
|
|
@@ -140,7 +140,7 @@ export class CustomDocumentProcessor {
|
|
|
140
140
|
this.performanceMetrics.averageChunkSize = chunks.reduce((sum, chunk) => sum + chunk.content.length, 0) / chunks.length;
|
|
141
141
|
this.performanceMetrics.processingTime += Date.now() - startTime;
|
|
142
142
|
|
|
143
|
-
|
|
143
|
+
verboseLog({}, chalk.gray(` Chunked document "${documentTitle}" into ${chunks.length} chunks`));
|
|
144
144
|
return chunks;
|
|
145
145
|
} catch (error) {
|
|
146
146
|
console.error(chalk.red(`Error chunking document: ${error.message}`));
|
|
@@ -159,18 +159,18 @@ export class CustomDocumentProcessor {
|
|
|
159
159
|
|
|
160
160
|
try {
|
|
161
161
|
if (!customDocs || customDocs.length === 0) {
|
|
162
|
-
|
|
162
|
+
verboseLog({}, chalk.gray('No custom documents to process'));
|
|
163
163
|
return [];
|
|
164
164
|
}
|
|
165
165
|
|
|
166
|
-
|
|
166
|
+
verboseLog({}, chalk.cyan(`Processing ${customDocs.length} custom documents into chunks...`));
|
|
167
167
|
|
|
168
168
|
const allChunks = [];
|
|
169
169
|
let totalBatchAttempts = 0;
|
|
170
170
|
let successfulBatches = 0;
|
|
171
171
|
|
|
172
172
|
for (const doc of customDocs) {
|
|
173
|
-
|
|
173
|
+
verboseLog({}, chalk.gray(` Processing document: ${doc.title}`));
|
|
174
174
|
|
|
175
175
|
// Chunk the document
|
|
176
176
|
const chunks = this.chunkDocument(doc);
|
|
@@ -205,12 +205,12 @@ export class CustomDocumentProcessor {
|
|
|
205
205
|
const validChunks = chunksWithEmbeddings.filter((chunk) => chunk !== null);
|
|
206
206
|
allChunks.push(...validChunks);
|
|
207
207
|
|
|
208
|
-
|
|
208
|
+
verboseLog({}, chalk.gray(` Generated embeddings for ${validChunks.length}/${chunks.length} chunks`));
|
|
209
209
|
this.performanceMetrics.embeddingsCalculated += validChunks.length;
|
|
210
210
|
} catch (error) {
|
|
211
211
|
console.error(chalk.red(`Error in batch embedding generation for document ${doc.title}: ${error.message}`));
|
|
212
212
|
// Fallback to individual processing for this document
|
|
213
|
-
|
|
213
|
+
verboseLog({}, chalk.yellow(` Falling back to individual processing for ${doc.title}`));
|
|
214
214
|
|
|
215
215
|
const chunksWithEmbeddings = await Promise.all(
|
|
216
216
|
chunks.map(async (chunk) => {
|
|
@@ -235,7 +235,7 @@ export class CustomDocumentProcessor {
|
|
|
235
235
|
const validChunks = chunksWithEmbeddings.filter((chunk) => chunk !== null);
|
|
236
236
|
allChunks.push(...validChunks);
|
|
237
237
|
|
|
238
|
-
|
|
238
|
+
verboseLog({}, chalk.gray(` Generated embeddings for ${validChunks.length}/${chunks.length} chunks (fallback)`));
|
|
239
239
|
}
|
|
240
240
|
}
|
|
241
241
|
|
|
@@ -252,7 +252,7 @@ export class CustomDocumentProcessor {
|
|
|
252
252
|
this.performanceMetrics.documentsProcessed += customDocs.length;
|
|
253
253
|
this.performanceMetrics.processingTime += Date.now() - startTime;
|
|
254
254
|
|
|
255
|
-
|
|
255
|
+
verboseLog({}, chalk.green(`Successfully processed ${allChunks.length} custom document chunks (${Date.now() - startTime}ms)`));
|
|
256
256
|
return allChunks;
|
|
257
257
|
} catch (error) {
|
|
258
258
|
console.error(chalk.red(`Error processing custom documents: ${error.message}`));
|
|
@@ -285,11 +285,11 @@ export class CustomDocumentProcessor {
|
|
|
285
285
|
}
|
|
286
286
|
|
|
287
287
|
if (!chunks || chunks.length === 0) {
|
|
288
|
-
|
|
288
|
+
verboseLog({}, chalk.gray('No custom document chunks available for search'));
|
|
289
289
|
return [];
|
|
290
290
|
}
|
|
291
291
|
|
|
292
|
-
|
|
292
|
+
verboseLog({}, chalk.cyan(`Searching ${chunks.length} custom document chunks...`));
|
|
293
293
|
|
|
294
294
|
// OPTIMIZATION: Use pre-computed query embedding if available
|
|
295
295
|
let queryEmbedding = precomputedQueryEmbedding;
|
|
@@ -319,7 +319,7 @@ export class CustomDocumentProcessor {
|
|
|
319
319
|
filteredResults = filteredResults.slice(0, limit);
|
|
320
320
|
}
|
|
321
321
|
|
|
322
|
-
|
|
322
|
+
verboseLog({}, chalk.green(`Found ${filteredResults.length} relevant custom document chunks (${Date.now() - startTime}ms)`));
|
|
323
323
|
|
|
324
324
|
// Log top results for debugging
|
|
325
325
|
if (filteredResults.length > 0) {
|
|
@@ -371,7 +371,7 @@ export class CustomDocumentProcessor {
|
|
|
371
371
|
* @private
|
|
372
372
|
*/
|
|
373
373
|
async _applyParallelReranking(filteredResults, queryText, queryContextForReranking, queryFilePath, queryEmbedding) {
|
|
374
|
-
|
|
374
|
+
verboseLog({}, chalk.cyan('Applying optimized parallel contextual reranking to custom document chunks...'));
|
|
375
375
|
|
|
376
376
|
const WEIGHT_INITIAL_SIM = 0.4;
|
|
377
377
|
const WEIGHT_DOCUMENT_TITLE_MATCH = 0.2;
|
|
@@ -466,7 +466,7 @@ export class CustomDocumentProcessor {
|
|
|
466
466
|
// Wait for all reranking calculations to complete in parallel
|
|
467
467
|
await Promise.all(rerankingPromises);
|
|
468
468
|
|
|
469
|
-
|
|
469
|
+
verboseLog({}, chalk.cyan(`Parallel reranking completed for ${filteredResults.length} chunks`));
|
|
470
470
|
|
|
471
471
|
// Log debug info for first few results
|
|
472
472
|
for (let i = 0; i < Math.min(3, filteredResults.length); i++) {
|
|
@@ -521,7 +521,7 @@ export class CustomDocumentProcessor {
|
|
|
521
521
|
const resolvedProjectPath = path.resolve(projectPath);
|
|
522
522
|
this.customDocumentChunks.delete(resolvedProjectPath);
|
|
523
523
|
this.cacheManager.customDocumentChunks.delete(resolvedProjectPath);
|
|
524
|
-
|
|
524
|
+
verboseLog({}, chalk.green(`Cleared custom document chunks for project: ${resolvedProjectPath}`));
|
|
525
525
|
} catch (error) {
|
|
526
526
|
console.error(chalk.red(`Error clearing project chunks: ${error.message}`));
|
|
527
527
|
}
|
|
@@ -561,7 +561,7 @@ export class CustomDocumentProcessor {
|
|
|
561
561
|
clearCaches() {
|
|
562
562
|
this.h1EmbeddingCache.clear();
|
|
563
563
|
this.customDocumentChunks.clear();
|
|
564
|
-
|
|
564
|
+
verboseLog({}, chalk.green('CustomDocumentProcessor caches cleared'));
|
|
565
565
|
}
|
|
566
566
|
|
|
567
567
|
/**
|
|
@@ -589,7 +589,7 @@ export class CustomDocumentProcessor {
|
|
|
589
589
|
processingTime: 0,
|
|
590
590
|
};
|
|
591
591
|
|
|
592
|
-
|
|
592
|
+
verboseLog({}, chalk.green('CustomDocumentProcessor cleanup complete'));
|
|
593
593
|
} finally {
|
|
594
594
|
this.cleaningUp = false;
|
|
595
595
|
}
|