codecritique 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +82 -114
  2. package/package.json +10 -9
  3. package/src/content-retrieval.test.js +775 -0
  4. package/src/custom-documents.test.js +440 -0
  5. package/src/feedback-loader.test.js +529 -0
  6. package/src/llm.test.js +256 -0
  7. package/src/project-analyzer.test.js +747 -0
  8. package/src/rag-analyzer.js +12 -0
  9. package/src/rag-analyzer.test.js +1109 -0
  10. package/src/rag-review.test.js +317 -0
  11. package/src/setupTests.js +131 -0
  12. package/src/zero-shot-classifier-open.test.js +278 -0
  13. package/src/embeddings/cache-manager.js +0 -364
  14. package/src/embeddings/constants.js +0 -40
  15. package/src/embeddings/database.js +0 -921
  16. package/src/embeddings/errors.js +0 -208
  17. package/src/embeddings/factory.js +0 -447
  18. package/src/embeddings/file-processor.js +0 -851
  19. package/src/embeddings/model-manager.js +0 -337
  20. package/src/embeddings/similarity-calculator.js +0 -97
  21. package/src/embeddings/types.js +0 -113
  22. package/src/pr-history/analyzer.js +0 -579
  23. package/src/pr-history/bot-detector.js +0 -123
  24. package/src/pr-history/cli-utils.js +0 -204
  25. package/src/pr-history/comment-processor.js +0 -549
  26. package/src/pr-history/database.js +0 -819
  27. package/src/pr-history/github-client.js +0 -629
  28. package/src/technology-keywords.json +0 -753
  29. package/src/utils/command.js +0 -48
  30. package/src/utils/constants.js +0 -263
  31. package/src/utils/context-inference.js +0 -364
  32. package/src/utils/document-detection.js +0 -105
  33. package/src/utils/file-validation.js +0 -271
  34. package/src/utils/git.js +0 -232
  35. package/src/utils/language-detection.js +0 -170
  36. package/src/utils/logging.js +0 -24
  37. package/src/utils/markdown.js +0 -132
  38. package/src/utils/mobilebert-tokenizer.js +0 -141
  39. package/src/utils/pr-chunking.js +0 -276
  40. package/src/utils/string-utils.js +0 -28
@@ -0,0 +1,775 @@
1
+ import fs from 'node:fs';
2
+ import { ContentRetriever } from './content-retrieval.js';
3
+ import { createMockTable, createMockDatabaseManager, createMockModelManager } from './test-utils/fixtures.js';
4
+
5
+ vi.mock('./embeddings/model-manager.js', () => ({
6
+ ModelManager: class {
7
+ calculateQueryEmbedding = vi.fn().mockResolvedValue(createMockEmbedding());
8
+ calculateEmbeddingBatch = vi.fn().mockResolvedValue([createMockEmbedding()]);
9
+ },
10
+ }));
11
+
12
+ vi.mock('./embeddings/database.js', () => ({
13
+ DatabaseManager: class {
14
+ connect = vi.fn().mockResolvedValue({});
15
+ getTable = vi.fn();
16
+ },
17
+ }));
18
+
19
+ vi.mock('./embeddings/cache-manager.js', () => ({
20
+ CacheManager: class {},
21
+ }));
22
+
23
+ vi.mock('./utils/context-inference.js', () => ({
24
+ inferContextFromDocumentContent: vi.fn().mockResolvedValue({
25
+ area: 'Frontend',
26
+ dominantTech: ['React'],
27
+ isGeneralPurposeReadmeStyle: false,
28
+ }),
29
+ }));
30
+
31
+ vi.mock('./utils/document-detection.js', () => ({
32
+ isGenericDocument: vi.fn().mockReturnValue(false),
33
+ getGenericDocumentContext: vi.fn().mockReturnValue({
34
+ area: 'General',
35
+ dominantTech: [],
36
+ isGeneralPurposeReadmeStyle: true,
37
+ }),
38
+ }));
39
+
40
+ vi.mock('./utils/file-validation.js', () => ({
41
+ isDocumentationFile: vi.fn().mockReturnValue(false),
42
+ }));
43
+
44
+ vi.mock('node:fs', async (importOriginal) => { // Partial mock of node:fs: spread the real module, then replace `promises` with an object holding only a stubbed `access`
45
+ const original = await importOriginal();
46
+ return {
47
+ ...original,
48
+ default: {
49
+ ...original,
50
+ promises: { access: vi.fn() }, // stub reached through the default import (the tests call fs.promises.access.mockResolvedValue on it)
51
+ },
52
+ promises: { access: vi.fn() }, // NOTE(review): this is a separate vi.fn() from the default export's stub and is never configured by the tests — confirm the code under test does not read the named `promises` export
53
+ };
54
+ });
55
+
56
+ // ============================================================================
57
+ // Helper Functions
58
+ // ============================================================================
59
+
60
+ const createMockDocResult = (overrides = {}) => ({ // Builds one raw documentation row for the mocked table's toArray(); callers pass `overrides` to vary individual fields
61
+ content: 'Documentation content',
62
+ original_document_path: 'docs/api.md', // the mapping test expects this to surface as `path` on the returned result
63
+ project_path: process.cwd(),
64
+ _distance: 0.1,
65
+ heading_text: 'API Reference', // the mapping test expects this to surface as `headingText`
66
+ document_title: 'API Documentation',
67
+ language: 'markdown',
68
+ ...overrides, // spread last so caller-supplied fields replace the defaults above
69
+ });
70
+
71
+ const createMockCodeResult = (overrides = {}) => ({ // Builds one raw code row for the mocked table's toArray(); callers pass `overrides` to vary individual fields
72
+ content: 'function test() {}',
73
+ path: 'src/utils.js',
74
+ project_path: process.cwd(),
75
+ _distance: 0.1,
76
+ ...overrides, // spread last so caller-supplied fields replace the defaults above
77
+ });
78
+
79
+ // ============================================================================
80
+ // Tests
81
+ // ============================================================================
82
+
83
+ describe('ContentRetriever', () => {
84
+ let retriever;
85
+ let mockTable;
86
+ let mockDatabase;
87
+ let mockModelManager;
88
+
89
+ beforeEach(() => {
90
+ mockConsole();
91
+ mockTable = createMockTable({
92
+ search: vi.fn().mockReturnThis(),
93
+ nearestToText: vi.fn().mockReturnThis(),
94
+ where: vi.fn().mockReturnThis(),
95
+ limit: vi.fn().mockReturnThis(),
96
+ query: vi.fn().mockReturnThis(),
97
+ toArray: vi.fn().mockResolvedValue([]),
98
+ schema: { fields: [{ name: 'project_path' }] },
99
+ });
100
+ mockDatabase = createMockDatabaseManager(mockTable);
101
+ mockModelManager = createMockModelManager();
102
+ retriever = new ContentRetriever({ database: mockDatabase, modelManager: mockModelManager });
103
+ fs.promises.access.mockResolvedValue(undefined);
104
+ });
105
+
106
+ // ==========================================================================
107
+ // Constructor
108
+ // ==========================================================================
109
+
110
+ describe('constructor', () => {
111
+ it('should initialize with default options', () => {
112
+ const r = new ContentRetriever();
113
+ expect(r.h1EmbeddingCache).toBeInstanceOf(Map);
114
+ expect(r.documentContextCache).toBeInstanceOf(Map);
115
+ expect(r.performanceMetrics.searchCount).toBe(0);
116
+ });
117
+
118
+ it('should accept custom dependencies', () => {
119
+ const r = new ContentRetriever({ database: mockDatabase, modelManager: mockModelManager });
120
+ expect(r.database).toBe(mockDatabase);
121
+ expect(r.modelManager).toBe(mockModelManager);
122
+ });
123
+ });
124
+
125
+ // ==========================================================================
126
+ // findRelevantDocs - Basic
127
+ // ==========================================================================
128
+
129
+ describe('findRelevantDocs', () => {
130
+ it.each([
131
+ ['empty query', ''],
132
+ ['whitespace query', ' '],
133
+ ])('should return empty array for %s', async (_, query) => {
134
+ const results = await retriever.findRelevantDocs(query);
135
+ expect(results).toEqual([]);
136
+ });
137
+
138
+ it('should return empty array when table not found', async () => {
139
+ mockDatabase.getTable.mockResolvedValue(null);
140
+ const results = await retriever.findRelevantDocs('test query');
141
+ expect(results).toEqual([]);
142
+ expect(console.warn).toHaveBeenCalled();
143
+ });
144
+
145
+ it('should perform hybrid search on documentation table', async () => {
146
+ mockTable.toArray.mockResolvedValue([createMockDocResult()]);
147
+ const results = await retriever.findRelevantDocs('API documentation');
148
+ expect(mockTable.search).toHaveBeenCalledWith('API documentation');
149
+ expect(results.length).toBeGreaterThanOrEqual(0);
150
+ });
151
+
152
+ it('should filter by similarity threshold', async () => {
153
+ mockTable.toArray.mockResolvedValue([createMockDocResult({ _distance: 0.1 }), createMockDocResult({ _distance: 0.9 })]);
154
+ const results = await retriever.findRelevantDocs('query', { similarityThreshold: 0.5 });
155
+ expect(results.every((r) => r.similarity >= 0.5)).toBe(true);
156
+ });
157
+
158
+ it('should limit results', async () => {
159
+ mockTable.toArray.mockResolvedValue(
160
+ Array.from({ length: 20 }, (_, i) => createMockDocResult({ content: `Doc ${i}`, _distance: 0.1 + i * 0.01 }))
161
+ );
162
+ const results = await retriever.findRelevantDocs('query', { limit: 5 });
163
+ expect(results.length).toBeLessThanOrEqual(5);
164
+ });
165
+
166
+ it('should filter results by project path', async () => {
167
+ const projectPath = '/test/project';
168
+ mockTable.toArray.mockResolvedValue([
169
+ createMockDocResult({ project_path: projectPath }),
170
+ createMockDocResult({ project_path: '/other/project' }),
171
+ ]);
172
+ const results = await retriever.findRelevantDocs('query', { projectPath });
173
+ expect(results.every((r) => r.path !== '/other/project')).toBe(true);
174
+ });
175
+
176
+ it('should map results to expected format', async () => {
177
+ mockTable.toArray.mockResolvedValue([createMockDocResult()]);
178
+ const results = await retriever.findRelevantDocs('query');
179
+ expect(results[0]).toMatchObject({
180
+ type: 'documentation-chunk',
181
+ content: 'Documentation content',
182
+ path: 'docs/api.md',
183
+ headingText: 'API Reference',
184
+ document_title: 'API Documentation',
185
+ });
186
+ });
187
+
188
+ it('should increment performance metrics', async () => {
189
+ await retriever.findRelevantDocs('query');
190
+ expect(retriever.performanceMetrics.searchCount).toBe(1);
191
+ });
192
+ });
193
+
194
+ // ==========================================================================
195
+ // findSimilarCode - Basic
196
+ // ==========================================================================
197
+
198
+ describe('findSimilarCode', () => {
199
+ it('should return empty array for empty query', async () => {
200
+ const results = await retriever.findSimilarCode('');
201
+ expect(results).toEqual([]);
202
+ });
203
+
204
+ it('should return empty array when table not found', async () => {
205
+ mockDatabase.getTable.mockResolvedValue(null);
206
+ const results = await retriever.findSimilarCode('test query');
207
+ expect(results).toEqual([]);
208
+ });
209
+
210
+ it('should perform hybrid search on file embeddings table', async () => {
211
+ mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
212
+ const results = await retriever.findSimilarCode('test function');
213
+ expect(mockTable.search).toHaveBeenCalledWith('test function');
214
+ expect(results.length).toBeGreaterThanOrEqual(0);
215
+ });
216
+
217
+ it('should exclude directory-structure from results', async () => {
218
+ mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
219
+ await retriever.findSimilarCode('query');
220
+ expect(mockTable.where).toHaveBeenCalledWith(expect.stringContaining("type != 'directory-structure'"));
221
+ });
222
+
223
+ it('should filter for test files when isTestFile is true', async () => {
224
+ await retriever.findSimilarCode('query', { isTestFile: true });
225
+ expect(mockTable.where).toHaveBeenCalledWith(expect.stringContaining('.test.'));
226
+ });
227
+
228
+ it('should exclude test files when isTestFile is false', async () => {
229
+ await retriever.findSimilarCode('query', { isTestFile: false });
230
+ expect(mockTable.where).toHaveBeenCalledWith(expect.stringContaining('NOT LIKE'));
231
+ });
232
+
233
+ it('should exclude the file being reviewed', async () => {
234
+ await retriever.findSimilarCode('query', { queryFilePath: 'src/current-file.js', projectPath: '/project' });
235
+ expect(mockTable.where).toHaveBeenCalledWith(expect.stringContaining('current-file'));
236
+ });
237
+
238
+ it('should filter by project path', async () => {
239
+ const projectPath = '/test/project';
240
+ mockTable.toArray.mockResolvedValue([
241
+ createMockCodeResult({ project_path: projectPath }),
242
+ createMockCodeResult({ project_path: '/other', path: 'src/other.js' }),
243
+ ]);
244
+ const results = await retriever.findSimilarCode('query', { projectPath });
245
+ expect(results.some((r) => r.path === 'src/other.js')).toBe(false);
246
+ });
247
+
248
+ it('should include project structure when requested', async () => {
249
+ mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
250
+ mockTable.query.mockReturnValue({
251
+ where: vi.fn().mockReturnThis(),
252
+ limit: vi.fn().mockReturnThis(),
253
+ toArray: vi
254
+ .fn()
255
+ .mockResolvedValue([
256
+ { id: '__project_structure__', content: 'Project structure', path: '.', vector: new Float32Array(384).fill(0.1) },
257
+ ]),
258
+ });
259
+ const results = await retriever.findSimilarCode('query', { includeProjectStructure: true });
260
+ expect(results.some((r) => r.type === 'project-structure')).toBe(true);
261
+ });
262
+
263
+ it('should handle errors gracefully', async () => {
264
+ mockDatabase.getTable.mockRejectedValue(new Error('Database error'));
265
+ const results = await retriever.findSimilarCode('query');
266
+ expect(results).toEqual([]);
267
+ expect(console.error).toHaveBeenCalled();
268
+ });
269
+ });
270
+
271
+ // ==========================================================================
272
+ // Performance Metrics & Cleanup
273
+ // ==========================================================================
274
+
275
+ describe('getPerformanceMetrics', () => {
276
+ it('should return performance metrics', () => {
277
+ const metrics = retriever.getPerformanceMetrics();
278
+ expect(metrics).toHaveProperty('searchCount');
279
+ expect(metrics).toHaveProperty('totalSearchTime');
280
+ expect(metrics).toHaveProperty('cacheSize');
281
+ expect(metrics).toHaveProperty('documentContextCacheSize');
282
+ });
283
+
284
+ it('should calculate average search time', async () => { // NOTE(review): the title mentions average search time, but only searchCount is asserted below — confirm which metric this test is meant to pin
285
+ await retriever.findRelevantDocs('query1');
286
+ await retriever.findRelevantDocs('query2');
287
+ const metrics = retriever.getPerformanceMetrics();
288
+ expect(metrics.searchCount).toBe(2); // one increment per findRelevantDocs call above
289
+ });
290
+ });
291
+
292
+ describe('clearCaches', () => {
293
+ it('should clear all caches', () => {
294
+ retriever.h1EmbeddingCache.set('key1', 'value1');
295
+ retriever.documentContextCache.set('key2', 'value2');
296
+ retriever.clearCaches();
297
+ expect(retriever.h1EmbeddingCache.size).toBe(0);
298
+ expect(retriever.documentContextCache.size).toBe(0);
299
+ });
300
+ });
301
+
302
+ describe('cleanup', () => {
303
+ it('should clear caches and reset metrics', async () => {
304
+ retriever.h1EmbeddingCache.set('key', 'value');
305
+ retriever.performanceMetrics.searchCount = 10;
306
+ await retriever.cleanup();
307
+ expect(retriever.h1EmbeddingCache.size).toBe(0);
308
+ expect(retriever.performanceMetrics.searchCount).toBe(0);
309
+ });
310
+
311
+ it('should prevent duplicate cleanup calls', async () => {
312
+ retriever.cleaningUp = true;
313
+ retriever.h1EmbeddingCache.set('key', 'value');
314
+ await retriever.cleanup();
315
+ expect(retriever.h1EmbeddingCache.size).toBe(1);
316
+ });
317
+
318
+ it('should reset cleaningUp flag after completion', async () => {
319
+ await retriever.cleanup();
320
+ expect(retriever.cleaningUp).toBe(false);
321
+ });
322
+ });
323
+
324
+ // ==========================================================================
325
+ // Similarity Calculation
326
+ // ==========================================================================
327
+
328
+ describe('similarity score calculation', () => {
329
+ it('should calculate similarity from distance 0', async () => {
330
+ mockTable.toArray.mockResolvedValue([createMockCodeResult({ _distance: 0 })]);
331
+ const results = await retriever.findSimilarCode('query', { similarityThreshold: 0 });
332
+ expect(results[0].similarity).toBeGreaterThan(0.9);
333
+ });
334
+
335
+ it('should calculate similarity from _score', async () => {
336
+ mockTable.toArray.mockResolvedValue([{ content: 'test', path: 'src/test.js', project_path: process.cwd(), _score: 0.9 }]);
337
+ const results = await retriever.findSimilarCode('query', { similarityThreshold: 0 });
338
+ expect(results[0].similarity).toBe(0.9);
339
+ });
340
+
341
+ it('should use fallback when no score/distance', async () => {
342
+ mockTable.toArray.mockResolvedValue([{ content: 'test', path: 'src/test.js', project_path: process.cwd() }]);
343
+ const results = await retriever.findSimilarCode('query', { similarityThreshold: 0 });
344
+ expect(results[0].similarity).toBe(0.5);
345
+ });
346
+ });
347
+
348
+ // ==========================================================================
349
+ // Reranking
350
+ // ==========================================================================
351
+
352
+ describe('findRelevantDocs with reranking', () => {
353
+ it('should apply reranking when enabled with context', async () => {
354
+ mockTable.toArray.mockResolvedValue([
355
+ createMockDocResult({ content: 'Doc 1', heading_text: 'API' }),
356
+ createMockDocResult({ content: 'Doc 2', original_document_path: 'docs/guide.md', _distance: 0.2 }),
357
+ createMockDocResult({ content: 'Doc 3', original_document_path: 'docs/faq.md', _distance: 0.3 }),
358
+ ]);
359
+ const results = await retriever.findRelevantDocs('API usage', {
360
+ useReranking: true,
361
+ queryContextForReranking: { area: 'Frontend', dominantTech: ['React'] },
362
+ });
363
+ expect(Array.isArray(results)).toBe(true);
364
+ });
365
+
366
+ it('should skip reranking when disabled', async () => {
367
+ mockTable.toArray.mockResolvedValue([createMockDocResult()]);
368
+ const results = await retriever.findRelevantDocs('query', { useReranking: false });
369
+ expect(results.length).toBeGreaterThanOrEqual(0);
370
+ });
371
+
372
+ it('should handle schema check errors gracefully', async () => {
373
+ mockTable.schema = null;
374
+ mockTable.toArray.mockResolvedValue([createMockDocResult()]);
375
+ const results = await retriever.findRelevantDocs('query');
376
+ expect(Array.isArray(results)).toBe(true);
377
+ });
378
+ });
379
+
380
+ // ==========================================================================
381
+ // Advanced Options
382
+ // ==========================================================================
383
+
384
+ describe('findSimilarCode advanced options', () => {
385
+ it('should handle precomputed embeddings', async () => {
386
+ mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
387
+ const precomputed = createMockEmbedding();
388
+ const results = await retriever.findSimilarCode('query', { precomputedQueryEmbedding: precomputed });
389
+ expect(Array.isArray(results)).toBe(true);
390
+ });
391
+
392
+ it('should call where clause to exclude self-matches', async () => {
393
+ mockTable.toArray.mockResolvedValue([createMockCodeResult({ path: 'src/other.js' })]);
394
+ await retriever.findSimilarCode('query', { queryFilePath: 'src/current.js', projectPath: process.cwd(), similarityThreshold: 0 });
395
+ expect(mockTable.where).toHaveBeenCalledWith(expect.stringContaining('current'));
396
+ });
397
+ });
398
+
399
+ // ==========================================================================
400
+ // Path Filtering
401
+ // ==========================================================================
402
+
403
+ describe('file path filtering', () => {
404
+ it('should filter results from different projects', async () => {
405
+ mockTable.toArray.mockResolvedValue([
406
+ createMockCodeResult({ project_path: '/my/project' }),
407
+ createMockCodeResult({ project_path: '/other/project', path: 'src/other.js' }),
408
+ ]);
409
+ const results = await retriever.findSimilarCode('query', { projectPath: '/my/project', similarityThreshold: 0 });
410
+ expect(results.every((r) => !r.path.includes('/other/project'))).toBe(true);
411
+ });
412
+ });
413
+
414
+ describe('error handling', () => {
415
+ it('should throw EmbeddingError on database connection failure', async () => {
416
+ mockDatabase.connect.mockRejectedValue(new Error('Connection failed'));
417
+ await expect(retriever.findRelevantDocs('query')).rejects.toThrow('Documentation search failed');
418
+ expect(console.error).toHaveBeenCalled();
419
+ });
420
+
421
+ it('should throw EmbeddingError on table search failure', async () => {
422
+ mockTable.toArray.mockRejectedValue(new Error('Search failed'));
423
+ await expect(retriever.findRelevantDocs('query')).rejects.toThrow('Documentation search failed');
424
+ });
425
+ });
426
+
427
+ // ==========================================================================
428
+ // Documentation Path Filtering
429
+ // ==========================================================================
430
+
431
+ describe('documentation path filtering', () => {
432
+ it.each([
433
+ ['without project_path field', { content: 'Doc', original_document_path: 'docs/readme.md', _distance: 0.1 }],
434
+ ['with absolute paths in project', { content: 'In project', original_document_path: '/test/project/docs/readme.md', _distance: 0.1 }],
435
+ ])('should handle results %s', async (_, result) => {
436
+ mockTable.toArray.mockResolvedValue([result]);
437
+ const results = await retriever.findRelevantDocs('query', { projectPath: '/test/project' });
438
+ expect(Array.isArray(results)).toBe(true);
439
+ });
440
+
441
+ it('should filter out results without original_document_path', async () => {
442
+ mockTable.toArray.mockResolvedValue([{ content: 'Doc without path', _distance: 0.1 }]);
443
+ const results = await retriever.findRelevantDocs('query');
444
+ expect(results.length).toBe(0);
445
+ });
446
+
447
+ it('should check file existence for relative paths', async () => {
448
+ fs.promises.access.mockResolvedValueOnce(undefined).mockRejectedValueOnce(new Error('ENOENT'));
449
+ mockTable.toArray.mockResolvedValue([
450
+ createMockDocResult({ original_document_path: 'docs/exists.md' }),
451
+ createMockDocResult({ original_document_path: 'docs/missing.md' }),
452
+ ]);
453
+ const results = await retriever.findRelevantDocs('query', { projectPath: '/project' });
454
+ expect(results.some((r) => r.content === 'Missing doc')).toBe(false);
455
+ });
456
+
457
+ it('should filter out paths outside project bounds', async () => {
458
+ mockTable.toArray.mockResolvedValue([createMockDocResult({ original_document_path: '../outside/doc.md' })]);
459
+ const results = await retriever.findRelevantDocs('query', { projectPath: '/project' });
460
+ expect(results.length).toBe(0);
461
+ });
462
+ });
463
+
464
+ // ==========================================================================
465
+ // Code Search Path Filtering
466
+ // ==========================================================================
467
+
468
+ describe('code search path filtering', () => {
469
+ it('should handle results without path fields', async () => {
470
+ mockTable.toArray.mockResolvedValue([{ content: 'Code', _distance: 0.1 }]);
471
+ const results = await retriever.findSimilarCode('query', { similarityThreshold: 0 });
472
+ expect(results.length).toBe(0);
473
+ });
474
+
475
+ it('should handle absolute paths in code results', async () => {
476
+ mockTable.toArray.mockResolvedValue([
477
+ createMockCodeResult({ path: '/test/project/src/file.js', project_path: '/test/project' }),
478
+ createMockCodeResult({ path: '/other/project/src/file.js', project_path: '/other/project' }),
479
+ ]);
480
+ const results = await retriever.findSimilarCode('query', { projectPath: '/test/project', similarityThreshold: 0 });
481
+ expect(results.length).toBe(1);
482
+ expect(results[0].content).toBe('function test() {}');
483
+ });
484
+
485
+ it('should check file existence for relative paths', async () => {
486
+ fs.promises.access.mockResolvedValueOnce(undefined).mockRejectedValueOnce(new Error('ENOENT'));
487
+ mockTable.toArray.mockResolvedValue([
488
+ createMockCodeResult({ path: 'src/exists.js' }),
489
+ createMockCodeResult({ path: 'src/missing.js', content: 'Missing' }),
490
+ ]);
491
+ const results = await retriever.findSimilarCode('query', { projectPath: '/project', similarityThreshold: 0 });
492
+ expect(results.some((r) => r.content === 'Missing')).toBe(false);
493
+ });
494
+
495
+ it('should filter out paths outside project bounds', async () => {
496
+ mockTable.toArray.mockResolvedValue([createMockCodeResult({ path: '../outside/file.js' })]);
497
+ const results = await retriever.findSimilarCode('query', { projectPath: '/project', similarityThreshold: 0 });
498
+ expect(results.length).toBe(0);
499
+ });
500
+
501
+ it('should handle schema check errors', async () => {
502
+ mockTable.schema = null;
503
+ mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
504
+ const results = await retriever.findSimilarCode('query', { similarityThreshold: 0 });
505
+ expect(Array.isArray(results)).toBe(true);
506
+ });
507
+ });
508
+
509
+ // ==========================================================================
510
+ // Project Structure
511
+ // ==========================================================================
512
+
513
+ describe('project structure inclusion', () => {
514
+ it('should fall back to generic project structure', async () => {
515
+ mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
516
+ mockTable.query.mockReturnValue({
517
+ where: vi.fn().mockReturnThis(),
518
+ limit: vi.fn().mockReturnThis(),
519
+ toArray: vi
520
+ .fn()
521
+ .mockResolvedValueOnce([])
522
+ .mockResolvedValueOnce([
523
+ { id: '__project_structure__', content: 'Generic structure', path: '.', vector: new Float32Array(384).fill(0.1) },
524
+ ]),
525
+ });
526
+ const results = await retriever.findSimilarCode('query', { includeProjectStructure: true, similarityThreshold: 0 });
527
+ expect(results.some((r) => r.type === 'project-structure')).toBe(true);
528
+ });
529
+
530
+ it('should handle project structure inclusion errors', async () => {
531
+ mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
532
+ mockTable.query.mockReturnValue({
533
+ where: vi.fn().mockReturnThis(),
534
+ limit: vi.fn().mockReturnThis(),
535
+ toArray: vi.fn().mockRejectedValue(new Error('Structure lookup failed')),
536
+ });
537
+ const results = await retriever.findSimilarCode('query', { includeProjectStructure: true, similarityThreshold: 0 });
538
+ expect(Array.isArray(results)).toBe(true);
539
+ expect(console.warn).toHaveBeenCalledWith(expect.stringContaining('Project structure inclusion failed'));
540
+ });
541
+
542
+ it('should skip structure when similarity is too low', async () => {
543
+ mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
544
+ mockTable.query.mockReturnValue({
545
+ where: vi.fn().mockReturnThis(),
546
+ limit: vi.fn().mockReturnThis(),
547
+ toArray: vi
548
+ .fn()
549
+ .mockResolvedValue([{ id: '__project_structure__', content: 'Structure', path: '.', vector: new Float32Array(384).fill(0) }]),
550
+ });
551
+ const results = await retriever.findSimilarCode('query', { includeProjectStructure: true, similarityThreshold: 0 });
552
+ expect(results.some((r) => r.type === 'project-structure')).toBe(false);
553
+ });
554
+ });
555
+
556
+ // ==========================================================================
557
+ // Advanced Reranking
558
+ // ==========================================================================
559
+
560
+ describe('advanced reranking', () => {
561
+ let inferContextMock;
562
+ let isGenericDocMock;
563
+ let getGenericContextMock;
564
+
565
+ beforeEach(async () => {
566
+ const contextInference = await import('./utils/context-inference.js'); // eslint-disable-line no-restricted-syntax
567
+ const docDetection = await import('./utils/document-detection.js'); // eslint-disable-line no-restricted-syntax
568
+ inferContextMock = vi.spyOn(contextInference, 'inferContextFromDocumentContent');
569
+ isGenericDocMock = vi.spyOn(docDetection, 'isGenericDocument');
570
+ getGenericContextMock = vi.spyOn(docDetection, 'getGenericDocumentContext');
571
+ });
572
+
573
+ it('should use fast-path for generic documents', async () => {
574
+ isGenericDocMock.mockReturnValue(true);
575
+ getGenericContextMock.mockReturnValue({ area: 'General', dominantTech: [], isGeneralPurposeReadmeStyle: true });
576
+ mockTable.toArray.mockResolvedValue([
577
+ createMockDocResult({ content: 'README', document_title: 'README' }),
578
+ createMockDocResult({ content: 'Guide', original_document_path: 'GUIDE.md', _distance: 0.2 }),
579
+ createMockDocResult({ content: 'API', original_document_path: 'API.md', _distance: 0.3 }),
580
+ ]);
581
+ await retriever.findRelevantDocs('query', {
582
+ useReranking: true,
583
+ queryContextForReranking: { area: 'Frontend', dominantTech: ['React'] },
584
+ });
585
+ expect(isGenericDocMock).toHaveBeenCalled();
586
+ expect(getGenericContextMock).toHaveBeenCalled();
587
+ });
588
+
589
+ it('should apply generic doc penalty for low context match', async () => {
590
+ isGenericDocMock.mockReturnValue(false);
591
+ inferContextMock.mockResolvedValue({ area: 'Backend', dominantTech: ['Node.js'], isGeneralPurposeReadmeStyle: true });
592
+ mockTable.toArray.mockResolvedValue([
593
+ createMockDocResult({ content: 'Doc', original_document_path: 'docs/readme.md', document_title: 'Readme' }),
594
+ createMockDocResult({ content: 'Doc 2', original_document_path: 'docs/api.md', _distance: 0.2 }),
595
+ createMockDocResult({ content: 'Doc 3', original_document_path: 'docs/guide.md', _distance: 0.3 }),
596
+ ]);
597
+ const results = await retriever.findRelevantDocs('query', {
598
+ useReranking: true,
599
+ queryContextForReranking: { area: 'Frontend', dominantTech: ['React'] },
600
+ });
601
+ expect(Array.isArray(results)).toBe(true);
602
+ });
603
+
604
+ it('should boost results with matching area and tech', async () => {
605
+ isGenericDocMock.mockReturnValue(false);
606
+ inferContextMock.mockResolvedValue({ area: 'Frontend', dominantTech: ['React'], isGeneralPurposeReadmeStyle: false });
607
+ mockTable.toArray.mockResolvedValue([
608
+ createMockDocResult({ content: 'React doc', original_document_path: 'docs/react.md', _distance: 0.3, document_title: 'React' }),
609
+ createMockDocResult({ content: 'Other doc', original_document_path: 'docs/other.md', _distance: 0.2 }),
610
+ createMockDocResult({ content: 'Third doc', original_document_path: 'docs/third.md', _distance: 0.4 }),
611
+ ]);
612
+ const results = await retriever.findRelevantDocs('query', {
613
+ useReranking: true,
614
+ queryContextForReranking: { area: 'Frontend', dominantTech: ['React'] },
615
+ });
616
+ expect(results[0].reranked).toBe(true);
617
+ });
618
+
619
+ it('should apply path similarity bonus when queryFilePath provided', async () => {
620
+ isGenericDocMock.mockReturnValue(false);
621
+ inferContextMock.mockResolvedValue({ area: 'Frontend', dominantTech: ['React'], isGeneralPurposeReadmeStyle: false });
622
+ mockTable.toArray.mockResolvedValue([
623
+ createMockDocResult({ content: 'Component doc', original_document_path: 'docs/components.md', _distance: 0.2 }),
624
+ createMockDocResult({ content: 'API doc', original_document_path: 'docs/api.md', _distance: 0.2 }),
625
+ createMockDocResult({ content: 'Hooks doc', original_document_path: 'docs/hooks.md', _distance: 0.2 }),
626
+ ]);
627
+ const results = await retriever.findRelevantDocs('query', {
628
+ useReranking: true,
629
+ queryContextForReranking: { area: 'Frontend', dominantTech: ['React'] },
630
+ queryFilePath: 'src/components/Button.jsx',
631
+ });
632
+ expect(results.every((r) => r.reranked)).toBe(true);
633
+ });
634
+
635
// Context inference is mocked to reject; findRelevantDocs must still resolve to an array.
it('should handle context calculation errors gracefully', async () => {
  isGenericDocMock.mockReturnValue(false);
  inferContextMock.mockRejectedValue(new Error('Context calculation failed'));

  const readmeDoc = createMockDocResult({ document_title: 'Readme' });
  const apiDoc = createMockDocResult({ original_document_path: 'docs/api.md', _distance: 0.2 });
  const guideDoc = createMockDocResult({ original_document_path: 'docs/guide.md', _distance: 0.3 });
  mockTable.toArray.mockResolvedValue([readmeDoc, apiDoc, guideDoc]);

  const searchOptions = {
    useReranking: true,
    queryContextForReranking: { area: 'Frontend', dominantTech: ['React'] },
  };
  const results = await retriever.findRelevantDocs('query', searchOptions);

  expect(Array.isArray(results)).toBe(true);
});
649
+
650
// Pre-seeds documentContextPromiseCache for docs/cached.md (keyed by its absolute path)
// and checks that a reranked search still resolves to an array.
it('should use cached document context promise', async () => {
  // Fresh object per call so the mock and the cached promise never share a reference.
  const buildContext = () => ({ area: 'Frontend', dominantTech: ['React'], isGeneralPurposeReadmeStyle: false });

  isGenericDocMock.mockReturnValue(false);
  inferContextMock.mockResolvedValue(buildContext());

  const cachedContext = Promise.resolve(buildContext());
  const nodePath = require('node:path');
  const cachedDocPath = nodePath.resolve(process.cwd(), 'docs/cached.md');
  retriever.documentContextPromiseCache.set(cachedDocPath, cachedContext);

  const cachedDoc = createMockDocResult({ content: 'Cached doc', original_document_path: 'docs/cached.md', document_title: 'Cached' });
  const otherDoc = createMockDocResult({ content: 'Other doc', original_document_path: 'docs/other.md', _distance: 0.2 });
  const thirdDoc = createMockDocResult({ content: 'Third doc', original_document_path: 'docs/third.md', _distance: 0.3 });
  mockTable.toArray.mockResolvedValue([cachedDoc, otherDoc, thirdDoc]);

  const searchOptions = {
    useReranking: true,
    queryContextForReranking: { area: 'Frontend', dominantTech: ['React'] },
  };
  const results = await retriever.findRelevantDocs('query', searchOptions);

  expect(Array.isArray(results)).toBe(true);
});
667
+ });
668
+
669
+ // ==========================================================================
670
+ // H1 Embedding Caching
671
+ // ==========================================================================
672
+
673
// Covers the retriever's h1EmbeddingCache: titles missing from the cache are embedded
// through calculateEmbeddingBatch, and titles already cached are left out of that batch.
describe('H1 embedding caching', () => {
  // Built per call so each test hands the retriever its own options object.
  const rerankOptions = () => ({
    useReranking: true,
    queryContextForReranking: { area: 'Frontend', dominantTech: ['React'] },
  });

  // Produces `count` mock embeddings, one createMockEmbedding() call per entry.
  const mockEmbeddings = (count) => Array.from({ length: count }, () => createMockEmbedding());

  it('should batch calculate H1 embeddings for cache misses', async () => {
    const docs = [
      createMockDocResult({ document_title: 'API Reference' }),
      createMockDocResult({ original_document_path: 'docs/guide.md', _distance: 0.2, document_title: 'User Guide' }),
      createMockDocResult({ original_document_path: 'docs/faq.md', _distance: 0.3, document_title: 'FAQ' }),
    ];
    mockTable.toArray.mockResolvedValue(docs);
    mockModelManager.calculateEmbeddingBatch.mockResolvedValue(mockEmbeddings(3));

    await retriever.findRelevantDocs('query', rerankOptions());

    expect(mockModelManager.calculateEmbeddingBatch).toHaveBeenCalled();
    expect(retriever.h1EmbeddingCache.has('API Reference')).toBe(true);
  });

  it('should reuse cached H1 embeddings', async () => {
    // Only 'Cached Title' is pre-seeded; the other two titles must go through the batch call.
    retriever.h1EmbeddingCache.set('Cached Title', createMockEmbedding());

    const docs = [
      createMockDocResult({ original_document_path: 'docs/cached.md', document_title: 'Cached Title' }),
      createMockDocResult({ original_document_path: 'docs/new.md', _distance: 0.2, document_title: 'New Title' }),
      createMockDocResult({ original_document_path: 'docs/other.md', _distance: 0.3, document_title: 'Other Title' }),
    ];
    mockTable.toArray.mockResolvedValue(docs);
    mockModelManager.calculateEmbeddingBatch.mockResolvedValue(mockEmbeddings(2));

    await retriever.findRelevantDocs('query', rerankOptions());

    expect(mockModelManager.calculateEmbeddingBatch).toHaveBeenCalledWith(['New Title', 'Other Title']);
  });
});
704
+
705
+ // ==========================================================================
706
+ // Result Sorting & Schema Errors
707
+ // ==========================================================================
708
+
709
// Feeds 15 docs with increasing distance and checks that findRelevantDocs honours
// `limit` and returns results in non-increasing similarity order.
describe('result limiting and sorting', () => {
  it('should sort and limit final results', async () => {
    const docs = [];
    for (let i = 0; i < 15; i += 1) {
      docs.push(createMockDocResult({ content: `Doc ${i}`, _distance: 0.1 + i * 0.02 }));
    }
    mockTable.toArray.mockResolvedValue(docs);

    const results = await retriever.findRelevantDocs('query', { limit: 5 });

    expect(results.length).toBeLessThanOrEqual(5);
    // Compare each result with its predecessor: results[idx] precedes `current`.
    results.slice(1).forEach((current, idx) => {
      expect(results[idx].similarity).toBeGreaterThanOrEqual(current.similarity);
    });
  });
});
721
+
722
// Verifies that both search entry points still resolve to an array when reading
// `mockTable.schema` throws.
describe('schema error handling', () => {
  /**
   * Runs `run` while `mockTable.schema` is replaced by a getter that throws, then
   * restores the property to its previous state.
   *
   * The restore happens in `finally` so the throwing getter cannot leak into later
   * tests if `mockTable` is shared between them; clearing or resetting mocks does
   * not undo `Object.defineProperty`.
   *
   * @param {() => Promise<unknown>} run - Test body to execute with the broken schema.
   * @returns {Promise<unknown>} Whatever `run` resolves to.
   */
  const withInaccessibleSchema = async (run) => {
    const previousDescriptor = Object.getOwnPropertyDescriptor(mockTable, 'schema');
    Object.defineProperty(mockTable, 'schema', {
      get: () => {
        throw new Error('Schema not accessible');
      },
      configurable: true,
    });
    try {
      return await run();
    } finally {
      if (previousDescriptor) {
        Object.defineProperty(mockTable, 'schema', previousDescriptor);
      } else {
        // There was no own `schema` property before; remove the one installed above.
        delete mockTable.schema;
      }
    }
  };

  it('should handle schema access errors in findRelevantDocs', async () => {
    await withInaccessibleSchema(async () => {
      mockTable.toArray.mockResolvedValue([createMockDocResult({ project_path: process.cwd() })]);
      const results = await retriever.findRelevantDocs('query');
      expect(Array.isArray(results)).toBe(true);
    });
  });

  it('should handle schema access errors in findSimilarCode', async () => {
    await withInaccessibleSchema(async () => {
      mockTable.toArray.mockResolvedValue([createMockCodeResult()]);
      const results = await retriever.findSimilarCode('query', { similarityThreshold: 0 });
      expect(Array.isArray(results)).toBe(true);
    });
  });
});
747
+
748
// Checks how `similarity` is derived for rows that carry `_score` or neither
// `_score` nor `_distance`.
describe('similarity calculation variants', () => {
  // Hand-built row (no createMockDocResult) so only the fields listed here are present.
  const rawRow = (extra = {}) => ({
    content: 'Doc',
    project_path: process.cwd(),
    original_document_path: 'docs/api.md',
    ...extra,
  });

  // Runs a search over a single row with the threshold disabled and returns the result list.
  const searchSingleRow = async (row) => {
    mockTable.toArray.mockResolvedValue([row]);
    return retriever.findRelevantDocs('query', { similarityThreshold: 0 });
  };

  it('should calculate similarity from _score', async () => {
    const results = await searchSingleRow(rawRow({ _score: 0.85 }));
    expect(results[0].similarity).toBe(0.85);
  });

  it('should use fallback similarity when no _distance or _score', async () => {
    const results = await searchSingleRow(rawRow());
    expect(results[0].similarity).toBe(0.5);
  });
});
763
+
764
// Supplies 20 code rows and checks that findSimilarCode returns no more than `limit` results.
describe('findSimilarCode result limiting', () => {
  it('should limit results when exceeding limit', async () => {
    const codeRows = [];
    for (let i = 0; i < 20; i += 1) {
      codeRows.push(createMockCodeResult({ content: `Code ${i}`, path: `src/file${i}.js`, _distance: 0.1 + i * 0.01 }));
    }
    mockTable.toArray.mockResolvedValue(codeRows);

    const searchOptions = { limit: 5, similarityThreshold: 0 };
    const results = await retriever.findSimilarCode('query', searchOptions);

    expect(results.length).toBeLessThanOrEqual(5);
  });
});
775
+ });