@afterxleep/doc-bot 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,807 @@
1
+ const { DocumentIndex } = require('../DocumentIndex');
2
+
3
+ describe('DocumentIndex', () => {
4
+ let documentIndex; // DocumentIndex instance under test; reassigned in beforeEach
5
+ let mockDocuments; // Shared two-document fixture; reassigned in beforeEach
6
+ // Build a fresh index and fixture before every test so nothing carries over between tests.
7
+ beforeEach(() => {
8
+ documentIndex = new DocumentIndex();
9
+ mockDocuments = [
10
+ { // React guide: content mentions React components; metadata has keywords, tags and a category
11
+ fileName: 'react-guide.md',
12
+ content: 'React components are the building blocks of React applications.',
13
+ metadata: {
14
+ title: 'React Component Guide',
15
+ keywords: ['react', 'components', 'jsx'],
16
+ tags: ['frontend', 'ui'],
17
+ category: 'development'
18
+ }
19
+ },
20
+ { // Testing guide: same metadata shape, different keywords and tags
21
+ fileName: 'testing.md',
22
+ content: 'Testing is crucial for reliable software. Use Jest for unit tests.',
23
+ metadata: {
24
+ title: 'Testing Guide',
25
+ keywords: ['testing', 'jest', 'unit-tests'],
26
+ tags: ['quality', 'testing'],
27
+ category: 'development'
28
+ }
29
+ }
30
+ ];
31
+ });
32
+
33
+ describe('constructor', () => { // A new DocumentIndex must expose four Map indexes, all empty
34
+ it('should initialize with empty indexes', () => {
35
+ expect(documentIndex.keywordIndex).toBeInstanceOf(Map);
36
+ expect(documentIndex.topicIndex).toBeInstanceOf(Map);
37
+ expect(documentIndex.patternIndex).toBeInstanceOf(Map);
38
+ expect(documentIndex.extensionIndex).toBeInstanceOf(Map);
39
+ expect(documentIndex.keywordIndex.size).toBe(0);
40
+ expect(documentIndex.topicIndex.size).toBe(0);
41
+ expect(documentIndex.patternIndex.size).toBe(0);
42
+ expect(documentIndex.extensionIndex.size).toBe(0);
43
+ });
44
+ });
45
+
46
+ describe('buildIndexes', () => { // Bulk indexing of a document array through buildIndexes
47
+ it('should build indexes from provided documents', async () => {
48
+ await documentIndex.buildIndexes(mockDocuments);
49
+ // Only the keyword and topic indexes are checked; pattern and extension indexes are not asserted here.
50
+ expect(documentIndex.keywordIndex.size).toBeGreaterThan(0);
51
+ expect(documentIndex.topicIndex.size).toBeGreaterThan(0);
52
+ });
53
+ // An empty input array must leave all four indexes empty.
54
+ it('should handle empty document array', async () => {
55
+ await documentIndex.buildIndexes([]);
56
+
57
+ expect(documentIndex.keywordIndex.size).toBe(0);
58
+ expect(documentIndex.topicIndex.size).toBe(0);
59
+ expect(documentIndex.patternIndex.size).toBe(0);
60
+ expect(documentIndex.extensionIndex.size).toBe(0);
61
+ });
62
+ });
63
+
64
+ describe('indexDocument', () => { // Indexing of a single document from its metadata (keywords, tags, category)
65
+ it('should index keywords from metadata', async () => {
66
+ const document = {
67
+ fileName: 'test.md',
68
+ metadata: {
69
+ keywords: ['javascript', 'node', 'backend']
70
+ }
71
+ };
72
+
73
+ await documentIndex.indexDocument(document);
74
+ // Each keyword becomes a key; the entries stored under a key reference the original document object.
75
+ expect(documentIndex.keywordIndex.has('javascript')).toBe(true);
76
+ expect(documentIndex.keywordIndex.has('node')).toBe(true);
77
+ expect(documentIndex.keywordIndex.has('backend')).toBe(true);
78
+ const javascriptEntries = documentIndex.keywordIndex.get('javascript');
79
+ expect(javascriptEntries.some(entry => entry.document === document)).toBe(true);
80
+ });
81
+ // keywords given as a bare string instead of an array must be indexed as one keyword.
82
+ it('should handle single keyword as string', async () => {
83
+ const document = {
84
+ fileName: 'test.md',
85
+ metadata: {
86
+ keywords: 'python'
87
+ }
88
+ };
89
+
90
+ await documentIndex.indexDocument(document);
91
+
92
+ expect(documentIndex.keywordIndex.has('python')).toBe(true);
93
+ const pythonEntries = documentIndex.keywordIndex.get('python');
94
+ expect(pythonEntries.some(entry => entry.document === document)).toBe(true);
95
+ });
96
+ // Tags are expected in topicIndex, not keywordIndex.
97
+ it('should index tags in topic index', async () => {
98
+ const document = {
99
+ fileName: 'test.md',
100
+ metadata: {
101
+ tags: ['frontend', 'ui', 'design']
102
+ }
103
+ };
104
+
105
+ await documentIndex.indexDocument(document);
106
+
107
+ expect(documentIndex.topicIndex.has('frontend')).toBe(true);
108
+ expect(documentIndex.topicIndex.has('ui')).toBe(true);
109
+ expect(documentIndex.topicIndex.has('design')).toBe(true);
110
+ const frontendEntries = documentIndex.topicIndex.get('frontend');
111
+ expect(frontendEntries.some(entry => entry.document === document)).toBe(true);
112
+ });
113
+ // tags given as a bare string instead of an array must be indexed as one topic.
114
+ it('should handle single tag as string', async () => {
115
+ const document = {
116
+ fileName: 'test.md',
117
+ metadata: {
118
+ tags: 'database'
119
+ }
120
+ };
121
+
122
+ await documentIndex.indexDocument(document);
123
+
124
+ expect(documentIndex.topicIndex.has('database')).toBe(true);
125
+ const databaseEntries = documentIndex.topicIndex.get('database');
126
+ expect(databaseEntries.some(entry => entry.document === document)).toBe(true);
127
+ });
128
+ // The category also lands in topicIndex; the lookup below uses the lower-cased form of the value.
129
+ it('should index category in topic index', async () => {
130
+ const document = {
131
+ fileName: 'test.md',
132
+ metadata: {
133
+ category: 'Architecture'
134
+ }
135
+ };
136
+
137
+ await documentIndex.indexDocument(document);
138
+
139
+ expect(documentIndex.topicIndex.has('architecture')).toBe(true);
140
+ const architectureEntries = documentIndex.topicIndex.get('architecture');
141
+ expect(architectureEntries.some(entry => entry.document === document)).toBe(true);
142
+ });
143
+ // A document with neither metadata nor content must not throw and must add nothing.
144
+ it('should handle documents without metadata', async () => {
145
+ const document = {
146
+ fileName: 'test.md'
147
+ };
148
+
149
+ await documentIndex.indexDocument(document);
150
+
151
+ expect(documentIndex.keywordIndex.size).toBe(0);
152
+ expect(documentIndex.topicIndex.size).toBe(0);
153
+ });
154
+ // Same expectation when metadata is present but empty.
155
+ it('should handle empty metadata', async () => {
156
+ const document = {
157
+ fileName: 'test.md',
158
+ metadata: {}
159
+ };
160
+
161
+ await documentIndex.indexDocument(document);
162
+
163
+ expect(documentIndex.keywordIndex.size).toBe(0);
164
+ expect(documentIndex.topicIndex.size).toBe(0);
165
+ });
166
+ // Mixed-case keywords are looked up by their lower-cased form; the dot in Node.js is kept.
167
+ it('should index keywords case-insensitively', async () => {
168
+ const document = {
169
+ fileName: 'test.md',
170
+ metadata: {
171
+ keywords: ['JavaScript', 'REACT', 'Node.js']
172
+ }
173
+ };
174
+
175
+ await documentIndex.indexDocument(document);
176
+
177
+ expect(documentIndex.keywordIndex.has('javascript')).toBe(true);
178
+ expect(documentIndex.keywordIndex.has('react')).toBe(true);
179
+ expect(documentIndex.keywordIndex.has('node.js')).toBe(true);
180
+ });
181
+ // Two documents sharing a keyword must both be kept under that key (no overwrite).
182
+ it('should allow multiple documents for same keyword', async () => {
183
+ const doc1 = {
184
+ fileName: 'react-basics.md',
185
+ metadata: { keywords: ['react'] }
186
+ };
187
+ const doc2 = {
188
+ fileName: 'react-advanced.md',
189
+ metadata: { keywords: ['react'] }
190
+ };
191
+
192
+ await documentIndex.indexDocument(doc1);
193
+ await documentIndex.indexDocument(doc2);
194
+
195
+ const reactEntries = documentIndex.keywordIndex.get('react');
196
+ expect(reactEntries).toHaveLength(2);
197
+ expect(reactEntries.some(entry => entry.document === doc1)).toBe(true);
198
+ expect(reactEntries.some(entry => entry.document === doc2)).toBe(true);
199
+ });
200
+ });
201
+
202
+ describe('findRelevantDocs', () => { // Query lookup: result shape, ordering and the scores asserted for keyword and topic matches
203
+ beforeEach(async () => {
204
+ await documentIndex.buildIndexes(mockDocuments);
205
+ });
206
+ // The shared index is pre-populated with mockDocuments for every test in this group.
207
+ it('should return empty array when no context provided', () => {
208
+ const result = documentIndex.findRelevantDocs({});
209
+ expect(result).toEqual([]);
210
+ });
211
+ // Each result entry is expected to carry a document and a score property.
212
+ it('should return documents based on query keywords', () => {
213
+ const context = { query: 'react components' };
214
+ const result = documentIndex.findRelevantDocs(context);
215
+
216
+ expect(result).toBeInstanceOf(Array);
217
+ expect(result.length).toBeGreaterThan(0);
218
+ expect(result[0]).toHaveProperty('document');
219
+ expect(result[0]).toHaveProperty('score');
220
+ });
221
+
222
+ it('should return scored and ranked results', () => {
223
+ const context = { query: 'testing' };
224
+ const result = documentIndex.findRelevantDocs(context);
225
+ // NOTE(review): the loop below asserts nothing when result has fewer than two entries, so an empty result also passes.
226
+ // Results should be sorted by score (descending)
227
+ for (let i = 0; i < result.length - 1; i++) {
228
+ expect(result[i].score).toBeGreaterThanOrEqual(result[i + 1].score);
229
+ }
230
+ });
231
+ // The next three tests use a private index and a document without content and assert exact scores.
232
+ it('should find documents by exact keyword match', async () => {
233
+ // Create a fresh index with no content to test exact scoring
234
+ const testIndex = new DocumentIndex();
235
+ const testDoc = {
236
+ fileName: 'clean-test.md',
237
+ metadata: { keywords: ['react'] }
238
+ };
239
+ await testIndex.indexDocument(testDoc);
240
+
241
+ const context = { query: 'react' };
242
+ const result = testIndex.findRelevantDocs(context);
243
+
244
+ expect(result.length).toBe(1);
245
+ expect(result[0].document.fileName).toBe('clean-test.md');
246
+ expect(result[0].score).toBe(10); // High score for exact keyword match
247
+ });
248
+
249
+ it('should find documents by topic match', async () => {
250
+ // Create a fresh index with no content to test exact scoring
251
+ const testIndex = new DocumentIndex();
252
+ const testDoc = {
253
+ fileName: 'clean-test.md',
254
+ metadata: { tags: ['frontend'] }
255
+ };
256
+ await testIndex.indexDocument(testDoc);
257
+
258
+ const context = { query: 'frontend' };
259
+ const result = testIndex.findRelevantDocs(context);
260
+
261
+ expect(result.length).toBe(1);
262
+ expect(result[0].document.fileName).toBe('clean-test.md');
263
+ expect(result[0].score).toBe(5); // Medium score for topic match
264
+ });
265
+ // One query word hits a keyword and the other hits a tag of the same document; a single summed entry is expected.
266
+ it('should combine scores for multiple matches', async () => {
267
+ // Create a fresh index with no content to test exact scoring
268
+ const testIndex = new DocumentIndex();
269
+ const testDoc = {
270
+ fileName: 'clean-test.md',
271
+ metadata: {
272
+ keywords: ['react'],
273
+ tags: ['frontend']
274
+ }
275
+ };
276
+ await testIndex.indexDocument(testDoc);
277
+
278
+ const context = { query: 'react frontend' };
279
+ const result = testIndex.findRelevantDocs(context);
280
+
281
+ expect(result.length).toBe(1);
282
+ expect(result[0].document.fileName).toBe('clean-test.md');
283
+ expect(result[0].score).toBe(15); // 10 (keyword) + 5 (topic)
284
+ });
285
+ // Runs against the shared mockDocuments index; only the React guide is expected to match.
286
+ it('should handle case-insensitive queries', () => {
287
+ const context = { query: 'REACT Components' };
288
+ const result = documentIndex.findRelevantDocs(context);
289
+
290
+ expect(result.length).toBe(1);
291
+ expect(result[0].document.fileName).toBe('react-guide.md');
292
+ });
293
+ });
294
+
295
+ describe('content keyword extraction', () => { // Keywords and file extensions derived from document content; metadata is empty unless noted
296
+ it('should extract keywords from code blocks', async () => {
297
+ const document = {
298
+ fileName: 'api-guide.md',
299
+ content: `
300
+ # API Guide
301
+
302
+ \`\`\`javascript
303
+ const express = require('express');
304
+ const mongoose = require('mongoose');
305
+ app.use(bodyParser.json());
306
+ \`\`\`
307
+
308
+ \`\`\`python
309
+ import flask
310
+ from sqlalchemy import create_engine
311
+ \`\`\`
312
+ `,
313
+ metadata: {}
314
+ };
315
+
316
+ await documentIndex.indexDocument(document);
317
+ // Lookups use lower-cased keys: bodyParser in the snippet is expected under bodyparser.
318
+ // Should extract technical terms from code blocks
319
+ expect(documentIndex.keywordIndex.has('express')).toBe(true);
320
+ expect(documentIndex.keywordIndex.has('mongoose')).toBe(true);
321
+ expect(documentIndex.keywordIndex.has('bodyparser')).toBe(true);
322
+ expect(documentIndex.keywordIndex.has('flask')).toBe(true);
323
+ expect(documentIndex.keywordIndex.has('sqlalchemy')).toBe(true);
324
+ });
325
+ // Words from markdown headings (levels 1 to 4) are expected as lower-cased keywords; ci/cd keeps its slash.
326
+ it('should extract keywords from headings', async () => {
327
+ const document = {
328
+ fileName: 'deployment-guide.md',
329
+ content: `
330
+ # Docker Deployment Guide
331
+
332
+ ## Setting up Kubernetes
333
+
334
+ ### Using Terraform for Infrastructure
335
+
336
+ #### CI/CD Pipeline Configuration
337
+ `,
338
+ metadata: {}
339
+ };
340
+
341
+ await documentIndex.indexDocument(document);
342
+
343
+ expect(documentIndex.keywordIndex.has('docker')).toBe(true);
344
+ expect(documentIndex.keywordIndex.has('kubernetes')).toBe(true);
345
+ expect(documentIndex.keywordIndex.has('terraform')).toBe(true);
346
+ expect(documentIndex.keywordIndex.has('ci/cd')).toBe(true);
347
+ expect(documentIndex.keywordIndex.has('pipeline')).toBe(true);
348
+ });
349
+ // Glob-style mentions such as *.js are expected in extensionIndex, keyed without the star and dot.
350
+ it('should extract file extensions mentioned in content', async () => {
351
+ const document = {
352
+ fileName: 'project-structure.md',
353
+ content: `
354
+ Files in this project:
355
+ - *.js files for JavaScript
356
+ - *.py files for Python
357
+ - *.md files for documentation
358
+ - *.json files for configuration
359
+ `,
360
+ metadata: {}
361
+ };
362
+
363
+ await documentIndex.indexDocument(document);
364
+
365
+ expect(documentIndex.extensionIndex.has('js')).toBe(true);
366
+ expect(documentIndex.extensionIndex.has('py')).toBe(true);
367
+ expect(documentIndex.extensionIndex.has('md')).toBe(true);
368
+ expect(documentIndex.extensionIndex.has('json')).toBe(true);
369
+ });
370
+ // Technology names appearing in plain prose (outside headings and code fences) are expected as keywords.
371
+ it('should extract framework and library names', async () => {
372
+ const document = {
373
+ fileName: 'tech-stack.md',
374
+ content: `
375
+ Our tech stack includes:
376
+ - React for frontend
377
+ - Node.js for backend
378
+ - PostgreSQL for database
379
+ - Redis for caching
380
+ - AWS for cloud infrastructure
381
+ `,
382
+ metadata: {}
383
+ };
384
+
385
+ await documentIndex.indexDocument(document);
386
+
387
+ expect(documentIndex.keywordIndex.has('react')).toBe(true);
388
+ expect(documentIndex.keywordIndex.has('node.js')).toBe(true);
389
+ expect(documentIndex.keywordIndex.has('postgresql')).toBe(true);
390
+ expect(documentIndex.keywordIndex.has('redis')).toBe(true);
391
+ expect(documentIndex.keywordIndex.has('aws')).toBe(true);
392
+ });
393
+ // Negative case: ordinary English words in prose must not become keywords.
394
+ it('should not extract common words', async () => {
395
+ const document = {
396
+ fileName: 'guide.md',
397
+ content: `
398
+ This is a guide that explains how to use the system.
399
+ The system is very useful and helps developers.
400
+ `,
401
+ metadata: {}
402
+ };
403
+
404
+ await documentIndex.indexDocument(document);
405
+
406
+ // Should not extract common words
407
+ expect(documentIndex.keywordIndex.has('this')).toBe(false);
408
+ expect(documentIndex.keywordIndex.has('is')).toBe(false);
409
+ expect(documentIndex.keywordIndex.has('a')).toBe(false);
410
+ expect(documentIndex.keywordIndex.has('the')).toBe(false);
411
+ expect(documentIndex.keywordIndex.has('and')).toBe(false);
412
+ expect(documentIndex.keywordIndex.has('to')).toBe(false);
413
+ expect(documentIndex.keywordIndex.has('how')).toBe(false);
414
+ });
415
+ // Both documents mention React in content; only the first also declares it as a metadata keyword.
416
+ it('should score content keywords lower than metadata keywords', async () => {
417
+ const docWithMetadata = {
418
+ fileName: 'meta-doc.md',
419
+ content: 'Some content about React',
420
+ metadata: {
421
+ keywords: ['react']
422
+ }
423
+ };
424
+
425
+ const docWithContentOnly = {
426
+ fileName: 'content-doc.md',
427
+ content: 'This document talks about React development',
428
+ metadata: {}
429
+ };
430
+
431
+ await documentIndex.indexDocument(docWithMetadata);
432
+ await documentIndex.indexDocument(docWithContentOnly);
433
+
434
+ const context = { query: 'react' };
435
+ const result = documentIndex.findRelevantDocs(context);
436
+ // Reading result[1] below presumes the content-only document is returned as well.
437
+ // Document with metadata keywords should score higher
438
+ expect(result[0].document.fileName).toBe('meta-doc.md');
439
+ expect(result[0].score).toBeGreaterThan(result[1].score);
440
+ });
441
+ });
442
+
443
+ describe('code pattern indexing', () => { // patternIndex population from fenced code and lookup through context.codeSnippet
444
+ it('should index common code patterns', async () => {
445
+ const document = {
446
+ fileName: 'patterns-guide.md',
447
+ content: `
448
+ # Common Patterns
449
+
450
+ ## React Hooks
451
+ \`\`\`javascript
452
+ const [state, setState] = useState();
453
+ useEffect(() => {});
454
+ \`\`\`
455
+
456
+ ## Express Routes
457
+ \`\`\`javascript
458
+ app.get('/api/users', (req, res) => {});
459
+ app.post('/api/data', handler);
460
+ \`\`\`
461
+
462
+ ## Testing
463
+ \`\`\`javascript
464
+ describe('Component', () => {
465
+ it('should render', () => {});
466
+ });
467
+ \`\`\`
468
+ `,
469
+ metadata: {}
470
+ };
471
+
472
+ await documentIndex.indexDocument(document);
473
+ // Pattern keys are looked up with their original casing; the test-function keys include the opening parenthesis.
474
+ expect(documentIndex.patternIndex.has('useState')).toBe(true);
475
+ expect(documentIndex.patternIndex.has('useEffect')).toBe(true);
476
+ expect(documentIndex.patternIndex.has('app.get')).toBe(true);
477
+ expect(documentIndex.patternIndex.has('app.post')).toBe(true);
478
+ expect(documentIndex.patternIndex.has('describe(')).toBe(true);
479
+ expect(documentIndex.patternIndex.has('it(')).toBe(true);
480
+ });
481
+ // The snippet and the document share only the useState call; variable names and the argument differ.
482
+ it('should find documents by code patterns in codeSnippet context', async () => {
483
+ const testIndex = new DocumentIndex();
484
+ const document = {
485
+ fileName: 'react-hooks.md',
486
+ content: `
487
+ # React Hooks Guide
488
+ \`\`\`javascript
489
+ const [count, setCount] = useState(0);
490
+ \`\`\`
491
+ `,
492
+ metadata: {}
493
+ };
494
+
495
+ await testIndex.indexDocument(document);
496
+
497
+ const context = { codeSnippet: 'const [value, setValue] = useState(10);' };
498
+ const result = testIndex.findRelevantDocs(context);
499
+
500
+ expect(result.length).toBeGreaterThan(0);
501
+ expect(result[0].document.fileName).toBe('react-hooks.md');
502
+ expect(result[0].score).toBe(8); // Pattern match score
503
+ });
504
+ // Python keys checked below: def and class carry a trailing space.
505
+ it('should index Python patterns', async () => {
506
+ const document = {
507
+ fileName: 'python-patterns.md',
508
+ content: `
509
+ # Python Patterns
510
+
511
+ \`\`\`python
512
+ def my_function():
513
+ pass
514
+
515
+ class MyClass:
516
+ def __init__(self):
517
+ pass
518
+
519
+ if __name__ == '__main__':
520
+ pass
521
+ \`\`\`
522
+ `,
523
+ metadata: {}
524
+ };
525
+
526
+ await documentIndex.indexDocument(document);
527
+
528
+ expect(documentIndex.patternIndex.has('def ')).toBe(true);
529
+ expect(documentIndex.patternIndex.has('class ')).toBe(true);
530
+ expect(documentIndex.patternIndex.has('__init__')).toBe(true);
531
+ expect(documentIndex.patternIndex.has('if __name__')).toBe(true);
532
+ });
533
+ // SQL keys checked below are upper-case statement openers.
534
+ it('should index SQL patterns', async () => {
535
+ const document = {
536
+ fileName: 'sql-guide.md',
537
+ content: `
538
+ # SQL Guide
539
+
540
+ \`\`\`sql
541
+ SELECT * FROM users WHERE id = 1;
542
+ INSERT INTO products (name, price) VALUES ('item', 10.99);
543
+ UPDATE users SET name = 'new' WHERE id = 1;
544
+ DELETE FROM logs WHERE date < '2023-01-01';
545
+ \`\`\`
546
+ `,
547
+ metadata: {}
548
+ };
549
+
550
+ await documentIndex.indexDocument(document);
551
+
552
+ expect(documentIndex.patternIndex.has('SELECT')).toBe(true);
553
+ expect(documentIndex.patternIndex.has('INSERT INTO')).toBe(true);
554
+ expect(documentIndex.patternIndex.has('UPDATE')).toBe(true);
555
+ expect(documentIndex.patternIndex.has('DELETE FROM')).toBe(true);
556
+ });
557
+ // One document with a JavaScript fence and a Python fence must be registered under patterns from both.
558
+ it('should handle multiple patterns in same document', async () => {
559
+ const document = {
560
+ fileName: 'multi-patterns.md',
561
+ content: `
562
+ \`\`\`javascript
563
+ useState();
564
+ useEffect();
565
+ \`\`\`
566
+
567
+ \`\`\`python
568
+ def func():
569
+ pass
570
+ \`\`\`
571
+ `,
572
+ metadata: {}
573
+ };
574
+
575
+ await documentIndex.indexDocument(document);
576
+
577
+ const useStateEntries = documentIndex.patternIndex.get('useState');
578
+ const defEntries = documentIndex.patternIndex.get('def ');
579
+
580
+ expect(useStateEntries.some(entry => entry.document === document)).toBe(true);
581
+ expect(defEntries.some(entry => entry.document === document)).toBe(true);
582
+ });
583
+ // The document has lower-case select while the snippet has upper-case SELECT; a match is still expected.
584
+ it('should match patterns case-insensitively for SQL', async () => {
585
+ const testIndex = new DocumentIndex();
586
+ const document = {
587
+ fileName: 'sql-doc.md',
588
+ content: '```sql\nselect * from users;\n```',
589
+ metadata: {}
590
+ };
591
+
592
+ await testIndex.indexDocument(document);
593
+
594
+ const context = { codeSnippet: 'SELECT name FROM products;' };
595
+ const result = testIndex.findRelevantDocs(context);
596
+
597
+ expect(result.length).toBeGreaterThan(0);
598
+ expect(result[0].document.fileName).toBe('sql-doc.md');
599
+ });
600
+ });
601
+
602
+ describe('smart inference and relevance scoring', () => { // Ranking across combined query, codeSnippet and filePath context; every test builds its own index
603
+ it('should provide higher relevance for comprehensive context', async () => {
604
+ const testIndex = new DocumentIndex();
605
+
606
+ // Document with multiple matching signals
607
+ const comprehensiveDoc = {
608
+ fileName: 'comprehensive-react.md',
609
+ content: `
610
+ # React Component Testing
611
+
612
+ \`\`\`javascript
613
+ import { render } from '@testing-library/react';
614
+ const [state, setState] = useState();
615
+ \`\`\`
616
+ `,
617
+ metadata: {
618
+ keywords: ['react', 'testing'],
619
+ tags: ['frontend', 'testing'],
620
+ category: 'development'
621
+ }
622
+ };
623
+
624
+ // Document with fewer matching signals
625
+ const basicDoc = {
626
+ fileName: 'basic-react.md',
627
+ content: 'Basic React information',
628
+ metadata: {
629
+ keywords: ['react']
630
+ }
631
+ };
632
+
633
+ await testIndex.indexDocument(comprehensiveDoc);
634
+ await testIndex.indexDocument(basicDoc);
635
+
636
+ const context = {
637
+ query: 'react testing',
638
+ codeSnippet: 'useState()',
639
+ filePath: 'src/components/Button.jsx'
640
+ };
641
+ const result = testIndex.findRelevantDocs(context);
642
+ // Both documents are expected back, with the multi-signal document ranked first.
643
+ expect(result.length).toBe(2);
644
+ expect(result[0].document.fileName).toBe('comprehensive-react.md');
645
+ expect(result[0].score).toBeGreaterThan(result[1].score);
646
+ });
647
+ // Only filePath is supplied; the document is expected to match through the js extension mentioned in its content.
648
+ it('should handle file extension inference', async () => {
649
+ const testIndex = new DocumentIndex();
650
+
651
+ const jsDoc = {
652
+ fileName: 'js-guide.md',
653
+ content: '*.js files contain JavaScript code',
654
+ metadata: {}
655
+ };
656
+
657
+ await testIndex.indexDocument(jsDoc);
658
+
659
+ const context = { filePath: 'src/utils/helper.js' };
660
+ const result = testIndex.findRelevantDocs(context);
661
+
662
+ expect(result.length).toBeGreaterThan(0);
663
+ expect(result[0].document.fileName).toBe('js-guide.md');
664
+ });
665
+ // A single document is matched through keywords, a tag, code patterns and a file extension at once.
666
+ it('should combine multiple scoring factors appropriately', async () => {
667
+ const testIndex = new DocumentIndex();
668
+
669
+ const multiFactorDoc = {
670
+ fileName: 'multi-factor.md',
671
+ content: `
672
+ # React Testing Guide
673
+
674
+ \`\`\`javascript
675
+ describe('Component', () => {
676
+ const [state] = useState();
677
+ });
678
+ \`\`\`
679
+
680
+ Files: *.test.js
681
+ `,
682
+ metadata: {
683
+ keywords: ['react', 'testing'],
684
+ tags: ['frontend'],
685
+ category: 'testing'
686
+ }
687
+ };
688
+
689
+ await testIndex.indexDocument(multiFactorDoc);
690
+
691
+ const context = {
692
+ query: 'react testing frontend',
693
+ codeSnippet: 'useState() describe(',
694
+ filePath: 'tests/Button.test.js'
695
+ };
696
+ const result = testIndex.findRelevantDocs(context);
697
+
698
+ expect(result.length).toBe(1);
699
+ // Should have high score from multiple factors:
700
+ // - Keywords: react (10) + testing (10)
701
+ // - Topics: frontend (5)
702
+ // - Patterns: useState (6) + describe( (6)
703
+ // - Extension: test.js (3)
704
+ expect(result[0].score).toBeGreaterThan(35); // NOTE(review): the weights listed above are not asserted individually; another test in this suite expects a lone pattern match to score 8, not 6 - confirm
705
+ });
706
+ // Three documents with decreasing overlap with the context; the backend-only one must not be returned.
707
+ it('should rank documents by relevance score', async () => {
708
+ const testIndex = new DocumentIndex();
709
+
710
+ const docs = [
711
+ {
712
+ fileName: 'high-relevance.md',
713
+ metadata: { keywords: ['javascript', 'react'], tags: ['frontend'] },
714
+ content: '```javascript\nconst [state] = useState();\n```'
715
+ },
716
+ {
717
+ fileName: 'medium-relevance.md',
718
+ metadata: { keywords: ['javascript'] },
719
+ content: 'Basic JavaScript information'
720
+ },
721
+ {
722
+ fileName: 'low-relevance.md',
723
+ metadata: { tags: ['backend'] },
724
+ content: 'Server-side development'
725
+ }
726
+ ];
727
+
728
+ for (const doc of docs) {
729
+ await testIndex.indexDocument(doc);
730
+ }
731
+
732
+ const context = { query: 'javascript react', codeSnippet: 'useState()' };
733
+ const result = testIndex.findRelevantDocs(context);
734
+
735
+ expect(result.length).toBe(2); // Only docs matching query should be returned
736
+ expect(result[0].document.fileName).toBe('high-relevance.md');
737
+ expect(result[1].document.fileName).toBe('medium-relevance.md');
738
+ expect(result[0].score).toBeGreaterThan(result[1].score);
739
+ });
740
+ // Runs against an index that has no documents; the callback is declared async but awaits nothing.
741
+ it('should handle edge cases gracefully', async () => {
742
+ const testIndex = new DocumentIndex();
743
+
744
+ // Empty context
745
+ expect(testIndex.findRelevantDocs({})).toEqual([]);
746
+
747
+ // Context with undefined values
748
+ const result1 = testIndex.findRelevantDocs({
749
+ query: undefined,
750
+ codeSnippet: null,
751
+ filePath: ''
752
+ });
753
+ expect(result1).toEqual([]);
754
+
755
+ // Very long query
756
+ const longQuery = 'word '.repeat(1000);
757
+ const result2 = testIndex.findRelevantDocs({ query: longQuery });
758
+ expect(result2).toEqual([]);
759
+ });
760
+ // Despite the name, no confidence value is read; the test only checks that results are present or absent.
761
+ it('should provide confidence scoring', async () => {
762
+ const testIndex = new DocumentIndex();
763
+
764
+ const doc = {
765
+ fileName: 'test-doc.md',
766
+ metadata: { keywords: ['testing'] },
767
+ content: 'Testing information'
768
+ };
769
+
770
+ await testIndex.indexDocument(doc);
771
+
772
+ // High confidence context (exact keyword match)
773
+ const highConfidenceContext = { query: 'testing' };
774
+ const highResult = testIndex.findRelevantDocs(highConfidenceContext);
775
+
776
+ // Unrelated context: neither query word occurs in the document keywords or content, so no result is expected
777
+ const lowConfidenceContext = { query: 'software development' };
778
+ const lowResult = testIndex.findRelevantDocs(lowConfidenceContext);
779
+
780
+ expect(highResult.length).toBeGreaterThan(0);
781
+ expect(lowResult.length).toBe(0);
782
+ });
783
+ // Covers repeated keywords, tags and content words within ONE document, not the same document indexed twice.
784
+ it('should handle duplicate documents correctly', async () => {
785
+ const testIndex = new DocumentIndex();
786
+
787
+ const doc = {
788
+ fileName: 'duplicate-test.md',
789
+ metadata: {
790
+ keywords: ['react', 'react'], // Duplicate keywords
791
+ tags: ['frontend', 'frontend'] // Duplicate tags
792
+ },
793
+ content: 'React React React' // Repeated content
794
+ };
795
+
796
+ await testIndex.indexDocument(doc);
797
+
798
+ const context = { query: 'react' };
799
+ const result = testIndex.findRelevantDocs(context);
800
+ // The document must appear once in the results even though its keyword is listed twice.
801
+ expect(result.length).toBe(1);
802
+ expect(result[0].document.fileName).toBe('duplicate-test.md');
803
+ // Score should still be reasonable despite duplicates
804
+ expect(result[0].score).toBeLessThan(100);
805
+ });
806
+ });
807
+ });