@afterxleep/doc-bot 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,807 @@
|
|
|
1
|
+
const { DocumentIndex } = require('../DocumentIndex');
|
|
2
|
+
|
|
3
|
+
describe('DocumentIndex', () => {
|
|
4
|
+
// Shared fixtures: a fresh index and a fresh two-document corpus are created
// before every test so that no state carries over between cases.
let documentIndex;
let mockDocuments;

beforeEach(() => {
  // Front-end oriented document (keywords, tags and a category in metadata).
  const reactGuide = {
    fileName: 'react-guide.md',
    content: 'React components are the building blocks of React applications.',
    metadata: {
      title: 'React Component Guide',
      keywords: ['react', 'components', 'jsx'],
      tags: ['frontend', 'ui'],
      category: 'development',
    },
  };

  // Testing oriented document sharing the same category as the one above.
  const testingGuide = {
    fileName: 'testing.md',
    content: 'Testing is crucial for reliable software. Use Jest for unit tests.',
    metadata: {
      title: 'Testing Guide',
      keywords: ['testing', 'jest', 'unit-tests'],
      tags: ['quality', 'testing'],
      category: 'development',
    },
  };

  mockDocuments = [reactGuide, testingGuide];
  documentIndex = new DocumentIndex();
});
|
|
32
|
+
|
|
33
|
+
describe('constructor', () => {
  // A newly constructed DocumentIndex must expose four Map-backed indexes,
  // each of them empty.
  it('should initialize with empty indexes', () => {
    const indexes = [
      documentIndex.keywordIndex,
      documentIndex.topicIndex,
      documentIndex.patternIndex,
      documentIndex.extensionIndex,
    ];

    // Type checks first, then emptiness, in the same order for every index.
    for (const index of indexes) {
      expect(index).toBeInstanceOf(Map);
    }
    for (const index of indexes) {
      expect(index.size).toBe(0);
    }
  });
});
|
|
45
|
+
|
|
46
|
+
describe('buildIndexes', () => {
  // Indexing the shared fixture corpus must populate at least the keyword
  // and topic indexes.
  it('should build indexes from provided documents', async () => {
    await documentIndex.buildIndexes(mockDocuments);

    const { keywordIndex, topicIndex } = documentIndex;
    expect(keywordIndex.size).toBeGreaterThan(0);
    expect(topicIndex.size).toBeGreaterThan(0);
  });

  // An empty corpus must leave every index empty.
  it('should handle empty document array', async () => {
    await documentIndex.buildIndexes([]);

    const indexNames = ['keywordIndex', 'topicIndex', 'patternIndex', 'extensionIndex'];
    for (const indexName of indexNames) {
      expect(documentIndex[indexName].size).toBe(0);
    }
  });
});
|
|
63
|
+
|
|
64
|
+
describe('indexDocument', () => {
  // True when any index entry in `entries` points at exactly `doc`
  // (identity comparison, not structural equality).
  const containsDocument = (entries, doc) =>
    entries.some((entry) => entry.document === doc);

  it('should index keywords from metadata', async () => {
    const doc = {
      fileName: 'test.md',
      metadata: {
        keywords: ['javascript', 'node', 'backend'],
      },
    };

    await documentIndex.indexDocument(doc);

    for (const keyword of ['javascript', 'node', 'backend']) {
      expect(documentIndex.keywordIndex.has(keyword)).toBe(true);
    }
    const javascriptEntries = documentIndex.keywordIndex.get('javascript');
    expect(containsDocument(javascriptEntries, doc)).toBe(true);
  });

  // `keywords` may be a bare string instead of an array.
  it('should handle single keyword as string', async () => {
    const doc = {
      fileName: 'test.md',
      metadata: {
        keywords: 'python',
      },
    };

    await documentIndex.indexDocument(doc);

    expect(documentIndex.keywordIndex.has('python')).toBe(true);
    const pythonEntries = documentIndex.keywordIndex.get('python');
    expect(containsDocument(pythonEntries, doc)).toBe(true);
  });

  it('should index tags in topic index', async () => {
    const doc = {
      fileName: 'test.md',
      metadata: {
        tags: ['frontend', 'ui', 'design'],
      },
    };

    await documentIndex.indexDocument(doc);

    for (const tag of ['frontend', 'ui', 'design']) {
      expect(documentIndex.topicIndex.has(tag)).toBe(true);
    }
    const frontendEntries = documentIndex.topicIndex.get('frontend');
    expect(containsDocument(frontendEntries, doc)).toBe(true);
  });

  // `tags` may be a bare string instead of an array.
  it('should handle single tag as string', async () => {
    const doc = {
      fileName: 'test.md',
      metadata: {
        tags: 'database',
      },
    };

    await documentIndex.indexDocument(doc);

    expect(documentIndex.topicIndex.has('database')).toBe(true);
    const databaseEntries = documentIndex.topicIndex.get('database');
    expect(containsDocument(databaseEntries, doc)).toBe(true);
  });

  // The category is expected under its lower-cased form in the topic index.
  it('should index category in topic index', async () => {
    const doc = {
      fileName: 'test.md',
      metadata: {
        category: 'Architecture',
      },
    };

    await documentIndex.indexDocument(doc);

    expect(documentIndex.topicIndex.has('architecture')).toBe(true);
    const architectureEntries = documentIndex.topicIndex.get('architecture');
    expect(containsDocument(architectureEntries, doc)).toBe(true);
  });

  it('should handle documents without metadata', async () => {
    const doc = {
      fileName: 'test.md',
    };

    await documentIndex.indexDocument(doc);

    expect(documentIndex.keywordIndex.size).toBe(0);
    expect(documentIndex.topicIndex.size).toBe(0);
  });

  it('should handle empty metadata', async () => {
    const doc = {
      fileName: 'test.md',
      metadata: {},
    };

    await documentIndex.indexDocument(doc);

    expect(documentIndex.keywordIndex.size).toBe(0);
    expect(documentIndex.topicIndex.size).toBe(0);
  });

  // Mixed-case metadata keywords must be retrievable by their lower-case form.
  it('should index keywords case-insensitively', async () => {
    const doc = {
      fileName: 'test.md',
      metadata: {
        keywords: ['JavaScript', 'REACT', 'Node.js'],
      },
    };

    await documentIndex.indexDocument(doc);

    for (const lowered of ['javascript', 'react', 'node.js']) {
      expect(documentIndex.keywordIndex.has(lowered)).toBe(true);
    }
  });

  // Two distinct documents sharing a keyword must both be listed under it.
  it('should allow multiple documents for same keyword', async () => {
    const basics = {
      fileName: 'react-basics.md',
      metadata: { keywords: ['react'] },
    };
    const advanced = {
      fileName: 'react-advanced.md',
      metadata: { keywords: ['react'] },
    };

    await documentIndex.indexDocument(basics);
    await documentIndex.indexDocument(advanced);

    const reactEntries = documentIndex.keywordIndex.get('react');
    expect(reactEntries).toHaveLength(2);
    expect(containsDocument(reactEntries, basics)).toBe(true);
    expect(containsDocument(reactEntries, advanced)).toBe(true);
  });
});
|
|
201
|
+
|
|
202
|
+
describe('findRelevantDocs', () => {
  // Every test in this group starts from an index built over the shared
  // fixture corpus; tests that pin exact scores build their own clean index.
  beforeEach(async () => {
    await documentIndex.buildIndexes(mockDocuments);
  });

  it('should return empty array when no context provided', () => {
    const result = documentIndex.findRelevantDocs({});
    expect(result).toEqual([]);
  });

  // Each result entry is expected to carry the matched document and a score.
  it('should return documents based on query keywords', () => {
    const context = { query: 'react components' };
    const result = documentIndex.findRelevantDocs(context);

    expect(result).toBeInstanceOf(Array);
    expect(result.length).toBeGreaterThan(0);
    expect(result[0]).toHaveProperty('document');
    expect(result[0]).toHaveProperty('score');
  });

  it('should return scored and ranked results', () => {
    // Each query term is a metadata keyword of a different fixture document,
    // so both documents are expected back and the ordering check below has
    // at least one pair to compare.
    const context = { query: 'react testing' };
    const result = documentIndex.findRelevantDocs(context);

    // Guard against a vacuous pass: with fewer than two results the loop
    // below would never execute and the test would assert nothing.
    expect(result.length).toBeGreaterThan(1);

    // Results should be sorted by score (descending)
    for (let i = 0; i < result.length - 1; i++) {
      expect(result[i].score).toBeGreaterThanOrEqual(result[i + 1].score);
    }
  });

  it('should find documents by exact keyword match', async () => {
    // Create a fresh index with no content to test exact scoring
    const testIndex = new DocumentIndex();
    const testDoc = {
      fileName: 'clean-test.md',
      metadata: { keywords: ['react'] },
    };
    await testIndex.indexDocument(testDoc);

    const context = { query: 'react' };
    const result = testIndex.findRelevantDocs(context);

    expect(result.length).toBe(1);
    expect(result[0].document.fileName).toBe('clean-test.md');
    expect(result[0].score).toBe(10); // High score for exact keyword match
  });

  it('should find documents by topic match', async () => {
    // Create a fresh index with no content to test exact scoring
    const testIndex = new DocumentIndex();
    const testDoc = {
      fileName: 'clean-test.md',
      metadata: { tags: ['frontend'] },
    };
    await testIndex.indexDocument(testDoc);

    const context = { query: 'frontend' };
    const result = testIndex.findRelevantDocs(context);

    expect(result.length).toBe(1);
    expect(result[0].document.fileName).toBe('clean-test.md');
    expect(result[0].score).toBe(5); // Medium score for topic match
  });

  it('should combine scores for multiple matches', async () => {
    // Create a fresh index with no content to test exact scoring
    const testIndex = new DocumentIndex();
    const testDoc = {
      fileName: 'clean-test.md',
      metadata: {
        keywords: ['react'],
        tags: ['frontend'],
      },
    };
    await testIndex.indexDocument(testDoc);

    const context = { query: 'react frontend' };
    const result = testIndex.findRelevantDocs(context);

    expect(result.length).toBe(1);
    expect(result[0].document.fileName).toBe('clean-test.md');
    expect(result[0].score).toBe(15); // 10 (keyword) + 5 (topic)
  });

  // Upper/mixed-case query terms must match the fixture's lower-case keywords.
  it('should handle case-insensitive queries', () => {
    const context = { query: 'REACT Components' };
    const result = documentIndex.findRelevantDocs(context);

    expect(result.length).toBe(1);
    expect(result[0].document.fileName).toBe('react-guide.md');
  });
});
|
|
294
|
+
|
|
295
|
+
describe('content keyword extraction', () => {
  // Asserts that every key in `keys` is present (or absent) in `index`.
  const expectKeys = (index, keys, present) => {
    for (const key of keys) {
      expect(index.has(key)).toBe(present);
    }
  };

  it('should extract keywords from code blocks', async () => {
    const doc = {
      fileName: 'api-guide.md',
      content: `
# API Guide

\`\`\`javascript
const express = require('express');
const mongoose = require('mongoose');
app.use(bodyParser.json());
\`\`\`

\`\`\`python
import flask
from sqlalchemy import create_engine
\`\`\`
`,
      metadata: {},
    };

    await documentIndex.indexDocument(doc);

    // Should extract technical terms from code blocks
    expectKeys(
      documentIndex.keywordIndex,
      ['express', 'mongoose', 'bodyparser', 'flask', 'sqlalchemy'],
      true,
    );
  });

  it('should extract keywords from headings', async () => {
    const doc = {
      fileName: 'deployment-guide.md',
      content: `
# Docker Deployment Guide

## Setting up Kubernetes

### Using Terraform for Infrastructure

#### CI/CD Pipeline Configuration
`,
      metadata: {},
    };

    await documentIndex.indexDocument(doc);

    expectKeys(
      documentIndex.keywordIndex,
      ['docker', 'kubernetes', 'terraform', 'ci/cd', 'pipeline'],
      true,
    );
  });

  // Glob-style mentions such as "*.js" are expected in the extension index
  // under the bare extension.
  it('should extract file extensions mentioned in content', async () => {
    const doc = {
      fileName: 'project-structure.md',
      content: `
Files in this project:
- *.js files for JavaScript
- *.py files for Python
- *.md files for documentation
- *.json files for configuration
`,
      metadata: {},
    };

    await documentIndex.indexDocument(doc);

    expectKeys(documentIndex.extensionIndex, ['js', 'py', 'md', 'json'], true);
  });

  it('should extract framework and library names', async () => {
    const doc = {
      fileName: 'tech-stack.md',
      content: `
Our tech stack includes:
- React for frontend
- Node.js for backend
- PostgreSQL for database
- Redis for caching
- AWS for cloud infrastructure
`,
      metadata: {},
    };

    await documentIndex.indexDocument(doc);

    expectKeys(
      documentIndex.keywordIndex,
      ['react', 'node.js', 'postgresql', 'redis', 'aws'],
      true,
    );
  });

  it('should not extract common words', async () => {
    const doc = {
      fileName: 'guide.md',
      content: `
This is a guide that explains how to use the system.
The system is very useful and helps developers.
`,
      metadata: {},
    };

    await documentIndex.indexDocument(doc);

    // Should not extract common words
    expectKeys(
      documentIndex.keywordIndex,
      ['this', 'is', 'a', 'the', 'and', 'to', 'how'],
      false,
    );
  });

  it('should score content keywords lower than metadata keywords', async () => {
    // Mentions React in its content and declares it as a metadata keyword.
    const metadataBacked = {
      fileName: 'meta-doc.md',
      content: 'Some content about React',
      metadata: {
        keywords: ['react'],
      },
    };

    // Mentions React in its content only.
    const contentOnly = {
      fileName: 'content-doc.md',
      content: 'This document talks about React development',
      metadata: {},
    };

    await documentIndex.indexDocument(metadataBacked);
    await documentIndex.indexDocument(contentOnly);

    const result = documentIndex.findRelevantDocs({ query: 'react' });

    // Document with metadata keywords should score higher
    const [best, runnerUp] = [result[0], result[1]];
    expect(best.document.fileName).toBe('meta-doc.md');
    expect(best.score).toBeGreaterThan(runnerUp.score);
  });
});
|
|
442
|
+
|
|
443
|
+
describe('code pattern indexing', () => {
  // Asserts that every pattern in `patterns` is a key of the pattern index.
  const expectPatterns = (index, patterns) => {
    for (const pattern of patterns) {
      expect(index.patternIndex.has(pattern)).toBe(true);
    }
  };

  it('should index common code patterns', async () => {
    const doc = {
      fileName: 'patterns-guide.md',
      content: `
# Common Patterns

## React Hooks
\`\`\`javascript
const [state, setState] = useState();
useEffect(() => {});
\`\`\`

## Express Routes
\`\`\`javascript
app.get('/api/users', (req, res) => {});
app.post('/api/data', handler);
\`\`\`

## Testing
\`\`\`javascript
describe('Component', () => {
it('should render', () => {});
});
\`\`\`
`,
      metadata: {},
    };

    await documentIndex.indexDocument(doc);

    expectPatterns(documentIndex, [
      'useState',
      'useEffect',
      'app.get',
      'app.post',
      'describe(',
      'it(',
    ]);
  });

  // Uses a private index so the pinned score is not affected by the shared
  // fixture state.
  it('should find documents by code patterns in codeSnippet context', async () => {
    const testIndex = new DocumentIndex();
    const doc = {
      fileName: 'react-hooks.md',
      content: `
# React Hooks Guide
\`\`\`javascript
const [count, setCount] = useState(0);
\`\`\`
`,
      metadata: {},
    };

    await testIndex.indexDocument(doc);

    const result = testIndex.findRelevantDocs({
      codeSnippet: 'const [value, setValue] = useState(10);',
    });

    expect(result.length).toBeGreaterThan(0);
    const [top] = result;
    expect(top.document.fileName).toBe('react-hooks.md');
    expect(top.score).toBe(8); // Pattern match score
  });

  it('should index Python patterns', async () => {
    const doc = {
      fileName: 'python-patterns.md',
      content: `
# Python Patterns

\`\`\`python
def my_function():
pass

class MyClass:
def __init__(self):
pass

if __name__ == '__main__':
pass
\`\`\`
`,
      metadata: {},
    };

    await documentIndex.indexDocument(doc);

    expectPatterns(documentIndex, ['def ', 'class ', '__init__', 'if __name__']);
  });

  it('should index SQL patterns', async () => {
    const doc = {
      fileName: 'sql-guide.md',
      content: `
# SQL Guide

\`\`\`sql
SELECT * FROM users WHERE id = 1;
INSERT INTO products (name, price) VALUES ('item', 10.99);
UPDATE users SET name = 'new' WHERE id = 1;
DELETE FROM logs WHERE date < '2023-01-01';
\`\`\`
`,
      metadata: {},
    };

    await documentIndex.indexDocument(doc);

    expectPatterns(documentIndex, ['SELECT', 'INSERT INTO', 'UPDATE', 'DELETE FROM']);
  });

  // One document containing both JavaScript and Python snippets must be
  // listed under a pattern from each language.
  it('should handle multiple patterns in same document', async () => {
    const doc = {
      fileName: 'multi-patterns.md',
      content: `
\`\`\`javascript
useState();
useEffect();
\`\`\`

\`\`\`python
def func():
pass
\`\`\`
`,
      metadata: {},
    };

    await documentIndex.indexDocument(doc);

    const pointsAtDoc = (entry) => entry.document === doc;
    const useStateEntries = documentIndex.patternIndex.get('useState');
    const defEntries = documentIndex.patternIndex.get('def ');

    expect(useStateEntries.some(pointsAtDoc)).toBe(true);
    expect(defEntries.some(pointsAtDoc)).toBe(true);
  });

  // The document uses lower-case SQL while the snippet uses upper-case.
  it('should match patterns case-insensitively for SQL', async () => {
    const testIndex = new DocumentIndex();
    const doc = {
      fileName: 'sql-doc.md',
      content: '```sql\nselect * from users;\n```',
      metadata: {},
    };

    await testIndex.indexDocument(doc);

    const result = testIndex.findRelevantDocs({
      codeSnippet: 'SELECT name FROM products;',
    });

    expect(result.length).toBeGreaterThan(0);
    expect(result[0].document.fileName).toBe('sql-doc.md');
  });
});
|
|
601
|
+
|
|
602
|
+
describe('smart inference and relevance scoring', () => {
  // Every test in this group builds its own DocumentIndex so that scores and
  // result counts depend only on the documents indexed inside the test.

  it('should provide higher relevance for comprehensive context', async () => {
    const index = new DocumentIndex();

    // Document with multiple matching signals
    const richDoc = {
      fileName: 'comprehensive-react.md',
      content: `
# React Component Testing

\`\`\`javascript
import { render } from '@testing-library/react';
const [state, setState] = useState();
\`\`\`
`,
      metadata: {
        keywords: ['react', 'testing'],
        tags: ['frontend', 'testing'],
        category: 'development',
      },
    };

    // Document with fewer matching signals
    const sparseDoc = {
      fileName: 'basic-react.md',
      content: 'Basic React information',
      metadata: {
        keywords: ['react'],
      },
    };

    await index.indexDocument(richDoc);
    await index.indexDocument(sparseDoc);

    const result = index.findRelevantDocs({
      query: 'react testing',
      codeSnippet: 'useState()',
      filePath: 'src/components/Button.jsx',
    });

    expect(result.length).toBe(2);
    const [first, second] = result;
    expect(first.document.fileName).toBe('comprehensive-react.md');
    expect(first.score).toBeGreaterThan(second.score);
  });

  // A document that only mentions "*.js" should be returned for a context
  // that carries nothing but a .js file path.
  it('should handle file extension inference', async () => {
    const index = new DocumentIndex();

    const jsDoc = {
      fileName: 'js-guide.md',
      content: '*.js files contain JavaScript code',
      metadata: {},
    };

    await index.indexDocument(jsDoc);

    const result = index.findRelevantDocs({ filePath: 'src/utils/helper.js' });

    expect(result.length).toBeGreaterThan(0);
    expect(result[0].document.fileName).toBe('js-guide.md');
  });

  it('should combine multiple scoring factors appropriately', async () => {
    const index = new DocumentIndex();

    const multiFactorDoc = {
      fileName: 'multi-factor.md',
      content: `
# React Testing Guide

\`\`\`javascript
describe('Component', () => {
const [state] = useState();
});
\`\`\`

Files: *.test.js
`,
      metadata: {
        keywords: ['react', 'testing'],
        tags: ['frontend'],
        category: 'testing',
      },
    };

    await index.indexDocument(multiFactorDoc);

    const result = index.findRelevantDocs({
      query: 'react testing frontend',
      codeSnippet: 'useState() describe(',
      filePath: 'tests/Button.test.js',
    });

    expect(result.length).toBe(1);
    // Several signals are expected to add up for this document: two metadata
    // keywords (react, testing), one tag (frontend), two code patterns
    // (useState, describe() and the file extension of the path. The weight
    // of each signal is defined by DocumentIndex; this test only pins a
    // lower bound on the combined score.
    expect(result[0].score).toBeGreaterThan(35);
  });

  it('should rank documents by relevance score', async () => {
    const index = new DocumentIndex();

    const corpus = [
      {
        fileName: 'high-relevance.md',
        metadata: { keywords: ['javascript', 'react'], tags: ['frontend'] },
        content: '```javascript\nconst [state] = useState();\n```',
      },
      {
        fileName: 'medium-relevance.md',
        metadata: { keywords: ['javascript'] },
        content: 'Basic JavaScript information',
      },
      {
        fileName: 'low-relevance.md',
        metadata: { tags: ['backend'] },
        content: 'Server-side development',
      },
    ];

    // Indexed one at a time, in array order.
    for (const entry of corpus) {
      await index.indexDocument(entry);
    }

    const result = index.findRelevantDocs({
      query: 'javascript react',
      codeSnippet: 'useState()',
    });

    expect(result.length).toBe(2); // Only docs matching query should be returned
    const [first, second] = result;
    expect(first.document.fileName).toBe('high-relevance.md');
    expect(second.document.fileName).toBe('medium-relevance.md');
    expect(first.score).toBeGreaterThan(second.score);
  });

  // All lookups here run against an index that has no documents at all.
  it('should handle edge cases gracefully', async () => {
    const index = new DocumentIndex();

    // Empty context
    expect(index.findRelevantDocs({})).toEqual([]);

    // Context with undefined, null and empty-string values
    const blankContext = {
      query: undefined,
      codeSnippet: null,
      filePath: '',
    };
    expect(index.findRelevantDocs(blankContext)).toEqual([]);

    // Very long query
    const longQuery = 'word '.repeat(1000);
    expect(index.findRelevantDocs({ query: longQuery })).toEqual([]);
  });

  it('should provide confidence scoring', async () => {
    const index = new DocumentIndex();

    const doc = {
      fileName: 'test-doc.md',
      metadata: { keywords: ['testing'] },
      content: 'Testing information',
    };

    await index.indexDocument(doc);

    // Matching context: the query is exactly the document's metadata keyword.
    const matching = index.findRelevantDocs({ query: 'testing' });

    // Unrelated context: neither query word is a metadata keyword of the
    // document, and no results are expected at all.
    const unrelated = index.findRelevantDocs({ query: 'software development' });

    expect(matching.length).toBeGreaterThan(0);
    expect(unrelated.length).toBe(0);
  });

  it('should handle duplicate documents correctly', async () => {
    const index = new DocumentIndex();

    const doc = {
      fileName: 'duplicate-test.md',
      metadata: {
        keywords: ['react', 'react'], // Duplicate keywords
        tags: ['frontend', 'frontend'], // Duplicate tags
      },
      content: 'React React React', // Repeated content
    };

    await index.indexDocument(doc);

    const result = index.findRelevantDocs({ query: 'react' });

    // The document must be reported once, not once per duplicate.
    expect(result.length).toBe(1);
    expect(result[0].document.fileName).toBe('duplicate-test.md');
    // Score should still be reasonable despite duplicates
    expect(result[0].score).toBeLessThan(100);
  });
});
|
|
807
|
+
});
|