@afterxleep/doc-bot 1.5.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +184 -122
- package/bin/doc-bot.js +16 -14
- package/package.json +5 -2
- package/src/index.js +326 -131
- package/src/services/DocumentIndex.js +1 -10
- package/src/services/DocumentationService.js +134 -68
- package/src/services/__tests__/DocumentIndex.test.js +3 -72
- package/src/services/__tests__/InferenceEngine.integration.test.js +0 -3
|
@@ -28,16 +28,7 @@ class DocumentIndex {
|
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
-
// Index topics from
|
|
32
|
-
if (document.metadata?.tags) {
|
|
33
|
-
const tags = Array.isArray(document.metadata.tags)
|
|
34
|
-
? document.metadata.tags
|
|
35
|
-
: [document.metadata.tags];
|
|
36
|
-
|
|
37
|
-
for (const tag of tags) {
|
|
38
|
-
this.addToIndex(this.topicIndex, tag.toLowerCase(), document, 5);
|
|
39
|
-
}
|
|
40
|
-
}
|
|
31
|
+
// Index topics from category (if present)
|
|
41
32
|
|
|
42
33
|
if (document.metadata?.category) {
|
|
43
34
|
this.addToIndex(this.topicIndex, document.metadata.category.toLowerCase(), document, 5);
|
|
@@ -6,7 +6,7 @@ const yaml = require('yaml');
|
|
|
6
6
|
class DocumentationService {
|
|
7
7
|
constructor(docsPath, manifestLoader = null) {
|
|
8
8
|
this.docsPath = docsPath;
|
|
9
|
-
this.manifestLoader = manifestLoader;
|
|
9
|
+
this.manifestLoader = manifestLoader; // Keep for backward compatibility but not required
|
|
10
10
|
this.documents = new Map();
|
|
11
11
|
this.lastScanned = null;
|
|
12
12
|
}
|
|
@@ -94,12 +94,12 @@ class DocumentationService {
|
|
|
94
94
|
return [];
|
|
95
95
|
}
|
|
96
96
|
|
|
97
|
-
const
|
|
97
|
+
const searchTerms = this.parseQuery(query);
|
|
98
98
|
const results = [];
|
|
99
99
|
|
|
100
100
|
for (const doc of this.documents.values()) {
|
|
101
|
-
const score = this.
|
|
102
|
-
if (score > 0) {
|
|
101
|
+
const score = this.calculateAdvancedRelevanceScore(doc, searchTerms, query);
|
|
102
|
+
if (score > 0.1) { // Minimum relevance threshold
|
|
103
103
|
results.push({
|
|
104
104
|
...doc,
|
|
105
105
|
relevanceScore: score
|
|
@@ -111,65 +111,147 @@ class DocumentationService {
|
|
|
111
111
|
return results.sort((a, b) => b.relevanceScore - a.relevanceScore);
|
|
112
112
|
}
|
|
113
113
|
|
|
114
|
-
|
|
115
|
-
|
|
114
|
+
parseQuery(query) {
|
|
115
|
+
// Split by spaces and remove common stop words
|
|
116
|
+
const stopWords = new Set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'how', 'what', 'where', 'when']);
|
|
117
|
+
return query.toLowerCase()
|
|
118
|
+
.split(/\s+/)
|
|
119
|
+
.map(term => term.replace(/[^a-z0-9]/g, '')) // Remove punctuation
|
|
120
|
+
.filter(term => term.length > 1 && !stopWords.has(term));
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
calculateAdvancedRelevanceScore(doc, searchTerms, originalQuery) {
|
|
124
|
+
let totalScore = 0;
|
|
116
125
|
const content = doc.content.toLowerCase();
|
|
117
126
|
const title = (doc.metadata?.title || doc.fileName).toLowerCase();
|
|
127
|
+
const description = (doc.metadata?.description || '').toLowerCase();
|
|
118
128
|
|
|
119
|
-
//
|
|
120
|
-
if (title.includes(
|
|
121
|
-
|
|
129
|
+
// Exact phrase match bonus (highest priority)
|
|
130
|
+
if (content.includes(originalQuery.toLowerCase()) || title.includes(originalQuery.toLowerCase())) {
|
|
131
|
+
totalScore += 20;
|
|
122
132
|
}
|
|
123
133
|
|
|
124
|
-
|
|
125
|
-
const
|
|
126
|
-
score += contentMatches * 2;
|
|
134
|
+
let matchedTerms = 0;
|
|
135
|
+
const termScores = [];
|
|
127
136
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
137
|
+
for (const term of searchTerms) {
|
|
138
|
+
let termScore = 0;
|
|
139
|
+
|
|
140
|
+
// Title matches (highest weight)
|
|
141
|
+
if (title.includes(term)) {
|
|
142
|
+
termScore += 15;
|
|
143
|
+
matchedTerms++;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Description matches (high weight)
|
|
147
|
+
if (description.includes(term)) {
|
|
148
|
+
termScore += 10;
|
|
149
|
+
matchedTerms++;
|
|
150
|
+
}
|
|
133
151
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
152
|
+
// Keyword exact matches (very high weight)
|
|
153
|
+
if (doc.metadata?.keywords) {
|
|
154
|
+
const keywords = Array.isArray(doc.metadata.keywords)
|
|
155
|
+
? doc.metadata.keywords
|
|
156
|
+
: [doc.metadata.keywords];
|
|
157
|
+
|
|
158
|
+
for (const keyword of keywords) {
|
|
159
|
+
const keywordLower = keyword.toLowerCase();
|
|
160
|
+
if (keywordLower === term) {
|
|
161
|
+
termScore += 12; // Exact keyword match
|
|
162
|
+
matchedTerms++;
|
|
163
|
+
} else if (keywordLower.includes(term) || term.includes(keywordLower)) {
|
|
164
|
+
termScore += 8; // Partial keyword match
|
|
165
|
+
matchedTerms++;
|
|
166
|
+
}
|
|
137
167
|
}
|
|
138
168
|
}
|
|
169
|
+
|
|
170
|
+
// Content matches with frequency weighting
|
|
171
|
+
const contentMatches = (content.match(new RegExp(this.escapeRegExp(term), 'g')) || []).length;
|
|
172
|
+
if (contentMatches > 0) {
|
|
173
|
+
termScore += Math.min(contentMatches * 2, 10); // Cap at 10 to prevent spam
|
|
174
|
+
matchedTerms++;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// Fuzzy matching for typos (lower weight)
|
|
178
|
+
if (termScore === 0) {
|
|
179
|
+
const fuzzyScore = this.calculateFuzzyMatch(term, [title, description, content.substring(0, 500)].join(' '));
|
|
180
|
+
termScore += fuzzyScore;
|
|
181
|
+
if (fuzzyScore > 0) matchedTerms++;
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
termScores.push(termScore);
|
|
139
185
|
}
|
|
140
186
|
|
|
141
|
-
//
|
|
142
|
-
|
|
143
|
-
|
|
187
|
+
// Calculate final score
|
|
188
|
+
totalScore += termScores.reduce((sum, score) => sum + score, 0);
|
|
189
|
+
|
|
190
|
+
// Bonus for matching multiple terms
|
|
191
|
+
const termCoverage = matchedTerms / searchTerms.length;
|
|
192
|
+
totalScore *= (0.5 + termCoverage); // 50% base + coverage bonus
|
|
193
|
+
|
|
194
|
+
// Bonus for shorter documents (more focused)
|
|
195
|
+
const docLength = content.length;
|
|
196
|
+
if (docLength < 2000) {
|
|
197
|
+
totalScore *= 1.1;
|
|
144
198
|
}
|
|
145
199
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
200
|
+
// Normalize score (0-100 scale)
|
|
201
|
+
return Math.min(totalScore / 10, 100);
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
escapeRegExp(string) {
|
|
205
|
+
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
calculateFuzzyMatch(term, text) {
|
|
209
|
+
// Simple fuzzy matching - check for partial matches
|
|
210
|
+
const words = text.toLowerCase().split(/\s+/);
|
|
211
|
+
let maxScore = 0;
|
|
212
|
+
|
|
213
|
+
for (const word of words) {
|
|
214
|
+
if (word.includes(term) || term.includes(word)) {
|
|
215
|
+
maxScore = Math.max(maxScore, 2);
|
|
216
|
+
} else if (this.levenshteinDistance(term, word) <= 2 && Math.min(term.length, word.length) > 3) {
|
|
217
|
+
maxScore = Math.max(maxScore, 1);
|
|
155
218
|
}
|
|
156
219
|
}
|
|
157
220
|
|
|
158
|
-
return
|
|
221
|
+
return maxScore;
|
|
159
222
|
}
|
|
160
223
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
return [];
|
|
164
|
-
}
|
|
224
|
+
levenshteinDistance(str1, str2) {
|
|
225
|
+
const matrix = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null));
|
|
165
226
|
|
|
166
|
-
|
|
167
|
-
|
|
227
|
+
for (let i = 0; i <= str1.length; i++) matrix[0][i] = i;
|
|
228
|
+
for (let j = 0; j <= str2.length; j++) matrix[j][0] = j;
|
|
168
229
|
|
|
230
|
+
for (let j = 1; j <= str2.length; j++) {
|
|
231
|
+
for (let i = 1; i <= str1.length; i++) {
|
|
232
|
+
const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
|
|
233
|
+
matrix[j][i] = Math.min(
|
|
234
|
+
matrix[j][i - 1] + 1,
|
|
235
|
+
matrix[j - 1][i] + 1,
|
|
236
|
+
matrix[j - 1][i - 1] + indicator
|
|
237
|
+
);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
return matrix[str2.length][str1.length];
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
calculateRelevanceScore(doc, searchTerm) {
|
|
245
|
+
// Legacy method - keep for backward compatibility
|
|
246
|
+
return this.calculateAdvancedRelevanceScore(doc, [searchTerm], searchTerm);
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
async getGlobalRules() {
|
|
169
250
|
const globalRules = [];
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
251
|
+
|
|
252
|
+
// Find all documents with alwaysApply: true in frontmatter
|
|
253
|
+
for (const doc of this.documents.values()) {
|
|
254
|
+
if (doc.metadata?.alwaysApply === true) {
|
|
173
255
|
globalRules.push(doc);
|
|
174
256
|
}
|
|
175
257
|
}
|
|
@@ -178,21 +260,19 @@ class DocumentationService {
|
|
|
178
260
|
}
|
|
179
261
|
|
|
180
262
|
async getContextualDocs(filePath) {
|
|
181
|
-
if (!this.manifestLoader) {
|
|
182
|
-
return [];
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
const manifest = await this.manifestLoader.load();
|
|
186
|
-
const contextualRules = manifest.contextualRules || {};
|
|
187
|
-
|
|
188
263
|
const matchingDocs = [];
|
|
189
264
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
265
|
+
// Find documents with alwaysApply: false and matching patterns
|
|
266
|
+
for (const doc of this.documents.values()) {
|
|
267
|
+
if (doc.metadata?.alwaysApply === false || doc.metadata?.alwaysApply === undefined) {
|
|
268
|
+
// Check if document has file patterns in frontmatter
|
|
269
|
+
const patterns = doc.metadata?.filePatterns || doc.metadata?.applies || [];
|
|
270
|
+
const patternArray = Array.isArray(patterns) ? patterns : [patterns];
|
|
271
|
+
|
|
272
|
+
for (const pattern of patternArray) {
|
|
273
|
+
if (pattern && this.matchesPattern(filePath, pattern)) {
|
|
195
274
|
matchingDocs.push(doc);
|
|
275
|
+
break; // Don't add the same doc multiple times
|
|
196
276
|
}
|
|
197
277
|
}
|
|
198
278
|
}
|
|
@@ -228,20 +308,6 @@ class DocumentationService {
|
|
|
228
308
|
return results;
|
|
229
309
|
}
|
|
230
310
|
|
|
231
|
-
getDocumentsByTag(tag) {
|
|
232
|
-
const results = [];
|
|
233
|
-
|
|
234
|
-
for (const doc of this.documents.values()) {
|
|
235
|
-
const tags = doc.metadata?.tags || [];
|
|
236
|
-
const tagArray = Array.isArray(tags) ? tags : [tags];
|
|
237
|
-
|
|
238
|
-
if (tagArray.includes(tag)) {
|
|
239
|
-
results.push(doc);
|
|
240
|
-
}
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
return results;
|
|
244
|
-
}
|
|
245
311
|
}
|
|
246
312
|
|
|
247
313
|
module.exports = { DocumentationService };
|
|
@@ -13,7 +13,6 @@ describe('DocumentIndex', () => {
|
|
|
13
13
|
metadata: {
|
|
14
14
|
title: 'React Component Guide',
|
|
15
15
|
keywords: ['react', 'components', 'jsx'],
|
|
16
|
-
tags: ['frontend', 'ui'],
|
|
17
16
|
category: 'development'
|
|
18
17
|
}
|
|
19
18
|
},
|
|
@@ -23,7 +22,6 @@ describe('DocumentIndex', () => {
|
|
|
23
22
|
metadata: {
|
|
24
23
|
title: 'Testing Guide',
|
|
25
24
|
keywords: ['testing', 'jest', 'unit-tests'],
|
|
26
|
-
tags: ['quality', 'testing'],
|
|
27
25
|
category: 'development'
|
|
28
26
|
}
|
|
29
27
|
}
|
|
@@ -94,37 +92,7 @@ describe('DocumentIndex', () => {
|
|
|
94
92
|
expect(pythonEntries.some(entry => entry.document === document)).toBe(true);
|
|
95
93
|
});
|
|
96
94
|
|
|
97
|
-
it('should index tags in topic index', async () => {
|
|
98
|
-
const document = {
|
|
99
|
-
fileName: 'test.md',
|
|
100
|
-
metadata: {
|
|
101
|
-
tags: ['frontend', 'ui', 'design']
|
|
102
|
-
}
|
|
103
|
-
};
|
|
104
|
-
|
|
105
|
-
await documentIndex.indexDocument(document);
|
|
106
|
-
|
|
107
|
-
expect(documentIndex.topicIndex.has('frontend')).toBe(true);
|
|
108
|
-
expect(documentIndex.topicIndex.has('ui')).toBe(true);
|
|
109
|
-
expect(documentIndex.topicIndex.has('design')).toBe(true);
|
|
110
|
-
const frontendEntries = documentIndex.topicIndex.get('frontend');
|
|
111
|
-
expect(frontendEntries.some(entry => entry.document === document)).toBe(true);
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
it('should handle single tag as string', async () => {
|
|
115
|
-
const document = {
|
|
116
|
-
fileName: 'test.md',
|
|
117
|
-
metadata: {
|
|
118
|
-
tags: 'database'
|
|
119
|
-
}
|
|
120
|
-
};
|
|
121
95
|
|
|
122
|
-
await documentIndex.indexDocument(document);
|
|
123
|
-
|
|
124
|
-
expect(documentIndex.topicIndex.has('database')).toBe(true);
|
|
125
|
-
const databaseEntries = documentIndex.topicIndex.get('database');
|
|
126
|
-
expect(databaseEntries.some(entry => entry.document === document)).toBe(true);
|
|
127
|
-
});
|
|
128
96
|
|
|
129
97
|
it('should index category in topic index', async () => {
|
|
130
98
|
const document = {
|
|
@@ -246,42 +214,7 @@ describe('DocumentIndex', () => {
|
|
|
246
214
|
expect(result[0].score).toBe(10); // High score for exact keyword match
|
|
247
215
|
});
|
|
248
216
|
|
|
249
|
-
it('should find documents by topic match', async () => {
|
|
250
|
-
// Create a fresh index with no content to test exact scoring
|
|
251
|
-
const testIndex = new DocumentIndex();
|
|
252
|
-
const testDoc = {
|
|
253
|
-
fileName: 'clean-test.md',
|
|
254
|
-
metadata: { tags: ['frontend'] }
|
|
255
|
-
};
|
|
256
|
-
await testIndex.indexDocument(testDoc);
|
|
257
|
-
|
|
258
|
-
const context = { query: 'frontend' };
|
|
259
|
-
const result = testIndex.findRelevantDocs(context);
|
|
260
|
-
|
|
261
|
-
expect(result.length).toBe(1);
|
|
262
|
-
expect(result[0].document.fileName).toBe('clean-test.md');
|
|
263
|
-
expect(result[0].score).toBe(5); // Medium score for topic match
|
|
264
|
-
});
|
|
265
217
|
|
|
266
|
-
it('should combine scores for multiple matches', async () => {
|
|
267
|
-
// Create a fresh index with no content to test exact scoring
|
|
268
|
-
const testIndex = new DocumentIndex();
|
|
269
|
-
const testDoc = {
|
|
270
|
-
fileName: 'clean-test.md',
|
|
271
|
-
metadata: {
|
|
272
|
-
keywords: ['react'],
|
|
273
|
-
tags: ['frontend']
|
|
274
|
-
}
|
|
275
|
-
};
|
|
276
|
-
await testIndex.indexDocument(testDoc);
|
|
277
|
-
|
|
278
|
-
const context = { query: 'react frontend' };
|
|
279
|
-
const result = testIndex.findRelevantDocs(context);
|
|
280
|
-
|
|
281
|
-
expect(result.length).toBe(1);
|
|
282
|
-
expect(result[0].document.fileName).toBe('clean-test.md');
|
|
283
|
-
expect(result[0].score).toBe(15); // 10 (keyword) + 5 (topic)
|
|
284
|
-
});
|
|
285
218
|
|
|
286
219
|
it('should handle case-insensitive queries', () => {
|
|
287
220
|
const context = { query: 'REACT Components' };
|
|
@@ -616,7 +549,6 @@ const [state, setState] = useState();
|
|
|
616
549
|
`,
|
|
617
550
|
metadata: {
|
|
618
551
|
keywords: ['react', 'testing'],
|
|
619
|
-
tags: ['frontend', 'testing'],
|
|
620
552
|
category: 'development'
|
|
621
553
|
}
|
|
622
554
|
};
|
|
@@ -681,7 +613,6 @@ Files: *.test.js
|
|
|
681
613
|
`,
|
|
682
614
|
metadata: {
|
|
683
615
|
keywords: ['react', 'testing'],
|
|
684
|
-
tags: ['frontend'],
|
|
685
616
|
category: 'testing'
|
|
686
617
|
}
|
|
687
618
|
};
|
|
@@ -710,7 +641,7 @@ Files: *.test.js
|
|
|
710
641
|
const docs = [
|
|
711
642
|
{
|
|
712
643
|
fileName: 'high-relevance.md',
|
|
713
|
-
metadata: { keywords: ['javascript', 'react']
|
|
644
|
+
metadata: { keywords: ['javascript', 'react'] },
|
|
714
645
|
content: '```javascript\nconst [state] = useState();\n```'
|
|
715
646
|
},
|
|
716
647
|
{
|
|
@@ -720,7 +651,7 @@ Files: *.test.js
|
|
|
720
651
|
},
|
|
721
652
|
{
|
|
722
653
|
fileName: 'low-relevance.md',
|
|
723
|
-
metadata: {
|
|
654
|
+
metadata: { category: 'backend' },
|
|
724
655
|
content: 'Server-side development'
|
|
725
656
|
}
|
|
726
657
|
];
|
|
@@ -788,7 +719,7 @@ Files: *.test.js
|
|
|
788
719
|
fileName: 'duplicate-test.md',
|
|
789
720
|
metadata: {
|
|
790
721
|
keywords: ['react', 'react'], // Duplicate keywords
|
|
791
|
-
|
|
722
|
+
category: 'frontend' // Category
|
|
792
723
|
},
|
|
793
724
|
content: 'React React React' // Repeated content
|
|
794
725
|
};
|
|
@@ -42,7 +42,6 @@ This guide covers React components, hooks, and best practices.
|
|
|
42
42
|
metadata: {
|
|
43
43
|
title: 'React Component Guide',
|
|
44
44
|
keywords: ['react', 'components', 'hooks', 'useState', 'useEffect'],
|
|
45
|
-
tags: ['frontend', 'javascript'],
|
|
46
45
|
category: 'development'
|
|
47
46
|
},
|
|
48
47
|
lastModified: new Date()
|
|
@@ -69,7 +68,6 @@ Best practices for testing React components.
|
|
|
69
68
|
metadata: {
|
|
70
69
|
title: 'Testing Guide',
|
|
71
70
|
keywords: ['testing', 'jest', 'react-testing-library'],
|
|
72
|
-
tags: ['testing', 'quality'],
|
|
73
71
|
category: 'development'
|
|
74
72
|
},
|
|
75
73
|
lastModified: new Date()
|
|
@@ -98,7 +96,6 @@ Building REST APIs with Express.js.
|
|
|
98
96
|
metadata: {
|
|
99
97
|
title: 'API Development Guide',
|
|
100
98
|
keywords: ['api', 'express', 'nodejs', 'rest'],
|
|
101
|
-
tags: ['backend', 'api'],
|
|
102
99
|
category: 'development'
|
|
103
100
|
},
|
|
104
101
|
lastModified: new Date()
|