@afterxleep/doc-bot 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,16 +28,7 @@ class DocumentIndex {
28
28
  }
29
29
  }
30
30
 
31
- // Index topics from tags and category (if present)
32
- if (document.metadata?.tags) {
33
- const tags = Array.isArray(document.metadata.tags)
34
- ? document.metadata.tags
35
- : [document.metadata.tags];
36
-
37
- for (const tag of tags) {
38
- this.addToIndex(this.topicIndex, tag.toLowerCase(), document, 5);
39
- }
40
- }
31
+ // Index topics from category (if present)
41
32
 
42
33
  if (document.metadata?.category) {
43
34
  this.addToIndex(this.topicIndex, document.metadata.category.toLowerCase(), document, 5);
@@ -6,7 +6,7 @@ const yaml = require('yaml');
6
6
  class DocumentationService {
7
7
  constructor(docsPath, manifestLoader = null) {
8
8
  this.docsPath = docsPath;
9
- this.manifestLoader = manifestLoader;
9
+ this.manifestLoader = manifestLoader; // Keep for backward compatibility but not required
10
10
  this.documents = new Map();
11
11
  this.lastScanned = null;
12
12
  }
@@ -94,12 +94,12 @@ class DocumentationService {
94
94
  return [];
95
95
  }
96
96
 
97
- const searchTerm = query.toLowerCase();
97
+ const searchTerms = this.parseQuery(query);
98
98
  const results = [];
99
99
 
100
100
  for (const doc of this.documents.values()) {
101
- const score = this.calculateRelevanceScore(doc, searchTerm);
102
- if (score > 0) {
101
+ const score = this.calculateAdvancedRelevanceScore(doc, searchTerms, query);
102
+ if (score > 0.1) { // Minimum relevance threshold
103
103
  results.push({
104
104
  ...doc,
105
105
  relevanceScore: score
@@ -111,65 +111,147 @@ class DocumentationService {
111
111
  return results.sort((a, b) => b.relevanceScore - a.relevanceScore);
112
112
  }
113
113
 
114
- calculateRelevanceScore(doc, searchTerm) {
115
- let score = 0;
114
+ parseQuery(query) {
115
+ // Split by spaces and remove common stop words
116
+ const stopWords = new Set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'how', 'what', 'where', 'when']);
117
+ return query.toLowerCase()
118
+ .split(/\s+/)
119
+ .map(term => term.replace(/[^a-z0-9]/g, '')) // Remove punctuation
120
+ .filter(term => term.length > 1 && !stopWords.has(term));
121
+ }
122
+
123
+ calculateAdvancedRelevanceScore(doc, searchTerms, originalQuery) {
124
+ let totalScore = 0;
116
125
  const content = doc.content.toLowerCase();
117
126
  const title = (doc.metadata?.title || doc.fileName).toLowerCase();
127
+ const description = (doc.metadata?.description || '').toLowerCase();
118
128
 
119
- // Title matches get highest score
120
- if (title.includes(searchTerm)) {
121
- score += 10;
129
+ // Exact phrase match bonus (highest priority)
130
+ if (content.includes(originalQuery.toLowerCase()) || title.includes(originalQuery.toLowerCase())) {
131
+ totalScore += 20;
122
132
  }
123
133
 
124
- // Content matches
125
- const contentMatches = (content.match(new RegExp(searchTerm, 'g')) || []).length;
126
- score += contentMatches * 2;
134
+ let matchedTerms = 0;
135
+ const termScores = [];
127
136
 
128
- // Keyword matches in metadata
129
- if (doc.metadata?.keywords) {
130
- const keywords = Array.isArray(doc.metadata.keywords)
131
- ? doc.metadata.keywords
132
- : [doc.metadata.keywords];
137
+ for (const term of searchTerms) {
138
+ let termScore = 0;
139
+
140
+ // Title matches (highest weight)
141
+ if (title.includes(term)) {
142
+ termScore += 15;
143
+ matchedTerms++;
144
+ }
145
+
146
+ // Description matches (high weight)
147
+ if (description.includes(term)) {
148
+ termScore += 10;
149
+ matchedTerms++;
150
+ }
133
151
 
134
- for (const keyword of keywords) {
135
- if (keyword.toLowerCase().includes(searchTerm)) {
136
- score += 5;
152
+ // Keyword exact matches (very high weight)
153
+ if (doc.metadata?.keywords) {
154
+ const keywords = Array.isArray(doc.metadata.keywords)
155
+ ? doc.metadata.keywords
156
+ : [doc.metadata.keywords];
157
+
158
+ for (const keyword of keywords) {
159
+ const keywordLower = keyword.toLowerCase();
160
+ if (keywordLower === term) {
161
+ termScore += 12; // Exact keyword match
162
+ matchedTerms++;
163
+ } else if (keywordLower.includes(term) || term.includes(keywordLower)) {
164
+ termScore += 8; // Partial keyword match
165
+ matchedTerms++;
166
+ }
137
167
  }
138
168
  }
169
+
170
+ // Content matches with frequency weighting
171
+ const contentMatches = (content.match(new RegExp(this.escapeRegExp(term), 'g')) || []).length;
172
+ if (contentMatches > 0) {
173
+ termScore += Math.min(contentMatches * 2, 10); // Cap at 10 to prevent spam
174
+ matchedTerms++;
175
+ }
176
+
177
+ // Fuzzy matching for typos (lower weight)
178
+ if (termScore === 0) {
179
+ const fuzzyScore = this.calculateFuzzyMatch(term, [title, description, content.substring(0, 500)].join(' '));
180
+ termScore += fuzzyScore;
181
+ if (fuzzyScore > 0) matchedTerms++;
182
+ }
183
+
184
+ termScores.push(termScore);
139
185
  }
140
186
 
141
- // Category/tag matches
142
- if (doc.metadata?.category?.toLowerCase().includes(searchTerm)) {
143
- score += 3;
187
+ // Calculate final score
188
+ totalScore += termScores.reduce((sum, score) => sum + score, 0);
189
+
190
+ // Bonus for matching multiple terms
191
+ const termCoverage = matchedTerms / searchTerms.length;
192
+ totalScore *= (0.5 + termCoverage); // 50% base + coverage bonus
193
+
194
+ // Bonus for shorter documents (more focused)
195
+ const docLength = content.length;
196
+ if (docLength < 2000) {
197
+ totalScore *= 1.1;
144
198
  }
145
199
 
146
- if (doc.metadata?.tags) {
147
- const tags = Array.isArray(doc.metadata.tags)
148
- ? doc.metadata.tags
149
- : [doc.metadata.tags];
150
-
151
- for (const tag of tags) {
152
- if (tag.toLowerCase().includes(searchTerm)) {
153
- score += 2;
154
- }
200
+ // Normalize score (0-100 scale)
201
+ return Math.min(totalScore / 10, 100);
202
+ }
203
+
204
+ escapeRegExp(string) {
205
+ return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
206
+ }
207
+
208
+ calculateFuzzyMatch(term, text) {
209
+ // Simple fuzzy matching - check for partial matches
210
+ const words = text.toLowerCase().split(/\s+/);
211
+ let maxScore = 0;
212
+
213
+ for (const word of words) {
214
+ if (word.includes(term) || term.includes(word)) {
215
+ maxScore = Math.max(maxScore, 2);
216
+ } else if (this.levenshteinDistance(term, word) <= 2 && Math.min(term.length, word.length) > 3) {
217
+ maxScore = Math.max(maxScore, 1);
155
218
  }
156
219
  }
157
220
 
158
- return score;
221
+ return maxScore;
159
222
  }
160
223
 
161
- async getGlobalRules() {
162
- if (!this.manifestLoader) {
163
- return [];
164
- }
224
+ levenshteinDistance(str1, str2) {
225
+ const matrix = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null));
165
226
 
166
- const manifest = await this.manifestLoader.load();
167
- const globalRulePaths = manifest.globalRules || [];
227
+ for (let i = 0; i <= str1.length; i++) matrix[0][i] = i;
228
+ for (let j = 0; j <= str2.length; j++) matrix[j][0] = j;
168
229
 
230
+ for (let j = 1; j <= str2.length; j++) {
231
+ for (let i = 1; i <= str1.length; i++) {
232
+ const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
233
+ matrix[j][i] = Math.min(
234
+ matrix[j][i - 1] + 1,
235
+ matrix[j - 1][i] + 1,
236
+ matrix[j - 1][i - 1] + indicator
237
+ );
238
+ }
239
+ }
240
+
241
+ return matrix[str2.length][str1.length];
242
+ }
243
+
244
+ calculateRelevanceScore(doc, searchTerm) {
245
+ // Legacy method - keep for backward compatibility
246
+ return this.calculateAdvancedRelevanceScore(doc, [searchTerm], searchTerm);
247
+ }
248
+
249
+ async getGlobalRules() {
169
250
  const globalRules = [];
170
- for (const rulePath of globalRulePaths) {
171
- const doc = this.documents.get(rulePath);
172
- if (doc) {
251
+
252
+ // Find all documents with alwaysApply: true in frontmatter
253
+ for (const doc of this.documents.values()) {
254
+ if (doc.metadata?.alwaysApply === true) {
173
255
  globalRules.push(doc);
174
256
  }
175
257
  }
@@ -178,21 +260,19 @@ class DocumentationService {
178
260
  }
179
261
 
180
262
  async getContextualDocs(filePath) {
181
- if (!this.manifestLoader) {
182
- return [];
183
- }
184
-
185
- const manifest = await this.manifestLoader.load();
186
- const contextualRules = manifest.contextualRules || {};
187
-
188
263
  const matchingDocs = [];
189
264
 
190
- for (const [pattern, docPaths] of Object.entries(contextualRules)) {
191
- if (this.matchesPattern(filePath, pattern)) {
192
- for (const docPath of docPaths) {
193
- const doc = this.documents.get(docPath);
194
- if (doc) {
265
+ // Find documents with alwaysApply: false and matching patterns
266
+ for (const doc of this.documents.values()) {
267
+ if (doc.metadata?.alwaysApply === false || doc.metadata?.alwaysApply === undefined) {
268
+ // Check if document has file patterns in frontmatter
269
+ const patterns = doc.metadata?.filePatterns || doc.metadata?.applies || [];
270
+ const patternArray = Array.isArray(patterns) ? patterns : [patterns];
271
+
272
+ for (const pattern of patternArray) {
273
+ if (pattern && this.matchesPattern(filePath, pattern)) {
195
274
  matchingDocs.push(doc);
275
+ break; // Don't add the same doc multiple times
196
276
  }
197
277
  }
198
278
  }
@@ -228,20 +308,6 @@ class DocumentationService {
228
308
  return results;
229
309
  }
230
310
 
231
- getDocumentsByTag(tag) {
232
- const results = [];
233
-
234
- for (const doc of this.documents.values()) {
235
- const tags = doc.metadata?.tags || [];
236
- const tagArray = Array.isArray(tags) ? tags : [tags];
237
-
238
- if (tagArray.includes(tag)) {
239
- results.push(doc);
240
- }
241
- }
242
-
243
- return results;
244
- }
245
311
  }
246
312
 
247
313
  module.exports = { DocumentationService };
@@ -13,7 +13,6 @@ describe('DocumentIndex', () => {
13
13
  metadata: {
14
14
  title: 'React Component Guide',
15
15
  keywords: ['react', 'components', 'jsx'],
16
- tags: ['frontend', 'ui'],
17
16
  category: 'development'
18
17
  }
19
18
  },
@@ -23,7 +22,6 @@ describe('DocumentIndex', () => {
23
22
  metadata: {
24
23
  title: 'Testing Guide',
25
24
  keywords: ['testing', 'jest', 'unit-tests'],
26
- tags: ['quality', 'testing'],
27
25
  category: 'development'
28
26
  }
29
27
  }
@@ -94,37 +92,7 @@ describe('DocumentIndex', () => {
94
92
  expect(pythonEntries.some(entry => entry.document === document)).toBe(true);
95
93
  });
96
94
 
97
- it('should index tags in topic index', async () => {
98
- const document = {
99
- fileName: 'test.md',
100
- metadata: {
101
- tags: ['frontend', 'ui', 'design']
102
- }
103
- };
104
-
105
- await documentIndex.indexDocument(document);
106
-
107
- expect(documentIndex.topicIndex.has('frontend')).toBe(true);
108
- expect(documentIndex.topicIndex.has('ui')).toBe(true);
109
- expect(documentIndex.topicIndex.has('design')).toBe(true);
110
- const frontendEntries = documentIndex.topicIndex.get('frontend');
111
- expect(frontendEntries.some(entry => entry.document === document)).toBe(true);
112
- });
113
-
114
- it('should handle single tag as string', async () => {
115
- const document = {
116
- fileName: 'test.md',
117
- metadata: {
118
- tags: 'database'
119
- }
120
- };
121
95
 
122
- await documentIndex.indexDocument(document);
123
-
124
- expect(documentIndex.topicIndex.has('database')).toBe(true);
125
- const databaseEntries = documentIndex.topicIndex.get('database');
126
- expect(databaseEntries.some(entry => entry.document === document)).toBe(true);
127
- });
128
96
 
129
97
  it('should index category in topic index', async () => {
130
98
  const document = {
@@ -246,42 +214,7 @@ describe('DocumentIndex', () => {
246
214
  expect(result[0].score).toBe(10); // High score for exact keyword match
247
215
  });
248
216
 
249
- it('should find documents by topic match', async () => {
250
- // Create a fresh index with no content to test exact scoring
251
- const testIndex = new DocumentIndex();
252
- const testDoc = {
253
- fileName: 'clean-test.md',
254
- metadata: { tags: ['frontend'] }
255
- };
256
- await testIndex.indexDocument(testDoc);
257
-
258
- const context = { query: 'frontend' };
259
- const result = testIndex.findRelevantDocs(context);
260
-
261
- expect(result.length).toBe(1);
262
- expect(result[0].document.fileName).toBe('clean-test.md');
263
- expect(result[0].score).toBe(5); // Medium score for topic match
264
- });
265
217
 
266
- it('should combine scores for multiple matches', async () => {
267
- // Create a fresh index with no content to test exact scoring
268
- const testIndex = new DocumentIndex();
269
- const testDoc = {
270
- fileName: 'clean-test.md',
271
- metadata: {
272
- keywords: ['react'],
273
- tags: ['frontend']
274
- }
275
- };
276
- await testIndex.indexDocument(testDoc);
277
-
278
- const context = { query: 'react frontend' };
279
- const result = testIndex.findRelevantDocs(context);
280
-
281
- expect(result.length).toBe(1);
282
- expect(result[0].document.fileName).toBe('clean-test.md');
283
- expect(result[0].score).toBe(15); // 10 (keyword) + 5 (topic)
284
- });
285
218
 
286
219
  it('should handle case-insensitive queries', () => {
287
220
  const context = { query: 'REACT Components' };
@@ -616,7 +549,6 @@ const [state, setState] = useState();
616
549
  `,
617
550
  metadata: {
618
551
  keywords: ['react', 'testing'],
619
- tags: ['frontend', 'testing'],
620
552
  category: 'development'
621
553
  }
622
554
  };
@@ -681,7 +613,6 @@ Files: *.test.js
681
613
  `,
682
614
  metadata: {
683
615
  keywords: ['react', 'testing'],
684
- tags: ['frontend'],
685
616
  category: 'testing'
686
617
  }
687
618
  };
@@ -710,7 +641,7 @@ Files: *.test.js
710
641
  const docs = [
711
642
  {
712
643
  fileName: 'high-relevance.md',
713
- metadata: { keywords: ['javascript', 'react'], tags: ['frontend'] },
644
+ metadata: { keywords: ['javascript', 'react'] },
714
645
  content: '```javascript\nconst [state] = useState();\n```'
715
646
  },
716
647
  {
@@ -720,7 +651,7 @@ Files: *.test.js
720
651
  },
721
652
  {
722
653
  fileName: 'low-relevance.md',
723
- metadata: { tags: ['backend'] },
654
+ metadata: { category: 'backend' },
724
655
  content: 'Server-side development'
725
656
  }
726
657
  ];
@@ -788,7 +719,7 @@ Files: *.test.js
788
719
  fileName: 'duplicate-test.md',
789
720
  metadata: {
790
721
  keywords: ['react', 'react'], // Duplicate keywords
791
- tags: ['frontend', 'frontend'] // Duplicate tags
722
+ category: 'frontend' // Category
792
723
  },
793
724
  content: 'React React React' // Repeated content
794
725
  };
@@ -42,7 +42,6 @@ This guide covers React components, hooks, and best practices.
42
42
  metadata: {
43
43
  title: 'React Component Guide',
44
44
  keywords: ['react', 'components', 'hooks', 'useState', 'useEffect'],
45
- tags: ['frontend', 'javascript'],
46
45
  category: 'development'
47
46
  },
48
47
  lastModified: new Date()
@@ -69,7 +68,6 @@ Best practices for testing React components.
69
68
  metadata: {
70
69
  title: 'Testing Guide',
71
70
  keywords: ['testing', 'jest', 'react-testing-library'],
72
- tags: ['testing', 'quality'],
73
71
  category: 'development'
74
72
  },
75
73
  lastModified: new Date()
@@ -98,7 +96,6 @@ Building REST APIs with Express.js.
98
96
  metadata: {
99
97
  title: 'API Development Guide',
100
98
  keywords: ['api', 'express', 'nodejs', 'rest'],
101
- tags: ['backend', 'api'],
102
99
  category: 'development'
103
100
  },
104
101
  lastModified: new Date()