npm - @afterxleep/doc-bot - Versions diffs - 1.5.0 → 1.7.0 - Mend

@afterxleep/doc-bot 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md +184 -122
package/bin/doc-bot.js +16 -14
package/package.json +5 -2
package/src/index.js +326 -131
package/src/services/DocumentIndex.js +1 -10
package/src/services/DocumentationService.js +134 -68
package/src/services/__tests__/DocumentIndex.test.js +3 -72
package/src/services/__tests__/InferenceEngine.integration.test.js +0 -3

package/src/services/DocumentIndex.js CHANGED Viewed

@@ -28,16 +28,7 @@ class DocumentIndex {
       }
     }
-    // Index topics from tags and category (if present)
-    if (document.metadata?.tags) {
-      const tags = Array.isArray(document.metadata.tags)
-        ? document.metadata.tags
-        : [document.metadata.tags];
-      for (const tag of tags) {
-        this.addToIndex(this.topicIndex, tag.toLowerCase(), document, 5);
-      }
-    }
+    // Index topics from category (if present)
     if (document.metadata?.category) {
       this.addToIndex(this.topicIndex, document.metadata.category.toLowerCase(), document, 5);

package/src/services/DocumentationService.js CHANGED Viewed

@@ -6,7 +6,7 @@ const yaml = require('yaml');
 class DocumentationService {
   constructor(docsPath, manifestLoader = null) {
     this.docsPath = docsPath;
-    this.manifestLoader = manifestLoader;
+    this.manifestLoader = manifestLoader; // Keep for backward compatibility but not required
     this.documents = new Map();
     this.lastScanned = null;
   }
@@ -94,12 +94,12 @@ class DocumentationService {
       return [];
     }
-    const searchTerm = query.toLowerCase();
+    const searchTerms = this.parseQuery(query);
     const results = [];
     for (const doc of this.documents.values()) {
-      const score = this.calculateRelevanceScore(doc, searchTerm);
-      if (score > 0) {
+      const score = this.calculateAdvancedRelevanceScore(doc, searchTerms, query);
+      if (score > 0.1) { // Minimum relevance threshold
         results.push({
           ...doc,
           relevanceScore: score
@@ -111,65 +111,147 @@ class DocumentationService {
     return results.sort((a, b) => b.relevanceScore - a.relevanceScore);
   }
-  calculateRelevanceScore(doc, searchTerm) {
-    let score = 0;
+  parseQuery(query) {
+    // Split by spaces and remove common stop words
+    const stopWords = new Set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'how', 'what', 'where', 'when']);
+    return query.toLowerCase()
+      .split(/\s+/)
+      .map(term => term.replace(/[^a-z0-9]/g, '')) // Remove punctuation
+      .filter(term => term.length > 1 && !stopWords.has(term));
+  }
+  calculateAdvancedRelevanceScore(doc, searchTerms, originalQuery) {
+    let totalScore = 0;
     const content = doc.content.toLowerCase();
     const title = (doc.metadata?.title || doc.fileName).toLowerCase();
+    const description = (doc.metadata?.description || '').toLowerCase();
-    // Title matches get highest score
-    if (title.includes(searchTerm)) {
-      score += 10;
+    // Exact phrase match bonus (highest priority)
+    if (content.includes(originalQuery.toLowerCase()) || title.includes(originalQuery.toLowerCase())) {
+      totalScore += 20;
     }
-    // Content matches
-    const contentMatches = (content.match(new RegExp(searchTerm, 'g')) || []).length;
-    score += contentMatches * 2;
+    let matchedTerms = 0;
+    const termScores = [];
-    // Keyword matches in metadata
-    if (doc.metadata?.keywords) {
-      const keywords = Array.isArray(doc.metadata.keywords)
-        ? doc.metadata.keywords
-        : [doc.metadata.keywords];
+    for (const term of searchTerms) {
+      let termScore = 0;
+      // Title matches (highest weight)
+      if (title.includes(term)) {
+        termScore += 15;
+        matchedTerms++;
+      }
+      // Description matches (high weight)
+      if (description.includes(term)) {
+        termScore += 10;
+        matchedTerms++;
+      }
-      for (const keyword of keywords) {
-        if (keyword.toLowerCase().includes(searchTerm)) {
-          score += 5;
+      // Keyword exact matches (very high weight)
+      if (doc.metadata?.keywords) {
+        const keywords = Array.isArray(doc.metadata.keywords)
+          ? doc.metadata.keywords
+          : [doc.metadata.keywords];
+        for (const keyword of keywords) {
+          const keywordLower = keyword.toLowerCase();
+          if (keywordLower === term) {
+            termScore += 12; // Exact keyword match
+            matchedTerms++;
+          } else if (keywordLower.includes(term) || term.includes(keywordLower)) {
+            termScore += 8; // Partial keyword match
+            matchedTerms++;
+          }
         }
       }
+      // Content matches with frequency weighting
+      const contentMatches = (content.match(new RegExp(this.escapeRegExp(term), 'g')) || []).length;
+      if (contentMatches > 0) {
+        termScore += Math.min(contentMatches * 2, 10); // Cap at 10 to prevent spam
+        matchedTerms++;
+      }
+      // Fuzzy matching for typos (lower weight)
+      if (termScore === 0) {
+        const fuzzyScore = this.calculateFuzzyMatch(term, [title, description, content.substring(0, 500)].join(' '));
+        termScore += fuzzyScore;
+        if (fuzzyScore > 0) matchedTerms++;
+      }
+      termScores.push(termScore);
     }
-    // Category/tag matches
-    if (doc.metadata?.category?.toLowerCase().includes(searchTerm)) {
-      score += 3;
+    // Calculate final score
+    totalScore += termScores.reduce((sum, score) => sum + score, 0);
+    // Bonus for matching multiple terms
+    const termCoverage = matchedTerms / searchTerms.length;
+    totalScore *= (0.5 + termCoverage); // 50% base + coverage bonus
+    // Bonus for shorter documents (more focused)
+    const docLength = content.length;
+    if (docLength < 2000) {
+      totalScore *= 1.1;
     }
-    if (doc.metadata?.tags) {
-      const tags = Array.isArray(doc.metadata.tags)
-        ? doc.metadata.tags
-        : [doc.metadata.tags];
-      for (const tag of tags) {
-        if (tag.toLowerCase().includes(searchTerm)) {
-          score += 2;
-        }
+    // Normalize score (0-100 scale)
+    return Math.min(totalScore / 10, 100);
+  }
+  escapeRegExp(string) {
+    return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  }
+  calculateFuzzyMatch(term, text) {
+    // Simple fuzzy matching - check for partial matches
+    const words = text.toLowerCase().split(/\s+/);
+    let maxScore = 0;
+    for (const word of words) {
+      if (word.includes(term) || term.includes(word)) {
+        maxScore = Math.max(maxScore, 2);
+      } else if (this.levenshteinDistance(term, word) <= 2 && Math.min(term.length, word.length) > 3) {
+        maxScore = Math.max(maxScore, 1);
       }
     }
-    return score;
+    return maxScore;
   }
-  async getGlobalRules() {
-    if (!this.manifestLoader) {
-      return [];
-    }
+  levenshteinDistance(str1, str2) {
+    const matrix = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null));
-    const manifest = await this.manifestLoader.load();
-    const globalRulePaths = manifest.globalRules || [];
+    for (let i = 0; i <= str1.length; i++) matrix[0][i] = i;
+    for (let j = 0; j <= str2.length; j++) matrix[j][0] = j;
+    for (let j = 1; j <= str2.length; j++) {
+      for (let i = 1; i <= str1.length; i++) {
+        const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
+        matrix[j][i] = Math.min(
+          matrix[j][i - 1] + 1,
+          matrix[j - 1][i] + 1,
+          matrix[j - 1][i - 1] + indicator
+        );
+      }
+    }
+    return matrix[str2.length][str1.length];
+  }
+  calculateRelevanceScore(doc, searchTerm) {
+    // Legacy method - keep for backward compatibility
+    return this.calculateAdvancedRelevanceScore(doc, [searchTerm], searchTerm);
+  }
+  async getGlobalRules() {
     const globalRules = [];
-    for (const rulePath of globalRulePaths) {
-      const doc = this.documents.get(rulePath);
-      if (doc) {
+    // Find all documents with alwaysApply: true in frontmatter
+    for (const doc of this.documents.values()) {
+      if (doc.metadata?.alwaysApply === true) {
         globalRules.push(doc);
       }
     }
@@ -178,21 +260,19 @@ class DocumentationService {
   }
   async getContextualDocs(filePath) {
-    if (!this.manifestLoader) {
-      return [];
-    }
-    const manifest = await this.manifestLoader.load();
-    const contextualRules = manifest.contextualRules || {};
     const matchingDocs = [];
-    for (const [pattern, docPaths] of Object.entries(contextualRules)) {
-      if (this.matchesPattern(filePath, pattern)) {
-        for (const docPath of docPaths) {
-          const doc = this.documents.get(docPath);
-          if (doc) {
+    // Find documents with alwaysApply: false and matching patterns
+    for (const doc of this.documents.values()) {
+      if (doc.metadata?.alwaysApply === false || doc.metadata?.alwaysApply === undefined) {
+        // Check if document has file patterns in frontmatter
+        const patterns = doc.metadata?.filePatterns || doc.metadata?.applies || [];
+        const patternArray = Array.isArray(patterns) ? patterns : [patterns];
+        for (const pattern of patternArray) {
+          if (pattern && this.matchesPattern(filePath, pattern)) {
             matchingDocs.push(doc);
+            break; // Don't add the same doc multiple times
           }
         }
       }
@@ -228,20 +308,6 @@ class DocumentationService {
     return results;
   }
-  getDocumentsByTag(tag) {
-    const results = [];
-    for (const doc of this.documents.values()) {
-      const tags = doc.metadata?.tags || [];
-      const tagArray = Array.isArray(tags) ? tags : [tags];
-      if (tagArray.includes(tag)) {
-        results.push(doc);
-      }
-    }
-    return results;
-  }
 }
 module.exports = { DocumentationService };

package/src/services/__tests__/DocumentIndex.test.js CHANGED Viewed

@@ -13,7 +13,6 @@ describe('DocumentIndex', () => {
         metadata: {
           title: 'React Component Guide',
           keywords: ['react', 'components', 'jsx'],
-          tags: ['frontend', 'ui'],
           category: 'development'
         }
       },
@@ -23,7 +22,6 @@ describe('DocumentIndex', () => {
         metadata: {
           title: 'Testing Guide',
           keywords: ['testing', 'jest', 'unit-tests'],
-          tags: ['quality', 'testing'],
           category: 'development'
         }
       }
@@ -94,37 +92,7 @@ describe('DocumentIndex', () => {
       expect(pythonEntries.some(entry => entry.document === document)).toBe(true);
     });
-    it('should index tags in topic index', async () => {
-      const document = {
-        fileName: 'test.md',
-        metadata: {
-          tags: ['frontend', 'ui', 'design']
-        }
-      };
-      await documentIndex.indexDocument(document);
-      expect(documentIndex.topicIndex.has('frontend')).toBe(true);
-      expect(documentIndex.topicIndex.has('ui')).toBe(true);
-      expect(documentIndex.topicIndex.has('design')).toBe(true);
-      const frontendEntries = documentIndex.topicIndex.get('frontend');
-      expect(frontendEntries.some(entry => entry.document === document)).toBe(true);
-    });
-    it('should handle single tag as string', async () => {
-      const document = {
-        fileName: 'test.md',
-        metadata: {
-          tags: 'database'
-        }
-      };
-      await documentIndex.indexDocument(document);
-      expect(documentIndex.topicIndex.has('database')).toBe(true);
-      const databaseEntries = documentIndex.topicIndex.get('database');
-      expect(databaseEntries.some(entry => entry.document === document)).toBe(true);
-    });
     it('should index category in topic index', async () => {
       const document = {
@@ -246,42 +214,7 @@ describe('DocumentIndex', () => {
       expect(result[0].score).toBe(10); // High score for exact keyword match
     });
-    it('should find documents by topic match', async () => {
-      // Create a fresh index with no content to test exact scoring
-      const testIndex = new DocumentIndex();
-      const testDoc = {
-        fileName: 'clean-test.md',
-        metadata: { tags: ['frontend'] }
-      };
-      await testIndex.indexDocument(testDoc);
-      const context = { query: 'frontend' };
-      const result = testIndex.findRelevantDocs(context);
-      expect(result.length).toBe(1);
-      expect(result[0].document.fileName).toBe('clean-test.md');
-      expect(result[0].score).toBe(5); // Medium score for topic match
-    });
-    it('should combine scores for multiple matches', async () => {
-      // Create a fresh index with no content to test exact scoring
-      const testIndex = new DocumentIndex();
-      const testDoc = {
-        fileName: 'clean-test.md',
-        metadata: {
-          keywords: ['react'],
-          tags: ['frontend']
-        }
-      };
-      await testIndex.indexDocument(testDoc);
-      const context = { query: 'react frontend' };
-      const result = testIndex.findRelevantDocs(context);
-      expect(result.length).toBe(1);
-      expect(result[0].document.fileName).toBe('clean-test.md');
-      expect(result[0].score).toBe(15); // 10 (keyword) + 5 (topic)
-    });
     it('should handle case-insensitive queries', () => {
       const context = { query: 'REACT Components' };
@@ -616,7 +549,6 @@ const [state, setState] = useState();
         `,
         metadata: {
           keywords: ['react', 'testing'],
-          tags: ['frontend', 'testing'],
           category: 'development'
         }
       };
@@ -681,7 +613,6 @@ Files: *.test.js
         `,
         metadata: {
           keywords: ['react', 'testing'],
-          tags: ['frontend'],
           category: 'testing'
         }
       };
@@ -710,7 +641,7 @@ Files: *.test.js
       const docs = [
         {
           fileName: 'high-relevance.md',
-          metadata: { keywords: ['javascript', 'react'], tags: ['frontend'] },
+          metadata: { keywords: ['javascript', 'react'] },
           content: '```javascript\nconst [state] = useState();\n```'
         },
         {
@@ -720,7 +651,7 @@ Files: *.test.js
         },
         {
           fileName: 'low-relevance.md',
-          metadata: { tags: ['backend'] },
+          metadata: { category: 'backend' },
           content: 'Server-side development'
         }
       ];
@@ -788,7 +719,7 @@ Files: *.test.js
         fileName: 'duplicate-test.md',
         metadata: {
           keywords: ['react', 'react'], // Duplicate keywords
-          tags: ['frontend', 'frontend'] // Duplicate tags
+          category: 'frontend' // Category
         },
         content: 'React React React' // Repeated content
       };

package/src/services/__tests__/InferenceEngine.integration.test.js CHANGED Viewed

@@ -42,7 +42,6 @@ This guide covers React components, hooks, and best practices.
         metadata: {
           title: 'React Component Guide',
           keywords: ['react', 'components', 'hooks', 'useState', 'useEffect'],
-          tags: ['frontend', 'javascript'],
           category: 'development'
         },
         lastModified: new Date()
@@ -69,7 +68,6 @@ Best practices for testing React components.
         metadata: {
           title: 'Testing Guide',
           keywords: ['testing', 'jest', 'react-testing-library'],
-          tags: ['testing', 'quality'],
           category: 'development'
         },
         lastModified: new Date()
@@ -98,7 +96,6 @@ Building REST APIs with Express.js.
         metadata: {
           title: 'API Development Guide',
           keywords: ['api', 'express', 'nodejs', 'rest'],
-          tags: ['backend', 'api'],
           category: 'development'
         },
         lastModified: new Date()