npm - @afterxleep/doc-bot - Versions diffs - 1.10.0 → 1.13.0 - Mend

@afterxleep/doc-bot 1.10.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/src/services/DocumentationService.js CHANGED Viewed

@@ -99,10 +99,17 @@ class DocumentationService {
     for (const doc of this.documents.values()) {
       const score = this.calculateAdvancedRelevanceScore(doc, searchTerms, query);
-      if (score > 0.1) { // Minimum relevance threshold
+      // Only include documents with meaningful relevance (5% or higher)
+      // This filters out documents that only have weak partial matches
+      if (score >= 5.0) {
+        // Extract a relevant snippet from the content
+        const snippet = this.extractRelevantSnippet(doc.content, searchTerms, query);
         results.push({
           ...doc,
-          relevanceScore: score
+          relevanceScore: score,
+          snippet: snippet,
+          matchedTerms: this.getMatchedTerms(doc, searchTerms)
         });
       }
     }
@@ -245,6 +252,128 @@ class DocumentationService {
     // Legacy method - keep for backward compatibility
     return this.calculateAdvancedRelevanceScore(doc, [searchTerm], searchTerm);
   }
+  /**
+   * Extract a relevant snippet from content that shows context around matched terms.
+   *
+   * Strategies are tried in order and the first one that produces text wins:
+   * 1. Exact phrase: when the query is longer than 3 characters and occurs
+   *    (case-insensitively) in the content, return a window that starts 50
+   *    characters before the match and ends 150 characters after it.
+   * 2. Line scoring: every non-blank line that does not start with '---' earns
+   *    10 points per contained term when it starts with '#', otherwise 5 points
+   *    per term. The highest-scoring line wins (the earliest one on ties) and is
+   *    returned joined with one line before and two lines after it, cut to 200
+   *    characters. Candidates of 20 characters or fewer are ignored.
+   * 3. Fallback: the description reported by this.extractMetadata(content),
+   *    returned unmodified, or else the first paragraph after the frontmatter that
+   *    is longer than 30 characters and does not start with '#', cut to 200
+   *    characters.
+   * If none of these yields text, an empty string is returned.
+   *
+   * @param {string} content - The document content
+   * @param {string[]} searchTerms - The search terms (compared case-insensitively)
+   * @param {string} originalQuery - The original query
+   * @returns {string} A relevant snippet; '...' marks a cut edge except on the
+   *   metadata description, which is returned as-is
+   */
+  extractRelevantSnippet(content, searchTerms, originalQuery) {
+    const contentLower = content.toLowerCase();
+    const snippetLength = 200;
+    let bestSnippet = '';
+    let bestScore = 0;
+    // First, try to find exact phrase match (queries of 3 characters or fewer skip this step)
+    if (originalQuery.length > 3) {
+      const phraseIndex = contentLower.indexOf(originalQuery.toLowerCase());
+      if (phraseIndex !== -1) {
+        const start = Math.max(0, phraseIndex - 50);
+        const end = Math.min(content.length, phraseIndex + originalQuery.length + 150);
+        return this.cleanSnippet(content.substring(start, end), start > 0, end < content.length);
+      }
+    }
+    // Find the best snippet containing the most search terms
+    const lines = content.split('\n');
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i];
+      const lineLower = line.toLowerCase();
+      // Skip empty lines and frontmatter delimiters (any line starting with '---')
+      if (!line.trim() || line.startsWith('---')) continue;
+      // Count matching terms in this line only; surrounding lines contribute text to the snippet, not score
+      let score = 0;
+      let matchCount = 0;
+      for (const term of searchTerms) {
+        if (lineLower.includes(term.toLowerCase())) {
+          matchCount++;
+          // Higher score for terms in headers (the '#' must be the first character; indented headers count as body text)
+          if (line.startsWith('#')) {
+            score += 10;
+          } else {
+            score += 5;
+          }
+        }
+      }
+      if (matchCount > 0) {
+        // Get context around this line: one line before through two lines after (slice end is exclusive)
+        const contextStart = Math.max(0, i - 1);
+        const contextEnd = Math.min(lines.length, i + 3);
+        const contextLines = lines.slice(contextStart, contextEnd);
+        const snippet = contextLines.join(' ').trim();
+        if (score > bestScore && snippet.length > 20) {
+          bestScore = score;
+          bestSnippet = snippet;
+        }
+      }
+    }
+    // If no good snippet found, return the description or first meaningful paragraph
+    if (!bestSnippet) {
+      const metadata = this.extractMetadata(content);
+      if (metadata.description) {
+        return metadata.description;
+      }
+      // Find first non-empty paragraph after frontmatter. NOTE(review): the regex below carries the m flag, so ^ also matches at the start of later lines; a document without leading frontmatter but with '---' rules further down would lose the text between the first two of them - confirm this is intended
+      const contentWithoutFrontmatter = content.replace(/^---[\s\S]*?---\n*/m, '');
+      const paragraphs = contentWithoutFrontmatter.split(/\n\n+/);
+      for (const para of paragraphs) {
+        const cleaned = para.trim();
+        if (cleaned && !cleaned.startsWith('#') && cleaned.length > 30) {
+          return this.cleanSnippet(cleaned.substring(0, snippetLength), false, cleaned.length > snippetLength);
+        }
+      }
+    }
+    return this.cleanSnippet(bestSnippet.substring(0, snippetLength), false, bestSnippet.length > snippetLength);
+  }
+  /**
+   * Clean and format a snippet for display.
+   *
+   * Collapses every run of whitespace (including newlines) to a single space,
+   * trims the ends, removes '**' and backtick markers, and then adds '...' on
+   * the sides the caller reports as truncated.
+   *
+   * NOTE(review): markers are removed after whitespace has been collapsed, so a
+   * marker that stood between two spaces leaves two adjacent spaces behind.
+   *
+   * @param {string} snippet - Raw text taken from the document
+   * @param {boolean} hasStart - True when text was cut off before the snippet; adds a leading '...'
+   * @param {boolean} hasEnd - True when text was cut off after the snippet; adds a trailing '...'
+   * @returns {string} The cleaned snippet
+   */
+  cleanSnippet(snippet, hasStart, hasEnd) {
+    // Remove multiple spaces and clean up (tabs and newlines are collapsed too)
+    let cleaned = snippet.replace(/\s+/g, ' ').trim();
+    // Remove markdown formatting for readability (only bold markers and backticks)
+    cleaned = cleaned.replace(/\*\*/g, '');
+    cleaned = cleaned.replace(/`/g, '');
+    // Add ellipsis if truncated
+    if (hasStart) cleaned = '...' + cleaned;
+    if (hasEnd) cleaned = cleaned + '...';
+    return cleaned;
+  }
+  /**
+   * Get the terms that matched in this document.
+   *
+   * A term matches when it occurs, case-insensitively and as a plain substring,
+   * in the document title, description or content. When metadata.title is
+   * missing or empty, doc.fileName is used as the title. A missing description
+   * is treated as an empty string.
+   *
+   * @param {Object} doc - Document with a content string, a fileName, and optional metadata.title / metadata.description
+   * @param {string[]} searchTerms - The terms to look for
+   * @returns {string[]} The matching terms, in their original order and casing
+   */
+  getMatchedTerms(doc, searchTerms) {
+    const matched = [];
+    const contentLower = doc.content.toLowerCase();
+    const titleLower = (doc.metadata?.title || doc.fileName).toLowerCase();
+    const descriptionLower = (doc.metadata?.description || '').toLowerCase();
+    for (const term of searchTerms) {
+      const termLower = term.toLowerCase();
+      if (titleLower.includes(termLower) ||
+          descriptionLower.includes(termLower) ||
+          contentLower.includes(termLower)) {
+        matched.push(term);
+      }
+    }
+    return matched;
+  }
   async getGlobalRules() {
     const globalRules = [];

package/src/services/UnifiedSearchService.js ADDED Viewed

@@ -0,0 +1,214 @@
+import { DocumentationService } from './DocumentationService.js';
+import { MultiDocsetDatabase } from './docset/database.js';
+/**
+ * UnifiedSearchService provides a single search interface that searches
+ * both local project documentation and official API documentation (docsets)
+ * with intelligent query parsing and relevance scoring.
+ *
+ * The constructor only stores its two collaborators. Judging from the calls
+ * made in this class, documentationService must provide searchDocuments(query)
+ * and a documents collection with a size property, and multiDocsetDatabase must
+ * provide searchWithTerms(terms, options), getStats() and a databases collection
+ * with a size property.
+ */
+export class UnifiedSearchService {
+  constructor(documentationService, multiDocsetDatabase) {
+    this.documentationService = documentationService;
+    this.multiDocsetDatabase = multiDocsetDatabase;
+  }
+  /**
+   * Parse query into individual search terms, removing stop words.
+   *
+   * The query is lowercased and split on whitespace. Each piece is stripped of
+   * every character other than a-z, 0-9, '-', '_' and '.', so non-ASCII letters
+   * are removed as well. Pieces that end up shorter than two characters or that
+   * are stop words are dropped.
+   *
+   * @param {string} query - The search query
+   * @returns {string[]} Array of search terms
+   */
+  parseQuery(query) {
+    const stopWords = new Set([
+      'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
+      'for', 'of', 'with', 'by', 'how', 'what', 'where', 'when', 'is',
+      'are', 'was', 'were', 'been', 'being', 'have', 'has', 'had',
+      'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may',
+      'might', 'can', 'this', 'that', 'these', 'those'
+    ]);
+    return query.toLowerCase()
+      .split(/\s+/)
+      .map(term => term.replace(/[^a-z0-9-_.]/g, '')) // Keep ASCII alphanumeric, dash, underscore, dot
+      .filter(term => term.length > 1 && !stopWords.has(term));
+  }
+  /**
+   * Search both local documentation and docsets with a unified query.
+   *
+   * Returns an empty array when the query is blank or parses to no terms.
+   * Each source is asked for at most ceil(limit / 2) results; that cap still
+   * applies to the docset search when docsetId is given and the local search is
+   * skipped. Local results have their score multiplied by 5 before sorting.
+   * If at least 5 results score 50 or more, only those are returned; otherwise
+   * results scoring below max(10% of the top score, 10) are dropped.
+   *
+   * @param {string} query - The search query
+   * @param {Object} options - Search options
+   * @param {number} options.limit - Maximum number of results (default: 20)
+   * @param {string} options.docsetId - Limit to specific docset
+   * @param {string} options.type - Filter docset results by type
+   * @returns {Promise<Array>} Combined search results sorted by relevance
+   */
+  async search(query, options = {}) {
+    const { limit = 20, docsetId, type } = options;
+    if (!query || query.trim() === '') {
+      return [];
+    }
+    // Parse query into search terms
+    const searchTerms = this.parseQuery(query);
+    if (searchTerms.length === 0) {
+      return [];
+    }
+    // Perform searches in parallel (Promise.all accepts the plain [] used when local search is skipped)
+    const [localResults, docsetResults] = await Promise.all([
+      // Search local documentation (unless searching specific docset)
+      docsetId ? [] : this.searchLocalDocs(query, searchTerms, Math.ceil(limit / 2)),
+      // Search docsets
+      this.searchDocsets(searchTerms, { type, docsetId, limit: Math.ceil(limit / 2) })
+    ]);
+    // Combine and normalize results
+    const combinedResults = [
+      ...this.normalizeLocalResults(localResults),
+      ...this.normalizeDocsetResults(docsetResults)
+    ];
+    // Apply source-based score boosting (mutates the objects created by the normalize step above)
+    const boostedResults = combinedResults.map(result => {
+      // Boost project documentation scores to prioritize them
+      if (result.type === 'local') {
+        // Multiply project doc scores by 5 to ensure they rank higher
+        // This ensures even moderately relevant project docs appear before API docs
+        result.relevanceScore = result.relevanceScore * 5;
+      }
+      return result;
+    });
+    // Sort by relevance score, highest first
+    const sortedResults = boostedResults
+      .sort((a, b) => b.relevanceScore - a.relevanceScore);
+    // Apply quality filtering
+    // If we have at least 5 high-quality results (score >= 50), filter out low-quality ones
+    const highQualityResults = sortedResults.filter(r => r.relevanceScore >= 50);
+    if (highQualityResults.length >= 5) {
+      // We have enough high-quality results, use only those
+      return highQualityResults.slice(0, limit);
+    } else if (sortedResults.length > 0) {
+      // Include medium quality results, but filter out very low relevance (below 10% of the top score, and never below 10)
+      const minScore = Math.max(sortedResults[0].relevanceScore * 0.1, 10);
+      const qualityResults = sortedResults.filter(r => r.relevanceScore >= minScore);
+      return qualityResults.slice(0, limit);
+    }
+    return [];
+  }
+  /**
+   * Search local project documentation.
+   *
+   * Delegates to documentationService.searchDocuments(query) and keeps the first
+   * `limit` results. The searchTerms argument is accepted but not used here.
+   * Any error is logged with console.error and an empty array is returned.
+   *
+   * @param {string} query - The original query string
+   * @param {string[]} searchTerms - Parsed terms (currently unused)
+   * @param {number} limit - Maximum number of results to keep
+   * @returns {Promise<Array>} Local search results
+   */
+  async searchLocalDocs(query, searchTerms, limit) {
+    try {
+      // Use existing DocumentationService search which already has good relevance scoring
+      const results = await this.documentationService.searchDocuments(query);
+      return results.slice(0, limit);
+    } catch (error) {
+      console.error('Error searching local docs:', error);
+      return [];
+    }
+  }
+  /**
+   * Search docsets using term-based search.
+   *
+   * Returns whatever multiDocsetDatabase.searchWithTerms returns. A synchronous
+   * throw is logged with console.error and replaced by an empty array.
+   *
+   * NOTE(review): the result is returned without await, so if searchWithTerms
+   * returns a promise, a rejection bypasses this catch and reaches the caller -
+   * confirm whether searchWithTerms is synchronous.
+   *
+   * @param {string[]} searchTerms - Parsed search terms
+   * @param {Object} options - Passed through unchanged (search() supplies type, docsetId and limit)
+   */
+  searchDocsets(searchTerms, options) {
+    try {
+      // Use the new term-based search method
+      return this.multiDocsetDatabase.searchWithTerms(searchTerms, options);
+    } catch (error) {
+      console.error('Error searching docsets:', error);
+      return [];
+    }
+  }
+  /**
+   * Normalize local documentation results to unified format.
+   *
+   * Builds a new object per result with type 'local' and source 'project'.
+   * fileName is used for id, path and url, and as the title when
+   * metadata.title is missing. The description falls back to the snippet, then
+   * to an empty string.
+   *
+   * @param {Array} results - Results as returned by searchLocalDocs
+   * @returns {Array} Results in the unified format
+   */
+  normalizeLocalResults(results) {
+    return results.map(doc => ({
+      id: doc.fileName,
+      title: doc.metadata?.title || doc.fileName,
+      description: doc.metadata?.description || doc.snippet || '',
+      type: 'local',
+      source: 'project',
+      path: doc.fileName,
+      url: doc.fileName,
+      relevanceScore: doc.relevanceScore || 0,
+      metadata: doc.metadata,
+      content: doc.content,
+      snippet: doc.snippet,
+      matchedTerms: doc.matchedTerms || []
+    }));
+  }
+  /**
+   * Normalize docset results to unified format and remove duplicates.
+   *
+   * Entries are keyed by name and entry type; the key does not include the
+   * docset id, so equal name/type pairs from different docsets collapse into
+   * one. Between duplicates, an entry whose url contains 'language=swift'
+   * replaces a non-Swift one. When neither is Swift the higher score wins. In
+   * every other case the entry seen first is kept.
+   *
+   * @param {Array} results - Results as returned by searchDocsets
+   * @returns {Array} Deduplicated results in the unified format, in first-seen key order
+   */
+  normalizeDocsetResults(results) {
+    // First normalize all results
+    const normalized = results.map(doc => ({
+      id: `${doc.docsetId}:${doc.name}`,
+      title: doc.name,
+      description: `${doc.type} in ${doc.docsetName}`,
+      type: 'docset',
+      source: doc.docsetName,
+      path: doc.path || doc.url,
+      url: doc.url,
+      relevanceScore: doc.relevanceScore || 0,
+      docsetId: doc.docsetId,
+      docsetName: doc.docsetName,
+      entryType: doc.type
+    }));
+    // Deduplicate by name + type, preferring Swift entries
+    const dedupMap = new Map();
+    for (const doc of normalized) {
+      const key = `${doc.title}:${doc.entryType}`;
+      const existing = dedupMap.get(key);
+      if (!existing) {
+        dedupMap.set(key, doc);
+      } else {
+        // Prefer Swift entries (they have 'language=swift' in the URL)
+        const isSwift = doc.url && doc.url.includes('language=swift');
+        const existingIsSwift = existing.url && existing.url.includes('language=swift');
+        if (isSwift && !existingIsSwift) {
+          dedupMap.set(key, doc);
+        } else if (!isSwift && !existingIsSwift && doc.relevanceScore > existing.relevanceScore) {
+          // If neither is Swift, keep the one with higher score
+          dedupMap.set(key, doc);
+        }
+      }
+    }
+    return Array.from(dedupMap.values());
+  }
+  /**
+   * Get a summary of available documentation sources.
+   *
+   * Reads documentationService.documents.size, multiDocsetDatabase.databases.size
+   * and the value of multiDocsetDatabase.getStats(). The method is declared
+   * async but contains no await, so the getStats() value is passed through as
+   * returned.
+   *
+   * @returns {Promise<Object>} { local: { documentCount, indexed }, docsets: { count, details } }
+   */
+  async getSources() {
+    const localDocs = this.documentationService.documents.size;
+    const docsets = this.multiDocsetDatabase.databases.size;
+    const docsetStats = this.multiDocsetDatabase.getStats();
+    return {
+      local: {
+        documentCount: localDocs,
+        indexed: localDocs > 0
+      },
+      docsets: {
+        count: docsets,
+        details: docsetStats
+      }
+    };
+  }
+}

package/src/services/__tests__/DocumentationService.test.js ADDED Viewed

@@ -0,0 +1,318 @@
+import { DocumentationService } from '../DocumentationService.js';
+import fs from 'fs-extra';
+import path from 'path';
+import { fileURLToPath } from 'url';
+import { dirname } from 'path';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+describe('DocumentationService', () => { // Suite for snippet extraction, matched-term reporting, search filtering and relevance scoring
+  let docService;
+  let tempDocsPath;
+  beforeEach(async () => { // Each test gets a fresh service rooted at its own temp directory next to this file
+    tempDocsPath = path.join(__dirname, 'temp-docs-' + Date.now());
+    await fs.ensureDir(tempDocsPath);
+    docService = new DocumentationService(tempDocsPath);
+  });
+  afterEach(async () => {
+    await fs.remove(tempDocsPath);
+  });
+  describe('extractRelevantSnippet', () => {
+    it('should extract snippet around exact phrase match', () => { // Query occurs verbatim in the content, so the exact-phrase window is returned
+      const content = `This is some content before the match.
+      Here we discuss how to use AlarmKit Framework effectively.
+      And this is content after the match.`;
+      const snippet = docService.extractRelevantSnippet(
+        content,
+        ['use', 'alarmkit', 'framework'],
+        'use AlarmKit Framework'
+      );
+      expect(snippet).toContain('use AlarmKit Framework');
+      expect(snippet.length).toBeLessThanOrEqual(250); // Window is 50 chars before + the query + 150 after, plus ellipses
+    });
+    it('should find best snippet with most matching terms', () => { // NOTE(review): the query also occurs case-insensitively in the '# URLSession Configuration' line, so this appears to take the exact-phrase branch rather than line scoring
+      const content = `# Introduction
+      This document covers various topics.
+      # URLSession Configuration
+      Learn how to configure URLSession properly.
+      # Advanced Usage
+      Here we discuss URLSession and configuration together.
+      URLSession provides many configuration options.`;
+      const snippet = docService.extractRelevantSnippet(
+        content,
+        ['urlsession', 'configuration'],
+        'URLSession configuration'
+      );
+      expect(snippet).toContain('URLSession');
+      expect(snippet).toContain('configuration');
+    });
+    it('should prioritize headers containing search terms', () => { // NOTE(review): the query matches the header text case-insensitively, so the exact-phrase branch appears to return first and the header bonus is not what is being exercised
+      const content = `# Random Header
+      Some content here.
+      # AlarmKit Integration
+      This is the section about integration.
+      Some other content mentioning AlarmKit.`;
+      const snippet = docService.extractRelevantSnippet(
+        content,
+        ['alarmkit', 'integration'],
+        'AlarmKit integration'
+      );
+      expect(snippet).toContain('# AlarmKit Integration');
+    });
+    it('should return description from metadata if no good snippet found', () => { // No term occurs in the content; relies on extractMetadata (not shown here) reading the frontmatter description
+      const content = `---
+title: Test Document
+description: This is a comprehensive guide to using the API
+---
+Some unrelated content here.`;
+      const snippet = docService.extractRelevantSnippet(
+        content,
+        ['nonexistent', 'terms'],
+        'nonexistent terms'
+      );
+      expect(snippet).toBe('This is a comprehensive guide to using the API');
+    });
+    it('should clean and format snippets properly', () => { // 'bold text' is not contiguous in the content because of the ** markers, so this goes through line scoring and cleanSnippet
+      const content = `This is **bold** text with \`code\` and     multiple    spaces.`;
+      const snippet = docService.extractRelevantSnippet(
+        content,
+        ['bold', 'text'],
+        'bold text'
+      );
+      expect(snippet).not.toContain('**');
+      expect(snippet).not.toContain('`');
+      expect(snippet).not.toMatch(/\s{2,}/);
+    });
+  });
+  describe('getMatchedTerms', () => {
+    it('should return terms that match in document', () => { // Matching is a case-insensitive substring test; 'api' appears nowhere in this doc
+      const doc = {
+        content: 'Learn about URLSession and networking in Swift',
+        metadata: {
+          title: 'Swift Networking Guide',
+          description: 'A guide to URLSession'
+        },
+        fileName: 'networking.md'
+      };
+      const matched = docService.getMatchedTerms(doc, ['urlsession', 'swift', 'api']);
+      expect(matched).toContain('urlsession');
+      expect(matched).toContain('swift');
+      expect(matched).not.toContain('api');
+    });
+    it('should match terms in title and description', () => { // None of the terms occur in the content, so every match comes from the metadata
+      const doc = {
+        content: 'Some content',
+        metadata: {
+          title: 'AlarmKit Framework',
+          description: 'Learn to use AlarmKit'
+        },
+        fileName: 'guide.md'
+      };
+      const matched = docService.getMatchedTerms(doc, ['alarmkit', 'framework', 'use']);
+      expect(matched).toEqual(['alarmkit', 'framework', 'use']);
+    });
+  });
+  describe('searchDocuments with enhanced features', () => {
+    beforeEach(async () => {
+      // Create test documents: one strongly relevant, one that mentions AlarmKit in passing, one unrelated
+      await fs.writeFile(
+        path.join(tempDocsPath, 'high-relevance.md'),
+        `---
+title: AlarmKit Framework Guide
+description: Complete guide to using AlarmKit Framework
+keywords: [alarmkit, framework, ios]
+---
+# AlarmKit Framework
+Learn how to use AlarmKit Framework effectively.
+AlarmKit provides powerful alarm functionality.`
+      );
+      await fs.writeFile(
+        path.join(tempDocsPath, 'medium-relevance.md'),
+        `---
+title: iOS Development
+description: General iOS development guide
+---
+# iOS Development
+This guide covers various frameworks including AlarmKit.`
+      );
+      await fs.writeFile(
+        path.join(tempDocsPath, 'low-relevance.md'),
+        `---
+title: Random Document
+---
+# Random Content
+This has nothing to do with alarms or kits.`
+      );
+      await docService.initialize();
+    });
+    it('should return results with snippets and matched terms', async () => {
+      const results = await docService.searchDocuments('AlarmKit Framework');
+      expect(results.length).toBeGreaterThan(0);
+      expect(results[0].snippet).toBeDefined();
+      expect(results[0].matchedTerms).toBeDefined();
+      expect(results[0].matchedTerms).toContain('alarmkit');
+      expect(results[0].matchedTerms).toContain('framework');
+    });
+    it('should filter out low relevance results', async () => {
+      const results = await docService.searchDocuments('AlarmKit Framework');
+      // Should not include the "Random Document" with no relevant content
+      const hasLowRelevance = results.some(r => r.metadata?.title === 'Random Document');
+      expect(hasLowRelevance).toBe(false);
+    });
+    it('should prioritize exact phrase matches', async () => {
+      const results = await docService.searchDocuments('AlarmKit Framework');
+      expect(results[0].metadata?.title).toBe('AlarmKit Framework Guide');
+      expect(results[0].relevanceScore).toBeGreaterThan(50);
+    });
+    it('should boost keyword matches', async () => {
+      const results = await docService.searchDocuments('alarmkit');
+      // Document with alarmkit in keywords should score higher (only high-relevance.md declares keywords)
+      expect(results[0].metadata?.keywords).toContain('alarmkit');
+    });
+  });
+  describe('calculateAdvancedRelevanceScore', () => { // These cases call the scorer directly on in-memory doc objects; no files are written
+    it('should give high score for exact phrase match', () => {
+      const doc = {
+        content: 'Learn how to use AlarmKit Framework in your iOS app',
+        metadata: { title: 'iOS Guide' },
+        fileName: 'guide.md'
+      };
+      const score = docService.calculateAdvancedRelevanceScore(
+        doc,
+        ['alarmkit', 'framework'],
+        'AlarmKit Framework'
+      );
+      expect(score).toBeGreaterThan(20); // Exact phrase bonus
+    });
+    it('should boost matches in title', () => { // Both docs mention the term in content; only doc1 also has it in the title
+      const doc1 = {
+        content: 'Some content about URLSession',
+        metadata: { title: 'URLSession Guide' },
+        fileName: 'guide1.md'
+      };
+      const doc2 = {
+        content: 'URLSession is mentioned here',
+        metadata: { title: 'Random Guide' },
+        fileName: 'guide2.md'
+      };
+      const score1 = docService.calculateAdvancedRelevanceScore(
+        doc1,
+        ['urlsession'],
+        'URLSession'
+      );
+      const score2 = docService.calculateAdvancedRelevanceScore(
+        doc2,
+        ['urlsession'],
+        'URLSession'
+      );
+      expect(score1).toBeGreaterThan(score2);
+    });
+    it('should apply term coverage bonus', () => { // Same doc scored twice: once with one term, once with two
+      const doc = {
+        content: 'URLSession configuration and usage',
+        metadata: { title: 'Networking' },
+        fileName: 'net.md'
+      };
+      const score1 = docService.calculateAdvancedRelevanceScore(
+        doc,
+        ['urlsession'],
+        'URLSession'
+      );
+      const score2 = docService.calculateAdvancedRelevanceScore(
+        doc,
+        ['urlsession', 'configuration'],
+        'URLSession configuration'
+      );
+      // Matching both terms should score higher
+      expect(score2).toBeGreaterThan(score1);
+    });
+    it('should cap content match frequency to prevent spam', () => { // 100 repetitions of the term versus a single occurrence
+      const spamDoc = {
+        content: 'test '.repeat(100),
+        metadata: { title: 'Spam' },
+        fileName: 'spam.md'
+      };
+      const normalDoc = {
+        content: 'This is a test document with normal content',
+        metadata: { title: 'Normal' },
+        fileName: 'normal.md'
+      };
+      const spamScore = docService.calculateAdvancedRelevanceScore(
+        spamDoc,
+        ['test'],
+        'test'
+      );
+      const normalScore = docService.calculateAdvancedRelevanceScore(
+        normalDoc,
+        ['test'],
+        'test'
+      );
+      // Spam score should be capped, not drastically higher (less than 5x the single-occurrence score)
+      expect(spamScore / normalScore).toBeLessThan(5);
+    });
+  });
+});