npm - @mnemonik/shared - Versions diffs - 1.0.0 - Mend

@mnemonik/shared 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/dist/FileSystemReader.d.ts +60 -0
package/dist/FileSystemReader.d.ts.map +1 -0
package/dist/FileSystemReader.js +236 -0
package/dist/FileSystemReader.js.map +1 -0
package/dist/asyncUtils.d.ts +2 -0
package/dist/asyncUtils.d.ts.map +1 -0
package/dist/asyncUtils.js +12 -0
package/dist/asyncUtils.js.map +1 -0
package/dist/codeScanner.d.ts +96 -0
package/dist/codeScanner.d.ts.map +1 -0
package/dist/codeScanner.js +620 -0
package/dist/codeScanner.js.map +1 -0
package/dist/index.d.ts +11 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +11 -0
package/dist/index.js.map +1 -0
package/dist/instructions.d.ts +34 -0
package/dist/instructions.d.ts.map +1 -0
package/dist/instructions.js +52 -0
package/dist/instructions.js.map +1 -0
package/dist/logger.d.ts +4 -0
package/dist/logger.d.ts.map +1 -0
package/dist/logger.js +8 -0
package/dist/logger.js.map +1 -0
package/dist/usageGuide.d.ts +14 -0
package/dist/usageGuide.d.ts.map +1 -0
package/dist/usageGuide.js +75 -0
package/dist/usageGuide.js.map +1 -0
package/package.json +28 -0
package/src/FileSystemReader.ts +299 -0
package/src/asyncUtils.ts +16 -0
package/src/codeScanner.ts +727 -0
package/src/index.ts +17 -0
package/src/instructions.ts +55 -0
package/src/logger.ts +7 -0
package/src/usageGuide.ts +75 -0
package/tsconfig.json +19 -0

package/src/codeScanner.ts ADDED Viewed

@@ -0,0 +1,727 @@
+/**
+ * Code Scanner - Parse and chunk source files for embedding
+ */
+import { readdir, readFile, stat, lstat, realpath } from 'fs/promises';
+import { join, relative, extname } from 'path';
+import { createHash } from 'crypto';
+import { debug as logDebug } from './logger.js';
+import { withTimeout } from './asyncUtils.js';
+/**
+ * v2.46: File operation timeout (5 seconds) to prevent hanging on slow/unresponsive filesystems.
+ * The value is in milliseconds and is passed to `withTimeout` around the
+ * readdir, lstat, stat and readFile calls made by the scanner in this file.
+ */
+const FILE_OP_TIMEOUT_MS = 5000;
+/**
+ * One piece of a scanned file, plus the information needed to locate it again.
+ * Produced by the chunking methods of `CodeScanner`.
+ */
+export interface CodeChunk {
+  /** Chunk text; the chunkers in this file store it with surrounding whitespace trimmed. */
+  content: string;
+  /** File path computed relative to the scan root. */
+  filePath: string;
+  /** Language name derived from the file extension (e.g. 'typescript', 'markdown', 'unknown'). */
+  language: string;
+  /** 1-based line on which the chunk starts. */
+  startLine: number;
+  /** 1-based line on which the chunk ends (inclusive). */
+  endLine: number;
+  /** How the chunk was obtained: a matched function/class, a markdown section ('module'), or a raw line window. */
+  chunkType: 'function' | 'class' | 'module' | 'raw';
+  /** First 16 hex characters of a SHA-256 digest of the chunk text (used for drift detection). */
+  contentHash: string;
+  metadata: {
+    /** Last path segment of the file. */
+    fileName: string;
+    /** File extension including the leading dot. */
+    extension: string;
+    /** Size of the whole source file as reported by `stat`, not of this chunk. */
+    size: number;
+    signature?: string; // v3.3: Function/class signature (e.g. "function foo(bar: string): number")
+    symbolName?: string; // v3.3: Symbol name (e.g. "foo")
+  };
+}
+/**
+ * Optional settings for `CodeScanner`. Any field left out falls back to the
+ * corresponding entry of DEFAULT_OPTIONS; a supplied array replaces the
+ * default array rather than being merged with it.
+ */
+export interface ScanOptions {
+  /** Upper bound for a chunk, measured in characters of chunk text. */
+  maxChunkSize?: number; // characters
+  /** Candidate chunks shorter than this many characters are skipped by the chunkers. */
+  minChunkSize?: number;
+  /** Names or `*` globs; a path matching any of them is not scanned. */
+  ignorePatterns?: string[];
+  /** File extensions (with leading dot) to scan; compared against `extname` of each file. */
+  includeExtensions?: string[];
+}
+/**
+ * Defaults applied by the CodeScanner constructor for every option the caller
+ * does not supply. Entries in `ignorePatterns` containing `*` are treated as
+ * globs by shouldIgnore; the others are compared against path segments.
+ */
+const DEFAULT_OPTIONS: Required<ScanOptions> = {
+  maxChunkSize: 8000, // ~2000 tokens
+  minChunkSize: 100,
+  ignorePatterns: [
+    // JavaScript/Node
+    'node_modules',
+    '.next',
+    // Python virtual environments
+    'venv',
+    '.venv',
+    'env',
+    '.env',
+    '__pycache__',
+    '.tox',
+    '.mypy_cache',
+    '.pytest_cache',
+    // Build outputs
+    'dist',
+    'build',
+    'target',
+    'bin',
+    'obj',
+    'coverage',
+    // Version control & cache
+    '.git',
+    '.cache',
+    '.DS_Store',
+    // Log and lock files
+    '*.log',
+    '*.lock',
+    'package-lock.json',
+    'yarn.lock',
+    // Minified/bundled files - too large and not useful for context
+    '*.min.js',
+    '*.min.css',
+    '*.bundle.js',
+    '*.legacy.js',
+    '*.map',
+  ],
+  // Extensions are listed in lower case with the leading dot.
+  includeExtensions: [
+    '.ts',
+    '.tsx',
+    '.js',
+    '.jsx',
+    '.py',
+    '.rs',
+    '.go',
+    '.java',
+    '.c',
+    '.cpp',
+    '.h',
+    '.cs',
+    '.rb',
+    '.php',
+    '.swift',
+    '.kt',
+    '.md',
+  ],
+};
+export class CodeScanner {
+  private options: Required<ScanOptions>;
+  /**
+   * Create a scanner.
+   * @param options Overrides for DEFAULT_OPTIONS. A field that is omitted or
+   *   explicitly `undefined` keeps its default value.
+   */
+  constructor(options: ScanOptions = {}) {
+    // Resolve field by field: spreading `options` over the defaults would let
+    // an explicitly `undefined` property (e.g. `{ maxChunkSize: undefined }`)
+    // overwrite a default and leave `this.options` with a missing value.
+    this.options = {
+      maxChunkSize: options.maxChunkSize ?? DEFAULT_OPTIONS.maxChunkSize,
+      minChunkSize: options.minChunkSize ?? DEFAULT_OPTIONS.minChunkSize,
+      ignorePatterns: options.ignorePatterns ?? DEFAULT_OPTIONS.ignorePatterns,
+      includeExtensions: options.includeExtensions ?? DEFAULT_OPTIONS.includeExtensions,
+    };
+  }
+  /**
+   * Maximum directory depth for recursive scanning
+   * v2.43: Prevents runaway recursion on deep/symlinked structures.
+   * The scan root is depth 0; traverseDirectory returns without reading a
+   * directory once its depth reaches this value.
+   */
+  private static readonly MAX_DEPTH = 10;
+  /**
+   * Walk a directory tree and collect the chunks of every file it accepts.
+   * v2.43: traversal starts at depth 0 and is bounded by MAX_DEPTH (10).
+   * @param rootPath Directory to scan; it is also the base against which
+   *   chunk file paths are made relative.
+   * @returns The collected chunks, in traversal order.
+   */
+  async scanDirectory(rootPath: string): Promise<CodeChunk[]> {
+    const collected: CodeChunk[] = [];
+    const startDepth = 0;
+    await this.traverseDirectory(rootPath, rootPath, collected, startDepth);
+    return collected;
+  }
+  /**
+   * Scan specific files and extract code chunks.
+   * Pass rootPath to compute proper relative file paths in chunk metadata.
+   *
+   * Ignore patterns are matched against the path relative to rootPath, the
+   * same way directory traversal does, so directories above the root (for
+   * example a checkout living under a folder called `build`) cannot cause
+   * every file to be skipped. Files whose extension is not in
+   * `includeExtensions` are skipped. Errors for one file are logged and do not
+   * stop the remaining files from being scanned.
+   *
+   * @param filePaths Files to scan.
+   * @param rootPath Base directory for relative paths. When empty, the file's
+   *   own directory is used so the relative path is the file name rather than
+   *   an empty string, and ignore patterns are matched against the path as given.
+   * @returns Chunks of all accepted files, in input order.
+   */
+  async scanFiles(filePaths: string[], rootPath: string): Promise<CodeChunk[]> {
+    const chunks: CodeChunk[] = [];
+    for (const filePath of filePaths) {
+      try {
+        // join(file, '..') normalizes to the containing directory.
+        const effectiveRoot = rootPath || join(filePath, '..');
+        const ignoreTarget = rootPath ? relative(rootPath, filePath) : filePath;
+        if (this.shouldIgnore(ignoreTarget)) {
+          continue;
+        }
+        const ext = extname(filePath);
+        if (this.options.includeExtensions.includes(ext)) {
+          const fileChunks = await this.parseFile(filePath, effectiveRoot);
+          chunks.push(...fileChunks);
+        }
+      } catch (error) {
+        logDebug('Error scanning file', { filePath, error });
+      }
+    }
+    return chunks;
+  }
+  /**
+   * Recursively traverse directory
+   * v2.43: Added depth parameter with max limit
+   *
+   * For each entry of `currentPath`: skip it if its root-relative path matches
+   * an ignore pattern, skip symlinks that resolve outside the root, recurse
+   * into directories, and parse files whose extension is in `includeExtensions`.
+   * A failure on one entry (for example a broken symlink or a timed-out stat)
+   * is logged and only that entry is skipped; a failure to list the directory
+   * itself is logged and ends the traversal of that directory.
+   *
+   * @param currentPath Directory being read.
+   * @param rootPath Scan root, used for relative paths and symlink containment.
+   * @param chunks Accumulator that parsed chunks are appended to.
+   * @param depth Depth of `currentPath` below the root (root is 0).
+   */
+  private async traverseDirectory(
+    currentPath: string,
+    rootPath: string,
+    chunks: CodeChunk[],
+    depth: number
+  ): Promise<void> {
+    // v2.43: Prevent infinite recursion
+    if (depth >= CodeScanner.MAX_DEPTH) {
+      logDebug('Max directory depth reached, skipping', { path: currentPath, depth });
+      return;
+    }
+    try {
+      // v2.46: Wrap readdir with timeout to prevent hanging
+      const entries = await withTimeout(
+        readdir(currentPath),
+        FILE_OP_TIMEOUT_MS,
+        `readdir timed out: ${currentPath}`
+      );
+      // Real path of the root; resolved lazily and at most once per directory.
+      let resolvedRoot: string | undefined;
+      for (const entry of entries) {
+        const fullPath = join(currentPath, entry);
+        try {
+          const relativePath = relative(rootPath, fullPath);
+          // Check ignore patterns
+          if (this.shouldIgnore(relativePath)) {
+            continue;
+          }
+          const lstats = await withTimeout(
+            lstat(fullPath),
+            FILE_OP_TIMEOUT_MS,
+            `lstat timed out: ${fullPath}`
+          );
+          let stats = lstats;
+          if (lstats.isSymbolicLink()) {
+            const resolved = await withTimeout(
+              realpath(fullPath),
+              FILE_OP_TIMEOUT_MS,
+              `realpath timed out: ${fullPath}`
+            );
+            if (resolvedRoot === undefined) {
+              resolvedRoot = await withTimeout(
+                realpath(rootPath),
+                FILE_OP_TIMEOUT_MS,
+                `realpath timed out: ${rootPath}`
+              );
+            }
+            // Separator-independent containment test: the target escapes the
+            // root when the path from the root starts with `..` or is absolute
+            // (e.g. another drive on Windows).
+            const fromRoot = relative(resolvedRoot, resolved);
+            const escapesRoot =
+              /^\.\.(?:[\\/]|$)/.test(fromRoot) || /^(?:[a-zA-Z]:|[\\/])/.test(fromRoot);
+            if (escapesRoot) {
+              logDebug('Skipping symlink escaping project root', { fullPath, resolved, rootPath });
+              continue;
+            }
+            // Follow the link to learn whether it points at a file or a directory.
+            stats = await withTimeout(
+              stat(fullPath),
+              FILE_OP_TIMEOUT_MS,
+              `stat timed out: ${fullPath}`
+            );
+          }
+          if (stats.isDirectory()) {
+            await this.traverseDirectory(fullPath, rootPath, chunks, depth + 1);
+          } else if (stats.isFile()) {
+            const ext = extname(fullPath);
+            if (this.options.includeExtensions.includes(ext)) {
+              const fileChunks = await this.parseFile(fullPath, rootPath);
+              chunks.push(...fileChunks);
+            }
+          }
+        } catch (error) {
+          // Keep going: one unreadable entry must not hide its siblings.
+          logDebug('Error processing directory entry', { path: fullPath, error });
+        }
+      }
+    } catch (error) {
+      logDebug('Error traversing directory', { path: currentPath, error });
+    }
+  }
+  /**
+   * Check if path should be ignored
+   * v2.71: Fixed glob-to-regex conversion and substring matching.
+   * - Escape regex special chars before replacing * with .*
+   * - Replace ALL * occurrences (not just the first)
+   * - For non-glob patterns, match on path segments to avoid false positives
+   *   (e.g., '.env' should not match '.environment.ts')
+   *
+   * Glob patterns are anchored as well: a glob without `/` must match one
+   * whole path segment (so '*.map' matches 'bundle.js.map' but not
+   * 'utils.mapper.ts'), and a glob containing `/` must start and end on
+   * segment boundaries. Both `/` and `\` are accepted as separators in `path`.
+   *
+   * @param path Path to test, normally relative to the scan root.
+   * @returns true when any configured ignore pattern matches.
+   */
+  private shouldIgnore(path: string): boolean {
+    // Normalize separators so Windows-style relative paths split into segments too.
+    const normalized = path.replace(/\\/g, '/');
+    const segments = normalized.split('/');
+    return this.options.ignorePatterns.some((pattern) => {
+      if (pattern.includes('*')) {
+        // Escape regex special chars, then replace all * with .*
+        const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
+        const source = escaped.replace(/\*/g, '.*');
+        if (pattern.includes('/')) {
+          // Multi-segment glob: match anywhere in the path, on segment boundaries.
+          return new RegExp(`(?:^|/)${source}(?:/|$)`).test(normalized);
+        }
+        // Single-segment glob: some segment must match it completely.
+        const wholeSegment = new RegExp(`^${source}$`);
+        return segments.some((segment) => wholeSegment.test(segment));
+      }
+      // For non-glob patterns, check if any path segment matches exactly
+      // or if the full path ends with the pattern (covers patterns that contain '/')
+      return segments.includes(pattern) || normalized.endsWith('/' + pattern);
+    });
+  }
+  /**
+   * Largest file, in bytes as reported by stat, that parseFile will read.
+   * v2.43: Added 10MB file size limit
+   */
+  private static readonly MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
+  /**
+   * Read one file and turn it into chunks.
+   * Markdown goes to chunkMarkdown. Other files are matched against the
+   * language patterns; if nothing matches the file is chunked raw. When
+   * matches exist but cover under half of a file longer than 50 lines, raw
+   * chunks that do not overlap any match are appended.
+   * Any error is logged and yields an empty result.
+   * @param filePath File to read.
+   * @param rootPath Base directory for the relative path stored in each chunk.
+   */
+  private async parseFile(filePath: string, rootPath: string): Promise<CodeChunk[]> {
+    try {
+      // v2.43: size is checked before reading to avoid memory issues
+      // v2.46: stat runs under the file-operation timeout
+      const fileStats = await withTimeout(
+        stat(filePath),
+        FILE_OP_TIMEOUT_MS,
+        `stat timed out: ${filePath}`
+      );
+      const byteSize = fileStats.size;
+      if (byteSize > CodeScanner.MAX_FILE_SIZE) {
+        logDebug('Skipping file exceeding size limit', {
+          filePath,
+          size: byteSize,
+          limit: CodeScanner.MAX_FILE_SIZE,
+        });
+        return [];
+      }
+      // v2.46: readFile runs under the file-operation timeout
+      const text = await withTimeout(
+        readFile(filePath, 'utf-8'),
+        FILE_OP_TIMEOUT_MS,
+        `readFile timed out: ${filePath}`
+      );
+      const relPath = relative(rootPath, filePath);
+      const lang = this.detectLanguage(filePath);
+      if (lang === 'markdown') {
+        return this.chunkMarkdown(text, relPath, byteSize);
+      }
+      const structured = this.extractStructuredChunks(text, lang);
+      if (structured.length === 0) {
+        // Nothing recognizable: fall back to raw chunking
+        return this.chunkRaw(text, relPath, lang, byteSize);
+      }
+      const fileMetadata = {
+        fileName: filePath.split('/').pop() || '',
+        extension: extname(filePath),
+        size: byteSize,
+      };
+      const mapped = structured.map(({ signature, symbolName, ...chunk }) => ({
+        ...chunk,
+        filePath: relPath,
+        language: lang,
+        metadata: {
+          ...fileMetadata,
+          ...(signature && { signature }),
+          ...(symbolName && { symbolName }),
+        },
+      }));
+      // Coverage check: count the distinct lines touched by structured chunks.
+      // A single small match must not block raw chunking of a large file.
+      const lineTotal = text.split('\n').length;
+      const covered = new Set<number>();
+      structured.forEach(({ startLine, endLine }) => {
+        for (let line = startLine; line <= endLine; line += 1) {
+          covered.add(line);
+        }
+      });
+      const sparselyCovered = covered.size / lineTotal < 0.5 && lineTotal > 50;
+      if (sparselyCovered) {
+        // Keep only the raw chunks that lie entirely before or after every structured chunk
+        const supplemental = this.chunkRaw(text, relPath, lang, byteSize).filter((raw) =>
+          structured.every((sc) => raw.startLine > sc.endLine || raw.endLine < sc.startLine)
+        );
+        mapped.push(...supplemental);
+      }
+      return mapped;
+    } catch (error) {
+      logDebug('Error parsing file', { filePath, error });
+      return [];
+    }
+  }
+  /**
+   * Map a file's extension (case-insensitively) to a language name.
+   * @param filePath Path whose extension is inspected.
+   * @returns The language name, or 'unknown' for any other extension.
+   */
+  private detectLanguage(filePath: string): string {
+    switch (extname(filePath).toLowerCase()) {
+      case '.ts':
+      case '.tsx':
+        return 'typescript';
+      case '.js':
+      case '.jsx':
+        return 'javascript';
+      case '.py':
+        return 'python';
+      case '.rs':
+        return 'rust';
+      case '.go':
+        return 'go';
+      case '.java':
+        return 'java';
+      case '.c':
+      case '.h':
+        return 'c';
+      case '.cpp':
+        return 'cpp';
+      case '.cs':
+        return 'csharp';
+      case '.rb':
+        return 'ruby';
+      case '.php':
+        return 'php';
+      case '.swift':
+        return 'swift';
+      case '.kt':
+        return 'kotlin';
+      case '.md':
+        return 'markdown';
+      default:
+        return 'unknown';
+    }
+  }
+  /**
+   * Chunk markdown files by headers
+   *
+   * A new section starts at every ATX header line (`#` to `######` followed by
+   * whitespace). Lines inside fenced code blocks (``` or ~~~) are never treated
+   * as headers, so a shell comment in an example does not split its section.
+   * Sections shorter than `minChunkSize` (after trimming) are dropped. If a
+   * section grows past `maxChunkSize` it is cut at the current line and emitted
+   * as a 'raw' chunk.
+   *
+   * @param content Full markdown text.
+   * @param filePath Relative path stored on each chunk.
+   * @param size File size stored in each chunk's metadata.
+   * @returns Chunks in document order; line numbers are 1-based and inclusive.
+   */
+  private chunkMarkdown(content: string, filePath: string, size: number): CodeChunk[] {
+    const chunks: CodeChunk[] = [];
+    const lines = content.split('\n');
+    const fileName = filePath.split('/').pop() || '';
+    let currentChunk: string[] = [];
+    // Length of currentChunk.join('\n'), maintained incrementally so the
+    // section is not re-joined on every line.
+    let currentLength = 0;
+    let currentStartLine = 1;
+    // Marker run (e.g. '```') of the fence we are inside, or null outside fences.
+    let openFence: string | null = null;
+    // Emit the accumulated lines as one chunk ending on `endLine`.
+    const pushChunk = (endLine: number, chunkType: 'module' | 'raw', enforceMin: boolean): void => {
+      const chunkContent = currentChunk.join('\n').trim();
+      if (enforceMin && chunkContent.length < this.options.minChunkSize) {
+        return;
+      }
+      chunks.push({
+        content: chunkContent,
+        filePath,
+        language: 'markdown',
+        startLine: currentStartLine,
+        endLine,
+        chunkType,
+        contentHash: this.hash(chunkContent),
+        metadata: {
+          fileName,
+          extension: '.md',
+          size,
+        },
+      });
+    };
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i];
+      if (line === undefined) continue;
+      // Track fenced code blocks: a fence opens with 3+ backticks or tildes and
+      // closes with a run of the same character at least as long, with nothing after it.
+      const fenceMatch = /^ {0,3}(`{3,}|~{3,})/.exec(line);
+      if (fenceMatch !== null) {
+        const marker = fenceMatch[1] ?? '';
+        if (openFence === null) {
+          openFence = marker;
+        } else if (
+          marker[0] === openFence[0] &&
+          marker.length >= openFence.length &&
+          line.slice(fenceMatch[0].length).trim() === ''
+        ) {
+          openFence = null;
+        }
+      }
+      const isHeader = openFence === null && /^#{1,6}\s/.test(line);
+      // If we hit a new header and have content, push the previous chunk
+      if (isHeader && currentChunk.length > 0) {
+        pushChunk(i, 'module', true); // ends on the previous line; sections are treated as modules
+        currentChunk = [];
+        currentLength = 0;
+        currentStartLine = i + 1;
+      }
+      // One extra character for the '\n' that join() puts before every line but the first.
+      currentLength += (currentChunk.length > 0 ? 1 : 0) + line.length;
+      currentChunk.push(line);
+      // If chunk gets too big, force a split (fallback to raw-like behavior but inside markdown logic)
+      if (currentLength > this.options.maxChunkSize) {
+        pushChunk(i + 1, 'raw', false);
+        currentChunk = [];
+        currentLength = 0;
+        currentStartLine = i + 2;
+      }
+    }
+    // Push remaining content
+    if (currentChunk.length > 0) {
+      pushChunk(lines.length, 'module', true);
+    }
+    return chunks;
+  }
+  /**
+   * Find the index of the closing brace matching the opening brace at openIndex.
+   * Handles nested braces. Skips braces inside string literals, template literals,
+   * single-line comments, multi-line comments, and regex literals.
+   *
+   * Heuristics that keep a stray character from derailing the scan:
+   * - A single quote only opens a string if it is closed on the same line;
+   *   otherwise it is treated as a lone apostrophe (JSX text, Rust lifetimes).
+   * - `${ ... }` inside a template literal is scanned as code, so nested
+   *   template literals inside a substitution do not end the outer one early.
+   * - A `/` in regex position only opens a regex if it is closed on the same
+   *   line; otherwise it is treated as an operator.
+   *
+   * @param content Text to scan.
+   * @param openIndex Index of the `{` whose partner is wanted.
+   * @returns Index of the matching `}`, or -1 if `content[openIndex]` is not
+   *   `{` or no match is found.
+   */
+  private findMatchingBrace(content: string, openIndex: number): number {
+    if (content[openIndex] !== '{') return -1;
+    // Keywords that precede an expression, so a following `/` starts a regex.
+    const regexKeywords = [
+      'return',
+      'typeof',
+      'void',
+      'delete',
+      'throw',
+      'new',
+      'case',
+      'in',
+      'instanceof',
+      'yield',
+      'await',
+    ];
+    let depth = 1;
+    let i = openIndex + 1;
+    const len = content.length;
+    while (i < len) {
+      const c = content[i];
+      const next = i + 1 < len ? content[i + 1] : '';
+      // Single-line comment
+      if (c === '/' && next === '/') {
+        i = content.indexOf('\n', i);
+        if (i === -1) return -1;
+        i++;
+        continue;
+      }
+      // Multi-line comment
+      if (c === '/' && next === '*') {
+        i = content.indexOf('*/', i + 2);
+        if (i === -1) return -1;
+        i += 2;
+        continue;
+      }
+      // Double-quoted string literal (may span lines, as in Rust)
+      if (c === '"') {
+        i++;
+        while (i < len && content[i] !== '"') {
+          if (content[i] === '\\') i++; // skip escaped char
+          i++;
+        }
+        i++; // skip closing quote
+        continue;
+      }
+      // Single-quoted string literal — must close on the same line
+      if (c === "'") {
+        let k = i + 1;
+        while (k < len && content[k] !== "'" && content[k] !== '\n') {
+          if (content[k] === '\\') k++; // skip escaped char (also permits a line continuation)
+          k++;
+        }
+        // Closed: jump past the literal. Not closed: lone apostrophe, skip just the quote.
+        i = k < len && content[k] === "'" ? k + 1 : i + 1;
+        continue;
+      }
+      // Template literal
+      if (c === '`') {
+        i++;
+        while (i < len && content[i] !== '`') {
+          if (content[i] === '\\') {
+            i += 2; // skip escaped char
+          } else if (content[i] === '$' && content[i + 1] === '{') {
+            // Substitution: scan it as code so nested templates/braces are balanced.
+            const substitutionEnd = this.findMatchingBrace(content, i + 1);
+            // If it cannot be balanced, treat `$` as plain text and keep looking for the backtick.
+            i = substitutionEnd === -1 ? i + 1 : substitutionEnd + 1;
+          } else {
+            i++;
+          }
+        }
+        i++; // skip closing backtick
+        continue;
+      }
+      // Regex literal — heuristic: / after operator chars or keywords that precede expressions
+      if (c === '/' && i > 0) {
+        // Look back for operator context (skip whitespace)
+        let j = i - 1;
+        while (j >= 0 && (content[j] === ' ' || content[j] === '\t')) j--;
+        const prev = j >= 0 ? content[j] : '\n';
+        let isRegexContext = '=({[,;:!&|?+->~^%\n'.includes(prev);
+        if (!isRegexContext && j >= 0 && /[a-z]/.test(prev)) {
+          // Extract the word ending at position j
+          let wordStart = j;
+          while (wordStart > 0 && /[a-z]/.test(content[wordStart - 1])) wordStart--;
+          const word = content.substring(wordStart, j + 1);
+          // The word only counts as a keyword if it is not the tail of a longer
+          // identifier or a property name (e.g. `foo_in`, `obj.in`).
+          const standsAlone = wordStart === 0 || !/[A-Za-z0-9_$.]/.test(content[wordStart - 1]);
+          isRegexContext = standsAlone && regexKeywords.includes(word);
+        }
+        if (isRegexContext) {
+          // Look for the closing / on this line, ignoring escapes and character classes.
+          let k = i + 1;
+          let inCharClass = false;
+          while (k < len) {
+            const ch = content[k];
+            if (ch === '\n') break;
+            if (ch === '\\' && content[k + 1] !== '\n') {
+              k += 2; // skip escaped char
+              continue;
+            }
+            if (ch === '[') inCharClass = true;
+            else if (ch === ']') inCharClass = false;
+            else if (ch === '/' && !inCharClass) break;
+            k++;
+          }
+          if (k < len && content[k] === '/') {
+            i = k + 1; // skip closing /
+            continue;
+          }
+          // No terminator on this line: it was an operator; fall through.
+        }
+      }
+      if (c === '{') depth++;
+      else if (c === '}') {
+        depth--;
+        if (depth === 0) return i;
+      }
+      i++;
+    }
+    return -1;
+  }
+  /**
+   * Extract structured chunks (functions, classes)
+   * v2.76: Uses brace-matching for TS/JS/Rust so nested braces are not truncated at first \n}
+   *
+   * Each language pattern is run over the whole text with the `g` and `m`
+   * flags. For TS/JS/Rust patterns that end in `\{`, the chunk extends to the
+   * matching closing brace; otherwise the regex match itself is the chunk.
+   * Chunks whose matched text is shorter than `minChunkSize` or longer than
+   * `maxChunkSize` are skipped.
+   *
+   * `startLine`/`endLine` (1-based, inclusive) refer to the first and last
+   * non-whitespace characters of the match, so a pattern that also consumed
+   * leading blank lines or indentation still reports the line of the
+   * signature. `contentHash` is still computed over the untrimmed match so
+   * hashes stay comparable with ones produced before this change.
+   *
+   * @param content Full file text.
+   * @param language Language name used to select patterns.
+   * @returns Chunks without file-level fields, plus optional signature and symbol name.
+   */
+  private extractStructuredChunks(
+    content: string,
+    language: string
+  ): (Omit<CodeChunk, 'filePath' | 'language' | 'metadata'> & {
+    signature?: string;
+    symbolName?: string;
+  })[] {
+    const chunks: (Omit<CodeChunk, 'filePath' | 'language' | 'metadata'> & {
+      signature?: string;
+      symbolName?: string;
+    })[] = [];
+    const patterns = this.getLanguagePatterns(language);
+    if (patterns.length === 0) return chunks;
+    const useBraceMatch = ['typescript', 'javascript', 'rust'].includes(language);
+    // Offset at which each line begins; lets a match offset be mapped to its
+    // line by binary search instead of re-splitting the prefix for every match.
+    const lineStarts: number[] = [0];
+    for (let nl = content.indexOf('\n'); nl !== -1; nl = content.indexOf('\n', nl + 1)) {
+      lineStarts.push(nl + 1);
+    }
+    const lineAt = (offset: number): number => {
+      let lo = 0;
+      let hi = lineStarts.length - 1;
+      while (lo < hi) {
+        const mid = (lo + hi + 1) >> 1;
+        const start = lineStarts[mid];
+        if (start !== undefined && start <= offset) lo = mid;
+        else hi = mid - 1;
+      }
+      return lo + 1;
+    };
+    for (const pattern of patterns) {
+      let match: RegExpExecArray | null;
+      const regex = new RegExp(pattern.regex, 'gm');
+      while ((match = regex.exec(content)) !== null) {
+        let matchContent = match[0];
+        if (useBraceMatch && pattern.regex.endsWith('\\{')) {
+          // The pattern's final `{` opens the body; extend to its partner when one is found.
+          const openBraceIndex = match.index + match[0].length - 1;
+          const closeIndex = this.findMatchingBrace(content, openBraceIndex);
+          if (closeIndex >= 0) {
+            matchContent = content.slice(match.index, closeIndex + 1);
+          }
+        }
+        if (
+          matchContent.length < this.options.minChunkSize ||
+          matchContent.length > this.options.maxChunkSize
+        ) {
+          continue;
+        }
+        const trimmed = matchContent.trim();
+        // Skip whitespace the pattern consumed before the definition itself.
+        const leadingWhitespace = matchContent.length - matchContent.trimStart().length;
+        const startLine = lineAt(match.index + leadingWhitespace);
+        const trimmedLines = trimmed.split('\n');
+        const endLine = startLine + trimmedLines.length - 1;
+        // v3.3: Extract function/class signature and symbol name
+        const firstLine = (trimmedLines[0] ?? '').trim();
+        const signature = firstLine.replace(/\{$/, '').trim() || undefined;
+        // Declaration keyword followed by a name; otherwise (class methods)
+        // the first identifier that is directly followed by a parameter list.
+        const nameMatch =
+          /\b(?:function|class|const|interface|type|enum|def|fn|struct)\s+(\w+)/.exec(firstLine) ??
+          /(\w+)\s*(?:<[^>]*>)?\s*\(/.exec(firstLine);
+        const symbolName = nameMatch?.[1] || undefined;
+        chunks.push({
+          content: trimmed,
+          startLine,
+          endLine,
+          chunkType: pattern.type,
+          contentHash: this.hash(matchContent),
+          signature,
+          symbolName,
+        });
+      }
+    }
+    return chunks;
+  }
+  /**
+   * Get regex patterns for language
+   *
+   * The returned sources are compiled by the caller with the `g` and `m`
+   * flags, so `^` matches at every line start and `$` at every line end.
+   * End of input is therefore written as `(?![^])`, not `$`.
+   * Patterns ending in `\{` have their body located by brace matching for
+   * TypeScript, JavaScript and Rust.
+   *
+   * @param language Language name as produced by detectLanguage.
+   * @returns Pattern sources with the chunk type each one yields; empty for
+   *   languages without patterns.
+   */
+  private getLanguagePatterns(language: string): Array<{
+    regex: string;
+    type: 'function' | 'class' | 'module';
+  }> {
+    switch (language) {
+      case 'typescript':
+      case 'javascript':
+        return [
+          // Classes (body extracted via brace-matching)
+          {
+            regex: '(?:export\\s+)?(?:abstract\\s+)?class\\s+\\w+[^{]*\\{',
+            type: 'class',
+          },
+          // Functions (body extracted via brace-matching).
+          // `[^{;]` keeps a body-less overload/declaration from running on to a later brace.
+          {
+            regex: '(?:export\\s+)?(?:async\\s+)?function\\s+\\w+[^{;]*\\{',
+            type: 'function',
+          },
+          // Arrow functions (body extracted via brace-matching)
+          {
+            regex: '(?:export\\s+)?const\\s+\\w+\\s*=\\s*(?:async\\s+)?\\([^)]*\\)\\s*=>\\s*\\{',
+            type: 'function',
+          },
+          // Class methods — matches indented methods with optional modifiers.
+          // Indentation is spaces/tabs only, so the match starts on the method's own line.
+          // Excludes control flow and other keywords as whole words only, so
+          // names such as `format`, `forEach` or `newItem` are still matched.
+          {
+            regex:
+              '^[ \\t]+(?:(?:private|protected|public|static|abstract|override|readonly|async|get|set)\\s+)*(?!(?:if|for|while|switch|catch|return|throw|new|import|export)\\b)\\w+\\s*(?:<[^>]*>)?\\s*\\([^)]*\\)[^{]*\\{',
+            type: 'function',
+          },
+        ];
+      case 'python':
+        return [
+          // Classes: a top-level `class` up to the next top-level class/def or end of input
+          {
+            regex:
+              '^class\\s+\\w+[^:]*:[^]*?(?=\\n(?:class|(?:async\\s+)?def)\\s|(?![^]))',
+            type: 'class',
+          },
+          // Functions and methods: a `def` at any indentation up to the next
+          // def/class at any indentation or end of input
+          {
+            regex:
+              '^[ \\t]*(?:async\\s+)?def\\s+\\w+[^:]*:[^]*?(?=\\n[ \\t]*(?:class|(?:async\\s+)?def)\\s|(?![^]))',
+            type: 'function',
+          },
+        ];
+      case 'rust':
+        return [
+          // Functions (body extracted via brace-matching); body-less trait
+          // declarations ending in `;` are not matched
+          { regex: '(?:pub\\s+)?fn\\s+\\w+[^{;]*\\{', type: 'function' },
+          // Structs with a braced body (extracted via brace-matching); tuple
+          // and unit structs ending in `;` are not matched
+          { regex: '(?:pub\\s+)?struct\\s+\\w+[^{;]*\\{', type: 'class' },
+        ];
+      default:
+        return [];
+    }
+  }
+  /**
+   * Fall back to raw chunking with overlap
+   *
+   * The file is cut into windows of `maxChunkSize / 80` lines (assuming about
+   * 80 characters per line), each window starting 10% of a window before the
+   * previous one ended. Windows whose text is shorter than `minChunkSize` are
+   * skipped. The window size and the step are never less than one line, so a
+   * very small `maxChunkSize` cannot stall the loop.
+   *
+   * @param content Full file text.
+   * @param filePath Relative path stored on each chunk.
+   * @param language Language name stored on each chunk.
+   * @param size File size stored in each chunk's metadata.
+   * @returns 'raw' chunks with 1-based, inclusive line numbers.
+   */
+  private chunkRaw(content: string, filePath: string, language: string, size: number): CodeChunk[] {
+    const chunks: CodeChunk[] = [];
+    const lines = content.split('\n');
+    // Assume ~80 chars per line; clamp so maxChunkSize < 80 does not give a 0-line window.
+    const chunkSizeLines = Math.max(1, Math.floor(this.options.maxChunkSize / 80));
+    const overlapLines = Math.floor(chunkSizeLines * 0.1); // 10% overlap
+    // A step of 0 would repeat the same window forever.
+    const step = Math.max(1, chunkSizeLines - overlapLines);
+    for (let i = 0; i < lines.length; i += step) {
+      const chunkLines = lines.slice(i, i + chunkSizeLines);
+      const chunkContent = chunkLines.join('\n');
+      if (chunkContent.length >= this.options.minChunkSize) {
+        chunks.push({
+          content: chunkContent.trim(),
+          filePath,
+          language,
+          startLine: i + 1,
+          endLine: i + chunkLines.length,
+          chunkType: 'raw',
+          contentHash: this.hash(chunkContent),
+          metadata: {
+            fileName: filePath.split('/').pop() || '',
+            extension: extname(filePath),
+            size,
+          },
+        });
+      }
+    }
+    return chunks;
+  }
+  /**
+   * Short fingerprint of a chunk's text, used for drift detection.
+   * @param content Text to fingerprint.
+   * @returns The first 16 hexadecimal characters of the SHA-256 digest.
+   */
+  private hash(content: string): string {
+    const hasher = createHash('sha256');
+    hasher.update(content);
+    const hexDigest = hasher.digest('hex');
+    return hexDigest.slice(0, 16);
+  }
+}