codebasesearch 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,129 @@
1
+ import { readFileSync, existsSync } from 'fs';
2
+ import { join, dirname } from 'path';
3
+ import { fileURLToPath } from 'url';
4
+
5
// ESM modules have no __dirname; reconstruct it from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
6
+
7
/**
 * Load the bundled default ignore patterns.
 *
 * Prefers the `.thornsignore` file shipped one directory above this module;
 * when that file is missing or unreadable, falls back to the hardcoded set.
 *
 * @returns {Set<string>} default ignore patterns
 */
function loadDefaultIgnores() {
  const bundledIgnoreFile = join(__dirname, '..', '.thornsignore');

  if (existsSync(bundledIgnoreFile)) {
    try {
      return parseIgnoreFile(readFileSync(bundledIgnoreFile, 'utf8'));
    } catch (e) {
      // Fall through to the hardcoded fallback on read failure.
    }
  }

  return getHardcodedIgnores();
}
20
+
21
/**
 * Built-in fallback ignore set, used when no `.thornsignore` file ships
 * with the package. Entries are grouped by ecosystem; bare names match
 * any path segment, `*`-patterns are kept verbatim.
 *
 * @returns {Set<string>} hardcoded ignore patterns
 */
function getHardcodedIgnores() {
  const defaults = [
    // Version control
    '.git', '.svn', '.hg',
    // Dependency and build output
    'node_modules', 'dist', 'build', 'out', 'target', 'vendor',
    'bin', 'obj', 'dist-server', 'out-tsc', 'typings',
    // Python
    '__pycache__', '.pytest_cache', '.mypy_cache', '.tox', '.eggs',
    '*.egg-info', '.venv', 'venv', 'env',
    // JS framework / tool caches
    '.next', '.nuxt', '.cache', '.parcel-cache', '.vite', '.turbo',
    '.tsc', '.eslintcache', '.stylelintcache',
    // Coverage
    'coverage', '.nyc_output',
    // Cloud / infra
    '.firebase', '.terraform', '.aws', '.azure', '.gcloud',
    // Editors / IDEs
    '.vscode', '.idea', '.vs', '.tern-port',
    // JVM / mobile
    '.gradle', '.mvn', 'Pods', 'DerivedData', '.bundle',
    // Package managers
    '.yarn', '.pnp', 'pnpm-lock.yaml', '.pnpm-store',
    // Temp files and OS droppings
    'temp', 'tmp', '.tmp', '.DS_Store', 'Thumbs.db',
    '.swp', '.swo', '*.swp', '*.swo',
    // Environment files
    '.env', '.env.local', '.env.*.local'
  ];
  return new Set(defaults);
}
38
+
39
/**
 * Parse gitignore-style text into a set of simplified patterns.
 *
 * Supported subset: comments (#) and blank lines are skipped, negation
 * patterns (!) are skipped entirely, a trailing slash is stripped from
 * directory patterns, and trailing "/*"-style wildcards are removed.
 * Any other wildcard pattern is kept verbatim.
 *
 * @param {string} content - raw ignore-file text
 * @returns {Set<string>} parsed patterns
 */
function parseIgnoreFile(content) {
  const patterns = new Set();

  for (const rawLine of content.split('\n')) {
    let pattern = rawLine.trim();

    // Comments and blank lines carry no pattern.
    if (pattern === '' || pattern.startsWith('#')) continue;

    // Directory patterns: drop the trailing slash.
    if (pattern.endsWith('/')) {
      pattern = pattern.slice(0, -1);
    }

    // Negation patterns are not supported yet.
    if (pattern.startsWith('!')) continue;

    // Collapse trailing "/*", "/**" … down to the directory name.
    if (pattern.includes('*')) {
      pattern = pattern.replace(/\/\*+$/, '');
    }

    if (pattern !== '') {
      patterns.add(pattern);
    }
  }

  return patterns;
}
70
+
71
/**
 * Collect ignore patterns from well-known ignore files in the project root.
 * Missing files are skipped; unreadable files are ignored silently.
 *
 * @param {string} rootPath - project root directory
 * @returns {Set<string>} union of all patterns found
 */
function loadProjectIgnores(rootPath) {
  const IGNORE_FILE_NAMES = [
    '.gitignore',
    '.dockerignore',
    '.npmignore',
    '.eslintignore',
    '.prettierignore',
    '.thornsignore',
    '.codesearchignore'
  ];

  const merged = new Set();

  for (const name of IGNORE_FILE_NAMES) {
    const candidate = join(rootPath, name);
    if (!existsSync(candidate)) continue;

    try {
      const parsed = parseIgnoreFile(readFileSync(candidate, 'utf8'));
      parsed.forEach((pattern) => merged.add(pattern));
    } catch (e) {
      // Best-effort: an unreadable ignore file contributes nothing.
    }
  }

  return merged;
}
100
+
101
/**
 * Build the effective ignore set for a scan: project-level patterns merged
 * on top of the bundled defaults.
 *
 * @param {string} rootPath - project root directory
 * @returns {Set<string>} merged ignore patterns
 */
export function loadIgnorePatterns(rootPath) {
  return new Set([...loadDefaultIgnores(), ...loadProjectIgnores(rootPath)]);
}
109
+
110
/**
 * Decide whether a path matches any ignore pattern.
 *
 * Patterns containing '/' match as substrings of the normalized
 * (forward-slash) path; bare patterns match any single path segment
 * exactly.
 *
 * @param {string} filePath - relative or absolute path to test
 * @param {Iterable<string>} ignorePatterns - patterns from loadIgnorePatterns()
 * @returns {boolean} true when the path should be skipped
 */
export function shouldIgnore(filePath, ignorePatterns) {
  const normalized = filePath.replace(/\\/g, '/');
  const segments = normalized.split('/');

  for (const pattern of ignorePatterns) {
    const matched = pattern.includes('/')
      ? normalized.includes(pattern)
      : segments.includes(pattern);
    if (matched) {
      return true;
    }
  }

  return false;
}
@@ -0,0 +1,38 @@
1
// Auto-patch transformers dist for sharp removal
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const distPath = path.join(__dirname, '..', 'node_modules', '@huggingface', 'transformers', 'dist', 'transformers.node.mjs');

// Each entry: [pattern, replacement]. The first replacement also plants the
// sentinel comment that marks the file as already patched.
const PATCHES = [
  // Drop the hard import of sharp.
  [/import \* as __WEBPACK_EXTERNAL_MODULE_sharp__ from "sharp";\n/,
    '// SHARP_REMOVED_FOR_WINDOWS_COMPATIBILITY\n'],
  // Stub out the sharp module export.
  [/module\.exports = __WEBPACK_EXTERNAL_MODULE_sharp__;/g,
    'module.exports = {};'],
  // Downgrade the load-time failure to a call-time error.
  [/} else \{\s*throw new Error\('Unable to load image processing library\.'\);\s*\}/,
    '} else {\n loadImageFunction = async () => { throw new Error(\'Image processing unavailable\'); };\n}']
];

if (fs.existsSync(distPath)) {
  const original = fs.readFileSync(distPath, 'utf-8');

  // Only patch if not already patched (sentinel inserted by first patch).
  if (!original.includes('SHARP_REMOVED_FOR_WINDOWS_COMPATIBILITY')) {
    const patched = PATCHES.reduce(
      (text, [pattern, replacement]) => text.replace(pattern, replacement),
      original
    );

    try {
      fs.writeFileSync(distPath, patched);
    } catch (e) {
      // Silently continue if unable to patch (read-only filesystem)
    }
  }
}
package/src/scanner.js ADDED
@@ -0,0 +1,172 @@
1
+ import { readdirSync, statSync, readFileSync } from 'fs';
2
+ import { join, relative } from 'path';
3
+ import { shouldIgnore } from './ignore-parser.js';
4
+
5
/**
 * File extensions (lowercase, dot included) the scanner will index,
 * grouped by ecosystem. Checked against getFileExtension(), which
 * lowercases — so '.R' is effectively redundant with '.r' but kept for
 * fidelity.
 */
const SUPPORTED_EXTENSIONS = new Set([
  // JavaScript / TypeScript
  '.js', '.ts', '.tsx', '.jsx', '.mjs', '.cjs',
  // Python
  '.py', '.pyw', '.pyi',
  // Go / Rust
  '.go', '.rs',
  // JVM languages and builds
  '.java', '.kt', '.scala', '.groovy', '.gradle',
  // C / C++ / Objective-C
  '.c', '.cpp', '.cc', '.cxx', '.h', '.hpp', '.hxx', '.m', '.mm',
  // Ruby / PHP / C# / Swift
  '.rb', '.erb', '.php', '.cs', '.csx', '.swift',
  // Shell / SQL / scripting
  '.sh', '.bash', '.zsh', '.sql', '.r', '.R', '.lua', '.pl', '.pm',
  // Markup and config
  '.xml', '.xsd', '.json', '.jsonc', '.yaml', '.yml', '.toml',
  // Web
  '.html', '.htm', '.css', '.scss', '.sass', '.less', '.vue', '.svelte',
  // Docs
  '.md', '.markdown'
]);
34
+
35
/**
 * Return the lowercase file extension (dot included) of a name or path,
 * or '' when there is none.
 *
 * Fix: a dot inside a directory component (e.g. "src.v2/Makefile") was
 * previously mistaken for an extension; only dots after the last path
 * separator now count. A leading dot in the basename (".gitignore") is
 * still treated as an extension, matching the original behavior.
 *
 * @param {string} filePath - file name or path
 * @returns {string} extension such as '.js', or ''
 */
function getFileExtension(filePath) {
  const lastDot = filePath.lastIndexOf('.');
  if (lastDot === -1) return '';

  // Ignore dots that belong to a directory component.
  const lastSep = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
  if (lastDot < lastSep) return '';

  return filePath.substring(lastDot).toLowerCase();
}
40
+
41
// Extensions that always denote binary content. Hoisted to module scope so
// the Set is built once instead of on every isBinaryFile() call (the
// original rebuilt it per invocation inside the hot directory walk).
const BINARY_EXTENSIONS = new Set([
  '.zip', '.tar', '.gz', '.rar', '.7z', '.iso',
  '.exe', '.dll', '.so', '.dylib', '.bin',
  '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.ico',
  '.mp3', '.mp4', '.mov', '.avi', '.flv', '.m4a',
  '.pdf', '.doc', '.docx', '.xls', '.xlsx',
  '.woff', '.woff2', '.ttf', '.otf', '.eot'
]);

/**
 * True when the file's extension marks it as a known binary format.
 *
 * @param {string} filePath - file name or path
 * @returns {boolean} true for binary extensions
 */
function isBinaryFile(filePath) {
  return BINARY_EXTENSIONS.has(getFileExtension(filePath));
}
53
+
54
/**
 * Recursively collect indexable files under dirPath.
 *
 * Skips ignored entries, unsupported or binary extensions, and files over
 * 5 MB; unreadable directories and files are ignored silently.
 *
 * Fix: `entry.isSymbolicLink` was referenced without calling it. A function
 * object is always truthy, so `stat` was always null — the size limit never
 * applied and every mtime was Date.now(). The method is now invoked.
 *
 * @param {string} dirPath - absolute directory to walk
 * @param {Set<string>} ignorePatterns - patterns for shouldIgnore()
 * @param {string} [relativePath] - path of dirPath relative to the scan root
 * @returns {Array<{fullPath: string, relativePath: string, mtime: number}>}
 */
function walkDirectory(dirPath, ignorePatterns, relativePath = '') {
  const files = [];
  const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5 MB

  try {
    const entries = readdirSync(dirPath, { withFileTypes: true });

    for (const entry of entries) {
      const fullPath = join(dirPath, entry.name);
      const relPath = relativePath ? join(relativePath, entry.name) : entry.name;
      // Normalize to forward slashes for consistent ignore pattern matching
      const normalizedRelPath = relPath.replace(/\\/g, '/');

      if (shouldIgnore(normalizedRelPath, ignorePatterns)) {
        continue;
      }

      if (entry.isDirectory()) {
        files.push(...walkDirectory(fullPath, ignorePatterns, relPath));
      } else if (entry.isFile()) {
        const ext = getFileExtension(entry.name);
        if (!SUPPORTED_EXTENSIONS.has(ext) || isBinaryFile(entry.name)) {
          continue;
        }

        try {
          // BUG FIX: call isSymbolicLink() — previously the unbound method
          // reference was always truthy, so statSync never ran.
          const stat = entry.isSymbolicLink() ? null : statSync(fullPath);
          if (!stat || stat.size <= MAX_FILE_SIZE) {
            files.push({
              fullPath,
              relativePath: normalizedRelPath,
              mtime: stat ? stat.mtime.getTime() : Date.now()
            });
          }
        } catch (e) {
          // Unreadable file: skip it.
        }
      }
    }
  } catch (e) {
    // Ignore read errors for individual directories
  }

  return files;
}
97
+
98
/**
 * Split file content into line-based chunks with overlap between
 * consecutive chunks, so matches near a boundary are not lost.
 *
 * Fix: when overlapSize >= chunkSize the loop step was <= 0, spinning
 * forever on any input longer than chunkSize lines; that case now throws.
 *
 * @param {string} content - full file text
 * @param {number} [chunkSize=1000] - max lines per chunk
 * @param {number} [overlapSize=200] - lines shared with the previous chunk
 * @returns {Array<{content: string, line_start: number, line_end: number}>}
 *   chunks with 1-based inclusive line ranges; whitespace-only chunks
 *   are dropped
 * @throws {RangeError} when overlapSize >= chunkSize
 */
function chunkContent(content, chunkSize = 1000, overlapSize = 200) {
  // Guard: a non-positive step would never advance the window.
  if (overlapSize >= chunkSize) {
    throw new RangeError('overlapSize must be smaller than chunkSize');
  }

  const lines = content.split('\n');
  const chunks = [];
  const step = chunkSize - overlapSize;

  for (let i = 0; i < lines.length; i += step) {
    const endIdx = Math.min(i + chunkSize, lines.length);
    const chunk = lines.slice(i, endIdx).join('\n');

    // Skip chunks that contain only whitespace.
    if (chunk.trim().length > 0) {
      chunks.push({
        content: chunk,
        line_start: i + 1,
        line_end: endIdx
      });
    }

    // Stop once the window reaches the end of the file.
    if (endIdx === lines.length) {
      break;
    }
  }

  return chunks;
}
122
+
123
/**
 * Scan a repository and return indexable text chunks.
 *
 * Files of up to 1000 lines become a single chunk covering the whole
 * file; longer files are split by chunkContent(). Unreadable files are
 * skipped silently.
 *
 * Fix: content.split('\n') was computed twice for small files (once for
 * the length check, once for line_end); it is now computed once.
 *
 * @param {string} rootPath - repository root directory
 * @param {Set<string>} ignorePatterns - patterns for shouldIgnore()
 * @returns {Array<object>} chunk records with file_path, chunk_index,
 *   content, line_start, line_end, and mtime
 */
export function scanRepository(rootPath, ignorePatterns) {
  const files = walkDirectory(rootPath, ignorePatterns);
  const chunks = [];

  for (const file of files) {
    try {
      const content = readFileSync(file.fullPath, 'utf8');
      const lineCount = content.split('\n').length;
      const mtime = file.mtime;

      if (lineCount <= 1000) {
        // Small file: one chunk spanning every line.
        chunks.push({
          file_path: file.relativePath,
          chunk_index: 0,
          content,
          line_start: 1,
          line_end: lineCount,
          mtime
        });
      } else {
        // Large file: overlapping line-based chunks.
        chunkContent(content).forEach((chunk, idx) => {
          chunks.push({
            file_path: file.relativePath,
            chunk_index: idx,
            content: chunk.content,
            line_start: chunk.line_start,
            line_end: chunk.line_end,
            mtime
          });
        });
      }
    } catch (e) {
      // Ignore read errors for individual files
    }
  }

  return chunks;
}
163
+
164
/**
 * Build a map of file_path -> mtime taken from each file's first chunk.
 *
 * Fix: the original used a truthiness check (`!stats[path]`), so a file
 * whose first chunk carried mtime 0 (the Unix epoch) would be overwritten
 * by later chunks. An explicit own-key check keeps first-chunk wins.
 *
 * @param {Array<{file_path: string, mtime: number}>} chunks
 * @returns {Object<string, number>} mtime per file path
 */
export function getFileStats(chunks) {
  const stats = {};
  for (const chunk of chunks) {
    if (!Object.hasOwn(stats, chunk.file_path)) {
      stats[chunk.file_path] = chunk.mtime;
    }
  }
  return stats;
}
package/src/search.js ADDED
@@ -0,0 +1,45 @@
1
+ import { generateSingleEmbedding } from './embeddings.js';
2
+ import { searchSimilar } from './store.js';
3
+
4
/**
 * Run a semantic search: embed the query and rank stored chunks by
 * vector similarity.
 *
 * @param {string} query - natural-language or code query
 * @param {number} [limit=10] - maximum number of results
 * @returns {Promise<Array<object>>} ranked results from the vector store
 * @throws {Error} when the query is empty or whitespace-only
 */
export async function executeSearch(query, limit = 10) {
  const isBlank = !query || query.trim().length === 0;
  if (isBlank) {
    throw new Error('Query cannot be empty');
  }

  console.error(`Searching for: "${query}"`);

  // Embed the query, then look up the nearest chunks.
  const queryEmbedding = await generateSingleEmbedding(query);
  return searchSimilar(queryEmbedding, limit);
}
19
+
20
/**
 * Render search results as a human-readable text report.
 *
 * Each entry shows its rank, file location, similarity score as a
 * percentage, and the first three lines of the matched code (each line
 * capped at 80 characters).
 *
 * @param {Array<object>} results - entries from searchSimilar()
 * @returns {string} formatted report, or a "no results" message
 */
export function formatResults(results) {
  if (results.length === 0) {
    return 'No results found.';
  }

  const plural = results.length !== 1 ? 's' : '';
  const output = [`\nFound ${results.length} result${plural}:\n`];

  results.forEach((result, index) => {
    const rank = index + 1;
    const score = (result.score * 100).toFixed(1);
    output.push(`${rank}. ${result.file_path}:${result.line_start}-${result.line_end} (score: ${score}%)`);

    // Preview: first three lines of the chunk, truncated to 80 chars each.
    for (const snippetLine of result.content.split('\n').slice(0, 3)) {
      output.push(` > ${snippetLine.slice(0, 80)}`);
    }

    output.push('');
  });

  return output.join('\n');
}
package/src/store.js ADDED
@@ -0,0 +1,166 @@
1
+ import { connect } from 'vectordb';
2
+ import { join } from 'path';
3
+ import { mkdirSync, existsSync } from 'fs';
4
+
5
// Module-level singleton handles: dbConnection is set by initStore(),
// tableRef by getTable()/upsertChunks(); both are reset by closeStore().
let dbConnection = null;
let tableRef = null;
7
+
8
/**
 * Create (if needed) the LanceDB directory under dbPath and open a
 * connection to it. Must be called before any other store operation.
 *
 * @param {string} dbPath - base directory for the index
 * @returns {Promise<boolean>} true on success
 * @throws rethrows any connection failure after logging it
 */
export async function initStore(dbPath) {
  const dbDir = join(dbPath, 'lancedb');

  // Ensure directory exists
  if (!existsSync(dbDir)) {
    mkdirSync(dbDir, { recursive: true });
  }

  try {
    // LanceDB is embedded and file-based — no network involved.
    // The absolute path keeps this working on Windows as well.
    dbConnection = await connect({ uri: dbDir, mode: 'overwrite' });
    console.error('Vector store initialized');
    return true;
  } catch (e) {
    console.error('Failed to initialize vector store:', e.message);
    throw e;
  }
}
29
+
30
/**
 * Open the 'code_chunks' table if it exists, caching the handle in
 * tableRef. Yields null when the table has not been created yet.
 *
 * @returns {Promise<object|null>} the table handle, or null
 * @throws {Error} when initStore() has not been called
 */
export async function getTable() {
  if (!dbConnection) {
    throw new Error('Store not initialized. Call initStore first.');
  }

  try {
    tableRef = await dbConnection.openTable('code_chunks');
  } catch (e) {
    // Table doesn't exist yet; it will be created on first insert.
    tableRef = null;
  }

  return tableRef;
}
47
+
48
/**
 * Write chunk records (with embedding vectors) to the 'code_chunks'
 * table, replacing any previous contents. No-op for an empty list.
 *
 * @param {Array<object>} chunks - records carrying file_path, chunk_index,
 *   content, line_start, line_end, vector, and mtime
 * @throws {Error} when the store is uninitialized; rethrows write failures
 */
export async function upsertChunks(chunks) {
  if (!dbConnection) {
    throw new Error('Store not initialized');
  }
  if (chunks.length === 0) {
    return;
  }

  const tableName = 'code_chunks';

  // Coerce every field to the primitive type LanceDB expects.
  const rows = chunks.map(({ file_path, chunk_index, content, line_start, line_end, vector, mtime }) => ({
    file_path: String(file_path),
    chunk_index: Number(chunk_index),
    content: String(content),
    line_start: Number(line_start),
    line_end: Number(line_end),
    vector,
    mtime: Number(mtime)
  }));

  try {
    let table = null;

    try {
      // Replace the contents of an existing table...
      table = await dbConnection.openTable(tableName);
      await table.overwrite(rows);
    } catch (e) {
      // ...or create it on first use.
      table = await dbConnection.createTable(tableName, rows);
    }

    tableRef = table;
    console.error(`Indexed ${chunks.length} chunks`);
  } catch (e) {
    console.error('Failed to upsert chunks:', e.message);
    throw e;
  }
}
88
+
89
/**
 * Query the vector index for the chunks nearest to queryEmbedding.
 *
 * Logs and returns [] instead of throwing when the store or index is
 * unavailable, or when the search itself fails.
 *
 * @param {number[]|Float32Array} queryEmbedding - query vector
 * @param {number} [limit=10] - maximum number of hits
 * @returns {Promise<Array<object>>} hits with file/line fields, the raw
 *   distance, and a score computed as 1 / (1 + distance)
 */
export async function searchSimilar(queryEmbedding, limit = 10) {
  // Lazily resolve the table handle when no prior call has cached it.
  if (!tableRef) {
    if (!dbConnection) {
      console.error('No database connection');
      return [];
    }
    try {
      await getTable();
    } catch (e) {
      console.error('No index available');
      return [];
    }
  }

  if (!tableRef) {
    console.error('No index available');
    return [];
  }

  try {
    // LanceDB expects a plain array, not a typed array or tensor.
    const queryVector = Array.isArray(queryEmbedding)
      ? queryEmbedding
      : Array.from(queryEmbedding);

    const hits = await tableRef
      .search(queryVector)
      .limit(limit)
      .execute();

    return hits.map((hit) => {
      const distance = hit._distance !== undefined ? hit._distance : (hit.distance || 0);
      const score = distance !== null && distance !== undefined ? 1 / (1 + distance) : 0;
      return {
        file_path: hit.file_path,
        chunk_index: hit.chunk_index,
        content: hit.content,
        line_start: hit.line_start,
        line_end: hit.line_end,
        distance: distance,
        score: score
      };
    });
  } catch (e) {
    console.error('Search failed:', e.message);
    return [];
  }
}
135
+
136
/**
 * Number of rows in the open table; 0 when no table is open or the
 * count fails.
 *
 * @returns {Promise<number>} row count
 */
export async function getRowCount() {
  if (!tableRef) return 0;

  try {
    return await tableRef.countRows();
  } catch (e) {
    // Treat count failures as an empty index.
    return 0;
  }
}
147
+
148
/**
 * Map of already-indexed file paths to mtimes.
 *
 * Currently always resolves to {} — every scan performs a full reindex,
 * which guarantees freshness at the cost of repeated work. A future
 * optimization could persist per-file mtimes in a metadata file.
 *
 * @returns {Promise<Object<string, number>>} always an empty object
 */
export async function getIndexedFiles() {
  if (!tableRef) {
    return {};
  }
  return {};
}
158
+
159
/**
 * Reset the module-level store handles.
 *
 * Embedded LanceDB needs no explicit close; clearing the references just
 * keeps module state tidy between runs.
 */
export async function closeStore() {
  if (dbConnection) {
    dbConnection = null;
    tableRef = null;
  }
}