npm - codebasesearch - Versions diffs - 0.1.20 → 0.1.22 - Mend

codebasesearch 0.1.20 → 0.1.22

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codebasesearch",
-  "version": "0.1.20",
+  "version": "0.1.22",
   "description": "Ultra-simple code search tool with Jina embeddings, LanceDB, and MCP protocol support",
   "type": "module",
   "bin": {

package/src/ignore-parser.js CHANGED Viewed

@@ -106,109 +106,17 @@ function loadDefaultIgnores() {
 function getHardcodedIgnores() {
   return new Set([
-    'node_modules', '.git', '.svn', '.hg', 'dist', 'build', 'out',
-    'target', 'vendor', '__pycache__', '.pytest_cache', '.mypy_cache',
-    '.next', '.nuxt', '.cache', '.parcel-cache', '.vite', '.turbo',
-    'coverage', '.nyc_output', '.firebase', '.terraform', '.aws',
-    '.azure', '.gcloud', '.vscode', '.idea', '.vs', 'bin', 'obj',
-    '.gradle', '.mvn', 'Pods', 'DerivedData', '.bundle',
-    '.yarn', '.pnp', 'pnpm-lock.yaml', '.pnpm-store',
-    '.tox', '.eggs', '*.egg-info', '.venv', 'venv', 'env',
-    '.tsc', '.eslintcache', '.stylelintcache', '.parcel-cache',
-    'temp', 'tmp', '.tmp', '.DS_Store', 'Thumbs.db',
-    '.swp', '.swo', '*.swp', '*.swo', '.tern-port',
-    'dist-server', 'out-tsc', '.cache', '.parcel-cache',
-    'typings', '.env', '.env.local', '.env.*.local',
-    // JSON files - PRIMARY PRIORITY for memory reduction
-    '*.json', 'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml',
-    'Gemfile.lock', 'poetry.lock', 'Pipfile.lock',
-    // Lock files
-    '*.lock',
-    // Build outputs
-    'public', 'static', 'site', '_site', '.docusaurus', '.gatsby',
-    // Cache/dependency directories
-    '.rush', '.lerna', '.nx',
-    // IDE/editor configs
-    '.cursor', '.replit', '.sublime-project', '.sublime-workspace',
-    '*.iml', '.project', '.classpath', '.settings', '*.sublime-*',
+    // Lock files / package manager artifacts
+    'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml',
+    'Gemfile.lock', 'poetry.lock', 'Pipfile.lock', 'Cargo.lock',
+    'composer.lock', 'go.sum',
     // OS files
-    '.Spotlight-V100', '.Trashes', 'ehthumbs.db', '.fseventsd',
-    '.TemporaryItems', '.AppleDouble', '.LSOverride', 'desktop.ini',
-    // Large data files
-    '*.db', '*.sqlite', '*.sqlite3', '*.bak', '*.dump',
-    '*.backup', '*.data', '*.orig',
-    // Logs and temp
-    '*.log', 'logs', 'npm-debug.log', 'yarn-error.log',
-    // Test coverage and reports
-    'lcov.info', '.coverage', 'test-results',
-    // Database related
-    'storage', 'fixtures',
-    // LLM/Vector related
-    '.llamaindex', '.chroma', '.vectorstore', '.embeddings',
-    '.langchain', '.autogen', '.semantic-kernel', '.openai-cache',
-    '.anthropic-cache', 'embeddings', 'vector-db', 'faiss-index',
-    'chromadb', 'pinecone-cache', 'weaviate-data',
-    // Compiled output
+    '.DS_Store', 'Thumbs.db', 'desktop.ini',
+    // Editor swap files
+    '.tern-port',
+    // Compiled binary artifacts (files, not dirs)
     '*.min.js', '*.min.css', '*.bundle.js', '*.chunk.js', '*.map',
-    // Generated/build artifacts
-    '.assets', 'out-tsc', 'cmake_build_debug', 'cmake_build_release',
-    // Version managers
-    '.rbenv', '.nvm', '.nvmrc',
-    // Ruby specific
-    '*.gem', '*.rbc', '/pkg', '/spec/reports', '/spec/examples.txt',
-    '/test/tmp', '/test/version_tmp', 'lib/bundler/man', '.ruby-version',
-    // Go specific
-    'go.work',
-    // Rust specific
-    'Cargo.lock', '**/*.rs.bk', '*.pdb',
-    // Java specific
-    '*.class', '*.jar', '*.war', '*.ear', '*.nar', '*.nupkg', '*.snupkg',
-    // C# specific
-    '*.suo', '*.user', '*.userosscache', '*.sln.docstates',
-    'project.lock.json', 'project.fragment.lock.json', 'artifacts',
-    // C/C++ specific
-    '*.o', '*.a', '*.so', '*.exe', '*.obj', '*.dll', '*.dylib',
-    'CMakeFiles', 'CMakeCache.txt', '*.cmake',
-    // Swift/Xcode specific
-    '*.xcodeproj', '*.xcworkspace', '*.moved-aside', '*.pbxuser',
-    '*.mode1v3', '*.mode2v3', '*.perspectivev3',
-    // Scala/SBT specific
-    'lib_managed', 'src_managed', 'project/boot', 'project/plugins/project',
-    '.history', '.lib',
-    // PHP specific
-    'composer.lock', '*.phar',
-    // Docker
-    '.dockerignore', 'docker-compose.override.yml', '.docker',
-    // Documentation build
-    'docs/_build', '.vuepress',
-    // Testing frameworks
-    'jest.config', 'vitest.config', 'pytest.ini', 'tox.ini',
-    '__tests__', '__mocks__', 'spec', 'cypress', 'playwright',
-    // Monorepo workspace patterns (implicit through directory coverage)
-    '.turbo', '.nx',
-    // Python package patterns
-    '*.py[cod]', '*$py.class', '.Python', 'pip-log.txt',
-    'pip-delete-this-directory.txt', '.hypothesis', '.pyre', '.pytype',
-    '*.whl',
-    // Config/metadata that are typically low-value
-    '*.config.js', '*.config.ts', 'webpack.config.js', 'rollup.config.js',
-    'vite.config.js', 'tsconfig.json', 'jsconfig.json', 'babel.config',
-    '.babelrc', '.eslintrc', '.prettierrc', '.stylelintrc', '.editorconfig',
-    '*.local', '*.development', '*.production',
-    // Node specific
-    '.npm', '.node_repl_history', '*.tsbuildinfo', 'yarn-error.log',
-    // Documentation/reference files that don't help with search
-    '*.md', '*.txt', '*.rst', '*.adoc', 'docs', 'documentation', 'wiki',
-    'CHANGELOG', 'HISTORY', 'NEWS', 'UPGRADING', 'FAQ', 'CONTRIBUTING',
-    'SECURITY', 'LICENSE', 'LICENCE', 'COPYRIGHT', 'NOTICE', 'AUTHORS',
-    'THIRDPARTY',
-    // Test and coverage files
-    '*.test', '*.spec', 'test', 'tests', 'htmlcov',
-    // Profiling
-    '*.prof', '*.cpuprofile', '*.heapprofile',
-    // Misc
-    '.tern-port', 'firebase-debug.log', 'firestore-debug.log',
-    'ui-debug.log', '.firebaserc', '.stackdump'
+    '*.tsbuildinfo',
   ]);
 }
@@ -283,76 +191,54 @@ export function loadIgnorePatterns(rootPath) {
   return merged;
 }
-// Directories to always ignore
+// Directories to always ignore - only clear non-source directories
 const IGNORED_DIRECTORIES = new Set([
-  // Dependencies - NEVER include
+  // Dependencies
   'node_modules', 'bower_components', 'jspm_packages', 'web_modules',
   // Version control
-  '.git', '.svn', '.hg', '.bzr', '.vscode', '.idea', '.vs', '.atom', '.sublime-project',
-  // Build outputs - comprehensive list
-  'dist', 'dist-server', 'dist-ssr', 'dist-client', 'dist-server',
-  'build', 'built', 'Build', 'BUILD',
-  'out', 'output', 'Output', 'OUT', 'release', 'Release', 'RELEASE',
-  'target', 'Target', 'TARGET',
-  'bin', 'Bin', 'BIN', 'obj', 'Obj', 'OBJ',
-  'public', 'static', 'assets', 'www', 'wwwroot',
-  'site', '_site', '.site', '.docusaurus', '.gatsby', '.vuepress',
-  'storybook-static', '.nuxt', 'nuxt', '.next', 'next',
-  'out-tsc', 'tsc', '.tsc',
+  '.git', '.svn', '.hg', '.bzr',
+  // IDE
+  '.vscode', '.idea', '.vs', '.atom',
+  // Build outputs (unambiguous names only)
+  'dist', 'dist-server', 'dist-ssr', 'dist-client',
+  'build', 'built',
+  'out', 'out-tsc',
+  'target',
+  'storybook-static', '.docusaurus', '.gatsby', '.vuepress',
+  '.nuxt', '.next',
+  '.tsc',
   // Cache directories
-  '.cache', 'cache', '.parcel-cache', '.vite', 'vite', '.turbo', 'turbo',
+  '.cache', '.parcel-cache', '.vite', '.turbo',
   '.npm', '.yarn', '.pnp', '.pnpm-store', '.rush', '.lerna', '.nx',
   // Testing
   'coverage', '.nyc_output', '.coverage', 'htmlcov', 'test-results',
-  'test', 'tests', 'Test', 'Tests', 'TEST', 'TESTS',
   '__tests__', '__mocks__', '__snapshots__', '__fixtures__',
-  'cypress', 'playwright', 'e2e', 'integration', 'spec', 'specs',
+  'cypress', 'playwright',
   '.tox', '.eggs', '.hypothesis', '.pyre', '.pytype',
   // Python
-  '__pycache__', '.pytest_cache', '.mypy_cache', '.venv', 'venv', 'env',
-  'env.bak', 'venv.bak', '.Python', 'pip-wheel-metadata', '*.egg-info',
+  '__pycache__', '.pytest_cache', '.mypy_cache', '.venv', 'venv',
   // Java/Gradle/Maven
-  '.gradle', '.mvn', 'gradle', 'mvn', '.settings', '.project', '.classpath',
+  '.gradle', '.mvn',
   // iOS/Android
-  'Pods', 'DerivedData', 'build', '.bundle', 'xcuserdata', '.xcodeproj', '.xcworkspace',
+  'Pods', 'DerivedData', '.bundle', 'xcuserdata',
   // Ruby
-  'vendor', '.bundle', '.ruby-version', 'pkg',
-  // Rust
-  'target', 'Cargo.lock',
-  // Go
-  'vendor', 'Godeps',
-  // PHP
-  'vendor', 'composer',
+  '.bundle', 'pkg',
   // Infrastructure
   '.terraform', '.terragrunt-cache', '.pulumi', '.serverless', '.firebase',
-  '.aws', '.azure', '.gcloud', '.vercel', '.netlify', '.now',
-  // Docker
-  '.docker', 'docker', '.dockerignore',
+  '.aws', '.azure', '.gcloud', '.vercel', '.netlify',
   // Temp files
-  'temp', 'tmp', '.tmp', '.temp', 'tmpfs', 'scratch', '.scratch',
-  // Documentation
-  'docs', 'doc', 'documentation', 'wiki', 'guides', 'examples', 'demo', 'demos',
-  'CHANGELOG', 'HISTORY', 'NEWS', 'LICENSE', 'LICENCE', 'COPYING', 'AUTHORS',
-  // IDE/Editor
-  '.vs', '.vscode', '.idea', '.eclipse', '.settings', '.classpath', '.project',
-  // Logs
-  'logs', 'log', '*.log',
-  // Data/Storage
-  'storage', 'data', 'database', 'db', 'fixtures', 'seeds',
-  'uploads', 'files', 'media', 'resources', 'assets', 'images', 'img',
-  // LLM/AI
+  'temp', 'tmp', '.tmp', '.temp',
+  // LLM/AI artifacts
   '.llamaindex', '.chroma', '.vectorstore', '.embeddings',
   '.langchain', '.autogen', '.semantic-kernel', '.openai-cache',
   '.anthropic-cache', 'embeddings', 'vector-db', 'faiss-index',
   'chromadb', 'pinecone-cache', 'weaviate-data',
-  // Package managers
-  '.yarn', '.pnpm', '.npm', '.bun',
-  // Compiled outputs
-  'typings', 'types', '@types', 'type-definitions',
-  // Misc
-  'public', 'static', 'site', '_site',
-  'cmake_build_debug', 'cmake_build_release', 'CMakeFiles', 'CMakeCache.txt',
-  'out-tsc', 'dist-server', 'server', 'client', 'browser', 'esm', 'cjs', 'umd', 'lib', 'es'
+  // Package manager caches
+  '.pnpm', '.bun',
+  // Static/built asset directories
+  'assets', 'static', 'public', 'wwwroot', 'www',
+  // Misc generated
+  'cmake_build_debug', 'cmake_build_release', 'CMakeFiles',
 ]);
 export function isCodeFile(filePath) {
@@ -373,28 +259,34 @@ export function isCodeFile(filePath) {
 export function shouldIgnoreDirectory(dirPath) {
   const normalizedPath = dirPath.replace(/\\/g, '/');
   const pathParts = normalizedPath.split('/');
   for (const part of pathParts) {
     if (IGNORED_DIRECTORIES.has(part)) {
       return true;
     }
   }
   return false;
 }
-export function shouldIgnore(filePath, ignorePatterns) {
+export function shouldIgnore(filePath, ignorePatterns, isDirectory = false) {
   const normalizedPath = filePath.replace(/\\/g, '/');
   const pathParts = normalizedPath.split('/');
   const fileName = pathParts[pathParts.length - 1];
-  // Check if any directory in path should be ignored
+  if (isDirectory) {
+    if (IGNORED_DIRECTORIES.has(fileName)) return true;
+    for (const pattern of ignorePatterns) {
+      if (!pattern.includes('/') && fileName === pattern) return true;
+    }
+    return false;
+  }
+  // For files: check all ancestor directories
   for (const part of pathParts.slice(0, -1)) {
     if (IGNORED_DIRECTORIES.has(part)) {
       return true;
     }
   }
   // Check if it's a code file using whitelist
   if (!isCodeFile(filePath)) {
     return true;
@@ -402,22 +294,13 @@ export function shouldIgnore(filePath, ignorePatterns) {
   // Check against additional ignore patterns
   for (const pattern of ignorePatterns) {
-    // Handle path patterns (contain /)
     if (pattern.includes('/')) {
-      if (normalizedPath.includes(pattern)) {
-        return true;
-      }
-    }
-    // Handle exact file name patterns
-    else if (fileName === pattern) {
+      if (normalizedPath.includes(pattern)) return true;
+    } else if (fileName === pattern) {
       return true;
-    }
-    // Handle directory name patterns (match any path part)
-    else {
+    } else {
       for (const part of pathParts) {
-        if (part === pattern || part.startsWith(pattern + '/')) {
-          return true;
-        }
+        if (part === pattern) return true;
       }
     }
   }

package/src/scanner.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { readdirSync, statSync, readFileSync } from 'fs';
 import { join, relative } from 'path';
-import { shouldIgnore, isCodeFile } from './ignore-parser.js';
+import { shouldIgnore, shouldIgnoreDirectory, isCodeFile } from './ignore-parser.js';
 function getFileExtension(filePath) {
   const lastDot = filePath.lastIndexOf('.');
@@ -33,14 +33,15 @@ function walkDirectory(dirPath, ignorePatterns, relativePath = '') {
       // Normalize to forward slashes for consistent ignore pattern matching
       const normalizedRelPath = relPath.replace(/\\/g, '/');
-      // Check if should ignore
-      if (shouldIgnore(normalizedRelPath, ignorePatterns)) {
-        continue;
-      }
       if (entry.isDirectory()) {
+        if (shouldIgnoreDirectory(normalizedRelPath) || shouldIgnore(normalizedRelPath, ignorePatterns, true)) {
+          continue;
+        }
         files.push(...walkDirectory(fullPath, ignorePatterns, relPath));
       } else if (entry.isFile()) {
+        if (shouldIgnore(normalizedRelPath, ignorePatterns, false)) {
+          continue;
+        }
         if (isCodeFile(normalizedRelPath) && !isBinaryFile(entry.name)) {
           try {
             const stat = entry.isSymbolicLink ? null : statSync(fullPath);

package/src/text-search.js CHANGED Viewed

@@ -4,19 +4,21 @@ export function buildTextIndex(chunks) {
   chunks.forEach((chunk, idx) => {
     const tokens = tokenize(chunk.content);
+    const fileNameTokens = tokenize(chunk.file_path);
     const symbols = extractSymbols(chunk.content);
     const frequency = new Map();
     tokens.forEach(token => {
       frequency.set(token, (frequency.get(token) || 0) + 1);
       if (!index.has(token)) {
-        index.set(token, []);
+        index.set(token, new Set());
       }
-      index.get(token).push(idx);
+      index.get(token).add(idx);
     });
     chunkMetadata[idx] = {
       tokens,
+      fileNameTokens,
       symbols,
       frequency,
       isCode: isCodeFile(chunk.file_path),
@@ -32,38 +34,56 @@ export function searchText(query, chunks, indexData) {
   const querySymbols = extractSymbols(query);
   const chunkScores = new Map();
-  chunks.forEach((chunk, idx) => {
+  // Use index to find candidate chunks efficiently
+  const candidates = new Set();
+  queryTokens.forEach(token => {
+    if (index.has(token)) {
+      for (const idx of index.get(token)) candidates.add(idx);
+    }
+  });
+  querySymbols.forEach(sym => {
+    if (index.has(sym)) {
+      for (const idx of index.get(sym)) candidates.add(idx);
+    }
+  });
+  for (const idx of candidates) {
+    const chunk = chunks[idx];
+    const meta = chunkMetadata[idx];
     let score = 0;
     queryTokens.forEach(token => {
-      if (index.has(token)) {
-        if (index.get(token).includes(idx)) {
-          const freq = chunkMetadata[idx].frequency.get(token) || 1;
-          const boost = token.length > 4 ? 1.5 : 1;
-          score += boost * freq;
-        }
+      if (index.has(token) && index.get(token).has(idx)) {
+        const freq = meta.frequency.get(token) || 1;
+        const lengthBoost = token.length > 4 ? 1.5 : 1;
+        score += lengthBoost * Math.min(freq, 5);
       }
     });
+    // Filename token match - strong signal that this file is about the query topic
+    let fileNameMatches = 0;
+    queryTokens.forEach(token => {
+      if (meta.fileNameTokens.includes(token)) fileNameMatches++;
+    });
+    if (fileNameMatches > 0) {
+      score += fileNameMatches * 8;
+    }
+    // Symbol match in content - function/class named after query terms
     querySymbols.forEach(symbol => {
-      if (chunkMetadata[idx].symbols.includes(symbol)) {
-        score += 5;
-      }
+      if (meta.symbols.includes(symbol)) score += 5;
     });
-    const exactMatch = chunk.content.includes(query);
-    if (exactMatch) {
-      score += 10;
+    // Exact phrase match
+    if (chunk.content.toLowerCase().includes(query.toLowerCase())) {
+      score += 15;
     }
-    if (chunkMetadata[idx].isCode) {
-      score *= 1.2;
-    }
+    // Code file boost
+    if (meta.isCode) score *= 1.2;
-    if (score > 0) {
-      chunkScores.set(idx, score);
-    }
-  });
+    if (score > 0) chunkScores.set(idx, score);
+  }
   const results = Array.from(chunkScores.entries())
     .map(([idx, score]) => ({
@@ -71,7 +91,6 @@ export function searchText(query, chunks, indexData) {
       score: Math.min(score / 100, 1),
       _rawScore: score,
     }))
-    .filter(r => r.score > 0)
     .sort((a, b) => b._rawScore - a._rawScore);
   return results;
@@ -80,20 +99,24 @@ export function searchText(query, chunks, indexData) {
 function tokenize(text) {
   const tokens = new Set();
-  text.toLowerCase().split(/\s+/).forEach(word => {
+  text.split(/\s+/).forEach(word => {
     if (word.length === 0) return;
-    tokens.add(word.replace(/[^\w]/g, ''));
-    const camelCaseTokens = word.match(/[a-z]+|[A-Z][a-z]*|[0-9]+/g) || [];
+    // camelCase/PascalCase split BEFORE lowercasing so uppercase boundaries are visible
+    const camelCaseTokens = word.match(/[A-Z]?[a-z]+|[A-Z]+(?=[A-Z][a-z]|\d|\W|$)|[0-9]+/g) || [];
     camelCaseTokens.forEach(t => {
       if (t.length > 1) tokens.add(t.toLowerCase());
     });
-    const snakeCaseTokens = word.split(/[-_]/).filter(t => t.length > 0);
-    snakeCaseTokens.forEach(t => {
-      if (t.length > 1) tokens.add(t.toLowerCase());
+    // snake_case and kebab-case split
+    word.split(/[-_.]/).forEach(t => {
+      const cleaned = t.replace(/[^\w]/g, '').toLowerCase();
+      if (cleaned.length > 1) tokens.add(cleaned);
     });
+    // Full word lowercased (stripped of punctuation)
+    const cleaned = word.replace(/[^\w]/g, '').toLowerCase();
+    if (cleaned.length > 1) tokens.add(cleaned);
   });
   return Array.from(tokens).filter(t => t.length > 1);

package/.prd DELETED Viewed

@@ -1,58 +0,0 @@
-{
-  "project": "thorns-mcp",
-  "created": "2026-03-06",
-  "objective": "Improve search output to be maximally revealing so agents need minimal follow-up exploration",
-  "items": [
-    {
-      "id": "1",
-      "subject": "Expand snippet from 3 lines to full chunk content in search-worker.js",
-      "status": "pending",
-      "description": "In search-worker.js line 63, snippet is truncated to 3 lines. Show full chunk content (up to ~30 lines) so agents see the complete context. Also add totalLines to each result by counting newlines in full file content.",
-      "blocking": ["3"],
-      "blockedBy": [],
-      "effort": "small",
-      "category": "feature"
-    },
-    {
-      "id": "2",
-      "subject": "Add enclosing function/class context detection to search-worker.js",
-      "status": "pending",
-      "description": "For each result, detect the nearest enclosing function or class name above the match line. Pass this as 'context' field in the result object. Use regex scan backwards through lines above line_start.",
-      "blocking": ["3"],
-      "blockedBy": [],
-      "effort": "medium",
-      "category": "feature"
-    },
-    {
-      "id": "3",
-      "subject": "Update mcp.js output formatter to display all new fields",
-      "status": "pending",
-      "description": "Update the result formatting in mcp.js to show: totalLines alongside path (e.g. 'file.js [142 lines]:5-20'), enclosing context (e.g. 'in: functionName'), and the full snippet. Also show relative path when repository_path is provided.",
-      "blocking": ["4"],
-      "blockedBy": ["1", "2"],
-      "effort": "small",
-      "category": "feature"
-    },
-    {
-      "id": "4",
-      "subject": "Verify output quality by running CLI on /mnt/c/dev/spawnpoint",
-      "status": "pending",
-      "description": "Run: node src/cli.js /mnt/c/dev/spawnpoint with a test query and inspect the output quality. Confirm snippets are longer, context is shown, line counts are shown.",
-      "blocking": ["5"],
-      "blockedBy": ["3"],
-      "effort": "small",
-      "category": "feature"
-    },
-    {
-      "id": "5",
-      "subject": "Bump version to 0.1.19 and git push",
-      "status": "pending",
-      "description": "Update package.json version from 0.1.18 to 0.1.19, git add -A, commit, push.",
-      "blocking": [],
-      "blockedBy": ["4"],
-      "effort": "small",
-      "category": "infra"
-    }
-  ],
-  "completed": []
-}