codebasesearch 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codebasesearch",
3
- "version": "0.1.12",
3
+ "version": "0.1.14",
4
4
  "description": "Ultra-simple code search tool with Jina embeddings, LanceDB, and MCP protocol support",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,9 +1,87 @@
1
1
  import { readFileSync, existsSync } from 'fs';
2
- import { join, dirname } from 'path';
2
+ import { join, dirname, extname } from 'path';
3
3
  import { fileURLToPath } from 'url';
4
4
 
5
5
  const __dirname = dirname(fileURLToPath(import.meta.url));
6
6
 
7
/**
 * Whitelist of file extensions that are treated as source code.
 *
 * Every entry is lowercase and starts with a dot. isCodeFile() lowercases the
 * extension before looking it up here, so upper-case variants (e.g. '.R',
 * '.S') could never match and are deliberately not listed; files such as
 * `plot.R` or `boot.S` are still accepted through '.r' / '.s'.
 */
const CODE_EXTENSIONS = new Set([
  // JavaScript/TypeScript
  '.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.mts', '.cts',
  // Python
  '.py', '.pyw', '.pyi',
  // Java
  '.java',
  // C/C++
  '.c', '.cpp', '.cc', '.cxx', '.h', '.hpp', '.hh', '.hxx',
  // C#
  '.cs',
  // Go
  '.go',
  // Rust
  '.rs',
  // Ruby
  '.rb',
  // PHP
  '.php', '.phtml',
  // Swift
  '.swift',
  // Kotlin
  '.kt', '.kts',
  // Scala
  '.scala', '.sc',
  // Perl
  '.pl', '.pm',
  // Shell/Bash
  '.sh', '.bash', '.zsh', '.fish',
  // PowerShell
  '.ps1', '.psm1', '.psd1',
  // Lua
  '.lua',
  // R
  '.r',
  // MATLAB/Octave and Objective-C share '.m', so it is listed once here
  '.m',
  // NOTE(review): '.mat' is normally a MATLAB data file rather than source;
  // kept to preserve the existing whitelist - confirm whether it belongs here.
  '.mat',
  // Objective-C++
  '.mm',
  // Julia
  '.jl',
  // Dart
  '.dart',
  // Elixir
  '.ex', '.exs',
  // Erlang
  '.erl', '.hrl',
  // Haskell
  '.hs', '.lhs',
  // Clojure
  '.clj', '.cljs', '.cljc',
  // Lisp
  '.lisp', '.lsp', '.scm', '.ss', '.rkt',
  // Fortran
  '.f', '.for', '.f90', '.f95', '.f03',
  // Assembly
  '.asm', '.s',
  // Groovy
  '.groovy', '.gvy',
  // Visual Basic
  '.vb', '.vbs',
  // F#
  '.fs', '.fsx',
  // OCaml
  '.ml', '.mli',
  // Arduino
  '.ino',
  // Vue SFC
  '.vue',
  // Svelte
  '.svelte',
  // CoffeeScript
  '.coffee',
  // Reason
  '.re', '.rei',
]);
84
+
7
85
  function loadDefaultIgnores() {
8
86
  const ignorePath = join(__dirname, '..', '.thornsignore');
9
87
  if (!existsSync(ignorePath)) {
@@ -197,11 +275,124 @@ export function loadIgnorePatterns(rootPath) {
197
275
  return merged;
198
276
  }
199
277
 
278
/**
 * Path-segment names that cause a path to be ignored.
 *
 * Lookups against this set are exact and case-sensitive (`Set.has`), which is
 * why several names appear in more than one casing. Glob-style strings such as
 * '*.egg-info' or '*.log' can never equal a real path segment, so they are not
 * listed here. Each name appears exactly once even when it is relevant to
 * several ecosystems (e.g. 'vendor', 'target', 'build').
 *
 * NOTE(review): a few entries are file names rather than directory names
 * ('Cargo.lock', 'CMakeCache.txt', '.dockerignore', '.ruby-version', ...).
 * They are kept so the set's membership is unchanged - confirm whether they
 * are still wanted.
 */
const IGNORED_DIRECTORIES = new Set([
  // Dependencies - NEVER include
  'node_modules', 'bower_components', 'jspm_packages', 'web_modules',
  // Version control
  '.git', '.svn', '.hg', '.bzr',
  // IDE/Editor
  '.vscode', '.idea', '.vs', '.atom', '.sublime-project',
  '.eclipse', '.settings', '.project', '.classpath',
  // Build outputs
  'dist', 'dist-server', 'dist-ssr', 'dist-client',
  'build', 'built', 'Build', 'BUILD',
  'out', 'output', 'Output', 'OUT', 'release', 'Release', 'RELEASE',
  'target', 'Target', 'TARGET',
  'bin', 'Bin', 'BIN', 'obj', 'Obj', 'OBJ',
  'public', 'static', 'assets', 'www', 'wwwroot',
  'site', '_site', '.site', '.docusaurus', '.gatsby', '.vuepress',
  'storybook-static', '.nuxt', 'nuxt', '.next', 'next',
  'out-tsc', 'tsc', '.tsc',
  'cmake_build_debug', 'cmake_build_release', 'CMakeFiles', 'CMakeCache.txt',
  'server', 'client', 'browser', 'esm', 'cjs', 'umd', 'lib', 'es',
  // Cache directories
  '.cache', 'cache', '.parcel-cache', '.vite', 'vite', '.turbo', 'turbo',
  // Package managers / monorepo tooling
  '.npm', '.yarn', '.pnp', '.pnpm-store', '.pnpm', '.bun', '.rush', '.lerna', '.nx',
  // Testing
  'coverage', '.nyc_output', '.coverage', 'htmlcov', 'test-results',
  'test', 'tests', 'Test', 'Tests', 'TEST', 'TESTS',
  '__tests__', '__mocks__', '__snapshots__', '__fixtures__',
  'cypress', 'playwright', 'e2e', 'integration', 'spec', 'specs',
  '.tox', '.eggs', '.hypothesis', '.pyre', '.pytype',
  // Python
  '__pycache__', '.pytest_cache', '.mypy_cache', '.venv', 'venv', 'env',
  'env.bak', 'venv.bak', '.Python', 'pip-wheel-metadata',
  // Java/Gradle/Maven
  '.gradle', '.mvn', 'gradle', 'mvn',
  // iOS/Android
  'Pods', 'DerivedData', '.bundle', 'xcuserdata', '.xcodeproj', '.xcworkspace',
  // Ruby / Rust / Go / PHP
  'vendor', '.ruby-version', 'pkg', 'Cargo.lock', 'Godeps', 'composer',
  // Infrastructure
  '.terraform', '.terragrunt-cache', '.pulumi', '.serverless', '.firebase',
  '.aws', '.azure', '.gcloud', '.vercel', '.netlify', '.now',
  // Docker
  '.docker', 'docker', '.dockerignore',
  // Temp files
  'temp', 'tmp', '.tmp', '.temp', 'tmpfs', 'scratch', '.scratch',
  // Documentation
  'docs', 'doc', 'documentation', 'wiki', 'guides', 'examples', 'demo', 'demos',
  'CHANGELOG', 'HISTORY', 'NEWS', 'LICENSE', 'LICENCE', 'COPYING', 'AUTHORS',
  // Logs
  'logs', 'log',
  // Data/Storage
  'storage', 'data', 'database', 'db', 'fixtures', 'seeds',
  'uploads', 'files', 'media', 'resources', 'images', 'img',
  // LLM/AI
  '.llamaindex', '.chroma', '.vectorstore', '.embeddings',
  '.langchain', '.autogen', '.semantic-kernel', '.openai-cache',
  '.anthropic-cache', 'embeddings', 'vector-db', 'faiss-index',
  'chromadb', 'pinecone-cache', 'weaviate-data',
  // Type declarations
  'typings', 'types', '@types', 'type-definitions',
]);
349
+
350
/**
 * Report whether a path names a source-code file, judged only by the
 * extension of its final path segment.
 *
 * Both '/' and '\' are accepted as separators. The extension is compared
 * case-insensitively against CODE_EXTENSIONS. Names without an extension,
 * and dotfiles whose only dot is the leading one (e.g. '.gitignore'),
 * are never code files.
 *
 * @param {string} filePath - Relative or absolute path of the file.
 * @returns {boolean} True when the file's extension is whitelisted.
 * @throws {TypeError} If `filePath` is not a string.
 */
export function isCodeFile(filePath) {
  if (typeof filePath !== 'string') {
    throw new TypeError(`isCodeFile expects a string path, received ${typeof filePath}`);
  }

  // Normalise Windows separators first: on POSIX, path helpers do not treat
  // '\' as a separator, so the base name is isolated by hand.
  const fileName = filePath.replace(/\\/g, '/').split('/').pop();

  // extname() returns '' both for names without a dot and for dotfiles such
  // as '.gitignore', which is exactly the "no usable extension" case.
  const ext = extname(fileName).toLowerCase();
  return ext !== '' && CODE_EXTENSIONS.has(ext);
}
364
+
365
/**
 * Report whether a directory path should be skipped because one of its
 * segments is listed in IGNORED_DIRECTORIES.
 *
 * Both '/' and '\' are accepted as separators. Every segment of the path is
 * tested with an exact, case-sensitive match, so an ignored name anywhere in
 * the path triggers a match - including ancestor folders when an absolute
 * path is supplied.
 *
 * @param {string} dirPath - Relative or absolute directory path.
 * @returns {boolean} True when any path segment is an ignored name.
 * @throws {TypeError} If `dirPath` is not a string.
 */
export function shouldIgnoreDirectory(dirPath) {
  if (typeof dirPath !== 'string') {
    throw new TypeError(`shouldIgnoreDirectory expects a string path, received ${typeof dirPath}`);
  }

  return dirPath
    .replace(/\\/g, '/')
    .split('/')
    .some((segment) => IGNORED_DIRECTORIES.has(segment));
}
377
+
200
378
  export function shouldIgnore(filePath, ignorePatterns) {
201
379
  const normalizedPath = filePath.replace(/\\/g, '/');
202
380
  const pathParts = normalizedPath.split('/');
203
381
  const fileName = pathParts[pathParts.length - 1];
382
+
383
+ // Check if any directory in path should be ignored
384
+ for (const part of pathParts.slice(0, -1)) {
385
+ if (IGNORED_DIRECTORIES.has(part)) {
386
+ return true;
387
+ }
388
+ }
389
+
390
+ // Check if it's a code file using whitelist
391
+ if (!isCodeFile(filePath)) {
392
+ return true;
393
+ }
204
394
 
395
+ // Check against additional ignore patterns
205
396
  for (const pattern of ignorePatterns) {
206
397
  // Handle path patterns (contain /)
207
398
  if (pattern.includes('/')) {
@@ -209,13 +400,6 @@ export function shouldIgnore(filePath, ignorePatterns) {
209
400
  return true;
210
401
  }
211
402
  }
212
- // Handle extension patterns (*.ext)
213
- else if (pattern.startsWith('*.')) {
214
- const ext = pattern.slice(1);
215
- if (fileName.endsWith(ext)) {
216
- return true;
217
- }
218
- }
219
403
  // Handle exact file name patterns
220
404
  else if (fileName === pattern) {
221
405
  return true;
package/src/scanner.js CHANGED
@@ -28,7 +28,8 @@ const SUPPORTED_EXTENSIONS = new Set([
28
28
  '.html', '.htm',
29
29
  '.css', '.scss', '.sass', '.less',
30
30
  '.vue', '.svelte',
31
- '.md', '.markdown'
31
+ '.md', '.markdown',
32
+ '.txt'
32
33
  ]);
33
34
 
34
35
  function getFileExtension(filePath) {