codebasesearch 0.1.12 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/ignore-parser.js +192 -8
package/package.json
CHANGED
package/src/ignore-parser.js
CHANGED
|
@@ -1,9 +1,87 @@
|
|
|
1
1
|
import { readFileSync, existsSync } from 'fs';
|
|
2
|
-
import { join, dirname } from 'path';
|
|
2
|
+
import { join, dirname, extname } from 'path';
|
|
3
3
|
import { fileURLToPath } from 'url';
|
|
4
4
|
|
|
5
5
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
6
6
|
|
|
7
|
+
/**
 * Allow-list of file extensions (leading dot included) that are treated as
 * source code.
 *
 * `isCodeFile` lower-cases a file's extension before looking it up here, so
 * the lower-case spellings are the ones that lookup relies on. The upper-case
 * `.R` and `.S` spellings are retained for any case-sensitive lookup.
 *
 * `.mat` is deliberately absent: MATLAB MAT-files are binary data containers,
 * not source text.
 */
const CODE_EXTENSIONS = new Set([
  // JavaScript / TypeScript
  '.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.mts', '.cts',
  // Python
  '.py', '.pyw', '.pyi',
  // Java
  '.java',
  // C / C++
  '.c', '.cpp', '.cc', '.cxx', '.h', '.hpp', '.hh', '.hxx',
  // C#
  '.cs',
  // Go
  '.go',
  // Rust
  '.rs',
  // Ruby
  '.rb',
  // PHP
  '.php', '.phtml',
  // Swift
  '.swift',
  // Kotlin
  '.kt', '.kts',
  // Scala
  '.scala', '.sc',
  // Perl
  '.pl', '.pm',
  // Shell
  '.sh', '.bash', '.zsh', '.fish',
  // PowerShell
  '.ps1', '.psm1', '.psd1',
  // Lua
  '.lua',
  // R
  '.r', '.R',
  // MATLAB / Octave and Objective-C share the `.m` extension
  '.m',
  // Julia
  '.jl',
  // Dart
  '.dart',
  // Elixir
  '.ex', '.exs',
  // Erlang
  '.erl', '.hrl',
  // Haskell
  '.hs', '.lhs',
  // Clojure
  '.clj', '.cljs', '.cljc',
  // Lisp family
  '.lisp', '.lsp', '.scm', '.ss', '.rkt',
  // Fortran
  '.f', '.for', '.f90', '.f95', '.f03',
  // Assembly
  '.asm', '.s', '.S',
  // Groovy
  '.groovy', '.gvy',
  // Visual Basic
  '.vb', '.vbs',
  // F#
  '.fs', '.fsx',
  // OCaml
  '.ml', '.mli',
  // Objective-C++ (`.m` is listed above)
  '.mm',
  // Arduino
  '.ino',
  // Vue single-file components
  '.vue',
  // Svelte
  '.svelte',
  // CoffeeScript
  '.coffee',
  // Reason
  '.re', '.rei',
]);
|
|
84
|
+
|
|
7
85
|
function loadDefaultIgnores() {
|
|
8
86
|
const ignorePath = join(__dirname, '..', '.thornsignore');
|
|
9
87
|
if (!existsSync(ignorePath)) {
|
|
@@ -197,11 +275,124 @@ export function loadIgnorePatterns(rootPath) {
|
|
|
197
275
|
return merged;
|
|
198
276
|
}
|
|
199
277
|
|
|
278
|
+
/**
 * Names of path segments that cause a path to be skipped.
 *
 * Membership is tested with `Set#has`, i.e. by exact string equality against a
 * single path segment. Entries that contain `*` are therefore NOT expanded as
 * globs; they only match a segment spelled literally that way.
 *
 * Each name is listed once, under the first category it belongs to.
 */
const IGNORED_DIRECTORIES = new Set([
  // Installed third-party dependencies
  'node_modules', 'bower_components', 'jspm_packages', 'web_modules',

  // Version-control metadata and editor workspaces
  '.git', '.svn', '.hg', '.bzr',
  '.vscode', '.idea', '.vs', '.atom', '.sublime-project',

  // Build, bundler and site-generator output
  'dist', 'dist-server', 'dist-ssr', 'dist-client',
  'build', 'built', 'Build', 'BUILD',
  'out', 'output', 'Output', 'OUT',
  'release', 'Release', 'RELEASE',
  'target', 'Target', 'TARGET',
  'bin', 'Bin', 'BIN',
  'obj', 'Obj', 'OBJ',
  'public', 'static', 'assets', 'www', 'wwwroot',
  'site', '_site', '.site',
  '.docusaurus', '.gatsby', '.vuepress', 'storybook-static',
  '.nuxt', 'nuxt', '.next', 'next',
  'out-tsc', 'tsc', '.tsc',

  // Tool caches
  '.cache', 'cache', '.parcel-cache',
  '.vite', 'vite', '.turbo', 'turbo',
  '.npm', '.yarn', '.pnp', '.pnpm-store', '.rush', '.lerna', '.nx',

  // Tests, fixtures and coverage reports
  'coverage', '.nyc_output', '.coverage', 'htmlcov', 'test-results',
  'test', 'tests', 'Test', 'Tests', 'TEST', 'TESTS',
  '__tests__', '__mocks__', '__snapshots__', '__fixtures__',
  'cypress', 'playwright', 'e2e', 'integration', 'spec', 'specs',
  '.tox', '.eggs', '.hypothesis', '.pyre', '.pytype',

  // Python environments and packaging
  '__pycache__', '.pytest_cache', '.mypy_cache',
  '.venv', 'venv', 'env', 'env.bak', 'venv.bak',
  '.Python', 'pip-wheel-metadata', '*.egg-info',

  // Java / Gradle / Maven / Eclipse project metadata
  '.gradle', '.mvn', 'gradle', 'mvn',
  '.settings', '.project', '.classpath',

  // iOS / Android
  'Pods', 'DerivedData', '.bundle', 'xcuserdata', '.xcodeproj', '.xcworkspace',

  // Ruby (`vendor` also covers Go and PHP vendoring)
  'vendor', '.ruby-version', 'pkg',

  // Rust (`target` is listed with the build output)
  'Cargo.lock',

  // Go
  'Godeps',

  // PHP
  'composer',

  // Infrastructure and deployment tooling
  '.terraform', '.terragrunt-cache', '.pulumi', '.serverless', '.firebase',
  '.aws', '.azure', '.gcloud', '.vercel', '.netlify', '.now',

  // Docker
  '.docker', 'docker', '.dockerignore',

  // Temporary / scratch space
  'temp', 'tmp', '.tmp', '.temp', 'tmpfs', 'scratch', '.scratch',

  // Documentation and project meta files
  'docs', 'doc', 'documentation', 'wiki', 'guides', 'examples', 'demo', 'demos',
  'CHANGELOG', 'HISTORY', 'NEWS', 'LICENSE', 'LICENCE', 'COPYING', 'AUTHORS',

  // Remaining IDE metadata (the others are listed earlier)
  '.eclipse',

  // Logs
  'logs', 'log', '*.log',

  // Data, storage and media
  'storage', 'data', 'database', 'db', 'fixtures', 'seeds',
  'uploads', 'files', 'media', 'resources', 'images', 'img',

  // LLM / vector-store artefacts
  '.llamaindex', '.chroma', '.vectorstore', '.embeddings',
  '.langchain', '.autogen', '.semantic-kernel', '.openai-cache', '.anthropic-cache',
  'embeddings', 'vector-db', 'faiss-index', 'chromadb', 'pinecone-cache', 'weaviate-data',

  // Remaining package-manager stores
  '.pnpm', '.bun',

  // Type declaration folders
  'typings', 'types', '@types', 'type-definitions',

  // CMake
  'cmake_build_debug', 'cmake_build_release', 'CMakeFiles', 'CMakeCache.txt',

  // Per-target / per-module-format output folders
  'server', 'client', 'browser', 'esm', 'cjs', 'umd', 'lib', 'es',
]);
|
|
349
|
+
|
|
350
|
+
/**
 * Reports whether a path names a file whose extension is on the
 * `CODE_EXTENSIONS` allow-list.
 *
 * @param {string} filePath - File path; both `/` and `\` are accepted as separators.
 * @returns {boolean} `true` when the lower-cased extension of the final path
 *   segment is in `CODE_EXTENSIONS`, otherwise `false`.
 */
export function isCodeFile(filePath) {
  // Only the final segment can carry the extension.
  const baseName = filePath.replace(/\\/g, '/').split('/').pop();
  const dotAt = baseName.lastIndexOf('.');

  // -1: the name has no dot at all; 0: the only dot leads the name (dotfile).
  if (dotAt <= 0) {
    return false;
  }

  // Lookup is done on the lower-cased extension, dot included.
  return CODE_EXTENSIONS.has(baseName.slice(dotAt).toLowerCase());
}
|
|
364
|
+
|
|
365
|
+
/**
 * Reports whether any segment of a directory path is listed in
 * `IGNORED_DIRECTORIES`.
 *
 * @param {string} dirPath - Directory path; both `/` and `\` are accepted as separators.
 * @returns {boolean} `true` when at least one segment exactly equals an
 *   ignored name, otherwise `false`.
 */
export function shouldIgnoreDirectory(dirPath) {
  const segments = dirPath.replace(/\\/g, '/').split('/');
  // Every segment is checked, the last one included.
  return segments.some((segment) => IGNORED_DIRECTORIES.has(segment));
}
|
|
377
|
+
|
|
200
378
|
export function shouldIgnore(filePath, ignorePatterns) {
|
|
201
379
|
const normalizedPath = filePath.replace(/\\/g, '/');
|
|
202
380
|
const pathParts = normalizedPath.split('/');
|
|
203
381
|
const fileName = pathParts[pathParts.length - 1];
|
|
382
|
+
|
|
383
|
+
// Check if any directory in path should be ignored
|
|
384
|
+
for (const part of pathParts.slice(0, -1)) {
|
|
385
|
+
if (IGNORED_DIRECTORIES.has(part)) {
|
|
386
|
+
return true;
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
// Check if it's a code file using whitelist
|
|
391
|
+
if (!isCodeFile(filePath)) {
|
|
392
|
+
return true;
|
|
393
|
+
}
|
|
204
394
|
|
|
395
|
+
// Check against additional ignore patterns
|
|
205
396
|
for (const pattern of ignorePatterns) {
|
|
206
397
|
// Handle path patterns (contain /)
|
|
207
398
|
if (pattern.includes('/')) {
|
|
@@ -209,13 +400,6 @@ export function shouldIgnore(filePath, ignorePatterns) {
|
|
|
209
400
|
return true;
|
|
210
401
|
}
|
|
211
402
|
}
|
|
212
|
-
// Handle extension patterns (*.ext)
|
|
213
|
-
else if (pattern.startsWith('*.')) {
|
|
214
|
-
const ext = pattern.slice(1);
|
|
215
|
-
if (fileName.endsWith(ext)) {
|
|
216
|
-
return true;
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
403
|
// Handle exact file name patterns
|
|
220
404
|
else if (fileName === pattern) {
|
|
221
405
|
return true;
|