codebasesearch 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.thornsignore ADDED
@@ -0,0 +1,419 @@
1
+ # Code Search Default Ignore Patterns
2
+ # Ultra-comprehensive ignore list for all languages and frameworks
3
+
4
+ # === VERSION CONTROL ===
5
+ .git/
6
+ .svn/
7
+ .hg/
8
+ .bzr/
9
+ CVS/
10
+
11
+ # === NODE / JAVASCRIPT / TYPESCRIPT ===
12
+ node_modules/
13
+ .npm/
14
+ .yarn/
15
+ .pnp.*
16
+ .pnp/
17
+ yarn-error.log
18
+ npm-debug.log*
19
+ .eslintcache
20
+ .node_repl_history
21
+ *.tsbuildinfo
22
+ .next/
23
+ .nuxt/
24
+ dist/
25
+ out/
26
+ build/
27
+ .cache/
28
+ .parcel-cache/
29
+ .vite/
30
+ .turbo/
31
+
32
+ # === PYTHON ===
33
+ __pycache__/
34
+ *.py[cod]
35
+ *$py.class
36
+ *.so
37
+ .Python
38
+ env/
39
+ venv/
40
+ ENV/
41
+ .venv
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+ .pytest_cache/
45
+ .hypothesis/
46
+ .mypy_cache/
47
+ .dmypy.json
48
+ .pyre/
49
+ .pytype/
50
+ *.egg-info/
51
+ dist/
52
+ build/
53
+ *.whl
54
+ .tox/
55
+ .coverage
56
+ htmlcov/
57
+
58
+ # === RUST ===
59
+ target/
60
+ Cargo.lock
61
+ **/*.rs.bk
62
+ *.pdb
63
+
64
+ # === GO ===
65
+ vendor/
66
+ *.exe
67
+ *.exe~
68
+ *.dll
69
+ *.so
70
+ *.dylib
71
+ *.test
72
+ *.out
73
+ go.work
74
+
75
+ # === JAVA ===
76
+ target/
77
+ *.class
78
+ *.jar
79
+ *.war
80
+ *.ear
81
+ *.nar
82
+ .gradle/
83
+ build/
84
+ .mvn/
85
+ !.mvn/wrapper/maven-wrapper.jar
86
+
87
+ # === C / C++ ===
88
+ *.o
89
+ *.a
90
+ *.so
91
+ *.out
92
+ *.exe
93
+ *.obj
94
+ *.dll
95
+ *.dylib
96
+ CMakeFiles/
97
+ CMakeCache.txt
98
+ cmake_build_debug/
99
+ cmake_build_release/
100
+ build/
101
+ *.cmake
102
+ !CMakeLists.txt
103
+
104
+ # === C# / .NET ===
105
+ bin/
106
+ obj/
107
+ *.suo
108
+ *.user
109
+ *.userosscache
110
+ *.sln.docstates
111
+ [Dd]ebug/
112
+ [Rr]elease/
113
+ x64/
114
+ x86/
115
+ [Bb]uild/
116
+ bld/
117
+ [Bb]in/
118
+ [Oo]bj/
119
+ *.nupkg
120
+ *.snupkg
121
+ project.lock.json
122
+ project.fragment.lock.json
123
+ artifacts/
124
+
125
+ # === RUBY ===
126
+ *.gem
127
+ *.rbc
128
+ /.config
129
+ /coverage/
130
+ /InstalledFiles
131
+ /pkg/
132
+ /spec/reports/
133
+ /spec/examples.txt
134
+ /test/tmp/
135
+ /test/version_tmp/
136
+ /tmp/
137
+ .bundle/
138
+ vendor/bundle/
139
+ lib/bundler/man/
140
+
141
+ # === PHP ===
142
+ /vendor/
143
+ composer.lock
144
+ *.phar
145
+
146
+ # === SCALA / SBT ===
147
+ target/
148
+ lib_managed/
149
+ src_managed/
150
+ project/boot/
151
+ project/plugins/project/
152
+ .history
153
+ .cache
154
+ .lib/
155
+
156
+ # === SWIFT / XCODE ===
157
+ *.xcodeproj/
158
+ *.xcworkspace/
159
+ Pods/
160
+ DerivedData/
161
+ *.moved-aside
162
+ *.pbxuser
163
+ !default.pbxuser
164
+ *.mode1v3
165
+ !default.mode1v3
166
+ *.mode2v3
167
+ !default.mode2v3
168
+ *.perspectivev3
169
+ !default.perspectivev3
170
+
171
+ # === FIREBASE ===
172
+ .firebase/
173
+ firebase-debug.log
174
+ firestore-debug.log
175
+ ui-debug.log
176
+ .firebaserc
177
+
178
+ # === LLM / AI / AGENTIC FRAMEWORKS ===
179
+ .cache/
180
+ .llamaindex/
181
+ .chroma/
182
+ .vectorstore/
183
+ .embeddings/
184
+ .langchain/
185
+ .autogen/
186
+ .semantic-kernel/
187
+ .openai-cache/
188
+ .anthropic-cache/
189
+ embeddings/
190
+ vector-db/
191
+ faiss-index/
192
+ chromadb/
193
+ pinecone-cache/
194
+ weaviate-data/
195
+
196
+ # === CLOUD PROVIDERS ===
197
+ .aws/
198
+ .azure/
199
+ .gcloud/
200
+ .terraform/
201
+ *.tfstate
202
+ *.tfstate.backup
203
+ .terraformrc
204
+ terraform.rc
205
+
206
+ # === DOCKER / CONTAINERS ===
207
+ .dockerignore
208
+ docker-compose.override.yml
209
+ .docker/
210
+
211
+ # === DATABASES ===
212
+ *.db
213
+ *.sqlite
214
+ *.sqlite3
215
+ *.sql
216
+ *.bak
217
+ *.dump
218
+
219
+ # === LOGS & TEMP FILES ===
220
+ *.log
221
+ *.tmp
222
+ *.temp
223
+ *.swp
224
+ *.swo
225
+ *~
226
+ .DS_Store
227
+ Thumbs.db
228
+ desktop.ini
229
+ *.stackdump
230
+ *.orig
231
+
232
+ # === EDITORS & IDEs ===
233
+ .vscode/
234
+ .idea/
235
+ *.iml
236
+ .project
237
+ .classpath
238
+ .settings/
239
+ *.sublime-project
240
+ *.sublime-workspace
241
+ .vs/
242
+
243
+ # === COMPILED OUTPUT ===
244
+ *.min.js
245
+ *.min.css
246
+ *.bundle.js
247
+ *.chunk.js
248
+ *.map
249
+
250
+ # === DOCUMENTATION BUILD ===
251
+ docs/_build/
252
+ site/
253
+ _site/
254
+ .docusaurus/
255
+ .vuepress/dist/
256
+
257
+ # === TEST COVERAGE ===
258
+ coverage/
259
+ .nyc_output/
260
+ lcov.info
261
+ *.lcov
262
+
263
+ # === PACKAGE MANAGERS ===
264
+ .pnpm-store/
265
+ .rush/
266
+ .lerna/
267
+
268
+ # === MONOREPO ===
269
+ .nx/
270
+ .turbo/
271
+
272
+ # === METADATA / OS ===
273
+ .Spotlight-V100
274
+ .Trashes
275
+ ehthumbs.db
276
+ .fseventsd
277
+ .TemporaryItems
278
+ .AppleDouble
279
+ .LSOverride
280
+
281
+ # === SECURITY / SECRETS ===
282
+ .env
283
+ .env.local
284
+ .env.*.local
285
+ *.key
286
+ *.pem
287
+ *.p12
288
+ *.pfx
289
+ credentials.json
290
+ secrets.yaml
291
+ secrets.yml
292
+
293
+ # === PROFILING / BENCHMARKS ===
294
+ *.prof
295
+ *.cpuprofile
296
+ *.heapprofile
297
+
298
+ # === LOCK FILES (optional - can be removed if needed) ===
299
+ package-lock.json
300
+ yarn.lock
301
+ pnpm-lock.yaml
302
+ Gemfile.lock
303
+ poetry.lock
304
+ Pipfile.lock
305
+
306
+ # === CONFIGURATION & METADATA FILES (LOW-VALUE FOR ANALYSIS) ===
307
+ *.config.js
308
+ *.config.ts
309
+ webpack.config.js
310
+ rollup.config.js
311
+ vite.config.js
312
+ tsconfig.json
313
+ jsconfig.json
314
+ babel.config.*
315
+ .babelrc
316
+ .eslintrc.*
317
+ .prettierrc.*
318
+ .stylelintrc.*
319
+ .editorconfig
320
+ .nvmrc
321
+ .env.example
322
+ .env.template
323
+ .env.sample
324
+ *.local
325
+ *.development
326
+ *.production
327
+
328
+ # === DOCUMENTATION & REFERENCE FILES ===
329
+ *.md
330
+ *.txt
331
+ *.rst
332
+ *.adoc
333
+ docs/
334
+ documentation/
335
+ wiki/
336
+ CHANGELOG*
337
+ HISTORY*
338
+ NEWS*
339
+ UPGRADING*
340
+ FAQ*
341
+ CONTRIBUTING*
342
+ SECURITY*
343
+ LICENSE*
344
+ LICENCE*
345
+ COPYRIGHT*
346
+ NOTICE*
347
+ AUTHORS*
348
+ THIRDPARTY*
349
+ *.orig
350
+ *.rej
351
+
352
+ # === TEST & COVERAGE FILES ===
353
+ *.test.*
354
+ *.spec.*
355
+ test/
356
+ tests/
357
+ __tests__/
358
+ __mocks__/
359
+ fixtures/
360
+ spec/
361
+ cypress/
362
+ playwright/
363
+ test-results/
364
+ coverage/
365
+ .nyc_output/
366
+ lcov.info
367
+ *.lcov
368
+ .coverage
369
+ pytest.ini
370
+ tox.ini
371
+ jest.config.*
372
+ vitest.config.*
373
+
374
+ # === GENERATED & BUILD ARTIFACTS ===
375
+ dist/
376
+ build/
377
+ out/
378
+ target/
379
+ .next/
380
+ .nuxt/
381
+ .gatsby/
382
+ .docusaurus/
383
+ .vuepress/dist/
384
+ site/
385
+ public/
386
+ static/
387
+ .assets/
388
+ .cache/
389
+ .parcel-cache/
390
+ .vite/
391
+ .turbo/
392
+ .tmp/
393
+ temp/
394
+
395
+ # === DATABASE & DATA FILES ===
396
+ *.db
397
+ *.sqlite
398
+ *.sqlite3
399
+ *.sql
400
+ *.bak
401
+ *.dump
402
+ *.backup
403
+ *.data/
404
+ storage/
405
+ logs/
406
+ *.log
407
+
408
+ # === IDE & EDITOR SPECIFIC ===
409
+ .vscode/
410
+ .idea/
411
+ *.iml
412
+ *.swp
413
+ *.swo
414
+ *~
415
+ .DS_Store
416
+ Thumbs.db
417
+ desktop.ini
418
+ *.sublime-*
419
+ .vs/
package/README.md ADDED
@@ -0,0 +1,129 @@
1
+ # code-search
2
+
3
+ Ultra-simple semantic code search with Jina embeddings and LanceDB. Supports both CLI and MCP protocol interfaces.
4
+
5
+ ## Quick Start
6
+
7
+ ### CLI
8
+ ```bash
9
+ bunx code-search "your search query"
10
+ ```
11
+
12
+ ### MCP (for Claude Code & IDE plugins)
13
+ ```bash
14
+ bunx code-search --mcp
15
+ ```
16
+
17
+ Example:
18
+ ```bash
19
+ claude mcp add -s user code-search -- bunx code-search
20
+ ```
21
+
22
+ ## Features
23
+
24
+ - **Semantic search** across entire repositories using Jina embeddings (512-dim vectors)
25
+ - **Embedded vector database** (LanceDB) - no external servers or setup required
26
+ - **Auto-indexing** - automatically scans and indexes repository before each search
27
+ - **Comprehensive ignore patterns** - respects .gitignore and ignores build artifacts, node_modules, etc. across all languages
28
+ - **Single-shot execution** - no persistent processes, no background daemons
29
+ - **MCP protocol support** - integrates with Claude Code and other MCP-compatible tools
30
+ - **Auto-gitignore** - automatically adds `.code-search/` to .gitignore on first run
31
+ - **Auto-recover from corruption** - automatically detects and clears corrupted model cache on Protobuf errors
32
+ - **Performance optimized** - 5MB file size limits, smart chunking, batch embedding generation
33
+
34
+ ## Usage
35
+
36
+ ### Search from CLI
37
+
38
+ ```bash
39
+ bunx code-search "authentication middleware"
40
+ bunx code-search "database connection pool"
41
+ bunx code-search "error handling"
42
+ ```
43
+
44
+ ### Search a custom repository
45
+
46
+ ```bash
47
+ bunx code-search --repo /path/to/repo "query"
48
+ ```
49
+
50
+ **Default search directory**: When no path is specified, the tool searches the **current working directory** (the project root), not the Claude Code plugins directory. In Claude Code, this defaults to your project context.
51
+
52
+ ### MCP Tool (in Claude Code)
53
+
54
+ The `search` tool accepts:
55
+ - `query` (required): Natural language search string
56
+ - `repository_path` (optional): Path to repository (defaults to current directory)
57
+
58
+ Example:
59
+ ```
60
+ search query="middleware validation" repository_path="/path/to/repo"
61
+ ```
62
+
63
+ ## How It Works
64
+
65
+ 1. **Scans** the repository for code files (25+ language types supported)
66
+ 2. **Respects** .gitignore and comprehensive ignore patterns
67
+ 3. **Chunks** large files into manageable segments
68
+ 4. **Generates embeddings** using Jina embeddings v2 small (512 dimensions)
69
+ 5. **Stores** vectors in embedded LanceDB database
70
+ 6. **Searches** using semantic similarity
71
+ 7. **Returns** ranked results with line numbers and code snippets
72
+ 8. **Auto-adds** `.code-search/` to .gitignore
73
+
74
+ ## Supported Languages
75
+
76
+ JavaScript, TypeScript, Python, Go, Rust, Java, C/C++, C#, Ruby, PHP, Scala, Swift, Shell, SQL, R, Lua, Perl, Groovy, XML, JSON, YAML, TOML, HTML, CSS, SCSS, Vue, and more.
77
+
78
+ ## Storage
79
+
80
+ Search index is stored in `.code-search/lancedb/` (automatically added to .gitignore).
81
+
82
+ The first run downloads the Jina model (~120MB) to `~/.cache/huggingface`.
83
+
84
+ ## Performance
85
+
86
+ - **First run**: ~30-60s (downloads model + indexes repository)
87
+ - **Subsequent runs**: Sub-second search queries (index already exists)
88
+ - **Large repos** (10k+ files): May take 1-2 minutes for full indexing
89
+
90
+ ### Technical Optimizations
91
+
92
+ - **5MB file size limit**: Files larger than 5MB are skipped to prevent memory issues
93
+ - **Smart chunking**: Files >1000 lines auto-split into overlapping chunks (200-line overlap) for better semantic context
94
+ - **Batch embedding**: Chunks are embedded in batches of 32 for efficient local model inference
95
+ - **Binary detection**: 47 binary file extensions ignored (.zip, .exe, .jpg, .mp4, etc.)
96
+ - **Auto-recovery**: Detects Protobuf parsing errors in cached models and auto-clears corrupted cache
97
+ - **5-minute timeout**: Model loading has a timeout to prevent indefinite hangs
98
+
99
+ ## Installation Details
100
+
101
+ The package includes:
102
+
103
+ - **bin/code-search.js** - CLI entry point for direct use
104
+ - **mcp.js** - MCP server for integration with Claude Code
105
+ - **src/** - Core modules (embeddings, scanning, vector store, search)
106
+ - **.thornsignore** - Comprehensive ignore patterns (all languages/frameworks)
107
+
108
+ ## Ignored Files & Directories
109
+
110
+ By default, the tool ignores:
111
+ - All build artifacts (dist/, build/, target/, node_modules/, etc.)
112
+ - Version control (.git/, .svn/, .hg/, etc.)
113
+ - IDE files (.vscode/, .idea/, etc.)
114
+ - Lock files (package-lock.json, yarn.lock, etc.)
115
+ - Dependencies and caches
116
+ - Test files and coverage reports
117
+ - Secrets and credentials
118
+
119
+ Configure custom ignores via a `.codesearchignore` file.
120
+
121
+ ## Privacy
122
+
123
+ All processing happens locally. No data is sent to external servers. The Jina model is downloaded once and cached locally.
124
+
125
+ ## License
126
+
127
+ MIT
128
+ <!-- Triggered npm publishing -->
129
+
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env node
2
+
3
+ // MUST patch sharp before any other imports
4
+ import fs from 'fs';
5
+ import path from 'path';
6
+ import { fileURLToPath } from 'url';
7
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const distPath = path.join(__dirname, '..', 'node_modules', '@huggingface', 'transformers', 'dist', 'transformers.node.mjs');

// Marker comment written into the patched bundle so later runs can tell it
// has already been rewritten and skip the work.
const PATCH_MARKER = 'SHARP_REMOVED_FOR_WINDOWS_COMPATIBILITY';

/**
 * Rewrite the text of the transformers Node bundle so it no longer imports `sharp`.
 *
 * Three substitutions are applied: the top-level `sharp` import is replaced by
 * the marker comment, every re-export of the sharp module becomes an empty
 * object, and the "Unable to load image processing library." throw is replaced
 * by a `loadImageFunction` that only throws when it is actually called.
 *
 * @param {string} content - Current text of the bundle.
 * @returns {string} The rewritten text, or `content` itself when the marker is
 *   already present or none of the patterns matched.
 */
function stripSharpDependency(content) {
  if (content.includes(PATCH_MARKER)) {
    return content;
  }
  return content
    .replace(/import \* as __WEBPACK_EXTERNAL_MODULE_sharp__ from "sharp";\n/, `// ${PATCH_MARKER}\n`)
    .replace(/module\.exports = __WEBPACK_EXTERNAL_MODULE_sharp__;/g, 'module.exports = {};')
    .replace(/} else \{\s*throw new Error\('Unable to load image processing library\.'\);\s*\}/, '} else {\n loadImageFunction = async () => { throw new Error(\'Image processing unavailable\'); };\n}');
}

/**
 * Best-effort, in-place patch of the installed transformers bundle.
 *
 * Does nothing when the file is absent. The file is written only when the
 * patch actually changed its text, so an already-patched or non-matching
 * bundle is never rewritten. Read/write failures are reported on stderr
 * instead of being swallowed, but never abort the CLI.
 *
 * @param {string} filePath - Absolute path of the bundle to patch.
 * @returns {void}
 */
function patchTransformersBundle(filePath) {
  if (!fs.existsSync(filePath)) {
    return;
  }
  try {
    const content = fs.readFileSync(filePath, 'utf-8');
    const patched = stripSharpDependency(content);
    if (patched !== content) {
      fs.writeFileSync(filePath, patched);
    }
  } catch (err) {
    console.warn('Warning: could not patch transformers bundle:', (err && err.message) || err);
  }
}

// The patch has to land on disk before the CLI module (and, through it, the
// transformers bundle) is loaded, hence the dynamic import below.
patchTransformersBundle(distPath);

import('../src/cli.js')
  .then((m) => m.run(process.argv.slice(2)))
  .catch((err) => {
    // Fall back to the raw rejection value when it carries no message.
    console.error('Error:', (err && err.message) || err);
    process.exit(1);
  });