codebase-contextualizer-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +242 -0
- package/bin/contextualizer.js +3 -0
- package/index.js +248 -0
- package/package.json +43 -0
- package/scripts/benchmark.js +189 -0
- package/src/abort.js +27 -0
- package/src/cache.js +100 -0
- package/src/concurrency.js +20 -0
- package/src/database.js +172 -0
- package/src/file-filter.js +113 -0
- package/src/file-hash.js +17 -0
- package/src/indexer.js +290 -0
- package/src/parser.js +116 -0
- package/src/paths.js +14 -0
- package/src/search.js +134 -0
- package/src/shutdown.js +34 -0
- package/src/walker.js +214 -0
- package/src/worker-pool.js +178 -0
- package/src/worker.js +124 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Roopak Yadav
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# Codebase Contextualizer CLI
|
|
2
|
+
|
|
3
|
+
Query your codebase in plain English, completely offline. Codebase Contextualizer CLI indexes local JavaScript-family projects with Tree-sitter, creates local embeddings with `@xenova/transformers`, stores vectors in SQLite, and searches code semantics from your terminal without sending source code to external APIs.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
npx codebase-contextualizer-cli index .
|
|
7
|
+
npx codebase-contextualizer-cli search "where are files hashed with sha256" .
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Highlights
|
|
11
|
+
|
|
12
|
+
- Zero external API dependencies: embeddings and search run locally.
|
|
13
|
+
- Sub-0.08ms p95 vector retrieval latency over 1,000 local benchmark queries.
|
|
14
|
+
- Worker-thread indexing keeps parsing and embedding work off the main V8 event loop.
|
|
15
|
+
- Tree-sitter semantic chunks for JavaScript-family files.
|
|
16
|
+
- SQLite vector storage in `.contextualizer/vector.db`.
|
|
17
|
+
- JSON output for scripts, demos, and CI-friendly inspection.
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
Run without installing:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
npx codebase-contextualizer-cli <command> [arguments]
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Install globally:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
npm install -g codebase-contextualizer-cli
|
|
31
|
+
codebase-contextualizer <command> [arguments]
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Requirements:
|
|
35
|
+
|
|
36
|
+
- Node.js 20 or newer
|
|
37
|
+
- npm 10 or newer recommended
|
|
38
|
+
|
|
39
|
+
## Usage
|
|
40
|
+
|
|
41
|
+
Index a codebase:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
codebase-contextualizer index .
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Index with a fixed worker count:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
codebase-contextualizer index . --workers 4
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Check cache drift without writing updates:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
codebase-contextualizer status .
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Search indexed code:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
codebase-contextualizer search "hash files with sha256" .
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Return machine-readable output:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
codebase-contextualizer search "worker pool queue" . --json
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## CLI Examples
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
$ codebase-contextualizer index .
|
|
75
|
+
Index complete: C:\projects\my-app
|
|
76
|
+
Cache: C:\projects\my-app\.contextualizer\cache.json
|
|
77
|
+
Scanned files: 42
|
|
78
|
+
New: 42
|
|
79
|
+
Modified: 0
|
|
80
|
+
Unchanged: 0
|
|
81
|
+
Removed: 0
|
|
82
|
+
Embedded chunks: 118
|
|
83
|
+
Vector database: C:\projects\my-app\.contextualizer\vector.db
|
|
84
|
+
Persisted chunks: 118
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
$ codebase-contextualizer search "where is the worker queue drained" .
|
|
89
|
+
Search: where is the worker queue drained
|
|
90
|
+
Target: C:\projects\my-app
|
|
91
|
+
Database: C:\projects\my-app\.contextualizer\vector.db
|
|
92
|
+
|
|
93
|
+
1. drainQueue (function)
|
|
94
|
+
Score: 0.8124
|
|
95
|
+
File: src/worker-pool.js
|
|
96
|
+
Line: 74-118
|
|
97
|
+
Code:
|
|
98
|
+
function drainQueue() {
|
|
99
|
+
...
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
$ codebase-contextualizer status . --json
|
|
105
|
+
{
|
|
106
|
+
"root": "C:\\projects\\my-app",
|
|
107
|
+
"cacheExists": true,
|
|
108
|
+
"saved": false,
|
|
109
|
+
"counts": {
|
|
110
|
+
"scanned": 42,
|
|
111
|
+
"new": 0,
|
|
112
|
+
"modified": 1,
|
|
113
|
+
"unchanged": 41,
|
|
114
|
+
"removed": 0
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Features
|
|
120
|
+
|
|
121
|
+
- Offline semantic code search with no SaaS API calls.
|
|
122
|
+
- Incremental indexing with SHA-256 file hashes.
|
|
123
|
+
- `.gitignore`-aware traversal.
|
|
124
|
+
- Worker pool architecture for CPU-heavy parsing and embedding.
|
|
125
|
+
- Zero-copy transfer of `Float32Array` embeddings from workers to the main thread.
|
|
126
|
+
- SQLite persistence with WAL mode and transactional writes.
|
|
127
|
+
- Graceful Ctrl+C cleanup for workers and database handles.
|
|
128
|
+
|
|
129
|
+
## System Architecture
|
|
130
|
+
|
|
131
|
+
The CLI is split into two execution planes.
|
|
132
|
+
|
|
133
|
+
**Main thread**
|
|
134
|
+
|
|
135
|
+
- Parses commands with `commander`.
|
|
136
|
+
- Walks directories asynchronously while respecting `.gitignore`.
|
|
137
|
+
- Hashes source files with streamed SHA-256.
|
|
138
|
+
- Uses the cache at `.contextualizer/cache.json` to process only new or modified files.
|
|
139
|
+
- Owns all SQLite writes through `better-sqlite3`.
|
|
140
|
+
|
|
141
|
+
**Worker pool**
|
|
142
|
+
|
|
143
|
+
- Uses native Node.js `worker_threads`.
|
|
144
|
+
- Defaults to `Math.min(4, Math.max(1, os.cpus().length - 1))` workers.
|
|
145
|
+
- Each worker initializes its own Tree-sitter parser and singleton local embedding pipeline.
|
|
146
|
+
- Workers read files, extract JavaScript functions/classes/methods, build semantic payloads, and return chunk metadata plus vector tensors.
|
|
147
|
+
|
|
148
|
+
This design avoids the primary Node.js performance trap: running CPU-heavy AST parsing and ONNX inference on the main V8 event loop. The main thread remains responsible for orchestration and storage, while worker threads absorb the expensive compute path.
|
|
149
|
+
|
|
150
|
+
## Memory Management
|
|
151
|
+
|
|
152
|
+
Embeddings are represented as `Float32Array` instances end to end. Workers convert model output into compact typed arrays before returning results to the main process.
|
|
153
|
+
|
|
154
|
+
The worker response uses zero-copy transfer:
|
|
155
|
+
|
|
156
|
+
```js
|
|
157
|
+
parentPort.postMessage(payload, transferList);
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Each vector's underlying `ArrayBuffer` is included in `transferList`, so ownership moves from the worker to the main thread instead of cloning megabytes of embedding data. This prevents avoidable garbage collection pressure and keeps large-codebase indexing predictable.
|
|
161
|
+
|
|
162
|
+
## Storage Engine
|
|
163
|
+
|
|
164
|
+
Vectors are persisted in `.contextualizer/vector.db` with `better-sqlite3`.
|
|
165
|
+
|
|
166
|
+
The database uses:
|
|
167
|
+
|
|
168
|
+
- `PRAGMA journal_mode = WAL` for concurrent-friendly write behavior.
|
|
169
|
+
- A `files` table keyed by path and hash.
|
|
170
|
+
- A `chunks` table containing symbol metadata, line ranges, source snippets, and vector BLOBs.
|
|
171
|
+
- Bulk transactional writes with `db.transaction()` so chunk inserts are committed as a batch.
|
|
172
|
+
- Binary vector storage via `Buffer.from(embedding.buffer)`.
|
|
173
|
+
|
|
174
|
+
At search time, BLOBs are reconstructed as typed arrays:
|
|
175
|
+
|
|
176
|
+
```js
|
|
177
|
+
new Float32Array(
|
|
178
|
+
row.embedding.buffer,
|
|
179
|
+
row.embedding.byteOffset,
|
|
180
|
+
row.embedding.byteLength / Float32Array.BYTES_PER_ELEMENT
|
|
181
|
+
);
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Cosine similarity is computed over typed arrays and the top matches are ranked in memory.
|
|
185
|
+
|
|
186
|
+
## Benchmark
|
|
187
|
+
|
|
188
|
+
Run the search latency benchmark against the current directory:
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
node scripts/benchmark.js
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Run against another indexed target:
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
node scripts/benchmark.js ./src "authentication logic"
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
The benchmark opens `.contextualizer/vector.db`, embeds a dummy query once, runs 1,000 cosine-similarity scans, and reports average, p50, p90, and p95 latency in milliseconds.
|
|
201
|
+
|
|
202
|
+
Performance: p95 vector retrieval latency measured below 0.0763 ms over 1,000 local queries.
|
|
203
|
+
|
|
204
|
+
## Current Language Support
|
|
205
|
+
|
|
206
|
+
The parser dispatch layer currently routes JavaScript-family files:
|
|
207
|
+
|
|
208
|
+
- `.js`
|
|
209
|
+
- `.jsx`
|
|
210
|
+
- `.mjs`
|
|
211
|
+
- `.cjs`
|
|
212
|
+
|
|
213
|
+
Unsupported source extensions are skipped gracefully. The architecture is ready for additional Tree-sitter grammars through the parser router.
|
|
214
|
+
|
|
215
|
+
## Local Package Testing
|
|
216
|
+
|
|
217
|
+
From the repository root:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
npm install
|
|
221
|
+
npm link
|
|
222
|
+
codebase-contextualizer --help
|
|
223
|
+
codebase-contextualizer status . --json
|
|
224
|
+
npm pack --dry-run
|
|
225
|
+
npx --yes . --help
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Remove the global link when finished:
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
npm unlink -g codebase-contextualizer-cli
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## Engineering Notes
|
|
235
|
+
|
|
236
|
+
- No external embedding APIs are used.
|
|
237
|
+
- Unchanged files are not re-parsed or re-embedded.
|
|
238
|
+
- Indexing skips source files over 1 MB, common binary extensions, and obvious minified bundles before they reach the parser.
|
|
239
|
+
- Ctrl+C aborts active indexing, terminates worker threads, and closes open SQLite handles.
|
|
240
|
+
- SQLite writes happen on the main thread after worker results return.
|
|
241
|
+
- Search loads the local embedding model as a singleton for query vectors.
|
|
242
|
+
- The system favors explicit typed-array and BLOB boundaries over generic JavaScript arrays.
|
package/index.js
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const { Command, InvalidArgumentError } = require("commander");
|
|
4
|
+
const { isAbortError } = require("./src/abort");
|
|
5
|
+
const { getDefaultWorkerCount, indexTarget, statusTarget } = require("./src/indexer");
|
|
6
|
+
const { searchTarget } = require("./src/search");
|
|
7
|
+
const { runCleanup } = require("./src/shutdown");
|
|
8
|
+
|
|
9
|
+
// Default for the -c/--concurrency option (concurrent file hash operations).
const DEFAULT_HASH_CONCURRENCY = 16;
|
|
10
|
+
// Aborted by shutdown(); its signal is passed to the index, status and search calls.
const shutdownController = new AbortController();
|
|
11
|
+
// Set when the first shutdown begins so that a repeated signal exits immediately.
let shutdownStarted = false;
|
|
12
|
+
|
|
13
|
+
/**
 * Handle a termination signal: abort in-flight work, run cleanup, then exit.
 *
 * The first call aborts `shutdownController` with an error named "AbortError"
 * and awaits `runCleanup()` before exiting. A further signal received while
 * cleanup is still pending exits immediately.
 *
 * @param {string} signalName - Name of the received signal ("SIGINT" or "SIGTERM").
 * @returns {Promise<void>} Settles only if `process.exit` returns (e.g. when stubbed).
 */
async function shutdown(signalName) {
  // 143 for SIGTERM, 130 for anything else (SIGINT).
  const exitCode = signalName === "SIGTERM" ? 143 : 130;

  if (shutdownStarted) {
    process.exit(exitCode);
  }

  shutdownStarted = true;
  const error = new Error(`Interrupted by ${signalName}`);
  error.name = "AbortError";
  shutdownController.abort(error);

  console.error("");
  console.error("Interrupted. Cleaning up workers and database handles...");

  try {
    await runCleanup();
  } catch (cleanupError) {
    // The signal handlers call this function with `void`, so a rejection here
    // would surface as an unhandled rejection and replace the signal exit code.
    const detail = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
    console.error(`Cleanup failed: ${detail}`);
  } finally {
    process.exit(exitCode);
  }
}
|
|
28
|
+
|
|
29
|
+
// Route both termination signals through the shared shutdown routine.
for (const signalName of ["SIGINT", "SIGTERM"]) {
  process.on(signalName, () => {
    // Fire-and-forget: shutdown() ends the process itself.
    void shutdown(signalName);
  });
}
|
|
36
|
+
|
|
37
|
+
/**
 * Commander option parser for strictly positive whole numbers.
 *
 * `Number.parseInt` on its own stops at the first non-digit, so values such as
 * "4abc", "1.5" or "1e3" would be silently accepted as 4, 1 and 1. The whole
 * string is validated first so that typos are rejected instead of truncated.
 * Surrounding whitespace and a leading "+" remain accepted.
 *
 * @param {string} value - Raw option value from the command line.
 * @returns {number} The parsed integer, always >= 1.
 * @throws {InvalidArgumentError} When the value is not a positive integer.
 */
function parsePositiveInteger(value) {
  const text = String(value).trim();
  const parsed = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;

  // isSafeInteger also rejects digit strings too large to represent exactly.
  if (!Number.isSafeInteger(parsed) || parsed < 1) {
    throw new InvalidArgumentError("must be a positive integer");
  }

  return parsed;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Build the JSON-friendly summary printed by `--json` for index/status runs.
 *
 * Per-file embedding results are reduced to metadata plus a chunk count, so
 * the chunk objects themselves are not included in the output.
 *
 * @param {object} result - Result object from the index or status run.
 * @param {boolean} saved - Whether this run persisted its changes.
 * @returns {object} Plain object ready for `JSON.stringify`; `embeddings` is
 *   `undefined` when the result carries no embedding data.
 */
function summarize(result, saved) {
  const { embeddings } = result;

  return {
    root: result.root,
    cachePath: result.cachePath,
    cacheExists: result.cacheExists,
    saved,
    counts: result.counts,
    changes: result.changes,
    database: result.database,
    embeddings: embeddings
      ? {
          counts: embeddings.counts,
          files: embeddings.files.map((file) => ({
            relativePath: file.relativePath,
            ok: file.ok,
            skipped: Boolean(file.skipped),
            reason: file.reason,
            error: file.error,
            // The text renderer only reads `chunks` for successfully embedded
            // files, so do not assume failed or skipped entries carry one.
            chunkCount: Array.isArray(file.chunks) ? file.chunks.length : 0,
          })),
        }
      : undefined,
    errors: result.errors,
  };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Print a labelled list of file paths, truncated to a maximum number of rows.
 *
 * Nothing is printed for an empty list. When the list is longer than
 * `maxShown`, a trailing "...and N more" line reports the hidden remainder.
 *
 * @param {string} label - Heading printed before the list.
 * @param {string[]} files - Paths to print.
 * @param {number} [maxShown=20] - Maximum number of paths to list.
 * @returns {void}
 */
function printChangedFiles(label, files, maxShown = 20) {
  if (files.length === 0) {
    return;
  }

  console.log(`${label}:`);
  for (const file of files.slice(0, maxShown)) {
    console.log(` ${file}`);
  }

  const hiddenCount = files.length - maxShown;
  if (hiddenCount > 0) {
    console.log(` ...and ${hiddenCount} more`);
  }
}
|
|
87
|
+
|
|
88
|
+
/**
 * Render the outcome of an `index` or `status` run.
 *
 * In JSON mode the summarized result is printed as pretty-printed JSON and
 * nothing else. Otherwise a human-readable report is written: header, counts,
 * changed-file lists, per-file embedding outcomes (when present) and up to ten
 * traversal warnings.
 *
 * @param {object} result - Result object from the index or status run.
 * @param {string} mode - "index" or "status"; "index" marks the run as saved.
 * @param {boolean} [json] - Emit machine-readable JSON instead of text.
 * @returns {void}
 */
function printResult(result, mode, json) {
  const isIndexRun = mode === "index";

  if (json) {
    const payload = summarize(result, isIndexRun);
    console.log(JSON.stringify(payload, null, 2));
    return;
  }

  const heading = isIndexRun ? "Index complete" : "Status";
  console.log(`${heading}: ${result.root}`);
  console.log(`Cache: ${result.cachePath}${result.cacheExists ? "" : " (new)"}`);

  // Label / counts-key pairs, printed in this order.
  const countRows = [
    ["Scanned files", "scanned"],
    ["New", "new"],
    ["Modified", "modified"],
    ["Unchanged", "unchanged"],
    ["Removed", "removed"],
  ];
  for (const [label, key] of countRows) {
    console.log(`${label}: ${result.counts[key]}`);
  }

  const changeSections = [
    ["New files", "new"],
    ["Modified files", "modified"],
    ["Removed files", "removed"],
  ];
  for (const [label, key] of changeSections) {
    printChangedFiles(label, result.changes[key]);
  }

  const { embeddings } = result;
  if (embeddings) {
    console.log(`Embedded chunks: ${embeddings.counts.chunks}`);
    console.log(`Vector database: ${result.database.databasePath}`);
    console.log(`Persisted chunks: ${result.database.chunksWritten}`);

    for (const file of embeddings.files) {
      if (!file.ok) {
        console.log(`Failed to embed ${file.relativePath}: ${file.error}`);
        continue;
      }

      if (file.skipped) {
        console.log(`Skipped ${file.relativePath}: ${file.reason}`);
        continue;
      }

      console.log(`Successfully embedded ${file.chunks.length} chunks from ${file.relativePath}`);
    }
  }

  const warnings = result.errors;
  if (warnings.length > 0) {
    console.log("Traversal warnings:");
    for (const warning of warnings.slice(0, 10)) {
      console.log(` ${warning.path}: ${warning.message}`);
    }

    const hiddenWarnings = warnings.length - 10;
    if (hiddenWarnings > 0) {
      console.log(` ...and ${hiddenWarnings} more`);
    }
  }
}
|
|
135
|
+
|
|
136
|
+
/**
 * Clip a code snippet to a maximum number of lines for terminal display.
 *
 * Surrounding whitespace is trimmed, LF and CRLF line endings are both
 * accepted, and a final "..." line is appended when lines were dropped.
 *
 * @param {string} code - Source text of the snippet.
 * @param {number} [maxLines=16] - Maximum number of source lines to keep.
 * @returns {string} The clipped snippet joined with "\n".
 */
function trimSnippet(code, maxLines = 16) {
  const allLines = code.trim().split(/\r?\n/);
  const head = allLines.slice(0, maxLines);
  const shown = allLines.length > maxLines ? [...head, "..."] : head;

  return shown.join("\n");
}
|
|
146
|
+
|
|
147
|
+
/**
 * Render ranked search matches.
 *
 * In JSON mode the raw result object is printed as pretty-printed JSON.
 * Otherwise a header is printed, followed by one numbered entry per match
 * with its score (four decimals), file, line range and a clipped snippet.
 *
 * @param {object} result - Search result with `query`, `root`, `databasePath` and `results`.
 * @param {boolean} [json] - Emit machine-readable JSON instead of text.
 * @returns {void}
 */
function printSearchResults(result, json) {
  if (json) {
    console.log(JSON.stringify(result, null, 2));
    return;
  }

  console.log(`Search: ${result.query}`);
  console.log(`Target: ${result.root}`);
  console.log(`Database: ${result.databasePath}`);

  if (result.results.length === 0) {
    console.log("No indexed chunks found.");
    return;
  }

  let rank = 0;
  for (const match of result.results) {
    rank += 1;

    // A single-line match is shown as "N", a multi-line match as "N-M".
    const { startLine, endLine } = match;
    const lineRange = startLine === endLine ? `${startLine}` : `${startLine}-${endLine}`;

    console.log("");
    console.log(`${rank}. ${match.name} (${match.type})`);
    console.log(`Score: ${match.score.toFixed(4)}`);
    console.log(`File: ${match.file}`);
    console.log(`Line: ${lineRange}`);
    console.log("Code:");
    console.log(trimSnippet(match.code));
  }
}
|
|
176
|
+
|
|
177
|
+
/**
 * Action for the `index` command: index the target, then print the outcome.
 *
 * @param {string} target - Directory to index.
 * @param {object} options - Parsed CLI options (`concurrency`, `workers`, `json`).
 * @returns {Promise<void>}
 */
async function runIndex(target, options) {
  const { concurrency, workers, json } = options;

  const outcome = await indexTarget(target, {
    hashConcurrency: concurrency,
    // Lets SIGINT/SIGTERM abort the run through the shared controller.
    signal: shutdownController.signal,
    workerCount: workers,
  });

  printResult(outcome, "index", json);
}
|
|
186
|
+
|
|
187
|
+
/**
 * Action for the `status` command: compute the target's status and print it.
 *
 * @param {string} target - Directory to inspect.
 * @param {object} options - Parsed CLI options (`concurrency`, `json`).
 * @returns {Promise<void>}
 */
async function runStatus(target, options) {
  const { concurrency, json } = options;

  const outcome = await statusTarget(target, {
    hashConcurrency: concurrency,
    // Lets SIGINT/SIGTERM abort the run through the shared controller.
    signal: shutdownController.signal,
  });

  printResult(outcome, "status", json);
}
|
|
195
|
+
|
|
196
|
+
/**
 * Action for the `search` command: run the query and print the matches.
 *
 * @param {string} query - Natural language query.
 * @param {string} [target] - Indexed directory; falls back to "." when empty.
 * @param {object} options - Parsed CLI options (`limit`, `json`).
 * @returns {Promise<void>}
 */
async function runSearch(query, target, options) {
  const searchRoot = target || ".";
  const { limit, json } = options;

  const outcome = await searchTarget(searchRoot, query, {
    limit,
    // Lets SIGINT/SIGTERM abort the run through the shared controller.
    signal: shutdownController.signal,
  });

  printSearchResults(outcome, json);
}
|
|
204
|
+
|
|
205
|
+
// Root command. The version is read from package.json so that `--version`
// cannot drift from the published package version.
const program = new Command();

program
  .name("codebase-contextualizer")
  .description("Index local codebases for offline semantic search.")
  .version(require("./package.json").version);
|
|
211
|
+
|
|
212
|
+
// Option definitions shared by several subcommands; each tuple is spread into `.option()`.
const hashConcurrencyOption = [
  "-c, --concurrency <number>",
  "concurrent file hash operations",
  parsePositiveInteger,
  DEFAULT_HASH_CONCURRENCY,
];
const jsonOutputOption = ["--json", "print machine-readable output"];

// `index <target>`
program
  .command("index")
  .description("Walk a target directory, hash source files, and update the local cache.")
  .argument("<target>", "directory to index")
  .option(...hashConcurrencyOption)
  .option(
    "-w, --workers <number>",
    "worker thread count for parsing and embedding, capped at 4",
    parsePositiveInteger,
    getDefaultWorkerCount(),
  )
  .option(...jsonOutputOption)
  .action(runIndex);

// `status <target>`
program
  .command("status")
  .description("Show cache drift without writing cache updates.")
  .argument("<target>", "directory to inspect")
  .option(...hashConcurrencyOption)
  .option(...jsonOutputOption)
  .action(runStatus);

// `search <query> [target]`
program
  .command("search")
  .description("Search indexed chunks with a local semantic query.")
  .argument("<query>", "natural language query")
  .argument("[target]", "indexed target directory", ".")
  .option("-n, --limit <number>", "maximum ranked results", parsePositiveInteger, 5)
  .option(...jsonOutputOption)
  .action(runSearch);
|
|
237
|
+
|
|
238
|
+
// Entry point: parse argv and run the selected command. On failure, run
// cleanup and then map the error to an exit code (130 for an abort, otherwise 1).
program.parseAsync(process.argv).catch(async (error) => {
  try {
    await runCleanup();
  } catch (cleanupError) {
    // Report but do not rethrow: a cleanup failure must not hide the original
    // error or turn this handler into an unhandled rejection.
    const detail = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
    console.error(`Cleanup failed: ${detail}`);
  }

  if (isAbortError(error)) {
    process.exitCode = 130;
    return;
  }

  // Rejection values are not guaranteed to be Error instances.
  const hasMessage = error !== null && error !== undefined && error.message !== undefined;
  console.error(`Error: ${hasMessage ? error.message : String(error)}`);
  process.exitCode = 1;
});
|
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "codebase-contextualizer-cli",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Local semantic code search CLI using Tree-sitter, worker threads, local embeddings, and SQLite vector storage.",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"codebase-contextualizer": "bin/contextualizer.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"bin",
|
|
11
|
+
"src",
|
|
12
|
+
"scripts",
|
|
13
|
+
"index.js",
|
|
14
|
+
"README.md",
|
|
15
|
+
"LICENSE"
|
|
16
|
+
],
|
|
17
|
+
"scripts": {
|
|
18
|
+
"start": "node index.js"
|
|
19
|
+
},
|
|
20
|
+
"keywords": [
|
|
21
|
+
"cli",
|
|
22
|
+
"search",
|
|
23
|
+
"semantic",
|
|
24
|
+
"embeddings",
|
|
25
|
+
"vector",
|
|
26
|
+
"sqlite",
|
|
27
|
+
"tree-sitter",
|
|
28
|
+
"transformers"
|
|
29
|
+
],
|
|
30
|
+
"author": "Roopak Yadav",
|
|
31
|
+
"license": "MIT",
|
|
32
|
+
"engines": {
|
|
33
|
+
"node": ">=20"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"@xenova/transformers": "^2.17.2",
|
|
37
|
+
"better-sqlite3": "^12.9.0",
|
|
38
|
+
"commander": "^14.0.3",
|
|
39
|
+
"ignore": "^7.0.5",
|
|
40
|
+
"tree-sitter": "^0.25.0",
|
|
41
|
+
"tree-sitter-javascript": "^0.25.0"
|
|
42
|
+
}
|
|
43
|
+
}
|