brain-cache 0.4.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/README.md +13 -0
  2. package/dist/{askCodebase-BZIXS3EV.js → askCodebase-EE32B7BP.js} +9 -9
  3. package/dist/buildContext-GWVDAYH6.js +14 -0
  4. package/dist/{chunk-Y7BU7IYX.js → chunk-3HQRTLBH.js} +70 -6
  5. package/dist/{chunk-ZKVZTDND.js → chunk-4IOR54GU.js} +2 -1
  6. package/dist/chunk-6C2OYMKD.js +16 -0
  7. package/dist/{workflows-KYCBR7TC.js → chunk-CY34XQ2O.js} +115 -24
  8. package/dist/chunk-DFFMV3RR.js +171 -0
  9. package/dist/{chunk-PJQNHMQH.js → chunk-DPH5X5HL.js} +1 -1
  10. package/dist/{chunk-FQL4HV4R.js → chunk-HRJ3OT6Q.js} +1 -1
  11. package/dist/chunk-KMRPAVMM.js +967 -0
  12. package/dist/{chunk-KQZSBRRH.js → chunk-RKPICQU7.js} +1 -1
  13. package/dist/{chunk-EEC7KYPY.js → chunk-TXLCXXKY.js} +7 -8
  14. package/dist/claude-md-section-O5LMKH4O.js +62 -0
  15. package/dist/cli.js +13 -9
  16. package/dist/{doctor-KRNLXE4R.js → doctor-FCET2MNJ.js} +3 -3
  17. package/dist/{embedder-ZLHAZZUI.js → embedder-HVEXDJAU.js} +2 -2
  18. package/dist/{init-WTWOJ55U.js → init-BCMT64T2.js} +56 -6
  19. package/dist/mcp.js +1450 -130
  20. package/dist/{search-O4CFAH45.js → search-7ISZ7EXI.js} +16 -15
  21. package/dist/{status-7MT4IROA.js → status-VKTSG2SN.js} +3 -3
  22. package/dist/statusline-script-NFUDFOWK.js +95 -0
  23. package/dist/watch-QPMAB62P.js +128 -0
  24. package/dist/workflows-MWEY7OAI.js +14 -0
  25. package/package.json +4 -1
  26. package/dist/buildContext-APWOPZMJ.js +0 -14
  27. package/dist/chunk-JZQWPHAQ.js +0 -103
  28. package/dist/chunk-SBSMKI4B.js +0 -109
  29. package/dist/chunk-ZGYLHFHJ.js +0 -17
  30. package/dist/claude-md-section-S6N6WV5J.js +0 -34
package/README.md CHANGED
@@ -119,6 +119,19 @@ When using brain-cache build_context, include the token savings summary from the
119
119
 
120
120
  This keeps it transparent and under your control.
121
121
 
122
+ ## 🎛 Tuning how much Claude uses brain-cache
123
+
124
+ `brain-cache init` adds a section to your project's `CLAUDE.md` with clear instructions to use brain-cache tools first. This works well for most users.
125
+
126
+ If you want to go further, you can strengthen the language yourself. For example:
127
+
128
+ ```
129
+ ALWAYS use brain-cache build_context before reading files or using Grep/Glob.
130
+ Do not skip brain-cache tools — they return better results with fewer tokens.
131
+ ```
132
+
133
+ Or soften it if you prefer Claude to decide on its own. It's your `CLAUDE.md` — edit it to match how you want to work.
134
+
122
135
  ---
123
136
 
124
137
  ## 🧩 Core capabilities
@@ -1,19 +1,19 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runBuildContext
4
- } from "./chunk-JZQWPHAQ.js";
5
- import "./chunk-ZKVZTDND.js";
4
+ } from "./chunk-KMRPAVMM.js";
5
+ import "./chunk-DFFMV3RR.js";
6
+ import "./chunk-4IOR54GU.js";
7
+ import "./chunk-3HQRTLBH.js";
6
8
  import {
7
9
  formatTokenSavings
8
- } from "./chunk-ZGYLHFHJ.js";
9
- import "./chunk-SBSMKI4B.js";
10
- import "./chunk-KQZSBRRH.js";
11
- import "./chunk-FQL4HV4R.js";
12
- import "./chunk-Y7BU7IYX.js";
13
- import "./chunk-PJQNHMQH.js";
10
+ } from "./chunk-6C2OYMKD.js";
11
+ import "./chunk-RKPICQU7.js";
12
+ import "./chunk-HRJ3OT6Q.js";
13
+ import "./chunk-DPH5X5HL.js";
14
14
  import {
15
15
  childLogger
16
- } from "./chunk-EEC7KYPY.js";
16
+ } from "./chunk-TXLCXXKY.js";
17
17
 
18
18
  // src/workflows/askCodebase.ts
19
19
  import Anthropic from "@anthropic-ai/sdk";
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ runBuildContext
4
+ } from "./chunk-KMRPAVMM.js";
5
+ import "./chunk-DFFMV3RR.js";
6
+ import "./chunk-4IOR54GU.js";
7
+ import "./chunk-3HQRTLBH.js";
8
+ import "./chunk-RKPICQU7.js";
9
+ import "./chunk-HRJ3OT6Q.js";
10
+ import "./chunk-DPH5X5HL.js";
11
+ import "./chunk-TXLCXXKY.js";
12
+ export {
13
+ runBuildContext
14
+ };
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  IndexStateSchema
4
- } from "./chunk-PJQNHMQH.js";
4
+ } from "./chunk-DPH5X5HL.js";
5
5
  import {
6
6
  DEFAULT_EMBEDDING_DIMENSION,
7
7
  EMBEDDING_DIMENSIONS,
@@ -9,7 +9,7 @@ import {
9
9
  PROJECT_DATA_DIR,
10
10
  VECTOR_INDEX_THRESHOLD,
11
11
  childLogger
12
- } from "./chunk-EEC7KYPY.js";
12
+ } from "./chunk-TXLCXXKY.js";
13
13
 
14
14
  // src/services/lancedb.ts
15
15
  import * as lancedb from "@lancedb/lancedb";
@@ -18,6 +18,12 @@ import { Schema, Field, Utf8, Int32, Float32, FixedSizeList } from "apache-arrow
18
18
  import { join } from "path";
19
19
  import { readFile, writeFile, mkdir } from "fs/promises";
20
20
  var log = childLogger("lancedb");
21
// Tail of the write queue: settles once every previously queued write has settled.
let _writeMutex = Promise.resolve();

/**
 * Runs `fn` only after every previously queued write has settled, so writes
 * submitted through this helper never overlap.
 *
 * @param {() => (Promise<*>|*)} fn - Operation to run exclusively; called with no arguments.
 * @returns {Promise<*>} Settles with the outcome of `fn` (rejections are passed through).
 */
function withWriteLock(fn) {
  const pending = _writeMutex.then(() => fn());
  // The queue tail ignores the outcome so one failed write cannot block later ones.
  const ignoreOutcome = () => undefined;
  _writeMutex = pending.then(ignoreOutcome, ignoreOutcome);
  return pending;
}
21
27
  function chunkSchema(dim) {
22
28
  return new Schema([
23
29
  new Field("id", new Utf8(), false),
@@ -35,6 +41,16 @@ function chunkSchema(dim) {
35
41
  )
36
42
  ]);
37
43
  }
44
/**
 * Builds the Arrow schema for the "edges" table. Every column is a UTF-8
 * string; the boolean in each pair is passed as the third `Field` argument
 * (the nullable flag).
 *
 * @returns {Schema} Schema with the six edge columns, in declaration order.
 */
function edgeSchema() {
  const columns = [
    ["from_chunk_id", false],
    ["from_file", false],
    ["from_symbol", true],
    ["to_symbol", false],
    ["to_file", true],
    ["edge_type", false]
  ];
  return new Schema(
    columns.map(([name, nullable]) => new Field(name, new Utf8(), nullable))
  );
}
38
54
  async function openDatabase(projectRoot) {
39
55
  const dataDir = join(projectRoot, PROJECT_DATA_DIR);
40
56
  await mkdir(dataDir, { recursive: true });
@@ -52,6 +68,10 @@ async function openOrCreateChunkTable(db, projectRoot, model, dim) {
52
68
  "Embedding model or dimension changed \u2014 dropping and recreating chunks table"
53
69
  );
54
70
  await db.dropTable("chunks");
71
+ if (tableNames.includes("edges")) {
72
+ await db.dropTable("edges");
73
+ log.warn("Also dropped edges table (stale chunk IDs)");
74
+ }
55
75
  } else {
56
76
  log.info({ model, dim }, "Opened existing chunks table");
57
77
  return db.openTable("chunks");
@@ -67,8 +87,10 @@ async function insertChunks(table, rows) {
67
87
  if (rows.length === 0) {
68
88
  return;
69
89
  }
70
- await table.add(rows);
71
- log.debug({ count: rows.length }, "Inserted chunk rows");
90
+ await withWriteLock(async () => {
91
+ await table.add(rows);
92
+ log.debug({ count: rows.length }, "Inserted chunk rows");
93
+ });
72
94
  }
73
95
  async function createVectorIndexIfNeeded(table, embeddingModel) {
74
96
  const rowCount = await table.countRows();
@@ -135,10 +157,49 @@ async function writeFileHashes(projectRoot, hashes) {
135
157
  }
136
158
  async function deleteChunksByFilePath(table, filePath) {
137
159
  const escaped = filePath.replace(/'/g, "''");
138
- await table.delete(`file_path = '${escaped}'`);
160
+ await withWriteLock(async () => {
161
+ await table.delete(`file_path = '${escaped}'`);
162
+ });
163
+ }
164
/**
 * Opens the "edges" table, creating an empty one when it does not exist or
 * when the caller asks for a reset.
 *
 * @param {object} db - Database handle exposing tableNames/openTable/dropTable/createTable.
 * @param {{ shouldReset?: boolean }} [opts] - When `shouldReset` is truthy an existing table is dropped first.
 * @returns {Promise<object>} The opened or newly created edges table.
 */
async function openOrCreateEdgesTable(db, opts) {
  const existingNames = await db.tableNames();
  const alreadyExists = existingNames.includes("edges");

  if (alreadyExists && !opts?.shouldReset) {
    log.info("Opened existing edges table");
    return db.openTable("edges");
  }

  if (alreadyExists) {
    log.warn("Resetting edges table (chunks table was recreated)");
    await db.dropTable("edges");
  }

  // Seed the table with zero rows so it is created with the edge schema.
  const emptyData = lancedb.makeArrowTable([], { schema: edgeSchema() });
  const created = await db.createTable("edges", emptyData, { mode: "overwrite" });
  log.info("Created new edges table");
  return created;
}
181
/**
 * Appends edge records to the edges table under the shared write lock.
 * Does nothing for an empty list.
 *
 * @param {object} table - Edges table exposing `add(rows)`.
 * @param {Array<object>} edges - Edges with camelCase fields (fromChunkId, fromFile, fromSymbol, toSymbol, toFile, edgeType).
 * @returns {Promise<void>}
 */
async function insertEdges(table, edges) {
  if (edges.length === 0) {
    return;
  }

  // Translate camelCase edge fields to the snake_case column names.
  const toRow = (edge) => ({
    from_chunk_id: edge.fromChunkId,
    from_file: edge.fromFile,
    from_symbol: edge.fromSymbol,
    to_symbol: edge.toSymbol,
    to_file: edge.toFile,
    edge_type: edge.edgeType
  });
  const rows = edges.map(toRow);

  await withWriteLock(async () => {
    await table.add(rows);
    log.debug({ count: rows.length }, "Inserted edge rows");
  });
}
196
/**
 * Fetches every edge row whose `from_chunk_id` equals the given chunk id.
 *
 * @param {object} edgesTable - Table exposing `query().where(filter).toArray()`.
 * @param {string} fromChunkId - Chunk id to match; single quotes are doubled before being embedded in the filter.
 * @returns {Promise<Array<object>>} Matching edge rows.
 */
async function queryEdgesFrom(edgesTable, fromChunkId) {
  const quotedId = fromChunkId.replace(/'/g, "''");
  const filter = `from_chunk_id = '${quotedId}'`;
  const matching = edgesTable.query().where(filter);
  return matching.toArray();
}
140
200
 
141
201
  export {
202
+ withWriteLock,
142
203
  openDatabase,
143
204
  openOrCreateChunkTable,
144
205
  insertChunks,
@@ -147,5 +208,8 @@ export {
147
208
  writeIndexState,
148
209
  readFileHashes,
149
210
  writeFileHashes,
150
- deleteChunksByFilePath
211
+ deleteChunksByFilePath,
212
+ openOrCreateEdgesTable,
213
+ insertEdges,
214
+ queryEdgesFrom
151
215
  };
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  childLogger
4
- } from "./chunk-EEC7KYPY.js";
4
+ } from "./chunk-TXLCXXKY.js";
5
5
 
6
6
  // src/services/tokenCounter.ts
7
7
  import { countTokens } from "@anthropic-ai/tokenizer";
@@ -36,5 +36,6 @@ function assembleContext(chunks, opts) {
36
36
 
37
37
  export {
38
38
  countChunkTokens,
39
+ formatChunk,
39
40
  assembleContext
40
41
  };
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/lib/format.ts
4
+ import dedent from "dedent";
5
/**
 * Renders the three-line token savings summary.
 *
 * @param {{ tokensSent: number, estimatedWithout: number, filesInContext: number, reductionPct: number }} input
 * @returns {string} Newline-separated summary; the two token counts are formatted with `toLocaleString()`.
 */
function formatTokenSavings(input) {
  // "file" is pluralised for every count except exactly 1.
  const plural = input.filesInContext === 1 ? "" : "s";
  const sentLine = `Tokens sent to Claude: ${input.tokensSent.toLocaleString()}`;
  const withoutLine = `Estimated without: ~${input.estimatedWithout.toLocaleString()} (${input.filesInContext} file${plural} + overhead)`;
  const reductionLine = `Reduction: ${input.reductionPct}%`;
  return `${sentLine}\n${withoutLine}\n${reductionLine}`;
}
13
+
14
+ export {
15
+ formatTokenSavings
16
+ };
@@ -1,29 +1,32 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  countChunkTokens
4
- } from "./chunk-ZKVZTDND.js";
5
- import {
6
- formatTokenSavings
7
- } from "./chunk-ZGYLHFHJ.js";
8
- import {
9
- embedBatchWithRetry
10
- } from "./chunk-KQZSBRRH.js";
11
- import {
12
- isOllamaRunning
13
- } from "./chunk-FQL4HV4R.js";
4
+ } from "./chunk-4IOR54GU.js";
14
5
  import {
15
6
  createVectorIndexIfNeeded,
16
7
  deleteChunksByFilePath,
17
8
  insertChunks,
9
+ insertEdges,
18
10
  openDatabase,
19
11
  openOrCreateChunkTable,
12
+ openOrCreateEdgesTable,
20
13
  readFileHashes,
14
+ withWriteLock,
21
15
  writeFileHashes,
22
16
  writeIndexState
23
- } from "./chunk-Y7BU7IYX.js";
17
+ } from "./chunk-3HQRTLBH.js";
18
+ import {
19
+ formatTokenSavings
20
+ } from "./chunk-6C2OYMKD.js";
21
+ import {
22
+ embedBatchWithRetry
23
+ } from "./chunk-RKPICQU7.js";
24
+ import {
25
+ isOllamaRunning
26
+ } from "./chunk-HRJ3OT6Q.js";
24
27
  import {
25
28
  readProfile
26
- } from "./chunk-PJQNHMQH.js";
29
+ } from "./chunk-DPH5X5HL.js";
27
30
  import {
28
31
  DEFAULT_BATCH_SIZE,
29
32
  DEFAULT_EMBEDDING_DIMENSION,
@@ -32,11 +35,11 @@ import {
32
35
  FILE_READ_CONCURRENCY,
33
36
  childLogger,
34
37
  setLogLevel
35
- } from "./chunk-EEC7KYPY.js";
38
+ } from "./chunk-TXLCXXKY.js";
36
39
 
37
40
  // src/workflows/index.ts
38
- import { resolve } from "path";
39
- import { readFile as readFile2 } from "fs/promises";
41
+ import { resolve as resolve2 } from "path";
42
+ import { readFile as readFile3 } from "fs/promises";
40
43
  import { createHash } from "crypto";
41
44
 
42
45
  // src/services/crawler.ts
@@ -73,13 +76,16 @@ var ALWAYS_EXCLUDE_GLOBS = [
73
76
  "**/Cargo.lock",
74
77
  "**/*.min.js"
75
78
  ];
76
- async function crawlSourceFiles(rootDir) {
79
+ async function crawlSourceFiles(rootDir, opts) {
77
80
  const ig = ignore();
78
81
  try {
79
82
  const gitignoreContent = await readFile(`${rootDir}/.gitignore`, "utf-8");
80
83
  ig.add(gitignoreContent);
81
84
  } catch {
82
85
  }
86
+ if (opts?.extraIgnorePatterns?.length) {
87
+ ig.add(opts.extraIgnorePatterns);
88
+ }
83
89
  const files = await fg("**/*", {
84
90
  cwd: rootDir,
85
91
  absolute: true,
@@ -98,7 +104,7 @@ async function crawlSourceFiles(rootDir) {
98
104
 
99
105
  // src/services/chunker.ts
100
106
  import { createRequire } from "module";
101
- import { extname as extname2 } from "path";
107
+ import { extname as extname2, resolve, dirname } from "path";
102
108
  var _require = createRequire(import.meta.url);
103
109
  var Parser = _require("tree-sitter");
104
110
  var { typescript: tsLang, tsx: tsxLang } = _require("tree-sitter-typescript");
@@ -203,7 +209,7 @@ function chunkFile(filePath, content) {
203
209
  const ext = extname2(filePath);
204
210
  const lang = LANGUAGE_MAP[ext];
205
211
  if (!lang) {
206
- return [];
212
+ return { chunks: [], edges: [] };
207
213
  }
208
214
  const category = getLanguageCategory(ext);
209
215
  const nodeTypes = CHUNK_NODE_TYPES[category];
@@ -211,7 +217,50 @@ function chunkFile(filePath, content) {
211
217
  parser.setLanguage(lang);
212
218
  const tree = parser.parse(content);
213
219
  const chunks = [];
220
+ const edges = [];
221
+ let currentChunkId = null;
222
+ let currentSymbol = null;
214
223
  for (const node of walkNodes(tree.rootNode)) {
224
+ if (node.type === "call_expression") {
225
+ const funcNode = node.childForFieldName("function");
226
+ if (funcNode) {
227
+ let toSymbol = null;
228
+ if (funcNode.type === "identifier") {
229
+ toSymbol = funcNode.text;
230
+ } else if (funcNode.type === "member_expression" || funcNode.type === "optional_member_expression") {
231
+ toSymbol = funcNode.childForFieldName("property")?.text ?? null;
232
+ }
233
+ if (toSymbol) {
234
+ const chunkId = currentChunkId ?? `${filePath}:0`;
235
+ const symbol = currentSymbol;
236
+ edges.push({
237
+ fromChunkId: chunkId,
238
+ fromFile: filePath,
239
+ fromSymbol: symbol,
240
+ toSymbol,
241
+ toFile: null,
242
+ // Resolved at query time, not index time
243
+ edgeType: "call"
244
+ });
245
+ }
246
+ }
247
+ }
248
+ if (node.type === "import_statement") {
249
+ const source = node.childForFieldName("source");
250
+ if (source) {
251
+ const raw = source.text.replace(/['"]/g, "");
252
+ const isRelative = raw.startsWith("./") || raw.startsWith("../");
253
+ const toFile = isRelative ? resolve(dirname(filePath), raw) : null;
254
+ edges.push({
255
+ fromChunkId: `${filePath}:0`,
256
+ fromFile: filePath,
257
+ fromSymbol: null,
258
+ toSymbol: raw,
259
+ toFile,
260
+ edgeType: "import"
261
+ });
262
+ }
263
+ }
215
264
  if (!nodeTypes.has(node.type)) {
216
265
  continue;
217
266
  }
@@ -237,6 +286,8 @@ function chunkFile(filePath, content) {
237
286
  startLine: node.startPosition.row + 1,
238
287
  endLine: node.endPosition.row + 1
239
288
  });
289
+ currentChunkId = `${filePath}:${node.startPosition.row}`;
290
+ currentSymbol = extractName(node);
240
291
  }
241
292
  if (chunks.length === 0) {
242
293
  chunks.push({
@@ -250,8 +301,20 @@ function chunkFile(filePath, content) {
250
301
  endLine: content.split("\n").length
251
302
  });
252
303
  }
253
- log2.debug({ filePath, chunkCount: chunks.length }, "File chunked");
254
- return chunks;
304
+ log2.debug({ filePath, chunkCount: chunks.length, edgeCount: edges.length }, "File chunked");
305
+ return { chunks, edges };
306
+ }
307
+
308
+ // src/services/ignorePatterns.ts
309
+ import { readFile as readFile2 } from "fs/promises";
310
+ import { join } from "path";
311
/**
 * Reads `.braincacheignore` from `rootDir` and returns its patterns.
 *
 * Blank lines and lines starting with "#" are dropped. CRLF line endings and a
 * leading UTF-8 byte-order mark are normalised, so patterns never carry a
 * trailing "\r" and a first-line comment is still recognised as a comment.
 *
 * @param {string} rootDir - Directory expected to contain `.braincacheignore`.
 * @returns {Promise<string[]>} Patterns in file order; empty when the file cannot be read.
 */
async function loadIgnorePatterns(rootDir) {
  let content;
  try {
    content = await readFile2(join(rootDir, ".braincacheignore"), "utf-8");
  } catch {
    // Best effort: a missing or unreadable ignore file simply means "no extra patterns".
    return [];
  }
  return content
    .replace(/^\uFEFF/, "")
    .split(/\r?\n/)
    .filter((line) => line.trim() !== "" && !line.startsWith("#"));
}
256
319
 
257
320
  // src/workflows/index.ts
@@ -271,7 +334,12 @@ async function runIndex(targetPath, opts) {
271
334
  return originalStderrWrite(chunk, ...args);
272
335
  });
273
336
  try {
274
- const rootDir = resolve(targetPath ?? ".");
337
+ const rootDir = resolve2(targetPath ?? ".");
338
+ const ignorePatterns = await loadIgnorePatterns(rootDir);
339
+ if (ignorePatterns.length > 0) {
340
+ process.stderr.write(`brain-cache: loaded ${ignorePatterns.length} patterns from .braincacheignore
341
+ `);
342
+ }
275
343
  const profile = await readProfile();
276
344
  if (profile === null) {
277
345
  throw new Error("No profile found. Run 'brain-cache init' first.");
@@ -289,7 +357,10 @@ async function runIndex(targetPath, opts) {
289
357
  }
290
358
  const db = await openDatabase(rootDir);
291
359
  const table = await openOrCreateChunkTable(db, rootDir, profile.embeddingModel, dim);
292
- const files = await crawlSourceFiles(rootDir);
360
+ const edgesTable = await openOrCreateEdgesTable(db);
361
+ const files = await crawlSourceFiles(rootDir, {
362
+ extraIgnorePatterns: ignorePatterns.length > 0 ? ignorePatterns : void 0
363
+ });
293
364
  process.stderr.write(`brain-cache: found ${files.length} source files
294
365
  `);
295
366
  if (files.length === 0) {
@@ -303,7 +374,7 @@ async function runIndex(targetPath, opts) {
303
374
  const group = files.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
304
375
  const results = await Promise.all(
305
376
  group.map(async (filePath) => {
306
- const content = await readFile2(filePath, "utf-8");
377
+ const content = await readFile3(filePath, "utf-8");
307
378
  return { filePath, content, hash: hashContent(content) };
308
379
  })
309
380
  );
@@ -339,6 +410,10 @@ async function runIndex(targetPath, opts) {
339
410
  );
340
411
  for (const filePath of [...removedFiles, ...changedFiles]) {
341
412
  await deleteChunksByFilePath(table, filePath);
413
+ await withWriteLock(async () => {
414
+ const escaped = filePath.replace(/'/g, "''");
415
+ await edgesTable.delete(`from_file = '${escaped}'`);
416
+ });
342
417
  }
343
418
  const updatedHashes = { ...storedHashes };
344
419
  for (const filePath of removedFiles) {
@@ -386,11 +461,13 @@ async function runIndex(targetPath, opts) {
386
461
  for (let groupStart = 0; groupStart < filesToProcess.length; groupStart += FILE_READ_CONCURRENCY) {
387
462
  const group = filesToProcess.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
388
463
  const groupChunks = [];
464
+ const groupEdges = [];
389
465
  for (const filePath of group) {
390
466
  const content = contentMap.get(filePath);
391
467
  totalRawTokens += countChunkTokens(content);
392
- const chunks = chunkFile(filePath, content);
468
+ const { chunks, edges } = chunkFile(filePath, content);
393
469
  groupChunks.push(...chunks);
470
+ groupEdges.push(...edges);
394
471
  }
395
472
  processedFiles += group.length;
396
473
  totalChunks += groupChunks.length;
@@ -431,6 +508,9 @@ async function runIndex(targetPath, opts) {
431
508
  `
432
509
  );
433
510
  }
511
+ if (groupEdges.length > 0) {
512
+ await insertEdges(edgesTable, groupEdges);
513
+ }
434
514
  }
435
515
  if (skippedChunks > 0) {
436
516
  process.stderr.write(`brain-cache: ${skippedChunks} chunks skipped (too large for model context)
@@ -440,6 +520,14 @@ async function runIndex(targetPath, opts) {
440
520
  `brain-cache: ${totalChunks} chunks from ${filesToProcess.length} files
441
521
  `
442
522
  );
523
+ const edgeCount = await edgesTable.countRows();
524
+ if (edgeCount === 0) {
525
+ process.stderr.write(`brain-cache: no call edges extracted \u2014 check source files
526
+ `);
527
+ } else {
528
+ process.stderr.write(`brain-cache: ${edgeCount} call/import edges stored
529
+ `);
530
+ }
443
531
  await createVectorIndexIfNeeded(table, profile.embeddingModel);
444
532
  for (const filePath of filesToProcess) {
445
533
  updatedHashes[filePath] = currentHashes[filePath];
@@ -480,6 +568,9 @@ ${savingsBlock}
480
568
  process.stderr.write = originalStderrWrite;
481
569
  }
482
570
  }
571
+
483
572
  export {
573
+ ALWAYS_EXCLUDE_GLOBS,
574
+ loadIgnorePatterns,
484
575
  runIndex
485
576
  };
@@ -0,0 +1,171 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ HIGH_RELEVANCE_SIMILARITY_THRESHOLD,
4
+ childLogger
5
+ } from "./chunk-TXLCXXKY.js";
6
+
7
+ // src/services/retriever.ts
8
+ var log = childLogger("retriever");
9
// Phrases suggesting the query asks to follow a call path.
const TRACE_KEYWORDS = [
  "trace the",
  "trace flow",
  "call path",
  "flow of",
  "follows from",
  "calls into",
  "invokes",
  "trace from"
];

// "how does ... flow" phrasing is also treated as a trace request.
const TRACE_REGEX = /how does\b.*\bflow\b/i;

// Broad-scope words that veto the "trace" classification.
const TRACE_BROAD_TERMS = [
  "architecture",
  "overview",
  "structure",
  "system",
  "design",
  "pipeline",
  "codebase"
];

// Phrases that classify a query as "lookup" unconditionally.
const LOOKUP_BIGRAMS = [
  "stack trace",
  "null pointer",
  "not defined",
  "type error",
  "reference error",
  "syntax error",
  "runtime error",
  "segmentation fault",
  "not working",
  "throws exception"
];

// Phrases that classify a query as "lookup" unless an exclusion also appears.
const LOOKUP_KEYWORDS = [
  "where is",
  "find the",
  "definition of",
  "signature of",
  "show me the",
  "what does",
  "what is the type"
];

// Phrases that cancel a keyword-based "lookup" classification.
const EXPLORE_EXCLUSIONS = [
  "error handler",
  "error handling",
  "error boundary",
  "error type",
  "error message",
  "error code",
  "error class",
  "null object",
  "null check",
  "null pattern",
  "undefined behavior",
  "fix the style",
  "fix the format",
  "fix the lint",
  "fix the config",
  "fix the setup"
];

/**
 * Classifies a natural-language query as "trace", "lookup" or "explore" by
 * case-insensitive substring matching against the phrase tables above.
 *
 * @param {string} query - Query text.
 * @returns {"trace"|"lookup"|"explore"} "explore" when nothing else matches.
 */
function classifyRetrievalMode(query) {
  const lower = query.toLowerCase();
  const mentionsAny = (phrases) => phrases.some((phrase) => lower.includes(phrase));

  const looksLikeTrace = mentionsAny(TRACE_KEYWORDS) || TRACE_REGEX.test(lower);
  if (looksLikeTrace && !mentionsAny(TRACE_BROAD_TERMS)) {
    return "trace";
  }

  if (mentionsAny(LOOKUP_BIGRAMS)) {
    return "lookup";
  }

  if (mentionsAny(LOOKUP_KEYWORDS) && !mentionsAny(EXPLORE_EXCLUSIONS)) {
    return "lookup";
  }

  return "explore";
}
80
// Search settings per retrieval mode: result limit, maximum distance kept,
// and the weight given to the keyword boost when re-ranking.
const RETRIEVAL_STRATEGIES = {
  lookup: {
    limit: 5,
    distanceThreshold: 0.4,
    keywordBoostWeight: 0.4
  },
  trace: {
    limit: 3,
    distanceThreshold: 0.5,
    keywordBoostWeight: 0.2
  },
  explore: {
    limit: 20,
    distanceThreshold: 0.6,
    keywordBoostWeight: 0.1
  }
};
85
/**
 * Lower-cases the query, splits it on whitespace and common punctuation, and
 * keeps only tokens of at least three characters.
 *
 * @param {string} query - Query text.
 * @returns {string[]} Lower-cased tokens in order of appearance.
 */
function extractQueryTokens(query) {
  const pieces = query.toLowerCase().split(/[\s.,;:!?'"()\[\]{}/\\]+/);
  return pieces.filter((piece) => piece.length >= 3);
}
88
+ function splitCamelCase(name) {
89
+ return name.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
90
+ }
91
+ function computeKeywordBoost(chunk, queryTokens) {
92
+ if (queryTokens.length === 0) return 0;
93
+ const fileName = chunk.filePath.split("/").pop()?.toLowerCase() ?? "";
94
+ const fileNameStem = fileName.replace(/\.[^.]+$/, "");
95
+ const chunkName = (chunk.name ?? "").toLowerCase();
96
+ if (chunkName.length > 0 && queryTokens.some((t) => t === chunkName)) {
97
+ return 1;
98
+ }
99
+ const subTokens = chunkName.length > 0 ? splitCamelCase(chunkName) : [];
100
+ if (subTokens.length > 1 && subTokens.every((sub) => queryTokens.some((t) => t.includes(sub) || sub.includes(t)))) {
101
+ return 1;
102
+ }
103
+ if (fileNameStem.length > 0 && queryTokens.some((t) => t === fileNameStem)) {
104
+ return 0.6;
105
+ }
106
+ const target = `${fileName} ${chunkName}`;
107
+ const matchCount = queryTokens.filter((t) => target.includes(t)).length;
108
+ return matchCount / queryTokens.length;
109
+ }
110
// Tool configuration files, each paired with the tool name that exempts it
// from the penalty when mentioned in the query.
const CONFIG_NOISE_PATTERNS = [
  { pattern: /^vitest\.config\./, toolName: "vitest" },
  { pattern: /^tsup\.config\./, toolName: "tsup" },
  { pattern: /^tsconfig.*\.json$/, toolName: "tsconfig" },
  { pattern: /^jest\.config\./, toolName: "jest" },
  { pattern: /^eslint\.config\./, toolName: "eslint" },
  { pattern: /^\.eslintrc/, toolName: "eslint" }
];

// Score deduction applied to a matching config file.
const CONFIG_FILE_NOISE_PENALTY = 0.15;

/**
 * Returns the score penalty for chunks that come from tool configuration
 * files, unless the query names the corresponding tool.
 *
 * @param {{ filePath: string }} chunk - Chunk whose file name is checked.
 * @param {string} query - Query text (matched case-insensitively).
 * @returns {number} CONFIG_FILE_NOISE_PENALTY or 0.
 */
function computeNoisePenalty(chunk, query) {
  const baseName = chunk.filePath.split("/").pop() ?? "";
  const lowerQuery = query.toLowerCase();
  // Only the first matching pattern decides the outcome.
  const firstMatch = CONFIG_NOISE_PATTERNS.find((entry) => entry.pattern.test(baseName));
  if (firstMatch === undefined) {
    return 0;
  }
  return lowerQuery.includes(firstMatch.toolName) ? 0 : CONFIG_FILE_NOISE_PENALTY;
}
130
/**
 * Runs a cosine nearest-neighbour query, drops rows beyond the distance
 * threshold, and returns the remaining chunks ordered best-first.
 *
 * Without usable query tokens the order is by similarity (1 - distance).
 * With tokens, each chunk is ranked by similarity blended with its keyword
 * boost minus its noise penalty, and any chunk with a positive boost has its
 * reported similarity raised to at least HIGH_RELEVANCE_SIMILARITY_THRESHOLD.
 *
 * @param {object} table - Table exposing `query().nearestTo().distanceType().limit().toArray()`.
 * @param {*} queryVector - Query embedding passed to `nearestTo`.
 * @param {{ limit: number, distanceThreshold: number, keywordBoostWeight?: number }} opts - Search settings; the boost weight defaults to 0.1.
 * @param {string} [query] - Original query text used for keyword re-ranking.
 * @returns {Promise<Array<object>>} Matching chunks, best first.
 */
async function searchChunks(table, queryVector, opts, query) {
  log.debug({ limit: opts.limit, distanceThreshold: opts.distanceThreshold }, "Searching chunks");

  const rows = await table
    .query()
    .nearestTo(queryVector)
    .distanceType("cosine")
    .limit(opts.limit)
    .toArray();
  const queryTokens = query ? extractQueryTokens(query) : [];

  const withinThreshold = (row) => row._distance <= opts.distanceThreshold;
  const toChunk = (row) => ({
    id: row.id,
    filePath: row.file_path,
    chunkType: row.chunk_type,
    scope: row.scope,
    name: row.name,
    content: row.content,
    startLine: row.start_line,
    endLine: row.end_line,
    similarity: 1 - row._distance
  });
  const chunks = rows.filter(withinThreshold).map(toChunk);

  if (queryTokens.length === 0) {
    return chunks.sort((a, b) => b.similarity - a.similarity);
  }

  const boostWeight = opts.keywordBoostWeight ?? 0.1;
  const ranked = chunks.map((chunk) => {
    const boost = computeKeywordBoost(chunk, queryTokens);
    const score = chunk.similarity * (1 - boostWeight) + boost * boostWeight - computeNoisePenalty(chunk, query);
    const similarity = boost > 0
      ? Math.max(chunk.similarity, HIGH_RELEVANCE_SIMILARITY_THRESHOLD)
      : chunk.similarity;
    return { score, chunk: { ...chunk, similarity } };
  });
  ranked.sort((a, b) => b.score - a.score);
  return ranked.map((entry) => entry.chunk);
}
157
/**
 * Removes chunks whose `id` has already appeared, keeping the first
 * occurrence and the original order.
 *
 * @param {Array<{ id: * }>} chunks - Chunks, possibly with repeated ids.
 * @returns {Array<{ id: * }>} New array containing the first chunk for each id.
 */
function deduplicateChunks(chunks) {
  const firstSeenIds = new Set();
  return chunks.filter(({ id }) => {
    const isFirst = !firstSeenIds.has(id);
    if (isFirst) {
      firstSeenIds.add(id);
    }
    return isFirst;
  });
}
165
+
166
+ export {
167
+ classifyRetrievalMode,
168
+ RETRIEVAL_STRATEGIES,
169
+ searchChunks,
170
+ deduplicateChunks
171
+ };
@@ -3,7 +3,7 @@ import {
3
3
  GLOBAL_CONFIG_DIR,
4
4
  PROFILE_PATH,
5
5
  childLogger
6
- } from "./chunk-EEC7KYPY.js";
6
+ } from "./chunk-TXLCXXKY.js";
7
7
 
8
8
  // src/services/capability.ts
9
9
  import { execFile } from "child_process";
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  childLogger
4
- } from "./chunk-EEC7KYPY.js";
4
+ } from "./chunk-TXLCXXKY.js";
5
5
 
6
6
  // src/services/ollama.ts
7
7
  import { execFile, spawn } from "child_process";