brain-cache 0.4.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -0
- package/dist/{askCodebase-BZIXS3EV.js → askCodebase-EE32B7BP.js} +9 -9
- package/dist/buildContext-GWVDAYH6.js +14 -0
- package/dist/{chunk-Y7BU7IYX.js → chunk-3HQRTLBH.js} +70 -6
- package/dist/{chunk-ZKVZTDND.js → chunk-4IOR54GU.js} +2 -1
- package/dist/chunk-6C2OYMKD.js +16 -0
- package/dist/{workflows-KYCBR7TC.js → chunk-CY34XQ2O.js} +115 -24
- package/dist/chunk-DFFMV3RR.js +171 -0
- package/dist/{chunk-PJQNHMQH.js → chunk-DPH5X5HL.js} +1 -1
- package/dist/{chunk-FQL4HV4R.js → chunk-HRJ3OT6Q.js} +1 -1
- package/dist/chunk-KMRPAVMM.js +967 -0
- package/dist/{chunk-KQZSBRRH.js → chunk-RKPICQU7.js} +1 -1
- package/dist/{chunk-EEC7KYPY.js → chunk-TXLCXXKY.js} +7 -8
- package/dist/claude-md-section-O5LMKH4O.js +62 -0
- package/dist/cli.js +13 -9
- package/dist/{doctor-KRNLXE4R.js → doctor-FCET2MNJ.js} +3 -3
- package/dist/{embedder-ZLHAZZUI.js → embedder-HVEXDJAU.js} +2 -2
- package/dist/{init-WTWOJ55U.js → init-BCMT64T2.js} +56 -6
- package/dist/mcp.js +1450 -130
- package/dist/{search-O4CFAH45.js → search-7ISZ7EXI.js} +16 -15
- package/dist/{status-7MT4IROA.js → status-VKTSG2SN.js} +3 -3
- package/dist/statusline-script-NFUDFOWK.js +95 -0
- package/dist/watch-QPMAB62P.js +128 -0
- package/dist/workflows-MWEY7OAI.js +14 -0
- package/package.json +4 -1
- package/dist/buildContext-APWOPZMJ.js +0 -14
- package/dist/chunk-JZQWPHAQ.js +0 -103
- package/dist/chunk-SBSMKI4B.js +0 -109
- package/dist/chunk-ZGYLHFHJ.js +0 -17
- package/dist/claude-md-section-S6N6WV5J.js +0 -34
package/README.md
CHANGED
|
@@ -119,6 +119,19 @@ When using brain-cache build_context, include the token savings summary from the
|
|
|
119
119
|
|
|
120
120
|
This keeps it transparent and under your control.
|
|
121
121
|
|
|
122
|
+
## 🎛 Tuning how much Claude uses brain-cache
|
|
123
|
+
|
|
124
|
+
`brain-cache init` adds a section to your project's `CLAUDE.md` with clear instructions to use brain-cache tools first. This works well for most users.
|
|
125
|
+
|
|
126
|
+
If you want to go further, you can strengthen the language yourself. For example:
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
ALWAYS use brain-cache build_context before reading files or using Grep/Glob.
|
|
130
|
+
Do not skip brain-cache tools — they return better results with fewer tokens.
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Or soften it if you prefer Claude to decide on its own. It's your `CLAUDE.md` — edit it to match how you want to work.
|
|
134
|
+
|
|
122
135
|
---
|
|
123
136
|
|
|
124
137
|
## 🧩 Core capabilities
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
runBuildContext
|
|
4
|
-
} from "./chunk-
|
|
5
|
-
import "./chunk-
|
|
4
|
+
} from "./chunk-KMRPAVMM.js";
|
|
5
|
+
import "./chunk-DFFMV3RR.js";
|
|
6
|
+
import "./chunk-4IOR54GU.js";
|
|
7
|
+
import "./chunk-3HQRTLBH.js";
|
|
6
8
|
import {
|
|
7
9
|
formatTokenSavings
|
|
8
|
-
} from "./chunk-ZGYLHFHJ.js";
|
|
9
|
-
import "./chunk-
|
|
10
|
-
import "./chunk-
|
|
11
|
-
import "./chunk-
|
|
12
|
-
import "./chunk-Y7BU7IYX.js";
|
|
13
|
-
import "./chunk-PJQNHMQH.js";
|
|
10
|
+
} from "./chunk-6C2OYMKD.js";
|
|
11
|
+
import "./chunk-RKPICQU7.js";
|
|
12
|
+
import "./chunk-HRJ3OT6Q.js";
|
|
13
|
+
import "./chunk-DPH5X5HL.js";
|
|
14
14
|
import {
|
|
15
15
|
childLogger
|
|
16
|
-
} from "./chunk-EEC7KYPY.js";
|
|
16
|
+
} from "./chunk-TXLCXXKY.js";
|
|
17
17
|
|
|
18
18
|
// src/workflows/askCodebase.ts
|
|
19
19
|
import Anthropic from "@anthropic-ai/sdk";
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
runBuildContext
|
|
4
|
+
} from "./chunk-KMRPAVMM.js";
|
|
5
|
+
import "./chunk-DFFMV3RR.js";
|
|
6
|
+
import "./chunk-4IOR54GU.js";
|
|
7
|
+
import "./chunk-3HQRTLBH.js";
|
|
8
|
+
import "./chunk-RKPICQU7.js";
|
|
9
|
+
import "./chunk-HRJ3OT6Q.js";
|
|
10
|
+
import "./chunk-DPH5X5HL.js";
|
|
11
|
+
import "./chunk-TXLCXXKY.js";
|
|
12
|
+
export {
|
|
13
|
+
runBuildContext
|
|
14
|
+
};
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
IndexStateSchema
|
|
4
|
-
} from "./chunk-PJQNHMQH.js";
|
|
4
|
+
} from "./chunk-DPH5X5HL.js";
|
|
5
5
|
import {
|
|
6
6
|
DEFAULT_EMBEDDING_DIMENSION,
|
|
7
7
|
EMBEDDING_DIMENSIONS,
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
PROJECT_DATA_DIR,
|
|
10
10
|
VECTOR_INDEX_THRESHOLD,
|
|
11
11
|
childLogger
|
|
12
|
-
} from "./chunk-EEC7KYPY.js";
|
|
12
|
+
} from "./chunk-TXLCXXKY.js";
|
|
13
13
|
|
|
14
14
|
// src/services/lancedb.ts
|
|
15
15
|
import * as lancedb from "@lancedb/lancedb";
|
|
@@ -18,6 +18,12 @@ import { Schema, Field, Utf8, Int32, Float32, FixedSizeList } from "apache-arrow
|
|
|
18
18
|
import { join } from "path";
|
|
19
19
|
import { readFile, writeFile, mkdir } from "fs/promises";
|
|
20
20
|
var log = childLogger("lancedb");
|
|
21
|
+
var _writeMutex = Promise.resolve();
|
|
22
|
+
function withWriteLock(fn) {
|
|
23
|
+
const next = _writeMutex.then(() => fn());
|
|
24
|
+
_writeMutex = next.then(() => void 0, () => void 0);
|
|
25
|
+
return next;
|
|
26
|
+
}
|
|
21
27
|
function chunkSchema(dim) {
|
|
22
28
|
return new Schema([
|
|
23
29
|
new Field("id", new Utf8(), false),
|
|
@@ -35,6 +41,16 @@ function chunkSchema(dim) {
|
|
|
35
41
|
)
|
|
36
42
|
]);
|
|
37
43
|
}
|
|
44
|
+
function edgeSchema() {
|
|
45
|
+
return new Schema([
|
|
46
|
+
new Field("from_chunk_id", new Utf8(), false),
|
|
47
|
+
new Field("from_file", new Utf8(), false),
|
|
48
|
+
new Field("from_symbol", new Utf8(), true),
|
|
49
|
+
new Field("to_symbol", new Utf8(), false),
|
|
50
|
+
new Field("to_file", new Utf8(), true),
|
|
51
|
+
new Field("edge_type", new Utf8(), false)
|
|
52
|
+
]);
|
|
53
|
+
}
|
|
38
54
|
async function openDatabase(projectRoot) {
|
|
39
55
|
const dataDir = join(projectRoot, PROJECT_DATA_DIR);
|
|
40
56
|
await mkdir(dataDir, { recursive: true });
|
|
@@ -52,6 +68,10 @@ async function openOrCreateChunkTable(db, projectRoot, model, dim) {
|
|
|
52
68
|
"Embedding model or dimension changed \u2014 dropping and recreating chunks table"
|
|
53
69
|
);
|
|
54
70
|
await db.dropTable("chunks");
|
|
71
|
+
if (tableNames.includes("edges")) {
|
|
72
|
+
await db.dropTable("edges");
|
|
73
|
+
log.warn("Also dropped edges table (stale chunk IDs)");
|
|
74
|
+
}
|
|
55
75
|
} else {
|
|
56
76
|
log.info({ model, dim }, "Opened existing chunks table");
|
|
57
77
|
return db.openTable("chunks");
|
|
@@ -67,8 +87,10 @@ async function insertChunks(table, rows) {
|
|
|
67
87
|
if (rows.length === 0) {
|
|
68
88
|
return;
|
|
69
89
|
}
|
|
70
|
-
await
|
|
71
|
-
|
|
90
|
+
await withWriteLock(async () => {
|
|
91
|
+
await table.add(rows);
|
|
92
|
+
log.debug({ count: rows.length }, "Inserted chunk rows");
|
|
93
|
+
});
|
|
72
94
|
}
|
|
73
95
|
async function createVectorIndexIfNeeded(table, embeddingModel) {
|
|
74
96
|
const rowCount = await table.countRows();
|
|
@@ -135,10 +157,49 @@ async function writeFileHashes(projectRoot, hashes) {
|
|
|
135
157
|
}
|
|
136
158
|
async function deleteChunksByFilePath(table, filePath) {
|
|
137
159
|
const escaped = filePath.replace(/'/g, "''");
|
|
138
|
-
await table.delete(`file_path = '${escaped}'`);
|
|
160
|
+
await withWriteLock(async () => {
|
|
161
|
+
await table.delete(`file_path = '${escaped}'`);
|
|
162
|
+
});
|
|
163
|
+
}
|
|
164
|
+
async function openOrCreateEdgesTable(db, opts) {
|
|
165
|
+
const tableNames = await db.tableNames();
|
|
166
|
+
if (tableNames.includes("edges")) {
|
|
167
|
+
if (opts?.shouldReset) {
|
|
168
|
+
log.warn("Resetting edges table (chunks table was recreated)");
|
|
169
|
+
await db.dropTable("edges");
|
|
170
|
+
} else {
|
|
171
|
+
log.info("Opened existing edges table");
|
|
172
|
+
return db.openTable("edges");
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
const schema = edgeSchema();
|
|
176
|
+
const emptyData = lancedb.makeArrowTable([], { schema });
|
|
177
|
+
const table = await db.createTable("edges", emptyData, { mode: "overwrite" });
|
|
178
|
+
log.info("Created new edges table");
|
|
179
|
+
return table;
|
|
180
|
+
}
|
|
181
|
+
async function insertEdges(table, edges) {
|
|
182
|
+
if (edges.length === 0) return;
|
|
183
|
+
const rows = edges.map((e) => ({
|
|
184
|
+
from_chunk_id: e.fromChunkId,
|
|
185
|
+
from_file: e.fromFile,
|
|
186
|
+
from_symbol: e.fromSymbol,
|
|
187
|
+
to_symbol: e.toSymbol,
|
|
188
|
+
to_file: e.toFile,
|
|
189
|
+
edge_type: e.edgeType
|
|
190
|
+
}));
|
|
191
|
+
await withWriteLock(async () => {
|
|
192
|
+
await table.add(rows);
|
|
193
|
+
log.debug({ count: rows.length }, "Inserted edge rows");
|
|
194
|
+
});
|
|
195
|
+
}
|
|
196
|
+
async function queryEdgesFrom(edgesTable, fromChunkId) {
|
|
197
|
+
const escaped = fromChunkId.replace(/'/g, "''");
|
|
198
|
+
return edgesTable.query().where(`from_chunk_id = '${escaped}'`).toArray();
|
|
139
199
|
}
|
|
140
200
|
|
|
141
201
|
export {
|
|
202
|
+
withWriteLock,
|
|
142
203
|
openDatabase,
|
|
143
204
|
openOrCreateChunkTable,
|
|
144
205
|
insertChunks,
|
|
@@ -147,5 +208,8 @@ export {
|
|
|
147
208
|
writeIndexState,
|
|
148
209
|
readFileHashes,
|
|
149
210
|
writeFileHashes,
|
|
150
|
-
deleteChunksByFilePath
|
|
211
|
+
deleteChunksByFilePath,
|
|
212
|
+
openOrCreateEdgesTable,
|
|
213
|
+
insertEdges,
|
|
214
|
+
queryEdgesFrom
|
|
151
215
|
};
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
childLogger
|
|
4
|
-
} from "./chunk-EEC7KYPY.js";
|
|
4
|
+
} from "./chunk-TXLCXXKY.js";
|
|
5
5
|
|
|
6
6
|
// src/services/tokenCounter.ts
|
|
7
7
|
import { countTokens } from "@anthropic-ai/tokenizer";
|
|
@@ -36,5 +36,6 @@ function assembleContext(chunks, opts) {
|
|
|
36
36
|
|
|
37
37
|
export {
|
|
38
38
|
countChunkTokens,
|
|
39
|
+
formatChunk,
|
|
39
40
|
assembleContext
|
|
40
41
|
};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/lib/format.ts
|
|
4
|
+
import dedent from "dedent";
|
|
5
|
+
function formatTokenSavings(input) {
|
|
6
|
+
const fileSuffix = input.filesInContext !== 1 ? "s" : "";
|
|
7
|
+
return [
|
|
8
|
+
`Tokens sent to Claude: ${input.tokensSent.toLocaleString()}`,
|
|
9
|
+
`Estimated without: ~${input.estimatedWithout.toLocaleString()} (${input.filesInContext} file${fileSuffix} + overhead)`,
|
|
10
|
+
`Reduction: ${input.reductionPct}%`
|
|
11
|
+
].join("\n");
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
export {
|
|
15
|
+
formatTokenSavings
|
|
16
|
+
};
|
|
@@ -1,29 +1,32 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
countChunkTokens
|
|
4
|
-
} from "./chunk-ZKVZTDND.js";
|
|
5
|
-
import {
|
|
6
|
-
formatTokenSavings
|
|
7
|
-
} from "./chunk-ZGYLHFHJ.js";
|
|
8
|
-
import {
|
|
9
|
-
embedBatchWithRetry
|
|
10
|
-
} from "./chunk-KQZSBRRH.js";
|
|
11
|
-
import {
|
|
12
|
-
isOllamaRunning
|
|
13
|
-
} from "./chunk-FQL4HV4R.js";
|
|
4
|
+
} from "./chunk-4IOR54GU.js";
|
|
14
5
|
import {
|
|
15
6
|
createVectorIndexIfNeeded,
|
|
16
7
|
deleteChunksByFilePath,
|
|
17
8
|
insertChunks,
|
|
9
|
+
insertEdges,
|
|
18
10
|
openDatabase,
|
|
19
11
|
openOrCreateChunkTable,
|
|
12
|
+
openOrCreateEdgesTable,
|
|
20
13
|
readFileHashes,
|
|
14
|
+
withWriteLock,
|
|
21
15
|
writeFileHashes,
|
|
22
16
|
writeIndexState
|
|
23
|
-
} from "./chunk-Y7BU7IYX.js";
|
|
17
|
+
} from "./chunk-3HQRTLBH.js";
|
|
18
|
+
import {
|
|
19
|
+
formatTokenSavings
|
|
20
|
+
} from "./chunk-6C2OYMKD.js";
|
|
21
|
+
import {
|
|
22
|
+
embedBatchWithRetry
|
|
23
|
+
} from "./chunk-RKPICQU7.js";
|
|
24
|
+
import {
|
|
25
|
+
isOllamaRunning
|
|
26
|
+
} from "./chunk-HRJ3OT6Q.js";
|
|
24
27
|
import {
|
|
25
28
|
readProfile
|
|
26
|
-
} from "./chunk-PJQNHMQH.js";
|
|
29
|
+
} from "./chunk-DPH5X5HL.js";
|
|
27
30
|
import {
|
|
28
31
|
DEFAULT_BATCH_SIZE,
|
|
29
32
|
DEFAULT_EMBEDDING_DIMENSION,
|
|
@@ -32,11 +35,11 @@ import {
|
|
|
32
35
|
FILE_READ_CONCURRENCY,
|
|
33
36
|
childLogger,
|
|
34
37
|
setLogLevel
|
|
35
|
-
} from "./chunk-EEC7KYPY.js";
|
|
38
|
+
} from "./chunk-TXLCXXKY.js";
|
|
36
39
|
|
|
37
40
|
// src/workflows/index.ts
|
|
38
|
-
import { resolve } from "path";
|
|
39
|
-
import { readFile as readFile2 } from "fs/promises";
|
|
41
|
+
import { resolve as resolve2 } from "path";
|
|
42
|
+
import { readFile as readFile3 } from "fs/promises";
|
|
40
43
|
import { createHash } from "crypto";
|
|
41
44
|
|
|
42
45
|
// src/services/crawler.ts
|
|
@@ -73,13 +76,16 @@ var ALWAYS_EXCLUDE_GLOBS = [
|
|
|
73
76
|
"**/Cargo.lock",
|
|
74
77
|
"**/*.min.js"
|
|
75
78
|
];
|
|
76
|
-
async function crawlSourceFiles(rootDir) {
|
|
79
|
+
async function crawlSourceFiles(rootDir, opts) {
|
|
77
80
|
const ig = ignore();
|
|
78
81
|
try {
|
|
79
82
|
const gitignoreContent = await readFile(`${rootDir}/.gitignore`, "utf-8");
|
|
80
83
|
ig.add(gitignoreContent);
|
|
81
84
|
} catch {
|
|
82
85
|
}
|
|
86
|
+
if (opts?.extraIgnorePatterns?.length) {
|
|
87
|
+
ig.add(opts.extraIgnorePatterns);
|
|
88
|
+
}
|
|
83
89
|
const files = await fg("**/*", {
|
|
84
90
|
cwd: rootDir,
|
|
85
91
|
absolute: true,
|
|
@@ -98,7 +104,7 @@ async function crawlSourceFiles(rootDir) {
|
|
|
98
104
|
|
|
99
105
|
// src/services/chunker.ts
|
|
100
106
|
import { createRequire } from "module";
|
|
101
|
-
import { extname as extname2 } from "path";
|
|
107
|
+
import { extname as extname2, resolve, dirname } from "path";
|
|
102
108
|
var _require = createRequire(import.meta.url);
|
|
103
109
|
var Parser = _require("tree-sitter");
|
|
104
110
|
var { typescript: tsLang, tsx: tsxLang } = _require("tree-sitter-typescript");
|
|
@@ -203,7 +209,7 @@ function chunkFile(filePath, content) {
|
|
|
203
209
|
const ext = extname2(filePath);
|
|
204
210
|
const lang = LANGUAGE_MAP[ext];
|
|
205
211
|
if (!lang) {
|
|
206
|
-
return [];
|
|
212
|
+
return { chunks: [], edges: [] };
|
|
207
213
|
}
|
|
208
214
|
const category = getLanguageCategory(ext);
|
|
209
215
|
const nodeTypes = CHUNK_NODE_TYPES[category];
|
|
@@ -211,7 +217,50 @@ function chunkFile(filePath, content) {
|
|
|
211
217
|
parser.setLanguage(lang);
|
|
212
218
|
const tree = parser.parse(content);
|
|
213
219
|
const chunks = [];
|
|
220
|
+
const edges = [];
|
|
221
|
+
let currentChunkId = null;
|
|
222
|
+
let currentSymbol = null;
|
|
214
223
|
for (const node of walkNodes(tree.rootNode)) {
|
|
224
|
+
if (node.type === "call_expression") {
|
|
225
|
+
const funcNode = node.childForFieldName("function");
|
|
226
|
+
if (funcNode) {
|
|
227
|
+
let toSymbol = null;
|
|
228
|
+
if (funcNode.type === "identifier") {
|
|
229
|
+
toSymbol = funcNode.text;
|
|
230
|
+
} else if (funcNode.type === "member_expression" || funcNode.type === "optional_member_expression") {
|
|
231
|
+
toSymbol = funcNode.childForFieldName("property")?.text ?? null;
|
|
232
|
+
}
|
|
233
|
+
if (toSymbol) {
|
|
234
|
+
const chunkId = currentChunkId ?? `${filePath}:0`;
|
|
235
|
+
const symbol = currentSymbol;
|
|
236
|
+
edges.push({
|
|
237
|
+
fromChunkId: chunkId,
|
|
238
|
+
fromFile: filePath,
|
|
239
|
+
fromSymbol: symbol,
|
|
240
|
+
toSymbol,
|
|
241
|
+
toFile: null,
|
|
242
|
+
// Resolved at query time, not index time
|
|
243
|
+
edgeType: "call"
|
|
244
|
+
});
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
if (node.type === "import_statement") {
|
|
249
|
+
const source = node.childForFieldName("source");
|
|
250
|
+
if (source) {
|
|
251
|
+
const raw = source.text.replace(/['"]/g, "");
|
|
252
|
+
const isRelative = raw.startsWith("./") || raw.startsWith("../");
|
|
253
|
+
const toFile = isRelative ? resolve(dirname(filePath), raw) : null;
|
|
254
|
+
edges.push({
|
|
255
|
+
fromChunkId: `${filePath}:0`,
|
|
256
|
+
fromFile: filePath,
|
|
257
|
+
fromSymbol: null,
|
|
258
|
+
toSymbol: raw,
|
|
259
|
+
toFile,
|
|
260
|
+
edgeType: "import"
|
|
261
|
+
});
|
|
262
|
+
}
|
|
263
|
+
}
|
|
215
264
|
if (!nodeTypes.has(node.type)) {
|
|
216
265
|
continue;
|
|
217
266
|
}
|
|
@@ -237,6 +286,8 @@ function chunkFile(filePath, content) {
|
|
|
237
286
|
startLine: node.startPosition.row + 1,
|
|
238
287
|
endLine: node.endPosition.row + 1
|
|
239
288
|
});
|
|
289
|
+
currentChunkId = `${filePath}:${node.startPosition.row}`;
|
|
290
|
+
currentSymbol = extractName(node);
|
|
240
291
|
}
|
|
241
292
|
if (chunks.length === 0) {
|
|
242
293
|
chunks.push({
|
|
@@ -250,8 +301,20 @@ function chunkFile(filePath, content) {
|
|
|
250
301
|
endLine: content.split("\n").length
|
|
251
302
|
});
|
|
252
303
|
}
|
|
253
|
-
log2.debug({ filePath, chunkCount: chunks.length }, "File chunked");
|
|
254
|
-
return chunks;
|
|
304
|
+
log2.debug({ filePath, chunkCount: chunks.length, edgeCount: edges.length }, "File chunked");
|
|
305
|
+
return { chunks, edges };
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
// src/services/ignorePatterns.ts
|
|
309
|
+
import { readFile as readFile2 } from "fs/promises";
|
|
310
|
+
import { join } from "path";
|
|
311
|
+
async function loadIgnorePatterns(rootDir) {
|
|
312
|
+
try {
|
|
313
|
+
const content = await readFile2(join(rootDir, ".braincacheignore"), "utf-8");
|
|
314
|
+
return content.split("\n").filter((line) => line.trim() !== "" && !line.startsWith("#"));
|
|
315
|
+
} catch {
|
|
316
|
+
return [];
|
|
317
|
+
}
|
|
255
318
|
}
|
|
256
319
|
|
|
257
320
|
// src/workflows/index.ts
|
|
@@ -271,7 +334,12 @@ async function runIndex(targetPath, opts) {
|
|
|
271
334
|
return originalStderrWrite(chunk, ...args);
|
|
272
335
|
});
|
|
273
336
|
try {
|
|
274
|
-
const rootDir = resolve(targetPath ?? ".");
|
|
337
|
+
const rootDir = resolve2(targetPath ?? ".");
|
|
338
|
+
const ignorePatterns = await loadIgnorePatterns(rootDir);
|
|
339
|
+
if (ignorePatterns.length > 0) {
|
|
340
|
+
process.stderr.write(`brain-cache: loaded ${ignorePatterns.length} patterns from .braincacheignore
|
|
341
|
+
`);
|
|
342
|
+
}
|
|
275
343
|
const profile = await readProfile();
|
|
276
344
|
if (profile === null) {
|
|
277
345
|
throw new Error("No profile found. Run 'brain-cache init' first.");
|
|
@@ -289,7 +357,10 @@ async function runIndex(targetPath, opts) {
|
|
|
289
357
|
}
|
|
290
358
|
const db = await openDatabase(rootDir);
|
|
291
359
|
const table = await openOrCreateChunkTable(db, rootDir, profile.embeddingModel, dim);
|
|
292
|
-
const
|
|
360
|
+
const edgesTable = await openOrCreateEdgesTable(db);
|
|
361
|
+
const files = await crawlSourceFiles(rootDir, {
|
|
362
|
+
extraIgnorePatterns: ignorePatterns.length > 0 ? ignorePatterns : void 0
|
|
363
|
+
});
|
|
293
364
|
process.stderr.write(`brain-cache: found ${files.length} source files
|
|
294
365
|
`);
|
|
295
366
|
if (files.length === 0) {
|
|
@@ -303,7 +374,7 @@ async function runIndex(targetPath, opts) {
|
|
|
303
374
|
const group = files.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
|
|
304
375
|
const results = await Promise.all(
|
|
305
376
|
group.map(async (filePath) => {
|
|
306
|
-
const content = await readFile2(filePath, "utf-8");
|
|
377
|
+
const content = await readFile3(filePath, "utf-8");
|
|
307
378
|
return { filePath, content, hash: hashContent(content) };
|
|
308
379
|
})
|
|
309
380
|
);
|
|
@@ -339,6 +410,10 @@ async function runIndex(targetPath, opts) {
|
|
|
339
410
|
);
|
|
340
411
|
for (const filePath of [...removedFiles, ...changedFiles]) {
|
|
341
412
|
await deleteChunksByFilePath(table, filePath);
|
|
413
|
+
await withWriteLock(async () => {
|
|
414
|
+
const escaped = filePath.replace(/'/g, "''");
|
|
415
|
+
await edgesTable.delete(`from_file = '${escaped}'`);
|
|
416
|
+
});
|
|
342
417
|
}
|
|
343
418
|
const updatedHashes = { ...storedHashes };
|
|
344
419
|
for (const filePath of removedFiles) {
|
|
@@ -386,11 +461,13 @@ async function runIndex(targetPath, opts) {
|
|
|
386
461
|
for (let groupStart = 0; groupStart < filesToProcess.length; groupStart += FILE_READ_CONCURRENCY) {
|
|
387
462
|
const group = filesToProcess.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
|
|
388
463
|
const groupChunks = [];
|
|
464
|
+
const groupEdges = [];
|
|
389
465
|
for (const filePath of group) {
|
|
390
466
|
const content = contentMap.get(filePath);
|
|
391
467
|
totalRawTokens += countChunkTokens(content);
|
|
392
|
-
const chunks = chunkFile(filePath, content);
|
|
468
|
+
const { chunks, edges } = chunkFile(filePath, content);
|
|
393
469
|
groupChunks.push(...chunks);
|
|
470
|
+
groupEdges.push(...edges);
|
|
394
471
|
}
|
|
395
472
|
processedFiles += group.length;
|
|
396
473
|
totalChunks += groupChunks.length;
|
|
@@ -431,6 +508,9 @@ async function runIndex(targetPath, opts) {
|
|
|
431
508
|
`
|
|
432
509
|
);
|
|
433
510
|
}
|
|
511
|
+
if (groupEdges.length > 0) {
|
|
512
|
+
await insertEdges(edgesTable, groupEdges);
|
|
513
|
+
}
|
|
434
514
|
}
|
|
435
515
|
if (skippedChunks > 0) {
|
|
436
516
|
process.stderr.write(`brain-cache: ${skippedChunks} chunks skipped (too large for model context)
|
|
@@ -440,6 +520,14 @@ async function runIndex(targetPath, opts) {
|
|
|
440
520
|
`brain-cache: ${totalChunks} chunks from ${filesToProcess.length} files
|
|
441
521
|
`
|
|
442
522
|
);
|
|
523
|
+
const edgeCount = await edgesTable.countRows();
|
|
524
|
+
if (edgeCount === 0) {
|
|
525
|
+
process.stderr.write(`brain-cache: no call edges extracted \u2014 check source files
|
|
526
|
+
`);
|
|
527
|
+
} else {
|
|
528
|
+
process.stderr.write(`brain-cache: ${edgeCount} call/import edges stored
|
|
529
|
+
`);
|
|
530
|
+
}
|
|
443
531
|
await createVectorIndexIfNeeded(table, profile.embeddingModel);
|
|
444
532
|
for (const filePath of filesToProcess) {
|
|
445
533
|
updatedHashes[filePath] = currentHashes[filePath];
|
|
@@ -480,6 +568,9 @@ ${savingsBlock}
|
|
|
480
568
|
process.stderr.write = originalStderrWrite;
|
|
481
569
|
}
|
|
482
570
|
}
|
|
571
|
+
|
|
483
572
|
export {
|
|
573
|
+
ALWAYS_EXCLUDE_GLOBS,
|
|
574
|
+
loadIgnorePatterns,
|
|
484
575
|
runIndex
|
|
485
576
|
};
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
HIGH_RELEVANCE_SIMILARITY_THRESHOLD,
|
|
4
|
+
childLogger
|
|
5
|
+
} from "./chunk-TXLCXXKY.js";
|
|
6
|
+
|
|
7
|
+
// src/services/retriever.ts
|
|
8
|
+
var log = childLogger("retriever");
|
|
9
|
+
var TRACE_KEYWORDS = [
|
|
10
|
+
"trace the",
|
|
11
|
+
"trace flow",
|
|
12
|
+
"call path",
|
|
13
|
+
"flow of",
|
|
14
|
+
"follows from",
|
|
15
|
+
"calls into",
|
|
16
|
+
"invokes",
|
|
17
|
+
"trace from"
|
|
18
|
+
];
|
|
19
|
+
var TRACE_REGEX = /how does\b.*\bflow\b/i;
|
|
20
|
+
var LOOKUP_BIGRAMS = [
|
|
21
|
+
"stack trace",
|
|
22
|
+
"null pointer",
|
|
23
|
+
"not defined",
|
|
24
|
+
"type error",
|
|
25
|
+
"reference error",
|
|
26
|
+
"syntax error",
|
|
27
|
+
"runtime error",
|
|
28
|
+
"segmentation fault",
|
|
29
|
+
"not working",
|
|
30
|
+
"throws exception"
|
|
31
|
+
];
|
|
32
|
+
var LOOKUP_KEYWORDS = [
|
|
33
|
+
"where is",
|
|
34
|
+
"find the",
|
|
35
|
+
"definition of",
|
|
36
|
+
"signature of",
|
|
37
|
+
"show me the",
|
|
38
|
+
"what does",
|
|
39
|
+
"what is the type"
|
|
40
|
+
];
|
|
41
|
+
var EXPLORE_EXCLUSIONS = [
|
|
42
|
+
"error handler",
|
|
43
|
+
"error handling",
|
|
44
|
+
"error boundary",
|
|
45
|
+
"error type",
|
|
46
|
+
"error message",
|
|
47
|
+
"error code",
|
|
48
|
+
"error class",
|
|
49
|
+
"null object",
|
|
50
|
+
"null check",
|
|
51
|
+
"null pattern",
|
|
52
|
+
"undefined behavior",
|
|
53
|
+
"fix the style",
|
|
54
|
+
"fix the format",
|
|
55
|
+
"fix the lint",
|
|
56
|
+
"fix the config",
|
|
57
|
+
"fix the setup"
|
|
58
|
+
];
|
|
59
|
+
function classifyRetrievalMode(query) {
|
|
60
|
+
const lower = query.toLowerCase();
|
|
61
|
+
if (TRACE_KEYWORDS.some((kw) => lower.includes(kw)) || TRACE_REGEX.test(lower)) {
|
|
62
|
+
const broadTerms = ["architecture", "overview", "structure", "system", "design", "pipeline", "codebase"];
|
|
63
|
+
const isBroad = broadTerms.some((t) => lower.includes(t));
|
|
64
|
+
if (!isBroad) {
|
|
65
|
+
return "trace";
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
if (LOOKUP_BIGRAMS.some((bg) => lower.includes(bg))) {
|
|
69
|
+
return "lookup";
|
|
70
|
+
}
|
|
71
|
+
const hasLookupKeyword = LOOKUP_KEYWORDS.some((kw) => lower.includes(kw));
|
|
72
|
+
if (hasLookupKeyword) {
|
|
73
|
+
const isExcluded = EXPLORE_EXCLUSIONS.some((ex) => lower.includes(ex));
|
|
74
|
+
if (!isExcluded) {
|
|
75
|
+
return "lookup";
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
return "explore";
|
|
79
|
+
}
|
|
80
|
+
var RETRIEVAL_STRATEGIES = {
|
|
81
|
+
lookup: { limit: 5, distanceThreshold: 0.4, keywordBoostWeight: 0.4 },
|
|
82
|
+
trace: { limit: 3, distanceThreshold: 0.5, keywordBoostWeight: 0.2 },
|
|
83
|
+
explore: { limit: 20, distanceThreshold: 0.6, keywordBoostWeight: 0.1 }
|
|
84
|
+
};
|
|
85
|
+
function extractQueryTokens(query) {
|
|
86
|
+
return query.toLowerCase().split(/[\s.,;:!?'"()\[\]{}/\\]+/).filter((t) => t.length >= 3);
|
|
87
|
+
}
|
|
88
|
+
function splitCamelCase(name) {
|
|
89
|
+
return name.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
|
|
90
|
+
}
|
|
91
|
+
function computeKeywordBoost(chunk, queryTokens) {
|
|
92
|
+
if (queryTokens.length === 0) return 0;
|
|
93
|
+
const fileName = chunk.filePath.split("/").pop()?.toLowerCase() ?? "";
|
|
94
|
+
const fileNameStem = fileName.replace(/\.[^.]+$/, "");
|
|
95
|
+
const chunkName = (chunk.name ?? "").toLowerCase();
|
|
96
|
+
if (chunkName.length > 0 && queryTokens.some((t) => t === chunkName)) {
|
|
97
|
+
return 1;
|
|
98
|
+
}
|
|
99
|
+
const subTokens = chunkName.length > 0 ? splitCamelCase(chunkName) : [];
|
|
100
|
+
if (subTokens.length > 1 && subTokens.every((sub) => queryTokens.some((t) => t.includes(sub) || sub.includes(t)))) {
|
|
101
|
+
return 1;
|
|
102
|
+
}
|
|
103
|
+
if (fileNameStem.length > 0 && queryTokens.some((t) => t === fileNameStem)) {
|
|
104
|
+
return 0.6;
|
|
105
|
+
}
|
|
106
|
+
const target = `${fileName} ${chunkName}`;
|
|
107
|
+
const matchCount = queryTokens.filter((t) => target.includes(t)).length;
|
|
108
|
+
return matchCount / queryTokens.length;
|
|
109
|
+
}
|
|
110
|
+
var CONFIG_NOISE_PATTERNS = [
|
|
111
|
+
{ pattern: /^vitest\.config\./, toolName: "vitest" },
|
|
112
|
+
{ pattern: /^tsup\.config\./, toolName: "tsup" },
|
|
113
|
+
{ pattern: /^tsconfig.*\.json$/, toolName: "tsconfig" },
|
|
114
|
+
{ pattern: /^jest\.config\./, toolName: "jest" },
|
|
115
|
+
{ pattern: /^eslint\.config\./, toolName: "eslint" },
|
|
116
|
+
{ pattern: /^\.eslintrc/, toolName: "eslint" }
|
|
117
|
+
];
|
|
118
|
+
var CONFIG_FILE_NOISE_PENALTY = 0.15;
|
|
119
|
+
function computeNoisePenalty(chunk, query) {
|
|
120
|
+
const fileName = chunk.filePath.split("/").pop() ?? "";
|
|
121
|
+
const lowerQuery = query.toLowerCase();
|
|
122
|
+
for (const { pattern, toolName } of CONFIG_NOISE_PATTERNS) {
|
|
123
|
+
if (pattern.test(fileName)) {
|
|
124
|
+
if (lowerQuery.includes(toolName)) return 0;
|
|
125
|
+
return CONFIG_FILE_NOISE_PENALTY;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
return 0;
|
|
129
|
+
}
|
|
130
|
+
async function searchChunks(table, queryVector, opts, query) {
|
|
131
|
+
log.debug({ limit: opts.limit, distanceThreshold: opts.distanceThreshold }, "Searching chunks");
|
|
132
|
+
const rows = await table.query().nearestTo(queryVector).distanceType("cosine").limit(opts.limit).toArray();
|
|
133
|
+
const queryTokens = query ? extractQueryTokens(query) : [];
|
|
134
|
+
const chunks = rows.filter((r) => r._distance <= opts.distanceThreshold).map((r) => ({
|
|
135
|
+
id: r.id,
|
|
136
|
+
filePath: r.file_path,
|
|
137
|
+
chunkType: r.chunk_type,
|
|
138
|
+
scope: r.scope,
|
|
139
|
+
name: r.name,
|
|
140
|
+
content: r.content,
|
|
141
|
+
startLine: r.start_line,
|
|
142
|
+
endLine: r.end_line,
|
|
143
|
+
similarity: 1 - r._distance
|
|
144
|
+
}));
|
|
145
|
+
if (queryTokens.length > 0) {
|
|
146
|
+
const boostWeight = opts.keywordBoostWeight ?? 0.1;
|
|
147
|
+
const scored = chunks.map((chunk) => {
|
|
148
|
+
const boost = computeKeywordBoost(chunk, queryTokens);
|
|
149
|
+
const score = chunk.similarity * (1 - boostWeight) + boost * boostWeight - computeNoisePenalty(chunk, query);
|
|
150
|
+
const promotedSimilarity = boost > 0 ? Math.max(chunk.similarity, HIGH_RELEVANCE_SIMILARITY_THRESHOLD) : chunk.similarity;
|
|
151
|
+
return { chunk: { ...chunk, similarity: promotedSimilarity }, score };
|
|
152
|
+
});
|
|
153
|
+
return scored.sort((a, b) => b.score - a.score).map(({ chunk }) => chunk);
|
|
154
|
+
}
|
|
155
|
+
return chunks.sort((a, b) => b.similarity - a.similarity);
|
|
156
|
+
}
|
|
157
|
+
function deduplicateChunks(chunks) {
|
|
158
|
+
const seen = /* @__PURE__ */ new Set();
|
|
159
|
+
return chunks.filter((c) => {
|
|
160
|
+
if (seen.has(c.id)) return false;
|
|
161
|
+
seen.add(c.id);
|
|
162
|
+
return true;
|
|
163
|
+
});
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
export {
|
|
167
|
+
classifyRetrievalMode,
|
|
168
|
+
RETRIEVAL_STRATEGIES,
|
|
169
|
+
searchChunks,
|
|
170
|
+
deduplicateChunks
|
|
171
|
+
};
|