brain-cache 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +215 -0
- package/dist/askCodebase-ECDSSTQ6.js +83 -0
- package/dist/buildContext-6755TRND.js +14 -0
- package/dist/chunk-7JLSJNKU.js +97 -0
- package/dist/chunk-GGOUKACO.js +16 -0
- package/dist/chunk-OKWMQNH6.js +40 -0
- package/dist/chunk-P7WSTGLE.js +131 -0
- package/dist/chunk-PA4BZBWS.js +162 -0
- package/dist/chunk-PDQXJSH4.js +87 -0
- package/dist/chunk-WCNMLSL2.js +79 -0
- package/dist/chunk-XXWJ57QP.js +151 -0
- package/dist/chunk-ZLB4VJQK.js +109 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +86 -0
- package/dist/doctor-5775VUMA.js +62 -0
- package/dist/embedder-KRANITVN.js +10 -0
- package/dist/init-TRPFEOHF.js +89 -0
- package/dist/mcp.d.ts +2 -0
- package/dist/mcp.js +1414 -0
- package/dist/search-WKKGPNLV.js +82 -0
- package/dist/status-2SOIQ3LX.js +37 -0
- package/dist/workflows-MJLEPCZY.js +460 -0
- package/package.json +68 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
RETRIEVAL_STRATEGIES,
|
|
4
|
+
classifyQueryIntent,
|
|
5
|
+
deduplicateChunks,
|
|
6
|
+
searchChunks
|
|
7
|
+
} from "./chunk-ZLB4VJQK.js";
|
|
8
|
+
import {
|
|
9
|
+
embedBatchWithRetry
|
|
10
|
+
} from "./chunk-WCNMLSL2.js";
|
|
11
|
+
import {
|
|
12
|
+
isOllamaRunning
|
|
13
|
+
} from "./chunk-P7WSTGLE.js";
|
|
14
|
+
import {
|
|
15
|
+
openDatabase,
|
|
16
|
+
readIndexState
|
|
17
|
+
} from "./chunk-XXWJ57QP.js";
|
|
18
|
+
import {
|
|
19
|
+
readProfile
|
|
20
|
+
} from "./chunk-PA4BZBWS.js";
|
|
21
|
+
import "./chunk-PDQXJSH4.js";
|
|
22
|
+
|
|
23
|
+
// src/workflows/search.ts
|
|
24
|
+
import { resolve } from "path";
|
|
25
|
+
/**
 * Semantic search over an already-indexed codebase.
 *
 * Verifies prerequisites (profile, running Ollama, a populated "chunks"
 * table), embeds the query, retrieves the nearest chunks for the classified
 * query intent, and returns the de-duplicated results. Progress is logged
 * to stderr.
 *
 * @param {string} query - natural-language search query
 * @param {{ path?: string, limit?: number }} [opts] - index root and result limit
 * @returns {Promise<Array>} de-duplicated matching chunks
 * @throws {Error} when the profile, Ollama, or the index is missing/empty
 */
async function runSearch(query, opts) {
  // Precondition: a profile must exist and Ollama must be reachable.
  if ((await readProfile()) === null) {
    throw new Error("No profile found. Run 'brain-cache init' first.");
  }
  if (!(await isOllamaRunning())) {
    throw new Error(
      "Ollama is not running. Start it with 'ollama serve' or run 'brain-cache init'."
    );
  }
  const rootDir = resolve(opts?.path ?? ".");
  const indexState = await readIndexState(rootDir);
  if (indexState === null) {
    throw new Error(
      `No index found at ${rootDir}. Run 'brain-cache index' first.`
    );
  }
  // The database must contain a non-empty "chunks" table.
  const db = await openDatabase(rootDir);
  const existingTables = await db.tableNames();
  if (!existingTables.includes("chunks")) {
    throw new Error("No chunks table found. Run 'brain-cache index' first.");
  }
  const table = await db.openTable("chunks");
  if ((await table.countRows()) === 0) {
    throw new Error(
      `Index is empty at ${rootDir}. No source files were indexed.`
    );
  }
  // Retrieval parameters come from the classified intent; an explicit
  // opts.limit overrides the strategy default.
  const intent = classifyQueryIntent(query);
  const defaults = RETRIEVAL_STRATEGIES[intent];
  const strategy = {
    limit: opts?.limit ?? defaults.limit,
    distanceThreshold: defaults.distanceThreshold
  };
  process.stderr.write(
    `brain-cache: searching (intent=${intent}, limit=${strategy.limit})\n`
  );
  // Embed the query with the same model the index was built with.
  const [queryVector] = await embedBatchWithRetry(indexState.embeddingModel, [query]);
  const results = await searchChunks(table, queryVector, strategy);
  const deduped = deduplicateChunks(results);
  process.stderr.write(
    `brain-cache: found ${deduped.length} chunks (${results.length} before dedup)\n`
  );
  // One summary line per surviving chunk, for human inspection.
  for (const chunk of deduped) {
    process.stderr.write(
      ` ${chunk.similarity.toFixed(3)} ${chunk.filePath}:${chunk.startLine}-${chunk.endLine} [${chunk.chunkType}] ${chunk.name ?? ""}\n`
    );
  }
  return deduped;
}
export {
  runSearch
};
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
readIndexState
|
|
4
|
+
} from "./chunk-XXWJ57QP.js";
|
|
5
|
+
import {
|
|
6
|
+
readProfile
|
|
7
|
+
} from "./chunk-PA4BZBWS.js";
|
|
8
|
+
import "./chunk-PDQXJSH4.js";
|
|
9
|
+
|
|
10
|
+
// src/workflows/status.ts
|
|
11
|
+
import { resolve } from "path";
|
|
12
|
+
/**
 * Prints a human-readable summary of the index at the given path to stderr.
 *
 * @param {string} [targetPath] - directory whose index is reported; defaults to "."
 * @returns {Promise<void>}
 * @throws {Error} when no profile or no index exists
 */
async function runStatus(targetPath) {
  const rootDir = resolve(targetPath ?? ".");
  const profile = await readProfile();
  if (!profile) {
    throw new Error("No profile found. Run 'brain-cache init' first.");
  }
  const indexState = await readIndexState(rootDir);
  if (!indexState) {
    throw new Error(`No index found at ${rootDir}. Run 'brain-cache index [path]' first.`);
  }
  // Build the report line-by-line, then emit it in a single write.
  const divider = "\u2500".repeat(18);
  const report = [
    "brain-cache status",
    divider,
    `Path: ${rootDir}`,
    `Files indexed: ${indexState.fileCount}`,
    `Chunks stored: ${indexState.chunkCount}`,
    `Last indexed: ${indexState.indexedAt}`,
    `Embedding model: ${indexState.embeddingModel}`,
    `Embedding dim: ${indexState.dimension}`,
    `VRAM tier: ${profile.vramTier}`,
    ""
  ].join("\n");
  process.stderr.write(report);
}
export {
  runStatus
};
|
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
formatTokenSavings
|
|
4
|
+
} from "./chunk-GGOUKACO.js";
|
|
5
|
+
import {
|
|
6
|
+
countChunkTokens
|
|
7
|
+
} from "./chunk-OKWMQNH6.js";
|
|
8
|
+
import {
|
|
9
|
+
embedBatchWithRetry
|
|
10
|
+
} from "./chunk-WCNMLSL2.js";
|
|
11
|
+
import {
|
|
12
|
+
isOllamaRunning
|
|
13
|
+
} from "./chunk-P7WSTGLE.js";
|
|
14
|
+
import {
|
|
15
|
+
createVectorIndexIfNeeded,
|
|
16
|
+
deleteChunksByFilePath,
|
|
17
|
+
insertChunks,
|
|
18
|
+
openDatabase,
|
|
19
|
+
openOrCreateChunkTable,
|
|
20
|
+
readFileHashes,
|
|
21
|
+
writeFileHashes,
|
|
22
|
+
writeIndexState
|
|
23
|
+
} from "./chunk-XXWJ57QP.js";
|
|
24
|
+
import {
|
|
25
|
+
readProfile
|
|
26
|
+
} from "./chunk-PA4BZBWS.js";
|
|
27
|
+
import {
|
|
28
|
+
DEFAULT_BATCH_SIZE,
|
|
29
|
+
DEFAULT_EMBEDDING_DIMENSION,
|
|
30
|
+
EMBEDDING_DIMENSIONS,
|
|
31
|
+
EMBED_MAX_TOKENS,
|
|
32
|
+
FILE_READ_CONCURRENCY,
|
|
33
|
+
childLogger
|
|
34
|
+
} from "./chunk-PDQXJSH4.js";
|
|
35
|
+
|
|
36
|
+
// src/workflows/index.ts
|
|
37
|
+
import { resolve } from "path";
|
|
38
|
+
import { readFile as readFile2 } from "fs/promises";
|
|
39
|
+
import { createHash } from "crypto";
|
|
40
|
+
|
|
41
|
+
// src/services/crawler.ts
|
|
42
|
+
import fg from "fast-glob";
|
|
43
|
+
import ignore from "ignore";
|
|
44
|
+
import { readFile } from "fs/promises";
|
|
45
|
+
import { extname, relative } from "path";
|
|
46
|
+
// Scoped logger for the crawler service.
var log = childLogger("crawler");
// File extensions treated as indexable source code (TypeScript/JavaScript
// variants, Python, Go, Rust). Anything else is skipped by the crawler.
var SOURCE_EXTENSIONS = /* @__PURE__ */ new Set([
  ".ts",
  ".tsx",
  ".mts",
  ".cts",
  ".js",
  ".jsx",
  ".mjs",
  ".cjs",
  ".py",
  ".pyi",
  ".go",
  ".rs"
]);
// Paths that are always excluded regardless of .gitignore: dependency
// trees, build output, caches, lockfiles, and minified bundles.
var ALWAYS_EXCLUDE_GLOBS = [
  "**/node_modules/**",
  "**/.git/**",
  "**/dist/**",
  "**/build/**",
  "**/.next/**",
  "**/__pycache__/**",
  "**/*.egg-info/**",
  "**/package-lock.json",
  "**/yarn.lock",
  "**/pnpm-lock.yaml",
  "**/Cargo.lock",
  "**/*.min.js"
];
|
|
75
|
+
/**
 * Finds all indexable source files under rootDir.
 *
 * Applies three filters: the hard-coded exclusion globs, the source-extension
 * allow-list, and the project's own .gitignore (when one exists).
 *
 * Fix: the `ignore` package requires *relative, forward-slash-separated*
 * paths; `relative()` produces backslashes on Windows, which previously made
 * .gitignore rules silently fail to match there. Separators are now
 * normalized before the `ignores()` check.
 *
 * @param {string} rootDir - directory to crawl
 * @returns {Promise<string[]>} absolute paths of matching source files
 */
async function crawlSourceFiles(rootDir) {
  const ig = ignore();
  try {
    const gitignoreContent = await readFile(`${rootDir}/.gitignore`, "utf-8");
    ig.add(gitignoreContent);
  } catch {
    // Best effort: a missing/unreadable .gitignore just means no extra excludes.
  }
  const files = await fg("**/*", {
    cwd: rootDir,
    absolute: true,
    ignore: ALWAYS_EXCLUDE_GLOBS,
    onlyFiles: true
  });
  const result = files.filter((f) => {
    const ext = extname(f);
    if (!SOURCE_EXTENSIONS.has(ext)) return false;
    // Normalize Windows separators so .gitignore patterns match on all platforms.
    const rel = relative(rootDir, f).split("\\").join("/");
    return !ig.ignores(rel);
  });
  log.info({ rootDir, fileCount: result.length }, "Crawl complete");
  return result;
}
|
|
97
|
+
|
|
98
|
+
// src/services/chunker.ts
|
|
99
|
+
import { createRequire } from "module";
|
|
100
|
+
import { extname as extname2 } from "path";
|
|
101
|
+
// The tree-sitter grammars are CommonJS-only; load them through
// createRequire so this ESM bundle can use them.
var _require = createRequire(import.meta.url);
var Parser = _require("tree-sitter");
var { typescript: tsLang, tsx: tsxLang } = _require("tree-sitter-typescript");
var pythonLang = _require("tree-sitter-python");
var goLang = _require("tree-sitter-go");
var rustLang = _require("tree-sitter-rust");
// Scoped logger for the chunker service.
var log2 = childLogger("chunker");
// Maps a file extension to the tree-sitter grammar used to parse it.
// Plain JS (.js/.mjs/.cjs) is parsed with the TypeScript grammar (a
// superset) and .jsx with the TSX grammar.
var LANGUAGE_MAP = {
  ".ts": tsLang,
  ".tsx": tsxLang,
  ".mts": tsLang,
  ".cts": tsLang,
  ".js": tsLang,
  ".jsx": tsxLang,
  ".mjs": tsLang,
  ".cjs": tsLang,
  ".py": pythonLang,
  ".pyi": pythonLang,
  ".go": goLang,
  ".rs": rustLang
};
// AST node types (per language category, see getLanguageCategory) that
// become standalone chunks when encountered in a parse tree.
var CHUNK_NODE_TYPES = {
  typescript: /* @__PURE__ */ new Set([
    "function_declaration",
    "function_expression",
    "arrow_function",
    "generator_function_declaration",
    "class_declaration",
    "abstract_class_declaration",
    "method_definition"
  ]),
  python: /* @__PURE__ */ new Set([
    "function_definition",
    "async_function_definition",
    "class_definition"
  ]),
  go: /* @__PURE__ */ new Set([
    "function_declaration",
    "method_declaration",
    "func_literal"
  ]),
  rust: /* @__PURE__ */ new Set([
    "function_item",
    "impl_item",
    "closure_expression"
  ])
};
|
|
148
|
+
/**
 * Maps a file extension to its CHUNK_NODE_TYPES category.
 *
 * @param {string} ext - extension including the dot (e.g. ".ts")
 * @returns {string} "typescript" | "python" | "go" | "rust", or "" when unknown
 */
function getLanguageCategory(ext) {
  const tsFamily = [".ts", ".tsx", ".mts", ".cts", ".js", ".jsx", ".mjs", ".cjs"];
  if (tsFamily.includes(ext)) {
    return "typescript";
  }
  if (ext === ".py" || ext === ".pyi") {
    return "python";
  }
  if (ext === ".go") {
    return "go";
  }
  if (ext === ".rs") {
    return "rust";
  }
  return "";
}
|
|
170
|
+
/**
 * Reads the identifier text from a tree-sitter node's "name" field.
 *
 * @param {object} node - tree-sitter syntax node
 * @returns {string|null} the name text, or null when absent
 */
function extractName(node) {
  const lookup = node.childForFieldName;
  const nameNode = lookup ? lookup.call(node, "name") : null;
  return nameNode?.text ?? null;
}
|
|
173
|
+
/**
 * Walks up the syntax tree looking for the nearest enclosing class-like
 * construct and returns its name.
 *
 * @param {object} node - tree-sitter syntax node
 * @returns {string|null} enclosing class/impl name, or null when top-level
 */
function extractScope(node) {
  const classLike = [
    "class_declaration",
    "abstract_class_declaration",
    "class_definition",
    "impl_item"
  ];
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (classLike.includes(ancestor.type)) {
      return extractName(ancestor);
    }
  }
  return null;
}
|
|
183
|
+
/**
 * Buckets a tree-sitter node type into the chunk-type taxonomy.
 *
 * @param {string} nodeType - raw tree-sitter node type
 * @returns {string} "class", "method", or "function"
 */
function classifyChunkType(nodeType) {
  const classLike = new Set([
    "class_declaration",
    "abstract_class_declaration",
    "class_definition",
    "impl_item"
  ]);
  if (classLike.has(nodeType)) {
    return "class";
  }
  const methodLike = nodeType === "method_definition" || nodeType === "method_declaration";
  return methodLike ? "method" : "function";
}
|
|
192
|
+
/**
 * Pre-order depth-first traversal of a tree-sitter syntax tree.
 * Yields the node itself first, then each child subtree in order.
 *
 * @param {object} node - root of the subtree to walk
 * @yields {object} every node in the subtree, pre-order
 */
function* walkNodes(node) {
  // Explicit stack instead of recursion; children are pushed in reverse
  // so they are popped (and yielded) in ascending index order.
  const stack = [node];
  while (stack.length > 0) {
    const current = stack.pop();
    yield current;
    for (let i = current.childCount - 1; i >= 0; i--) {
      const child = current.child(i);
      if (child !== null) {
        stack.push(child);
      }
    }
  }
}
|
|
201
|
+
// Splits one source file into embeddable chunks using tree-sitter.
// Emits one chunk per recognized function/class/method node; a file with no
// recognized structure falls back to a single whole-file chunk so it remains
// searchable. Returns [] for unsupported extensions.
function chunkFile(filePath, content) {
  const ext = extname2(filePath);
  const lang = LANGUAGE_MAP[ext];
  if (!lang) {
    // No grammar registered for this extension: nothing to chunk.
    return [];
  }
  const category = getLanguageCategory(ext);
  const nodeTypes = CHUNK_NODE_TYPES[category];
  const parser = new Parser();
  parser.setLanguage(lang);
  const tree = parser.parse(content);
  const chunks = [];
  for (const node of walkNodes(tree.rootNode)) {
    if (!nodeTypes.has(node.type)) {
      continue;
    }
    if (node.type === "arrow_function") {
      // Only keep arrow functions that are bound to a top-level const
      // (`const f = () => ...` at program scope, possibly exported);
      // inline callbacks are skipped.
      const varDeclarator = node.parent;
      const lexDecl = varDeclarator?.parent;
      const container = lexDecl?.parent;
      const isTopLevelConst = varDeclarator?.type === "variable_declarator" && lexDecl?.type === "lexical_declaration" && (container?.type === "program" || container?.type === "export_statement");
      if (!isTopLevelConst) {
        continue;
      }
    }
    const chunkType = classifyChunkType(node.type);
    const name = extractName(node);
    const scope = extractScope(node);
    chunks.push({
      // NOTE(review): ids are `${filePath}:${startRow}`; two chunks that
      // start on the same line (e.g. a one-line class with a method) would
      // collide -- confirm downstream insert/delete tolerates this.
      id: `${filePath}:${node.startPosition.row}`,
      filePath,
      chunkType,
      scope,
      name,
      content: content.slice(node.startIndex, node.endIndex),
      // tree-sitter rows are 0-based; stored line numbers are 1-based.
      startLine: node.startPosition.row + 1,
      endLine: node.endPosition.row + 1
    });
  }
  if (chunks.length === 0) {
    // Fallback: index the whole file as one "file" chunk.
    chunks.push({
      id: `${filePath}:0`,
      filePath,
      chunkType: "file",
      scope: null,
      name: null,
      content,
      startLine: 1,
      endLine: content.split("\n").length
    });
  }
  log2.debug({ filePath, chunkCount: chunks.length }, "File chunked");
  return chunks;
}
|
|
255
|
+
|
|
256
|
+
// src/workflows/index.ts
|
|
257
|
+
/**
 * Computes a stable hex fingerprint of file content, used to detect
 * changed files between index runs.
 *
 * @param {string} content - file text
 * @returns {string} lowercase hex SHA-256 digest
 */
function hashContent(content) {
  const hasher = createHash("sha256");
  hasher.update(content, "utf-8");
  return hasher.digest("hex");
}
|
|
260
|
+
/**
 * Incrementally (re-)indexes the codebase at targetPath.
 *
 * Pipeline: validate prerequisites -> crawl source files -> hash contents ->
 * diff against stored hashes (new/changed/removed/unchanged) -> delete stale
 * chunks -> chunk + embed + insert the new/changed files in batches ->
 * persist updated hashes and index state -> print a token-savings summary.
 *
 * @param {string} [targetPath] - directory to index; defaults to "."
 * @param {{ force?: boolean }} [opts] - force=true re-indexes every file
 * @throws {Error} when no profile exists or Ollama is not running
 */
async function runIndex(targetPath, opts) {
  const force = opts?.force ?? false;
  const rootDir = resolve(targetPath ?? ".");
  const profile = await readProfile();
  if (profile === null) {
    throw new Error("No profile found. Run 'brain-cache init' first.");
  }
  const running = await isOllamaRunning();
  if (!running) {
    throw new Error("Ollama is not running. Start it with 'ollama serve' or run 'brain-cache init'.");
  }
  // Embedding dimension is looked up per model; unknown models fall back to
  // the default (with a warning) rather than failing.
  const dim = EMBEDDING_DIMENSIONS[profile.embeddingModel] ?? DEFAULT_EMBEDDING_DIMENSION;
  if (!(profile.embeddingModel in EMBEDDING_DIMENSIONS)) {
    process.stderr.write(
      `Warning: Unknown embedding model '${profile.embeddingModel}', defaulting to ${DEFAULT_EMBEDDING_DIMENSION} dimensions.
`
    );
  }
  const db = await openDatabase(rootDir);
  const table = await openOrCreateChunkTable(db, rootDir, profile.embeddingModel, dim);
  const files = await crawlSourceFiles(rootDir);
  process.stderr.write(`brain-cache: found ${files.length} source files
`);
  if (files.length === 0) {
    process.stderr.write(`No source files found in ${rootDir}
`);
    return;
  }
  // Read every crawled file (bounded concurrency) and hash its content.
  const contentMap = /* @__PURE__ */ new Map();
  const currentHashes = {};
  for (let groupStart = 0; groupStart < files.length; groupStart += FILE_READ_CONCURRENCY) {
    const group = files.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
    const results = await Promise.all(
      group.map(async (filePath) => {
        const content = await readFile2(filePath, "utf-8");
        return { filePath, content, hash: hashContent(content) };
      })
    );
    for (const { filePath, content, hash } of results) {
      contentMap.set(filePath, content);
      currentHashes[filePath] = hash;
    }
  }
  // force=true pretends there are no stored hashes, so every file is "new".
  const storedHashes = force ? {} : await readFileHashes(rootDir);
  const crawledSet = new Set(files);
  const newFiles = [];
  const changedFiles = [];
  const removedFiles = [];
  const unchangedFiles = [];
  // Classify each crawled file against the stored hash snapshot.
  for (const filePath of files) {
    const currentHash = currentHashes[filePath];
    if (!(filePath in storedHashes)) {
      newFiles.push(filePath);
    } else if (storedHashes[filePath] !== currentHash) {
      changedFiles.push(filePath);
    } else {
      unchangedFiles.push(filePath);
    }
  }
  // Files that were indexed before but no longer exist on disk.
  for (const filePath of Object.keys(storedHashes)) {
    if (!crawledSet.has(filePath)) {
      removedFiles.push(filePath);
    }
  }
  process.stderr.write(
    `brain-cache: incremental index -- ${newFiles.length} new, ${changedFiles.length} changed, ${removedFiles.length} removed (${unchangedFiles.length} unchanged)
`
  );
  // Drop stale chunks before re-inserting changed files.
  for (const filePath of [...removedFiles, ...changedFiles]) {
    await deleteChunksByFilePath(table, filePath);
  }
  const updatedHashes = { ...storedHashes };
  for (const filePath of removedFiles) {
    delete updatedHashes[filePath];
  }
  const filesToProcess = [...newFiles, ...changedFiles];
  if (filesToProcess.length === 0) {
    // Nothing new/changed: just refresh hashes and index state, then exit.
    process.stderr.write(`brain-cache: nothing to re-index
`);
    for (const filePath of files) {
      updatedHashes[filePath] = currentHashes[filePath];
    }
    await writeFileHashes(rootDir, updatedHashes);
    const totalFiles2 = unchangedFiles.length;
    const chunkCount2 = await table.countRows();
    await writeIndexState(rootDir, {
      version: 1,
      embeddingModel: profile.embeddingModel,
      dimension: dim,
      indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
      fileCount: totalFiles2,
      chunkCount: chunkCount2
    });
    process.stderr.write(
      `brain-cache: indexing complete
Files: ${totalFiles2}
Chunks: ${chunkCount2}
Model: ${profile.embeddingModel}
Stored in: ${rootDir}/.brain-cache/
`
    );
    return;
  }
  // Token accounting for the savings summary printed at the end.
  let totalRawTokens = 0;
  let totalChunkTokens = 0;
  let totalChunks = 0;
  let processedFiles = 0;
  let processedChunks = 0;
  for (let groupStart = 0; groupStart < filesToProcess.length; groupStart += FILE_READ_CONCURRENCY) {
    const group = filesToProcess.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
    const groupChunks = [];
    for (const filePath of group) {
      const content = contentMap.get(filePath);
      totalRawTokens += countChunkTokens(content);
      const chunks = chunkFile(filePath, content);
      groupChunks.push(...chunks);
    }
    processedFiles += group.length;
    totalChunks += groupChunks.length;
    // Progress line every 10 files, and always on the final group.
    if (processedFiles % 10 === 0 || groupStart + FILE_READ_CONCURRENCY >= filesToProcess.length) {
      process.stderr.write(`brain-cache: chunked ${processedFiles}/${filesToProcess.length} files
`);
    }
    // Embed and insert in fixed-size batches.
    for (let offset = 0; offset < groupChunks.length; offset += DEFAULT_BATCH_SIZE) {
      const batch = groupChunks.slice(offset, offset + DEFAULT_BATCH_SIZE);
      // Chunks over the embedder's token limit are skipped (with a warning),
      // not truncated.
      const embeddableBatch = batch.filter((chunk) => {
        const tokens = countChunkTokens(chunk.content);
        if (tokens > EMBED_MAX_TOKENS) {
          process.stderr.write(
            `
brain-cache: skipping oversized chunk (${tokens} tokens > ${EMBED_MAX_TOKENS} limit): ${chunk.filePath} lines ${chunk.startLine}-${chunk.endLine}
`
          );
          return false;
        }
        return true;
      });
      if (embeddableBatch.length === 0) continue;
      const texts = embeddableBatch.map((chunk) => chunk.content);
      totalChunkTokens += texts.reduce((sum, t) => sum + countChunkTokens(t), 0);
      const vectors = await embedBatchWithRetry(profile.embeddingModel, texts, dim);
      // Convert to the snake_case row shape the chunks table expects.
      const rows = embeddableBatch.map((chunk, i) => ({
        id: chunk.id,
        file_path: chunk.filePath,
        chunk_type: chunk.chunkType,
        scope: chunk.scope,
        name: chunk.name,
        content: chunk.content,
        start_line: chunk.startLine,
        end_line: chunk.endLine,
        vector: vectors[i]
      }));
      await insertChunks(table, rows);
      processedChunks += batch.length;
      // "\r" rewrites the same terminal line for a live progress counter.
      process.stderr.write(
        `\rbrain-cache: embedding ${processedChunks}/${totalChunks} chunks (${Math.round(processedChunks / totalChunks * 100)}%)`
      );
    }
  }
  process.stderr.write("\n");
  process.stderr.write(
    `brain-cache: ${totalChunks} chunks from ${filesToProcess.length} files
`
  );
  await createVectorIndexIfNeeded(table, profile.embeddingModel);
  // Record the hashes of everything that is now indexed (processed + unchanged).
  for (const filePath of filesToProcess) {
    updatedHashes[filePath] = currentHashes[filePath];
  }
  for (const filePath of unchangedFiles) {
    updatedHashes[filePath] = currentHashes[filePath];
  }
  await writeFileHashes(rootDir, updatedHashes);
  const totalFiles = files.length;
  const chunkCount = await table.countRows();
  await writeIndexState(rootDir, {
    version: 1,
    embeddingModel: profile.embeddingModel,
    dimension: dim,
    indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
    fileCount: totalFiles,
    chunkCount
  });
  // Summarize how many tokens chunking saved vs. embedding raw files.
  const reductionPct = totalRawTokens > 0 ? Math.round((1 - totalChunkTokens / totalRawTokens) * 100) : 0;
  const savingsBlock = formatTokenSavings({
    tokensSent: totalChunkTokens,
    estimatedWithout: totalRawTokens,
    reductionPct
  }).split("\n").map((line) => ` ${line}`).join("\n");
  process.stderr.write(
    `brain-cache: indexing complete
Files: ${totalFiles}
Chunks: ${totalChunks}
Model: ${profile.embeddingModel}
${savingsBlock}
Stored in: ${rootDir}/.brain-cache/
`
  );
}
export {
  runIndex
};
|
package/package.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "brain-cache",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Local MCP-first context engine for Claude. Index your codebase, retrieve only what matters, and cut token usage.",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"bin": {
|
|
8
|
+
"brain-cache": "./dist/cli.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"dist/",
|
|
12
|
+
"README.md",
|
|
13
|
+
"LICENSE"
|
|
14
|
+
],
|
|
15
|
+
"engines": {
|
|
16
|
+
"node": ">=20"
|
|
17
|
+
},
|
|
18
|
+
"homepage": "https://github.com/j4ckwinter/brain-cache",
|
|
19
|
+
"repository": {
|
|
20
|
+
"type": "git",
|
|
21
|
+
"url": "git+https://github.com/j4ckwinter/brain-cache.git"
|
|
22
|
+
},
|
|
23
|
+
"bugs": {
|
|
24
|
+
"url": "https://github.com/j4ckwinter/brain-cache/issues"
|
|
25
|
+
},
|
|
26
|
+
"scripts": {
|
|
27
|
+
"dev": "tsx src/cli/index.ts",
|
|
28
|
+
"build": "tsup",
|
|
29
|
+
"link": "npm link",
|
|
30
|
+
"test": "vitest run",
|
|
31
|
+
"test:watch": "vitest"
|
|
32
|
+
},
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"@anthropic-ai/sdk": "^0.81.0",
|
|
35
|
+
"@anthropic-ai/tokenizer": "^0.0.4",
|
|
36
|
+
"@lancedb/lancedb": "^0.27.1",
|
|
37
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
38
|
+
"apache-arrow": "^18.1.0",
|
|
39
|
+
"commander": "14.0.3",
|
|
40
|
+
"fast-glob": "^3.3.3",
|
|
41
|
+
"ignore": "^7.0.5",
|
|
42
|
+
"ollama": "^0.6.3",
|
|
43
|
+
"pino": "^9.0.0",
|
|
44
|
+
"tree-sitter": "^0.25.0",
|
|
45
|
+
"tree-sitter-go": "^0.25.0",
|
|
46
|
+
"tree-sitter-python": "^0.25.0",
|
|
47
|
+
"tree-sitter-rust": "^0.24.0",
|
|
48
|
+
"tree-sitter-typescript": "^0.23.2",
|
|
49
|
+
"zod": "^4.3.6"
|
|
50
|
+
},
|
|
51
|
+
"devDependencies": {
|
|
52
|
+
"@types/node": "^22.0.0",
|
|
53
|
+
"pino-pretty": "^11.0.0",
|
|
54
|
+
"tsup": "^8.0.0",
|
|
55
|
+
"tsx": "4.21.0",
|
|
56
|
+
"typescript": "^5.0.0",
|
|
57
|
+
"vitest": "^2.0.0"
|
|
58
|
+
},
|
|
59
|
+
"keywords": [
|
|
60
|
+
"ai",
|
|
61
|
+
"rag",
|
|
62
|
+
"claude",
|
|
63
|
+
"mcp",
|
|
64
|
+
"embeddings",
|
|
65
|
+
"developer-tools",
|
|
66
|
+
"ollama"
|
|
67
|
+
]
|
|
68
|
+
}
|