brain-cache 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{askCodebase-ECDSSTQ6.js → askCodebase-DTII3Y6P.js} +8 -8
- package/dist/buildContext-JKYV7CCP.js +14 -0
- package/dist/{chunk-PDQXJSH4.js → chunk-3SFDFUEX.js} +5 -1
- package/dist/{chunk-OKWMQNH6.js → chunk-5FXXZBZV.js} +1 -1
- package/dist/{chunk-XXWJ57QP.js → chunk-6MACVOTO.js} +2 -2
- package/dist/{chunk-7JLSJNKU.js → chunk-ABKGOJTC.js} +7 -7
- package/dist/{chunk-ZLB4VJQK.js → chunk-BF5UDEIF.js} +1 -1
- package/dist/{chunk-WCNMLSL2.js → chunk-GR6QXZ4J.js} +6 -8
- package/dist/{chunk-PA4BZBWS.js → chunk-MSI4MDIM.js} +1 -1
- package/dist/{chunk-P7WSTGLE.js → chunk-V4ARVFRG.js} +1 -1
- package/dist/cli.js +8 -8
- package/dist/{doctor-5775VUMA.js → doctor-3RIVSSNB.js} +3 -3
- package/dist/{embedder-KRANITVN.js → embedder-2UG2GDQO.js} +2 -2
- package/dist/{init-TRPFEOHF.js → init-SXC4MWOR.js} +26 -4
- package/dist/mcp.js +195 -177
- package/dist/{search-WKKGPNLV.js → search-BF7QY64J.js} +6 -6
- package/dist/{status-2SOIQ3LX.js → status-JYNMLSXZ.js} +3 -3
- package/dist/{workflows-MJLEPCZY.js → workflows-TWA2GDHJ.js} +194 -176
- package/package.json +1 -1
- package/dist/buildContext-6755TRND.js +0 -14
|
@@ -4,16 +4,16 @@ import {
|
|
|
4
4
|
} from "./chunk-GGOUKACO.js";
|
|
5
5
|
import {
|
|
6
6
|
runBuildContext
|
|
7
|
-
} from "./chunk-
|
|
8
|
-
import "./chunk-
|
|
9
|
-
import "./chunk-
|
|
10
|
-
import "./chunk-
|
|
11
|
-
import "./chunk-
|
|
12
|
-
import "./chunk-
|
|
13
|
-
import "./chunk-
|
|
7
|
+
} from "./chunk-ABKGOJTC.js";
|
|
8
|
+
import "./chunk-5FXXZBZV.js";
|
|
9
|
+
import "./chunk-BF5UDEIF.js";
|
|
10
|
+
import "./chunk-GR6QXZ4J.js";
|
|
11
|
+
import "./chunk-V4ARVFRG.js";
|
|
12
|
+
import "./chunk-6MACVOTO.js";
|
|
13
|
+
import "./chunk-MSI4MDIM.js";
|
|
14
14
|
import {
|
|
15
15
|
childLogger
|
|
16
|
-
} from "./chunk-
|
|
16
|
+
} from "./chunk-3SFDFUEX.js";
|
|
17
17
|
|
|
18
18
|
// src/workflows/askCodebase.ts
|
|
19
19
|
import Anthropic from "@anthropic-ai/sdk";
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
runBuildContext
|
|
4
|
+
} from "./chunk-ABKGOJTC.js";
|
|
5
|
+
import "./chunk-5FXXZBZV.js";
|
|
6
|
+
import "./chunk-BF5UDEIF.js";
|
|
7
|
+
import "./chunk-GR6QXZ4J.js";
|
|
8
|
+
import "./chunk-V4ARVFRG.js";
|
|
9
|
+
import "./chunk-6MACVOTO.js";
|
|
10
|
+
import "./chunk-MSI4MDIM.js";
|
|
11
|
+
import "./chunk-3SFDFUEX.js";
|
|
12
|
+
export {
|
|
13
|
+
runBuildContext
|
|
14
|
+
};
|
|
@@ -64,6 +64,9 @@ var logger = pino(
|
|
|
64
64
|
function childLogger(component) {
|
|
65
65
|
return logger.child({ component });
|
|
66
66
|
}
|
|
67
|
+
function setLogLevel(level) {
|
|
68
|
+
logger.level = level;
|
|
69
|
+
}
|
|
67
70
|
|
|
68
71
|
export {
|
|
69
72
|
GLOBAL_CONFIG_DIR,
|
|
@@ -83,5 +86,6 @@ export {
|
|
|
83
86
|
DIAGNOSTIC_SEARCH_LIMIT,
|
|
84
87
|
DEFAULT_TOKEN_BUDGET,
|
|
85
88
|
FILE_HASHES_FILENAME,
|
|
86
|
-
childLogger
|
|
89
|
+
childLogger,
|
|
90
|
+
setLogLevel
|
|
87
91
|
};
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
IndexStateSchema
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-MSI4MDIM.js";
|
|
5
5
|
import {
|
|
6
6
|
DEFAULT_EMBEDDING_DIMENSION,
|
|
7
7
|
EMBEDDING_DIMENSIONS,
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
PROJECT_DATA_DIR,
|
|
10
10
|
VECTOR_INDEX_THRESHOLD,
|
|
11
11
|
childLogger
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-3SFDFUEX.js";
|
|
13
13
|
|
|
14
14
|
// src/services/lancedb.ts
|
|
15
15
|
import * as lancedb from "@lancedb/lancedb";
|
|
@@ -2,29 +2,29 @@
|
|
|
2
2
|
import {
|
|
3
3
|
assembleContext,
|
|
4
4
|
countChunkTokens
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-5FXXZBZV.js";
|
|
6
6
|
import {
|
|
7
7
|
RETRIEVAL_STRATEGIES,
|
|
8
8
|
classifyQueryIntent,
|
|
9
9
|
deduplicateChunks,
|
|
10
10
|
searchChunks
|
|
11
|
-
} from "./chunk-
|
|
11
|
+
} from "./chunk-BF5UDEIF.js";
|
|
12
12
|
import {
|
|
13
13
|
embedBatchWithRetry
|
|
14
|
-
} from "./chunk-
|
|
14
|
+
} from "./chunk-GR6QXZ4J.js";
|
|
15
15
|
import {
|
|
16
16
|
isOllamaRunning
|
|
17
|
-
} from "./chunk-
|
|
17
|
+
} from "./chunk-V4ARVFRG.js";
|
|
18
18
|
import {
|
|
19
19
|
openDatabase,
|
|
20
20
|
readIndexState
|
|
21
|
-
} from "./chunk-
|
|
21
|
+
} from "./chunk-6MACVOTO.js";
|
|
22
22
|
import {
|
|
23
23
|
readProfile
|
|
24
|
-
} from "./chunk-
|
|
24
|
+
} from "./chunk-MSI4MDIM.js";
|
|
25
25
|
import {
|
|
26
26
|
DEFAULT_TOKEN_BUDGET
|
|
27
|
-
} from "./chunk-
|
|
27
|
+
} from "./chunk-3SFDFUEX.js";
|
|
28
28
|
|
|
29
29
|
// src/workflows/buildContext.ts
|
|
30
30
|
import { readFile } from "fs/promises";
|
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
DEFAULT_EMBEDDING_DIMENSION,
|
|
5
5
|
EMBED_TIMEOUT_MS,
|
|
6
6
|
childLogger
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-3SFDFUEX.js";
|
|
8
8
|
|
|
9
9
|
// src/services/embedder.ts
|
|
10
10
|
import ollama from "ollama";
|
|
@@ -40,7 +40,8 @@ function isContextLengthError(err) {
|
|
|
40
40
|
}
|
|
41
41
|
async function embedBatchWithRetry(model, texts, dimension = DEFAULT_EMBEDDING_DIMENSION, attempt = 0) {
|
|
42
42
|
try {
|
|
43
|
-
|
|
43
|
+
const embeddings = await embedBatch(model, texts);
|
|
44
|
+
return { embeddings, skipped: 0 };
|
|
44
45
|
} catch (err) {
|
|
45
46
|
if (attempt === 0 && isConnectionError(err)) {
|
|
46
47
|
log.warn({ model }, "Ollama cold-start suspected, retrying in 5s");
|
|
@@ -50,24 +51,21 @@ async function embedBatchWithRetry(model, texts, dimension = DEFAULT_EMBEDDING_D
|
|
|
50
51
|
if (isContextLengthError(err)) {
|
|
51
52
|
log.warn({ model, batchSize: texts.length }, "Batch exceeded context length, falling back to individual embedding");
|
|
52
53
|
const results = [];
|
|
54
|
+
let skipped = 0;
|
|
53
55
|
for (const text of texts) {
|
|
54
56
|
try {
|
|
55
57
|
const [vec] = await embedBatch(model, [text]);
|
|
56
58
|
results.push(vec);
|
|
57
59
|
} catch (innerErr) {
|
|
58
60
|
if (isContextLengthError(innerErr)) {
|
|
59
|
-
|
|
60
|
-
`
|
|
61
|
-
brain-cache: chunk too large for embedding model, skipping (${text.length} chars)
|
|
62
|
-
`
|
|
63
|
-
);
|
|
61
|
+
skipped++;
|
|
64
62
|
results.push(new Array(dimension).fill(0));
|
|
65
63
|
} else {
|
|
66
64
|
throw innerErr;
|
|
67
65
|
}
|
|
68
66
|
}
|
|
69
67
|
}
|
|
70
|
-
return results;
|
|
68
|
+
return { embeddings: results, skipped };
|
|
71
69
|
}
|
|
72
70
|
throw err;
|
|
73
71
|
}
|
package/dist/cli.js
CHANGED
|
@@ -5,23 +5,23 @@ import {
|
|
|
5
5
|
|
|
6
6
|
// src/cli/index.ts
|
|
7
7
|
import { Command } from "commander";
|
|
8
|
-
var version = "0.
|
|
8
|
+
var version = "0.3.0";
|
|
9
9
|
var program = new Command();
|
|
10
10
|
program.name("brain-cache").description("Local AI runtime \u2014 GPU cache layer for Claude").version(version);
|
|
11
11
|
program.command("init").description("Detect hardware, pull embedding model, create config directory").action(async () => {
|
|
12
|
-
const { runInit } = await import("./init-
|
|
12
|
+
const { runInit } = await import("./init-SXC4MWOR.js");
|
|
13
13
|
await runInit();
|
|
14
14
|
});
|
|
15
15
|
program.command("doctor").description("Report system health: GPU, VRAM tier, Ollama status").action(async () => {
|
|
16
|
-
const { runDoctor } = await import("./doctor-
|
|
16
|
+
const { runDoctor } = await import("./doctor-3RIVSSNB.js");
|
|
17
17
|
await runDoctor();
|
|
18
18
|
});
|
|
19
19
|
program.command("index").description("Index a codebase: parse, chunk, embed, and store in LanceDB").argument("[path]", "Directory to index (defaults to current directory)").option("-f, --force", "Force full reindex, ignoring cached file hashes").action(async (path, opts) => {
|
|
20
|
-
const { runIndex } = await import("./workflows-
|
|
20
|
+
const { runIndex } = await import("./workflows-TWA2GDHJ.js");
|
|
21
21
|
await runIndex(path, { force: opts.force });
|
|
22
22
|
});
|
|
23
23
|
program.command("search").description("Search indexed codebase with a natural language query").argument("<query>", "Natural language query string").option("-n, --limit <n>", "Maximum number of results", "10").option("-p, --path <path>", "Project root directory").action(async (query, opts) => {
|
|
24
|
-
const { runSearch } = await import("./search-
|
|
24
|
+
const { runSearch } = await import("./search-BF7QY64J.js");
|
|
25
25
|
await runSearch(query, {
|
|
26
26
|
limit: parseInt(opts.limit, 10),
|
|
27
27
|
path: opts.path
|
|
@@ -30,12 +30,12 @@ program.command("search").description("Search indexed codebase with a natural la
|
|
|
30
30
|
program.command("status").description(
|
|
31
31
|
"Show index stats: files indexed, chunks stored, last indexed time"
|
|
32
32
|
).argument("[path]", "Project root directory (defaults to current directory)").action(async (path) => {
|
|
33
|
-
const { runStatus } = await import("./status-
|
|
33
|
+
const { runStatus } = await import("./status-JYNMLSXZ.js");
|
|
34
34
|
await runStatus(path);
|
|
35
35
|
});
|
|
36
36
|
program.command("context").description("Build token-budgeted context from codebase for a query").argument("<query>", "Natural language query string").option("-n, --limit <n>", "Maximum number of search results", "10").option("-b, --budget <tokens>", "Token budget for assembled context", "4096").option("-p, --path <path>", "Project root directory").option("--raw", "Output raw JSON (MCP transport compatible)").action(
|
|
37
37
|
async (query, opts) => {
|
|
38
|
-
const { runBuildContext } = await import("./buildContext-
|
|
38
|
+
const { runBuildContext } = await import("./buildContext-JKYV7CCP.js");
|
|
39
39
|
const result = await runBuildContext(query, {
|
|
40
40
|
limit: parseInt(opts.limit, 10),
|
|
41
41
|
maxTokens: parseInt(opts.budget, 10),
|
|
@@ -61,7 +61,7 @@ ${formatTokenSavings({ tokensSent: result.metadata.tokensSent, estimatedWithout,
|
|
|
61
61
|
program.command("ask").description(
|
|
62
62
|
"Ask a natural language question about the codebase \u2014 retrieves context locally, reasons via Claude"
|
|
63
63
|
).argument("<question>", "Natural language question about the codebase").option("-b, --budget <tokens>", "Token budget for context retrieval", "4096").option("-p, --path <path>", "Project root directory").action(async (question, opts) => {
|
|
64
|
-
const { runAskCodebase } = await import("./askCodebase-
|
|
64
|
+
const { runAskCodebase } = await import("./askCodebase-DTII3Y6P.js");
|
|
65
65
|
const result = await runAskCodebase(question, {
|
|
66
66
|
path: opts.path,
|
|
67
67
|
maxContextTokens: parseInt(opts.budget, 10)
|
|
@@ -4,14 +4,14 @@ import {
|
|
|
4
4
|
isOllamaInstalled,
|
|
5
5
|
isOllamaRunning,
|
|
6
6
|
modelMatches
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-V4ARVFRG.js";
|
|
8
8
|
import {
|
|
9
9
|
detectCapabilities,
|
|
10
10
|
readProfile
|
|
11
|
-
} from "./chunk-
|
|
11
|
+
} from "./chunk-MSI4MDIM.js";
|
|
12
12
|
import {
|
|
13
13
|
PROFILE_PATH
|
|
14
|
-
} from "./chunk-
|
|
14
|
+
} from "./chunk-3SFDFUEX.js";
|
|
15
15
|
|
|
16
16
|
// src/workflows/doctor.ts
|
|
17
17
|
import ollama from "ollama";
|
|
@@ -5,12 +5,12 @@ import {
|
|
|
5
5
|
isOllamaRunning,
|
|
6
6
|
pullModelIfMissing,
|
|
7
7
|
startOllama
|
|
8
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-V4ARVFRG.js";
|
|
9
9
|
import {
|
|
10
10
|
detectCapabilities,
|
|
11
11
|
writeProfile
|
|
12
|
-
} from "./chunk-
|
|
13
|
-
import "./chunk-
|
|
12
|
+
} from "./chunk-MSI4MDIM.js";
|
|
13
|
+
import "./chunk-3SFDFUEX.js";
|
|
14
14
|
|
|
15
15
|
// src/workflows/init.ts
|
|
16
16
|
import { existsSync, readFileSync, writeFileSync, appendFileSync } from "fs";
|
|
@@ -52,7 +52,7 @@ async function runInit() {
|
|
|
52
52
|
`brain-cache: warming model ${profileWithVersion.embeddingModel} into VRAM...
|
|
53
53
|
`
|
|
54
54
|
);
|
|
55
|
-
const { embedBatchWithRetry } = await import("./embedder-
|
|
55
|
+
const { embedBatchWithRetry } = await import("./embedder-2UG2GDQO.js");
|
|
56
56
|
await embedBatchWithRetry(profileWithVersion.embeddingModel, ["warmup"]);
|
|
57
57
|
process.stderr.write("brain-cache: model warm.\n");
|
|
58
58
|
await writeProfile(profileWithVersion);
|
|
@@ -63,6 +63,28 @@ async function runInit() {
|
|
|
63
63
|
VRAM tier: ${profileWithVersion.vramTier}
|
|
64
64
|
`
|
|
65
65
|
);
|
|
66
|
+
const brainCacheMcpEntry = {
|
|
67
|
+
command: "node",
|
|
68
|
+
args: ["node_modules/brain-cache/dist/mcp.js"]
|
|
69
|
+
};
|
|
70
|
+
const mcpJsonPath = ".mcp.json";
|
|
71
|
+
if (existsSync(mcpJsonPath)) {
|
|
72
|
+
const mcpContent = readFileSync(mcpJsonPath, "utf-8");
|
|
73
|
+
const parsed = JSON.parse(mcpContent);
|
|
74
|
+
const existing = parsed.mcpServers?.["brain-cache"];
|
|
75
|
+
if (existing && JSON.stringify(existing) === JSON.stringify(brainCacheMcpEntry)) {
|
|
76
|
+
process.stderr.write("brain-cache: .mcp.json already contains brain-cache MCP server, skipping.\n");
|
|
77
|
+
} else {
|
|
78
|
+
parsed.mcpServers = parsed.mcpServers ?? {};
|
|
79
|
+
parsed.mcpServers["brain-cache"] = brainCacheMcpEntry;
|
|
80
|
+
writeFileSync(mcpJsonPath, JSON.stringify(parsed, null, 2) + "\n");
|
|
81
|
+
process.stderr.write("brain-cache: added brain-cache MCP server to .mcp.json.\n");
|
|
82
|
+
}
|
|
83
|
+
} else {
|
|
84
|
+
const mcpConfig = { mcpServers: { "brain-cache": brainCacheMcpEntry } };
|
|
85
|
+
writeFileSync(mcpJsonPath, JSON.stringify(mcpConfig, null, 2) + "\n");
|
|
86
|
+
process.stderr.write("brain-cache: created .mcp.json with brain-cache MCP server.\n");
|
|
87
|
+
}
|
|
66
88
|
const claudeMdPath = "CLAUDE.md";
|
|
67
89
|
const brainCacheSection = `
|
|
68
90
|
## Brain-Cache MCP Tools
|
package/dist/mcp.js
CHANGED
|
@@ -43,6 +43,9 @@ var logger = pino(
|
|
|
43
43
|
function childLogger(component) {
|
|
44
44
|
return logger.child({ component });
|
|
45
45
|
}
|
|
46
|
+
function setLogLevel(level) {
|
|
47
|
+
logger.level = level;
|
|
48
|
+
}
|
|
46
49
|
|
|
47
50
|
// src/lib/format.ts
|
|
48
51
|
function formatTokenSavings(input) {
|
|
@@ -640,7 +643,8 @@ function isContextLengthError(err) {
|
|
|
640
643
|
}
|
|
641
644
|
async function embedBatchWithRetry(model, texts, dimension = DEFAULT_EMBEDDING_DIMENSION, attempt = 0) {
|
|
642
645
|
try {
|
|
643
|
-
|
|
646
|
+
const embeddings = await embedBatch(model, texts);
|
|
647
|
+
return { embeddings, skipped: 0 };
|
|
644
648
|
} catch (err) {
|
|
645
649
|
if (attempt === 0 && isConnectionError(err)) {
|
|
646
650
|
log6.warn({ model }, "Ollama cold-start suspected, retrying in 5s");
|
|
@@ -650,24 +654,21 @@ async function embedBatchWithRetry(model, texts, dimension = DEFAULT_EMBEDDING_D
|
|
|
650
654
|
if (isContextLengthError(err)) {
|
|
651
655
|
log6.warn({ model, batchSize: texts.length }, "Batch exceeded context length, falling back to individual embedding");
|
|
652
656
|
const results = [];
|
|
657
|
+
let skipped = 0;
|
|
653
658
|
for (const text of texts) {
|
|
654
659
|
try {
|
|
655
660
|
const [vec] = await embedBatch(model, [text]);
|
|
656
661
|
results.push(vec);
|
|
657
662
|
} catch (innerErr) {
|
|
658
663
|
if (isContextLengthError(innerErr)) {
|
|
659
|
-
|
|
660
|
-
`
|
|
661
|
-
brain-cache: chunk too large for embedding model, skipping (${text.length} chars)
|
|
662
|
-
`
|
|
663
|
-
);
|
|
664
|
+
skipped++;
|
|
664
665
|
results.push(new Array(dimension).fill(0));
|
|
665
666
|
} else {
|
|
666
667
|
throw innerErr;
|
|
667
668
|
}
|
|
668
669
|
}
|
|
669
670
|
}
|
|
670
|
-
return results;
|
|
671
|
+
return { embeddings: results, skipped };
|
|
671
672
|
}
|
|
672
673
|
throw err;
|
|
673
674
|
}
|
|
@@ -710,201 +711,218 @@ function hashContent(content) {
|
|
|
710
711
|
}
|
|
711
712
|
async function runIndex(targetPath, opts) {
|
|
712
713
|
const force = opts?.force ?? false;
|
|
713
|
-
const
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
714
|
+
const previousLogLevel = process.env.BRAIN_CACHE_LOG ?? "warn";
|
|
715
|
+
setLogLevel("silent");
|
|
716
|
+
const originalStderrWrite = process.stderr.write.bind(process.stderr);
|
|
717
|
+
process.stderr.write = ((chunk, ...args) => {
|
|
718
|
+
const str = typeof chunk === "string" ? chunk : chunk.toString();
|
|
719
|
+
if (/^\[[\d\-T:Z]+ WARN lance/.test(str) || /^\[[\d\-T:Z]+ INFO lance/.test(str)) {
|
|
720
|
+
return true;
|
|
721
|
+
}
|
|
722
|
+
return originalStderrWrite(chunk, ...args);
|
|
723
|
+
});
|
|
724
|
+
try {
|
|
725
|
+
const rootDir = resolve(targetPath ?? ".");
|
|
726
|
+
const profile = await readProfile();
|
|
727
|
+
if (profile === null) {
|
|
728
|
+
throw new Error("No profile found. Run 'brain-cache init' first.");
|
|
729
|
+
}
|
|
730
|
+
const running = await isOllamaRunning();
|
|
731
|
+
if (!running) {
|
|
732
|
+
throw new Error("Ollama is not running. Start it with 'ollama serve' or run 'brain-cache init'.");
|
|
733
|
+
}
|
|
734
|
+
const dim = EMBEDDING_DIMENSIONS[profile.embeddingModel] ?? DEFAULT_EMBEDDING_DIMENSION;
|
|
735
|
+
if (!(profile.embeddingModel in EMBEDDING_DIMENSIONS)) {
|
|
736
|
+
process.stderr.write(
|
|
737
|
+
`Warning: Unknown embedding model '${profile.embeddingModel}', defaulting to ${DEFAULT_EMBEDDING_DIMENSION} dimensions.
|
|
726
738
|
`
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
739
|
+
);
|
|
740
|
+
}
|
|
741
|
+
const db = await openDatabase(rootDir);
|
|
742
|
+
const table = await openOrCreateChunkTable(db, rootDir, profile.embeddingModel, dim);
|
|
743
|
+
const files = await crawlSourceFiles(rootDir);
|
|
744
|
+
process.stderr.write(`brain-cache: found ${files.length} source files
|
|
733
745
|
`);
|
|
734
|
-
|
|
735
|
-
|
|
746
|
+
if (files.length === 0) {
|
|
747
|
+
process.stderr.write(`No source files found in ${rootDir}
|
|
736
748
|
`);
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
749
|
+
return;
|
|
750
|
+
}
|
|
751
|
+
const contentMap = /* @__PURE__ */ new Map();
|
|
752
|
+
const currentHashes = {};
|
|
753
|
+
for (let groupStart = 0; groupStart < files.length; groupStart += FILE_READ_CONCURRENCY) {
|
|
754
|
+
const group = files.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
|
|
755
|
+
const results = await Promise.all(
|
|
756
|
+
group.map(async (filePath) => {
|
|
757
|
+
const content = await readFile4(filePath, "utf-8");
|
|
758
|
+
return { filePath, content, hash: hashContent(content) };
|
|
759
|
+
})
|
|
760
|
+
);
|
|
761
|
+
for (const { filePath, content, hash } of results) {
|
|
762
|
+
contentMap.set(filePath, content);
|
|
763
|
+
currentHashes[filePath] = hash;
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
const storedHashes = force ? {} : await readFileHashes(rootDir);
|
|
767
|
+
const crawledSet = new Set(files);
|
|
768
|
+
const newFiles = [];
|
|
769
|
+
const changedFiles = [];
|
|
770
|
+
const removedFiles = [];
|
|
771
|
+
const unchangedFiles = [];
|
|
772
|
+
for (const filePath of files) {
|
|
773
|
+
const currentHash = currentHashes[filePath];
|
|
774
|
+
if (!(filePath in storedHashes)) {
|
|
775
|
+
newFiles.push(filePath);
|
|
776
|
+
} else if (storedHashes[filePath] !== currentHash) {
|
|
777
|
+
changedFiles.push(filePath);
|
|
778
|
+
} else {
|
|
779
|
+
unchangedFiles.push(filePath);
|
|
780
|
+
}
|
|
781
|
+
}
|
|
782
|
+
for (const filePath of Object.keys(storedHashes)) {
|
|
783
|
+
if (!crawledSet.has(filePath)) {
|
|
784
|
+
removedFiles.push(filePath);
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
process.stderr.write(
|
|
788
|
+
`brain-cache: incremental index -- ${newFiles.length} new, ${changedFiles.length} changed, ${removedFiles.length} removed (${unchangedFiles.length} unchanged)
|
|
789
|
+
`
|
|
748
790
|
);
|
|
749
|
-
for (const
|
|
750
|
-
|
|
751
|
-
currentHashes[filePath] = hash;
|
|
791
|
+
for (const filePath of [...removedFiles, ...changedFiles]) {
|
|
792
|
+
await deleteChunksByFilePath(table, filePath);
|
|
752
793
|
}
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
const newFiles = [];
|
|
757
|
-
const changedFiles = [];
|
|
758
|
-
const removedFiles = [];
|
|
759
|
-
const unchangedFiles = [];
|
|
760
|
-
for (const filePath of files) {
|
|
761
|
-
const currentHash = currentHashes[filePath];
|
|
762
|
-
if (!(filePath in storedHashes)) {
|
|
763
|
-
newFiles.push(filePath);
|
|
764
|
-
} else if (storedHashes[filePath] !== currentHash) {
|
|
765
|
-
changedFiles.push(filePath);
|
|
766
|
-
} else {
|
|
767
|
-
unchangedFiles.push(filePath);
|
|
794
|
+
const updatedHashes = { ...storedHashes };
|
|
795
|
+
for (const filePath of removedFiles) {
|
|
796
|
+
delete updatedHashes[filePath];
|
|
768
797
|
}
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
798
|
+
const filesToProcess = [...newFiles, ...changedFiles];
|
|
799
|
+
if (filesToProcess.length === 0) {
|
|
800
|
+
process.stderr.write(`brain-cache: nothing to re-index
|
|
801
|
+
`);
|
|
802
|
+
for (const filePath of files) {
|
|
803
|
+
updatedHashes[filePath] = currentHashes[filePath];
|
|
804
|
+
}
|
|
805
|
+
await writeFileHashes(rootDir, updatedHashes);
|
|
806
|
+
const totalFiles2 = unchangedFiles.length;
|
|
807
|
+
const chunkCount2 = await table.countRows();
|
|
808
|
+
await writeIndexState(rootDir, {
|
|
809
|
+
version: 1,
|
|
810
|
+
embeddingModel: profile.embeddingModel,
|
|
811
|
+
dimension: dim,
|
|
812
|
+
indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
813
|
+
fileCount: totalFiles2,
|
|
814
|
+
chunkCount: chunkCount2
|
|
815
|
+
});
|
|
816
|
+
process.stderr.write(
|
|
817
|
+
`brain-cache: indexing complete
|
|
818
|
+
Files: ${totalFiles2}
|
|
819
|
+
Chunks: ${chunkCount2}
|
|
820
|
+
Model: ${profile.embeddingModel}
|
|
821
|
+
Stored in: ${rootDir}/.brain-cache/
|
|
822
|
+
`
|
|
823
|
+
);
|
|
824
|
+
return;
|
|
773
825
|
}
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
826
|
+
let totalRawTokens = 0;
|
|
827
|
+
let totalChunkTokens = 0;
|
|
828
|
+
let totalChunks = 0;
|
|
829
|
+
let processedFiles = 0;
|
|
830
|
+
let processedChunks = 0;
|
|
831
|
+
let skippedChunks = 0;
|
|
832
|
+
for (let groupStart = 0; groupStart < filesToProcess.length; groupStart += FILE_READ_CONCURRENCY) {
|
|
833
|
+
const group = filesToProcess.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
|
|
834
|
+
const groupChunks = [];
|
|
835
|
+
for (const filePath of group) {
|
|
836
|
+
const content = contentMap.get(filePath);
|
|
837
|
+
totalRawTokens += countChunkTokens(content);
|
|
838
|
+
const chunks = chunkFile(filePath, content);
|
|
839
|
+
groupChunks.push(...chunks);
|
|
840
|
+
}
|
|
841
|
+
processedFiles += group.length;
|
|
842
|
+
totalChunks += groupChunks.length;
|
|
843
|
+
if (processedFiles % 10 === 0 || groupStart + FILE_READ_CONCURRENCY >= filesToProcess.length) {
|
|
844
|
+
process.stderr.write(`brain-cache: chunked ${processedFiles}/${filesToProcess.length} files
|
|
845
|
+
`);
|
|
846
|
+
}
|
|
847
|
+
for (let offset = 0; offset < groupChunks.length; offset += DEFAULT_BATCH_SIZE) {
|
|
848
|
+
const batch = groupChunks.slice(offset, offset + DEFAULT_BATCH_SIZE);
|
|
849
|
+
const embeddableBatch = batch.filter((chunk) => {
|
|
850
|
+
const tokens = countChunkTokens(chunk.content);
|
|
851
|
+
if (tokens > EMBED_MAX_TOKENS) {
|
|
852
|
+
skippedChunks++;
|
|
853
|
+
return false;
|
|
854
|
+
}
|
|
855
|
+
return true;
|
|
856
|
+
});
|
|
857
|
+
if (embeddableBatch.length === 0) continue;
|
|
858
|
+
const texts = embeddableBatch.map((chunk) => chunk.content);
|
|
859
|
+
totalChunkTokens += texts.reduce((sum, t) => sum + countChunkTokens(t), 0);
|
|
860
|
+
const { embeddings: vectors, skipped } = await embedBatchWithRetry(profile.embeddingModel, texts, dim);
|
|
861
|
+
skippedChunks += skipped;
|
|
862
|
+
const rows = embeddableBatch.map((chunk, i) => ({
|
|
863
|
+
id: chunk.id,
|
|
864
|
+
file_path: chunk.filePath,
|
|
865
|
+
chunk_type: chunk.chunkType,
|
|
866
|
+
scope: chunk.scope,
|
|
867
|
+
name: chunk.name,
|
|
868
|
+
content: chunk.content,
|
|
869
|
+
start_line: chunk.startLine,
|
|
870
|
+
end_line: chunk.endLine,
|
|
871
|
+
vector: vectors[i]
|
|
872
|
+
}));
|
|
873
|
+
await insertChunks(table, rows);
|
|
874
|
+
processedChunks += batch.length;
|
|
875
|
+
process.stderr.write(
|
|
876
|
+
`brain-cache: embedding ${processedChunks}/${totalChunks} chunks (${Math.round(processedChunks / totalChunks * 100)}%)
|
|
777
877
|
`
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
for (const filePath of removedFiles) {
|
|
784
|
-
delete updatedHashes[filePath];
|
|
785
|
-
}
|
|
786
|
-
const filesToProcess = [...newFiles, ...changedFiles];
|
|
787
|
-
if (filesToProcess.length === 0) {
|
|
788
|
-
process.stderr.write(`brain-cache: nothing to re-index
|
|
878
|
+
);
|
|
879
|
+
}
|
|
880
|
+
}
|
|
881
|
+
if (skippedChunks > 0) {
|
|
882
|
+
process.stderr.write(`brain-cache: ${skippedChunks} chunks skipped (too large for model context)
|
|
789
883
|
`);
|
|
790
|
-
|
|
884
|
+
}
|
|
885
|
+
process.stderr.write(
|
|
886
|
+
`brain-cache: ${totalChunks} chunks from ${filesToProcess.length} files
|
|
887
|
+
`
|
|
888
|
+
);
|
|
889
|
+
await createVectorIndexIfNeeded(table, profile.embeddingModel);
|
|
890
|
+
for (const filePath of filesToProcess) {
|
|
891
|
+
updatedHashes[filePath] = currentHashes[filePath];
|
|
892
|
+
}
|
|
893
|
+
for (const filePath of unchangedFiles) {
|
|
791
894
|
updatedHashes[filePath] = currentHashes[filePath];
|
|
792
895
|
}
|
|
793
896
|
await writeFileHashes(rootDir, updatedHashes);
|
|
794
|
-
const
|
|
795
|
-
const
|
|
897
|
+
const totalFiles = files.length;
|
|
898
|
+
const chunkCount = await table.countRows();
|
|
796
899
|
await writeIndexState(rootDir, {
|
|
797
900
|
version: 1,
|
|
798
901
|
embeddingModel: profile.embeddingModel,
|
|
799
902
|
dimension: dim,
|
|
800
903
|
indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
801
|
-
fileCount:
|
|
802
|
-
chunkCount
|
|
904
|
+
fileCount: totalFiles,
|
|
905
|
+
chunkCount
|
|
803
906
|
});
|
|
907
|
+
const reductionPct = totalRawTokens > 0 ? Math.round((1 - totalChunkTokens / totalRawTokens) * 100) : 0;
|
|
908
|
+
const savingsBlock = formatTokenSavings({
|
|
909
|
+
tokensSent: totalChunkTokens,
|
|
910
|
+
estimatedWithout: totalRawTokens,
|
|
911
|
+
reductionPct
|
|
912
|
+
}).split("\n").map((line) => ` ${line}`).join("\n");
|
|
804
913
|
process.stderr.write(
|
|
805
914
|
`brain-cache: indexing complete
|
|
806
|
-
Files: ${totalFiles2}
|
|
807
|
-
Chunks: ${chunkCount2}
|
|
808
|
-
Model: ${profile.embeddingModel}
|
|
809
|
-
Stored in: ${rootDir}/.brain-cache/
|
|
810
|
-
`
|
|
811
|
-
);
|
|
812
|
-
return;
|
|
813
|
-
}
|
|
814
|
-
let totalRawTokens = 0;
|
|
815
|
-
let totalChunkTokens = 0;
|
|
816
|
-
let totalChunks = 0;
|
|
817
|
-
let processedFiles = 0;
|
|
818
|
-
let processedChunks = 0;
|
|
819
|
-
for (let groupStart = 0; groupStart < filesToProcess.length; groupStart += FILE_READ_CONCURRENCY) {
|
|
820
|
-
const group = filesToProcess.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
|
|
821
|
-
const groupChunks = [];
|
|
822
|
-
for (const filePath of group) {
|
|
823
|
-
const content = contentMap.get(filePath);
|
|
824
|
-
totalRawTokens += countChunkTokens(content);
|
|
825
|
-
const chunks = chunkFile(filePath, content);
|
|
826
|
-
groupChunks.push(...chunks);
|
|
827
|
-
}
|
|
828
|
-
processedFiles += group.length;
|
|
829
|
-
totalChunks += groupChunks.length;
|
|
830
|
-
if (processedFiles % 10 === 0 || groupStart + FILE_READ_CONCURRENCY >= filesToProcess.length) {
|
|
831
|
-
process.stderr.write(`brain-cache: chunked ${processedFiles}/${filesToProcess.length} files
|
|
832
|
-
`);
|
|
833
|
-
}
|
|
834
|
-
for (let offset = 0; offset < groupChunks.length; offset += DEFAULT_BATCH_SIZE) {
|
|
835
|
-
const batch = groupChunks.slice(offset, offset + DEFAULT_BATCH_SIZE);
|
|
836
|
-
const embeddableBatch = batch.filter((chunk) => {
|
|
837
|
-
const tokens = countChunkTokens(chunk.content);
|
|
838
|
-
if (tokens > EMBED_MAX_TOKENS) {
|
|
839
|
-
process.stderr.write(
|
|
840
|
-
`
|
|
841
|
-
brain-cache: skipping oversized chunk (${tokens} tokens > ${EMBED_MAX_TOKENS} limit): ${chunk.filePath} lines ${chunk.startLine}-${chunk.endLine}
|
|
842
|
-
`
|
|
843
|
-
);
|
|
844
|
-
return false;
|
|
845
|
-
}
|
|
846
|
-
return true;
|
|
847
|
-
});
|
|
848
|
-
if (embeddableBatch.length === 0) continue;
|
|
849
|
-
const texts = embeddableBatch.map((chunk) => chunk.content);
|
|
850
|
-
totalChunkTokens += texts.reduce((sum, t) => sum + countChunkTokens(t), 0);
|
|
851
|
-
const vectors = await embedBatchWithRetry(profile.embeddingModel, texts, dim);
|
|
852
|
-
const rows = embeddableBatch.map((chunk, i) => ({
|
|
853
|
-
id: chunk.id,
|
|
854
|
-
file_path: chunk.filePath,
|
|
855
|
-
chunk_type: chunk.chunkType,
|
|
856
|
-
scope: chunk.scope,
|
|
857
|
-
name: chunk.name,
|
|
858
|
-
content: chunk.content,
|
|
859
|
-
start_line: chunk.startLine,
|
|
860
|
-
end_line: chunk.endLine,
|
|
861
|
-
vector: vectors[i]
|
|
862
|
-
}));
|
|
863
|
-
await insertChunks(table, rows);
|
|
864
|
-
processedChunks += batch.length;
|
|
865
|
-
process.stderr.write(
|
|
866
|
-
`\rbrain-cache: embedding ${processedChunks}/${totalChunks} chunks (${Math.round(processedChunks / totalChunks * 100)}%)`
|
|
867
|
-
);
|
|
868
|
-
}
|
|
869
|
-
}
|
|
870
|
-
process.stderr.write("\n");
|
|
871
|
-
process.stderr.write(
|
|
872
|
-
`brain-cache: ${totalChunks} chunks from ${filesToProcess.length} files
|
|
873
|
-
`
|
|
874
|
-
);
|
|
875
|
-
await createVectorIndexIfNeeded(table, profile.embeddingModel);
|
|
876
|
-
for (const filePath of filesToProcess) {
|
|
877
|
-
updatedHashes[filePath] = currentHashes[filePath];
|
|
878
|
-
}
|
|
879
|
-
for (const filePath of unchangedFiles) {
|
|
880
|
-
updatedHashes[filePath] = currentHashes[filePath];
|
|
881
|
-
}
|
|
882
|
-
await writeFileHashes(rootDir, updatedHashes);
|
|
883
|
-
const totalFiles = files.length;
|
|
884
|
-
const chunkCount = await table.countRows();
|
|
885
|
-
await writeIndexState(rootDir, {
|
|
886
|
-
version: 1,
|
|
887
|
-
embeddingModel: profile.embeddingModel,
|
|
888
|
-
dimension: dim,
|
|
889
|
-
indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
890
|
-
fileCount: totalFiles,
|
|
891
|
-
chunkCount
|
|
892
|
-
});
|
|
893
|
-
const reductionPct = totalRawTokens > 0 ? Math.round((1 - totalChunkTokens / totalRawTokens) * 100) : 0;
|
|
894
|
-
const savingsBlock = formatTokenSavings({
|
|
895
|
-
tokensSent: totalChunkTokens,
|
|
896
|
-
estimatedWithout: totalRawTokens,
|
|
897
|
-
reductionPct
|
|
898
|
-
}).split("\n").map((line) => ` ${line}`).join("\n");
|
|
899
|
-
process.stderr.write(
|
|
900
|
-
`brain-cache: indexing complete
|
|
901
915
|
Files: ${totalFiles}
|
|
902
916
|
Chunks: ${totalChunks}
|
|
903
917
|
Model: ${profile.embeddingModel}
|
|
904
918
|
${savingsBlock}
|
|
905
919
|
Stored in: ${rootDir}/.brain-cache/
|
|
906
920
|
`
|
|
907
|
-
|
|
921
|
+
);
|
|
922
|
+
} finally {
|
|
923
|
+
setLogLevel(previousLogLevel);
|
|
924
|
+
process.stderr.write = originalStderrWrite;
|
|
925
|
+
}
|
|
908
926
|
}
|
|
909
927
|
|
|
910
928
|
// src/workflows/search.ts
|
|
@@ -1128,7 +1146,7 @@ async function runBuildContext(query, opts) {
|
|
|
1128
1146
|
}
|
|
1129
1147
|
|
|
1130
1148
|
// src/mcp/index.ts
|
|
1131
|
-
var version = "0.
|
|
1149
|
+
var version = "0.3.0";
|
|
1132
1150
|
var log9 = childLogger("mcp");
|
|
1133
1151
|
var server = new McpServer({ name: "brain-cache", version });
|
|
1134
1152
|
server.registerTool(
|
|
@@ -4,21 +4,21 @@ import {
|
|
|
4
4
|
classifyQueryIntent,
|
|
5
5
|
deduplicateChunks,
|
|
6
6
|
searchChunks
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-BF5UDEIF.js";
|
|
8
8
|
import {
|
|
9
9
|
embedBatchWithRetry
|
|
10
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-GR6QXZ4J.js";
|
|
11
11
|
import {
|
|
12
12
|
isOllamaRunning
|
|
13
|
-
} from "./chunk-
|
|
13
|
+
} from "./chunk-V4ARVFRG.js";
|
|
14
14
|
import {
|
|
15
15
|
openDatabase,
|
|
16
16
|
readIndexState
|
|
17
|
-
} from "./chunk-
|
|
17
|
+
} from "./chunk-6MACVOTO.js";
|
|
18
18
|
import {
|
|
19
19
|
readProfile
|
|
20
|
-
} from "./chunk-
|
|
21
|
-
import "./chunk-
|
|
20
|
+
} from "./chunk-MSI4MDIM.js";
|
|
21
|
+
import "./chunk-3SFDFUEX.js";
|
|
22
22
|
|
|
23
23
|
// src/workflows/search.ts
|
|
24
24
|
import { resolve } from "path";
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
readIndexState
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-6MACVOTO.js";
|
|
5
5
|
import {
|
|
6
6
|
readProfile
|
|
7
|
-
} from "./chunk-
|
|
8
|
-
import "./chunk-
|
|
7
|
+
} from "./chunk-MSI4MDIM.js";
|
|
8
|
+
import "./chunk-3SFDFUEX.js";
|
|
9
9
|
|
|
10
10
|
// src/workflows/status.ts
|
|
11
11
|
import { resolve } from "path";
|
|
@@ -4,13 +4,13 @@ import {
|
|
|
4
4
|
} from "./chunk-GGOUKACO.js";
|
|
5
5
|
import {
|
|
6
6
|
countChunkTokens
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-5FXXZBZV.js";
|
|
8
8
|
import {
|
|
9
9
|
embedBatchWithRetry
|
|
10
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-GR6QXZ4J.js";
|
|
11
11
|
import {
|
|
12
12
|
isOllamaRunning
|
|
13
|
-
} from "./chunk-
|
|
13
|
+
} from "./chunk-V4ARVFRG.js";
|
|
14
14
|
import {
|
|
15
15
|
createVectorIndexIfNeeded,
|
|
16
16
|
deleteChunksByFilePath,
|
|
@@ -20,18 +20,19 @@ import {
|
|
|
20
20
|
readFileHashes,
|
|
21
21
|
writeFileHashes,
|
|
22
22
|
writeIndexState
|
|
23
|
-
} from "./chunk-
|
|
23
|
+
} from "./chunk-6MACVOTO.js";
|
|
24
24
|
import {
|
|
25
25
|
readProfile
|
|
26
|
-
} from "./chunk-
|
|
26
|
+
} from "./chunk-MSI4MDIM.js";
|
|
27
27
|
import {
|
|
28
28
|
DEFAULT_BATCH_SIZE,
|
|
29
29
|
DEFAULT_EMBEDDING_DIMENSION,
|
|
30
30
|
EMBEDDING_DIMENSIONS,
|
|
31
31
|
EMBED_MAX_TOKENS,
|
|
32
32
|
FILE_READ_CONCURRENCY,
|
|
33
|
-
childLogger
|
|
34
|
-
|
|
33
|
+
childLogger,
|
|
34
|
+
setLogLevel
|
|
35
|
+
} from "./chunk-3SFDFUEX.js";
|
|
35
36
|
|
|
36
37
|
// src/workflows/index.ts
|
|
37
38
|
import { resolve } from "path";
|
|
@@ -259,201 +260,218 @@ function hashContent(content) {
|
|
|
259
260
|
}
|
|
260
261
|
async function runIndex(targetPath, opts) {
|
|
261
262
|
const force = opts?.force ?? false;
|
|
262
|
-
const
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
263
|
+
const previousLogLevel = process.env.BRAIN_CACHE_LOG ?? "warn";
|
|
264
|
+
setLogLevel("silent");
|
|
265
|
+
const originalStderrWrite = process.stderr.write.bind(process.stderr);
|
|
266
|
+
process.stderr.write = ((chunk, ...args) => {
|
|
267
|
+
const str = typeof chunk === "string" ? chunk : chunk.toString();
|
|
268
|
+
if (/^\[[\d\-T:Z]+ WARN lance/.test(str) || /^\[[\d\-T:Z]+ INFO lance/.test(str)) {
|
|
269
|
+
return true;
|
|
270
|
+
}
|
|
271
|
+
return originalStderrWrite(chunk, ...args);
|
|
272
|
+
});
|
|
273
|
+
try {
|
|
274
|
+
const rootDir = resolve(targetPath ?? ".");
|
|
275
|
+
const profile = await readProfile();
|
|
276
|
+
if (profile === null) {
|
|
277
|
+
throw new Error("No profile found. Run 'brain-cache init' first.");
|
|
278
|
+
}
|
|
279
|
+
const running = await isOllamaRunning();
|
|
280
|
+
if (!running) {
|
|
281
|
+
throw new Error("Ollama is not running. Start it with 'ollama serve' or run 'brain-cache init'.");
|
|
282
|
+
}
|
|
283
|
+
const dim = EMBEDDING_DIMENSIONS[profile.embeddingModel] ?? DEFAULT_EMBEDDING_DIMENSION;
|
|
284
|
+
if (!(profile.embeddingModel in EMBEDDING_DIMENSIONS)) {
|
|
285
|
+
process.stderr.write(
|
|
286
|
+
`Warning: Unknown embedding model '${profile.embeddingModel}', defaulting to ${DEFAULT_EMBEDDING_DIMENSION} dimensions.
|
|
275
287
|
`
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
288
|
+
);
|
|
289
|
+
}
|
|
290
|
+
const db = await openDatabase(rootDir);
|
|
291
|
+
const table = await openOrCreateChunkTable(db, rootDir, profile.embeddingModel, dim);
|
|
292
|
+
const files = await crawlSourceFiles(rootDir);
|
|
293
|
+
process.stderr.write(`brain-cache: found ${files.length} source files
|
|
282
294
|
`);
|
|
283
|
-
|
|
284
|
-
|
|
295
|
+
if (files.length === 0) {
|
|
296
|
+
process.stderr.write(`No source files found in ${rootDir}
|
|
285
297
|
`);
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
298
|
+
return;
|
|
299
|
+
}
|
|
300
|
+
const contentMap = /* @__PURE__ */ new Map();
|
|
301
|
+
const currentHashes = {};
|
|
302
|
+
for (let groupStart = 0; groupStart < files.length; groupStart += FILE_READ_CONCURRENCY) {
|
|
303
|
+
const group = files.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
|
|
304
|
+
const results = await Promise.all(
|
|
305
|
+
group.map(async (filePath) => {
|
|
306
|
+
const content = await readFile2(filePath, "utf-8");
|
|
307
|
+
return { filePath, content, hash: hashContent(content) };
|
|
308
|
+
})
|
|
309
|
+
);
|
|
310
|
+
for (const { filePath, content, hash } of results) {
|
|
311
|
+
contentMap.set(filePath, content);
|
|
312
|
+
currentHashes[filePath] = hash;
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
const storedHashes = force ? {} : await readFileHashes(rootDir);
|
|
316
|
+
const crawledSet = new Set(files);
|
|
317
|
+
const newFiles = [];
|
|
318
|
+
const changedFiles = [];
|
|
319
|
+
const removedFiles = [];
|
|
320
|
+
const unchangedFiles = [];
|
|
321
|
+
for (const filePath of files) {
|
|
322
|
+
const currentHash = currentHashes[filePath];
|
|
323
|
+
if (!(filePath in storedHashes)) {
|
|
324
|
+
newFiles.push(filePath);
|
|
325
|
+
} else if (storedHashes[filePath] !== currentHash) {
|
|
326
|
+
changedFiles.push(filePath);
|
|
327
|
+
} else {
|
|
328
|
+
unchangedFiles.push(filePath);
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
for (const filePath of Object.keys(storedHashes)) {
|
|
332
|
+
if (!crawledSet.has(filePath)) {
|
|
333
|
+
removedFiles.push(filePath);
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
process.stderr.write(
|
|
337
|
+
`brain-cache: incremental index -- ${newFiles.length} new, ${changedFiles.length} changed, ${removedFiles.length} removed (${unchangedFiles.length} unchanged)
|
|
338
|
+
`
|
|
297
339
|
);
|
|
298
|
-
for (const
|
|
299
|
-
|
|
300
|
-
currentHashes[filePath] = hash;
|
|
340
|
+
for (const filePath of [...removedFiles, ...changedFiles]) {
|
|
341
|
+
await deleteChunksByFilePath(table, filePath);
|
|
301
342
|
}
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
const newFiles = [];
|
|
306
|
-
const changedFiles = [];
|
|
307
|
-
const removedFiles = [];
|
|
308
|
-
const unchangedFiles = [];
|
|
309
|
-
for (const filePath of files) {
|
|
310
|
-
const currentHash = currentHashes[filePath];
|
|
311
|
-
if (!(filePath in storedHashes)) {
|
|
312
|
-
newFiles.push(filePath);
|
|
313
|
-
} else if (storedHashes[filePath] !== currentHash) {
|
|
314
|
-
changedFiles.push(filePath);
|
|
315
|
-
} else {
|
|
316
|
-
unchangedFiles.push(filePath);
|
|
343
|
+
const updatedHashes = { ...storedHashes };
|
|
344
|
+
for (const filePath of removedFiles) {
|
|
345
|
+
delete updatedHashes[filePath];
|
|
317
346
|
}
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
347
|
+
const filesToProcess = [...newFiles, ...changedFiles];
|
|
348
|
+
if (filesToProcess.length === 0) {
|
|
349
|
+
process.stderr.write(`brain-cache: nothing to re-index
|
|
350
|
+
`);
|
|
351
|
+
for (const filePath of files) {
|
|
352
|
+
updatedHashes[filePath] = currentHashes[filePath];
|
|
353
|
+
}
|
|
354
|
+
await writeFileHashes(rootDir, updatedHashes);
|
|
355
|
+
const totalFiles2 = unchangedFiles.length;
|
|
356
|
+
const chunkCount2 = await table.countRows();
|
|
357
|
+
await writeIndexState(rootDir, {
|
|
358
|
+
version: 1,
|
|
359
|
+
embeddingModel: profile.embeddingModel,
|
|
360
|
+
dimension: dim,
|
|
361
|
+
indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
362
|
+
fileCount: totalFiles2,
|
|
363
|
+
chunkCount: chunkCount2
|
|
364
|
+
});
|
|
365
|
+
process.stderr.write(
|
|
366
|
+
`brain-cache: indexing complete
|
|
367
|
+
Files: ${totalFiles2}
|
|
368
|
+
Chunks: ${chunkCount2}
|
|
369
|
+
Model: ${profile.embeddingModel}
|
|
370
|
+
Stored in: ${rootDir}/.brain-cache/
|
|
371
|
+
`
|
|
372
|
+
);
|
|
373
|
+
return;
|
|
322
374
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
375
|
+
let totalRawTokens = 0;
|
|
376
|
+
let totalChunkTokens = 0;
|
|
377
|
+
let totalChunks = 0;
|
|
378
|
+
let processedFiles = 0;
|
|
379
|
+
let processedChunks = 0;
|
|
380
|
+
let skippedChunks = 0;
|
|
381
|
+
for (let groupStart = 0; groupStart < filesToProcess.length; groupStart += FILE_READ_CONCURRENCY) {
|
|
382
|
+
const group = filesToProcess.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
|
|
383
|
+
const groupChunks = [];
|
|
384
|
+
for (const filePath of group) {
|
|
385
|
+
const content = contentMap.get(filePath);
|
|
386
|
+
totalRawTokens += countChunkTokens(content);
|
|
387
|
+
const chunks = chunkFile(filePath, content);
|
|
388
|
+
groupChunks.push(...chunks);
|
|
389
|
+
}
|
|
390
|
+
processedFiles += group.length;
|
|
391
|
+
totalChunks += groupChunks.length;
|
|
392
|
+
if (processedFiles % 10 === 0 || groupStart + FILE_READ_CONCURRENCY >= filesToProcess.length) {
|
|
393
|
+
process.stderr.write(`brain-cache: chunked ${processedFiles}/${filesToProcess.length} files
|
|
394
|
+
`);
|
|
395
|
+
}
|
|
396
|
+
for (let offset = 0; offset < groupChunks.length; offset += DEFAULT_BATCH_SIZE) {
|
|
397
|
+
const batch = groupChunks.slice(offset, offset + DEFAULT_BATCH_SIZE);
|
|
398
|
+
const embeddableBatch = batch.filter((chunk) => {
|
|
399
|
+
const tokens = countChunkTokens(chunk.content);
|
|
400
|
+
if (tokens > EMBED_MAX_TOKENS) {
|
|
401
|
+
skippedChunks++;
|
|
402
|
+
return false;
|
|
403
|
+
}
|
|
404
|
+
return true;
|
|
405
|
+
});
|
|
406
|
+
if (embeddableBatch.length === 0) continue;
|
|
407
|
+
const texts = embeddableBatch.map((chunk) => chunk.content);
|
|
408
|
+
totalChunkTokens += texts.reduce((sum, t) => sum + countChunkTokens(t), 0);
|
|
409
|
+
const { embeddings: vectors, skipped } = await embedBatchWithRetry(profile.embeddingModel, texts, dim);
|
|
410
|
+
skippedChunks += skipped;
|
|
411
|
+
const rows = embeddableBatch.map((chunk, i) => ({
|
|
412
|
+
id: chunk.id,
|
|
413
|
+
file_path: chunk.filePath,
|
|
414
|
+
chunk_type: chunk.chunkType,
|
|
415
|
+
scope: chunk.scope,
|
|
416
|
+
name: chunk.name,
|
|
417
|
+
content: chunk.content,
|
|
418
|
+
start_line: chunk.startLine,
|
|
419
|
+
end_line: chunk.endLine,
|
|
420
|
+
vector: vectors[i]
|
|
421
|
+
}));
|
|
422
|
+
await insertChunks(table, rows);
|
|
423
|
+
processedChunks += batch.length;
|
|
424
|
+
process.stderr.write(
|
|
425
|
+
`brain-cache: embedding ${processedChunks}/${totalChunks} chunks (${Math.round(processedChunks / totalChunks * 100)}%)
|
|
326
426
|
`
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
for (const filePath of removedFiles) {
|
|
333
|
-
delete updatedHashes[filePath];
|
|
334
|
-
}
|
|
335
|
-
const filesToProcess = [...newFiles, ...changedFiles];
|
|
336
|
-
if (filesToProcess.length === 0) {
|
|
337
|
-
process.stderr.write(`brain-cache: nothing to re-index
|
|
427
|
+
);
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
if (skippedChunks > 0) {
|
|
431
|
+
process.stderr.write(`brain-cache: ${skippedChunks} chunks skipped (too large for model context)
|
|
338
432
|
`);
|
|
339
|
-
|
|
433
|
+
}
|
|
434
|
+
process.stderr.write(
|
|
435
|
+
`brain-cache: ${totalChunks} chunks from ${filesToProcess.length} files
|
|
436
|
+
`
|
|
437
|
+
);
|
|
438
|
+
await createVectorIndexIfNeeded(table, profile.embeddingModel);
|
|
439
|
+
for (const filePath of filesToProcess) {
|
|
440
|
+
updatedHashes[filePath] = currentHashes[filePath];
|
|
441
|
+
}
|
|
442
|
+
for (const filePath of unchangedFiles) {
|
|
340
443
|
updatedHashes[filePath] = currentHashes[filePath];
|
|
341
444
|
}
|
|
342
445
|
await writeFileHashes(rootDir, updatedHashes);
|
|
343
|
-
const
|
|
344
|
-
const
|
|
446
|
+
const totalFiles = files.length;
|
|
447
|
+
const chunkCount = await table.countRows();
|
|
345
448
|
await writeIndexState(rootDir, {
|
|
346
449
|
version: 1,
|
|
347
450
|
embeddingModel: profile.embeddingModel,
|
|
348
451
|
dimension: dim,
|
|
349
452
|
indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
350
|
-
fileCount:
|
|
351
|
-
chunkCount
|
|
453
|
+
fileCount: totalFiles,
|
|
454
|
+
chunkCount
|
|
352
455
|
});
|
|
456
|
+
const reductionPct = totalRawTokens > 0 ? Math.round((1 - totalChunkTokens / totalRawTokens) * 100) : 0;
|
|
457
|
+
const savingsBlock = formatTokenSavings({
|
|
458
|
+
tokensSent: totalChunkTokens,
|
|
459
|
+
estimatedWithout: totalRawTokens,
|
|
460
|
+
reductionPct
|
|
461
|
+
}).split("\n").map((line) => ` ${line}`).join("\n");
|
|
353
462
|
process.stderr.write(
|
|
354
463
|
`brain-cache: indexing complete
|
|
355
|
-
Files: ${totalFiles2}
|
|
356
|
-
Chunks: ${chunkCount2}
|
|
357
|
-
Model: ${profile.embeddingModel}
|
|
358
|
-
Stored in: ${rootDir}/.brain-cache/
|
|
359
|
-
`
|
|
360
|
-
);
|
|
361
|
-
return;
|
|
362
|
-
}
|
|
363
|
-
let totalRawTokens = 0;
|
|
364
|
-
let totalChunkTokens = 0;
|
|
365
|
-
let totalChunks = 0;
|
|
366
|
-
let processedFiles = 0;
|
|
367
|
-
let processedChunks = 0;
|
|
368
|
-
for (let groupStart = 0; groupStart < filesToProcess.length; groupStart += FILE_READ_CONCURRENCY) {
|
|
369
|
-
const group = filesToProcess.slice(groupStart, groupStart + FILE_READ_CONCURRENCY);
|
|
370
|
-
const groupChunks = [];
|
|
371
|
-
for (const filePath of group) {
|
|
372
|
-
const content = contentMap.get(filePath);
|
|
373
|
-
totalRawTokens += countChunkTokens(content);
|
|
374
|
-
const chunks = chunkFile(filePath, content);
|
|
375
|
-
groupChunks.push(...chunks);
|
|
376
|
-
}
|
|
377
|
-
processedFiles += group.length;
|
|
378
|
-
totalChunks += groupChunks.length;
|
|
379
|
-
if (processedFiles % 10 === 0 || groupStart + FILE_READ_CONCURRENCY >= filesToProcess.length) {
|
|
380
|
-
process.stderr.write(`brain-cache: chunked ${processedFiles}/${filesToProcess.length} files
|
|
381
|
-
`);
|
|
382
|
-
}
|
|
383
|
-
for (let offset = 0; offset < groupChunks.length; offset += DEFAULT_BATCH_SIZE) {
|
|
384
|
-
const batch = groupChunks.slice(offset, offset + DEFAULT_BATCH_SIZE);
|
|
385
|
-
const embeddableBatch = batch.filter((chunk) => {
|
|
386
|
-
const tokens = countChunkTokens(chunk.content);
|
|
387
|
-
if (tokens > EMBED_MAX_TOKENS) {
|
|
388
|
-
process.stderr.write(
|
|
389
|
-
`
|
|
390
|
-
brain-cache: skipping oversized chunk (${tokens} tokens > ${EMBED_MAX_TOKENS} limit): ${chunk.filePath} lines ${chunk.startLine}-${chunk.endLine}
|
|
391
|
-
`
|
|
392
|
-
);
|
|
393
|
-
return false;
|
|
394
|
-
}
|
|
395
|
-
return true;
|
|
396
|
-
});
|
|
397
|
-
if (embeddableBatch.length === 0) continue;
|
|
398
|
-
const texts = embeddableBatch.map((chunk) => chunk.content);
|
|
399
|
-
totalChunkTokens += texts.reduce((sum, t) => sum + countChunkTokens(t), 0);
|
|
400
|
-
const vectors = await embedBatchWithRetry(profile.embeddingModel, texts, dim);
|
|
401
|
-
const rows = embeddableBatch.map((chunk, i) => ({
|
|
402
|
-
id: chunk.id,
|
|
403
|
-
file_path: chunk.filePath,
|
|
404
|
-
chunk_type: chunk.chunkType,
|
|
405
|
-
scope: chunk.scope,
|
|
406
|
-
name: chunk.name,
|
|
407
|
-
content: chunk.content,
|
|
408
|
-
start_line: chunk.startLine,
|
|
409
|
-
end_line: chunk.endLine,
|
|
410
|
-
vector: vectors[i]
|
|
411
|
-
}));
|
|
412
|
-
await insertChunks(table, rows);
|
|
413
|
-
processedChunks += batch.length;
|
|
414
|
-
process.stderr.write(
|
|
415
|
-
`\rbrain-cache: embedding ${processedChunks}/${totalChunks} chunks (${Math.round(processedChunks / totalChunks * 100)}%)`
|
|
416
|
-
);
|
|
417
|
-
}
|
|
418
|
-
}
|
|
419
|
-
process.stderr.write("\n");
|
|
420
|
-
process.stderr.write(
|
|
421
|
-
`brain-cache: ${totalChunks} chunks from ${filesToProcess.length} files
|
|
422
|
-
`
|
|
423
|
-
);
|
|
424
|
-
await createVectorIndexIfNeeded(table, profile.embeddingModel);
|
|
425
|
-
for (const filePath of filesToProcess) {
|
|
426
|
-
updatedHashes[filePath] = currentHashes[filePath];
|
|
427
|
-
}
|
|
428
|
-
for (const filePath of unchangedFiles) {
|
|
429
|
-
updatedHashes[filePath] = currentHashes[filePath];
|
|
430
|
-
}
|
|
431
|
-
await writeFileHashes(rootDir, updatedHashes);
|
|
432
|
-
const totalFiles = files.length;
|
|
433
|
-
const chunkCount = await table.countRows();
|
|
434
|
-
await writeIndexState(rootDir, {
|
|
435
|
-
version: 1,
|
|
436
|
-
embeddingModel: profile.embeddingModel,
|
|
437
|
-
dimension: dim,
|
|
438
|
-
indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
439
|
-
fileCount: totalFiles,
|
|
440
|
-
chunkCount
|
|
441
|
-
});
|
|
442
|
-
const reductionPct = totalRawTokens > 0 ? Math.round((1 - totalChunkTokens / totalRawTokens) * 100) : 0;
|
|
443
|
-
const savingsBlock = formatTokenSavings({
|
|
444
|
-
tokensSent: totalChunkTokens,
|
|
445
|
-
estimatedWithout: totalRawTokens,
|
|
446
|
-
reductionPct
|
|
447
|
-
}).split("\n").map((line) => ` ${line}`).join("\n");
|
|
448
|
-
process.stderr.write(
|
|
449
|
-
`brain-cache: indexing complete
|
|
450
464
|
Files: ${totalFiles}
|
|
451
465
|
Chunks: ${totalChunks}
|
|
452
466
|
Model: ${profile.embeddingModel}
|
|
453
467
|
${savingsBlock}
|
|
454
468
|
Stored in: ${rootDir}/.brain-cache/
|
|
455
469
|
`
|
|
456
|
-
|
|
470
|
+
);
|
|
471
|
+
} finally {
|
|
472
|
+
setLogLevel(previousLogLevel);
|
|
473
|
+
process.stderr.write = originalStderrWrite;
|
|
474
|
+
}
|
|
457
475
|
}
|
|
458
476
|
export {
|
|
459
477
|
runIndex
|
package/package.json
CHANGED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import {
|
|
3
|
-
runBuildContext
|
|
4
|
-
} from "./chunk-7JLSJNKU.js";
|
|
5
|
-
import "./chunk-OKWMQNH6.js";
|
|
6
|
-
import "./chunk-ZLB4VJQK.js";
|
|
7
|
-
import "./chunk-WCNMLSL2.js";
|
|
8
|
-
import "./chunk-P7WSTGLE.js";
|
|
9
|
-
import "./chunk-XXWJ57QP.js";
|
|
10
|
-
import "./chunk-PA4BZBWS.js";
|
|
11
|
-
import "./chunk-PDQXJSH4.js";
|
|
12
|
-
export {
|
|
13
|
-
runBuildContext
|
|
14
|
-
};
|