brain-cache 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +215 -0
- package/dist/askCodebase-ECDSSTQ6.js +83 -0
- package/dist/buildContext-6755TRND.js +14 -0
- package/dist/chunk-7JLSJNKU.js +97 -0
- package/dist/chunk-GGOUKACO.js +16 -0
- package/dist/chunk-OKWMQNH6.js +40 -0
- package/dist/chunk-P7WSTGLE.js +131 -0
- package/dist/chunk-PA4BZBWS.js +162 -0
- package/dist/chunk-PDQXJSH4.js +87 -0
- package/dist/chunk-WCNMLSL2.js +79 -0
- package/dist/chunk-XXWJ57QP.js +151 -0
- package/dist/chunk-ZLB4VJQK.js +109 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +86 -0
- package/dist/doctor-5775VUMA.js +62 -0
- package/dist/embedder-KRANITVN.js +10 -0
- package/dist/init-TRPFEOHF.js +89 -0
- package/dist/mcp.d.ts +2 -0
- package/dist/mcp.js +1414 -0
- package/dist/search-WKKGPNLV.js +82 -0
- package/dist/status-2SOIQ3LX.js +37 -0
- package/dist/workflows-MJLEPCZY.js +460 -0
- package/package.json +68 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jack Winter
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to do so, subject to the
|
|
10
|
+
following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# brain-cache
|
|
2
|
+
|
|
3
|
+
> Stop sending your entire repo to Claude.
|
|
4
|
+
|
|
5
|
+
brain-cache is an MCP server that gives Claude local, indexed access to your codebase — so it finds what matters instead of reading everything.
|
|
6
|
+
|
|
7
|
+
→ ~90% fewer tokens sent to Claude
|
|
8
|
+
→ Sharper, grounded answers
|
|
9
|
+
→ No data leaves your machine
|
|
10
|
+
|
|
11
|
+

|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Use inside Claude Code (MCP)
|
|
16
|
+
|
|
17
|
+
The primary way to use brain-cache is as an MCP server. Once configured, Claude automatically calls brain-cache tools instead of reading raw files — no prompting required.
|
|
18
|
+
|
|
19
|
+
```json
|
|
20
|
+
// .claude/settings.json
|
|
21
|
+
{
|
|
22
|
+
"mcpServers": {
|
|
23
|
+
"brain-cache": {
|
|
24
|
+
"command": "brain-cache",
|
|
25
|
+
"args": ["mcp"]
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Claude then has access to:
|
|
32
|
+
|
|
33
|
+
- **`build_context`** — Assembles relevant context for any question. Use this instead of reading files.
|
|
34
|
+
- **`search_codebase`** — Finds functions, types, and symbols by meaning, not keyword. Use this instead of grep.
|
|
35
|
+
- **`index_repo`** — Rebuilds the local vector index.
|
|
36
|
+
- **`doctor`** — Diagnoses index health and Ollama connectivity.
|
|
37
|
+
|
|
38
|
+
No copy/pasting code into prompts. No manual file opens. Claude knows where to look.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## ⚡ The problem
|
|
43
|
+
|
|
44
|
+
When you ask Claude about your codebase, you either:
|
|
45
|
+
|
|
46
|
+
- paste huge chunks of code ❌
|
|
47
|
+
- rely on vague context ❌
|
|
48
|
+
- or let tools send way too much ❌
|
|
49
|
+
|
|
50
|
+
Result:
|
|
51
|
+
|
|
52
|
+
- worse answers
|
|
53
|
+
- hallucinations
|
|
54
|
+
- massive token usage
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## 🧠 How it works
|
|
59
|
+
|
|
60
|
+
brain-cache is the layer between your codebase and Claude.
|
|
61
|
+
|
|
62
|
+
1. Your code is indexed locally using Ollama embeddings — nothing leaves your machine
|
|
63
|
+
2. When you ask Claude a question, it calls `build_context` or `search_codebase` automatically
|
|
64
|
+
3. brain-cache retrieves only the relevant files, trims duplicates, and fits them to a token budget
|
|
65
|
+
4. Claude gets tight, useful context — not your entire repo
|
|
66
|
+
|
|
67
|
+
AI should read the right parts — and nothing else. brain-cache is the layer that makes that possible.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## 🔥 Example
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
> "Explain the overall architecture of this project"
|
|
75
|
+
|
|
76
|
+
brain-cache: context assembled (74 tokens, 97% reduction)
|
|
77
|
+
|
|
78
|
+
Tokens sent to Claude: 74
|
|
79
|
+
Estimated without: ~2,795
|
|
80
|
+
Reduction: 97%
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Claude gets only what matters → answers are sharper and grounded.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## ⚡ Quick start
|
|
88
|
+
|
|
89
|
+
**Step 1: Install**
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
npm install -g brain-cache
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**Step 2: Init and index your project**
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
brain-cache init
|
|
99
|
+
brain-cache index
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
`brain-cache init` appends MCP tool instructions to your `CLAUDE.md` — so Claude knows when and how to prefer brain-cache tools over built-in file reading. Runs once; won’t duplicate.
|
|
103
|
+
|
|
104
|
+
**Step 3: Add MCP server to Claude Code**
|
|
105
|
+
|
|
106
|
+
```json
|
|
107
|
+
// .claude/settings.json
|
|
108
|
+
{
|
|
109
|
+
"mcpServers": {
|
|
110
|
+
"brain-cache": {
|
|
111
|
+
"command": "brain-cache",
|
|
112
|
+
"args": ["mcp"]
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**Step 4: Use Claude normally**
|
|
119
|
+
|
|
120
|
+
brain-cache tools are called automatically. You don’t change how you work — the context just gets better.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## 🧩 Core capabilities
|
|
125
|
+
|
|
126
|
+
- 🧠 Local embeddings via Ollama — no API calls, no data sent out
|
|
127
|
+
- 🔍 Semantic vector search over your codebase
|
|
128
|
+
- ✂️ Context trimming and deduplication
|
|
129
|
+
- 🎯 Token budget optimisation
|
|
130
|
+
- 🤖 MCP server for Claude Code integration
|
|
131
|
+
- ⚡ CLI for setup, debugging, and admin
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## 🧠 Why it’s different
|
|
136
|
+
|
|
137
|
+
Most AI coding tools:
|
|
138
|
+
|
|
139
|
+
- send too much context
|
|
140
|
+
- hide retrieval behind hosted services
|
|
141
|
+
- require you to prompt-engineer your way to good answers
|
|
142
|
+
|
|
143
|
+
brain-cache is:
|
|
144
|
+
|
|
145
|
+
- 🏠 Local-first — embeddings run on your machine
|
|
146
|
+
- 🔍 Transparent — you can inspect exactly what context gets sent
|
|
147
|
+
- 🎯 Token-aware — every call shows the reduction
|
|
148
|
+
- ⚙️ Developer-controlled — no vendor lock-in, no cloud dependency
|
|
149
|
+
|
|
150
|
+
Think: **Vite, but for LLM context.**
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## 🧪 CLI commands
|
|
155
|
+
|
|
156
|
+
The CLI is the setup and admin interface. Use it to init, index, debug, and diagnose — not as the primary interface.
|
|
157
|
+
|
|
158
|
+
```
|
|
159
|
+
brain-cache init Initialize brain-cache in a project
|
|
160
|
+
brain-cache index Build/rebuild the vector index
|
|
161
|
+
brain-cache search "auth middleware" Manual search (useful for debugging)
|
|
162
|
+
brain-cache context "auth flow" Manual context building (useful for debugging)
|
|
163
|
+
brain-cache ask "how does auth work?" Direct Claude query via CLI
|
|
164
|
+
brain-cache doctor Check system health
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## 📊 Token savings
|
|
170
|
+
|
|
171
|
+
Every call shows exactly what was saved:
|
|
172
|
+
|
|
173
|
+
```
|
|
174
|
+
context: 1,240 tokens (93% reduction)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
Less noise → better reasoning → cheaper usage.
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## 🧠 Built with GSD
|
|
182
|
+
|
|
183
|
+
This project uses the GSD (Get Shit Done) framework — an AI-driven workflow for going from idea → research → plan → execution. brain-cache is both a product of that philosophy and a tool that makes it work better: tight context, better outcomes.
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## ⚠️ Status
|
|
188
|
+
|
|
189
|
+
Early stage — actively improving:
|
|
190
|
+
|
|
191
|
+
- ⏳ reranking (planned)
|
|
192
|
+
- ⏳ context compression
|
|
193
|
+
- ⏳ live indexing (watch mode)
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## 🛠 Requirements
|
|
198
|
+
|
|
199
|
+
- Node.js 22+
|
|
200
|
+
- Ollama running locally (`nomic-embed-text` model)
|
|
201
|
+
- Anthropic API key (for `ask` command only)
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## ⭐️ If this is useful
|
|
206
|
+
|
|
207
|
+
Give it a star — or try it on your repo and let me know what breaks.
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## 📄 License
|
|
212
|
+
|
|
213
|
+
MIT — see LICENSE for details.
|
|
214
|
+
|
|
215
|
+
---
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
formatTokenSavings
|
|
4
|
+
} from "./chunk-GGOUKACO.js";
|
|
5
|
+
import {
|
|
6
|
+
runBuildContext
|
|
7
|
+
} from "./chunk-7JLSJNKU.js";
|
|
8
|
+
import "./chunk-OKWMQNH6.js";
|
|
9
|
+
import "./chunk-ZLB4VJQK.js";
|
|
10
|
+
import "./chunk-WCNMLSL2.js";
|
|
11
|
+
import "./chunk-P7WSTGLE.js";
|
|
12
|
+
import "./chunk-XXWJ57QP.js";
|
|
13
|
+
import "./chunk-PA4BZBWS.js";
|
|
14
|
+
import {
|
|
15
|
+
childLogger
|
|
16
|
+
} from "./chunk-PDQXJSH4.js";
|
|
17
|
+
|
|
18
|
+
// src/workflows/askCodebase.ts
|
|
19
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
20
|
+
var log = childLogger("ask-codebase");
|
|
21
|
+
var DEFAULT_CLAUDE_MODEL = "claude-sonnet-4-20250514";
|
|
22
|
+
var DEFAULT_MAX_RESPONSE_TOKENS = 4096;
|
|
23
|
+
var SYSTEM_PROMPT = `You are a codebase assistant. Answer questions strictly based on the provided codebase context.
|
|
24
|
+
|
|
25
|
+
- Do not hallucinate or infer implementation details not present in the provided context.
|
|
26
|
+
- Prioritize code-level explanations and reference specific files and functions when available.
|
|
27
|
+
- If the provided context is insufficient to answer the question, say "I don't see enough context to answer that" rather than guessing.
|
|
28
|
+
- Be precise and grounded in the actual code shown.`;
|
|
29
|
+
// Answer a natural-language question about the codebase:
// 1) assemble local, token-budgeted context via runBuildContext,
// 2) send context + question to Claude with a grounding system prompt.
// Requires ANTHROPIC_API_KEY in the environment; the model can be
// overridden with BRAIN_CACHE_CLAUDE_MODEL.
// Returns { answer, contextMetadata, model }.
async function runAskCodebase(question, opts) {
  if (!process.env.ANTHROPIC_API_KEY) {
    throw new Error("ANTHROPIC_API_KEY environment variable is not set.");
  }
  // Map the ask-level option names onto runBuildContext's option names.
  const buildOpts = {
    maxTokens: opts?.maxContextTokens,
    path: opts?.path
  };
  const contextResult = await runBuildContext(question, buildOpts);
  // Report token savings on stderr so stdout stays clean for the answer.
  process.stderr.write(
    `brain-cache: context assembled
${formatTokenSavings({ tokensSent: contextResult.metadata.tokensSent, estimatedWithout: contextResult.metadata.estimatedWithoutBraincache, reductionPct: contextResult.metadata.reductionPct })}
`
  );
  const model = process.env.BRAIN_CACHE_CLAUDE_MODEL ?? DEFAULT_CLAUDE_MODEL;
  const maxTokens = opts?.maxResponseTokens ?? DEFAULT_MAX_RESPONSE_TOKENS;
  // Anthropic() reads ANTHROPIC_API_KEY from the environment (checked above).
  const client = new Anthropic();
  const response = await client.messages.create({
    model,
    max_tokens: maxTokens,
    system: SYSTEM_PROMPT,
    messages: [
      {
        role: "user",
        content: `Here is relevant context from the codebase:

${contextResult.content}

Question: ${question}`
      }
    ]
  });
  // The response may contain multiple content blocks; use the first text one.
  const textBlock = response.content.find((b) => b.type === "text");
  const answer = textBlock?.text ?? "(no text response from Claude)";
  log.info(
    {
      model,
      inputTokens: response.usage?.input_tokens,
      outputTokens: response.usage?.output_tokens
    },
    "ask-codebase complete"
  );
  return {
    answer,
    contextMetadata: {
      tokensSent: contextResult.metadata.tokensSent,
      estimatedWithoutBraincache: contextResult.metadata.estimatedWithoutBraincache,
      reductionPct: contextResult.metadata.reductionPct
    },
    model
  };
}
|
|
81
|
+
export {
|
|
82
|
+
runAskCodebase
|
|
83
|
+
};
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
runBuildContext
|
|
4
|
+
} from "./chunk-7JLSJNKU.js";
|
|
5
|
+
import "./chunk-OKWMQNH6.js";
|
|
6
|
+
import "./chunk-ZLB4VJQK.js";
|
|
7
|
+
import "./chunk-WCNMLSL2.js";
|
|
8
|
+
import "./chunk-P7WSTGLE.js";
|
|
9
|
+
import "./chunk-XXWJ57QP.js";
|
|
10
|
+
import "./chunk-PA4BZBWS.js";
|
|
11
|
+
import "./chunk-PDQXJSH4.js";
|
|
12
|
+
export {
|
|
13
|
+
runBuildContext
|
|
14
|
+
};
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
assembleContext,
|
|
4
|
+
countChunkTokens
|
|
5
|
+
} from "./chunk-OKWMQNH6.js";
|
|
6
|
+
import {
|
|
7
|
+
RETRIEVAL_STRATEGIES,
|
|
8
|
+
classifyQueryIntent,
|
|
9
|
+
deduplicateChunks,
|
|
10
|
+
searchChunks
|
|
11
|
+
} from "./chunk-ZLB4VJQK.js";
|
|
12
|
+
import {
|
|
13
|
+
embedBatchWithRetry
|
|
14
|
+
} from "./chunk-WCNMLSL2.js";
|
|
15
|
+
import {
|
|
16
|
+
isOllamaRunning
|
|
17
|
+
} from "./chunk-P7WSTGLE.js";
|
|
18
|
+
import {
|
|
19
|
+
openDatabase,
|
|
20
|
+
readIndexState
|
|
21
|
+
} from "./chunk-XXWJ57QP.js";
|
|
22
|
+
import {
|
|
23
|
+
readProfile
|
|
24
|
+
} from "./chunk-PA4BZBWS.js";
|
|
25
|
+
import {
|
|
26
|
+
DEFAULT_TOKEN_BUDGET
|
|
27
|
+
} from "./chunk-PDQXJSH4.js";
|
|
28
|
+
|
|
29
|
+
// src/workflows/buildContext.ts
|
|
30
|
+
import { readFile } from "fs/promises";
|
|
31
|
+
import { resolve } from "path";
|
|
32
|
+
// Build a token-budgeted context bundle for a query:
// embed the query locally via Ollama, vector-search the chunk index,
// deduplicate hits, and greedily pack chunks under the token budget.
// Throws when the profile, Ollama, or the index is missing.
// Returns { content, chunks, metadata } where metadata includes the
// tokens sent vs. an estimate of sending each referenced file whole.
async function runBuildContext(query, opts) {
  const profile = await readProfile();
  if (profile === null) {
    throw new Error("No profile found. Run 'brain-cache init' first.");
  }
  const running = await isOllamaRunning();
  if (!running) {
    throw new Error("Ollama is not running. Start it with 'ollama serve' or run 'brain-cache init'.");
  }
  const rootDir = resolve(opts?.path ?? ".");
  const indexState = await readIndexState(rootDir);
  if (indexState === null) {
    throw new Error(`No index found at ${rootDir}. Run 'brain-cache index' first.`);
  }
  const db = await openDatabase(rootDir);
  const tableNames = await db.tableNames();
  if (!tableNames.includes("chunks")) {
    throw new Error("No chunks table found. Run 'brain-cache index' first.");
  }
  const table = await db.openTable("chunks");
  // Retrieval limit/threshold vary by the query's classified intent;
  // an explicit opts.limit overrides the strategy's default limit.
  const intent = classifyQueryIntent(query);
  const strategy = {
    limit: opts?.limit ?? RETRIEVAL_STRATEGIES[intent].limit,
    distanceThreshold: RETRIEVAL_STRATEGIES[intent].distanceThreshold
  };
  const maxTokens = opts?.maxTokens ?? DEFAULT_TOKEN_BUDGET;
  // Progress goes to stderr so stdout stays clean for consumers.
  process.stderr.write(
    `brain-cache: building context (intent=${intent}, budget=${maxTokens} tokens)
`
  );
  // Embed with the same model the index was originally built with.
  const vectors = await embedBatchWithRetry(indexState.embeddingModel, [query]);
  const queryVector = vectors[0];
  const results = await searchChunks(table, queryVector, strategy);
  const deduped = deduplicateChunks(results);
  const assembled = assembleContext(deduped, { maxTokens });
  // Baseline estimate: the token cost of sending every referenced file
  // in full. Files that can't be read are silently skipped (best-effort).
  const uniqueFiles = [...new Set(assembled.chunks.map((c) => c.filePath))];
  let estimatedWithoutBraincache = 0;
  for (const filePath of uniqueFiles) {
    try {
      const fileContent = await readFile(filePath, "utf-8");
      estimatedWithoutBraincache += countChunkTokens(fileContent);
    } catch {
    }
  }
  const reductionPct = estimatedWithoutBraincache > 0 ? Math.round((1 - assembled.tokenCount / estimatedWithoutBraincache) * 100) : 0;
  const result = {
    content: assembled.content,
    chunks: assembled.chunks,
    metadata: {
      tokensSent: assembled.tokenCount,
      estimatedWithoutBraincache,
      reductionPct,
      localTasksPerformed: ["embed_query", "vector_search", "dedup", "token_budget"],
      cloudCallsMade: 0
    }
  };
  process.stderr.write(
    `brain-cache: context assembled (${assembled.tokenCount} tokens, ${reductionPct}% reduction, ${assembled.chunks.length} chunks)
`
  );
  return result;
}
|
|
94
|
+
|
|
95
|
+
export {
|
|
96
|
+
runBuildContext
|
|
97
|
+
};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/lib/format.ts
|
|
4
|
+
// Render a three-line token-savings summary with the values aligned
// in a fixed column, e.g.:
//   Tokens sent to Claude:     74
//   Estimated without:         ~2,795
//   Reduction:                 97%
function formatTokenSavings(input) {
  const LABEL_WIDTH = 27;
  const rows = [
    ["Tokens sent to Claude:", input.tokensSent.toLocaleString()],
    ["Estimated without:", `~${input.estimatedWithout.toLocaleString()}`],
    ["Reduction:", `${input.reductionPct}%`]
  ];
  const rendered = [];
  for (const [label, value] of rows) {
    rendered.push(label.padEnd(LABEL_WIDTH) + value);
  }
  return rendered.join("\n");
}
|
|
13
|
+
|
|
14
|
+
export {
|
|
15
|
+
formatTokenSavings
|
|
16
|
+
};
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
childLogger
|
|
4
|
+
} from "./chunk-PDQXJSH4.js";
|
|
5
|
+
|
|
6
|
+
// src/services/tokenCounter.ts
|
|
7
|
+
import { countTokens } from "@anthropic-ai/tokenizer";
|
|
8
|
+
var log = childLogger("tokenCounter");
|
|
9
|
+
// Count Anthropic tokens in a piece of text.
// Short-circuits on the empty string to skip a tokenizer call.
function countChunkTokens(text) {
  if (text.length === 0) return 0;
  // countTokens comes from @anthropic-ai/tokenizer (imported above).
  return countTokens(text);
}
|
|
13
|
+
// Prefix a chunk's content with a one-line file/location header comment
// so the model can attribute code to its source file and line range.
function formatChunk(chunk) {
  const { filePath, startLine, endLine, content } = chunk;
  const header = `// File: ${filePath} (lines ${startLine}-${endLine})`;
  return header + "\n" + content;
}
|
|
17
|
+
// Greedily pack formatted chunks into a token budget, preserving order.
// Stops at the first chunk that would push the total past opts.maxTokens
// (a separator's token cost is charged between every pair of kept chunks).
// Returns { content, chunks: keptChunks, tokenCount }.
function assembleContext(chunks, opts) {
  const SEPARATOR = "\n\n---\n\n";
  const separatorCost = countChunkTokens(SEPARATOR);
  const retained = [];
  let usedTokens = 0;
  for (const candidate of chunks) {
    const rendered = formatChunk(candidate);
    const renderedTokens = countChunkTokens(rendered);
    // The first kept chunk needs no leading separator.
    const joinCost = retained.length === 0 ? 0 : separatorCost;
    if (usedTokens + renderedTokens + joinCost > opts.maxTokens) {
      log.debug({ totalTokens: usedTokens, chunkTokens: renderedTokens, maxTokens: opts.maxTokens }, "Token budget reached");
      break;
    }
    retained.push(candidate);
    usedTokens += renderedTokens + joinCost;
  }
  return {
    content: retained.map(formatChunk).join(SEPARATOR),
    chunks: retained,
    tokenCount: usedTokens
  };
}
|
|
36
|
+
|
|
37
|
+
export {
|
|
38
|
+
countChunkTokens,
|
|
39
|
+
assembleContext
|
|
40
|
+
};
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
childLogger
|
|
4
|
+
} from "./chunk-PDQXJSH4.js";
|
|
5
|
+
|
|
6
|
+
// src/services/ollama.ts
|
|
7
|
+
import { execFile, spawn } from "child_process";
|
|
8
|
+
import { promisify } from "util";
|
|
9
|
+
import ollama from "ollama";
|
|
10
|
+
var execFileAsync = promisify(execFile);
|
|
11
|
+
var log = childLogger("ollama");
|
|
12
|
+
// Resolve the Ollama base URL: OLLAMA_HOST if set, else the local default.
function getOllamaHost() {
  const fromEnv = process.env.OLLAMA_HOST;
  return fromEnv ?? "http://localhost:11434";
}
|
|
15
|
+
// True when the `ollama` binary is on PATH, probed via the platform's
// lookup command (`where` on Windows, `which` elsewhere).
async function isOllamaInstalled() {
  const locator = process.platform === "win32" ? "where" : "which";
  try {
    await execFileAsync(locator, ["ollama"]);
  } catch {
    // Lookup command exited non-zero: binary not found.
    return false;
  }
  return true;
}
|
|
24
|
+
// Probe the Ollama HTTP endpoint; false on any network error or non-2xx.
async function isOllamaRunning() {
  let reachable = false;
  try {
    const response = await fetch(getOllamaHost());
    reachable = response.ok;
  } catch {
    // Connection refused / DNS failure etc. — treat as "not running".
  }
  return reachable;
}
|
|
32
|
+
// Auto-start a local Ollama server ("ollama serve") and wait for it to
// become reachable. Refuses to act when OLLAMA_HOST points at a remote
// address. Returns true when the server is reachable (already running or
// successfully started), false when the spawned process never came up
// within the poll window (in which case it is killed).
async function startOllama() {
  const host = getOllamaHost();
  // Only auto-start when the host points at this machine.
  const isLocalhost = host === "http://localhost:11434" || host === "http://127.0.0.1:11434";
  if (!isLocalhost) {
    throw new Error(
      `OLLAMA_HOST is set to a remote address (${host}). brain-cache cannot auto-start a remote Ollama server. Ensure Ollama is running at ${host} and try again.`
    );
  }
  const alreadyRunning = await isOllamaRunning();
  if (alreadyRunning) {
    log.info("Ollama is already running, skipping spawn");
    return true;
  }
  log.info("Starting Ollama server...");
  // Detach with ignored stdio so the server outlives this CLI process.
  const child = spawn("ollama", ["serve"], {
    detached: true,
    stdio: "ignore"
  });
  const pid = child.pid;
  child.unref();
  const MAX_ATTEMPTS = 10;
  const POLL_INTERVAL_MS = 500;
  // If we are interrupted while waiting, reap the server we just spawned.
  const cleanup = () => {
    try {
      if (pid !== void 0) process.kill(pid, "SIGTERM");
    } catch {
    }
  };
  process.once("SIGINT", cleanup);
  process.once("SIGTERM", cleanup);
  let succeeded = false;
  // Poll the HTTP endpoint until it responds or attempts are exhausted.
  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    const running = await isOllamaRunning();
    if (running) {
      log.info({ pid, attempt: attempt + 1 }, "Ollama is now running");
      succeeded = true;
      break;
    }
    log.debug({ attempt: attempt + 1, maxAttempts: MAX_ATTEMPTS }, "Waiting for Ollama to start...");
  }
  // Polling is over either way; drop the interrupt handlers.
  process.removeListener("SIGINT", cleanup);
  process.removeListener("SIGTERM", cleanup);
  if (succeeded) {
    return true;
  }
  // Startup timed out: kill the spawned process so it does not linger.
  try {
    if (pid !== void 0) process.kill(pid, "SIGTERM");
  } catch {
  }
  log.warn({ pid }, "Ollama did not start within timeout \u2014 killed spawned process (PID: " + pid + ")");
  return false;
}
|
|
85
|
+
// Tag-insensitive model-name comparison:
// "nomic-embed-text:latest" matches "nomic-embed-text".
function modelMatches(listedName, profileModel) {
  const stripTag = (name) => name.split(":")[0];
  return stripTag(listedName) === stripTag(profileModel);
}
|
|
90
|
+
// Ensure a model is available locally, pulling it via the Ollama API if
// absent. Progress strings go to onProgress when provided; otherwise a
// single \r-updating line is written to stderr.
async function pullModelIfMissing(model, onProgress) {
  const list = await ollama.list();
  // Tag-insensitive match: "nomic-embed-text:latest" satisfies "nomic-embed-text".
  const alreadyExists = list.models.some((m) => modelMatches(m.name, model));
  if (alreadyExists) {
    log.info({ model }, "Model already present, skipping pull");
    return;
  }
  log.info({ model }, "Model not found locally, pulling...");
  const defaultProgress = (status) => {
    process.stderr.write(`\rPulling ${model}: ${status}`);
  };
  const progress = onProgress ?? defaultProgress;
  let lastStatus = "";
  const stream = await ollama.pull({ model, stream: true });
  for await (const chunk of stream) {
    // Render a percentage only when the API reports a total byte count.
    const pct = chunk.total ? ` ${Math.round((chunk.completed ?? 0) / chunk.total * 100)}%` : "";
    const status = `${chunk.status}${pct}`;
    // Only emit on change to avoid repeating identical status lines.
    if (status !== lastStatus) {
      progress(status);
      lastStatus = status;
    }
  }
  // Terminate the \r-updating stderr line when we drew it ourselves.
  if (!onProgress) process.stderr.write("\n");
  log.info({ model }, "Model pull complete");
}
|
|
115
|
+
// Return the trimmed output of `ollama --version`, or null when the
// binary is missing or the command fails.
async function getOllamaVersion() {
  let version = null;
  try {
    const result = await execFileAsync("ollama", ["--version"]);
    version = result.stdout.trim();
  } catch {
    // Binary absent or errored; report unknown as null.
  }
  return version;
}
|
|
123
|
+
|
|
124
|
+
export {
|
|
125
|
+
isOllamaInstalled,
|
|
126
|
+
isOllamaRunning,
|
|
127
|
+
startOllama,
|
|
128
|
+
modelMatches,
|
|
129
|
+
pullModelIfMissing,
|
|
130
|
+
getOllamaVersion
|
|
131
|
+
};
|