brainbank 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -89
- package/dist/{base-DZWtdgIf.d.ts → base-B_vJSAbj.d.ts} +20 -20
- package/dist/chunk-424UFCY7.js +78 -0
- package/dist/chunk-424UFCY7.js.map +1 -0
- package/dist/{chunk-HNPABX7L.js → chunk-7EZR47JV.js} +1 -1
- package/dist/{chunk-HNPABX7L.js.map → chunk-7EZR47JV.js.map} +1 -1
- package/dist/chunk-B77KABWH.js +41 -0
- package/dist/chunk-B77KABWH.js.map +1 -0
- package/dist/{chunk-YRGUIRN5.js → chunk-C4KDZGRX.js} +9 -9
- package/dist/chunk-C4KDZGRX.js.map +1 -0
- package/dist/{chunk-TTXVJFAE.js → chunk-HPNUMUIF.js} +1 -1
- package/dist/{chunk-TTXVJFAE.js.map → chunk-HPNUMUIF.js.map} +1 -1
- package/dist/{chunk-DDECTPRM.js → chunk-PXK62M5W.js} +17 -12
- package/dist/chunk-PXK62M5W.js.map +1 -0
- package/dist/{chunk-BNV43SEF.js → chunk-VVXYZIIB.js} +5 -5
- package/dist/chunk-VVXYZIIB.js.map +1 -0
- package/dist/{chunk-MY36UPPQ.js → chunk-YC4ZQLDN.js} +355 -378
- package/dist/chunk-YC4ZQLDN.js.map +1 -0
- package/dist/chunk-ZNLN2VWV.js +110 -0
- package/dist/chunk-ZNLN2VWV.js.map +1 -0
- package/dist/cli.js +19 -27
- package/dist/cli.js.map +1 -1
- package/dist/code.d.ts +2 -2
- package/dist/code.js +1 -1
- package/dist/docs.d.ts +2 -2
- package/dist/docs.js +1 -1
- package/dist/git.d.ts +2 -2
- package/dist/git.js +1 -1
- package/dist/index.d.ts +77 -17
- package/dist/index.js +21 -9
- package/dist/index.js.map +1 -1
- package/dist/local-embedding-ZIMTK6PU.js +8 -0
- package/dist/local-embedding-ZIMTK6PU.js.map +1 -0
- package/dist/memory.d.ts +2 -2
- package/dist/memory.js +1 -1
- package/dist/notes.d.ts +2 -2
- package/dist/notes.js +1 -1
- package/dist/qwen3-reranker-3MHEENT5.js +8 -0
- package/dist/qwen3-reranker-3MHEENT5.js.map +1 -0
- package/dist/resolve-CUJWY6HP.js +10 -0
- package/dist/resolve-CUJWY6HP.js.map +1 -0
- package/package.json +9 -8
- package/dist/chunk-BNV43SEF.js.map +0 -1
- package/dist/chunk-DDECTPRM.js.map +0 -1
- package/dist/chunk-MY36UPPQ.js.map +0 -1
- package/dist/chunk-YRGUIRN5.js.map +0 -1
package/README.md
CHANGED
|
@@ -5,13 +5,14 @@
|
|
|
5
5
|
BrainBank gives LLMs a long-term memory that persists between sessions.
|
|
6
6
|
|
|
7
7
|
- **All-in-one** — core + code + git + docs + CLI in a single `brainbank` package
|
|
8
|
-
- **Pluggable
|
|
8
|
+
- **Plugin architecture** — `.use()` only what you need (code, git, docs, or custom)
|
|
9
9
|
- **Dynamic collections** — `brain.collection('errors')` for any structured data
|
|
10
10
|
- **Hybrid search** — vector + BM25 fused with Reciprocal Rank Fusion
|
|
11
11
|
- **Pluggable embeddings** — local WASM (free), OpenAI, or Perplexity (standard & contextualized)
|
|
12
12
|
- **Multi-repo** — index multiple repositories into one shared database
|
|
13
13
|
- **Portable** — single `.brainbank/brainbank.db` file
|
|
14
|
-
- **Optional packages** — [`@brainbank/memory`](#memory) (fact extraction + entity graph), [`@brainbank/
|
|
14
|
+
- **Optional packages** — [`@brainbank/memory`](#memory) (fact extraction + entity graph), [`@brainbank/mcp`](#mcp-server) (MCP server)
|
|
15
|
+
- **Optional reranker** — Qwen3-0.6B cross-encoder via `Qwen3Reranker` (opt-in)
|
|
15
16
|
|
|
16
17
|

|
|
17
18
|
|
|
@@ -28,7 +29,7 @@ Most AI memory solutions (mem0, Zep, LangMem) require cloud services, external d
|
|
|
28
29
|
| Infrastructure | **SQLite file** | Vector DB + cloud | Neo4j + cloud | LangGraph Platform |
|
|
29
30
|
| LLM required to write | **No**¹ | Yes | Yes | Yes |
|
|
30
31
|
| Code-aware | **19 AST-parsed languages (tree-sitter), git, co-edits** | ✗ | ✗ | ✗ |
|
|
31
|
-
| Custom
|
|
32
|
+
| Custom plugins | **`.use()` plugin system** | ✗ | ✗ | ✗ |
|
|
32
33
|
| Search | **Vector + BM25 + RRF** | Vector + graph² | Vector + BM25 + graph | Vector only |
|
|
33
34
|
| Framework lock-in | **None** | Optional | Zep cloud | LangChain |
|
|
34
35
|
| Portable | **Copy one file** | Tied to DB | Tied to cloud | Tied to platform |
|
|
@@ -50,12 +51,12 @@ Most AI memory solutions (mem0, Zep, LangMem) require cloud services, external d
|
|
|
50
51
|
- [Quick Start](#quick-start)
|
|
51
52
|
- [CLI](#cli)
|
|
52
53
|
- [Programmatic API](#programmatic-api)
|
|
53
|
-
- [
|
|
54
|
+
- [Plugins](#plugins)
|
|
54
55
|
- [Collections](#collections)
|
|
55
56
|
- [Search](#search)
|
|
56
57
|
- [Document Collections](#document-collections)
|
|
57
58
|
- [Context Generation](#context-generation)
|
|
58
|
-
- [Custom
|
|
59
|
+
- [Custom Plugins](#custom-plugins)
|
|
59
60
|
- [AI Agent Integration](#ai-agent-integration)
|
|
60
61
|
- [Examples](#examples)
|
|
61
62
|
- [Watch Mode](#watch-mode)
|
|
@@ -88,20 +89,48 @@ npm install brainbank
|
|
|
88
89
|
| Package | When to install |
|
|
89
90
|
|---------|----------------|
|
|
90
91
|
| `@brainbank/memory` | Deterministic memory extraction + entity graph for LLM conversations |
|
|
91
|
-
| `@brainbank/reranker` | Cross-encoder reranker (Qwen3-0.6B, ~640MB model) |
|
|
92
92
|
| `@brainbank/mcp` | MCP server for AI tool integration |
|
|
93
93
|
|
|
94
94
|
```bash
|
|
95
95
|
# Memory — automatic fact extraction & dedup for chatbots/agents
|
|
96
96
|
npm install @brainbank/memory
|
|
97
97
|
|
|
98
|
-
# Reranker —
|
|
99
|
-
npm install
|
|
98
|
+
# Reranker — built-in, install the runtime dependency to enable
|
|
99
|
+
npm install node-llama-cpp
|
|
100
100
|
|
|
101
101
|
# MCP server — for Antigravity, Claude Desktop, etc.
|
|
102
102
|
npm install @brainbank/mcp
|
|
103
103
|
```
|
|
104
104
|
|
|
105
|
+
### Tree-Sitter Grammars
|
|
106
|
+
|
|
107
|
+
BrainBank uses [tree-sitter](https://tree-sitter.github.io/) for AST-aware code chunking. **JavaScript and TypeScript grammars are included by default.** Other languages require installing the corresponding grammar package:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Install only the grammars you need
|
|
111
|
+
npm install tree-sitter-python tree-sitter-go tree-sitter-rust
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
If you index a file whose grammar isn't installed, BrainBank will throw a clear error:
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
BrainBank: Grammar 'tree-sitter-python' is not installed. Run: npm install tree-sitter-python
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
<details>
|
|
121
|
+
<summary>All available grammars (19 languages)</summary>
|
|
122
|
+
|
|
123
|
+
| Category | Packages |
|
|
124
|
+
|----------|----------|
|
|
125
|
+
| **Included** | `tree-sitter-javascript`, `tree-sitter-typescript` |
|
|
126
|
+
| Web | `tree-sitter-html`, `tree-sitter-css` |
|
|
127
|
+
| Systems | `tree-sitter-go`, `tree-sitter-rust`, `tree-sitter-c`, `tree-sitter-cpp`, `tree-sitter-swift` |
|
|
128
|
+
| JVM | `tree-sitter-java`, `tree-sitter-kotlin`, `tree-sitter-scala` |
|
|
129
|
+
| Scripting | `tree-sitter-python`, `tree-sitter-ruby`, `tree-sitter-php`, `tree-sitter-lua`, `tree-sitter-bash`, `tree-sitter-elixir` |
|
|
130
|
+
| .NET | `tree-sitter-c-sharp` |
|
|
131
|
+
|
|
132
|
+
</details>
|
|
133
|
+
|
|
105
134
|
---
|
|
106
135
|
|
|
107
136
|
## Quick Start
|
|
@@ -178,10 +207,10 @@ brainbank watch # Watch repo, auto re-index on save
|
|
|
178
207
|
# Watching /path/to/repo for changes...
|
|
179
208
|
# 14:30:02 ✓ code: src/api.ts
|
|
180
209
|
# 14:30:05 ✓ code: src/routes.ts
|
|
181
|
-
# 14:30:08 ✓ csv: data/metrics.csv ← custom
|
|
210
|
+
# 14:30:08 ✓ csv: data/metrics.csv ← custom plugin
|
|
182
211
|
```
|
|
183
212
|
|
|
184
|
-
> Watch mode monitors **code files** by default. [Custom
|
|
213
|
+
> Watch mode monitors **code files** by default. [Custom plugins](#custom-plugins) that implement `watchPatterns()` and `onFileChange()` are automatically picked up — their name appears in the console output alongside the built-in `code` plugin. Git history and document collections are not affected by file-system changes and must be re-indexed explicitly with `brainbank index` / `brainbank docs`.
|
|
185
214
|
|
|
186
215
|
### Document Collections
|
|
187
216
|
|
|
@@ -235,11 +264,11 @@ brainbank serve # Start MCP server (stdio)
|
|
|
235
264
|
|
|
236
265
|
Use BrainBank as a library in your TypeScript/Node.js project.
|
|
237
266
|
|
|
238
|
-
###
|
|
267
|
+
### Plugins
|
|
239
268
|
|
|
240
|
-
BrainBank uses pluggable
|
|
269
|
+
BrainBank is built around plugins. Register only what you need with `.use()`:
|
|
241
270
|
|
|
242
|
-
|
|
|
271
|
+
| Plugin | Import | Description |
|
|
243
272
|
|---------|--------|-------------|
|
|
244
273
|
| `code` | `brainbank/code` | AST-aware code chunking via tree-sitter (19 languages) |
|
|
245
274
|
| `git` | `brainbank/git` | Git commit history, diffs, co-edit relationships |
|
|
@@ -251,7 +280,7 @@ import { code } from 'brainbank/code';
|
|
|
251
280
|
import { git } from 'brainbank/git';
|
|
252
281
|
import { docs } from 'brainbank/docs';
|
|
253
282
|
|
|
254
|
-
// Pick only the
|
|
283
|
+
// Pick only the plugins you need
|
|
255
284
|
const brain = new BrainBank({ repoPath: '.' })
|
|
256
285
|
.use(code())
|
|
257
286
|
.use(git())
|
|
@@ -302,7 +331,7 @@ Auto-re-index when files change:
|
|
|
302
331
|
// API
|
|
303
332
|
const watcher = brain.watch({
|
|
304
333
|
debounceMs: 2000,
|
|
305
|
-
onIndex: (file,
|
|
334
|
+
onIndex: (file, plugin) => console.log(`${plugin}: ${file}`),
|
|
306
335
|
onError: (err) => console.error(err.message),
|
|
307
336
|
});
|
|
308
337
|
|
|
@@ -318,15 +347,15 @@ brainbank watch
|
|
|
318
347
|
# 14:30:05 ✓ code: src/routes.ts
|
|
319
348
|
```
|
|
320
349
|
|
|
321
|
-
#### Custom
|
|
350
|
+
#### Custom Plugin Watch
|
|
322
351
|
|
|
323
|
-
Custom
|
|
352
|
+
Custom plugins can hook into watch mode by implementing `onFileChange` and `watchPatterns`:
|
|
324
353
|
|
|
325
354
|
```typescript
|
|
326
|
-
import type {
|
|
355
|
+
import type { Plugin, PluginContext } from 'brainbank';
|
|
327
356
|
|
|
328
|
-
function
|
|
329
|
-
let ctx:
|
|
357
|
+
function csvPlugin(): Plugin {
|
|
358
|
+
let ctx: PluginContext;
|
|
330
359
|
|
|
331
360
|
return {
|
|
332
361
|
name: 'csv',
|
|
@@ -335,7 +364,7 @@ function csvIndexer(): Indexer {
|
|
|
335
364
|
ctx = context;
|
|
336
365
|
},
|
|
337
366
|
|
|
338
|
-
// Tell watch which files this
|
|
367
|
+
// Tell watch which files this plugin cares about
|
|
339
368
|
watchPatterns() {
|
|
340
369
|
return ['**/*.csv', '**/*.tsv'];
|
|
341
370
|
},
|
|
@@ -357,7 +386,7 @@ function csvIndexer(): Indexer {
|
|
|
357
386
|
|
|
358
387
|
const brain = new BrainBank({ dbPath: './brain.db' })
|
|
359
388
|
.use(code())
|
|
360
|
-
.use(
|
|
389
|
+
.use(csvPlugin());
|
|
361
390
|
|
|
362
391
|
await brain.initialize();
|
|
363
392
|
brain.watch(); // Now watches .ts, .py, etc. AND .csv, .tsv
|
|
@@ -424,16 +453,16 @@ const context = await brain.getContext('add rate limiting to the API', {
|
|
|
424
453
|
// Returns: ## Relevant Code, ## Git History, ## Relevant Documents
|
|
425
454
|
```
|
|
426
455
|
|
|
427
|
-
### Custom
|
|
456
|
+
### Custom Plugins
|
|
428
457
|
|
|
429
|
-
Implement the `
|
|
458
|
+
Implement the `Plugin` interface to build your own:
|
|
430
459
|
|
|
431
460
|
```typescript
|
|
432
|
-
import type {
|
|
461
|
+
import type { Plugin, PluginContext } from 'brainbank';
|
|
433
462
|
|
|
434
|
-
const
|
|
463
|
+
const myPlugin: Plugin = {
|
|
435
464
|
name: 'custom',
|
|
436
|
-
async initialize(ctx:
|
|
465
|
+
async initialize(ctx: PluginContext) {
|
|
437
466
|
// ctx.db — shared SQLite database
|
|
438
467
|
// ctx.embedding — shared embedding provider
|
|
439
468
|
// ctx.collection() — create dynamic collections
|
|
@@ -442,10 +471,10 @@ const myIndexer: Indexer = {
|
|
|
442
471
|
},
|
|
443
472
|
};
|
|
444
473
|
|
|
445
|
-
brain.use(
|
|
474
|
+
brain.use(myPlugin);
|
|
446
475
|
```
|
|
447
476
|
|
|
448
|
-
#### Using custom
|
|
477
|
+
#### Using custom plugins with the CLI
|
|
449
478
|
|
|
450
479
|
Drop `.ts` files into `.brainbank/indexers/` — the CLI auto-discovers them:
|
|
451
480
|
|
|
@@ -457,11 +486,11 @@ Drop `.ts` files into `.brainbank/indexers/` — the CLI auto-discovers them:
|
|
|
457
486
|
└── jira.ts
|
|
458
487
|
```
|
|
459
488
|
|
|
460
|
-
Each file exports a default `
|
|
489
|
+
Each file exports a default `Plugin`:
|
|
461
490
|
|
|
462
491
|
```typescript
|
|
463
492
|
// .brainbank/indexers/slack.ts
|
|
464
|
-
import type {
|
|
493
|
+
import type { Plugin } from 'brainbank';
|
|
465
494
|
|
|
466
495
|
export default {
|
|
467
496
|
name: 'slack',
|
|
@@ -469,14 +498,14 @@ export default {
|
|
|
469
498
|
const msgs = ctx.collection('slack_messages');
|
|
470
499
|
// ... fetch and index slack messages
|
|
471
500
|
},
|
|
472
|
-
} satisfies
|
|
501
|
+
} satisfies Plugin;
|
|
473
502
|
```
|
|
474
503
|
|
|
475
|
-
That's it — all CLI commands automatically pick up your
|
|
504
|
+
That's it — all CLI commands automatically pick up your plugins:
|
|
476
505
|
|
|
477
506
|
```bash
|
|
478
507
|
brainbank index # runs code + git + docs + slack + jira
|
|
479
|
-
brainbank stats # shows all
|
|
508
|
+
brainbank stats # shows all plugins
|
|
480
509
|
brainbank kv search slack_messages "deploy" # search slack data
|
|
481
510
|
```
|
|
482
511
|
|
|
@@ -494,18 +523,18 @@ export default {
|
|
|
494
523
|
};
|
|
495
524
|
```
|
|
496
525
|
|
|
497
|
-
Everything lives in `.brainbank/` — DB, config, and custom
|
|
526
|
+
Everything lives in `.brainbank/` — DB, config, and custom plugins:
|
|
498
527
|
|
|
499
528
|
```
|
|
500
529
|
.brainbank/
|
|
501
530
|
├── brainbank.db # SQLite database (auto-created)
|
|
502
531
|
├── config.ts # Optional project config
|
|
503
|
-
└── indexers/ # Optional custom
|
|
532
|
+
└── indexers/ # Optional custom plugin files
|
|
504
533
|
├── slack.ts
|
|
505
534
|
└── jira.ts
|
|
506
535
|
```
|
|
507
536
|
|
|
508
|
-
No folder and no config file? The CLI uses the built-in
|
|
537
|
+
No folder and no config file? The CLI uses the built-in plugins (`code`, `git`, `docs`).
|
|
509
538
|
|
|
510
539
|
---
|
|
511
540
|
|
|
@@ -556,19 +585,19 @@ Teach your AI coding agent to use BrainBank as persistent memory. Add an `AGENTS
|
|
|
556
585
|
| **Cursor** | Add rules in `.cursor/rules` |
|
|
557
586
|
| **MCP** (any agent) | See [MCP Server](#mcp-server) config below |
|
|
558
587
|
|
|
559
|
-
#### Custom
|
|
588
|
+
#### Custom Plugin: Auto-Ingest Conversation Logs
|
|
560
589
|
|
|
561
590
|
For agents that produce structured logs (e.g. Antigravity's `brain/` directory), auto-index them:
|
|
562
591
|
|
|
563
592
|
```typescript
|
|
564
593
|
// .brainbank/indexers/conversations.ts
|
|
565
|
-
import type {
|
|
594
|
+
import type { Plugin, PluginContext } from 'brainbank';
|
|
566
595
|
import * as fs from 'node:fs';
|
|
567
596
|
import * as path from 'node:path';
|
|
568
597
|
|
|
569
598
|
export default {
|
|
570
599
|
name: 'conversations',
|
|
571
|
-
async initialize(ctx:
|
|
600
|
+
async initialize(ctx: PluginContext) {
|
|
572
601
|
const conversations = ctx.collection('conversations');
|
|
573
602
|
const logsDir = path.join(ctx.config.repoPath, '.gemini/antigravity/brain');
|
|
574
603
|
if (!fs.existsSync(logsDir)) return;
|
|
@@ -584,7 +613,7 @@ export default {
|
|
|
584
613
|
});
|
|
585
614
|
}
|
|
586
615
|
},
|
|
587
|
-
} satisfies
|
|
616
|
+
} satisfies Plugin;
|
|
588
617
|
```
|
|
589
618
|
|
|
590
619
|
```bash
|
|
@@ -619,48 +648,36 @@ Add to your MCP config (`~/.gemini/antigravity/mcp_config.json` or Claude Deskto
|
|
|
619
648
|
"mcpServers": {
|
|
620
649
|
"brainbank": {
|
|
621
650
|
"command": "npx",
|
|
622
|
-
"args": ["-y", "@brainbank/mcp"]
|
|
623
|
-
"env": {
|
|
624
|
-
"BRAINBANK_EMBEDDING": "openai"
|
|
625
|
-
}
|
|
651
|
+
"args": ["-y", "@brainbank/mcp"]
|
|
626
652
|
}
|
|
627
653
|
}
|
|
628
654
|
}
|
|
629
655
|
```
|
|
630
656
|
|
|
631
|
-
The
|
|
657
|
+
**Zero-config.** The MCP server auto-detects:
|
|
658
|
+
- **Repo path** — from `repo` tool param > `BRAINBANK_REPO` env > `findRepoRoot(cwd)`
|
|
659
|
+
- **Embedding provider** — from `provider_key` stored in the DB (set during `brainbank index --embedding openai`)
|
|
632
660
|
|
|
633
|
-
>
|
|
661
|
+
> [!TIP]
|
|
662
|
+
> Index your repo once with the CLI to set up the embedding provider:
|
|
663
|
+
> ```bash
|
|
664
|
+
> brainbank index . --embedding openai # stores provider_key=openai in DB
|
|
665
|
+
> ```
|
|
666
|
+
> After that, the MCP server (and any future CLI runs) auto-resolve the correct provider from the DB — no env vars needed.
|
|
634
667
|
|
|
635
|
-
>
|
|
636
|
-
|
|
637
|
-
> [!CAUTION]
|
|
638
|
-
> **Embedding Provider Consistency is Critical**
|
|
639
|
-
>
|
|
640
|
-
> The embedding provider used by the MCP server **must match** the one used during indexing. Mismatched dimensions cause `initialize()` to throw or search to return empty results.
|
|
641
|
-
>
|
|
642
|
-
> **Common failure scenario:**
|
|
643
|
-
> 1. You index via CLI with `BRAINBANK_EMBEDDING=openai` (1536 dims)
|
|
644
|
-
> 2. MCP server starts without `BRAINBANK_EMBEDDING` env var → defaults to local (384 dims)
|
|
645
|
-
> 3. **Result:** BrainBank throws `Embedding dimension mismatch` on every search
|
|
646
|
-
>
|
|
647
|
-
> **Fix:** Always set `BRAINBANK_EMBEDDING` consistently in your MCP config, CLI, and API usage. If you indexed with OpenAI, your MCP config **must** include `"BRAINBANK_EMBEDDING": "openai"`. Same for `perplexity` or `perplexity-context`. If you switch providers, run `brainbank reembed` to regenerate all vectors.
|
|
668
|
+
> [!NOTE]
|
|
669
|
+
> If you switch embedding providers (e.g. local → OpenAI), run `brainbank reembed` to regenerate all vectors. BrainBank auto-detects dimension mismatches and warns you.
|
|
648
670
|
|
|
649
671
|
### Available Tools
|
|
650
672
|
|
|
651
673
|
| Tool | Description |
|
|
652
674
|
|------|-------------|
|
|
653
|
-
| `
|
|
654
|
-
| `
|
|
655
|
-
| `
|
|
656
|
-
| `
|
|
657
|
-
| `
|
|
658
|
-
| `
|
|
659
|
-
| `brainbank_history` | Git history for a file |
|
|
660
|
-
| `brainbank_coedits` | Files that change together |
|
|
661
|
-
| `brainbank_collection_add` | Add item to a KV collection |
|
|
662
|
-
| `brainbank_collection_search` | Search a KV collection |
|
|
663
|
-
| `brainbank_collection_trim` | Trim a KV collection |
|
|
675
|
+
| `brainbank_search` | Unified search — `mode: hybrid` (default), `vector`, or `keyword` |
|
|
676
|
+
| `brainbank_context` | Formatted context block for a task (code + git + co-edits) |
|
|
677
|
+
| `brainbank_index` | Trigger incremental code/git/docs indexing |
|
|
678
|
+
| `brainbank_stats` | Index statistics (files, commits, chunks, collections) |
|
|
679
|
+
| `brainbank_history` | Git history for a specific file |
|
|
680
|
+
| `brainbank_collection` | KV collection ops — `action: add`, `search`, or `trim` |
|
|
664
681
|
|
|
665
682
|
---
|
|
666
683
|
|
|
@@ -668,7 +685,7 @@ The agent passes the `repo` parameter on each tool call based on the active work
|
|
|
668
685
|
|
|
669
686
|
```typescript
|
|
670
687
|
import { BrainBank, OpenAIEmbedding } from 'brainbank';
|
|
671
|
-
import { Qwen3Reranker } from '
|
|
688
|
+
import { Qwen3Reranker } from 'brainbank'; // built-in, requires node-llama-cpp
|
|
672
689
|
|
|
673
690
|
const brain = new BrainBank({
|
|
674
691
|
repoPath: '.',
|
|
@@ -754,7 +771,12 @@ Real benchmarks on a production NestJS backend (1052 code chunks + git history):
|
|
|
754
771
|
|
|
755
772
|
### Reranker
|
|
756
773
|
|
|
757
|
-
BrainBank
|
|
774
|
+
BrainBank ships with an optional cross-encoder reranker using **Qwen3-Reranker-0.6B** via `node-llama-cpp`. It runs 100% locally — no API keys needed. The reranker is **disabled by default**.
|
|
775
|
+
|
|
776
|
+
```bash
|
|
777
|
+
# Only requirement — the LLM runtime (model auto-downloads on first use)
|
|
778
|
+
npm install node-llama-cpp
|
|
779
|
+
```
|
|
758
780
|
|
|
759
781
|
#### When to Use It
|
|
760
782
|
|
|
@@ -777,7 +799,7 @@ The reranker runs local neural inference on every search result, which improves
|
|
|
777
799
|
|
|
778
800
|
```typescript
|
|
779
801
|
import { BrainBank } from 'brainbank';
|
|
780
|
-
import { Qwen3Reranker } from '
|
|
802
|
+
import { Qwen3Reranker } from 'brainbank';
|
|
781
803
|
|
|
782
804
|
const brain = new BrainBank({
|
|
783
805
|
reranker: new Qwen3Reranker(), // ~640MB model, auto-downloaded on first use
|
|
@@ -837,7 +859,7 @@ const brain = new BrainBank({ repoPath: '.' });
|
|
|
837
859
|
brain.use(notes());
|
|
838
860
|
await brain.initialize();
|
|
839
861
|
|
|
840
|
-
const notesPlugin = brain.
|
|
862
|
+
const notesPlugin = brain.plugin('notes');
|
|
841
863
|
|
|
842
864
|
// Store a conversation digest
|
|
843
865
|
await notesPlugin.remember({
|
|
@@ -879,7 +901,7 @@ const brain = new BrainBank({ repoPath: '.' });
|
|
|
879
901
|
brain.use(memory());
|
|
880
902
|
await brain.initialize();
|
|
881
903
|
|
|
882
|
-
const mem = brain.
|
|
904
|
+
const mem = brain.plugin('memory');
|
|
883
905
|
|
|
884
906
|
// Record a learning pattern
|
|
885
907
|
await mem.learn({
|
|
@@ -974,10 +996,12 @@ The `LLMProvider` interface works with any framework:
|
|
|
974
996
|
| Variable | Description |
|
|
975
997
|
|----------|-------------|
|
|
976
998
|
| `BRAINBANK_REPO` | Default repository path (optional — auto-detected from `.git/` or passed per tool call) |
|
|
977
|
-
| `
|
|
999
|
+
| `BRAINBANK_RERANKER` | Reranker: `none` (default), `qwen3` |
|
|
978
1000
|
| `BRAINBANK_DEBUG` | Show full stack traces |
|
|
979
|
-
| `OPENAI_API_KEY` | Required when using
|
|
980
|
-
| `PERPLEXITY_API_KEY` | Required when using
|
|
1001
|
+
| `OPENAI_API_KEY` | Required when using `--embedding openai` |
|
|
1002
|
+
| `PERPLEXITY_API_KEY` | Required when using `--embedding perplexity` or `perplexity-context` |
|
|
1003
|
+
|
|
1004
|
+
> **Note:** `BRAINBANK_EMBEDDING` env var has been removed. Use `brainbank index --embedding <provider>` on first index — the provider is stored in the DB and auto-resolved on subsequent runs.
|
|
981
1005
|
|
|
982
1006
|
---
|
|
983
1007
|
|
|
@@ -987,7 +1011,7 @@ BrainBank can index multiple repositories into a **single shared database**. Thi
|
|
|
987
1011
|
|
|
988
1012
|
### How It Works
|
|
989
1013
|
|
|
990
|
-
When you point BrainBank at a directory that contains multiple Git repositories (subdirectories with `.git/`), the CLI **auto-detects** them and creates namespaced
|
|
1014
|
+
When you point BrainBank at a directory that contains multiple Git repositories (subdirectories with `.git/`), the CLI **auto-detects** them and creates namespaced plugins:
|
|
991
1015
|
|
|
992
1016
|
```bash
|
|
993
1017
|
~/projects/
|
|
@@ -1021,9 +1045,9 @@ brainbank hsearch "cancel job confirmation" --repo ~/projects
|
|
|
1021
1045
|
# and shared utilities — all in one search.
|
|
1022
1046
|
```
|
|
1023
1047
|
|
|
1024
|
-
### Namespaced
|
|
1048
|
+
### Namespaced Plugins
|
|
1025
1049
|
|
|
1026
|
-
Each sub-repository gets its own namespaced
|
|
1050
|
+
Each sub-repository gets its own namespaced plugin instances (e.g., `code:frontend`, `git:backend`). Same-type plugins share a single HNSW vector index for efficient memory usage and unified search.
|
|
1027
1051
|
|
|
1028
1052
|
### Programmatic API
|
|
1029
1053
|
|
|
@@ -1086,7 +1110,7 @@ For large classes (>80 lines), the chunker descends into the class body and extr
|
|
|
1086
1110
|
|
|
1087
1111
|
All indexing is **incremental by default** — only new or changed content is processed:
|
|
1088
1112
|
|
|
1089
|
-
|
|
|
1113
|
+
| Plugin | How it detects changes | What gets skipped |
|
|
1090
1114
|
|---------|----------------------|-------------------|
|
|
1091
1115
|
| **Code** | FNV-1a hash of file content | Unchanged files |
|
|
1092
1116
|
| **Git** | Unique commit hash | Already-indexed commits |
|
|
@@ -1242,11 +1266,23 @@ BrainBank's hybrid search pipeline (Vector + BM25 → RRF) with Perplexity Conte
|
|
|
1242
1266
|
| Benchmark | Metric | Score |
|
|
1243
1267
|
|---|---|:---:|
|
|
1244
1268
|
| **BEIR SciFact** (5,183 docs, 300 queries) | NDCG@10 | **0.761** |
|
|
1245
|
-
| **Custom semantic** (
|
|
1269
|
+
| **Custom semantic** (69 docs, 20 queries) | R@5 | **83%** |
|
|
1246
1270
|
|
|
1247
1271
|
The hybrid pipeline improved R@5 by **+26pp over vector-only** retrieval on our custom eval.
|
|
1248
1272
|
|
|
1249
|
-
|
|
1273
|
+
#### BrainBank vs QMD (Head-to-Head)
|
|
1274
|
+
|
|
1275
|
+
Compared against [QMD](https://github.com/tobi/qmd), a local-first search engine using GGUF models (embeddinggemma-300M + query expansion + reranker) — same corpus, same 20 queries:
|
|
1276
|
+
|
|
1277
|
+
| Metric | BrainBank + Reranker | QMD + Reranker |
|
|
1278
|
+
|---|:---:|:---:|
|
|
1279
|
+
| **R@5** | **83%** | 65% |
|
|
1280
|
+
| **MRR** | **0.57** | 0.45 |
|
|
1281
|
+
| **Misses** | **1/20** | 6/20 |
|
|
1282
|
+
|
|
1283
|
+
> BrainBank wins by +18pp R@5. QMD is competitive on semantic queries (81% vs 94%) and ties on broad queries (83% vs 83%) — impressive for a fully local pipeline with zero API calls.
|
|
1284
|
+
|
|
1285
|
+
See **[BENCHMARKS.md](./BENCHMARKS.md)** for full pipeline progression, per-technique impact, QMD comparison details, and reproduction instructions.
|
|
1250
1286
|
|
|
1251
1287
|
#### Running the RAG Eval
|
|
1252
1288
|
|
|
@@ -1287,7 +1323,7 @@ PERPLEXITY_API_KEY=pplx-... npx tsx test/benchmarks/rag/eval.ts --docs ~/path/to
|
|
|
1287
1323
|
│ │
|
|
1288
1324
|
│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌────────────┐│
|
|
1289
1325
|
│ │ Code │ │ Git │ │ Docs │ │ Collection ││
|
|
1290
|
-
│ │
|
|
1326
|
+
│ │ Plugin │ │ Plugin │ │ Plugin │ │ (dynamic) ││
|
|
1291
1327
|
│ └────┬────┘ └────┬────┘ └────┬────┘ └─────┬──────┘│
|
|
1292
1328
|
│ │ │ │ │ │
|
|
1293
1329
|
│ ┌────▼────┐ ┌────▼────┐ ┌────▼────┐ ┌─────▼──────┐│
|
|
@@ -1337,7 +1373,7 @@ Final results (sorted by blended score)
|
|
|
1337
1373
|
|
|
1338
1374
|
### Data Flow
|
|
1339
1375
|
|
|
1340
|
-
1. **Index** —
|
|
1376
|
+
1. **Index** — Plugins parse files into chunks (tree-sitter AST for code, heading-based for docs)
|
|
1341
1377
|
2. **Embed** — Each chunk gets a vector (local WASM, OpenAI, or Perplexity)
|
|
1342
1378
|
3. **Store** — Chunks + vectors → SQLite, vectors → HNSW index
|
|
1343
1379
|
4. **Search** — Query → HNSW k-NN + BM25 keyword → RRF fusion → optional reranker
|
|
@@ -1348,8 +1384,8 @@ Final results (sorted by blended score)
|
|
|
1348
1384
|
## Testing
|
|
1349
1385
|
|
|
1350
1386
|
```bash
|
|
1351
|
-
npm test # Unit tests (
|
|
1352
|
-
npm test -- --integration # Full suite (
|
|
1387
|
+
npm test # Unit tests (172 tests)
|
|
1388
|
+
npm test -- --integration # Full suite (includes real models + all domains)
|
|
1353
1389
|
npm test -- --filter code # Filter by test name
|
|
1354
1390
|
npm test -- --verbose # Show assertion details
|
|
1355
1391
|
```
|
|
@@ -521,12 +521,12 @@ declare class Collection {
|
|
|
521
521
|
}
|
|
522
522
|
|
|
523
523
|
/**
|
|
524
|
-
* BrainBank —
|
|
524
|
+
* BrainBank — Plugin System
|
|
525
525
|
*
|
|
526
|
-
*
|
|
527
|
-
* and push content into BrainBank. Built-in
|
|
526
|
+
* Plugins are interchangeable strategies that scan external data sources
|
|
527
|
+
* and push content into BrainBank. Built-in plugins handle code,
|
|
528
528
|
* git, and docs. Third-party frameworks (LangChain, etc.)
|
|
529
|
-
* can implement custom
|
|
529
|
+
* can implement custom plugins.
|
|
530
530
|
*
|
|
531
531
|
* import { BrainBank } from 'brainbank';
|
|
532
532
|
* import { code } from 'brainbank/code';
|
|
@@ -535,8 +535,8 @@ declare class Collection {
|
|
|
535
535
|
* .use(code({ repoPath: '.' }));
|
|
536
536
|
*/
|
|
537
537
|
|
|
538
|
-
interface
|
|
539
|
-
/** SQLite database (shared across all
|
|
538
|
+
interface PluginContext {
|
|
539
|
+
/** SQLite database (shared across all plugins). */
|
|
540
540
|
db: Database;
|
|
541
541
|
/** Embedding provider (shared). */
|
|
542
542
|
embedding: EmbeddingProvider;
|
|
@@ -555,31 +555,31 @@ interface IndexerContext {
|
|
|
555
555
|
/** Get or create a dynamic collection. */
|
|
556
556
|
collection(name: string): Collection;
|
|
557
557
|
}
|
|
558
|
-
interface
|
|
559
|
-
/** Unique
|
|
558
|
+
interface Plugin {
|
|
559
|
+
/** Unique plugin name (e.g. 'code', 'git', 'docs'). */
|
|
560
560
|
readonly name: string;
|
|
561
|
-
/** Initialize the
|
|
562
|
-
initialize(ctx:
|
|
563
|
-
/** Return stats for this
|
|
561
|
+
/** Initialize the plugin (create HNSW, load vectors, etc.). */
|
|
562
|
+
initialize(ctx: PluginContext): Promise<void>;
|
|
563
|
+
/** Return stats for this plugin. */
|
|
564
564
|
stats?(): Record<string, any>;
|
|
565
565
|
/** Clean up resources. */
|
|
566
566
|
close?(): void;
|
|
567
567
|
}
|
|
568
|
-
/**
|
|
569
|
-
interface IndexablePlugin extends
|
|
568
|
+
/** Plugins that can scan and index content (code, git). */
|
|
569
|
+
interface IndexablePlugin extends Plugin {
|
|
570
570
|
index(options?: any): Promise<any>;
|
|
571
571
|
}
|
|
572
|
-
/**
|
|
573
|
-
interface SearchablePlugin extends
|
|
572
|
+
/** Plugins that can search indexed content (docs). */
|
|
573
|
+
interface SearchablePlugin extends Plugin {
|
|
574
574
|
search(query: string, options?: any): Promise<SearchResult[]>;
|
|
575
575
|
}
|
|
576
|
-
/**
|
|
577
|
-
interface WatchablePlugin extends
|
|
576
|
+
/** Plugins that support file watch mode. */
|
|
577
|
+
interface WatchablePlugin extends Plugin {
|
|
578
578
|
onFileChange(filePath: string, event: 'create' | 'update' | 'delete'): Promise<boolean>;
|
|
579
579
|
watchPatterns(): string[];
|
|
580
580
|
}
|
|
581
|
-
/**
|
|
582
|
-
interface CollectionPlugin extends
|
|
581
|
+
/** Plugins that manage document collections. */
|
|
582
|
+
interface CollectionPlugin extends Plugin {
|
|
583
583
|
addCollection(collection: DocumentCollection): void;
|
|
584
584
|
removeCollection(name: string): void;
|
|
585
585
|
listCollections(): DocumentCollection[];
|
|
@@ -590,4 +590,4 @@ interface CollectionPlugin extends Indexer {
|
|
|
590
590
|
listContexts?(): any[];
|
|
591
591
|
}
|
|
592
592
|
|
|
593
|
-
export { type SearchResultType as A, type BrainBankConfig as B, Collection as C, type DocumentCollection as D, type EmbeddingProvider as E, type SearchablePlugin as F, type GitCommitRecord as G, HNSWIndex as H, type
|
|
593
|
+
export { type SearchResultType as A, type BrainBankConfig as B, Collection as C, type DocumentCollection as D, type EmbeddingProvider as E, type SearchablePlugin as F, type GitCommitRecord as G, HNSWIndex as H, type IndexResult as I, isCodeResult as J, isCollectionResult as K, type LearningPattern as L, isCommitResult as M, isDocumentResult as N, isPatternResult as O, type Plugin as P, matchResult as Q, type ResolvedConfig as R, type StageProgressCallback as S, type VectorIndex as V, type WatchablePlugin as W, type ProgressCallback as a, type SearchResult as b, type ContextOptions as c, type CoEditSuggestion as d, type IndexStats as e, type Reranker as f, type SearchHit as g, type CodeChunk as h, Database as i, type CodeResult as j, type CodeResultMetadata as k, type CollectionAddOptions as l, type CollectionItem as m, type CollectionPlugin as n, type CollectionResult as o, type CollectionSearchOptions as p, type CommitResult as q, type CommitResultMetadata as r, type DistilledStrategy as s, type DocChunk as t, type DocumentResult as u, type DocumentResultMetadata as v, type IndexablePlugin as w, type PatternResult as x, type PatternResultMetadata as y, type PluginContext as z };
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import {
|
|
2
|
+
__name
|
|
3
|
+
} from "./chunk-7QVYU63E.js";
|
|
4
|
+
|
|
5
|
+
// src/providers/embeddings/local-embedding.ts
|
|
6
|
+
var LocalEmbedding = class {
|
|
7
|
+
static {
|
|
8
|
+
__name(this, "LocalEmbedding");
|
|
9
|
+
}
|
|
10
|
+
dims = 384;
|
|
11
|
+
_pipeline = null;
|
|
12
|
+
_modelName;
|
|
13
|
+
_cacheDir;
|
|
14
|
+
constructor(options = {}) {
|
|
15
|
+
this._modelName = options.model ?? "Xenova/all-MiniLM-L6-v2";
|
|
16
|
+
this._cacheDir = options.cacheDir ?? ".model-cache";
|
|
17
|
+
}
|
|
18
|
+
_pipelinePromise = null;
|
|
19
|
+
/**
|
|
20
|
+
* Lazy-load the transformer pipeline.
|
|
21
|
+
* Singleton — created once and reused.
|
|
22
|
+
* Promise-deduped to prevent concurrent downloads.
|
|
23
|
+
*/
|
|
24
|
+
async _getPipeline() {
|
|
25
|
+
if (this._pipeline) return this._pipeline;
|
|
26
|
+
if (this._pipelinePromise) return this._pipelinePromise;
|
|
27
|
+
this._pipelinePromise = (async () => {
|
|
28
|
+
const { pipeline, env } = await import("@xenova/transformers");
|
|
29
|
+
env.cacheDir = this._cacheDir;
|
|
30
|
+
env.allowLocalModels = true;
|
|
31
|
+
this._pipeline = await pipeline("feature-extraction", this._modelName, {
|
|
32
|
+
quantized: true
|
|
33
|
+
});
|
|
34
|
+
return this._pipeline;
|
|
35
|
+
})();
|
|
36
|
+
try {
|
|
37
|
+
return await this._pipelinePromise;
|
|
38
|
+
} finally {
|
|
39
|
+
this._pipelinePromise = null;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Embed a single text string.
|
|
44
|
+
* Returns a normalized Float32Array of length 384.
|
|
45
|
+
*/
|
|
46
|
+
async embed(text) {
|
|
47
|
+
const pipe = await this._getPipeline();
|
|
48
|
+
const output = await pipe(text, { pooling: "mean", normalize: true });
|
|
49
|
+
return output.data;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Embed multiple texts using real batch processing.
|
|
53
|
+
* Chunks into groups of BATCH_SIZE to balance throughput vs memory.
|
|
54
|
+
*/
|
|
55
|
+
async embedBatch(texts) {
|
|
56
|
+
if (texts.length === 0) return [];
|
|
57
|
+
const BATCH_SIZE = 32;
|
|
58
|
+
const pipe = await this._getPipeline();
|
|
59
|
+
const results = [];
|
|
60
|
+
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
|
|
61
|
+
const batch = texts.slice(i, i + BATCH_SIZE);
|
|
62
|
+
const output = await pipe(batch, { pooling: "mean", normalize: true });
|
|
63
|
+
for (let j = 0; j < batch.length; j++) {
|
|
64
|
+
const start = j * this.dims;
|
|
65
|
+
results.push(output.data.slice(start, start + this.dims));
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
return results;
|
|
69
|
+
}
|
|
70
|
+
async close() {
|
|
71
|
+
this._pipeline = null;
|
|
72
|
+
}
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
export {
|
|
76
|
+
LocalEmbedding
|
|
77
|
+
};
|
|
78
|
+
//# sourceMappingURL=chunk-424UFCY7.js.map
|