brainbank 0.1.0 → 0.1.1-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/README.md +85 -989
  2. package/assets/architecture.png +0 -0
  3. package/bin/brainbank +8 -1
  4. package/bin/brainbank-mcp +19 -0
  5. package/dist/chunk-3YBCD6DI.js +117 -0
  6. package/dist/chunk-3YBCD6DI.js.map +1 -0
  7. package/dist/chunk-63GBCDS5.js +3249 -0
  8. package/dist/chunk-63GBCDS5.js.map +1 -0
  9. package/dist/chunk-DMFMTOHF.js +123 -0
  10. package/dist/chunk-DMFMTOHF.js.map +1 -0
  11. package/dist/chunk-FQYKWB2Q.js +136 -0
  12. package/dist/chunk-FQYKWB2Q.js.map +1 -0
  13. package/dist/chunk-IMJJ2VEM.js +74 -0
  14. package/dist/chunk-IMJJ2VEM.js.map +1 -0
  15. package/dist/chunk-M744PCJQ.js +43 -0
  16. package/dist/chunk-M744PCJQ.js.map +1 -0
  17. package/dist/chunk-O3J6ZIXK.js +82 -0
  18. package/dist/chunk-O3J6ZIXK.js.map +1 -0
  19. package/dist/chunk-OPH7GZ7U.js +124 -0
  20. package/dist/chunk-OPH7GZ7U.js.map +1 -0
  21. package/dist/chunk-PXEWQMN7.js +89 -0
  22. package/dist/chunk-PXEWQMN7.js.map +1 -0
  23. package/dist/chunk-RDQYDLYZ.js +69 -0
  24. package/dist/chunk-RDQYDLYZ.js.map +1 -0
  25. package/dist/chunk-VIIHPCC4.js +254 -0
  26. package/dist/chunk-VIIHPCC4.js.map +1 -0
  27. package/dist/chunk-WCQVDF3K.js +14 -0
  28. package/dist/cli.js +2680 -304
  29. package/dist/cli.js.map +1 -1
  30. package/dist/haiku-expander-YRSIPGKP.js +8 -0
  31. package/dist/haiku-pruner-SHAXUPY6.js +8 -0
  32. package/dist/http-server-QUXHLWUM.js +9 -0
  33. package/dist/index.d.ts +1891 -575
  34. package/dist/index.js +323 -46
  35. package/dist/index.js.map +1 -1
  36. package/dist/local-embedding-NZQTILGV.js +8 -0
  37. package/dist/mcp.d.ts +2 -0
  38. package/dist/mcp.js +334 -0
  39. package/dist/mcp.js.map +1 -0
  40. package/dist/openai-embedding-ZP5TSUJG.js +8 -0
  41. package/dist/openai-embedding-ZP5TSUJG.js.map +1 -0
  42. package/dist/perplexity-context-embedding-GI5PHE6X.js +9 -0
  43. package/dist/perplexity-context-embedding-GI5PHE6X.js.map +1 -0
  44. package/dist/perplexity-embedding-KZRYGJRC.js +10 -0
  45. package/dist/perplexity-embedding-KZRYGJRC.js.map +1 -0
  46. package/dist/plugin-IKQ6IRSJ.js +32 -0
  47. package/dist/plugin-IKQ6IRSJ.js.map +1 -0
  48. package/dist/resolve-ASGLBNUC.js +10 -0
  49. package/dist/resolve-ASGLBNUC.js.map +1 -0
  50. package/dist/stats-tui-ZY2NQSEA.js +1904 -0
  51. package/dist/stats-tui-ZY2NQSEA.js.map +1 -0
  52. package/package.json +39 -34
  53. package/src/brainbank.ts +617 -0
  54. package/src/cli/commands/collection.ts +77 -0
  55. package/src/cli/commands/context.ts +179 -0
  56. package/src/cli/commands/daemon.ts +100 -0
  57. package/src/cli/commands/docs.ts +71 -0
  58. package/src/cli/commands/files.ts +69 -0
  59. package/src/cli/commands/help.ts +78 -0
  60. package/src/cli/commands/index.ts +482 -0
  61. package/src/cli/commands/kv.ts +140 -0
  62. package/src/cli/commands/mcp-export.ts +273 -0
  63. package/src/cli/commands/mcp.ts +6 -0
  64. package/src/cli/commands/reembed.ts +30 -0
  65. package/src/cli/commands/reindex.ts +40 -0
  66. package/src/cli/commands/scan.ts +336 -0
  67. package/src/cli/commands/search.ts +203 -0
  68. package/src/cli/commands/stats.ts +68 -0
  69. package/src/cli/commands/status.ts +47 -0
  70. package/src/cli/commands/watch.ts +47 -0
  71. package/src/cli/factory/brain-context.ts +43 -0
  72. package/src/cli/factory/builtin-registration.ts +87 -0
  73. package/src/cli/factory/config-loader.ts +77 -0
  74. package/src/cli/factory/index.ts +69 -0
  75. package/src/cli/factory/plugin-loader.ts +325 -0
  76. package/src/cli/index.ts +73 -0
  77. package/src/cli/server-client.ts +178 -0
  78. package/src/cli/tui/index-tui.tsx +667 -0
  79. package/src/cli/tui/stats-data.ts +523 -0
  80. package/src/cli/tui/stats-search.ts +262 -0
  81. package/src/cli/tui/stats-tui.tsx +1465 -0
  82. package/src/cli/tui/tree-scanner.ts +650 -0
  83. package/src/cli/utils.ts +137 -0
  84. package/src/config.ts +49 -0
  85. package/src/constants.ts +21 -0
  86. package/src/db/adapter.ts +112 -0
  87. package/src/db/metadata.ts +130 -0
  88. package/src/db/migrations.ts +66 -0
  89. package/src/db/sqlite-adapter.ts +218 -0
  90. package/src/db/tracker.ts +91 -0
  91. package/src/engine/index-api.ts +81 -0
  92. package/src/engine/reembed.ts +206 -0
  93. package/src/engine/search-api.ts +218 -0
  94. package/src/index.ts +154 -0
  95. package/src/lib/fts.ts +57 -0
  96. package/src/lib/languages.ts +180 -0
  97. package/src/lib/logger.ts +126 -0
  98. package/src/lib/math.ts +87 -0
  99. package/src/lib/provider-key.ts +20 -0
  100. package/src/lib/prune.ts +71 -0
  101. package/src/lib/rrf.ts +133 -0
  102. package/src/lib/write-lock.ts +108 -0
  103. package/src/mcp/mcp-server.ts +195 -0
  104. package/src/mcp/workspace-factory.ts +68 -0
  105. package/src/mcp/workspace-pool.ts +224 -0
  106. package/src/plugin.ts +381 -0
  107. package/src/providers/embeddings/embedding-worker-thread.ts +95 -0
  108. package/src/providers/embeddings/embedding-worker.ts +141 -0
  109. package/src/providers/embeddings/local-embedding.ts +115 -0
  110. package/src/providers/embeddings/openai-embedding.ts +167 -0
  111. package/src/providers/embeddings/perplexity-context-embedding.ts +195 -0
  112. package/src/providers/embeddings/perplexity-embedding.ts +165 -0
  113. package/src/providers/embeddings/resolve.ts +34 -0
  114. package/src/providers/pruners/haiku-expander.ts +166 -0
  115. package/src/providers/pruners/haiku-pruner.ts +112 -0
  116. package/src/providers/vector/hnsw-index.ts +174 -0
  117. package/src/providers/vector/hnsw-loader.ts +129 -0
  118. package/src/search/bm25-boost.ts +69 -0
  119. package/src/search/context-builder.ts +251 -0
  120. package/src/search/keyword/composite-bm25-search.ts +47 -0
  121. package/src/search/types.ts +37 -0
  122. package/src/search/vector/composite-vector-search.ts +61 -0
  123. package/src/search/vector/mmr.ts +64 -0
  124. package/src/services/collection.ts +384 -0
  125. package/src/services/daemon.ts +87 -0
  126. package/src/services/http-server.ts +336 -0
  127. package/src/services/kv-service.ts +64 -0
  128. package/src/services/plugin-registry.ts +77 -0
  129. package/src/services/watch.ts +340 -0
  130. package/src/services/webhook-server.ts +100 -0
  131. package/src/types.ts +493 -0
  132. package/dist/chunk-2P3EGY6S.js +0 -37
  133. package/dist/chunk-2P3EGY6S.js.map +0 -1
  134. package/dist/chunk-3GAIDXRW.js +0 -105
  135. package/dist/chunk-3GAIDXRW.js.map +0 -1
  136. package/dist/chunk-4ZKBQ33J.js +0 -56
  137. package/dist/chunk-4ZKBQ33J.js.map +0 -1
  138. package/dist/chunk-7QVYU63E.js +0 -7
  139. package/dist/chunk-EDKSKLX4.js +0 -490
  140. package/dist/chunk-EDKSKLX4.js.map +0 -1
  141. package/dist/chunk-GOUBW7UA.js +0 -373
  142. package/dist/chunk-GOUBW7UA.js.map +0 -1
  143. package/dist/chunk-MJ3Y24H6.js +0 -185
  144. package/dist/chunk-MJ3Y24H6.js.map +0 -1
  145. package/dist/chunk-N6ZMBFDE.js +0 -224
  146. package/dist/chunk-N6ZMBFDE.js.map +0 -1
  147. package/dist/chunk-YGSEUWLV.js +0 -2053
  148. package/dist/chunk-YGSEUWLV.js.map +0 -1
  149. package/dist/chunk-Z5SU54HP.js +0 -171
  150. package/dist/chunk-Z5SU54HP.js.map +0 -1
  151. package/dist/code.d.ts +0 -31
  152. package/dist/code.js +0 -8
  153. package/dist/docs.d.ts +0 -19
  154. package/dist/docs.js +0 -8
  155. package/dist/git.d.ts +0 -31
  156. package/dist/git.js +0 -8
  157. package/dist/memory.d.ts +0 -19
  158. package/dist/memory.js +0 -146
  159. package/dist/memory.js.map +0 -1
  160. package/dist/notes.d.ts +0 -19
  161. package/dist/notes.js +0 -57
  162. package/dist/notes.js.map +0 -1
  163. package/dist/openai-PCTYLOWI.js +0 -8
  164. package/dist/types-Da_zLLOl.d.ts +0 -474
  165. /package/dist/{chunk-7QVYU63E.js.map → chunk-WCQVDF3K.js.map} +0 -0
  166. /package/dist/{code.js.map → haiku-expander-YRSIPGKP.js.map} +0 -0
  167. /package/dist/{docs.js.map → haiku-pruner-SHAXUPY6.js.map} +0 -0
  168. /package/dist/{git.js.map → http-server-QUXHLWUM.js.map} +0 -0
  169. /package/dist/{openai-PCTYLOWI.js.map → local-embedding-NZQTILGV.js.map} +0 -0
package/README.md CHANGED
@@ -4,1056 +4,152 @@
4
4
 
5
5
  BrainBank gives LLMs a long-term memory that persists between sessions.
6
6
 
7
- - **All-in-one** — core + code + git + docs + CLI in a single `brainbank` package
8
- - **Pluggable indexers** — `.use()` only what you need (code, git, docs, or custom)
9
- - **Dynamic collections** — `brain.collection('errors')` for any structured data
7
+ - **Pluggable** — `.use()` only what you need: [code](#packages), [git](#packages), [docs](#packages), or [custom](docs/custom-plugins.md)
10
8
  - **Hybrid search** — vector + BM25 fused with Reciprocal Rank Fusion
11
- - **Pluggable embeddings** — local WASM (free) or OpenAI (higher quality)
12
- - **Multi-repo** — index multiple repositories into one shared database
13
- - **Portable** — single `.brainbank/brainbank.db` file
14
- - **Optional packages** — [`@brainbank/memory`](#memory) (deterministic fact extraction), [`@brainbank/reranker`](#reranker) (Qwen3 cross-encoder), [`@brainbank/mcp`](#mcp-server) (MCP server)
15
-
16
- ![BrainBank Architecture](assets/architecture.png)
9
+ - **Dynamic collections** — `brain.collection('errors')` for any structured data
10
+ - **Pluggable embeddings** — local WASM (free), OpenAI, or Perplexity
11
+ - **Multi-process safe** — concurrent CLI, MCP, and watch processes with automatic hot-reload
12
+ - **Portable** — single `.brainbank/brainbank.db` SQLite file
13
+ - **Modular** — lightweight core + optional [`@brainbank/*`](#packages) packages
17
14
 
18
15
  ---
19
16
 
20
- ## Why BrainBank?
21
-
22
- Built for a multi-repo codebase that needed unified AI context. Zero infrastructure, zero ongoing cost.
23
-
24
- Most AI memory solutions (mem0, Zep, LangMem) require cloud services, external databases, or LLM calls just to store a memory. BrainBank takes a different approach:
25
-
26
- | | **BrainBank** | **mem0** | **Zep** | **LangMem** |
27
- |---|:---:|:---:|:---:|:---:|
28
- | Infrastructure | **SQLite file** | Vector DB + cloud | Neo4j + cloud | LangGraph Platform |
29
- | LLM required to write | **No**¹ | Yes | Yes | Yes |
30
- | Code-aware | **30+ languages, git, co-edits** | ✗ | ✗ | ✗ |
31
- | Custom indexers | **`.use()` plugin system** | ✗ | ✗ | ✗ |
32
- | Search | **Vector + BM25 + RRF** | Vector only | Vector + graph | Vector only |
33
- | Framework lock-in | **None** | Optional | Zep cloud | LangChain |
34
- | Portable | **Copy one file** | Tied to DB | Tied to cloud | Tied to platform |
35
-
36
- > ¹ mem0 and Zep use LLMs to auto-extract memories from raw text. BrainBank is explicit — you decide what gets stored. Less magic, more control.
37
-
38
- **In short:**
39
- - **Code-first** — the only memory layer that understands code structure, git history, and file co-edit relationships
40
- - **$0 memory bill** — no LLM calls to extract/consolidate. You store what you want, BrainBank embeds deterministically
41
- - **Truly portable** — `.brainbank/brainbank.db` is a normal file. Copy it, back it up, `git lfs` it
42
- - **No vendor lock-in** — plain TypeScript, works with any agent framework or none at all
43
-
44
- ### Table of Contents
45
-
46
- - [Why BrainBank?](#why-brainbank)
47
- - [Installation](#installation)
48
- - [Quick Start](#quick-start)
49
- - [CLI](#cli)
50
- - [Programmatic API](#programmatic-api)
51
- - [Indexers](#indexers)
52
- - [Collections](#collections)
53
- - [Search](#search)
54
- - [Document Collections](#document-collections)
55
- - [Context Generation](#context-generation)
56
- - [Custom Indexers](#custom-indexers)
57
- - [AI Agent Integration](#ai-agent-integration)
58
- - [Examples](#examples)
59
- - [Watch Mode](#watch-mode)
60
- - [MCP Server](#mcp-server)
61
- - [Configuration](#configuration)
62
- - [Embedding Providers](#embedding-providers)
63
- - [Reranker](#reranker)
64
- - [Memory](#memory)
65
- - [Multi-Repository Indexing](#multi-repository-indexing)
66
- - [Indexing](#indexing-1)
67
- - [Incremental Indexing](#incremental-indexing)
68
- - [Re-embedding](#re-embedding)
69
- - [Architecture](#architecture)
70
- - [Search Pipeline](#search-pipeline)
17
+ <img src="assets/architecture.png" alt="BrainBank Architecture" width="600">
71
18
 
72
19
  ---
73
20
 
74
- ## Installation
75
-
76
- ```bash
77
- npm install brainbank
78
- ```
79
-
80
- ### Optional Packages
81
-
82
- | Package | When to install |
83
- |---------|----------------|
84
- | `@brainbank/memory` | Deterministic memory extraction for LLM conversations (mem0-style pipeline) |
85
- | `@brainbank/reranker` | Cross-encoder reranker (Qwen3-0.6B, ~640MB model) |
86
- | `@brainbank/mcp` | MCP server for AI tool integration |
21
+ ## Quick Start
87
22
 
88
23
  ```bash
89
- # Memory — automatic fact extraction & dedup for chatbots/agents
90
- npm install @brainbank/memory
91
-
92
- # Reranker — improves search ranking with local neural inference
93
- npm install @brainbank/reranker node-llama-cpp
94
-
95
- # MCP server — for Antigravity, Claude Desktop, etc.
96
- npm install @brainbank/mcp
24
+ npm i -g brainbank @brainbank/code @brainbank/git @brainbank/docs
97
25
  ```
98
26
 
99
- ---
100
-
101
- ## Quick Start
102
-
103
- Get semantic search over your codebase in under a minute:
104
-
105
- ```typescript
106
- import { BrainBank } from 'brainbank';
107
- import { code } from 'brainbank/code';
108
- import { git } from 'brainbank/git';
109
-
110
- const brain = new BrainBank({ repoPath: '.' })
111
- .use(code())
112
- .use(git());
113
-
114
- await brain.index(); // indexes code + git history (incremental)
115
-
116
- // Search across everything
117
- const results = await brain.hybridSearch('authentication middleware');
118
- console.log(results.map(r => `${r.filePath}:${r.line} (${r.score.toFixed(2)})`));
119
-
120
- // Store agent memory
121
- const log = brain.collection('decisions');
122
- await log.add(
123
- 'Switched from bcrypt to argon2id for password hashing. ' +
124
- 'Argon2id is memory-hard and recommended by OWASP for new projects. ' +
125
- 'Updated src/auth/hash.ts and all tests.',
126
- { tags: ['security', 'auth'] }
127
- );
128
-
129
- // Recall later: "what did we decide about password hashing?"
130
- const hits = await log.search('password hashing decision');
27
+ > [!IMPORTANT]
28
+ > **Node 23+ users:** `@brainbank/code` uses [tree-sitter](https://github.com/tree-sitter/node-tree-sitter) native bindings for AST parsing. On **Node 23 and 24**, V8 headers require C++20 but tree-sitter's `binding.gyp` defaults to C++17, causing the install to fail with `"C++20 or later required."`. Fix it by setting the C++ standard before install:
29
+ >
30
+ > ```bash
31
+ > CXXFLAGS="-std=c++20" npm i -g brainbank @brainbank/code @brainbank/git @brainbank/docs
32
+ > ```
33
+ >
34
+ > Node ≤22 is unaffected — prebuilt binaries are available and no compilation is needed.
131
35
 
132
- await brain.close();
133
- ```
36
+ > If you get `ERESOLVE` errors, use `npm i --legacy-peer-deps` — tree-sitter grammars have overlapping peer dep ranges.
134
37
 
135
- Or use the CLI — zero code:
38
+ ### CLI — zero code
136
39
 
137
40
  ```bash
138
- npm install -g brainbank
139
- brainbank index . # index code + git
41
+ brainbank index . # scans repo → interactive select → index
42
+ brainbank index . --yes # skip prompts, auto-select all
140
43
  brainbank hsearch "rate limiting" # hybrid search
141
44
  brainbank kv add decisions "Use Redis..." # store a memory
142
45
  brainbank kv search decisions "caching" # recall it
143
46
  ```
144
47
 
145
- ## CLI
146
-
147
- BrainBank can be used entirely from the command line — no config file needed.
148
-
149
- ### Indexing
150
-
151
- `index` processes **code files + git history** only. Document collections are indexed separately with `docs`.
152
-
153
- ```bash
154
- brainbank index [path] # Index code + git history
155
- brainbank index [path] --force # Force re-index everything
156
- brainbank index [path] --depth 200 # Limit git commit depth
157
- brainbank docs [--collection <name>] # Index document collections
158
- ```
159
-
160
- > **Multi-repo:** If `[path]` contains multiple Git subdirectories (no root `.git/`), BrainBank auto-detects them and indexes all into one shared DB. See [Multi-Repository Indexing](#multi-repository-indexing).
161
-
162
- ### Watch Mode
163
-
164
- Automatically re-index code files when they change. The watcher detects file changes and re-indexes them incrementally:
165
-
166
- ```bash
167
- brainbank watch # Watch repo, auto re-index on save
168
- # ━━━ BrainBank Watch ━━━
169
- # Watching /path/to/repo for changes...
170
- # 14:30:02 ✓ code: src/api.ts
171
- # 14:30:05 ✓ code: src/routes.ts
172
- # 14:30:08 ✓ csv: data/metrics.csv ← custom indexer
173
- ```
174
-
175
- > Watch mode monitors **code files** by default. [Custom indexers](#custom-indexers) that implement `watchPatterns()` and `onFileChange()` are automatically picked up — their name appears in the console output alongside the built-in `code` indexer. Git history and document collections are not affected by file-system changes and must be re-indexed explicitly with `brainbank index` / `brainbank docs`.
176
-
177
- ### Document Collections
178
-
179
- ```bash
180
- brainbank collection add <path> --name docs # Register a document folder
181
- brainbank collection list # List registered collections
182
- brainbank collection remove <name> # Remove a collection
183
- ```
184
-
185
- ### Search
186
-
187
- ```bash
188
- brainbank search <query> # Semantic search (vector)
189
- brainbank hsearch <query> # Hybrid search (best quality)
190
- brainbank ksearch <query> # Keyword search (BM25, instant)
191
- brainbank dsearch <query> # Document search
192
- ```
193
-
194
- ### Context
195
-
196
- ```bash
197
- brainbank context <task> # Get formatted context for a task
198
- brainbank context add <col> <path> <desc> # Add context metadata
199
- brainbank context list # List context metadata
200
- ```
201
-
202
- ### KV Store (dynamic collections)
203
-
204
- ```bash
205
- brainbank kv add <coll> <content> # Add item to a collection
206
- brainbank kv search <coll> <query> # Search a collection
207
- brainbank kv list [coll] # List collections or items
208
- brainbank kv trim <coll> --keep <n> # Keep only N most recent
209
- brainbank kv clear <coll> # Clear all items
210
- ```
211
-
212
- ### Utility
213
-
214
- ```bash
215
- brainbank stats # Show index statistics
216
- brainbank reembed # Re-embed all vectors (provider switch)
217
- brainbank watch # Watch files, auto re-index on change
218
- brainbank serve # Start MCP server (stdio)
219
- ```
220
-
221
- **Global options:** `--repo <path>`, `--force`, `--depth <n>`, `--collection <name>`, `--pattern <glob>`, `--context <desc>`, `--reranker <name>`
222
-
223
- ---
224
-
225
- ## Programmatic API
226
-
227
- Use BrainBank as a library in your TypeScript/Node.js project.
228
-
229
- ### Indexers
230
-
231
- BrainBank uses pluggable indexers. Register only what you need with `.use()`:
232
-
233
- | Indexer | Import | Description |
234
- |---------|--------|-------------|
235
- | `code` | `brainbank/code` | Language-aware code chunking (30+ languages) |
236
- | `git` | `brainbank/git` | Git commit history, diffs, co-edit relationships |
237
- | `docs` | `brainbank/docs` | Document collections (markdown, wikis) |
48
+ ### Programmatic API
238
49
 
239
50
  ```typescript
240
51
  import { BrainBank } from 'brainbank';
241
- import { code } from 'brainbank/code';
242
- import { git } from 'brainbank/git';
243
- import { docs } from 'brainbank/docs';
52
+ import { code } from '@brainbank/code';
53
+ import { git } from '@brainbank/git';
244
54
 
245
- // Pick only the indexers you need
246
55
  const brain = new BrainBank({ repoPath: '.' })
247
56
  .use(code())
248
- .use(git())
249
- .use(docs());
57
+ .use(git());
250
58
 
251
- // Index code + git (incremental — only processes changes)
252
59
  await brain.index();
253
60
 
254
- // Index document collections
255
- await brain.addCollection({ name: 'wiki', path: '~/docs', pattern: '**/*.md' });
256
- await brain.indexDocs();
257
- ```
258
-
259
- ### Collections
260
-
261
- Dynamic key-value collections with semantic search — the building block for agent memory:
262
-
263
- ```typescript
264
- const decisions = brain.collection('decisions');
265
-
266
- // Store rich content (auto-embedded for vector search)
267
- await decisions.add(
268
- 'Use SQLite with WAL mode instead of PostgreSQL. Portable single-file ' +
269
- 'storage, works offline, zero infrastructure.',
270
- { tags: ['architecture'], metadata: { files: ['src/db.ts'] } }
271
- );
272
-
273
- // Semantic search — finds by meaning, not keywords
274
- const hits = await decisions.search('why not postgres');
275
- // → [{ content: 'Use SQLite with WAL...', score: 0.95, tags: [...], metadata: {...} }]
276
-
277
- // Management
278
- decisions.list({ limit: 20 }); // newest first
279
- decisions.list({ tags: ['architecture'] }); // filter by tags
280
- decisions.count(); // total items
281
- decisions.trim({ keep: 50 }); // keep N most recent
282
- decisions.prune({ olderThan: '30d' }); // remove older than 30 days
283
- brain.listCollectionNames(); // → ['decisions', ...]
284
- ```
285
-
286
- > 📂 See [examples/collections](examples/collections/) for a complete runnable demo with cross-collection linking and metadata.
287
-
288
- ### Watch Mode
289
-
290
- Auto-re-index when files change:
291
-
292
- ```typescript
293
- // API
294
- const watcher = brain.watch({
295
- debounceMs: 2000,
296
- onIndex: (file, indexer) => console.log(`${indexer}: ${file}`),
297
- onError: (err) => console.error(err.message),
298
- });
299
-
300
- // Later: watcher.close();
301
- ```
302
-
303
- ```bash
304
- # CLI
305
- brainbank watch
306
- # ━━━ BrainBank Watch ━━━
307
- # Watching /path/to/repo for changes...
308
- # 14:30:02 ✓ code: src/api.ts
309
- # 14:30:05 ✓ code: src/routes.ts
310
- ```
311
-
312
- #### Custom Indexer Watch
313
-
314
- Custom indexers can hook into watch mode by implementing `onFileChange` and `watchPatterns`:
315
-
316
- ```typescript
317
- import type { Indexer, IndexerContext } from 'brainbank';
318
-
319
- function csvIndexer(): Indexer {
320
- let ctx: IndexerContext;
321
-
322
- return {
323
- name: 'csv',
324
-
325
- async initialize(context) {
326
- ctx = context;
327
- },
328
-
329
- // Tell watch which files this indexer cares about
330
- watchPatterns() {
331
- return ['**/*.csv', '**/*.tsv'];
332
- },
333
-
334
- // Called when a watched file changes
335
- async onFileChange(filePath, event) {
336
- if (event === 'delete') return true;
337
-
338
- const data = fs.readFileSync(filePath, 'utf-8');
339
- const col = ctx.collection('csv_data');
340
- await col.add(data, {
341
- tags: ['csv'],
342
- metadata: { file: filePath },
343
- });
344
- return true; // handled
345
- },
346
- };
347
- }
348
-
349
- const brain = new BrainBank({ dbPath: './brain.db' })
350
- .use(code())
351
- .use(csvIndexer());
352
-
353
- await brain.initialize();
354
- brain.watch(); // Now watches .ts, .py, etc. AND .csv, .tsv
355
- ```
356
-
357
- ### Search
358
-
359
- Three modes, from fastest to best quality:
360
-
361
- | Mode | Method | Speed | Quality |
362
- |------|--------|-------|---------|
363
- | Keyword | `searchBM25(q)` | ⚡ instant | Good for exact terms |
364
- | Vector | `search(q)` | ~50ms | Good for concepts |
365
- | **Hybrid** | `hybridSearch(q)` | ~100ms | **Best — catches both** |
366
-
367
- ```typescript
368
- // Hybrid search (recommended default)
369
61
  const results = await brain.hybridSearch('authentication middleware');
370
62
 
371
- // Scoped search
372
- const codeHits = await brain.searchCode('parse JSON config', 8);
373
- const commitHits = await brain.searchCommits('fix auth bug', 5);
374
- const docHits = await brain.searchDocs('getting started', { collection: 'wiki' });
375
- ```
376
-
377
- | Score | Meaning |
378
- |-------|---------|
379
- | 0.8+ | Near-exact match |
380
- | 0.5–0.8 | Strongly related |
381
- | 0.3–0.5 | Somewhat related |
382
- | < 0.3 | Weak match |
383
-
384
- ### Document Collections
385
-
386
- Register folders of documents. Files are chunked by heading structure:
387
-
388
- ```typescript
389
- await brain.addCollection({
390
- name: 'docs',
391
- path: '~/project/docs',
392
- pattern: '**/*.md',
393
- ignore: ['**/drafts/**'],
394
- context: 'Project documentation',
395
- });
396
-
397
- await brain.indexDocs();
398
-
399
- // Add context metadata (helps LLM understand what documents are about)
400
- brain.addContext('docs', '/api', 'REST API reference');
401
- brain.addContext('docs', '/guides', 'Step-by-step tutorials');
402
- ```
403
-
404
- ### Context Generation
405
-
406
- Get formatted markdown ready for system prompt injection:
407
-
408
- ```typescript
409
- const context = await brain.getContext('add rate limiting to the API', {
410
- codeResults: 6,
411
- gitResults: 5,
412
- affectedFiles: ['src/api/routes.ts'],
413
- useMMR: true,
414
- });
415
- // Returns: ## Relevant Code, ## Git History, ## Relevant Documents
416
- ```
417
-
418
- ### Custom Indexers
419
-
420
- Implement the `Indexer` interface to build your own:
421
-
422
- ```typescript
423
- import type { Indexer, IndexerContext } from 'brainbank';
424
-
425
- const myIndexer: Indexer = {
426
- name: 'custom',
427
- async initialize(ctx: IndexerContext) {
428
- // ctx.db — shared SQLite database
429
- // ctx.embedding — shared embedding provider
430
- // ctx.collection() — create dynamic collections
431
- const store = ctx.collection('my_data');
432
- await store.add('indexed content', { source: 'custom' });
433
- },
434
- };
435
-
436
- brain.use(myIndexer);
437
- ```
438
-
439
- #### Using custom indexers with the CLI
440
-
441
- Drop `.ts` files into `.brainbank/indexers/` — the CLI auto-discovers them:
442
-
443
- ```
444
- .brainbank/
445
- ├── brainbank.db
446
- └── indexers/
447
- ├── slack.ts
448
- └── jira.ts
449
- ```
450
-
451
- Each file exports a default `Indexer`:
452
-
453
- ```typescript
454
- // .brainbank/indexers/slack.ts
455
- import type { Indexer } from 'brainbank';
456
-
457
- export default {
458
- name: 'slack',
459
- async initialize(ctx) {
460
- const msgs = ctx.collection('slack_messages');
461
- // ... fetch and index slack messages
462
- },
463
- } satisfies Indexer;
464
- ```
465
-
466
- That's it — all CLI commands automatically pick up your indexers:
467
-
468
- ```bash
469
- brainbank index # runs code + git + docs + slack + jira
470
- brainbank stats # shows all indexers
471
- brainbank kv search slack_messages "deploy" # search slack data
472
- ```
473
-
474
- #### Advanced: config file
475
-
476
- For fine-grained control, create a `.brainbank/config.ts`:
477
-
478
- ```typescript
479
- // .brainbank/config.ts
480
- export default {
481
- builtins: ['code', 'docs'], // exclude git (default: all three)
482
- brainbank: { // BrainBank constructor options
483
- dbPath: '.brainbank/brain.db',
484
- },
485
- };
486
- ```
487
-
488
- Everything lives in `.brainbank/` — DB, config, and custom indexers:
489
-
490
- ```
491
- .brainbank/
492
- ├── brainbank.db # SQLite database (auto-created)
493
- ├── config.ts # Optional project config
494
- └── indexers/ # Optional custom indexer files
495
- ├── slack.ts
496
- └── jira.ts
497
- ```
498
-
499
- No folder and no config file? The CLI uses the built-in indexers (`code`, `git`, `docs`).
500
-
501
- ---
502
-
503
- ### AI Agent Integration
504
-
505
- Teach your AI coding agent to use BrainBank as persistent memory. Add an `AGENTS.md` (or `.cursor/rules`) to your project root — works with **Antigravity**, **Claude Code**, **Cursor**, and anything that reads project-level instructions.
506
-
507
- <details>
508
- <summary><strong>Option A: CLI commands</strong> (zero setup)</summary>
509
-
510
- > **Memory — BrainBank**
511
- >
512
- > **Store** a conversation summary after each task:
513
- > `brainbank kv add conversations "Refactored auth to AuthService with DI. JWT + refresh tokens + RBAC."`
514
- >
515
- > **Record** architecture decisions:
516
- > `brainbank kv add decisions "ADR: Fastify over Express. 2x throughput, schema validation, native TS."`
517
- >
518
- > **Search** before starting work:
519
- > `brainbank hsearch "auth middleware"` · `brainbank kv search decisions "auth"`
520
-
521
- </details>
522
-
523
- <details>
524
- <summary><strong>Option B: MCP tools</strong> (richer integration)</summary>
525
-
526
- > **Memory — BrainBank (MCP)**
527
- >
528
- > Use the BrainBank MCP tools for persistent agent memory:
529
- >
530
- > **Store** via `brainbank_kv_add`:
531
- > `{ collection: "conversations", content: "Refactored auth to AuthService with DI.", tags: ["auth"] }`
532
- >
533
- > **Search** via `brainbank_kv_search`:
534
- > `{ collection: "decisions", query: "authentication approach" }`
535
- >
536
- > **Code search** via `brainbank_hybrid_search`:
537
- > `{ query: "auth middleware", repo: "." }`
538
-
539
- </details>
540
-
541
- #### Setup
542
-
543
- | Agent | How to connect |
544
- |-------|---------------|
545
- | **Antigravity** | Add `AGENTS.md` to project root |
546
- | **Claude Code** | Add `AGENTS.md` to project root |
547
- | **Cursor** | Add rules in `.cursor/rules` |
548
- | **MCP** (any agent) | See [MCP Server](#mcp-server) config below |
549
-
550
- #### Custom Indexer: Auto-Ingest Conversation Logs
551
-
552
- For agents that produce structured logs (e.g. Antigravity's `brain/` directory), auto-index them:
553
-
554
- ```typescript
555
- // .brainbank/indexers/conversations.ts
556
- import type { Indexer, IndexerContext } from 'brainbank';
557
- import * as fs from 'node:fs';
558
- import * as path from 'node:path';
559
-
560
- export default {
561
- name: 'conversations',
562
- async initialize(ctx: IndexerContext) {
563
- const conversations = ctx.collection('conversations');
564
- const logsDir = path.join(ctx.repoPath, '.gemini/antigravity/brain');
565
- if (!fs.existsSync(logsDir)) return;
63
+ const log = brain.collection('decisions');
64
+ await log.add('Switched to argon2id for password hashing', { tags: ['security'] });
566
65
 
567
- for (const dir of fs.readdirSync(logsDir)) {
568
- const file = path.join(logsDir, dir, '.system_generated/logs/overview.txt');
569
- if (!fs.existsSync(file)) continue;
570
- const content = fs.readFileSync(file, 'utf-8');
571
- if (content.length < 100) continue;
572
- await conversations.add(content, {
573
- tags: ['auto'],
574
- metadata: { session: dir, source: 'antigravity' },
575
- });
576
- }
577
- },
578
- } satisfies Indexer;
66
+ brain.close();
579
67
  ```
580
68
 
581
- ```bash
582
- brainbank index # now auto-indexes conversation logs alongside code + git
583
- brainbank kv search conversations "what did we decide about auth"
584
- ```
585
-
586
- ### Examples
587
-
588
- | Example | Description | Run |
589
- |---------|-------------|-----|
590
- | [chatbot](examples/chatbot/) | CLI chatbot with streaming + persistent memory (context injection + function calling) | `OPENAI_API_KEY=sk-... npx tsx examples/chatbot/chatbot.ts` |
591
- | [collections](examples/collections/) | Collections, semantic search, tags, metadata linking | `npx tsx examples/collections/collections.ts` |
592
-
593
69
  ---
594
70
 
595
- ## MCP Server
596
-
597
- BrainBank ships with an MCP server (stdio) for AI tool integration.
598
-
599
- ```bash
600
- brainbank serve
601
- ```
71
+ ## Packages
602
72
 
603
- ### Antigravity / Claude Desktop
73
+ `brainbank` is the core framework — strictly plugin-agnostic. Plugins are separate `@brainbank/*` packages that own their database schema, search strategies, and context formatting. Install only what you need:
604
74
 
605
- Add to your MCP config (`~/.gemini/antigravity/mcp_config.json` or Claude Desktop settings):
75
+ ### Indexer Plugins
606
76
 
607
- ```json
608
- {
609
- "mcpServers": {
610
- "brainbank": {
611
- "command": "npx",
612
- "args": ["-y", "@brainbank/mcp"],
613
- "env": {
614
- "BRAINBANK_EMBEDDING": "openai"
615
- }
616
- }
617
- }
618
- }
619
- ```
77
+ Data sources that feed into BrainBank's hybrid search engine. Each plugin manages its own tables via the built-in migration system.
620
78
 
621
- The agent passes the `repo` parameter on each tool call based on the active workspace — no hardcoded paths needed.
79
+ | Package | Description | Install |
80
+ |---------|-------------|----------|
81
+ | [`@brainbank/code`](packages/code/) | AST chunking, import graph, symbol index (20 languages) | `npm i @brainbank/code` |
82
+ | [`@brainbank/git`](packages/git/) | Git history indexing + co-edit analysis | `npm i @brainbank/git` |
83
+ | [`@brainbank/docs`](packages/docs/) | Document collection search with smart chunking | `npm i @brainbank/docs` |
622
84
 
623
- > Set `BRAINBANK_EMBEDDING` to `openai` for higher quality search (requires `OPENAI_API_KEY`). Omit to use the free local WASM embeddings.
85
+ ### Integrations
624
86
 
625
- > Optionally set `BRAINBANK_REPO` as a default fallback repo. If omitted, every tool call must include the `repo` parameter (recommended for multi-workspace setups).
87
+ Extensions that connect BrainBank to external tools and workflows.
626
88
 
627
- ### Available Tools
628
-
629
- | Tool | Description |
630
- |------|-------------|
631
- | `brainbank_hybrid_search` | Best quality: vector + BM25 + reranker |
632
- | `brainbank_search` | Semantic vector search |
633
- | `brainbank_keyword_search` | Instant BM25 full-text |
634
- | `brainbank_context` | Formatted context for a task |
635
- | `brainbank_index` | Trigger code/git indexing |
636
- | `brainbank_stats` | Index statistics |
637
- | `brainbank_history` | Git history for a file |
638
- | `brainbank_coedits` | Files that change together |
639
- | `brainbank_collection_add` | Add item to a KV collection |
640
- | `brainbank_collection_search` | Search a KV collection |
641
- | `brainbank_collection_trim` | Trim a KV collection |
89
+ | Package | Description | Install |
90
+ |---------|-------------|----------|
91
+ | [`@brainbank/mcp`](packages/mcp/) | MCP server for Antigravity, Claude, Cursor (read-only, 2 tools) | `npm i @brainbank/mcp` |
642
92
 
643
93
  ---
644
94
 
645
- ## Configuration
646
-
647
- ```typescript
648
- import { BrainBank, OpenAIEmbedding } from 'brainbank';
649
- import { Qwen3Reranker } from '@brainbank/reranker'; // separate package
650
-
651
- const brain = new BrainBank({
652
- repoPath: '.',
653
- dbPath: '.brainbank/brainbank.db',
654
- gitDepth: 500,
655
- maxFileSize: 512_000,
656
- embeddingDims: 1536,
657
- maxElements: 2_000_000,
658
- embeddingProvider: new OpenAIEmbedding(), // or: omit for free local WASM (384d)
659
- reranker: new Qwen3Reranker(), // local cross-encoder (auto-downloads ~640MB)
660
- });
661
- ```
662
-
663
- ### Embedding Providers
664
-
665
- | Provider | Import | Dims | Speed | Cost |
666
- |----------|--------|------|-------|------|
667
- | **Local (default)** | built-in | 384 | ⚡ 0ms | Free |
668
- | **OpenAI** | `OpenAIEmbedding` | 1536 | ~100ms | $0.02/1M tokens |
669
-
670
- ```typescript
671
- import { OpenAIEmbedding } from 'brainbank';
672
-
673
- // Uses OPENAI_API_KEY env var by default
674
- new OpenAIEmbedding();
675
-
676
- // Custom options
677
- new OpenAIEmbedding({
678
- model: 'text-embedding-3-large',
679
- dims: 512, // custom dims (text-embedding-3 only)
680
- apiKey: 'sk-...',
681
- baseUrl: 'https://my-proxy.com/v1/embeddings', // Azure, proxies
682
- });
683
- ```
684
-
685
- > ⚠️ Switching embedding provider requires re-indexing — vectors are not cross-compatible.
686
-
687
- ### Reranker
688
-
689
- BrainBank includes an optional cross-encoder reranker using **Qwen3-Reranker-0.6B** via `node-llama-cpp`. It runs 100% locally — no API keys needed. The reranker is **disabled by default**.
690
-
691
- #### When to Use It
692
-
693
- The reranker runs local neural inference on every search result, which improves ranking precision but adds significant latency. Here are real benchmarks on a ~2100 file / 4000+ chunk codebase:
694
-
695
- | Metric | Without Reranker | With Reranker |
696
- |--------|-----------------|---------------|
697
- | **Warm query time** | ~480ms | ~5500ms |
698
- | **Cold start** | ~7s | ~12s |
699
- | **Memory overhead** | — | +640MB (model) |
700
- | **Ranking quality** | Good (RRF) | Slightly better |
701
-
702
- **Recommended:** Leave it disabled for interactive use (MCP, IDE integrations). The RRF fusion of vector + BM25 already produces high-quality results. Enable it only for:
703
-
704
- - Batch processing where latency doesn't matter
705
- - Very large codebases (50k+ files) where false positives are costly
706
- - Server environments with RAM to spare
707
-
708
- #### Enabling the Reranker
709
-
710
- ```typescript
711
- import { BrainBank } from 'brainbank';
712
- import { Qwen3Reranker } from '@brainbank/reranker';
713
-
714
- const brain = new BrainBank({
715
- reranker: new Qwen3Reranker(), // ~640MB model, auto-downloaded on first use
716
- });
717
- ```
718
-
719
- Or from the CLI:
720
-
721
- ```bash
722
- brainbank hsearch "auth middleware" --reranker qwen3
723
- ```
724
-
725
- Or via environment variable:
726
-
727
- ```bash
728
- BRAINBANK_RERANKER=qwen3 brainbank serve
729
- ```
730
-
731
- The model is cached at `~/.cache/brainbank/models/` after first download.
732
-
733
- #### Position-Aware Score Blending
734
-
735
- When enabled, the reranker uses position-aware blending — trusting retrieval scores more for top results and the reranker more for lower-ranked results:
736
-
737
- | Position | Retrieval (RRF) | Reranker | Rationale |
738
- |----------|----------------|----------|----------|
739
- | 1–3 | **75%** | 25% | Preserves exact keyword matches |
740
- | 4–10 | **60%** | 40% | Balanced blend |
741
- | 11+ | 40% | **60%** | Trust reranker for uncertain results |
742
-
743
- #### Custom Reranker
744
-
745
- Implement the `Reranker` interface to use your own:
746
-
747
- ```typescript
748
- import type { Reranker } from 'brainbank';
749
-
750
- const myReranker: Reranker = {
751
- async rank(query: string, documents: string[]): Promise<number[]> {
752
- // Return relevance scores 0.0-1.0 for each document
753
- },
754
- async close() { /* optional cleanup */ },
755
- };
756
- ```
757
-
758
- Without a reranker, BrainBank uses pure RRF fusion — which is already production-quality for most use cases.
95
+ ## Documentation
96
+
97
+ | Guide | Description |
98
+ |-------|-------------|
99
+ | **[Getting Started](docs/getting-started.md)** | Installation, quick start, first search |
100
+ | **[CLI Reference](docs/cli.md)** | Complete command reference |
101
+ | **[Plugins](docs/plugins.md)** | Built-in plugins overview + configuration |
102
+ | **[Collections](docs/collections.md)** | Dynamic KV store with semantic search |
103
+ | **[Search](docs/search.md)** | Hybrid search, scoped queries, context generation |
104
+ | **[Custom Plugins](docs/custom-plugins.md)** | Build plugins + publish as npm packages |
105
+ | **[Configuration](docs/config.md)** | `.brainbank/config.json`, env vars |
106
+ | **[Embeddings, Reranker & Pruner](docs/embeddings.md)** | Providers, benchmarks, per-plugin overrides, LLM noise filter |
107
+ | **[Multi-Repo](docs/multi-repo.md)** | Index multiple repositories into one DB |
108
+ | **[MCP Server](docs/mcp.md)** | AI tool integration (stdio), `mcp:export` setup |
109
+ | **[Indexing](docs/indexing.md)** | Code graph, incremental indexing, re-embedding |
110
+ | **[Migrations](docs/migrations.md)** | Plugin schema migrations, built-in schemas |
111
+ | **[Architecture](docs/architecture.md)** | System internals, data flows, design patterns |
759
112
 
760
113
  ---
761
114
 
762
- ## Memory
763
-
764
- `@brainbank/memory` adds **deterministic memory extraction** to any LLM conversation. After every turn, it automatically extracts facts, deduplicates against existing memories, and decides `ADD` / `UPDATE` / `NONE` — no function calling needed.
765
-
766
- Inspired by [mem0](https://github.com/mem0ai/mem0)'s pipeline, but framework-agnostic and built on BrainBank collections.
767
-
768
- ```bash
769
- npm install @brainbank/memory
770
- ```
771
-
772
- ```typescript
773
- import { BrainBank } from 'brainbank';
774
- import { Memory, OpenAIProvider } from '@brainbank/memory';
775
-
776
- const brain = new BrainBank({ dbPath: './memory.db' });
777
- await brain.initialize();
778
-
779
- const memory = new Memory(brain.collection('memories'), {
780
- llm: new OpenAIProvider({ model: 'gpt-4.1-nano' }),
781
- });
782
-
783
- // After every conversation turn (deterministic, automatic)
784
- await memory.process(userMessage, assistantResponse);
785
- // → extracts facts, deduplicates, executes ADD/UPDATE/NONE
786
-
787
- // For the system prompt
788
- const context = memory.buildContext();
789
- // → "## Memories\n- User's name is Berna\n- Prefers TypeScript"
790
- ```
115
+ ## Examples
791
116
 
792
- The `LLMProvider` interface works with any framework:
117
+ | Example | Description |
118
+ |---------|-------------|
119
+ | [notes-plugin](examples/notes-plugin/) | Programmatic plugin — reads `.txt` files |
120
+ | [custom-plugin](examples/custom-plugin/) | CLI auto-discovery plugin |
121
+ | [custom-package](examples/custom-package/) | Standalone npm package scaffold |
122
+ | [collection](examples/collection/) | Collections, search, tags, metadata |
123
+ | [rag](examples/rag/) | RAG chatbot — docs retrieval + generation ¹ |
793
124
 
794
- | Framework | Adapter |
795
- |-----------|--------|
796
- | OpenAI | Built-in `OpenAIProvider` |
797
- | LangChain | `ChatOpenAI.invoke()` → string |
798
- | Vercel AI SDK | `generateText()` → string |
799
- | Any LLM | Implement `{ generate(messages) → string }` |
800
-
801
- > 📂 See [examples/chatbot](examples/chatbot/) for runnable demos with all three frameworks.
802
-
803
- > 📦 Full docs: [packages/memory/README.md](packages/memory/README.md)
804
-
805
- ---
806
-
807
- ### Environment Variables
808
-
809
- | Variable | Description |
810
- |----------|-------------|
811
- | `BRAINBANK_REPO` | Default repository path (optional — auto-detected from `.git/` or passed per tool call) |
812
- | `BRAINBANK_EMBEDDING` | Embedding provider: `local` (default), `openai` |
813
- | `BRAINBANK_RERANKER` | Reranker: `none` (default), `qwen3` to enable |
814
- | `BRAINBANK_DEBUG` | Show full stack traces |
815
- | `OPENAI_API_KEY` | Required when using `BRAINBANK_EMBEDDING=openai` |
125
+ > ¹ The RAG example requires both `OPENAI_API_KEY` and `PERPLEXITY_API_KEY`.
816
126
 
817
127
  ---
818
128
 
819
- ## Multi-Repository Indexing
820
-
821
- BrainBank can index multiple repositories into a **single shared database**. This is useful for monorepos, microservices, or any project split across multiple Git repositories.
822
-
823
- ### How It Works
824
-
825
- When you point BrainBank at a directory that contains multiple Git repositories (subdirectories with `.git/`), the CLI **auto-detects** them and creates namespaced indexers:
826
-
827
- ```bash
828
- ~/projects/
829
- ├── webapp-frontend/ # .git/
830
- ├── webapp-backend/ # .git/
831
- └── webapp-shared/ # .git/
832
- ```
833
-
834
- ```bash
835
- brainbank index ~/projects --depth 200
836
- ```
129
+ ## Benchmarks
837
130
 
838
- ```
839
- ━━━ BrainBank Index ━━━
840
- Repo: /Users/you/projects
841
- Multi-repo: found 3 git repos: webapp-frontend, webapp-backend, webapp-shared
842
- CODE:WEBAPP-BACKEND [0/1075] ...
843
- CODE:WEBAPP-FRONTEND [0/719] ...
844
- GIT:WEBAPP-SHARED [0/200] ...
131
+ Early benchmarks on Apple Silicon — single SQLite file, no external vector DB.
845
132
 
846
- Code: 2107 indexed, 4084 chunks
847
- Git: 600 indexed (200 per repo)
848
- Co-edit pairs: 1636
849
- ```
133
+ | Benchmark | Corpus | Metric | Score |
134
+ |-----------|--------|--------|:-----:|
135
+ | [BEIR SciFact](https://github.com/beir-cellar/beir) | 5,183 scientific abstracts, 300 queries | NDCG@10 | **0.761** |
136
+ | Custom RAG eval | 127 Pinecall.io docs, 20 queries — 1 miss | R@5 | **83%** |
850
137
 
851
- All code, git history, and co-edit relationships from every sub-repository go into **one** `.brainbank/brainbank.db` at the parent directory. Search queries automatically return results across all repositories:
138
+ **Pipeline progression** — each stage's impact on the custom eval:
852
139
 
853
- ```bash
854
- brainbank hsearch "cancel job confirmation" --repo ~/projects
855
- # Results from frontend components, backend controllers,
856
- # and shared utilities all in one search.
857
- ```
140
+ | Stage | R@5 | Δ |
141
+ |-------|:---:|---|
142
+ | Vector-only (HNSW) | 57% | |
143
+ | + BM25 RRF | 78% | +21pp |
144
+ | + Qwen3 reranker | 83% | +5pp |
858
145
 
859
- ### Namespaced Indexers
146
+ > More benchmarks (code+graph retrieval, large-scale stress tests, multi-provider comparisons) are in progress.
147
+ > Full methodology and reproduction commands → [docs/benchmarks.md](docs/benchmarks.md)
860
148
 
861
- Each sub-repository gets its own namespaced indexer instances (e.g., `code:frontend`, `git:backend`). Same-type indexers share a single HNSW vector index for efficient memory usage and unified search.
149
+ ## Contributing
862
150
 
863
- ### Programmatic API
864
-
865
- ```typescript
866
- import { BrainBank } from 'brainbank';
867
- import { code } from 'brainbank/code';
868
- import { git } from 'brainbank/git';
869
-
870
- const brain = new BrainBank({ repoPath: '~/projects' })
871
- .use(code({ name: 'code:frontend', repoPath: '~/projects/webapp-frontend' }))
872
- .use(code({ name: 'code:backend', repoPath: '~/projects/webapp-backend' }))
873
- .use(git({ name: 'git:frontend', repoPath: '~/projects/webapp-frontend' }))
874
- .use(git({ name: 'git:backend', repoPath: '~/projects/webapp-backend' }));
875
-
876
- await brain.initialize();
877
- await brain.index();
878
-
879
- // Cross-repo search
880
- const results = await brain.hybridSearch('authentication guard');
881
- // → Results from both frontend and backend
882
- ```
883
-
884
- ### MCP Multi-Workspace
885
-
886
- The MCP server maintains a pool of BrainBank instances — one per unique `repo` path. Each tool call can target a different workspace:
887
-
888
- ```typescript
889
- // Agent working in one workspace
890
- brainbank_hybrid_search({ query: "login form", repo: "/Users/you/projects" })
891
-
892
- // Agent switches to a different project
893
- brainbank_hybrid_search({ query: "API routes", repo: "/Users/you/other-project" })
894
- ```
895
-
896
- Instances are cached in memory after first initialization, so subsequent queries to the same repo are fast (~480ms).
897
-
898
- ---
899
-
900
- ## Indexing
901
-
902
- ### Incremental Indexing
903
-
904
- All indexing is **incremental by default** — only new or changed content is processed:
905
-
906
- | Indexer | How it detects changes | What gets skipped |
907
- |---------|----------------------|-------------------|
908
- | **Code** | FNV-1a hash of file content | Unchanged files |
909
- | **Git** | Unique commit hash | Already-indexed commits |
910
- | **Docs** | SHA-256 of file content | Unchanged documents |
911
-
912
- ```typescript
913
- // First run: indexes everything
914
- await brain.index(); // → { indexed: 500, skipped: 0 }
915
-
916
- // Second run: skips everything unchanged
917
- await brain.index(); // → { indexed: 0, skipped: 500 }
918
-
919
- // Changed 1 file? Only that file re-indexes
920
- await brain.index(); // → { indexed: 1, skipped: 499 }
921
- ```
922
-
923
- Use `--force` to re-index everything:
924
-
925
- ```bash
926
- brainbank index --force
927
- ```
928
-
929
- ### Re-embedding
930
-
931
- When switching embedding providers (e.g. Local → OpenAI), you **don't need to re-index**. The `reembed()` method regenerates only the vectors — no file I/O, no git parsing, no re-chunking:
932
-
933
- ```typescript
934
- import { BrainBank, OpenAIEmbedding } from 'brainbank';
935
-
936
- // Previously indexed with local embeddings.
937
- // Now switch to OpenAI:
938
- const brain = new BrainBank({
939
- embeddingProvider: new OpenAIEmbedding(),
940
- });
941
- await brain.initialize();
942
-
943
- // ⚠ BrainBank emits 'warning' event if provider changed.
944
- brain.on('warning', (w) => console.warn(w.message));
945
- // → "Embedding provider changed (LocalEmbedding/384 → OpenAIEmbedding/1536). Run brain.reembed()"
946
-
947
- const result = await brain.reembed({
948
- onProgress: (table, current, total) => {
949
- console.log(`${table}: ${current}/${total}`);
950
- },
951
- });
952
- // → { code: 1200, git: 500, docs: 80, kv: 45, notes: 12, total: 1837 }
953
- ```
954
-
955
- Or from the CLI:
956
-
957
- ```bash
958
- brainbank reembed
959
- ```
960
-
961
- | Full re-index | `reembed()` |
962
- |---|---|
963
- | Walks all files | **Skipped** |
964
- | Parses git history | **Skipped** |
965
- | Re-chunks documents | **Skipped** |
966
- | Embeds text | ✓ |
967
- | Replaces vectors | ✓ |
968
- | Rebuilds HNSW | ✓ |
969
-
970
- > BrainBank tracks provider metadata in `embedding_meta` table. It auto-detects mismatches and warns you to run `reembed()`.
971
-
972
- ---
973
-
974
- ## Architecture
975
-
976
- <details>
977
- <summary>Text version</summary>
978
-
979
- ```
980
- ┌──────────────────────────────────────────────────────┐
981
- │ BrainBank Core │
982
- │ .use(code) .use(git) .use(docs) │
983
- │ .collection('name') │
984
- ├──────────────────────────────────────────────────────┤
985
- │ │
986
- │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌────────────┐│
987
- │ │ Code │ │ Git │ │ Docs │ │ Collection ││
988
- │ │ Indexer │ │ Indexer │ │ Indexer │ │ (dynamic) ││
989
- │ └────┬────┘ └────┬────┘ └────┬────┘ └─────┬──────┘│
990
- │ │ │ │ │ │
991
- │ ┌────▼────┐ ┌────▼────┐ ┌────▼────┐ ┌─────▼──────┐│
992
- │ │ HNSW │ │ HNSW │ │ HNSW │ │ Shared KV ││
993
- │ │ Index │ │ Index │ │ Index │ │ HNSW Index ││
994
- │ └─────────┘ └─────────┘ └─────────┘ └────────────┘│
995
- │ │
996
- │ ┌──────────────────────────────────────────────────┐│
997
- │ │ SQLite (.brainbank/brainbank.db) ││
998
- │ │ code_chunks │ git_commits │ doc_chunks ││
999
- │ │ kv_data │ FTS5 full-text │ vectors │ co_edits ││
1000
- │ └──────────────────────────────────────────────────┘│
1001
- │ │
1002
- │ ┌──────────────────────────────────────────────────┐│
1003
- │ │ Embedding (Local WASM 384d │ OpenAI 1536d) ││
1004
- │ └──────────────────────────────────────────────────┘│
1005
- │ ┌──────────────────────────────────────────────────┐│
1006
- │ │ Qwen3-Reranker (opt-in cross-encoder) ││
1007
- │ └──────────────────────────────────────────────────┘│
1008
- └──────────────────────────────────────────────────────┘
1009
- ```
1010
- </details>
1011
-
1012
- ### Search Pipeline
1013
-
1014
- ```
1015
- Query
1016
-
1017
- ├──► Vector Search (HNSW k-NN) ──► candidates
1018
- ├──► Keyword Search (BM25/FTS5) ──► candidates
1019
-
1020
-
1021
- Reciprocal Rank Fusion (RRF, k=60)
1022
-
1023
-
1024
- Qwen3-Reranker (yes/no + logprobs → score 0-1)
1025
-
1026
-
1027
- Position-Aware Blend
1028
- Top 1-3: 75% RRF / 25% reranker
1029
- Top 4-10: 60% RRF / 40% reranker
1030
- Top 11+: 40% RRF / 60% reranker
1031
-
1032
-
1033
- Final results (sorted by blended score)
1034
- ```
1035
-
1036
- ### Data Flow
1037
-
1038
- 1. **Index** — Indexers parse files into chunks
1039
- 2. **Embed** — Each chunk gets a vector (local WASM or OpenAI)
1040
- 3. **Store** — Chunks + vectors → SQLite, vectors → HNSW index
1041
- 4. **Search** — Query → HNSW k-NN + BM25 keyword → RRF fusion → optional reranker
1042
- 5. **Context** — Top results formatted as markdown for system prompts
1043
-
1044
- ---
1045
-
1046
- ## Testing
1047
-
1048
- ```bash
1049
- npm test # Unit tests (129 tests)
1050
- npm test -- --integration # Full suite (211 tests, includes real models + all domains)
1051
- npm test -- --filter code # Filter by test name
1052
- npm test -- --verbose # Show assertion details
1053
- ```
1054
-
1055
- ---
151
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines.
1056
152
 
1057
153
  ## License
1058
154
 
1059
- MIT
155
+ [MIT](LICENSE)