brainbank 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. package/README.md +84 -1107
  2. package/assets/architecture.png +0 -0
  3. package/bin/brainbank +8 -1
  4. package/bin/brainbank-mcp +19 -0
  5. package/dist/chunk-3UIWA32X.js +3341 -0
  6. package/dist/chunk-3UIWA32X.js.map +1 -0
  7. package/dist/chunk-3YBCD6DI.js +117 -0
  8. package/dist/chunk-3YBCD6DI.js.map +1 -0
  9. package/dist/chunk-DAGVUEXL.js +258 -0
  10. package/dist/chunk-DAGVUEXL.js.map +1 -0
  11. package/dist/chunk-DMFMTOHF.js +123 -0
  12. package/dist/chunk-DMFMTOHF.js.map +1 -0
  13. package/dist/chunk-FQYKWB2Q.js +136 -0
  14. package/dist/chunk-FQYKWB2Q.js.map +1 -0
  15. package/dist/chunk-IMJJ2VEM.js +74 -0
  16. package/dist/chunk-IMJJ2VEM.js.map +1 -0
  17. package/dist/chunk-M744PCJQ.js +43 -0
  18. package/dist/chunk-M744PCJQ.js.map +1 -0
  19. package/dist/chunk-NNDY7P2R.js +211 -0
  20. package/dist/chunk-NNDY7P2R.js.map +1 -0
  21. package/dist/chunk-O3J6ZIXK.js +82 -0
  22. package/dist/chunk-O3J6ZIXK.js.map +1 -0
  23. package/dist/chunk-RDQYDLYZ.js +69 -0
  24. package/dist/chunk-RDQYDLYZ.js.map +1 -0
  25. package/dist/chunk-WCQVDF3K.js +14 -0
  26. package/dist/cli.js +2713 -325
  27. package/dist/cli.js.map +1 -1
  28. package/dist/haiku-pruner-5KVT5AI2.js +8 -0
  29. package/dist/http-server-2ZQ6I43B.js +9 -0
  30. package/dist/index.d.ts +1886 -626
  31. package/dist/index.js +319 -46
  32. package/dist/index.js.map +1 -1
  33. package/dist/local-embedding-NZQTILGV.js +8 -0
  34. package/dist/mcp.d.ts +2 -0
  35. package/dist/mcp.js +333 -0
  36. package/dist/mcp.js.map +1 -0
  37. package/dist/openai-embedding-ZP5TSUJG.js +8 -0
  38. package/dist/perplexity-context-embedding-GI5PHE6X.js +9 -0
  39. package/dist/perplexity-context-embedding-GI5PHE6X.js.map +1 -0
  40. package/dist/perplexity-embedding-KZRYGJRC.js +10 -0
  41. package/dist/perplexity-embedding-KZRYGJRC.js.map +1 -0
  42. package/dist/plugin-IKQ6IRSJ.js +32 -0
  43. package/dist/plugin-IKQ6IRSJ.js.map +1 -0
  44. package/dist/resolve-ASGLBNUC.js +10 -0
  45. package/dist/resolve-ASGLBNUC.js.map +1 -0
  46. package/dist/stats-tui-AD3AMYGV.js +1904 -0
  47. package/dist/stats-tui-AD3AMYGV.js.map +1 -0
  48. package/package.json +38 -53
  49. package/src/brainbank.ts +617 -0
  50. package/src/cli/commands/collection.ts +77 -0
  51. package/src/cli/commands/context.ts +59 -0
  52. package/src/cli/commands/daemon.ts +100 -0
  53. package/src/cli/commands/docs.ts +71 -0
  54. package/src/cli/commands/files.ts +69 -0
  55. package/src/cli/commands/help.ts +82 -0
  56. package/src/cli/commands/index.ts +478 -0
  57. package/src/cli/commands/kv.ts +140 -0
  58. package/src/cli/commands/mcp-export.ts +273 -0
  59. package/src/cli/commands/mcp.ts +6 -0
  60. package/src/cli/commands/query.ts +167 -0
  61. package/src/cli/commands/reembed.ts +30 -0
  62. package/src/cli/commands/reindex.ts +40 -0
  63. package/src/cli/commands/scan.ts +336 -0
  64. package/src/cli/commands/search.ts +203 -0
  65. package/src/cli/commands/stats.ts +68 -0
  66. package/src/cli/commands/status.ts +47 -0
  67. package/src/cli/commands/watch.ts +47 -0
  68. package/src/cli/factory/brain-context.ts +43 -0
  69. package/src/cli/factory/builtin-registration.ts +87 -0
  70. package/src/cli/factory/config-loader.ts +77 -0
  71. package/src/cli/factory/index.ts +69 -0
  72. package/src/cli/factory/plugin-loader.ts +324 -0
  73. package/src/cli/index.ts +76 -0
  74. package/src/cli/server-client.ts +186 -0
  75. package/src/cli/tui/index-tui.tsx +667 -0
  76. package/src/cli/tui/stats-data.ts +523 -0
  77. package/src/cli/tui/stats-search.ts +262 -0
  78. package/src/cli/tui/stats-tui.tsx +1465 -0
  79. package/src/cli/tui/tree-scanner.ts +650 -0
  80. package/src/cli/utils.ts +137 -0
  81. package/src/config.ts +48 -0
  82. package/src/constants.ts +21 -0
  83. package/src/db/adapter.ts +112 -0
  84. package/src/db/metadata.ts +130 -0
  85. package/src/db/migrations.ts +66 -0
  86. package/src/db/sqlite-adapter.ts +218 -0
  87. package/src/db/tracker.ts +91 -0
  88. package/src/engine/index-api.ts +81 -0
  89. package/src/engine/reembed.ts +206 -0
  90. package/src/engine/search-api.ts +218 -0
  91. package/src/index.ts +150 -0
  92. package/src/lib/fts.ts +57 -0
  93. package/src/lib/languages.ts +179 -0
  94. package/src/lib/logger.ts +126 -0
  95. package/src/lib/math.ts +87 -0
  96. package/src/lib/provider-key.ts +20 -0
  97. package/src/lib/prune.ts +72 -0
  98. package/src/lib/rrf.ts +133 -0
  99. package/src/lib/write-lock.ts +108 -0
  100. package/src/mcp/mcp-server.ts +192 -0
  101. package/src/mcp/workspace-factory.ts +68 -0
  102. package/src/mcp/workspace-pool.ts +224 -0
  103. package/src/plugin.ts +381 -0
  104. package/src/providers/embeddings/embedding-worker-thread.ts +95 -0
  105. package/src/providers/embeddings/embedding-worker.ts +141 -0
  106. package/src/providers/embeddings/local-embedding.ts +115 -0
  107. package/src/providers/embeddings/openai-embedding.ts +167 -0
  108. package/src/providers/embeddings/perplexity-context-embedding.ts +195 -0
  109. package/src/providers/embeddings/perplexity-embedding.ts +165 -0
  110. package/src/providers/embeddings/resolve.ts +34 -0
  111. package/src/providers/pruners/haiku-expander.ts +178 -0
  112. package/src/providers/pruners/haiku-pruner.ts +263 -0
  113. package/src/providers/vector/hnsw-index.ts +174 -0
  114. package/src/providers/vector/hnsw-loader.ts +129 -0
  115. package/src/search/bm25-boost.ts +76 -0
  116. package/src/search/context-builder.ts +209 -0
  117. package/src/search/keyword/composite-bm25-search.ts +47 -0
  118. package/src/search/query-decomposer.ts +124 -0
  119. package/src/search/types.ts +37 -0
  120. package/src/search/vector/composite-vector-search.ts +105 -0
  121. package/src/search/vector/mmr.ts +64 -0
  122. package/src/services/collection.ts +384 -0
  123. package/src/services/daemon.ts +87 -0
  124. package/src/services/http-server.ts +344 -0
  125. package/src/services/kv-service.ts +64 -0
  126. package/src/services/plugin-registry.ts +77 -0
  127. package/src/services/watch.ts +340 -0
  128. package/src/services/webhook-server.ts +100 -0
  129. package/src/types.ts +509 -0
  130. package/dist/chunk-2P3EGY6S.js +0 -37
  131. package/dist/chunk-2P3EGY6S.js.map +0 -1
  132. package/dist/chunk-3GAIDXRW.js +0 -105
  133. package/dist/chunk-3GAIDXRW.js.map +0 -1
  134. package/dist/chunk-4ZKBQ33J.js +0 -56
  135. package/dist/chunk-4ZKBQ33J.js.map +0 -1
  136. package/dist/chunk-7QVYU63E.js +0 -7
  137. package/dist/chunk-GOUBW7UA.js +0 -373
  138. package/dist/chunk-GOUBW7UA.js.map +0 -1
  139. package/dist/chunk-MJ3Y24H6.js +0 -185
  140. package/dist/chunk-MJ3Y24H6.js.map +0 -1
  141. package/dist/chunk-N6ZMBFDE.js +0 -224
  142. package/dist/chunk-N6ZMBFDE.js.map +0 -1
  143. package/dist/chunk-RAEBYV75.js +0 -709
  144. package/dist/chunk-RAEBYV75.js.map +0 -1
  145. package/dist/chunk-TW5NTYYZ.js +0 -2066
  146. package/dist/chunk-TW5NTYYZ.js.map +0 -1
  147. package/dist/chunk-Z5SU54HP.js +0 -171
  148. package/dist/chunk-Z5SU54HP.js.map +0 -1
  149. package/dist/code.d.ts +0 -31
  150. package/dist/code.js +0 -8
  151. package/dist/docs.d.ts +0 -19
  152. package/dist/docs.js +0 -8
  153. package/dist/git.d.ts +0 -31
  154. package/dist/git.js +0 -8
  155. package/dist/memory.d.ts +0 -19
  156. package/dist/memory.js +0 -146
  157. package/dist/memory.js.map +0 -1
  158. package/dist/notes.d.ts +0 -19
  159. package/dist/notes.js +0 -57
  160. package/dist/notes.js.map +0 -1
  161. package/dist/openai-PCTYLOWI.js +0 -8
  162. package/dist/types-Da_zLLOl.d.ts +0 -474
  163. /package/dist/{chunk-7QVYU63E.js.map → chunk-WCQVDF3K.js.map} +0 -0
  164. /package/dist/{code.js.map → haiku-pruner-5KVT5AI2.js.map} +0 -0
  165. /package/dist/{docs.js.map → http-server-2ZQ6I43B.js.map} +0 -0
  166. /package/dist/{git.js.map → local-embedding-NZQTILGV.js.map} +0 -0
  167. /package/dist/{openai-PCTYLOWI.js.map → openai-embedding-ZP5TSUJG.js.map} +0 -0
package/README.md CHANGED
@@ -4,1175 +4,152 @@
4
4
 
5
5
  BrainBank gives LLMs a long-term memory that persists between sessions.
6
6
 
7
- - **All-in-one** — core + code + git + docs + CLI in a single `brainbank` package
8
- - **Pluggable indexers** — `.use()` only what you need (code, git, docs, or custom)
9
- - **Dynamic collections** — `brain.collection('errors')` for any structured data
7
+ - **Pluggable** — `.use()` only what you need: [code](#packages), [git](#packages), [docs](#packages), or [custom](docs/custom-plugins.md)
10
8
  - **Hybrid search** — vector + BM25 fused with Reciprocal Rank Fusion
11
- - **Pluggable embeddings** — local WASM (free) or OpenAI (higher quality)
12
- - **Multi-repo** — index multiple repositories into one shared database
13
- - **Portable** — single `.brainbank/brainbank.db` file
14
- - **Optional packages** — [`@brainbank/memory`](#memory) (deterministic fact extraction), [`@brainbank/reranker`](#reranker) (Qwen3 cross-encoder), [`@brainbank/mcp`](#mcp-server) (MCP server)
15
-
16
- ![BrainBank Architecture](assets/architecture.png)
9
+ - **Dynamic collections** — `brain.collection('errors')` for any structured data
10
+ - **Pluggable embeddings** — local WASM (free), OpenAI, or Perplexity
11
+ - **Multi-process safe** — concurrent CLI, MCP, and watch processes with automatic hot-reload
12
+ - **Portable** — single `.brainbank/brainbank.db` SQLite file
13
+ - **Modular** — lightweight core + optional [`@brainbank/*`](#packages) packages
17
14
 
18
15
  ---
19
16
 
20
- ## Why BrainBank?
21
-
22
- Built for a multi-repo codebase that needed unified AI context. Zero infrastructure, zero ongoing cost.
23
-
24
- Most AI memory solutions (mem0, Zep, LangMem) require cloud services, external databases, or LLM calls just to store a memory. BrainBank takes a different approach:
25
-
26
- | | **BrainBank** | **mem0** | **Zep** | **LangMem** |
27
- |---|:---:|:---:|:---:|:---:|
28
- | Infrastructure | **SQLite file** | Vector DB + cloud | Neo4j + cloud | LangGraph Platform |
29
- | LLM required to write | **No**¹ | Yes | Yes | Yes |
30
- | Code-aware | **19 AST-parsed languages (tree-sitter), git, co-edits** | ✗ | ✗ | ✗ |
31
- | Custom indexers | **`.use()` plugin system** | ✗ | ✗ | ✗ |
32
- | Search | **Vector + BM25 + RRF** | Vector only | Vector + graph | Vector only |
33
- | Framework lock-in | **None** | Optional | Zep cloud | LangChain |
34
- | Portable | **Copy one file** | Tied to DB | Tied to cloud | Tied to platform |
35
-
36
- > ¹ mem0 and Zep use LLMs to auto-extract memories from raw text. BrainBank is explicit — you decide what gets stored. Less magic, more control.
37
-
38
- **In short:**
39
- - **Code-first** — the only memory layer that understands code structure, git history, and file co-edit relationships
40
- - **$0 memory bill** — no LLM calls to extract/consolidate. You store what you want, BrainBank embeds deterministically
41
- - **Truly portable** — `.brainbank/brainbank.db` is a normal file. Copy it, back it up, `git lfs` it
42
- - **No vendor lock-in** — plain TypeScript, works with any agent framework or none at all
43
-
44
- ### Table of Contents
45
-
46
- - [Why BrainBank?](#why-brainbank)
47
- - [Installation](#installation)
48
- - [Quick Start](#quick-start)
49
- - [CLI](#cli)
50
- - [Programmatic API](#programmatic-api)
51
- - [Indexers](#indexers)
52
- - [Collections](#collections)
53
- - [Search](#search)
54
- - [Document Collections](#document-collections)
55
- - [Context Generation](#context-generation)
56
- - [Custom Indexers](#custom-indexers)
57
- - [AI Agent Integration](#ai-agent-integration)
58
- - [Examples](#examples)
59
- - [Watch Mode](#watch-mode)
60
- - [MCP Server](#mcp-server)
61
- - [Configuration](#configuration)
62
- - [Embedding Providers](#embedding-providers)
63
- - [Reranker](#reranker)
64
- - [Memory](#memory)
65
- - [Multi-Repository Indexing](#multi-repository-indexing)
66
- - [Indexing](#indexing-1)
67
- - [Incremental Indexing](#incremental-indexing)
68
- - [Re-embedding](#re-embedding)
69
- - [Architecture](#architecture)
70
- - [Search Pipeline](#search-pipeline)
71
- - [Benchmarks](#benchmarks)
72
- - [Search Quality: AST vs Sliding Window](#search-quality-ast-vs-sliding-window)
73
- - [Grammar Support](#grammar-support)
17
+ <img src="assets/architecture.png" alt="BrainBank Architecture" width="600">
74
18
 
75
19
  ---
76
20
 
77
- ## Installation
78
-
79
- ```bash
80
- npm install brainbank
81
- ```
82
-
83
- ### Optional Packages
84
-
85
- | Package | When to install |
86
- |---------|----------------|
87
- | `@brainbank/memory` | Deterministic memory extraction for LLM conversations (mem0-style pipeline) |
88
- | `@brainbank/reranker` | Cross-encoder reranker (Qwen3-0.6B, ~640MB model) |
89
- | `@brainbank/mcp` | MCP server for AI tool integration |
21
+ ## Quick Start
90
22
 
91
23
  ```bash
92
- # Memory — automatic fact extraction & dedup for chatbots/agents
93
- npm install @brainbank/memory
94
-
95
- # Reranker — improves search ranking with local neural inference
96
- npm install @brainbank/reranker node-llama-cpp
97
-
98
- # MCP server — for Antigravity, Claude Desktop, etc.
99
- npm install @brainbank/mcp
24
+ npm i -g brainbank @brainbank/code @brainbank/git @brainbank/docs
100
25
  ```
101
26
 
102
- ---
103
-
104
- ## Quick Start
105
-
106
- Get semantic search over your codebase in under a minute:
107
-
108
- ```typescript
109
- import { BrainBank } from 'brainbank';
110
- import { code } from 'brainbank/code';
111
- import { git } from 'brainbank/git';
112
-
113
- const brain = new BrainBank({ repoPath: '.' })
114
- .use(code())
115
- .use(git());
116
-
117
- await brain.index(); // indexes code + git history (incremental)
118
-
119
- // Search across everything
120
- const results = await brain.hybridSearch('authentication middleware');
121
- console.log(results.map(r => `${r.filePath}:${r.line} (${r.score.toFixed(2)})`));
122
-
123
- // Store agent memory
124
- const log = brain.collection('decisions');
125
- await log.add(
126
- 'Switched from bcrypt to argon2id for password hashing. ' +
127
- 'Argon2id is memory-hard and recommended by OWASP for new projects. ' +
128
- 'Updated src/auth/hash.ts and all tests.',
129
- { tags: ['security', 'auth'] }
130
- );
131
-
132
- // Recall later: "what did we decide about password hashing?"
133
- const hits = await log.search('password hashing decision');
27
+ > [!IMPORTANT]
28
+ > **Node 23+ users:** `@brainbank/code` uses [tree-sitter](https://github.com/tree-sitter/node-tree-sitter) native bindings for AST parsing. On **Node 23 and 24**, V8 headers require C++20 but tree-sitter's `binding.gyp` defaults to C++17, causing the install to fail with `"C++20 or later required."`. Fix it by setting the C++ standard before install:
29
+ >
30
+ > ```bash
31
+ > CXXFLAGS="-std=c++20" npm i -g brainbank
32
+ > ```
33
+ >
34
+ > Node ≤22 is unaffected — prebuilt binaries are available and no compilation is needed.
134
35
 
135
- await brain.close();
136
- ```
36
+ > If you get `ERESOLVE` errors, use `npm i --legacy-peer-deps` — tree-sitter grammars have overlapping peer dep ranges.
137
37
 
138
- Or use the CLI — zero code:
38
+ ### CLI — zero code
139
39
 
140
40
  ```bash
141
- npm install -g brainbank
142
- brainbank index . # index code + git
41
+ brainbank index . # scans repo → interactive select → index
42
+ brainbank index . --yes # skip prompts, auto-select all
143
43
  brainbank hsearch "rate limiting" # hybrid search
144
44
  brainbank kv add decisions "Use Redis..." # store a memory
145
45
  brainbank kv search decisions "caching" # recall it
146
46
  ```
147
47
 
148
- ## CLI
149
-
150
- BrainBank can be used entirely from the command line — no config file needed.
151
-
152
- ### Indexing
153
-
154
- `index` processes **code files + git history** by default. Use `--only` to select specific modules, and `--docs` to include document collections.
155
-
156
- ```bash
157
- brainbank index [path] # Index code + git history
158
- brainbank index [path] --force # Force re-index everything
159
- brainbank index [path] --depth 200 # Limit git commit depth
160
- brainbank index [path] --only code # Index only code (skip git)
161
- brainbank index [path] --only git # Index only git history
162
- brainbank index [path] --docs ~/docs # Include a docs folder
163
- brainbank docs [--collection <name>] # Index document collections
164
- ```
165
-
166
- > **Multi-repo:** If `[path]` contains multiple Git subdirectories (no root `.git/`), BrainBank auto-detects them and indexes all into one shared DB. See [Multi-Repository Indexing](#multi-repository-indexing).
167
-
168
- ### Watch Mode
169
-
170
- Auto-re-index code files when they change. Watches for file changes and re-indexes incrementally:
171
-
172
- ```bash
173
- brainbank watch # Watch repo, auto re-index on save
174
- # ━━━ BrainBank Watch ━━━
175
- # Watching /path/to/repo for changes...
176
- # 14:30:02 ✓ code: src/api.ts
177
- # 14:30:05 ✓ code: src/routes.ts
178
- # 14:30:08 ✓ csv: data/metrics.csv ← custom indexer
179
- ```
180
-
181
- > Watch mode monitors **code files** by default. [Custom indexers](#custom-indexers) that implement `watchPatterns()` and `onFileChange()` are automatically picked up — their name appears in the console output alongside the built-in `code` indexer. Git history and document collections are not affected by file-system changes and must be re-indexed explicitly with `brainbank index` / `brainbank docs`.
182
-
183
- ### Document Collections
184
-
185
- ```bash
186
- brainbank collection add <path> --name docs # Register a document folder
187
- brainbank collection list # List registered collections
188
- brainbank collection remove <name> # Remove a collection
189
- ```
190
-
191
- ### Search
192
-
193
- ```bash
194
- brainbank search <query> # Semantic search (vector)
195
- brainbank hsearch <query> # Hybrid search (best quality)
196
- brainbank ksearch <query> # Keyword search (BM25, instant)
197
- brainbank dsearch <query> # Document search
198
- ```
199
-
200
- ### Context
201
-
202
- ```bash
203
- brainbank context <task> # Get formatted context for a task
204
- brainbank context add <col> <path> <desc> # Add context metadata
205
- brainbank context list # List context metadata
206
- ```
207
-
208
- ### KV Store (dynamic collections)
209
-
210
- ```bash
211
- brainbank kv add <coll> <content> # Add item to a collection
212
- brainbank kv search <coll> <query> # Search a collection
213
- brainbank kv list [coll] # List collections or items
214
- brainbank kv trim <coll> --keep <n> # Keep only N most recent
215
- brainbank kv clear <coll> # Clear all items
216
- ```
217
-
218
- ### Utility
219
-
220
- ```bash
221
- brainbank stats # Show index statistics
222
- brainbank reembed # Re-embed all vectors (provider switch)
223
- brainbank watch # Watch files, auto re-index on change
224
- brainbank serve # Start MCP server (stdio)
225
- ```
226
-
227
- **Global options:** `--repo <path>`, `--force`, `--depth <n>`, `--collection <name>`, `--pattern <glob>`, `--context <desc>`, `--reranker <name>`
228
-
229
- ---
230
-
231
- ## Programmatic API
232
-
233
- Use BrainBank as a library in your TypeScript/Node.js project.
234
-
235
- ### Indexers
236
-
237
- BrainBank uses pluggable indexers. Register only what you need with `.use()`:
238
-
239
- | Indexer | Import | Description |
240
- |---------|--------|-------------|
241
- | `code` | `brainbank/code` | AST-aware code chunking via tree-sitter (19 languages) |
242
- | `git` | `brainbank/git` | Git commit history, diffs, co-edit relationships |
243
- | `docs` | `brainbank/docs` | Document collections (markdown, wikis) |
48
+ ### Programmatic API
244
49
 
245
50
  ```typescript
246
51
  import { BrainBank } from 'brainbank';
247
- import { code } from 'brainbank/code';
248
- import { git } from 'brainbank/git';
249
- import { docs } from 'brainbank/docs';
52
+ import { code } from '@brainbank/code';
53
+ import { git } from '@brainbank/git';
250
54
 
251
- // Pick only the indexers you need
252
55
  const brain = new BrainBank({ repoPath: '.' })
253
56
  .use(code())
254
- .use(git())
255
- .use(docs());
57
+ .use(git());
256
58
 
257
- // Index code + git (incremental — only processes changes)
258
59
  await brain.index();
259
60
 
260
- // Index document collections
261
- await brain.addCollection({ name: 'wiki', path: '~/docs', pattern: '**/*.md' });
262
- await brain.indexDocs();
263
- ```
264
-
265
- ### Collections
266
-
267
- Dynamic key-value collections with semantic search — the building block for agent memory:
268
-
269
- ```typescript
270
- const decisions = brain.collection('decisions');
271
-
272
- // Store rich content (auto-embedded for vector search)
273
- await decisions.add(
274
- 'Use SQLite with WAL mode instead of PostgreSQL. Portable single-file ' +
275
- 'storage, works offline, zero infrastructure.',
276
- { tags: ['architecture'], metadata: { files: ['src/db.ts'] } }
277
- );
278
-
279
- // Semantic search — finds by meaning, not keywords
280
- const hits = await decisions.search('why not postgres');
281
- // → [{ content: 'Use SQLite with WAL...', score: 0.95, tags: [...], metadata: {...} }]
282
-
283
- // Management
284
- decisions.list({ limit: 20 }); // newest first
285
- decisions.list({ tags: ['architecture'] }); // filter by tags
286
- decisions.count(); // total items
287
- decisions.trim({ keep: 50 }); // keep N most recent
288
- decisions.prune({ olderThan: '30d' }); // remove older than 30 days
289
- brain.listCollectionNames(); // → ['decisions', ...]
290
- ```
291
-
292
- > 📂 See [examples/collections](examples/collections/) for a complete runnable demo with cross-collection linking and metadata.
293
-
294
- ### Watch Mode
295
-
296
- Auto-re-index when files change:
297
-
298
- ```typescript
299
- // API
300
- const watcher = brain.watch({
301
- debounceMs: 2000,
302
- onIndex: (file, indexer) => console.log(`${indexer}: ${file}`),
303
- onError: (err) => console.error(err.message),
304
- });
305
-
306
- // Later: watcher.close();
307
- ```
308
-
309
- ```bash
310
- # CLI
311
- brainbank watch
312
- # ━━━ BrainBank Watch ━━━
313
- # Watching /path/to/repo for changes...
314
- # 14:30:02 ✓ code: src/api.ts
315
- # 14:30:05 ✓ code: src/routes.ts
316
- ```
317
-
318
- #### Custom Indexer Watch
319
-
320
- Custom indexers can hook into watch mode by implementing `onFileChange` and `watchPatterns`:
321
-
322
- ```typescript
323
- import type { Indexer, IndexerContext } from 'brainbank';
324
-
325
- function csvIndexer(): Indexer {
326
- let ctx: IndexerContext;
327
-
328
- return {
329
- name: 'csv',
330
-
331
- async initialize(context) {
332
- ctx = context;
333
- },
334
-
335
- // Tell watch which files this indexer cares about
336
- watchPatterns() {
337
- return ['**/*.csv', '**/*.tsv'];
338
- },
339
-
340
- // Called when a watched file changes
341
- async onFileChange(filePath, event) {
342
- if (event === 'delete') return true;
343
-
344
- const data = fs.readFileSync(filePath, 'utf-8');
345
- const col = ctx.collection('csv_data');
346
- await col.add(data, {
347
- tags: ['csv'],
348
- metadata: { file: filePath },
349
- });
350
- return true; // handled
351
- },
352
- };
353
- }
354
-
355
- const brain = new BrainBank({ dbPath: './brain.db' })
356
- .use(code())
357
- .use(csvIndexer());
358
-
359
- await brain.initialize();
360
- brain.watch(); // Now watches .ts, .py, etc. AND .csv, .tsv
361
- ```
362
-
363
- ### Search
364
-
365
- Three modes, from fastest to best quality:
366
-
367
- | Mode | Method | Speed | Quality |
368
- |------|--------|-------|---------|
369
- | Keyword | `searchBM25(q)` | ⚡ instant | Good for exact terms |
370
- | Vector | `search(q)` | ~50ms | Good for concepts |
371
- | **Hybrid** | `hybridSearch(q)` | ~100ms | **Best — catches both** |
372
-
373
- ```typescript
374
- // Hybrid search (recommended default)
375
61
  const results = await brain.hybridSearch('authentication middleware');
376
62
 
377
- // Scoped search
378
- const codeHits = await brain.searchCode('parse JSON config', 8);
379
- const commitHits = await brain.searchCommits('fix auth bug', 5);
380
- const docHits = await brain.searchDocs('getting started', { collection: 'wiki' });
381
- ```
382
-
383
- | Score | Meaning |
384
- |-------|---------|
385
- | 0.8+ | Near-exact match |
386
- | 0.5–0.8 | Strongly related |
387
- | 0.3–0.5 | Somewhat related |
388
- | < 0.3 | Weak match |
389
-
390
- ### Document Collections
391
-
392
- Register folders of documents. Files are chunked by heading structure:
393
-
394
- ```typescript
395
- await brain.addCollection({
396
- name: 'docs',
397
- path: '~/project/docs',
398
- pattern: '**/*.md',
399
- ignore: ['**/drafts/**'],
400
- context: 'Project documentation',
401
- });
402
-
403
- await brain.indexDocs();
404
-
405
- // Add context metadata (helps LLM understand what documents are about)
406
- brain.addContext('docs', '/api', 'REST API reference');
407
- brain.addContext('docs', '/guides', 'Step-by-step tutorials');
408
- ```
409
-
410
- ### Context Generation
411
-
412
- Get formatted markdown ready for system prompt injection:
413
-
414
- ```typescript
415
- const context = await brain.getContext('add rate limiting to the API', {
416
- codeResults: 6,
417
- gitResults: 5,
418
- affectedFiles: ['src/api/routes.ts'],
419
- useMMR: true,
420
- });
421
- // Returns: ## Relevant Code, ## Git History, ## Relevant Documents
422
- ```
423
-
424
- ### Custom Indexers
425
-
426
- Implement the `Indexer` interface to build your own:
427
-
428
- ```typescript
429
- import type { Indexer, IndexerContext } from 'brainbank';
430
-
431
- const myIndexer: Indexer = {
432
- name: 'custom',
433
- async initialize(ctx: IndexerContext) {
434
- // ctx.db — shared SQLite database
435
- // ctx.embedding — shared embedding provider
436
- // ctx.collection() — create dynamic collections
437
- const store = ctx.collection('my_data');
438
- await store.add('indexed content', { source: 'custom' });
439
- },
440
- };
441
-
442
- brain.use(myIndexer);
443
- ```
444
-
445
- #### Using custom indexers with the CLI
446
-
447
- Drop `.ts` files into `.brainbank/indexers/` — the CLI auto-discovers them:
448
-
449
- ```
450
- .brainbank/
451
- ├── brainbank.db
452
- └── indexers/
453
- ├── slack.ts
454
- └── jira.ts
455
- ```
456
-
457
- Each file exports a default `Indexer`:
458
-
459
- ```typescript
460
- // .brainbank/indexers/slack.ts
461
- import type { Indexer } from 'brainbank';
462
-
463
- export default {
464
- name: 'slack',
465
- async initialize(ctx) {
466
- const msgs = ctx.collection('slack_messages');
467
- // ... fetch and index slack messages
468
- },
469
- } satisfies Indexer;
470
- ```
471
-
472
- That's it — all CLI commands automatically pick up your indexers:
473
-
474
- ```bash
475
- brainbank index # runs code + git + docs + slack + jira
476
- brainbank stats # shows all indexers
477
- brainbank kv search slack_messages "deploy" # search slack data
478
- ```
479
-
480
- #### Advanced: config file
481
-
482
- For fine-grained control, create a `.brainbank/config.ts`:
483
-
484
- ```typescript
485
- // .brainbank/config.ts
486
- export default {
487
- builtins: ['code', 'docs'], // exclude git (default: all three)
488
- brainbank: { // BrainBank constructor options
489
- dbPath: '.brainbank/brain.db',
490
- },
491
- };
492
- ```
493
-
494
- Everything lives in `.brainbank/` — DB, config, and custom indexers:
495
-
496
- ```
497
- .brainbank/
498
- ├── brainbank.db # SQLite database (auto-created)
499
- ├── config.ts # Optional project config
500
- └── indexers/ # Optional custom indexer files
501
- ├── slack.ts
502
- └── jira.ts
503
- ```
504
-
505
- No folder and no config file? The CLI uses the built-in indexers (`code`, `git`, `docs`).
506
-
507
- ---
508
-
509
- ### AI Agent Integration
510
-
511
- Teach your AI coding agent to use BrainBank as persistent memory. Add an `AGENTS.md` (or `.cursor/rules`) to your project root — works with **Antigravity**, **Claude Code**, **Cursor**, and anything that reads project-level instructions.
512
-
513
- <details>
514
- <summary><strong>Option A: CLI commands</strong> (zero setup)</summary>
515
-
516
- > **Memory — BrainBank**
517
- >
518
- > **Store** a conversation summary after each task:
519
- > `brainbank kv add conversations "Refactored auth to AuthService with DI. JWT + refresh tokens + RBAC."`
520
- >
521
- > **Record** architecture decisions:
522
- > `brainbank kv add decisions "ADR: Fastify over Express. 2x throughput, schema validation, native TS."`
523
- >
524
- > **Search** before starting work:
525
- > `brainbank hsearch "auth middleware"` · `brainbank kv search decisions "auth"`
526
-
527
- </details>
528
-
529
- <details>
530
- <summary><strong>Option B: MCP tools</strong> (richer integration)</summary>
531
-
532
- > **Memory — BrainBank (MCP)**
533
- >
534
- > Use the BrainBank MCP tools for persistent agent memory:
535
- >
536
- > **Store** via `brainbank_kv_add`:
537
- > `{ collection: "conversations", content: "Refactored auth to AuthService with DI.", tags: ["auth"] }`
538
- >
539
- > **Search** via `brainbank_kv_search`:
540
- > `{ collection: "decisions", query: "authentication approach" }`
541
- >
542
- > **Code search** via `brainbank_hybrid_search`:
543
- > `{ query: "auth middleware", repo: "." }`
544
-
545
- </details>
546
-
547
- #### Setup
548
-
549
- | Agent | How to connect |
550
- |-------|---------------|
551
- | **Antigravity** | Add `AGENTS.md` to project root |
552
- | **Claude Code** | Add `AGENTS.md` to project root |
553
- | **Cursor** | Add rules in `.cursor/rules` |
554
- | **MCP** (any agent) | See [MCP Server](#mcp-server) config below |
555
-
556
- #### Custom Indexer: Auto-Ingest Conversation Logs
557
-
558
- For agents that produce structured logs (e.g. Antigravity's `brain/` directory), auto-index them:
559
-
560
- ```typescript
561
- // .brainbank/indexers/conversations.ts
562
- import type { Indexer, IndexerContext } from 'brainbank';
563
- import * as fs from 'node:fs';
564
- import * as path from 'node:path';
565
-
566
- export default {
567
- name: 'conversations',
568
- async initialize(ctx: IndexerContext) {
569
- const conversations = ctx.collection('conversations');
570
- const logsDir = path.join(ctx.repoPath, '.gemini/antigravity/brain');
571
- if (!fs.existsSync(logsDir)) return;
572
-
573
- for (const dir of fs.readdirSync(logsDir)) {
574
- const file = path.join(logsDir, dir, '.system_generated/logs/overview.txt');
575
- if (!fs.existsSync(file)) continue;
576
- const content = fs.readFileSync(file, 'utf-8');
577
- if (content.length < 100) continue;
578
- await conversations.add(content, {
579
- tags: ['auto'],
580
- metadata: { session: dir, source: 'antigravity' },
581
- });
582
- }
583
- },
584
- } satisfies Indexer;
585
- ```
586
-
587
- ```bash
588
- brainbank index # now auto-indexes conversation logs alongside code + git
589
- brainbank kv search conversations "what did we decide about auth"
590
- ```
591
-
592
- ### Examples
593
-
594
- | Example | Description | Run |
595
- |---------|-------------|-----|
596
- | [chatbot](examples/chatbot/) | CLI chatbot with streaming + persistent memory (context injection + function calling) | `OPENAI_API_KEY=sk-... npx tsx examples/chatbot/chatbot.ts` |
597
- | [collections](examples/collections/) | Collections, semantic search, tags, metadata linking | `npx tsx examples/collections/collections.ts` |
598
-
599
- ---
600
-
601
- ## MCP Server
602
-
603
- BrainBank ships with an MCP server (stdio) for AI tool integration.
604
-
605
- ```bash
606
- brainbank serve
607
- ```
608
-
609
- ### Antigravity / Claude Desktop
610
-
611
- Add to your MCP config (`~/.gemini/antigravity/mcp_config.json` or Claude Desktop settings):
612
-
613
- ```json
614
- {
615
- "mcpServers": {
616
- "brainbank": {
617
- "command": "npx",
618
- "args": ["-y", "@brainbank/mcp"],
619
- "env": {
620
- "BRAINBANK_EMBEDDING": "openai"
621
- }
622
- }
623
- }
624
- }
625
- ```
626
-
627
- The agent passes the `repo` parameter on each tool call based on the active workspace — no hardcoded paths needed.
628
-
629
- > Set `BRAINBANK_EMBEDDING` to `openai` for higher quality search (requires `OPENAI_API_KEY`). Omit to use the free local WASM embeddings.
630
-
631
- > Optionally set `BRAINBANK_REPO` as a default fallback repo. If omitted, every tool call must include the `repo` parameter (recommended for multi-workspace setups).
632
-
633
- ### Available Tools
634
-
635
- | Tool | Description |
636
- |------|-------------|
637
- | `brainbank_hybrid_search` | Best quality: vector + BM25 + reranker |
638
- | `brainbank_search` | Semantic vector search |
639
- | `brainbank_keyword_search` | Instant BM25 full-text |
640
- | `brainbank_context` | Formatted context for a task |
641
- | `brainbank_index` | Trigger code/git indexing |
642
- | `brainbank_stats` | Index statistics |
643
- | `brainbank_history` | Git history for a file |
644
- | `brainbank_coedits` | Files that change together |
645
- | `brainbank_collection_add` | Add item to a KV collection |
646
- | `brainbank_collection_search` | Search a KV collection |
647
- | `brainbank_collection_trim` | Trim a KV collection |
648
-
649
- ---
650
-
651
- ## Configuration
652
-
653
- ```typescript
654
- import { BrainBank, OpenAIEmbedding } from 'brainbank';
655
- import { Qwen3Reranker } from '@brainbank/reranker'; // separate package
656
-
657
- const brain = new BrainBank({
658
- repoPath: '.',
659
- dbPath: '.brainbank/brainbank.db',
660
- gitDepth: 500,
661
- maxFileSize: 512_000,
662
- embeddingDims: 1536,
663
- maxElements: 2_000_000,
664
- embeddingProvider: new OpenAIEmbedding(), // or: omit for free local WASM (384d)
665
- reranker: new Qwen3Reranker(), // local cross-encoder (auto-downloads ~640MB)
666
- });
667
- ```
668
-
669
- ### Embedding Providers
670
-
671
- | Provider | Import | Dims | Speed | Cost |
672
- |----------|--------|------|-------|------|
673
- | **Local (default)** | built-in | 384 | ⚡ 0ms | Free |
674
- | **OpenAI** | `OpenAIEmbedding` | 1536 | ~100ms | $0.02/1M tokens |
675
-
676
- ```typescript
677
- import { OpenAIEmbedding } from 'brainbank';
678
-
679
- // Uses OPENAI_API_KEY env var by default
680
- new OpenAIEmbedding();
681
-
682
- // Custom options
683
- new OpenAIEmbedding({
684
- model: 'text-embedding-3-large',
685
- dims: 512, // custom dims (text-embedding-3 only)
686
- apiKey: 'sk-...',
687
- baseUrl: 'https://my-proxy.com/v1/embeddings', // Azure, proxies
688
- });
689
- ```
690
-
691
- > ⚠️ Switching embedding provider requires regenerating all vectors (run `brainbank reembed`; no full re-index needed) — vectors are not cross-compatible.
692
-
693
- ### Reranker
694
-
695
- BrainBank includes an optional cross-encoder reranker using **Qwen3-Reranker-0.6B** via `node-llama-cpp`. It runs 100% locally — no API keys needed. The reranker is **disabled by default**.
696
-
697
- #### When to Use It
698
-
699
- The reranker runs local neural inference on every search result, which improves ranking precision but adds significant latency. Here are real benchmarks on a ~2100 file / 4000+ chunk codebase:
700
-
701
- | Metric | Without Reranker | With Reranker |
702
- |--------|-----------------|---------------|
703
- | **Warm query time** | ~480ms | ~5500ms |
704
- | **Cold start** | ~7s | ~12s |
705
- | **Memory overhead** | — | +640MB (model) |
706
- | **Ranking quality** | Good (RRF) | Slightly better |
707
-
708
- **Recommended:** Leave it disabled for interactive use (MCP, IDE integrations). The RRF fusion of vector + BM25 already produces high-quality results. Enable it only for:
709
-
710
- - Batch processing where latency doesn't matter
711
- - Very large codebases (50k+ files) where false positives are costly
712
- - Server environments with RAM to spare
713
-
714
- #### Enabling the Reranker
715
-
716
- ```typescript
717
- import { BrainBank } from 'brainbank';
718
- import { Qwen3Reranker } from '@brainbank/reranker';
719
-
720
- const brain = new BrainBank({
721
- reranker: new Qwen3Reranker(), // ~640MB model, auto-downloaded on first use
722
- });
723
- ```
724
-
725
- Or from the CLI:
726
-
727
- ```bash
728
- brainbank hsearch "auth middleware" --reranker qwen3
729
- ```
730
-
731
- Or via environment variable:
732
-
733
- ```bash
734
- BRAINBANK_RERANKER=qwen3 brainbank serve
735
- ```
736
-
737
- The model is cached at `~/.cache/brainbank/models/` after first download.
738
-
739
- #### Position-Aware Score Blending
740
-
741
- When enabled, the reranker uses position-aware blending — trusting retrieval scores more for top results and the reranker more for lower-ranked results:
742
-
743
- | Position | Retrieval (RRF) | Reranker | Rationale |
744
- |----------|----------------|----------|----------|
745
- | 1–3 | **75%** | 25% | Preserves exact keyword matches |
746
- | 4–10 | **60%** | 40% | Balanced blend |
747
- | 11+ | 40% | **60%** | Trust reranker for uncertain results |
748
-
749
- #### Custom Reranker
750
-
751
- Implement the `Reranker` interface to use your own:
752
-
753
- ```typescript
754
- import type { Reranker } from 'brainbank';
63
+ const log = brain.collection('decisions');
64
+ await log.add('Switched to argon2id for password hashing', { tags: ['security'] });
755
65
 
756
- const myReranker: Reranker = {
757
- async rank(query: string, documents: string[]): Promise<number[]> {
758
- // Return relevance scores 0.0-1.0 for each document
759
- },
760
- async close() { /* optional cleanup */ },
761
- };
66
+ brain.close();
762
67
  ```
763
68
 
764
- Without a reranker, BrainBank uses pure RRF fusion — which is already production-quality for most use cases.
765
-
766
69
  ---
767
70
 
768
- ## Memory
71
+ ## Packages
769
72
 
770
- `@brainbank/memory` adds **deterministic memory extraction** to any LLM conversation. After every turn, it automatically extracts facts, deduplicates against existing memories, and decides `ADD` / `UPDATE` / `NONE` — no function calling needed.
73
+ `brainbank` is the core framework — strictly plugin-agnostic. Plugins are separate `@brainbank/*` packages that own their database schema, search strategies, and context formatting. Install only what you need:
771
74
 
772
- Inspired by [mem0](https://github.com/mem0ai/mem0)'s pipeline, but framework-agnostic and built on BrainBank collections.
75
+ ### Indexer Plugins
773
76
 
774
- ```bash
775
- npm install @brainbank/memory
776
- ```
77
+ Data sources that feed into BrainBank's hybrid search engine. Each plugin manages its own tables via the built-in migration system.
777
78
 
778
- ```typescript
779
- import { BrainBank } from 'brainbank';
780
- import { Memory, OpenAIProvider } from '@brainbank/memory';
781
-
782
- const brain = new BrainBank({ dbPath: './memory.db' });
783
- await brain.initialize();
784
-
785
- const memory = new Memory(brain.collection('memories'), {
786
- llm: new OpenAIProvider({ model: 'gpt-4.1-nano' }),
787
- });
788
-
789
- // After every conversation turn (deterministic, automatic)
790
- await memory.process(userMessage, assistantResponse);
791
- // → extracts facts, deduplicates, executes ADD/UPDATE/NONE
792
-
793
- // For the system prompt
794
- const context = memory.buildContext();
795
- // → "## Memories\n- User's name is Berna\n- Prefers TypeScript"
796
- ```
797
-
798
- The `LLMProvider` interface works with any framework:
799
-
800
- | Framework | Adapter |
801
- |-----------|--------|
802
- | OpenAI | Built-in `OpenAIProvider` |
803
- | LangChain | `ChatOpenAI.invoke()` → string |
804
- | Vercel AI SDK | `generateText()` → string |
805
- | Any LLM | Implement `{ generate(messages) → string }` |
79
+ | Package | Description | Install |
80
+ |---------|-------------|----------|
81
+ | [`@brainbank/code`](packages/code/) | AST chunking, import graph, symbol index (20 languages) | `npm i @brainbank/code` |
82
+ | [`@brainbank/git`](packages/git/) | Git history indexing + co-edit analysis | `npm i @brainbank/git` |
83
+ | [`@brainbank/docs`](packages/docs/) | Document collection search with smart chunking | `npm i @brainbank/docs` |
806
84
 
807
- > 📂 See [examples/chatbot](examples/chatbot/) for runnable demos with all three frameworks.
808
-
809
- > 📦 Full docs: [packages/memory/README.md](packages/memory/README.md)
810
-
811
- ---
85
+ ### Integrations
812
86
 
813
- ### Environment Variables
87
+ Extensions that connect BrainBank to external tools and workflows.
814
88
 
815
- | Variable | Description |
816
- |----------|-------------|
817
- | `BRAINBANK_REPO` | Default repository path (optional — auto-detected from `.git/` or passed per tool call) |
818
- | `BRAINBANK_EMBEDDING` | Embedding provider: `local` (default), `openai` |
819
- | `BRAINBANK_RERANKER` | Reranker: `none` (default), `qwen3` to enable |
820
- | `BRAINBANK_DEBUG` | Show full stack traces |
821
- | `OPENAI_API_KEY` | Required when using `BRAINBANK_EMBEDDING=openai` |
89
+ | Package | Description | Install |
90
+ |---------|-------------|----------|
91
+ | [`@brainbank/mcp`](packages/mcp/) | MCP server for Antigravity, Claude, Cursor (read-only, 2 tools) | `npm i @brainbank/mcp` |
822
92
 
823
93
  ---
824
94
 
825
- ## Multi-Repository Indexing
826
-
827
- BrainBank can index multiple repositories into a **single shared database**. This is useful for monorepos, microservices, or any project split across multiple Git repositories.
828
-
829
- ### How It Works
830
-
831
- When you point BrainBank at a directory that contains multiple Git repositories (subdirectories with `.git/`), the CLI **auto-detects** them and creates namespaced indexers:
832
-
833
- ```bash
834
- ~/projects/
835
- ├── webapp-frontend/ # .git/
836
- ├── webapp-backend/ # .git/
837
- └── webapp-shared/ # .git/
838
- ```
839
-
840
- ```bash
841
- brainbank index ~/projects --depth 200
842
- ```
843
-
844
- ```
845
- ━━━ BrainBank Index ━━━
846
- Repo: /Users/you/projects
847
- Multi-repo: found 3 git repos: webapp-frontend, webapp-backend, webapp-shared
848
- CODE:WEBAPP-BACKEND [0/1075] ...
849
- CODE:WEBAPP-FRONTEND [0/719] ...
850
- GIT:WEBAPP-SHARED [0/200] ...
851
-
852
- Code: 2107 indexed, 4084 chunks
853
- Git: 600 indexed (200 per repo)
854
- Co-edit pairs: 1636
855
- ```
856
-
857
- All code, git history, and co-edit relationships from every sub-repository go into **one** `.brainbank/brainbank.db` at the parent directory. Search queries automatically return results across all repositories:
858
-
859
- ```bash
860
- brainbank hsearch "cancel job confirmation" --repo ~/projects
861
- # → Results from frontend components, backend controllers,
862
- # and shared utilities — all in one search.
863
- ```
864
-
865
- ### Namespaced Indexers
866
-
867
- Each sub-repository gets its own namespaced indexer instances (e.g., `code:frontend`, `git:backend`). Same-type indexers share a single HNSW vector index for efficient memory usage and unified search.
868
-
869
- ### Programmatic API
870
-
871
- ```typescript
872
- import { BrainBank } from 'brainbank';
873
- import { code } from 'brainbank/code';
874
- import { git } from 'brainbank/git';
875
-
876
- const brain = new BrainBank({ repoPath: '~/projects' })
877
- .use(code({ name: 'code:frontend', repoPath: '~/projects/webapp-frontend' }))
878
- .use(code({ name: 'code:backend', repoPath: '~/projects/webapp-backend' }))
879
- .use(git({ name: 'git:frontend', repoPath: '~/projects/webapp-frontend' }))
880
- .use(git({ name: 'git:backend', repoPath: '~/projects/webapp-backend' }));
881
-
882
- await brain.initialize();
883
- await brain.index();
884
-
885
- // Cross-repo search
886
- const results = await brain.hybridSearch('authentication guard');
887
- // → Results from both frontend and backend
888
- ```
889
-
890
- ### MCP Multi-Workspace
891
-
892
- The MCP server maintains a pool of BrainBank instances — one per unique `repo` path. Each tool call can target a different workspace:
893
-
894
- ```typescript
895
- // Agent working in one workspace
896
- brainbank_hybrid_search({ query: "login form", repo: "/Users/you/projects" })
897
-
898
- // Agent switches to a different project
899
- brainbank_hybrid_search({ query: "API routes", repo: "/Users/you/other-project" })
900
- ```
901
-
902
- Instances are cached in memory after first initialization, so subsequent queries to the same repo are fast (~480ms).
95
+ ## Documentation
96
+
97
+ | Guide | Description |
98
+ |-------|-------------|
99
+ | **[Getting Started](docs/getting-started.md)** | Installation, quick start, first search |
100
+ | **[CLI Reference](docs/cli.md)** | Complete command reference |
101
+ | **[Plugins](docs/plugins.md)** | Built-in plugins overview + configuration |
102
+ | **[Collections](docs/collections.md)** | Dynamic KV store with semantic search |
103
+ | **[Search](docs/search.md)** | Hybrid search, scoped queries, context generation |
104
+ | **[Custom Plugins](docs/custom-plugins.md)** | Build plugins + publish as npm packages |
105
+ | **[Configuration](docs/config.md)** | `.brainbank/config.json`, env vars |
106
+ | **[Embeddings, Reranker & Pruner](docs/embeddings.md)** | Providers, benchmarks, per-plugin overrides, LLM noise filter |
107
+ | **[Multi-Repo](docs/multi-repo.md)** | Index multiple repositories into one DB |
108
+ | **[MCP Server](docs/mcp.md)** | AI tool integration (stdio), `mcp:export` setup |
109
+ | **[Indexing](docs/indexing.md)** | Code graph, incremental indexing, re-embedding |
110
+ | **[Migrations](docs/migrations.md)** | Plugin schema migrations, built-in schemas |
111
+ | **[Architecture](docs/architecture.md)** | System internals, data flows, design patterns |
903
112
 
904
113
  ---
905
114
 
906
- ## Indexing
907
-
908
- ### Code Chunking (tree-sitter)
909
-
910
- BrainBank uses **native tree-sitter** to parse source code into ASTs and extract semantic blocks — functions, classes, methods, interfaces — as individual chunks. This produces dramatically better embeddings than naive line-based splitting.
911
-
912
- **Supported languages (AST-parsed):**
913
-
914
- | Category | Languages |
915
- |----------|-----------|
916
- | Web | TypeScript, JavaScript, HTML, CSS |
917
- | Systems | Go, Rust, C, C++, Swift |
918
- | JVM | Java, Kotlin, Scala |
919
- | Scripting | Python, Ruby, PHP, Lua, Bash, Elixir |
920
- | .NET | C# |
921
-
922
- For large classes (>80 lines), the chunker descends into the class body and extracts each method as a separate chunk. For unsupported languages, it falls back to a sliding window with overlap.
923
-
924
- > Tree-sitter grammars are **optional dependencies**. If a grammar isn't installed, that language falls back to the generic sliding window. Install only the grammars you need: `npm install tree-sitter-ruby tree-sitter-go` etc.
925
-
926
- ### Incremental Indexing
927
-
928
- All indexing is **incremental by default** — only new or changed content is processed:
929
-
930
- | Indexer | How it detects changes | What gets skipped |
931
- |---------|----------------------|-------------------|
932
- | **Code** | FNV-1a hash of file content | Unchanged files |
933
- | **Git** | Unique commit hash | Already-indexed commits |
934
- | **Docs** | SHA-256 of file content | Unchanged documents |
935
-
936
- ```typescript
937
- // First run: indexes everything
938
- await brain.index(); // → { indexed: 500, skipped: 0 }
939
-
940
- // Second run: skips everything unchanged
941
- await brain.index(); // → { indexed: 0, skipped: 500 }
942
-
943
- // Changed 1 file? Only that file re-indexes
944
- await brain.index(); // → { indexed: 1, skipped: 499 }
945
- ```
946
-
947
- Use `--force` to re-index everything:
948
-
949
- ```bash
950
- brainbank index --force
951
- ```
952
-
953
- ### Re-embedding
954
-
955
- When switching embedding providers (e.g. Local → OpenAI), you **don't need to re-index**. The `reembed()` method regenerates only the vectors — no file I/O, no git parsing, no re-chunking:
956
-
957
- ```typescript
958
- import { BrainBank, OpenAIEmbedding } from 'brainbank';
959
-
960
- // Previously indexed with local embeddings.
961
- // Now switch to OpenAI:
962
- const brain = new BrainBank({
963
- embeddingProvider: new OpenAIEmbedding(),
964
- });
965
- await brain.initialize();
966
-
967
- // ⚠ BrainBank emits 'warning' event if provider changed.
968
- brain.on('warning', (w) => console.warn(w.message));
969
- // → "Embedding provider changed (LocalEmbedding/384 → OpenAIEmbedding/1536). Run brain.reembed()"
970
-
971
- const result = await brain.reembed({
972
- onProgress: (table, current, total) => {
973
- console.log(`${table}: ${current}/${total}`);
974
- },
975
- });
976
- // → { code: 1200, git: 500, docs: 80, kv: 45, notes: 12, total: 1837 }
977
- ```
978
-
979
- Or from the CLI:
980
-
981
- ```bash
982
- brainbank reembed
983
- ```
115
+ ## Examples
984
116
 
985
- | Full re-index | `reembed()` |
986
- |---|---|
987
- | Walks all files | **Skipped** |
988
- | Parses git history | **Skipped** |
989
- | Re-chunks documents | **Skipped** |
990
- | Embeds text | ✓ |
991
- | Replaces vectors | ✓ |
992
- | Rebuilds HNSW | ✓ |
117
+ | Example | Description |
118
+ |---------|-------------|
119
+ | [notes-plugin](examples/notes-plugin/) | Programmatic plugin — reads `.txt` files |
120
+ | [custom-plugin](examples/custom-plugin/) | CLI auto-discovery plugin |
121
+ | [custom-package](examples/custom-package/) | Standalone npm package scaffold |
122
+ | [collection](examples/collection/) | Collections, search, tags, metadata |
123
+ | [rag](examples/rag/) | RAG chatbot — docs retrieval + generation ¹ |
993
124
 
994
- > BrainBank tracks provider metadata in the `embedding_meta` table. It auto-detects mismatches and warns you to run `reembed()`.
125
+ > ¹ Requires `OPENAI_API_KEY`. RAG also requires `PERPLEXITY_API_KEY`.
995
126
 
996
127
  ---
997
128
 
998
129
  ## Benchmarks
999
130
 
1000
- BrainBank includes benchmark scripts to validate chunking quality and search relevance. Run them against your own codebase to see the impact.
1001
-
1002
- ### Search Quality: AST vs Sliding Window
1003
-
1004
- We compared BrainBank's **tree-sitter AST chunker** against the traditional **sliding window** (80-line blocks) on a production NestJS backend (3,753 lines across 8 service files). Both strategies chunk the same files; all chunks are embedded and searched with the same 10 domain-specific queries.
1005
-
1006
- #### How It Works
1007
-
1008
- ```
1009
- Sliding Window Tree-Sitter AST
1010
- ┌────────────────────┐ ┌────────────────────┐
1011
- │ import { ... } │ │ ✓ constructor() │ → named chunk
1012
- │ @Injectable() │ → L1-80 block │ ✓ findAll() │ → named chunk
1013
- │ class JobsService {│ │ ✓ createJob() │ → named chunk
1014
- │ constructor() │ │ ✓ cancelJob() │ → named chunk
1015
- │ findAll() { ... }│ │ ✓ updateStatus() │ → named chunk
1016
- │ createJob() │ └────────────────────┘
1017
- │ ... │
1018
- │ ────────────────── │ overlaps ↕
1019
- │ cancelJob() │ → L75-155 block
1020
- │ updateStatus() │
1021
- │ ... │
1022
- └────────────────────┘
1023
- ```
1024
-
1025
- **Sliding window** mixes imports, constructors, and multiple methods into one embedding. Search for "cancel a job" and you get a generic block.
1026
- **AST chunking** gives each method its own embedding. Search for "cancel a job" → direct hit on `cancelJob()`.
1027
-
1028
- #### Results (Production NestJS Backend — 3,753 lines)
1029
-
1030
- Tested with 10 domain-specific queries on 8 service files (`orders.service.ts`, `bookings.service.ts`, `notifications.service.ts`, etc.):
1031
-
1032
- | Metric | Sliding Window | Tree-Sitter AST |
1033
- |--------|:-:|:-:|
1034
- | **Query Wins** | 0/10 | **8/10** (2 ties) |
1035
- | **Top-1 Relevant** | 3/10 | **8/10** |
1036
- | **Avg Precision@3** | 1.1/3 | **1.7/3** |
1037
- | **Avg Score Delta** | — | **+0.035** |
131
+ Early benchmarks on Apple Silicon — single SQLite file, no external vector DB.
1038
132
 
1039
- #### Per-Query Breakdown
133
+ | Benchmark | Corpus | Metric | Score |
134
+ |-----------|--------|--------|:-----:|
135
+ | [BEIR SciFact](https://github.com/beir-cellar/beir) | 5,183 scientific abstracts, 300 queries | NDCG@10 | **0.761** |
136
+ | Custom RAG eval | 127 Pinecall.io docs, 20 queries — 1 miss | R@5 | **83%** |
1040
137
 
1041
- | Query | SW Top Result | AST Top Result | Δ Score |
1042
- |-------|:---:|:---:|:---:|
1043
- | cancel an order | generic `L451-458` | **`updateOrderStatus`** | +0.005 |
1044
- | create a booking | generic `L451-458` | **`createInstantBooking`** | +0.068 |
1045
- | confirm booking | generic `L451-458` | **`confirm`** | +0.034 |
1046
- | send notification | generic `L226-305` | **`publishNotificationEvent`** | +0.034 |
1047
- | authenticate JWT | generic `L1-80` | **`AuthModule`** | +0.032 |
1048
- | tenant DB connection | `L76-155` | **`onModuleDestroy`** | +0.037 |
1049
- | list orders paginated | `L76-155` | **`findAllActive`** | +0.045 |
1050
- | reject booking | generic `L451-458` | **`reject`** | +0.090 |
138
+ **Pipeline progression** — each stage's impact on the custom eval:
1051
139
 
1052
- > Notice how the sliding window returns the **same generic block `L451-458`** for 4 different queries. The AST chunker returns a different, correctly named method each time.
140
+ | Stage | R@5 | Δ |
141
+ |-------|:---:|---|
142
+ | Vector-only (HNSW) | 57% | — |
143
+ | + BM25 → RRF | 78% | +21pp |
144
+ | + Qwen3 reranker | 83% | +5pp |
1053
145
 
1054
- #### Chunk Quality Comparison
146
+ > More benchmarks (code+graph retrieval, large-scale stress tests, multi-provider comparisons) are in progress.
147
+ > Full methodology and reproduction commands → [docs/benchmarks.md](docs/benchmarks.md)
1055
148
 
1056
- | | Sliding Window | Tree-Sitter AST |
1057
- |---|:-:|:-:|
1058
- | Total chunks | 53 | **83** |
1059
- | Avg lines/chunk | 75 | **39** |
1060
- | Named chunks | 0 | **83** (100%) |
1061
- | Chunk types | `block` | `method`, `interface`, `class` |
149
+ ## Contributing
1062
150
 
1063
- ### Grammar Support
1064
-
1065
- All 9 core grammars verified, each parsing in **<0.05ms**:
1066
-
1067
- | Language | AST Nodes Extracted | Parse Time |
1068
- |----------|:---:|:---:|
1069
- | TypeScript | `export_statement`, `interface_declaration` | 0.04ms |
1070
- | JavaScript | `function_declaration` × 3 | 0.04ms |
1071
- | Python | `class_definition`, `function_definition` × 2 | 0.03ms |
1072
- | Go | `function_declaration`, `method_declaration` × 3 | 0.04ms |
1073
- | Rust | `struct_item`, `impl_item`, `function_item` | 0.03ms |
1074
- | Ruby | `class`, `method` | 0.03ms |
1075
- | Java | `class_declaration` | 0.02ms |
1076
- | C | `function_definition` × 3 | 0.05ms |
1077
- | PHP | `class_declaration` | 0.03ms |
1078
-
1079
- > Additional grammars available: C++, Swift, C#, Kotlin, Scala, Lua, Elixir, Bash, HTML, CSS
1080
-
1081
- ### Running Benchmarks
1082
-
1083
- ```bash
1084
- # Grammar support (9 languages, parse speed)
1085
- node test/benchmarks/grammar-support.mjs
1086
-
1087
- # Search quality A/B (uses BrainBank's own source files)
1088
- node test/benchmarks/search-quality.mjs
1089
- ```
1090
-
1091
- ---
1092
-
1093
- ## Architecture
1094
-
1095
- <details>
1096
- <summary>Text version</summary>
1097
-
1098
- ```
1099
- ┌──────────────────────────────────────────────────────┐
1100
- │ BrainBank Core │
1101
- │ .use(code) .use(git) .use(docs) │
1102
- │ .collection('name') │
1103
- ├──────────────────────────────────────────────────────┤
1104
- │ │
1105
- │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌────────────┐│
1106
- │ │ Code │ │ Git │ │ Docs │ │ Collection ││
1107
- │ │ Indexer │ │ Indexer │ │ Indexer │ │ (dynamic) ││
1108
- │ └────┬────┘ └────┬────┘ └────┬────┘ └─────┬──────┘│
1109
- │ │ │ │ │ │
1110
- │ ┌────▼────┐ ┌────▼────┐ ┌────▼────┐ ┌─────▼──────┐│
1111
- │ │ HNSW │ │ HNSW │ │ HNSW │ │ Shared KV ││
1112
- │ │ Index │ │ Index │ │ Index │ │ HNSW Index ││
1113
- │ └─────────┘ └─────────┘ └─────────┘ └────────────┘│
1114
- │ │
1115
- │ ┌──────────────────────────────────────────────────┐│
1116
- │ │ SQLite (.brainbank/brainbank.db) ││
1117
- │ │ code_chunks │ git_commits │ doc_chunks ││
1118
- │ │ kv_data │ FTS5 full-text │ vectors │ co_edits ││
1119
- │ └──────────────────────────────────────────────────┘│
1120
- │ │
1121
- │ ┌──────────────────────────────────────────────────┐│
1122
- │ │ Embedding (Local WASM 384d │ OpenAI 1536d) ││
1123
- │ └──────────────────────────────────────────────────┘│
1124
- │ ┌──────────────────────────────────────────────────┐│
1125
- │ │ Qwen3-Reranker (opt-in cross-encoder) ││
1126
- │ └──────────────────────────────────────────────────┘│
1127
- └──────────────────────────────────────────────────────┘
1128
- ```
1129
- </details>
1130
-
1131
- ### Search Pipeline
1132
-
1133
- ```
1134
- Query
1135
-
1136
- ├──► Vector Search (HNSW k-NN) ──► candidates
1137
- ├──► Keyword Search (BM25/FTS5) ──► candidates
1138
-
1139
-
1140
- Reciprocal Rank Fusion (RRF, k=60)
1141
-
1142
-
1143
- Qwen3-Reranker (yes/no + logprobs → score 0-1)
1144
-
1145
-
1146
- Position-Aware Blend
1147
- Top 1-3: 75% RRF / 25% reranker
1148
- Top 4-10: 60% RRF / 40% reranker
1149
- Top 11+: 40% RRF / 60% reranker
1150
-
1151
-
1152
- Final results (sorted by blended score)
1153
- ```
1154
-
1155
- ### Data Flow
1156
-
1157
- 1. **Index** — Indexers parse files into chunks (tree-sitter AST for code, heading-based for docs)
1158
- 2. **Embed** — Each chunk gets a vector (local WASM or OpenAI)
1159
- 3. **Store** — Chunks + vectors → SQLite, vectors → HNSW index
1160
- 4. **Search** — Query → HNSW k-NN + BM25 keyword → RRF fusion → optional reranker
1161
- 5. **Context** — Top results formatted as markdown for system prompts
1162
-
1163
- ---
1164
-
1165
- ## Testing
1166
-
1167
- ```bash
1168
- npm test # Unit tests (129 tests)
1169
- npm test -- --integration # Full suite (211 tests, includes real models + all domains)
1170
- npm test -- --filter code # Filter by test name
1171
- npm test -- --verbose # Show assertion details
1172
- ```
1173
-
1174
- ---
151
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines.
1175
152
 
1176
153
  ## License
1177
154
 
1178
- MIT
155
+ [MIT](LICENSE)