brainbank 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/README.md +76 -1327
  2. package/bin/brainbank +5 -1
  3. package/dist/{chunk-N2OJRXSB.js → chunk-3HVCONGF.js} +1 -1
  4. package/dist/{chunk-N2OJRXSB.js.map → chunk-3HVCONGF.js.map} +1 -1
  5. package/dist/{chunk-CCXVL56V.js → chunk-3JZIM5AU.js} +6 -3
  6. package/dist/chunk-3JZIM5AU.js.map +1 -0
  7. package/dist/{chunk-6XOXM7MI.js → chunk-5KU2PP34.js} +2 -2
  8. package/dist/{chunk-6XOXM7MI.js.map → chunk-5KU2PP34.js.map} +1 -1
  9. package/dist/chunk-7JDCHUJV.js +89 -0
  10. package/dist/chunk-7JDCHUJV.js.map +1 -0
  11. package/dist/{chunk-B77KABWH.js → chunk-7T2ZCZQA.js} +17 -15
  12. package/dist/chunk-7T2ZCZQA.js.map +1 -0
  13. package/dist/chunk-E3J37GDA.js +74 -0
  14. package/dist/chunk-E3J37GDA.js.map +1 -0
  15. package/dist/chunk-JEFWMS5Z.js +217 -0
  16. package/dist/chunk-JEFWMS5Z.js.map +1 -0
  17. package/dist/chunk-JRVYSTMP.js +3256 -0
  18. package/dist/chunk-JRVYSTMP.js.map +1 -0
  19. package/dist/chunk-OLDHLOMT.js +69 -0
  20. package/dist/chunk-OLDHLOMT.js.map +1 -0
  21. package/dist/{chunk-424UFCY7.js → chunk-QTZNB6AK.js} +6 -2
  22. package/dist/chunk-QTZNB6AK.js.map +1 -0
  23. package/dist/chunk-RFF7HMP6.js +109 -0
  24. package/dist/chunk-RFF7HMP6.js.map +1 -0
  25. package/dist/{chunk-ZNLN2VWV.js → chunk-TD3TEFI3.js} +1 -1
  26. package/dist/chunk-TD3TEFI3.js.map +1 -0
  27. package/dist/cli.js +1040 -280
  28. package/dist/cli.js.map +1 -1
  29. package/dist/haiku-expander-WOVJIVXD.js +8 -0
  30. package/dist/haiku-pruner-DB77ZQLJ.js +8 -0
  31. package/dist/http-server-GIRELCCL.js +9 -0
  32. package/dist/index.d.ts +1774 -611
  33. package/dist/index.js +282 -70
  34. package/dist/index.js.map +1 -1
  35. package/dist/{local-embedding-ZIMTK6PU.js → local-embedding-2RNCC5EU.js} +2 -2
  36. package/dist/{openai-embedding-VQZCZQYT.js → openai-embedding-Z5I4K4CN.js} +2 -2
  37. package/dist/perplexity-context-embedding-V5YUMXDR.js +9 -0
  38. package/dist/{perplexity-embedding-227WQY4R.js → perplexity-embedding-X2S72OAC.js} +2 -2
  39. package/dist/plugin-FF4Q34TI.js +32 -0
  40. package/dist/{qwen3-reranker-3MHEENT5.js → qwen3-reranker-HVIQOLKS.js} +2 -2
  41. package/dist/{resolve-CUJWY6HP.js → resolve-Q5D6HECY.js} +2 -2
  42. package/package.json +25 -52
  43. package/src/brainbank.ts +620 -0
  44. package/src/cli/commands/collection.ts +77 -0
  45. package/src/cli/commands/context.ts +171 -0
  46. package/src/cli/commands/daemon.ts +100 -0
  47. package/src/cli/commands/docs.ts +71 -0
  48. package/src/cli/commands/files.ts +69 -0
  49. package/src/cli/commands/help.ts +72 -0
  50. package/src/cli/commands/index.ts +282 -0
  51. package/src/cli/commands/kv.ts +140 -0
  52. package/src/cli/commands/mcp.ts +13 -0
  53. package/src/cli/commands/reembed.ts +30 -0
  54. package/src/cli/commands/scan.ts +365 -0
  55. package/src/cli/commands/search.ts +130 -0
  56. package/src/cli/commands/stats.ts +44 -0
  57. package/src/cli/commands/status.ts +47 -0
  58. package/src/cli/commands/watch.ts +43 -0
  59. package/src/cli/factory/brain-context.ts +43 -0
  60. package/src/cli/factory/builtin-registration.ts +123 -0
  61. package/src/cli/factory/config-loader.ts +72 -0
  62. package/src/cli/factory/index.ts +65 -0
  63. package/src/cli/factory/plugin-loader.ts +146 -0
  64. package/src/cli/index.ts +63 -0
  65. package/src/cli/server-client.ts +135 -0
  66. package/src/cli/utils.ts +121 -0
  67. package/src/config.ts +50 -0
  68. package/src/constants.ts +13 -0
  69. package/src/db/adapter.ts +112 -0
  70. package/src/db/metadata.ts +130 -0
  71. package/src/db/migrations.ts +66 -0
  72. package/src/db/sqlite-adapter.ts +208 -0
  73. package/src/db/tracker.ts +91 -0
  74. package/src/engine/index-api.ts +85 -0
  75. package/src/engine/reembed.ts +206 -0
  76. package/src/engine/search-api.ts +222 -0
  77. package/src/index.ts +159 -0
  78. package/src/lib/fts.ts +57 -0
  79. package/src/lib/languages.ts +180 -0
  80. package/src/lib/logger.ts +125 -0
  81. package/src/lib/math.ts +87 -0
  82. package/src/lib/provider-key.ts +20 -0
  83. package/src/lib/prune.ts +71 -0
  84. package/src/lib/rerank.ts +33 -0
  85. package/src/lib/rrf.ts +133 -0
  86. package/src/lib/write-lock.ts +108 -0
  87. package/src/plugin.ts +323 -0
  88. package/src/providers/embeddings/embedding-worker-thread.ts +95 -0
  89. package/src/providers/embeddings/embedding-worker.ts +141 -0
  90. package/src/providers/embeddings/local-embedding.ts +115 -0
  91. package/src/providers/embeddings/openai-embedding.ts +167 -0
  92. package/src/providers/embeddings/perplexity-context-embedding.ts +195 -0
  93. package/src/providers/embeddings/perplexity-embedding.ts +165 -0
  94. package/src/providers/embeddings/resolve.ts +34 -0
  95. package/src/providers/pruners/haiku-expander.ts +152 -0
  96. package/src/providers/pruners/haiku-pruner.ts +112 -0
  97. package/src/providers/rerankers/qwen3-reranker.ts +180 -0
  98. package/src/providers/vector/hnsw-index.ts +174 -0
  99. package/src/providers/vector/hnsw-loader.ts +129 -0
  100. package/src/search/bm25-boost.ts +61 -0
  101. package/src/search/context-builder.ts +298 -0
  102. package/src/search/keyword/composite-bm25-search.ts +62 -0
  103. package/src/search/types.ts +35 -0
  104. package/src/search/vector/composite-vector-search.ts +76 -0
  105. package/src/search/vector/mmr.ts +64 -0
  106. package/src/services/collection.ts +405 -0
  107. package/src/services/daemon.ts +87 -0
  108. package/src/services/http-server.ts +288 -0
  109. package/src/services/kv-service.ts +65 -0
  110. package/src/services/plugin-registry.ts +109 -0
  111. package/src/services/watch.ts +348 -0
  112. package/src/services/webhook-server.ts +100 -0
  113. package/src/types.ts +504 -0
  114. package/dist/base-B_vJSAbj.d.ts +0 -593
  115. package/dist/chunk-424UFCY7.js.map +0 -1
  116. package/dist/chunk-7EZR47JV.js +0 -232
  117. package/dist/chunk-7EZR47JV.js.map +0 -1
  118. package/dist/chunk-B77KABWH.js.map +0 -1
  119. package/dist/chunk-C4KDZGRX.js +0 -625
  120. package/dist/chunk-C4KDZGRX.js.map +0 -1
  121. package/dist/chunk-CCXVL56V.js.map +0 -1
  122. package/dist/chunk-HPNUMUIF.js +0 -312
  123. package/dist/chunk-HPNUMUIF.js.map +0 -1
  124. package/dist/chunk-PXK62M5W.js +0 -753
  125. package/dist/chunk-PXK62M5W.js.map +0 -1
  126. package/dist/chunk-U2Q2XGPZ.js +0 -42
  127. package/dist/chunk-U2Q2XGPZ.js.map +0 -1
  128. package/dist/chunk-VVXYZIIB.js +0 -304
  129. package/dist/chunk-VVXYZIIB.js.map +0 -1
  130. package/dist/chunk-YC4ZQLDN.js +0 -2431
  131. package/dist/chunk-YC4ZQLDN.js.map +0 -1
  132. package/dist/chunk-YOLKSYWK.js +0 -79
  133. package/dist/chunk-YOLKSYWK.js.map +0 -1
  134. package/dist/chunk-ZNLN2VWV.js.map +0 -1
  135. package/dist/code.d.ts +0 -31
  136. package/dist/code.js +0 -9
  137. package/dist/docs.d.ts +0 -17
  138. package/dist/docs.js +0 -9
  139. package/dist/git.d.ts +0 -31
  140. package/dist/git.js +0 -9
  141. package/dist/memory.d.ts +0 -17
  142. package/dist/memory.js +0 -9
  143. package/dist/notes.d.ts +0 -17
  144. package/dist/notes.js +0 -10
  145. package/dist/perplexity-context-embedding-KSVSZXMD.js +0 -9
  146. package/dist/resolve-CUJWY6HP.js.map +0 -1
  147. /package/dist/{code.js.map → haiku-expander-WOVJIVXD.js.map} +0 -0
  148. /package/dist/{docs.js.map → haiku-pruner-DB77ZQLJ.js.map} +0 -0
  149. /package/dist/{git.js.map → http-server-GIRELCCL.js.map} +0 -0
  150. /package/dist/{local-embedding-ZIMTK6PU.js.map → local-embedding-2RNCC5EU.js.map} +0 -0
  151. /package/dist/{memory.js.map → openai-embedding-Z5I4K4CN.js.map} +0 -0
  152. /package/dist/{notes.js.map → perplexity-context-embedding-V5YUMXDR.js.map} +0 -0
  153. /package/dist/{openai-embedding-VQZCZQYT.js.map → perplexity-embedding-X2S72OAC.js.map} +0 -0
  154. /package/dist/{perplexity-context-embedding-KSVSZXMD.js.map → plugin-FF4Q34TI.js.map} +0 -0
  155. /package/dist/{perplexity-embedding-227WQY4R.js.map → qwen3-reranker-HVIQOLKS.js.map} +0 -0
  156. /package/dist/{qwen3-reranker-3MHEENT5.js.map → resolve-Q5D6HECY.js.map} +0 -0
package/README.md CHANGED
@@ -4,1394 +4,143 @@
4
4
 
5
5
  BrainBank gives LLMs a long-term memory that persists between sessions.
6
6
 
7
- - **All-in-one** — core + code + git + docs + CLI in a single `brainbank` package
8
- - **Pluggable plugins** — `.use()` only what you need (code, git, docs, or custom)
9
- - **Dynamic collections** — `brain.collection('errors')` for any structured data
7
+ - **Pluggable** — `.use()` only what you need: [code](#packages), [git](#packages), [docs](#packages), or [custom](docs/custom-plugins.md)
10
8
  - **Hybrid search** — vector + BM25 fused with Reciprocal Rank Fusion
11
- - **Pluggable embeddings** — local WASM (free), OpenAI, or Perplexity (standard & contextualized)
12
- - **Multi-repo** — index multiple repositories into one shared database
13
- - **Portable** — single `.brainbank/brainbank.db` file
14
- - **Optional packages** — [`@brainbank/memory`](#memory) (fact extraction + entity graph), [`@brainbank/mcp`](#mcp-server) (MCP server)
15
- - **Optional reranker** — Qwen3-0.6B cross-encoder via `Qwen3Reranker` (opt-in)
16
-
17
- ![BrainBank Architecture](assets/architecture.png)
9
+ - **Dynamic collections** — `brain.collection('errors')` for any structured data
10
+ - **Pluggable embeddings** — local WASM (free), OpenAI, or Perplexity
11
+ - **Multi-process safe** — concurrent CLI, MCP, and watch processes with automatic hot-reload
12
+ - **Portable** — single `.brainbank/brainbank.db` SQLite file
13
+ - **Modular** — lightweight core + optional [`@brainbank/*`](#packages) packages
18
14
 
19
15
  ---
20
16
 
21
- ## Why BrainBank?
22
-
23
- Built for a multi-repo codebase that needed unified AI context. Zero infrastructure, zero ongoing cost.
24
-
25
- Most AI memory solutions (mem0, Zep, LangMem) require cloud services, external databases, or LLM calls just to store a memory. BrainBank takes a different approach:
26
-
27
- | | **BrainBank** | **mem0** | **Zep** | **LangMem** |
28
- |---|:---:|:---:|:---:|:---:|
29
- | Infrastructure | **SQLite file** | Vector DB + cloud | Neo4j + cloud | LangGraph Platform |
30
- | LLM required to write | **No**¹ | Yes | Yes | Yes |
31
- | Code-aware | **19 AST-parsed languages (tree-sitter), git, co-edits** | ✗ | ✗ | ✗ |
32
- | Custom plugins | **`.use()` plugin system** | ✗ | ✗ | ✗ |
33
- | Search | **Vector + BM25 + RRF** | Vector + graph² | Vector + BM25 + graph | Vector only |
34
- | Framework lock-in | **None** | Optional | Zep cloud | LangChain |
35
- | Portable | **Copy one file** | Tied to DB | Tied to cloud | Tied to platform |
36
-
37
- > ¹ mem0 and Zep use LLMs to auto-extract memories from raw text. BrainBank is explicit — you decide what gets stored. Less magic, more control.
38
- >
39
- > ² mem0's graph store (mem0g) is available in the paid platform version.
40
-
41
- **In short:**
42
- - **Code-first** — the only memory layer that understands code structure, git history, and file co-edit relationships
43
- - **Framework-agnostic** — plain TypeScript, works with any agent framework (LangChain, Vercel AI SDK, custom) or none at all. Unopinionated — doesn't force you into a specific pattern
44
- - **$0 memory bill** — no LLM calls to extract/consolidate. You store what you want, BrainBank embeds deterministically
45
- - **Truly portable** — `.brainbank/brainbank.db` is a normal file. Copy it, back it up, `git lfs` it
46
-
47
- ### Table of Contents
48
-
49
- - [Why BrainBank?](#why-brainbank)
50
- - [Installation](#installation)
51
- - [Quick Start](#quick-start)
52
- - [CLI](#cli)
53
- - [Programmatic API](#programmatic-api)
54
- - [Plugins](#plugins)
55
- - [Collections](#collections)
56
- - [Search](#search)
57
- - [Document Collections](#document-collections)
58
- - [Context Generation](#context-generation)
59
- - [Custom Plugins](#custom-plugins)
60
- - [AI Agent Integration](#ai-agent-integration)
61
- - [Examples](#examples)
62
- - [Watch Mode](#watch-mode)
63
- - [MCP Server](#mcp-server)
64
- - [Configuration](#configuration)
65
- - [Embedding Providers](#embedding-providers)
66
- - [Reranker](#reranker)
67
- - [Memory](#memory)
68
- - [Multi-Repository Indexing](#multi-repository-indexing)
69
- - [Indexing](#indexing-1)
70
- - [Incremental Indexing](#incremental-indexing)
71
- - [Re-embedding](#re-embedding)
72
- - [Architecture](#architecture)
73
- - [Search Pipeline](#search-pipeline)
74
- - [Benchmarks](#benchmarks)
75
- - [Search Quality: AST vs Sliding Window](#search-quality-ast-vs-sliding-window)
76
- - [Grammar Support](#grammar-support)
77
- - [RAG Retrieval Quality](#rag-retrieval-quality) · [Full Results →](./BENCHMARKS.md)
17
+ <img src="assets/architecture.png" alt="BrainBank Architecture" width="600">
78
18
 
79
19
  ---
80
20
 
81
- ## Installation
82
-
83
- ```bash
84
- npm install brainbank
85
- ```
86
-
87
- ### Optional Packages
88
-
89
- | Package | When to install |
90
- |---------|----------------|
91
- | `@brainbank/memory` | Deterministic memory extraction + entity graph for LLM conversations |
92
- | `@brainbank/mcp` | MCP server for AI tool integration |
93
-
94
- ```bash
95
- # Memory — automatic fact extraction & dedup for chatbots/agents
96
- npm install @brainbank/memory
97
-
98
- # Reranker — built-in, install the runtime dependency to enable
99
- npm install node-llama-cpp
100
-
101
- # MCP server — for Antigravity, Claude Desktop, etc.
102
- npm install @brainbank/mcp
103
- ```
104
-
105
- ### Tree-Sitter Grammars
106
-
107
- BrainBank uses [tree-sitter](https://tree-sitter.github.io/) for AST-aware code chunking. **JavaScript and TypeScript grammars are included by default.** Other languages require installing the corresponding grammar package:
21
+ ## Quick Start
108
22
 
109
23
  ```bash
110
- # Install only the grammars you need
111
- npm install tree-sitter-python tree-sitter-go tree-sitter-rust
24
+ npm i -g brainbank @brainbank/code @brainbank/git @brainbank/docs
112
25
  ```
113
26
 
114
- If you index a file whose grammar isn't installed, BrainBank will throw a clear error:
115
-
116
- ```
117
- BrainBank: Grammar 'tree-sitter-python' is not installed. Run: npm install tree-sitter-python
118
- ```
119
-
120
- <details>
121
- <summary>All available grammars (19 languages)</summary>
122
-
123
- | Category | Packages |
124
- |----------|----------|
125
- | **Included** | `tree-sitter-javascript`, `tree-sitter-typescript` |
126
- | Web | `tree-sitter-html`, `tree-sitter-css` |
127
- | Systems | `tree-sitter-go`, `tree-sitter-rust`, `tree-sitter-c`, `tree-sitter-cpp`, `tree-sitter-swift` |
128
- | JVM | `tree-sitter-java`, `tree-sitter-kotlin`, `tree-sitter-scala` |
129
- | Scripting | `tree-sitter-python`, `tree-sitter-ruby`, `tree-sitter-php`, `tree-sitter-lua`, `tree-sitter-bash`, `tree-sitter-elixir` |
130
- | .NET | `tree-sitter-c-sharp` |
27
+ > If you get `ERESOLVE` errors, use `npm i --legacy-peer-deps` — tree-sitter grammars have overlapping peer dep ranges.
131
28
 
132
- </details>
133
-
134
- ---
135
-
136
- ## Quick Start
137
-
138
- Get semantic search over your codebase in under a minute:
139
-
140
- ```typescript
141
- import { BrainBank } from 'brainbank';
142
- import { code } from 'brainbank/code';
143
- import { git } from 'brainbank/git';
144
-
145
- const brain = new BrainBank({ repoPath: '.' })
146
- .use(code())
147
- .use(git());
148
-
149
- await brain.index(); // indexes code + git history (incremental)
150
-
151
- // Search across everything
152
- const results = await brain.hybridSearch('authentication middleware');
153
- console.log(results.map(r => `${r.filePath}:L${r.metadata?.startLine} (${r.score.toFixed(2)})`));
154
-
155
- // Store agent memory
156
- const log = brain.collection('decisions');
157
- await log.add(
158
- 'Switched from bcrypt to argon2id for password hashing. ' +
159
- 'Argon2id is memory-hard and recommended by OWASP for new projects. ' +
160
- 'Updated src/auth/hash.ts and all tests.',
161
- { tags: ['security', 'auth'] }
162
- );
163
-
164
- // Recall later: "what did we decide about password hashing?"
165
- const hits = await log.search('password hashing decision');
166
-
167
- brain.close();
168
- ```
169
-
170
- Or use the CLI — zero code:
29
+ ### CLI — zero code
171
30
 
172
31
  ```bash
173
- npm install -g brainbank
174
- brainbank index . # index code + git
32
+ brainbank index . # scans repo → interactive select → index
33
+ brainbank index . --yes # skip prompts, auto-select all
175
34
  brainbank hsearch "rate limiting" # hybrid search
176
35
  brainbank kv add decisions "Use Redis..." # store a memory
177
36
  brainbank kv search decisions "caching" # recall it
178
37
  ```
179
38
 
180
- ## CLI
181
-
182
- BrainBank can be used entirely from the command line — no config file needed.
183
-
184
- ### Indexing
185
-
186
- `index` processes **code files + git history** by default. Use `--only` to select specific modules, and `--docs` to include document collections.
187
-
188
- ```bash
189
- brainbank index [path] # Index code + git history
190
- brainbank index [path] --force # Force re-index everything
191
- brainbank index [path] --depth 200 # Limit git commit depth
192
- brainbank index [path] --only code # Index only code (skip git)
193
- brainbank index [path] --only git # Index only git history
194
- brainbank index [path] --docs ~/docs # Include a docs folder
195
- brainbank docs [--collection <name>] # Index document collections
196
- ```
197
-
198
- > **Multi-repo:** If `[path]` contains multiple Git subdirectories (no root `.git/`), BrainBank auto-detects them and indexes all into one shared DB. See [Multi-Repository Indexing](#multi-repository-indexing).
199
-
200
- ### Watch Mode
201
-
202
- Auto-re-index code files when they change. Watches for file changes and re-indexes incrementally:
203
-
204
- ```bash
205
- brainbank watch # Watch repo, auto re-index on save
206
- # ━━━ BrainBank Watch ━━━
207
- # Watching /path/to/repo for changes...
208
- # 14:30:02 ✓ code: src/api.ts
209
- # 14:30:05 ✓ code: src/routes.ts
210
- # 14:30:08 ✓ csv: data/metrics.csv ← custom plugin
211
- ```
212
-
213
- > Watch mode monitors **code files** by default. [Custom plugins](#custom-plugins) that implement `watchPatterns()` and `onFileChange()` are automatically picked up — their name appears in the console output alongside the built-in `code` plugin. Git history and document collections are not affected by file-system changes and must be re-indexed explicitly with `brainbank index` / `brainbank docs`.
214
-
215
- ### Document Collections
216
-
217
- ```bash
218
- brainbank collection add <path> --name docs # Register a document folder
219
- brainbank collection list # List registered collections
220
- brainbank collection remove <name> # Remove a collection
221
- ```
222
-
223
- ### Search
224
-
225
- ```bash
226
- brainbank search <query> # Semantic search (vector)
227
- brainbank hsearch <query> # Hybrid search (best quality)
228
- brainbank ksearch <query> # Keyword search (BM25, instant)
229
- brainbank dsearch <query> # Document search
230
- ```
231
-
232
- ### Context
233
-
234
- ```bash
235
- brainbank context <task> # Get formatted context for a task
236
- brainbank context add <col> <path> <desc> # Add context metadata
237
- brainbank context list # List context metadata
238
- ```
239
-
240
- ### KV Store (dynamic collections)
241
-
242
- ```bash
243
- brainbank kv add <coll> <content> # Add item to a collection
244
- brainbank kv search <coll> <query> # Search a collection
245
- brainbank kv list [coll] # List collections or items
246
- brainbank kv trim <coll> --keep <n> # Keep only N most recent
247
- brainbank kv clear <coll> # Clear all items
248
- ```
249
-
250
- ### Utility
251
-
252
- ```bash
253
- brainbank stats # Show index statistics
254
- brainbank reembed # Re-embed all vectors (provider switch)
255
- brainbank watch # Watch files, auto re-index on change
256
- brainbank serve # Start MCP server (stdio)
257
- ```
258
-
259
- **Global options:** `--repo <path>`, `--force`, `--depth <n>`, `--collection <name>`, `--pattern <glob>`, `--context <desc>`, `--reranker <name>`
260
-
261
- ---
262
-
263
- ## Programmatic API
264
-
265
- Use BrainBank as a library in your TypeScript/Node.js project.
266
-
267
- ### Plugins
268
-
269
- BrainBank uses pluggable plugins. Register only what you need with `.use()`:
270
-
271
- | Plugin | Import | Description |
272
- |---------|--------|-------------|
273
- | `code` | `brainbank/code` | AST-aware code chunking via tree-sitter (19 languages) |
274
- | `git` | `brainbank/git` | Git commit history, diffs, co-edit relationships |
275
- | `docs` | `brainbank/docs` | Document collections (markdown, wikis) |
39
+ ### Programmatic API
276
40
 
277
41
  ```typescript
278
42
  import { BrainBank } from 'brainbank';
279
- import { code } from 'brainbank/code';
280
- import { git } from 'brainbank/git';
281
- import { docs } from 'brainbank/docs';
43
+ import { code } from '@brainbank/code';
44
+ import { git } from '@brainbank/git';
282
45
 
283
- // Pick only the plugins you need
284
46
  const brain = new BrainBank({ repoPath: '.' })
285
47
  .use(code())
286
- .use(git())
287
- .use(docs());
48
+ .use(git());
288
49
 
289
- // Index code + git (incremental — only processes changes)
290
50
  await brain.index();
291
51
 
292
- // Index document collections
293
- await brain.addCollection({ name: 'wiki', path: '~/docs', pattern: '**/*.md' });
294
- await brain.indexDocs();
295
- ```
296
-
297
- ### Collections
298
-
299
- Dynamic key-value collections with semantic search — the building block for agent memory:
300
-
301
- ```typescript
302
- const decisions = brain.collection('decisions');
303
-
304
- // Store rich content (auto-embedded for vector search)
305
- await decisions.add(
306
- 'Use SQLite with WAL mode instead of PostgreSQL. Portable single-file ' +
307
- 'storage, works offline, zero infrastructure.',
308
- { tags: ['architecture'], metadata: { files: ['src/db.ts'] } }
309
- );
310
-
311
- // Semantic search — finds by meaning, not keywords
312
- const hits = await decisions.search('why not postgres');
313
- // → [{ content: 'Use SQLite with WAL...', score: 0.95, tags: [...], metadata: {...} }]
314
-
315
- // Management
316
- decisions.list({ limit: 20 }); // newest first
317
- decisions.list({ tags: ['architecture'] }); // filter by tags
318
- decisions.count(); // total items
319
- decisions.trim({ keep: 50 }); // keep N most recent
320
- decisions.prune({ olderThan: '30d' }); // remove older than 30 days
321
- brain.listCollectionNames(); // → ['decisions', ...]
322
- ```
323
-
324
- > 📂 See [examples/collection](examples/collection/) for a complete runnable demo with cross-collection linking and metadata.
325
-
326
- ### Watch Mode
327
-
328
- Auto-re-index when files change:
329
-
330
- ```typescript
331
- // API
332
- const watcher = brain.watch({
333
- debounceMs: 2000,
334
- onIndex: (file, plugin) => console.log(`${plugin}: ${file}`),
335
- onError: (err) => console.error(err.message),
336
- });
337
-
338
- // Later: watcher.close();
339
- ```
340
-
341
- ```bash
342
- # CLI
343
- brainbank watch
344
- # ━━━ BrainBank Watch ━━━
345
- # Watching /path/to/repo for changes...
346
- # 14:30:02 ✓ code: src/api.ts
347
- # 14:30:05 ✓ code: src/routes.ts
348
- ```
349
-
350
- #### Custom Plugin Watch
351
-
352
- Custom plugins can hook into watch mode by implementing `onFileChange` and `watchPatterns`:
353
-
354
- ```typescript
355
- import type { Plugin, PluginContext } from 'brainbank';
356
-
357
- function csvPlugin(): Plugin {
358
- let ctx: PluginContext;
359
-
360
- return {
361
- name: 'csv',
362
-
363
- async initialize(context) {
364
- ctx = context;
365
- },
366
-
367
- // Tell watch which files this plugin cares about
368
- watchPatterns() {
369
- return ['**/*.csv', '**/*.tsv'];
370
- },
371
-
372
- // Called when a watched file changes
373
- async onFileChange(filePath, event) {
374
- if (event === 'delete') return true;
375
-
376
- const data = fs.readFileSync(filePath, 'utf-8');
377
- const col = ctx.collection('csv_data');
378
- await col.add(data, {
379
- tags: ['csv'],
380
- metadata: { file: filePath },
381
- });
382
- return true; // handled
383
- },
384
- };
385
- }
386
-
387
- const brain = new BrainBank({ dbPath: './brain.db' })
388
- .use(code())
389
- .use(csvPlugin());
390
-
391
- await brain.initialize();
392
- brain.watch(); // Now watches .ts, .py, etc. AND .csv, .tsv
393
- ```
394
-
395
- ### Search
396
-
397
- Three modes, from fastest to best quality:
398
-
399
- | Mode | Method | Speed | Quality |
400
- |------|--------|-------|---------|
401
- | Keyword | `searchBM25(q)` | ⚡ instant | Good for exact terms |
402
- | Vector | `search(q)` | ~50ms | Good for concepts |
403
- | **Hybrid** | `hybridSearch(q)` | ~100ms | **Best — catches both** |
404
-
405
- ```typescript
406
- // Hybrid search (recommended default)
407
52
  const results = await brain.hybridSearch('authentication middleware');
408
53
 
409
- // Scoped search
410
- const codeHits = await brain.searchCode('parse JSON config', 8);
411
- const commitHits = await brain.searchCommits('fix auth bug', 5);
412
- const docHits = await brain.searchDocs('getting started', { collection: 'wiki' });
413
- ```
414
-
415
- | Score | Meaning |
416
- |-------|---------|
417
- | 0.8+ | Near-exact match |
418
- | 0.5–0.8 | Strongly related |
419
- | 0.3–0.5 | Somewhat related |
420
- | < 0.3 | Weak match |
421
-
422
- ### Document Collections
423
-
424
- Register folders of documents. Files are chunked by heading structure:
425
-
426
- ```typescript
427
- await brain.addCollection({
428
- name: 'docs',
429
- path: '~/project/docs',
430
- pattern: '**/*.md',
431
- ignore: ['**/drafts/**'],
432
- context: 'Project documentation',
433
- });
434
-
435
- await brain.indexDocs();
436
-
437
- // Add context metadata (helps LLM understand what documents are about)
438
- brain.addContext('docs', '/api', 'REST API reference');
439
- brain.addContext('docs', '/guides', 'Step-by-step tutorials');
440
- ```
441
-
442
- ### Context Generation
443
-
444
- Get formatted markdown ready for system prompt injection:
445
-
446
- ```typescript
447
- const context = await brain.getContext('add rate limiting to the API', {
448
- codeResults: 6,
449
- gitResults: 5,
450
- affectedFiles: ['src/api/routes.ts'],
451
- useMMR: true,
452
- });
453
- // Returns: ## Relevant Code, ## Git History, ## Relevant Documents
454
- ```
455
-
456
- ### Custom Plugins
457
-
458
- Implement the `Plugin` interface to build your own:
459
-
460
- ```typescript
461
- import type { Plugin, PluginContext } from 'brainbank';
462
-
463
- const myPlugin: Plugin = {
464
- name: 'custom',
465
- async initialize(ctx: PluginContext) {
466
- // ctx.db — shared SQLite database
467
- // ctx.embedding — shared embedding provider
468
- // ctx.collection() — create dynamic collections
469
- const store = ctx.collection('my_data');
470
- await store.add('indexed content', { source: 'custom' });
471
- },
472
- };
473
-
474
- brain.use(myPlugin);
475
- ```
476
-
477
- #### Using custom plugins with the CLI
478
-
479
- Drop `.ts` files into `.brainbank/indexers/` — the CLI auto-discovers them:
480
-
481
- ```
482
- .brainbank/
483
- ├── brainbank.db
484
- └── indexers/
485
- ├── slack.ts
486
- └── jira.ts
487
- ```
488
-
489
- Each file exports a default `Plugin`:
490
-
491
- ```typescript
492
- // .brainbank/indexers/slack.ts
493
- import type { Plugin } from 'brainbank';
494
-
495
- export default {
496
- name: 'slack',
497
- async initialize(ctx) {
498
- const msgs = ctx.collection('slack_messages');
499
- // ... fetch and index slack messages
500
- },
501
- } satisfies Plugin;
502
- ```
503
-
504
- That's it — all CLI commands automatically pick up your plugins:
505
-
506
- ```bash
507
- brainbank index # runs code + git + docs + slack + jira
508
- brainbank stats # shows all plugins
509
- brainbank kv search slack_messages "deploy" # search slack data
510
- ```
511
-
512
- #### Advanced: config file
513
-
514
- For fine-grained control, create a `.brainbank/config.ts`:
515
-
516
- ```typescript
517
- // .brainbank/config.ts
518
- export default {
519
- builtins: ['code', 'docs'], // exclude git (default: all three)
520
- brainbank: { // BrainBank constructor options
521
- dbPath: '.brainbank/brain.db',
522
- },
523
- };
524
- ```
525
-
526
- Everything lives in `.brainbank/` — DB, config, and custom plugins:
527
-
528
- ```
529
- .brainbank/
530
- ├── brainbank.db # SQLite database (auto-created)
531
- ├── config.ts # Optional project config
532
- └── indexers/ # Optional custom plugin files
533
- ├── slack.ts
534
- └── jira.ts
535
- ```
536
-
537
- No folder and no config file? The CLI uses the built-in plugins (`code`, `git`, `docs`).
538
-
539
- ---
540
-
541
- ### AI Agent Integration
542
-
543
- Teach your AI coding agent to use BrainBank as persistent memory. Add an `AGENTS.md` (or `.cursor/rules`) to your project root — works with **Antigravity**, **Claude Code**, **Cursor**, and anything that reads project-level instructions.
544
-
545
- <details>
546
- <summary><strong>Option A: CLI commands</strong> (zero setup)</summary>
547
-
548
- > **Memory — BrainBank**
549
- >
550
- > **Store** a conversation summary after each task:
551
- > `brainbank kv add conversations "Refactored auth to AuthService with DI. JWT + refresh tokens + RBAC."`
552
- >
553
- > **Record** architecture decisions:
554
- > `brainbank kv add decisions "ADR: Fastify over Express. 2x throughput, schema validation, native TS."`
555
- >
556
- > **Search** before starting work:
557
- > `brainbank hsearch "auth middleware"` · `brainbank kv search decisions "auth"`
558
-
559
- </details>
560
-
561
- <details>
562
- <summary><strong>Option B: MCP tools</strong> (richer integration)</summary>
563
-
564
- > **Memory — BrainBank (MCP)**
565
- >
566
- > Use the BrainBank MCP tools for persistent agent memory:
567
- >
568
- > **Store** via `brainbank_kv_add`:
569
- > `{ collection: "conversations", content: "Refactored auth to AuthService with DI.", tags: ["auth"] }`
570
- >
571
- > **Search** via `brainbank_kv_search`:
572
- > `{ collection: "decisions", query: "authentication approach" }`
573
- >
574
- > **Code search** via `brainbank_hybrid_search`:
575
- > `{ query: "auth middleware", repo: "." }`
576
-
577
- </details>
578
-
579
- #### Setup
580
-
581
- | Agent | How to connect |
582
- |-------|---------------|
583
- | **Antigravity** | Add `AGENTS.md` to project root |
584
- | **Claude Code** | Add `AGENTS.md` to project root |
585
- | **Cursor** | Add rules in `.cursor/rules` |
586
- | **MCP** (any agent) | See [MCP Server](#mcp-server) config below |
587
-
588
- #### Custom Plugin: Auto-Ingest Conversation Logs
589
-
590
- For agents that produce structured logs (e.g. Antigravity's `brain/` directory), auto-index them:
591
-
592
- ```typescript
593
- // .brainbank/indexers/conversations.ts
594
- import type { Plugin, PluginContext } from 'brainbank';
595
- import * as fs from 'node:fs';
596
- import * as path from 'node:path';
597
-
598
- export default {
599
- name: 'conversations',
600
- async initialize(ctx: PluginContext) {
601
- const conversations = ctx.collection('conversations');
602
- const logsDir = path.join(ctx.config.repoPath, '.gemini/antigravity/brain');
603
- if (!fs.existsSync(logsDir)) return;
604
-
605
- for (const dir of fs.readdirSync(logsDir)) {
606
- const file = path.join(logsDir, dir, '.system_generated/logs/overview.txt');
607
- if (!fs.existsSync(file)) continue;
608
- const content = fs.readFileSync(file, 'utf-8');
609
- if (content.length < 100) continue;
610
- await conversations.add(content, {
611
- tags: ['auto'],
612
- metadata: { session: dir, source: 'antigravity' },
613
- });
614
- }
615
- },
616
- } satisfies Plugin;
617
- ```
618
-
619
- ```bash
620
- brainbank index # now auto-indexes conversation logs alongside code + git
621
- brainbank kv search conversations "what did we decide about auth"
622
- ```
623
-
624
- ### Examples
625
-
626
- | Example | Description | Run |
627
- |---------|-------------|-----|
628
- | [rag](examples/rag/) | RAG chatbot — docs retrieval + generation | `OPENAI_API_KEY=sk-... PERPLEXITY_API_KEY=pplx-... npx tsx examples/rag/rag.ts --docs <path>` |
629
- | [memory](examples/memory/) | Memory chatbot — fact extraction + entity graph | `OPENAI_API_KEY=sk-... npx tsx examples/memory/memory.ts` |
630
- | [collection](examples/collection/) | Collections, semantic search, tags, metadata linking | `npx tsx examples/collection/collection.ts` |
631
-
632
- ---
633
-
634
- ## MCP Server
635
-
636
- BrainBank ships with an MCP server (stdio) for AI tool integration.
637
-
638
- ```bash
639
- brainbank serve
640
- ```
641
-
642
- ### Antigravity / Claude Desktop
643
-
644
- Add to your MCP config (`~/.gemini/antigravity/mcp_config.json` or Claude Desktop settings):
645
-
646
- ```json
647
- {
648
- "mcpServers": {
649
- "brainbank": {
650
- "command": "npx",
651
- "args": ["-y", "@brainbank/mcp"]
652
- }
653
- }
654
- }
655
- ```
656
-
657
- **Zero-config.** The MCP server auto-detects:
658
- - **Repo path** — from `repo` tool param > `BRAINBANK_REPO` env > `findRepoRoot(cwd)`
659
- - **Embedding provider** — from `provider_key` stored in the DB (set during `brainbank index --embedding openai`)
660
-
661
- > [!TIP]
662
- > Index your repo once with the CLI to set up the embedding provider:
663
- > ```bash
664
- > brainbank index . --embedding openai # stores provider_key=openai in DB
665
- > ```
666
- > After that, the MCP server (and any future CLI runs) auto-resolve the correct provider from the DB — no env vars needed.
667
-
668
- > [!NOTE]
669
- > If you switch embedding providers (e.g. local → OpenAI), run `brainbank reembed` to regenerate all vectors. BrainBank auto-detects dimension mismatches and warns you.
670
-
671
- ### Available Tools
672
-
673
- | Tool | Description |
674
- |------|-------------|
675
- | `brainbank_search` | Unified search — `mode: hybrid` (default), `vector`, or `keyword` |
676
- | `brainbank_context` | Formatted context block for a task (code + git + co-edits) |
677
- | `brainbank_index` | Trigger incremental code/git/docs indexing |
678
- | `brainbank_stats` | Index statistics (files, commits, chunks, collections) |
679
- | `brainbank_history` | Git history for a specific file |
680
- | `brainbank_collection` | KV collection ops — `action: add`, `search`, or `trim` |
681
-
682
- ---
683
-
684
- ## Configuration
685
-
686
- ```typescript
687
- import { BrainBank, OpenAIEmbedding } from 'brainbank';
688
- import { Qwen3Reranker } from 'brainbank'; // built-in, requires node-llama-cpp
689
-
690
- const brain = new BrainBank({
691
- repoPath: '.',
692
- dbPath: '.brainbank/brainbank.db',
693
- gitDepth: 500,
694
- maxFileSize: 512_000,
695
- embeddingDims: 1536,
696
- maxElements: 2_000_000,
697
- embeddingProvider: new OpenAIEmbedding(), // or: omit for free local WASM (384d)
698
- reranker: new Qwen3Reranker(), // local cross-encoder (auto-downloads ~640MB)
699
- });
700
- ```
701
-
702
- ### Embedding Providers
703
-
704
- | Provider | Import | Dims | Speed | Cost |
705
- |----------|--------|------|-------|------|
706
- | **Local (default)** | built-in | 384 | ⚡ 0ms | Free |
707
- | **OpenAI** | `OpenAIEmbedding` | 1536 | ~100ms | $0.02/1M tokens |
708
- | **Perplexity** | `PerplexityEmbedding` | 2560 (4b) / 1024 (0.6b) | ~100ms | $0.02/1M tokens |
709
- | **Perplexity Context** | `PerplexityContextEmbedding` | 2560 (4b) / 1024 (0.6b) | ~100ms | $0.06/1M tokens |
710
-
711
- #### OpenAI
712
-
713
- ```typescript
714
- import { OpenAIEmbedding } from 'brainbank';
715
-
716
- new OpenAIEmbedding(); // uses OPENAI_API_KEY env var
717
- new OpenAIEmbedding({
718
- model: 'text-embedding-3-large',
719
- dims: 512, // Matryoshka reduction
720
- apiKey: 'sk-...',
721
- baseUrl: 'https://my-proxy.com/v1/embeddings',
722
- });
723
- ```
724
-
725
- #### Perplexity (Standard)
726
-
727
- Best for independent texts, queries, and code chunks.
728
-
729
- ```typescript
730
- import { PerplexityEmbedding } from 'brainbank';
731
-
732
- new PerplexityEmbedding(); // uses PERPLEXITY_API_KEY env var
733
- new PerplexityEmbedding({
734
- model: 'pplx-embed-v1-0.6b', // smaller, faster (1024d)
735
- dims: 512, // Matryoshka reduction
736
- });
737
- ```
738
-
739
- #### Perplexity (Contextualized)
740
-
741
- Chunks share document context → better retrieval for related code/docs.
742
-
743
- ```typescript
744
- import { PerplexityContextEmbedding } from 'brainbank';
745
-
746
- new PerplexityContextEmbedding(); // uses PERPLEXITY_API_KEY env var
747
- new PerplexityContextEmbedding({
748
- model: 'pplx-embed-context-v1-0.6b', // smaller, faster (1024d)
749
- dims: 512, // Matryoshka reduction
750
- });
751
- ```
752
-
753
- #### Benchmarks
754
-
755
- Real benchmarks on a production NestJS backend (1052 code chunks + git history):
756
-
757
- | Provider | Dims | Index Time | Avg Search | Cost |
758
- |----------|------|------------|------------|------|
759
- | **Local WASM** | 384 | 87s | **8ms** | Free |
760
- | **OpenAI** | 1536 | 106s | 202ms | $0.02/1M tok |
761
- | **Perplexity** | 2560 | **66s** ⚡ | 168ms | $0.02/1M tok |
762
- | **Perplexity Context** | 2560 | 78s | 135ms | $0.06/1M tok |
763
-
764
- - **Fastest indexing:** Perplexity standard — 38% faster than OpenAI
765
- - **Fastest search (API):** Perplexity Context — 33% faster than OpenAI
766
- - **Fastest search (total):** Local WASM — no network latency
767
- - **Best context awareness:** Perplexity Context — finds semantically related chunks others miss
768
-
769
- > [!WARNING]
770
- > Switching embedding provider (e.g. local → OpenAI) changes the vector dimensions. BrainBank will **refuse to initialize** if the stored dimensions don't match the current provider. Use `initialize({ force: true })` and then `reembed()` to migrate, or switch back to the original provider.
771
-
772
- ### Reranker
773
-
774
- BrainBank ships with an optional cross-encoder reranker using **Qwen3-Reranker-0.6B** via `node-llama-cpp`. It runs 100% locally — no API keys needed. The reranker is **disabled by default**.
775
-
776
- ```bash
777
- # Only requirement — the LLM runtime (model auto-downloads on first use)
778
- npm install node-llama-cpp
779
- ```
780
-
781
- #### When to Use It
782
-
783
- The reranker runs local neural inference on every search result, which improves ranking precision but adds significant latency. Here are real benchmarks on a ~2100 file / 4000+ chunk codebase:
784
-
785
- | Metric | Without Reranker | With Reranker |
786
- |--------|-----------------|---------------|
787
- | **Warm query time** | ~480ms | ~5500ms |
788
- | **Cold start** | ~7s | ~12s |
789
- | **Memory overhead** | — | +640MB (model) |
790
- | **Ranking quality** | Good (RRF) | Slightly better |
791
-
792
- **Recommended:** Leave it disabled for interactive use (MCP, IDE integrations). The RRF fusion of vector + BM25 already produces high-quality results. Enable it only for:
793
-
794
- - Batch processing where latency doesn't matter
795
- - Very large codebases (50k+ files) where false positives are costly
796
- - Server environments with RAM to spare
797
-
798
- #### Enabling the Reranker
799
-
800
- ```typescript
801
- import { BrainBank } from 'brainbank';
802
- import { Qwen3Reranker } from 'brainbank';
803
-
804
- const brain = new BrainBank({
805
- reranker: new Qwen3Reranker(), // ~640MB model, auto-downloaded on first use
806
- });
807
- ```
808
-
809
- Or from the CLI:
810
-
811
- ```bash
812
- brainbank hsearch "auth middleware" --reranker qwen3
813
- ```
814
-
815
- Or via environment variable:
816
-
817
- ```bash
818
- BRAINBANK_RERANKER=qwen3 brainbank serve
819
- ```
820
-
821
- The model is cached at `~/.cache/brainbank/models/` after first download.
822
-
823
- #### Position-Aware Score Blending
824
-
825
- When enabled, the reranker uses position-aware blending — trusting retrieval scores more for top results and the reranker more for lower-ranked results:
826
-
827
- | Position | Retrieval (RRF) | Reranker | Rationale |
828
- |----------|----------------|----------|----------|
829
- | 1–3 | **75%** | 25% | Preserves exact keyword matches |
830
- | 4–10 | **60%** | 40% | Balanced blend |
831
- | 11+ | 40% | **60%** | Trust reranker for uncertain results |
832
-
833
- #### Custom Reranker
834
-
835
- Implement the `Reranker` interface to use your own:
836
-
837
- ```typescript
838
- import type { Reranker } from 'brainbank';
839
-
840
- const myReranker: Reranker = {
841
- async rank(query: string, documents: string[]): Promise<number[]> {
842
- // Return relevance scores 0.0-1.0 for each document
843
- },
844
- async close() { /* optional cleanup */ },
845
- };
846
- ```
847
-
848
- Without a reranker, BrainBank uses pure RRF fusion — which is already production-quality for most use cases.
849
-
850
- ### Notes
851
-
852
- The notes plugin gives your agent **persistent conversation memory** — store structured digests of past sessions and recall them via hybrid search.
853
-
854
- ```typescript
855
- import { BrainBank } from 'brainbank';
856
- import { notes } from 'brainbank/notes';
857
-
858
- const brain = new BrainBank({ repoPath: '.' });
859
- brain.use(notes());
860
- await brain.initialize();
861
-
862
- const notesPlugin = brain.plugin('notes');
863
-
864
- // Store a conversation digest
865
- await notesPlugin.remember({
866
- title: 'Refactored auth module',
867
- summary: 'Extracted JWT validation into middleware, added refresh token rotation',
868
- decisions: ['Use RS256 over HS256', 'Refresh tokens stored in httpOnly cookie'],
869
- filesChanged: ['src/auth/jwt.ts', 'src/middleware/auth.ts'],
870
- patterns: ['Always validate token expiry before DB lookup'],
871
- openQuestions: ['Should we add rate limiting to the refresh endpoint?'],
872
- tags: ['auth', 'security'],
873
- });
874
-
875
- // Recall relevant notes
876
- const relevant = await notesPlugin.recall('JWT token validation', { k: 3 });
877
-
878
- // List recent notes
879
- const recent = notesPlugin.list(10);
880
- const longTermOnly = notesPlugin.list(10, 'long');
881
-
882
- // Consolidate: promote old short-term notes to long-term (keeps last 20 as short)
883
- const { promoted } = notesPlugin.consolidate(20);
884
- ```
885
-
886
- **Memory tiers:**
887
- - **`short`** (default) — Full digest with all fields, kept for recent sessions
888
- - **`long`** — Compressed: only title, summary, decisions, and patterns preserved. Files and open questions dropped
889
-
890
- Consolidation automatically promotes notes beyond the keep window from `short` → `long`, reducing storage while preserving key learnings.
891
-
892
- ### Agent Memory (Patterns)
893
-
894
- The memory plugin enables **learning from experience** — your agent records what worked (and what didn't) across tasks, then distills patterns into reusable strategies.
895
-
896
- ```typescript
897
- import { BrainBank } from 'brainbank';
898
- import { memory } from 'brainbank/memory';
899
-
900
- const brain = new BrainBank({ repoPath: '.' });
901
- brain.use(memory());
902
- await brain.initialize();
903
-
904
- const mem = brain.plugin('memory');
905
-
906
- // Record a learning pattern
907
- await mem.learn({
908
- taskType: 'refactor',
909
- task: 'Extract auth logic into middleware',
910
- approach: 'Created Express middleware, moved JWT validation from routes',
911
- outcome: 'Reduced route handler size by 60%, improved testability',
912
- successRate: 0.95,
913
- critique: 'Should have added integration tests before refactoring',
914
- });
915
-
916
- // Search for similar patterns before starting a new task
917
- const patterns = await mem.search('refactor database queries');
918
-
919
- // Consolidate: prune old failures + merge duplicates
920
- const { pruned, deduped } = mem.consolidate();
54
+ const log = brain.collection('decisions');
55
+ await log.add('Switched to argon2id for password hashing', { tags: ['security'] });
921
56
 
922
- // Distill top patterns into a strategy
923
- const strategy = mem.distill('refactor');
924
- // → "Strategy for 'refactor' (5 patterns, avg success 88%):
925
- // • Created middleware, moved validation from routes (95%)
926
- // └ Should have added integration tests before refactoring"
57
+ brain.close();
927
58
  ```
928
59
 
929
- **How it works:**
930
- 1. **Learn** — Records task, approach, outcome, and success rate. Embeds for semantic search
931
- 2. **Search** — Finds similar successful patterns (filters by `successRate ≥ 0.5`)
932
- 3. **Consolidate** — Auto-runs every 50 patterns: prunes failures older than 90 days, deduplicates (cosine > 0.95)
933
- 4. **Distill** — Aggregates top patterns per task type into a single strategy text with confidence score
934
-
935
60
  ---
936
61
 
937
- ## Memory
938
-
939
- `@brainbank/memory` adds **deterministic memory extraction** to any LLM conversation. After every turn, it automatically extracts facts, deduplicates against existing memories, and decides `ADD` / `UPDATE` / `NONE` — no function calling needed.
940
-
941
- Optionally extracts **entities and relationships** (knowledge graph) from the same LLM call — no extra cost. Includes **LLM-powered entity resolution** to merge aliases (e.g. "TS" → "TypeScript").
942
-
943
- Inspired by [mem0](https://github.com/mem0ai/mem0)'s pipeline, but framework-agnostic and built on BrainBank collections.
944
-
945
- ```bash
946
- npm install @brainbank/memory
947
- ```
948
-
949
- ```typescript
950
- import { BrainBank } from 'brainbank';
951
- import { Memory, EntityStore, OpenAIProvider } from '@brainbank/memory';
952
-
953
- const brain = new BrainBank({ dbPath: './memory.db' });
954
- await brain.initialize();
62
+ ## Packages
955
63
 
956
- const llm = new OpenAIProvider({ model: 'gpt-4.1-nano' });
64
+ `brainbank` is the core framework — strictly plugin-agnostic. Plugins are separate `@brainbank/*` packages that own their database schema, search strategies, and context formatting. Install only what you need:
957
65
 
958
- // Opt-in entity extraction (knowledge graph)
959
- const entityStore = new EntityStore(brain, {
960
- onEntity: (op) => console.log(`${op.action}: ${op.name}`),
961
- });
66
+ ### Indexer Plugins
962
67
 
963
- const memory = new Memory(brain, {
964
- llm, // auto-shared with EntityStore
965
- entityStore, // optional — omit for facts-only mode
966
- onOperation: (op) => console.log(`${op.action}: ${op.fact}`),
967
- });
968
-
969
- // After every conversation turn
970
- const result = await memory.process(userMessage, assistantResponse);
971
- // result.operations → [{ fact, action: "ADD", reason }]
972
- // result.entities → { entitiesProcessed: 2, relationshipsProcessed: 1 }
973
-
974
- // System prompt with memories + entities
975
- const context = memory.buildContext();
976
- // → "## Memories\n- User's name is Berna\n\n## Known Entities\n- Berna (person, 3x)\n..."
977
- ```
68
+ Data sources that feed into BrainBank's hybrid search engine. Each plugin manages its own tables via the built-in migration system.
978
69
 
979
- The `LLMProvider` interface works with any framework:
70
+ | Package | Description | Install |
71
+ |---------|-------------|----------|
72
+ | [`@brainbank/code`](packages/code/) | AST chunking, import graph, symbol index (20 languages) | `npm i @brainbank/code` |
73
+ | [`@brainbank/git`](packages/git/) | Git history indexing + co-edit analysis | `npm i @brainbank/git` |
74
+ | [`@brainbank/docs`](packages/docs/) | Document collection search with smart chunking | `npm i @brainbank/docs` |
980
75
 
981
- | Framework | Adapter |
982
- |-----------|--------|
983
- | OpenAI | Built-in `OpenAIProvider` |
984
- | LangChain | `ChatOpenAI.invoke()` → string |
985
- | Vercel AI SDK | `generateText()` → string |
986
- | Any LLM | Implement `{ generate(messages) → string }` |
76
+ ### Integrations
987
77
 
988
- > 📂 See [examples/memory](examples/memory/) for a runnable demo. All three LLM backends supported via `--llm` flag.
78
+ Extensions that connect BrainBank to external tools and workflows.
989
79
 
990
- > 📦 Full docs: [packages/memory/README.md](packages/memory/README.md)
80
+ | Package | Description | Install |
81
+ |---------|-------------|----------|
82
+ | [`@brainbank/mcp`](packages/mcp/) | MCP server for Antigravity, Claude, Cursor | `npm i @brainbank/mcp` |
991
83
 
992
84
  ---
993
85
 
994
- ### Environment Variables
995
-
996
- | Variable | Description |
997
- |----------|-------------|
998
- | `BRAINBANK_REPO` | Default repository path (optional — auto-detected from `.git/` or passed per tool call) |
999
- | `BRAINBANK_RERANKER` | Reranker: `none` (default), `qwen3` |
1000
- | `BRAINBANK_DEBUG` | Show full stack traces |
1001
- | `OPENAI_API_KEY` | Required when using `--embedding openai` |
1002
- | `PERPLEXITY_API_KEY` | Required when using `--embedding perplexity` or `perplexity-context` |
1003
-
1004
- > **Note:** `BRAINBANK_EMBEDDING` env var has been removed. Use `brainbank index --embedding <provider>` on first index — the provider is stored in the DB and auto-resolved on subsequent runs.
86
+ ## Documentation
87
+
88
+ | Guide | Description |
89
+ |-------|-------------|
90
+ | **[Getting Started](docs/getting-started.md)** | Installation, quick start, first search |
91
+ | **[CLI Reference](docs/cli.md)** | Complete command reference |
92
+ | **[Plugins](docs/plugins.md)** | Built-in plugins overview + configuration |
93
+ | **[Collections](docs/collections.md)** | Dynamic KV store with semantic search |
94
+ | **[Search](docs/search.md)** | Hybrid search, scoped queries, context generation |
95
+ | **[Custom Plugins](docs/custom-plugins.md)** | Build plugins + publish as npm packages |
96
+ | **[Configuration](docs/config.md)** | `.brainbank/config.json`, env vars |
97
+ | **[Embeddings, Reranker & Pruner](docs/embeddings.md)** | Providers, benchmarks, per-plugin overrides, LLM noise filter |
98
+ | **[Multi-Repo](docs/multi-repo.md)** | Index multiple repositories into one DB |
99
+ | **[MCP Server](docs/mcp.md)** | AI tool integration (stdio) |
100
+ | **[Indexing](docs/indexing.md)** | Code graph, incremental indexing, re-embedding |
101
+ | **[Migrations](docs/migrations.md)** | Plugin schema migrations, built-in schemas |
102
+ | **[Architecture](docs/architecture.md)** | System internals, data flows, design patterns |
1005
103
 
1006
104
  ---
1007
105
 
1008
- ## Multi-Repository Indexing
1009
-
1010
- BrainBank can index multiple repositories into a **single shared database**. This is useful for monorepos, microservices, or any project split across multiple Git repositories.
1011
-
1012
- ### How It Works
1013
-
1014
- When you point BrainBank at a directory that contains multiple Git repositories (subdirectories with `.git/`), the CLI **auto-detects** them and creates namespaced plugins:
1015
-
1016
- ```bash
1017
- ~/projects/
1018
- ├── webapp-frontend/ # .git/
1019
- ├── webapp-backend/ # .git/
1020
- └── webapp-shared/ # .git/
1021
- ```
1022
-
1023
- ```bash
1024
- brainbank index ~/projects --depth 200
1025
- ```
106
+ ## Examples
1026
107
 
1027
- ```
1028
- ━━━ BrainBank Index ━━━
1029
- Repo: /Users/you/projects
1030
- Multi-repo: found 3 git repos: webapp-frontend, webapp-backend, webapp-shared
1031
- CODE:WEBAPP-BACKEND [0/1075] ...
1032
- CODE:WEBAPP-FRONTEND [0/719] ...
1033
- GIT:WEBAPP-SHARED [0/200] ...
1034
-
1035
- Code: 2107 indexed, 4084 chunks
1036
- Git: 600 indexed (200 per repo)
1037
- Co-edit pairs: 1636
1038
- ```
108
+ | Example | Description |
109
+ |---------|-------------|
110
+ | [notes-plugin](examples/notes-plugin/) | Programmatic plugin — reads `.txt` files |
111
+ | [custom-plugin](examples/custom-plugin/) | CLI auto-discovery plugin |
112
+ | [custom-package](examples/custom-package/) | Standalone npm package scaffold |
113
+ | [collection](examples/collection/) | Collections, search, tags, metadata |
114
+ | [rag](examples/rag/) | RAG chatbot — docs retrieval + generation ¹ |
1039
115
 
1040
- All code, git history, and co-edit relationships from every sub-repository go into **one** `.brainbank/brainbank.db` at the parent directory. Search queries automatically return results across all repositories:
1041
-
1042
- ```bash
1043
- brainbank hsearch "cancel job confirmation" --repo ~/projects
1044
- # → Results from frontend components, backend controllers,
1045
- # and shared utilities — all in one search.
1046
- ```
1047
-
1048
- ### Namespaced Plugins
1049
-
1050
- Each sub-repository gets its own namespaced plugin instances (e.g., `code:frontend`, `git:backend`). Same-type plugins share a single HNSW vector index for efficient memory usage and unified search.
1051
-
1052
- ### Programmatic API
1053
-
1054
- ```typescript
1055
- import { BrainBank } from 'brainbank';
1056
- import { code } from 'brainbank/code';
1057
- import { git } from 'brainbank/git';
1058
-
1059
- const brain = new BrainBank({ repoPath: '~/projects' })
1060
- .use(code({ name: 'code:frontend', repoPath: '~/projects/webapp-frontend' }))
1061
- .use(code({ name: 'code:backend', repoPath: '~/projects/webapp-backend' }))
1062
- .use(git({ name: 'git:frontend', repoPath: '~/projects/webapp-frontend' }))
1063
- .use(git({ name: 'git:backend', repoPath: '~/projects/webapp-backend' }));
1064
-
1065
- await brain.initialize();
1066
- await brain.index();
1067
-
1068
- // Cross-repo search
1069
- const results = await brain.hybridSearch('authentication guard');
1070
- // → Results from both frontend and backend
1071
- ```
1072
-
1073
- ### MCP Multi-Workspace
1074
-
1075
- The MCP server maintains a pool of BrainBank instances — one per unique `repo` path. Each tool call can target a different workspace:
1076
-
1077
- ```typescript
1078
- // Agent working in one workspace
1079
- brainbank_hybrid_search({ query: "login form", repo: "/Users/you/projects" })
1080
-
1081
- // Agent switches to a different project
1082
- brainbank_hybrid_search({ query: "API routes", repo: "/Users/you/other-project" })
1083
- ```
1084
-
1085
- Instances are cached in memory after first initialization, so subsequent queries to the same repo are fast (~480ms).
1086
-
1087
- ---
1088
-
1089
- ## Indexing
1090
-
1091
- ### Code Chunking (tree-sitter)
1092
-
1093
- BrainBank uses **native tree-sitter** to parse source code into ASTs and extract semantic blocks — functions, classes, methods, interfaces — as individual chunks. This produces dramatically better embeddings than naive line-based splitting.
1094
-
1095
- **Supported languages (AST-parsed):**
1096
-
1097
- | Category | Languages |
1098
- |----------|-----------|
1099
- | Web | TypeScript, JavaScript, HTML, CSS |
1100
- | Systems | Go, Rust, C, C++, Swift |
1101
- | JVM | Java, Kotlin, Scala |
1102
- | Scripting | Python, Ruby, PHP, Lua, Bash, Elixir |
1103
- | .NET | C# |
1104
-
1105
- For large classes (>80 lines), the chunker descends into the class body and extracts each method as a separate chunk. For unsupported languages, it falls back to a sliding window with overlap.
1106
-
1107
- > Tree-sitter grammars are **optional dependencies**. If a grammar isn't installed, that language falls back to the generic sliding window. Install only the grammars you need: `npm install tree-sitter-ruby tree-sitter-go` etc.
1108
-
1109
- ### Incremental Indexing
1110
-
1111
- All indexing is **incremental by default** — only new or changed content is processed:
1112
-
1113
- | Plugin | How it detects changes | What gets skipped |
1114
- |---------|----------------------|-------------------|
1115
- | **Code** | FNV-1a hash of file content | Unchanged files |
1116
- | **Git** | Unique commit hash | Already-indexed commits |
1117
- | **Docs** | SHA-256 of file content | Unchanged documents |
1118
-
1119
- ```typescript
1120
- // First run: indexes everything
1121
- await brain.index(); // → { indexed: 500, skipped: 0 }
1122
-
1123
- // Second run: skips everything unchanged
1124
- await brain.index(); // → { indexed: 0, skipped: 500 }
1125
-
1126
- // Changed 1 file? Only that file re-indexes
1127
- await brain.index(); // → { indexed: 1, skipped: 499 }
1128
- ```
1129
-
1130
- Use `--force` to re-index everything:
1131
-
1132
- ```bash
1133
- brainbank index --force
1134
- ```
1135
-
1136
- ### Re-embedding
1137
-
1138
- When switching embedding providers (e.g. Local → OpenAI), you **don't need to re-index**. The `reembed()` method regenerates only the vectors — no file I/O, no git parsing, no re-chunking:
1139
-
1140
- ```typescript
1141
- import { BrainBank, OpenAIEmbedding } from 'brainbank';
1142
-
1143
- // Previously indexed with local embeddings.
1144
- // Now switch to OpenAI:
1145
- const brain = new BrainBank({
1146
- embeddingProvider: new OpenAIEmbedding(),
1147
- });
1148
-
1149
- // force: true bypasses the dimension mismatch check for recovery
1150
- await brain.initialize({ force: true });
1151
-
1152
- const result = await brain.reembed({
1153
- onProgress: (table, current, total) => {
1154
- console.log(`${table}: ${current}/${total}`);
1155
- },
1156
- });
1157
- // → { code: 1200, git: 500, docs: 80, kv: 45, notes: 12, total: 1837 }
1158
- ```
1159
-
1160
- Or from the CLI:
1161
-
1162
- ```bash
1163
- brainbank reembed
1164
- ```
1165
-
1166
- | Full re-index | `reembed()` |
1167
- |---|---|
1168
- | Walks all files | **Skipped** |
1169
- | Parses git history | **Skipped** |
1170
- | Re-chunks documents | **Skipped** |
1171
- | Embeds text | ✓ |
1172
- | Replaces vectors | ✓ |
1173
- | Rebuilds HNSW | ✓ |
1174
-
1175
- > BrainBank tracks provider metadata in `embedding_meta` table. It auto-detects mismatches and warns you to run `reembed()`.
116
+ > ¹ Requires both `OPENAI_API_KEY` and `PERPLEXITY_API_KEY`.
1176
117
 
1177
118
  ---
1178
119
 
1179
120
  ## Benchmarks
1180
121
 
1181
- BrainBank includes benchmark scripts to validate chunking quality and search relevance. Run them against your own codebase to see the impact.
1182
-
1183
- ### Search Quality: AST vs Sliding Window
1184
-
1185
- We compared BrainBank's **tree-sitter AST chunker** against the traditional **sliding window** (80-line blocks) on a production NestJS backend (3,753 lines across 8 service files). Both strategies chunk the same files; all chunks are embedded and searched with the same 10 domain-specific queries.
1186
-
1187
- #### How It Works
1188
-
1189
- ```
1190
- Sliding Window Tree-Sitter AST
1191
- ┌────────────────────┐ ┌────────────────────┐
1192
- │ import { ... } │ │ ✓ constructor() │ → named chunk
1193
- │ @Injectable() │ → L1-80 block │ ✓ findAll() │ → named chunk
1194
- │ class JobsService {│ │ ✓ createJob() │ → named chunk
1195
- │ constructor() │ │ ✓ cancelJob() │ → named chunk
1196
- │ findAll() { ... }│ │ ✓ updateStatus() │ → named chunk
1197
- │ createJob() │ └────────────────────┘
1198
- │ ... │
1199
- │ ────────────────── │ overlaps ↕
1200
- │ cancelJob() │ → L75-155 block
1201
- │ updateStatus() │
1202
- │ ... │
1203
- └────────────────────┘
1204
- ```
1205
-
1206
- **Sliding window** mixes imports, constructors, and multiple methods into one embedding. Search for "cancel a job" and you get a generic block.
1207
- **AST chunking** gives each method its own embedding. Search for "cancel a job" → direct hit on `cancelJob()`.
1208
-
1209
- #### Results (Production NestJS Backend — 3,753 lines)
1210
-
1211
- Tested with 10 domain-specific queries on 8 service files (`orders.service.ts`, `bookings.service.ts`, `notifications.service.ts`, etc.):
1212
-
1213
- | Metric | Sliding Window | Tree-Sitter AST |
1214
- |--------|:-:|:-:|
1215
- | **Query Wins** | 0/10 | **8/10** (2 ties) |
1216
- | **Top-1 Relevant** | 3/10 | **8/10** |
1217
- | **Avg Precision@3** | 1.1/3 | **1.7/3** |
1218
- | **Avg Score Delta** | — | **+0.035** |
1219
-
1220
- #### Per-Query Breakdown
1221
-
1222
- | Query | SW Top Result | AST Top Result | Δ Score |
1223
- |-------|:---:|:---:|:---:|
1224
- | cancel an order | generic `L451-458` | **`updateOrderStatus`** | +0.005 |
1225
- | create a booking | generic `L451-458` | **`createInstantBooking`** | +0.068 |
1226
- | confirm booking | generic `L451-458` | **`confirm`** | +0.034 |
1227
- | send notification | generic `L226-305` | **`publishNotificationEvent`** | +0.034 |
1228
- | authenticate JWT | generic `L1-80` | **`AuthModule`** | +0.032 |
1229
- | tenant DB connection | `L76-155` | **`onModuleDestroy`** | +0.037 |
1230
- | list orders paginated | `L76-155` | **`findAllActive`** | +0.045 |
1231
- | reject booking | generic `L451-458` | **`reject`** | +0.090 |
1232
-
1233
- > Notice how the sliding window returns the **same generic block `L451-458`** for 4 different queries. The AST chunker returns a different, correctly named method each time.
122
+ Early benchmarks on Apple Silicon — single SQLite file, no external vector DB.
1234
123
 
1235
- #### Chunk Quality Comparison
124
+ | Benchmark | Corpus | Metric | Score |
125
+ |-----------|--------|--------|:-----:|
126
+ | [BEIR SciFact](https://github.com/beir-cellar/beir) | 5,183 scientific abstracts, 300 queries | NDCG@10 | **0.761** |
127
+ | Custom RAG eval | 127 Pinecall.io docs, 20 queries — 1 miss | R@5 | **83%** |
1236
128
 
1237
- | | Sliding Window | Tree-Sitter AST |
1238
- |---|:-:|:-:|
1239
- | Total chunks | 53 | **83** |
1240
- | Avg lines/chunk | 75 | **39** |
1241
- | Named chunks | 0 | **83** (100%) |
1242
- | Chunk types | `block` | `method`, `interface`, `class` |
129
+ **Pipeline progression** — each stage's impact on the custom eval:
1243
130
 
1244
- ### Grammar Support
131
+ | Stage | R@5 | Δ |
132
+ |-------|:---:|---|
133
+ | Vector-only (HNSW) | 57% | — |
134
+ | + BM25 → RRF | 78% | +21pp |
135
+ | + Qwen3 reranker | 83% | +5pp |
1245
136
 
1246
- All 9 core grammars verified, each parsing in **<0.05ms**:
137
+ > More benchmarks (code+graph retrieval, large-scale stress tests, multi-provider comparisons) are in progress.
138
+ > Full methodology and reproduction commands → [docs/benchmarks.md](docs/benchmarks.md)
1247
139
 
1248
- | Language | AST Nodes Extracted | Parse Time |
1249
- |----------|:---:|:---:|
1250
- | TypeScript | `export_statement`, `interface_declaration` | 0.04ms |
1251
- | JavaScript | `function_declaration` × 3 | 0.04ms |
1252
- | Python | `class_definition`, `function_definition` × 2 | 0.03ms |
1253
- | Go | `function_declaration`, `method_declaration` × 3 | 0.04ms |
1254
- | Rust | `struct_item`, `impl_item`, `function_item` | 0.03ms |
1255
- | Ruby | `class`, `method` | 0.03ms |
1256
- | Java | `class_declaration` | 0.02ms |
1257
- | C | `function_definition` × 3 | 0.05ms |
1258
- | PHP | `class_declaration` | 0.03ms |
140
+ ## Contributing
1259
141
 
1260
- > Additional grammars available: C++, Swift, C#, Kotlin, Scala, Lua, Elixir, Bash, HTML, CSS
1261
-
1262
- ### RAG Retrieval Quality
1263
-
1264
- BrainBank's hybrid search pipeline (Vector + BM25 → RRF) with Perplexity Context embeddings (2560d):
1265
-
1266
- | Benchmark | Metric | Score |
1267
- |---|---|:---:|
1268
- | **BEIR SciFact** (5,183 docs, 300 queries) | NDCG@10 | **0.761** |
1269
- | **Custom semantic** (69 docs, 20 queries) | R@5 | **83%** |
1270
-
1271
- The hybrid pipeline improved R@5 by **+26pp over vector-only** retrieval on our custom eval.
1272
-
1273
- #### BrainBank vs QMD (Head-to-Head)
1274
-
1275
- Compared against [QMD](https://github.com/tobi/qmd), a local-first search engine using GGUF models (embeddinggemma-300M + query expansion + reranker) — same corpus, same 20 queries:
1276
-
1277
- | Metric | BrainBank + Reranker | QMD + Reranker |
1278
- |---|:---:|:---:|
1279
- | **R@5** | **83%** | 65% |
1280
- | **MRR** | **0.57** | 0.45 |
1281
- | **Misses** | **1/20** | 6/20 |
1282
-
1283
- > BrainBank wins by +18pp R@5. QMD is competitive on semantic queries (81% vs 94%) and ties on broad queries (83% vs 83%) — impressive for a fully local pipeline with zero API calls.
1284
-
1285
- See **[BENCHMARKS.md](./BENCHMARKS.md)** for full pipeline progression, per-technique impact, QMD comparison details, and reproduction instructions.
1286
-
1287
- #### Running the RAG Eval
1288
-
1289
- ```bash
1290
- # Custom eval on your own docs
1291
- PERPLEXITY_API_KEY=pplx-... npx tsx test/benchmarks/rag/eval.ts --docs ~/path/to/docs
1292
-
1293
- # BEIR standard benchmark
1294
- PERPLEXITY_API_KEY=pplx-... npx tsx test/benchmarks/rag/beir-eval.ts --dataset scifact
1295
- ```
1296
-
1297
- ### Running Benchmarks
1298
-
1299
- ```bash
1300
- # Grammar support (9 languages, parse speed)
1301
- node test/benchmarks/grammar-support.mjs
1302
-
1303
- # Search quality A/B (uses BrainBank's own source files)
1304
- node test/benchmarks/search-quality.mjs
1305
-
1306
- # RAG retrieval quality (requires Perplexity API key + docs folder)
1307
- PERPLEXITY_API_KEY=pplx-... npx tsx test/benchmarks/rag/eval.ts --docs ~/path/to/docs
1308
- ```
1309
-
1310
- ---
1311
-
1312
- ## Architecture
1313
-
1314
- <details>
1315
- <summary>Text version</summary>
1316
-
1317
- ```
1318
- ┌──────────────────────────────────────────────────────┐
1319
- │ BrainBank Core │
1320
- │ .use(code) .use(git) .use(docs) │
1321
- │ .collection('name') │
1322
- ├──────────────────────────────────────────────────────┤
1323
- │ │
1324
- │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌────────────┐│
1325
- │ │ Code │ │ Git │ │ Docs │ │ Collection ││
1326
- │ │ Plugin │ │ Indexer │ │ Indexer │ │ (dynamic) ││
1327
- │ └────┬────┘ └────┬────┘ └────┬────┘ └─────┬──────┘│
1328
- │ │ │ │ │ │
1329
- │ ┌────▼────┐ ┌────▼────┐ ┌────▼────┐ ┌─────▼──────┐│
1330
- │ │ HNSW │ │ HNSW │ │ HNSW │ │ Shared KV ││
1331
- │ │ Index │ │ Index │ │ Index │ │ HNSW Index ││
1332
- │ └─────────┘ └─────────┘ └─────────┘ └────────────┘│
1333
- │ │
1334
- │ ┌──────────────────────────────────────────────────┐│
1335
- │ │ SQLite (.brainbank/brainbank.db) ││
1336
- │ │ code_chunks │ git_commits │ doc_chunks ││
1337
- │ │ kv_data │ FTS5 full-text │ vectors │ co_edits ││
1338
- │ └──────────────────────────────────────────────────┘│
1339
- │ │
1340
- │ ┌──────────────────────────────────────────────────┐│
1341
- │ │ Embedding (Local 384d│OpenAI 1536d│Perplexity) ││
1342
- │ └──────────────────────────────────────────────────┘│
1343
- │ ┌──────────────────────────────────────────────────┐│
1344
- │ │ Qwen3-Reranker (opt-in cross-encoder) ││
1345
- │ └──────────────────────────────────────────────────┘│
1346
- └──────────────────────────────────────────────────────┘
1347
- ```
1348
- </details>
1349
-
1350
- ### Search Pipeline
1351
-
1352
- ```
1353
- Query
1354
-
1355
- ├──► Vector Search (HNSW k-NN) ──► candidates
1356
- ├──► Keyword Search (BM25/FTS5) ──► candidates
1357
-
1358
-
1359
- Reciprocal Rank Fusion (RRF, k=60)
1360
-
1361
-
1362
- Qwen3-Reranker (yes/no + logprobs → score 0-1)
1363
-
1364
-
1365
- Position-Aware Blend
1366
- Top 1-3: 75% RRF / 25% reranker
1367
- Top 4-10: 60% RRF / 40% reranker
1368
- Top 11+: 40% RRF / 60% reranker
1369
-
1370
-
1371
- Final results (sorted by blended score)
1372
- ```
1373
-
1374
- ### Data Flow
1375
-
1376
- 1. **Index** — Plugins parse files into chunks (tree-sitter AST for code, heading-based for docs)
1377
- 2. **Embed** — Each chunk gets a vector (local WASM, OpenAI, or Perplexity)
1378
- 3. **Store** — Chunks + vectors → SQLite, vectors → HNSW index
1379
- 4. **Search** — Query → HNSW k-NN + BM25 keyword → RRF fusion → optional reranker
1380
- 5. **Context** — Top results formatted as markdown for system prompts
1381
-
1382
- ---
1383
-
1384
- ## Testing
1385
-
1386
- ```bash
1387
- npm test # Unit tests (172 tests)
1388
- npm test -- --integration # Full suite (includes real models + all domains)
1389
- npm test -- --filter code # Filter by test name
1390
- npm test -- --verbose # Show assertion details
1391
- ```
1392
-
1393
- ---
142
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines.
1394
143
 
1395
144
  ## License
1396
145
 
1397
- MIT
146
+ [MIT](LICENSE)