brainbank 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/README.md +149 -16
  2. package/dist/{types-Da_zLLOl.d.ts → base-9vfWRHCV.d.ts} +131 -31
  3. package/dist/{chunk-YGSEUWLV.js → chunk-6MFTQV3O.js} +911 -674
  4. package/dist/chunk-6MFTQV3O.js.map +1 -0
  5. package/dist/chunk-7JCEW7LT.js +266 -0
  6. package/dist/chunk-7JCEW7LT.js.map +1 -0
  7. package/dist/{chunk-GOUBW7UA.js → chunk-F6SJ3U4H.js} +98 -34
  8. package/dist/chunk-F6SJ3U4H.js.map +1 -0
  9. package/dist/{chunk-MJ3Y24H6.js → chunk-FJJY4H2Y.js} +11 -11
  10. package/dist/chunk-FJJY4H2Y.js.map +1 -0
  11. package/dist/{chunk-3GAIDXRW.js → chunk-GUT5MSJT.js} +5 -11
  12. package/dist/chunk-GUT5MSJT.js.map +1 -0
  13. package/dist/{chunk-2P3EGY6S.js → chunk-QNHBCOKB.js} +2 -2
  14. package/dist/chunk-QNHBCOKB.js.map +1 -0
  15. package/dist/{chunk-4ZKBQ33J.js → chunk-V4UJKXPK.js} +23 -5
  16. package/dist/chunk-V4UJKXPK.js.map +1 -0
  17. package/dist/chunk-WR4WXKJT.js +723 -0
  18. package/dist/chunk-WR4WXKJT.js.map +1 -0
  19. package/dist/{chunk-Z5SU54HP.js → chunk-X6645UVR.js} +3 -3
  20. package/dist/chunk-X6645UVR.js.map +1 -0
  21. package/dist/cli.js +150 -100
  22. package/dist/cli.js.map +1 -1
  23. package/dist/code.d.ts +5 -5
  24. package/dist/code.js +1 -1
  25. package/dist/docs.d.ts +4 -6
  26. package/dist/docs.js +1 -1
  27. package/dist/git.d.ts +5 -5
  28. package/dist/git.js +1 -1
  29. package/dist/index.d.ts +95 -104
  30. package/dist/index.js +13 -13
  31. package/dist/memory.d.ts +5 -7
  32. package/dist/memory.js +9 -12
  33. package/dist/memory.js.map +1 -1
  34. package/dist/notes.d.ts +4 -6
  35. package/dist/notes.js +7 -10
  36. package/dist/notes.js.map +1 -1
  37. package/dist/{openai-PCTYLOWI.js → openai-CYDMYX7X.js} +2 -2
  38. package/package.json +24 -4
  39. package/dist/chunk-2P3EGY6S.js.map +0 -1
  40. package/dist/chunk-3GAIDXRW.js.map +0 -1
  41. package/dist/chunk-4ZKBQ33J.js.map +0 -1
  42. package/dist/chunk-EDKSKLX4.js +0 -490
  43. package/dist/chunk-EDKSKLX4.js.map +0 -1
  44. package/dist/chunk-GOUBW7UA.js.map +0 -1
  45. package/dist/chunk-MJ3Y24H6.js.map +0 -1
  46. package/dist/chunk-N6ZMBFDE.js +0 -224
  47. package/dist/chunk-N6ZMBFDE.js.map +0 -1
  48. package/dist/chunk-YGSEUWLV.js.map +0 -1
  49. package/dist/chunk-Z5SU54HP.js.map +0 -1
  50. package/dist/{openai-PCTYLOWI.js.map → openai-CYDMYX7X.js.map} +0 -0
package/README.md CHANGED
@@ -11,7 +11,7 @@ BrainBank gives LLMs a long-term memory that persists between sessions.
11
11
  - **Pluggable embeddings** — local WASM (free) or OpenAI (higher quality)
12
12
  - **Multi-repo** — index multiple repositories into one shared database
13
13
  - **Portable** — single `.brainbank/brainbank.db` file
14
- - **Optional packages** — [`@brainbank/memory`](#memory) (deterministic fact extraction), [`@brainbank/reranker`](#reranker) (Qwen3 cross-encoder), [`@brainbank/mcp`](#mcp-server) (MCP server)
14
+ - **Optional packages** — [`@brainbank/memory`](#memory) (fact extraction + entity graph), [`@brainbank/reranker`](#reranker) (Qwen3 cross-encoder), [`@brainbank/mcp`](#mcp-server) (MCP server)
15
15
 
16
16
  ![BrainBank Architecture](assets/architecture.png)
17
17
 
@@ -27,19 +27,21 @@ Most AI memory solutions (mem0, Zep, LangMem) require cloud services, external d
27
27
  |---|:---:|:---:|:---:|:---:|
28
28
  | Infrastructure | **SQLite file** | Vector DB + cloud | Neo4j + cloud | LangGraph Platform |
29
29
  | LLM required to write | **No**¹ | Yes | Yes | Yes |
30
- | Code-aware | **30+ languages, git, co-edits** | ✗ | ✗ | ✗ |
30
+ | Code-aware | **19 AST-parsed languages (tree-sitter), git, co-edits** | ✗ | ✗ | ✗ |
31
31
  | Custom indexers | **`.use()` plugin system** | ✗ | ✗ | ✗ |
32
- | Search | **Vector + BM25 + RRF** | Vector only | Vector + graph | Vector only |
32
+ | Search | **Vector + BM25 + RRF** | Vector + graph² | Vector + BM25 + graph | Vector only |
33
33
  | Framework lock-in | **None** | Optional | Zep cloud | LangChain |
34
34
  | Portable | **Copy one file** | Tied to DB | Tied to cloud | Tied to platform |
35
35
 
36
36
  > ¹ mem0 and Zep use LLMs to auto-extract memories from raw text. BrainBank is explicit — you decide what gets stored. Less magic, more control.
37
+ >
38
+ > ² mem0's graph store (mem0g) is available in the paid platform version.
37
39
 
38
40
  **In short:**
39
41
  - **Code-first** — the only memory layer that understands code structure, git history, and file co-edit relationships
42
+ - **Framework-agnostic** — plain TypeScript, works with any agent framework (LangChain, Vercel AI SDK, custom) or none at all. Unopinionated — doesn't force you into a specific pattern
40
43
  - **$0 memory bill** — no LLM calls to extract/consolidate. You store what you want, BrainBank embeds deterministically
41
44
  - **Truly portable** — `.brainbank/brainbank.db` is a normal file. Copy it, back it up, `git lfs` it
42
- - **No vendor lock-in** — plain TypeScript, works with any agent framework or none at all
43
45
 
44
46
  ### Table of Contents
45
47
 
@@ -68,6 +70,9 @@ Most AI memory solutions (mem0, Zep, LangMem) require cloud services, external d
68
70
  - [Re-embedding](#re-embedding)
69
71
  - [Architecture](#architecture)
70
72
  - [Search Pipeline](#search-pipeline)
73
+ - [Benchmarks](#benchmarks)
74
+ - [Search Quality: AST vs Sliding Window](#search-quality-ast-vs-sliding-window)
75
+ - [Grammar Support](#grammar-support)
71
76
 
72
77
  ---
73
78
 
@@ -81,7 +86,7 @@ npm install brainbank
81
86
 
82
87
  | Package | When to install |
83
88
  |---------|----------------|
84
- | `@brainbank/memory` | Deterministic memory extraction for LLM conversations (mem0-style pipeline) |
89
+ | `@brainbank/memory` | Deterministic memory extraction + entity graph for LLM conversations |
85
90
  | `@brainbank/reranker` | Cross-encoder reranker (Qwen3-0.6B, ~640MB model) |
86
91
  | `@brainbank/mcp` | MCP server for AI tool integration |
87
92
 
@@ -148,12 +153,15 @@ BrainBank can be used entirely from the command line — no config file needed.
148
153
 
149
154
  ### Indexing
150
155
 
151
- `index` processes **code files + git history** only. Document collections are indexed separately with `docs`.
156
+ `index` processes **code files + git history** by default. Use `--only` to select specific modules, and `--docs` to include document collections.
152
157
 
153
158
  ```bash
154
159
  brainbank index [path] # Index code + git history
155
160
  brainbank index [path] --force # Force re-index everything
156
161
  brainbank index [path] --depth 200 # Limit git commit depth
162
+ brainbank index [path] --only code # Index only code (skip git)
163
+ brainbank index [path] --only git # Index only git history
164
+ brainbank index [path] --docs ~/docs # Include a docs folder
157
165
  brainbank docs [--collection <name>] # Index document collections
158
166
  ```
159
167
 
@@ -232,7 +240,7 @@ BrainBank uses pluggable indexers. Register only what you need with `.use()`:
232
240
 
233
241
  | Indexer | Import | Description |
234
242
  |---------|--------|-------------|
235
- | `code` | `brainbank/code` | Language-aware code chunking (30+ languages) |
243
+ | `code` | `brainbank/code` | AST-aware code chunking via tree-sitter (19 languages) |
236
244
  | `git` | `brainbank/git` | Git commit history, diffs, co-edit relationships |
237
245
  | `docs` | `brainbank/docs` | Document collections (markdown, wikis) |
238
246
 
@@ -763,6 +771,8 @@ Without a reranker, BrainBank uses pure RRF fusion — which is already producti
763
771
 
764
772
  `@brainbank/memory` adds **deterministic memory extraction** to any LLM conversation. After every turn, it automatically extracts facts, deduplicates against existing memories, and decides `ADD` / `UPDATE` / `NONE` — no function calling needed.
765
773
 
774
+ Optionally extracts **entities and relationships** (knowledge graph) from the same LLM call — no extra cost. Includes **LLM-powered entity resolution** to merge aliases (e.g. "TS" → "TypeScript").
775
+
766
776
  Inspired by [mem0](https://github.com/mem0ai/mem0)'s pipeline, but framework-agnostic and built on BrainBank collections.
767
777
 
768
778
  ```bash
@@ -771,22 +781,32 @@ npm install @brainbank/memory
771
781
 
772
782
  ```typescript
773
783
  import { BrainBank } from 'brainbank';
774
- import { Memory, OpenAIProvider } from '@brainbank/memory';
784
+ import { Memory, EntityStore, OpenAIProvider } from '@brainbank/memory';
775
785
 
776
786
  const brain = new BrainBank({ dbPath: './memory.db' });
777
787
  await brain.initialize();
778
788
 
779
- const memory = new Memory(brain.collection('memories'), {
780
- llm: new OpenAIProvider({ model: 'gpt-4.1-nano' }),
789
+ const llm = new OpenAIProvider({ model: 'gpt-4.1-nano' });
790
+
791
+ // Opt-in entity extraction (knowledge graph)
792
+ const entityStore = new EntityStore(brain, {
793
+ onEntity: (op) => console.log(`${op.action}: ${op.name}`),
794
+ });
795
+
796
+ const memory = new Memory(brain, {
797
+ llm, // auto-shared with EntityStore
798
+ entityStore, // optional — omit for facts-only mode
799
+ onOperation: (op) => console.log(`${op.action}: ${op.fact}`),
781
800
  });
782
801
 
783
- // After every conversation turn (deterministic, automatic)
784
- await memory.process(userMessage, assistantResponse);
785
- // → extracts facts, deduplicates, executes ADD/UPDATE/NONE
802
+ // After every conversation turn
803
+ const result = await memory.process(userMessage, assistantResponse);
804
+ // result.operations → [{ fact, action: "ADD", reason }]
805
+ // result.entities → { entitiesProcessed: 2, relationshipsProcessed: 1 }
786
806
 
787
- // For the system prompt
807
+ // System prompt with memories + entities
788
808
  const context = memory.buildContext();
789
- // → "## Memories\n- User's name is Berna\n- Prefers TypeScript"
809
+ // → "## Memories\n- User's name is Berna\n\n## Known Entities\n- Berna (person, 3x)\n..."
790
810
  ```
791
811
 
792
812
  The `LLMProvider` interface works with any framework:
@@ -899,6 +919,24 @@ Instances are cached in memory after first initialization, so subsequent queries
899
919
 
900
920
  ## Indexing
901
921
 
922
+ ### Code Chunking (tree-sitter)
923
+
924
+ BrainBank uses **native tree-sitter** to parse source code into ASTs and extract semantic blocks — functions, classes, methods, interfaces — as individual chunks. This produces dramatically better embeddings than naive line-based splitting.
925
+
926
+ **Supported languages (AST-parsed):**
927
+
928
+ | Category | Languages |
929
+ |----------|-----------|
930
+ | Web | TypeScript, JavaScript, HTML, CSS |
931
+ | Systems | Go, Rust, C, C++, Swift |
932
+ | JVM | Java, Kotlin, Scala |
933
+ | Scripting | Python, Ruby, PHP, Lua, Bash, Elixir |
934
+ | .NET | C# |
935
+
936
+ For large classes (>80 lines), the chunker descends into the class body and extracts each method as a separate chunk. For unsupported languages, it falls back to a sliding window with overlap.
937
+
938
+ > Tree-sitter grammars are **optional dependencies**. If a grammar isn't installed, that language falls back to the generic sliding window. Install only the grammars you need: `npm install tree-sitter-ruby tree-sitter-go` etc.
939
+
902
940
  ### Incremental Indexing
903
941
 
904
942
  All indexing is **incremental by default** — only new or changed content is processed:
@@ -971,6 +1009,101 @@ brainbank reembed
971
1009
 
972
1010
  ---
973
1011
 
1012
+ ## Benchmarks
1013
+
1014
+ BrainBank includes benchmark scripts to validate chunking quality and search relevance. Run them against your own codebase to see the impact.
1015
+
1016
+ ### Search Quality: AST vs Sliding Window
1017
+
1018
+ We compared BrainBank's **tree-sitter AST chunker** against the traditional **sliding window** (80-line blocks) on a production NestJS backend (3,753 lines across 8 service files). Both strategies chunk the same files; all chunks are embedded and searched with the same 10 domain-specific queries.
1019
+
1020
+ #### How It Works
1021
+
1022
+ ```
1023
+ Sliding Window Tree-Sitter AST
1024
+ ┌────────────────────┐ ┌────────────────────┐
1025
+ │ import { ... } │ │ ✓ constructor() │ → named chunk
1026
+ │ @Injectable() │ → L1-80 block │ ✓ findAll() │ → named chunk
1027
+ │ class JobsService {│ │ ✓ createJob() │ → named chunk
1028
+ │ constructor() │ │ ✓ cancelJob() │ → named chunk
1029
+ │ findAll() { ... }│ │ ✓ updateStatus() │ → named chunk
1030
+ │ createJob() │ └────────────────────┘
1031
+ │ ... │
1032
+ │ ────────────────── │ overlaps ↕
1033
+ │ cancelJob() │ → L75-155 block
1034
+ │ updateStatus() │
1035
+ │ ... │
1036
+ └────────────────────┘
1037
+ ```
1038
+
1039
+ **Sliding window** mixes imports, constructors, and multiple methods into one embedding. Search for "cancel a job" and you get a generic block.
1040
+ **AST chunking** gives each method its own embedding. Search for "cancel a job" → direct hit on `cancelJob()`.
1041
+
1042
+ #### Results (Production NestJS Backend — 3,753 lines)
1043
+
1044
+ Tested with 10 domain-specific queries on 8 service files (`orders.service.ts`, `bookings.service.ts`, `notifications.service.ts`, etc.):
1045
+
1046
+ | Metric | Sliding Window | Tree-Sitter AST |
1047
+ |--------|:-:|:-:|
1048
+ | **Query Wins** | 0/10 | **8/10** (2 ties) |
1049
+ | **Top-1 Relevant** | 3/10 | **8/10** |
1050
+ | **Avg Precision@3** | 1.1/3 | **1.7/3** |
1051
+ | **Avg Score Delta** | — | **+0.035** |
1052
+
1053
+ #### Per-Query Breakdown
1054
+
1055
+ | Query | SW Top Result | AST Top Result | Δ Score |
1056
+ |-------|:---:|:---:|:---:|
1057
+ | cancel an order | generic `L451-458` | **`updateOrderStatus`** | +0.005 |
1058
+ | create a booking | generic `L451-458` | **`createInstantBooking`** | +0.068 |
1059
+ | confirm booking | generic `L451-458` | **`confirm`** | +0.034 |
1060
+ | send notification | generic `L226-305` | **`publishNotificationEvent`** | +0.034 |
1061
+ | authenticate JWT | generic `L1-80` | **`AuthModule`** | +0.032 |
1062
+ | tenant DB connection | `L76-155` | **`onModuleDestroy`** | +0.037 |
1063
+ | list orders paginated | `L76-155` | **`findAllActive`** | +0.045 |
1064
+ | reject booking | generic `L451-458` | **`reject`** | +0.090 |
1065
+
1066
+ > Notice how the sliding window returns the **same generic block `L451-458`** for 4 different queries. The AST chunker returns a different, correctly named method each time.
1067
+
1068
+ #### Chunk Quality Comparison
1069
+
1070
+ | | Sliding Window | Tree-Sitter AST |
1071
+ |---|:-:|:-:|
1072
+ | Total chunks | 53 | **83** |
1073
+ | Avg lines/chunk | 75 | **39** |
1074
+ | Named chunks | 0 | **83** (100%) |
1075
+ | Chunk types | `block` | `method`, `interface`, `class` |
1076
+
1077
+ ### Grammar Support
1078
+
1079
+ All 9 core grammars verified, each parsing in **≤0.05ms**:
1080
+
1081
+ | Language | AST Nodes Extracted | Parse Time |
1082
+ |----------|:---:|:---:|
1083
+ | TypeScript | `export_statement`, `interface_declaration` | 0.04ms |
1084
+ | JavaScript | `function_declaration` × 3 | 0.04ms |
1085
+ | Python | `class_definition`, `function_definition` × 2 | 0.03ms |
1086
+ | Go | `function_declaration`, `method_declaration` × 3 | 0.04ms |
1087
+ | Rust | `struct_item`, `impl_item`, `function_item` | 0.03ms |
1088
+ | Ruby | `class`, `method` | 0.03ms |
1089
+ | Java | `class_declaration` | 0.02ms |
1090
+ | C | `function_definition` × 3 | 0.05ms |
1091
+ | PHP | `class_declaration` | 0.03ms |
1092
+
1093
+ > Additional grammars available: C++, Swift, C#, Kotlin, Scala, Lua, Elixir, Bash, HTML, CSS
1094
+
1095
+ ### Running Benchmarks
1096
+
1097
+ ```bash
1098
+ # Grammar support (9 languages, parse speed)
1099
+ node test/benchmarks/grammar-support.mjs
1100
+
1101
+ # Search quality A/B (uses BrainBank's own source files)
1102
+ node test/benchmarks/search-quality.mjs
1103
+ ```
1104
+
1105
+ ---
1106
+
974
1107
  ## Architecture
975
1108
 
976
1109
  <details>
@@ -1035,7 +1168,7 @@ Final results (sorted by blended score)
1035
1168
 
1036
1169
  ### Data Flow
1037
1170
 
1038
- 1. **Index** — Indexers parse files into chunks
1171
+ 1. **Index** — Indexers parse files into chunks (tree-sitter AST for code, heading-based for docs)
1039
1172
  2. **Embed** — Each chunk gets a vector (local WASM or OpenAI)
1040
1173
  3. **Store** — Chunks + vectors → SQLite, vectors → HNSW index
1041
1174
  4. **Search** — Query → HNSW k-NN + BM25 keyword → RRF fusion → optional reranker
package/dist/{types-Da_zLLOl.d.ts → base-9vfWRHCV.d.ts} CHANGED
@@ -101,10 +101,14 @@ interface SearchHit {
101
101
  interface VectorIndex {
102
102
  /** Initialize the index. Must be called before add/search. */
103
103
  init(): Promise<this>;
104
- /** Add a vector with an integer ID. */
104
+ /** Add a vector with an integer ID. Idempotent: duplicate IDs are skipped. */
105
105
  add(vector: Float32Array, id: number): void;
106
+ /** Mark a vector as deleted so it no longer appears in searches. */
107
+ remove(id: number): void;
106
108
  /** Search for k nearest neighbors. */
107
109
  search(query: Float32Array, k: number): SearchHit[];
110
+ /** Clear all vectors and reset to empty state. */
111
+ reinit(): void;
108
112
  /** Number of vectors in the index. */
109
113
  readonly size: number;
110
114
  }
@@ -140,7 +144,7 @@ interface GitCommitRecord {
140
144
  deletions: number;
141
145
  isMerge: boolean;
142
146
  }
143
- interface MemoryPattern {
147
+ interface LearningPattern {
144
148
  id?: number;
145
149
  /** Category (e.g. 'api', 'refactor', 'debug') */
146
150
  taskType: string;
@@ -166,25 +170,88 @@ interface DistilledStrategy {
166
170
  updatedAt: number;
167
171
  }
168
172
  type SearchResultType = 'code' | 'commit' | 'pattern' | 'document' | 'collection';
169
- interface SearchResult {
170
- type: SearchResultType;
173
+ interface CodeResultMetadata {
174
+ chunkType: string;
175
+ name?: string;
176
+ startLine: number;
177
+ endLine: number;
178
+ language: string;
179
+ searchType?: string;
180
+ }
181
+ interface CommitResultMetadata {
182
+ hash: string;
183
+ shortHash: string;
184
+ author: string;
185
+ date: string;
186
+ files: string[];
187
+ additions?: number;
188
+ deletions?: number;
189
+ diff?: string;
190
+ searchType?: string;
191
+ }
192
+ interface PatternResultMetadata {
193
+ taskType: string;
194
+ task: string;
195
+ outcome?: string;
196
+ successRate: number;
197
+ critique?: string;
198
+ searchType?: string;
199
+ }
200
+ interface DocumentResultMetadata {
201
+ collection?: string;
202
+ title?: string;
203
+ seq?: number;
204
+ path?: string;
205
+ searchType?: string;
206
+ }
207
+ interface CodeResult {
208
+ type: 'code';
209
+ score: number;
210
+ filePath: string;
211
+ content: string;
212
+ context?: string;
213
+ metadata: CodeResultMetadata;
214
+ }
215
+ interface CommitResult {
216
+ type: 'commit';
217
+ score: number;
218
+ filePath?: string;
219
+ content: string;
220
+ context?: string;
221
+ metadata: CommitResultMetadata;
222
+ }
223
+ interface PatternResult {
224
+ type: 'pattern';
225
+ score: number;
226
+ filePath?: string;
227
+ content: string;
228
+ context?: string;
229
+ metadata: PatternResultMetadata;
230
+ }
231
+ interface DocumentResult {
232
+ type: 'document';
233
+ score: number;
234
+ filePath?: string;
235
+ content: string;
236
+ context?: string;
237
+ metadata: DocumentResultMetadata;
238
+ }
239
+ interface CollectionResult {
240
+ type: 'collection';
171
241
  score: number;
172
- /** File path (for code results) or document path */
173
242
  filePath?: string;
174
- /** Content / text */
175
243
  content: string;
176
- /** Context description (for document results) */
177
244
  context?: string;
178
- /** Extra metadata depending on type */
179
245
  metadata: Record<string, any>;
180
246
  }
247
+ type SearchResult = CodeResult | CommitResult | PatternResult | DocumentResult | CollectionResult;
181
248
  interface ContextOptions {
182
249
  /** Max code chunks to include. Default: 6 */
183
250
  codeResults?: number;
184
251
  /** Max git commits to include. Default: 5 */
185
252
  gitResults?: number;
186
253
  /** Max memory patterns to include. Default: 4 */
187
- memoryResults?: number;
254
+ patternResults?: number;
188
255
  /** Files the agent is about to modify (improves co-edit suggestions) */
189
256
  affectedFiles?: string[];
190
257
  /** Minimum similarity score threshold. Default: 0.25 */
@@ -252,7 +319,10 @@ interface IndexStats {
252
319
  long: number;
253
320
  };
254
321
  }
322
+ /** File-level progress (used by indexers). */
255
323
  type ProgressCallback = (file: string, current: number, total: number) => void;
324
+ /** Stage-level progress (used by BrainBank.index() orchestrator). */
325
+ type StageProgressCallback = (stage: string, message: string) => void;
256
326
  interface IndexResult {
257
327
  indexed: number;
258
328
  skipped: number;
@@ -278,18 +348,34 @@ declare class HNSWIndex implements VectorIndex {
278
348
  private _efConstruction;
279
349
  private _efSearch;
280
350
  private _index;
281
- private _count;
351
+ private _lib;
352
+ private _ids;
282
353
  constructor(_dims: number, _maxElements?: number, _M?: number, _efConstruction?: number, _efSearch?: number);
283
354
  /**
284
355
  * Initialize the HNSW index.
285
356
  * Must be called before add/search.
286
357
  */
287
358
  init(): Promise<this>;
359
+ /**
360
+ * Reinitialize the index in-place, clearing all vectors.
361
+ * Required after reembed or full re-index to avoid duplicate IDs.
362
+ * init() must have been called first.
363
+ */
364
+ reinit(): void;
365
+ private _createIndex;
366
+ /** Maximum capacity of this index. */
367
+ get maxElements(): number;
288
368
  /**
289
369
  * Add a vector with an integer ID.
290
370
  * The vector should be pre-normalized for cosine distance.
291
371
  */
292
372
  add(vector: Float32Array, id: number): void;
373
+ /**
374
+ * Mark a vector as deleted so it no longer appears in searches.
375
+ * Uses hnswlib-node markDelete under the hood.
376
+ * Safe to call with an ID that doesn't exist.
377
+ */
378
+ remove(id: number): void;
293
379
  /**
294
380
  * Search for the k nearest neighbors.
295
381
  * Returns results sorted by score (highest first).
@@ -434,41 +520,55 @@ interface Indexer {
434
520
  readonly name: string;
435
521
  /** Initialize the indexer (create HNSW, load vectors, etc.). */
436
522
  initialize(ctx: IndexerContext): Promise<void>;
437
- /** Index content. Implemented by code and git indexers. */
523
+ /** Index content (code, git plugins). */
438
524
  index?(options?: any): Promise<any>;
439
- /** Search indexed content. Implemented by docs indexer. */
525
+ /** Search indexed content (docs plugin). */
440
526
  search?(query: string, options?: any): Promise<any[]>;
441
- /** Register a document collection. */
527
+ /** Register a document collection (docs plugin). */
442
528
  addCollection?(collection: any): void;
443
- /** Remove a collection. */
529
+ /** Remove a collection (docs plugin). */
444
530
  removeCollection?(name: string): void;
445
- /** List registered collections. */
531
+ /** List registered collections (docs plugin). */
446
532
  listCollections?(): any[];
447
- /** Index all or specific collections. */
533
+ /** Index collections (docs plugin). */
448
534
  indexCollections?(options?: any): Promise<any>;
449
- /** Add context description for a collection path. */
535
+ /** Add context for a collection path (docs plugin). */
450
536
  addContext?(collection: string, path: string, context: string): void;
451
- /** Remove context for a collection path. */
537
+ /** Remove context (docs plugin). */
452
538
  removeContext?(collection: string, path: string): void;
453
- /** List all context entries. */
539
+ /** List context entries (docs plugin). */
454
540
  listContexts?(): any[];
455
- /**
456
- * Called by watch mode when a file changes.
457
- * Return true if this indexer handled the change.
458
- * If not implemented, watch will fall back to brain.index().
459
- */
541
+ /** Watch mode: handle file change (returns true if handled). */
460
542
  onFileChange?(filePath: string, event: 'create' | 'update' | 'delete'): Promise<boolean>;
461
- /**
462
- * Glob patterns this indexer watches.
463
- * If not set, defaults to all supported code extensions.
464
- */
543
+ /** Glob patterns for watch mode. */
465
544
  watchPatterns?(): string[];
466
545
  /** Return stats for this indexer. */
467
546
  stats?(): Record<string, any>;
468
547
  /** Clean up resources. */
469
548
  close?(): void;
470
549
  }
471
- type BrainBankModule = Indexer;
472
- type ModuleContext = IndexerContext;
550
+ /** Indexers that can scan and index content. */
551
+ interface IndexablePlugin extends Indexer {
552
+ index(options?: any): Promise<any>;
553
+ }
554
+ /** Indexers that can search indexed content. */
555
+ interface SearchablePlugin extends Indexer {
556
+ search(query: string, options?: any): Promise<any[]>;
557
+ }
558
+ /** Indexers that support file watch mode. */
559
+ interface WatchablePlugin extends Indexer {
560
+ onFileChange(filePath: string, event: 'create' | 'update' | 'delete'): Promise<boolean>;
561
+ watchPatterns(): string[];
562
+ }
563
+ /** Indexers that manage document collections. */
564
+ interface CollectionPlugin extends Indexer {
565
+ addCollection(collection: any): void;
566
+ removeCollection(name: string): void;
567
+ listCollections(): any[];
568
+ indexCollections(options?: any): Promise<any>;
569
+ addContext?(collection: string, path: string, context: string): void;
570
+ removeContext?(collection: string, path: string): void;
571
+ listContexts?(): any[];
572
+ }
473
573
 
474
- export { type BrainBankModule as B, Collection as C, type DocumentCollection as D, type EmbeddingProvider as E, type GitCommitRecord as G, HNSWIndex as H, type Indexer as I, type MemoryPattern as M, type ProgressCallback as P, type ResolvedConfig as R, type SearchResult as S, type VectorIndex as V, type BrainBankConfig as a, type IndexResult as b, type ContextOptions as c, type CoEditSuggestion as d, type IndexStats as e, type SearchHit as f, type CodeChunk as g, Database as h, type Reranker as i, type CollectionAddOptions as j, type CollectionItem as k, type CollectionSearchOptions as l, type DistilledStrategy as m, type DocChunk as n, type IndexerContext as o, type ModuleContext as p, type SearchResultType as q };
574
+ export { type SearchResultType as A, type BrainBankConfig as B, Collection as C, type DocumentCollection as D, type EmbeddingProvider as E, type SearchablePlugin as F, type GitCommitRecord as G, HNSWIndex as H, type Indexer as I, type LearningPattern as L, type ProgressCallback as P, type ResolvedConfig as R, type StageProgressCallback as S, type VectorIndex as V, type WatchablePlugin as W, type IndexResult as a, type SearchResult as b, type ContextOptions as c, type CoEditSuggestion as d, type IndexStats as e, type SearchHit as f, type CodeChunk as g, Database as h, type Reranker as i, type CodeResult as j, type CodeResultMetadata as k, type CollectionAddOptions as l, type CollectionItem as m, type CollectionPlugin as n, type CollectionResult as o, type CollectionSearchOptions as p, type CommitResult as q, type CommitResultMetadata as r, type DistilledStrategy as s, type DocChunk as t, type DocumentResult as u, type DocumentResultMetadata as v, type IndexablePlugin as w, type IndexerContext as x, type PatternResult as y, type PatternResultMetadata as z };