npm - brainbank - Versions diffs - 0.6.0 → 0.7.0 - Mend

brainbank 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/README.md +132 -61
package/dist/{base-B_vJSAbj.d.ts → base-3SNc_CeY.d.ts} +4 -4
package/dist/{chunk-YC4ZQLDN.js → chunk-DI3H6JVZ.js} +8 -7
package/dist/chunk-DI3H6JVZ.js.map +1 -0
package/dist/{chunk-PXK62M5W.js → chunk-FGL32LUJ.js} +4 -3
package/dist/{chunk-PXK62M5W.js.map → chunk-FGL32LUJ.js.map} +1 -1
package/dist/{chunk-HPNUMUIF.js → chunk-JRSKWF6K.js} +4 -3
package/dist/{chunk-HPNUMUIF.js.map → chunk-JRSKWF6K.js.map} +1 -1
package/dist/{chunk-C4KDZGRX.js → chunk-VQ27YUHH.js} +10 -6
package/dist/{chunk-C4KDZGRX.js.map → chunk-VQ27YUHH.js.map} +1 -1
package/dist/cli.js +90 -25
package/dist/cli.js.map +1 -1
package/dist/code.d.ts +3 -1
package/dist/code.js +1 -1
package/dist/docs.d.ts +7 -3
package/dist/docs.js +1 -1
package/dist/git.d.ts +3 -1
package/dist/git.js +1 -1
package/dist/index.d.ts +2 -2
package/dist/index.js +4 -4
package/dist/memory.d.ts +1 -1
package/dist/notes.d.ts +1 -1
package/package.json +2 -2
package/dist/chunk-YC4ZQLDN.js.map +0 -1

package/README.md CHANGED Viewed

@@ -20,29 +20,16 @@ BrainBank gives LLMs a long-term memory that persists between sessions.
 ## Why BrainBank?
-Built for a multi-repo codebase that needed unified AI context. Zero infrastructure, zero ongoing cost.
+BrainBank is a **code-aware knowledge engine** — not just a memory layer. It parses your codebase with tree-sitter ASTs, indexes git history and co-edit patterns, and makes everything searchable with hybrid vector + keyword retrieval. Optional packages add conversational memory (`@brainbank/memory`) and MCP integration (`@brainbank/mcp`).
-Most AI memory solutions (mem0, Zep, LangMem) require cloud services, external databases, or LLM calls just to store a memory. BrainBank takes a different approach:
-| | **BrainBank** | **mem0** | **Zep** | **LangMem** |
+| | **BrainBank** | **QMD** | **mem0 / Zep** | **LangChain** |
 |---|:---:|:---:|:---:|:---:|
-| Infrastructure | **SQLite file** | Vector DB + cloud | Neo4j + cloud | LangGraph Platform |
-| LLM required to write | **No**¹ | Yes | Yes | Yes |
-| Code-aware | **19 AST-parsed languages (tree-sitter), git, co-edits** | ✗ | ✗ | ✗ |
-| Custom plugins | **`.use()` plugin system** | ✗ | ✗ | ✗ |
-| Search | **Vector + BM25 + RRF** | Vector + graph² | Vector + BM25 + graph | Vector only |
-| Framework lock-in | **None** | Optional | Zep cloud | LangChain |
-| Portable | **Copy one file** | Tied to DB | Tied to cloud | Tied to platform |
-> ¹ mem0 and Zep use LLMs to auto-extract memories from raw text. BrainBank is explicit — you decide what gets stored. Less magic, more control.
->
-> ² mem0's graph store (mem0g) is available in the paid platform version.
-**In short:**
-- **Code-first** — the only memory layer that understands code structure, git history, and file co-edit relationships
-- **Framework-agnostic** — plain TypeScript, works with any agent framework (LangChain, Vercel AI SDK, custom) or none at all. Unopinionated — doesn't force you into a specific pattern
-- **$0 memory bill** — no LLM calls to extract/consolidate. You store what you want, BrainBank embeds deterministically
-- **Truly portable** — `.brainbank/brainbank.db` is a normal file. Copy it, back it up, `git lfs` it
+| Code-aware (AST) | **19 languages** (tree-sitter) | ✗ | ✗ | ✗ |
+| Git + co-edits | ✓ | ✗ | ✗ | ✗ |
+| Search | **Vector + BM25 + RRF** | Vector + reranker | Vector + graph | Vector only |
+| Infra | **SQLite file** | Local GGUF | Vector DB + cloud | Vector DB |
+| Plugins | **`.use()` builder** | ✗ | ✗ | ✗ |
+| Memory | **`@brainbank/memory`** (opt-in) | ✗ | **Core feature** | ✗ |
 ### Table of Contents
@@ -61,6 +48,7 @@ Most AI memory solutions (mem0, Zep, LangMem) require cloud services, external d
   - [Examples](#examples)
   - [Watch Mode](#watch-mode)
 - [MCP Server](#mcp-server)
+- [Project Config](#project-config)
 - [Configuration](#configuration)
   - [Embedding Providers](#embedding-providers)
   - [Reranker](#reranker)
@@ -74,7 +62,8 @@ Most AI memory solutions (mem0, Zep, LangMem) require cloud services, external d
 - [Benchmarks](#benchmarks)
   - [Search Quality: AST vs Sliding Window](#search-quality-ast-vs-sliding-window)
   - [Grammar Support](#grammar-support)
-  - [RAG Retrieval Quality](#rag-retrieval-quality) · [Full Results →](./BENCHMARKS.md)
+  - [RAG Retrieval Quality](#rag-retrieval-quality)
+    · [Full Results →](./BENCHMARKS.md)
 ---
@@ -104,17 +93,17 @@ npm install @brainbank/mcp
 ### Tree-Sitter Grammars
-BrainBank uses [tree-sitter](https://tree-sitter.github.io/) for AST-aware code chunking. **JavaScript and TypeScript grammars are included by default.** Other languages require installing the corresponding grammar package:
+BrainBank uses [tree-sitter](https://tree-sitter.github.io/) for AST-aware code chunking. **JavaScript, TypeScript, and Python grammars are included by default.** Other languages require installing the corresponding grammar package:
 ```bash
 # Install only the grammars you need
-npm install tree-sitter-python tree-sitter-go tree-sitter-rust
+npm install tree-sitter-go tree-sitter-rust tree-sitter-ruby
 ```
 If you index a file whose grammar isn't installed, BrainBank will throw a clear error:
 ```
-BrainBank: Grammar 'tree-sitter-python' is not installed. Run: npm install tree-sitter-python
+BrainBank: Grammar 'tree-sitter-go' is not installed. Run: npm install tree-sitter-go
 ```
 <details>
@@ -122,11 +111,11 @@ BrainBank: Grammar 'tree-sitter-python' is not installed. Run: npm install tree-
 | Category | Packages |
 |----------|----------|
-| **Included** | `tree-sitter-javascript`, `tree-sitter-typescript` |
+| **Included** | `tree-sitter-javascript`, `tree-sitter-typescript`, `tree-sitter-python` |
 | Web | `tree-sitter-html`, `tree-sitter-css` |
 | Systems | `tree-sitter-go`, `tree-sitter-rust`, `tree-sitter-c`, `tree-sitter-cpp`, `tree-sitter-swift` |
 | JVM | `tree-sitter-java`, `tree-sitter-kotlin`, `tree-sitter-scala` |
-| Scripting | `tree-sitter-python`, `tree-sitter-ruby`, `tree-sitter-php`, `tree-sitter-lua`, `tree-sitter-bash`, `tree-sitter-elixir` |
+| Scripting | `tree-sitter-ruby`, `tree-sitter-php`, `tree-sitter-lua`, `tree-sitter-bash`, `tree-sitter-elixir` |
 | .NET | `tree-sitter-c-sharp` |
 </details>
@@ -275,23 +264,33 @@ BrainBank uses pluggable plugins. Register only what you need with `.use()`:
 | `docs` | `brainbank/docs` | Document collections (markdown, wikis) |
 ```typescript
-import { BrainBank } from 'brainbank';
+import { BrainBank, OpenAIEmbedding } from 'brainbank';
 import { code } from 'brainbank/code';
 import { git } from 'brainbank/git';
 import { docs } from 'brainbank/docs';
-// Pick only the plugins you need
-const brain = new BrainBank({ repoPath: '.' })
-  .use(code())
-  .use(git())
-  .use(docs());
+// Each plugin can use a different embedding provider
+const brain = new BrainBank({ repoPath: '.' })       // default: local WASM (384d, free)
+  .use(code({ embeddingProvider: new OpenAIEmbedding() }))  // code: OpenAI (1536d)
+  .use(git())                                               // git: local (384d)
+  .use(docs());                                             // docs: local (384d)
 // Index code + git (incremental — only processes changes)
 await brain.index();
-// Index document collections
+// Register and index document collections
 await brain.addCollection({ name: 'wiki', path: '~/docs', pattern: '**/*.md' });
 await brain.indexDocs();
+// Dynamic collections — store anything
+const decisions = brain.collection('decisions');
+await decisions.add(
+  'Use SQLite with WAL mode instead of PostgreSQL. Portable, zero infra.',
+  { tags: ['architecture'] }
+);
+const hits = await decisions.search('why not postgres');
+brain.close();
 ```
 ### Collections
@@ -509,32 +508,72 @@ brainbank stats                             # shows all plugins
 brainbank kv search slack_messages "deploy"  # search slack data
 ```
-#### Advanced: config file
+---
-For fine-grained control, create a `.brainbank/config.ts`:
+## Project Config
-```typescript
-// .brainbank/config.ts
-export default {
-  builtins: ['code', 'docs'],   // exclude git (default: all three)
-  brainbank: {                   // BrainBank constructor options
-    dbPath: '.brainbank/brain.db',
+Drop a `.brainbank/config.json` in your repo root. Every `brainbank index` run reads it automatically — no CLI flags needed.
+```jsonc
+// .brainbank/config.json
+{
+  // Which built-in plugins to load (default: all three)
+  "plugins": ["code", "git", "docs"],
+  // Per-plugin options
+  "code": {
+    "embedding": "openai",         // use OpenAI embeddings for code
+    "maxFileSize": 512000
   },
-};
+  "git": {
+    "depth": 200                    // index last 200 commits
+  },
+  "docs": {
+    "embedding": "perplexity-context",
+    "collections": [
+      { "name": "docs", "path": "./docs", "pattern": "**/*.md" },
+      { "name": "wiki", "path": "~/team-wiki", "pattern": "**/*.md", "ignore": ["drafts/**"] }
+    ]
+  },
+  // Global defaults
+  "embedding": "local",            // default for plugins without their own
+  "reranker": "qwen3"
+}
 ```
-Everything lives in `.brainbank/` — DB, config, and custom plugins:
+**Embedding keys:** `"local"` (default, free), `"openai"`, `"perplexity"`, `"perplexity-context"`.
+**Per-plugin embeddings** — each plugin creates its own HNSW index with the correct dimensions. A plugin without an `embedding` key uses the global default.
+**Docs collections** — registered automatically on every `brainbank index` run. No need for `--docs` flags.
+**Custom plugins** — auto-discovered from `.brainbank/indexers/`:
 ```
 .brainbank/
 ├── brainbank.db        # SQLite database (auto-created)
-├── config.ts           # Optional project config
-└── indexers/           # Optional custom plugin files
+├── config.json         # Project config (optional)
+└── indexers/           # Custom plugin files (optional)
     ├── slack.ts
     └── jira.ts
 ```
-No folder and no config file? The CLI uses the built-in plugins (`code`, `git`, `docs`).
+Custom plugins can also have their own config section:
+```jsonc
+{
+  "plugins": ["code", "git"],
+  "slack": { "embedding": "openai" },   // matched by plugin name
+  "jira": { "embedding": "perplexity" }
+}
+```
+**Config priority:** CLI flags > `config.json` > auto-resolve from DB > defaults.
+> `.brainbank/config.ts` (or `.js`, `.mjs`) is still supported for programmatic config with custom plugin instances. JSON is preferred for declarative setups.
+No config file? The CLI uses all built-in plugins with local embeddings — zero config required.
 ---
@@ -708,6 +747,50 @@ const brain = new BrainBank({
 | **Perplexity** | `PerplexityEmbedding` | 2560 (4b) / 1024 (0.6b) | ~100ms | $0.02/1M tokens |
 | **Perplexity Context** | `PerplexityContextEmbedding` | 2560 (4b) / 1024 (0.6b) | ~100ms | $0.06/1M tokens |
+#### How It Works
+BrainBank **auto-resolves** the embedding provider. Set it once → it's stored in the DB → every future run uses the same provider automatically.
+**Programmatic API** — pass `embeddingProvider` to the constructor:
+```typescript
+import { BrainBank, OpenAIEmbedding } from 'brainbank';
+const brain = new BrainBank({
+  repoPath: '.',
+  embeddingProvider: new OpenAIEmbedding(),  // stored in DB on first index
+});
+```
+**CLI** — use the `--embedding` flag on first index:
+```bash
+brainbank index . --embedding openai        # stores provider_key=openai in DB
+brainbank index .                            # auto-resolves openai from DB
+brainbank hsearch "auth middleware"           # uses the same provider
+```
+**MCP** — zero-config. Reads the provider from the DB automatically.
+> The provider key is persisted in the `embedding_meta` table. Priority on startup: explicit `embeddingProvider` in config > stored `provider_key` in DB > local WASM (default).
+**Per-plugin override** — each plugin can use a different embedding provider:
+```typescript
+import { BrainBank, OpenAIEmbedding } from 'brainbank';
+import { PerplexityContextEmbedding } from 'brainbank';
+import { code } from 'brainbank/code';
+import { git } from 'brainbank/git';
+import { docs } from 'brainbank/docs';
+const brain = new BrainBank({ repoPath: '.' })       // default: local WASM (384d)
+  .use(code({ embeddingProvider: new OpenAIEmbedding() }))              // code: OpenAI (1536d)
+  .use(git())                                                           // git: local (384d)
+  .use(docs({ embeddingProvider: new PerplexityContextEmbedding() }));  // docs: Perplexity (2560d)
+```
+> Each plugin creates its own HNSW index with the correct dimensions. The global `embeddingProvider` (or local default) is used for any plugin that doesn't specify one.
 #### OpenAI
 ```typescript
@@ -1104,7 +1187,7 @@ BrainBank uses **native tree-sitter** to parse source code into ASTs and extract
 For large classes (>80 lines), the chunker descends into the class body and extracts each method as a separate chunk. For unsupported languages, it falls back to a sliding window with overlap.
-> Tree-sitter grammars are **optional dependencies**. If a grammar isn't installed, that language falls back to the generic sliding window. Install only the grammars you need: `npm install tree-sitter-ruby tree-sitter-go` etc.
+> Tree-sitter grammars are **optional dependencies** (except JavaScript, TypeScript, and Python, which are included by default). If you index a file whose grammar isn't installed, BrainBank throws a clear error with the exact `npm install` command. See [Tree-Sitter Grammars](#tree-sitter-grammars) for the full list.
 ### Incremental Indexing
@@ -1270,19 +1353,7 @@ BrainBank's hybrid search pipeline (Vector + BM25 → RRF) with Perplexity Conte
 The hybrid pipeline improved R@5 by **+26pp over vector-only** retrieval on our custom eval.
-#### BrainBank vs QMD (Head-to-Head)
-Compared against [QMD](https://github.com/tobi/qmd), a local-first search engine using GGUF models (embeddinggemma-300M + query expansion + reranker) — same corpus, same 20 queries:
-| Metric | BrainBank + Reranker | QMD + Reranker |
-|---|:---:|:---:|
-| **R@5** | **83%** | 65% |
-| **MRR** | **0.57** | 0.45 |
-| **Misses** | **1/20** | 6/20 |
-> BrainBank wins by +18pp R@5. QMD is competitive on semantic queries (81% vs 94%) and ties on broad queries (83% vs 83%) — impressive for a fully local pipeline with zero API calls.
-See **[BENCHMARKS.md](./BENCHMARKS.md)** for full pipeline progression, per-technique impact, QMD comparison details, and reproduction instructions.
+See **[BENCHMARKS.md](./BENCHMARKS.md)** for full pipeline progression, per-technique impact, and reproduction instructions.
 #### Running the RAG Eval

package/dist/{base-B_vJSAbj.d.ts → base-3SNc_CeY.d.ts} RENAMED Viewed

@@ -542,12 +542,12 @@ interface PluginContext {
     embedding: EmbeddingProvider;
     /** Resolved BrainBank config. */
     config: ResolvedConfig;
-    /** Create and initialize an HNSW index. */
-    createHnsw(maxElements?: number): Promise<HNSWIndex>;
+    /** Create and initialize an HNSW index. Optionally override dims for per-plugin embeddings. */
+    createHnsw(maxElements?: number, dims?: number): Promise<HNSWIndex>;
     /** Load existing vectors from a SQLite vectors table into an HNSW index + cache. */
     loadVectors(table: string, idCol: string, hnsw: HNSWIndex, cache: Map<number, Float32Array>): void;
-    /** Get or create a shared HNSW index by type (e.g. 'code', 'git'). For multi-repo support. */
-    getOrCreateSharedHnsw(type: string, maxElements?: number): Promise<{
+    /** Get or create a shared HNSW index by type (e.g. 'code', 'git'). Optionally override dims for per-plugin embeddings. */
+    getOrCreateSharedHnsw(type: string, maxElements?: number, dims?: number): Promise<{
         hnsw: HNSWIndex;
         vecCache: Map<number, Float32Array>;
         isNew: boolean;

package/dist/{chunk-YC4ZQLDN.js → chunk-DI3H6JVZ.js} RENAMED Viewed

@@ -6,10 +6,10 @@ import {
   isIgnoredDir,
   isIgnoredFile,
   isSupported
-} from "./chunk-PXK62M5W.js";
+} from "./chunk-FGL32LUJ.js";
 import {
   rerank
-} from "./chunk-C4KDZGRX.js";
+} from "./chunk-VQ27YUHH.js";
 import {
   normalizeBM25,
   reciprocalRankFusion,
@@ -1443,8 +1443,8 @@ var Initializer = class {
       db,
       embedding,
       config,
-      createHnsw: /* @__PURE__ */ __name((maxElements) => new HNSWIndex(
-        config.embeddingDims,
+      createHnsw: /* @__PURE__ */ __name((maxElements, dims) => new HNSWIndex(
+        dims ?? config.embeddingDims,
         maxElements ?? config.maxElements,
         config.hnswM,
         config.hnswEfConstruction,
@@ -1461,11 +1461,12 @@ var Initializer = class {
           loadVectors(db, table, idCol, hnsw, cache);
         }
       }, "loadVectors"),
-      getOrCreateSharedHnsw: /* @__PURE__ */ __name(async (type, maxElements) => {
+      getOrCreateSharedHnsw: /* @__PURE__ */ __name(async (type, maxElements, dims) => {
         const existing = sharedHnsw.get(type);
         if (existing) return { ...existing, isNew: false };
+        const hnswDims = dims ?? config.embeddingDims;
         const hnsw = await new HNSWIndex(
-          config.embeddingDims,
+          hnswDims,
           maxElements ?? config.maxElements,
           config.hnswM,
           config.hnswEfConstruction,
@@ -2428,4 +2429,4 @@ export {
   ContextBuilder,
   BrainBank
 };
-//# sourceMappingURL=chunk-YC4ZQLDN.js.map
+//# sourceMappingURL=chunk-DI3H6JVZ.js.map