brainbank 0.1.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +155 -0
  3. package/assets/architecture.png +0 -0
  4. package/bin/brainbank +18 -0
  5. package/bin/brainbank-mcp +19 -0
  6. package/dist/chunk-3YBCD6DI.js +117 -0
  7. package/dist/chunk-3YBCD6DI.js.map +1 -0
  8. package/dist/chunk-63GBCDS5.js +3249 -0
  9. package/dist/chunk-63GBCDS5.js.map +1 -0
  10. package/dist/chunk-DMFMTOHF.js +123 -0
  11. package/dist/chunk-DMFMTOHF.js.map +1 -0
  12. package/dist/chunk-FQYKWB2Q.js +136 -0
  13. package/dist/chunk-FQYKWB2Q.js.map +1 -0
  14. package/dist/chunk-IMJJ2VEM.js +74 -0
  15. package/dist/chunk-IMJJ2VEM.js.map +1 -0
  16. package/dist/chunk-M744PCJQ.js +43 -0
  17. package/dist/chunk-M744PCJQ.js.map +1 -0
  18. package/dist/chunk-O3J6ZIXK.js +82 -0
  19. package/dist/chunk-O3J6ZIXK.js.map +1 -0
  20. package/dist/chunk-OPH7GZ7U.js +124 -0
  21. package/dist/chunk-OPH7GZ7U.js.map +1 -0
  22. package/dist/chunk-PXEWQMN7.js +89 -0
  23. package/dist/chunk-PXEWQMN7.js.map +1 -0
  24. package/dist/chunk-RDQYDLYZ.js +69 -0
  25. package/dist/chunk-RDQYDLYZ.js.map +1 -0
  26. package/dist/chunk-VIIHPCC4.js +254 -0
  27. package/dist/chunk-VIIHPCC4.js.map +1 -0
  28. package/dist/chunk-WCQVDF3K.js +14 -0
  29. package/dist/chunk-WCQVDF3K.js.map +1 -0
  30. package/dist/cli.d.ts +1 -0
  31. package/dist/cli.js +3076 -0
  32. package/dist/cli.js.map +1 -0
  33. package/dist/haiku-expander-YRSIPGKP.js +8 -0
  34. package/dist/haiku-expander-YRSIPGKP.js.map +1 -0
  35. package/dist/haiku-pruner-SHAXUPY6.js +8 -0
  36. package/dist/haiku-pruner-SHAXUPY6.js.map +1 -0
  37. package/dist/http-server-QUXHLWUM.js +9 -0
  38. package/dist/http-server-QUXHLWUM.js.map +1 -0
  39. package/dist/index.d.ts +2161 -0
  40. package/dist/index.js +357 -0
  41. package/dist/index.js.map +1 -0
  42. package/dist/local-embedding-NZQTILGV.js +8 -0
  43. package/dist/local-embedding-NZQTILGV.js.map +1 -0
  44. package/dist/mcp.d.ts +2 -0
  45. package/dist/mcp.js +334 -0
  46. package/dist/mcp.js.map +1 -0
  47. package/dist/openai-embedding-ZP5TSUJG.js +8 -0
  48. package/dist/openai-embedding-ZP5TSUJG.js.map +1 -0
  49. package/dist/perplexity-context-embedding-GI5PHE6X.js +9 -0
  50. package/dist/perplexity-context-embedding-GI5PHE6X.js.map +1 -0
  51. package/dist/perplexity-embedding-KZRYGJRC.js +10 -0
  52. package/dist/perplexity-embedding-KZRYGJRC.js.map +1 -0
  53. package/dist/plugin-IKQ6IRSJ.js +32 -0
  54. package/dist/plugin-IKQ6IRSJ.js.map +1 -0
  55. package/dist/resolve-ASGLBNUC.js +10 -0
  56. package/dist/resolve-ASGLBNUC.js.map +1 -0
  57. package/dist/stats-tui-ZY2NQSEA.js +1904 -0
  58. package/dist/stats-tui-ZY2NQSEA.js.map +1 -0
  59. package/package.json +96 -0
  60. package/src/brainbank.ts +617 -0
  61. package/src/cli/commands/collection.ts +77 -0
  62. package/src/cli/commands/context.ts +179 -0
  63. package/src/cli/commands/daemon.ts +100 -0
  64. package/src/cli/commands/docs.ts +71 -0
  65. package/src/cli/commands/files.ts +69 -0
  66. package/src/cli/commands/help.ts +77 -0
  67. package/src/cli/commands/index.ts +482 -0
  68. package/src/cli/commands/kv.ts +140 -0
  69. package/src/cli/commands/mcp-export.ts +273 -0
  70. package/src/cli/commands/mcp.ts +6 -0
  71. package/src/cli/commands/reembed.ts +30 -0
  72. package/src/cli/commands/scan.ts +336 -0
  73. package/src/cli/commands/search.ts +203 -0
  74. package/src/cli/commands/stats.ts +68 -0
  75. package/src/cli/commands/status.ts +47 -0
  76. package/src/cli/commands/watch.ts +47 -0
  77. package/src/cli/factory/brain-context.ts +43 -0
  78. package/src/cli/factory/builtin-registration.ts +87 -0
  79. package/src/cli/factory/config-loader.ts +77 -0
  80. package/src/cli/factory/index.ts +69 -0
  81. package/src/cli/factory/plugin-loader.ts +325 -0
  82. package/src/cli/index.ts +71 -0
  83. package/src/cli/server-client.ts +178 -0
  84. package/src/cli/tui/index-tui.tsx +667 -0
  85. package/src/cli/tui/stats-data.ts +523 -0
  86. package/src/cli/tui/stats-search.ts +262 -0
  87. package/src/cli/tui/stats-tui.tsx +1465 -0
  88. package/src/cli/tui/tree-scanner.ts +650 -0
  89. package/src/cli/utils.ts +137 -0
  90. package/src/config.ts +49 -0
  91. package/src/constants.ts +21 -0
  92. package/src/db/adapter.ts +112 -0
  93. package/src/db/metadata.ts +130 -0
  94. package/src/db/migrations.ts +66 -0
  95. package/src/db/sqlite-adapter.ts +218 -0
  96. package/src/db/tracker.ts +91 -0
  97. package/src/engine/index-api.ts +81 -0
  98. package/src/engine/reembed.ts +206 -0
  99. package/src/engine/search-api.ts +218 -0
  100. package/src/index.ts +154 -0
  101. package/src/lib/fts.ts +57 -0
  102. package/src/lib/languages.ts +180 -0
  103. package/src/lib/logger.ts +126 -0
  104. package/src/lib/math.ts +87 -0
  105. package/src/lib/provider-key.ts +20 -0
  106. package/src/lib/prune.ts +71 -0
  107. package/src/lib/rrf.ts +133 -0
  108. package/src/lib/write-lock.ts +108 -0
  109. package/src/mcp/mcp-server.ts +195 -0
  110. package/src/mcp/workspace-factory.ts +68 -0
  111. package/src/mcp/workspace-pool.ts +224 -0
  112. package/src/plugin.ts +381 -0
  113. package/src/providers/embeddings/embedding-worker-thread.ts +95 -0
  114. package/src/providers/embeddings/embedding-worker.ts +141 -0
  115. package/src/providers/embeddings/local-embedding.ts +115 -0
  116. package/src/providers/embeddings/openai-embedding.ts +167 -0
  117. package/src/providers/embeddings/perplexity-context-embedding.ts +195 -0
  118. package/src/providers/embeddings/perplexity-embedding.ts +165 -0
  119. package/src/providers/embeddings/resolve.ts +34 -0
  120. package/src/providers/pruners/haiku-expander.ts +166 -0
  121. package/src/providers/pruners/haiku-pruner.ts +112 -0
  122. package/src/providers/vector/hnsw-index.ts +174 -0
  123. package/src/providers/vector/hnsw-loader.ts +129 -0
  124. package/src/search/bm25-boost.ts +69 -0
  125. package/src/search/context-builder.ts +251 -0
  126. package/src/search/keyword/composite-bm25-search.ts +47 -0
  127. package/src/search/types.ts +37 -0
  128. package/src/search/vector/composite-vector-search.ts +61 -0
  129. package/src/search/vector/mmr.ts +64 -0
  130. package/src/services/collection.ts +384 -0
  131. package/src/services/daemon.ts +87 -0
  132. package/src/services/http-server.ts +336 -0
  133. package/src/services/kv-service.ts +64 -0
  134. package/src/services/plugin-registry.ts +77 -0
  135. package/src/services/watch.ts +340 -0
  136. package/src/services/webhook-server.ts +100 -0
  137. package/src/types.ts +493 -0
@@ -0,0 +1,218 @@
1
+ /**
2
+ * BrainBank — SQLite Adapter
3
+ *
4
+ * Implements `DatabaseAdapter` using Node.js built-in `node:sqlite`.
5
+ * Zero native addons — no ABI issues across Node versions.
6
+ * Handles WAL mode, directory creation, schema init, and transactions.
7
+ */
8
+
9
+ import type { DatabaseAdapter, AdapterCapabilities, PreparedStatement, ExecuteResult } from './adapter.ts';
10
+ import type { DatabaseSync as DatabaseSyncType, StatementSync as StatementSyncType } from 'node:sqlite';
11
+
12
+ import { DatabaseSync } from 'node:sqlite';
13
+ import * as fs from 'node:fs';
14
+ import * as path from 'node:path';
15
+
16
+
17
+ // ── Schema ──────────────────────────────────────────────────────────
18
+
19
export const SCHEMA_VERSION = 9;

/**
 * Bootstrap the core (plugin-independent) schema.
 *
 * Every statement is guarded with IF NOT EXISTS / OR IGNORE, so running this
 * on each startup is harmless. Plugin-owned tables are not created here —
 * plugins add them through runPluginMigrations().
 *
 * NOTE(review): `fts_kv` is an external-content FTS5 table kept in sync by an
 * insert and a delete trigger only. There is no AFTER UPDATE trigger, so this
 * presumably relies on `kv_data` rows never being updated in place — confirm.
 */
function createSchema(adapter: DatabaseAdapter): void {
    // Sent as one script so the whole core schema is created by a single exec().
    const coreDdl = `
        -- ── Schema versioning ──────────────────────────
        CREATE TABLE IF NOT EXISTS schema_version (
            version INTEGER PRIMARY KEY,
            applied_at INTEGER NOT NULL DEFAULT (unixepoch())
        );
        INSERT OR IGNORE INTO schema_version (version) VALUES (${SCHEMA_VERSION});

        -- ── Plugin Versions (migration tracking) ──────
        CREATE TABLE IF NOT EXISTS plugin_versions (
            plugin_name TEXT PRIMARY KEY,
            version INTEGER NOT NULL,
            applied_at INTEGER NOT NULL DEFAULT (unixepoch())
        );

        -- ── Dynamic Collections (KV Store) ───────────
        CREATE TABLE IF NOT EXISTS kv_data (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            collection TEXT NOT NULL,
            content TEXT NOT NULL,
            meta_json TEXT NOT NULL DEFAULT '{}',
            tags_json TEXT NOT NULL DEFAULT '[]',
            expires_at INTEGER,
            created_at INTEGER NOT NULL DEFAULT (unixepoch())
        );

        CREATE TABLE IF NOT EXISTS kv_vectors (
            data_id INTEGER PRIMARY KEY REFERENCES kv_data(id) ON DELETE CASCADE,
            embedding BLOB NOT NULL
        );

        CREATE VIRTUAL TABLE IF NOT EXISTS fts_kv USING fts5(
            content,
            collection,
            content='kv_data',
            content_rowid='id',
            tokenize='porter unicode61'
        );

        CREATE TRIGGER IF NOT EXISTS trg_fts_kv_insert AFTER INSERT ON kv_data BEGIN
            INSERT INTO fts_kv(rowid, content, collection)
            VALUES (new.id, new.content, new.collection);
        END;
        CREATE TRIGGER IF NOT EXISTS trg_fts_kv_delete AFTER DELETE ON kv_data BEGIN
            INSERT INTO fts_kv(fts_kv, rowid, content, collection)
            VALUES ('delete', old.id, old.content, old.collection);
        END;

        CREATE INDEX IF NOT EXISTS idx_kv_collection ON kv_data(collection);
        CREATE INDEX IF NOT EXISTS idx_kv_created ON kv_data(created_at DESC);

        -- ── Embedding Metadata ───────────────────────
        CREATE TABLE IF NOT EXISTS embedding_meta (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        );

        -- ── Index State (cross-process coordination) ─
        CREATE TABLE IF NOT EXISTS index_state (
            name TEXT PRIMARY KEY,
            version INTEGER NOT NULL DEFAULT 0,
            writer_pid INTEGER NOT NULL DEFAULT 0,
            updated_at INTEGER NOT NULL DEFAULT (unixepoch())
        );

        -- ── Plugin Tracking (incremental indexing) ────
        CREATE TABLE IF NOT EXISTS plugin_tracking (
            plugin TEXT NOT NULL,
            key TEXT NOT NULL,
            content_hash TEXT NOT NULL,
            indexed_at INTEGER NOT NULL DEFAULT (unixepoch()),
            PRIMARY KEY (plugin, key)
        );
    `;
    adapter.exec(coreDdl);
}
102
+
103
/**
 * Read the highest version recorded in the `schema_version` table.
 *
 * @param adapter Database to query.
 * @returns The stored version, or 0 when no version is recorded or the query
 *          throws for any reason (best effort — errors are not surfaced).
 */
export function getSchemaVersion(adapter: DatabaseAdapter): number {
    let version = 0;
    try {
        const stmt = adapter.prepare('SELECT MAX(version) as v FROM schema_version');
        const row = stmt.get() as { v: number } | undefined;
        // MAX() over an empty table yields NULL, which falls back to 0 here.
        version = row?.v ?? 0;
    } catch {
        version = 0;
    }
    return version;
}
112
+
113
+
114
+ // ── Statement Wrapper ───────────────────────────────────────────────
115
+
116
/** Value types that `node:sqlite` accepts as bound statement parameters. */
type SqlParam = string | number | bigint | null | Uint8Array;

/**
 * Adapt a `node:sqlite` StatementSync to the driver-neutral `PreparedStatement<T>` shape.
 *
 * Parameters are forwarded positionally without validation; results are cast
 * to `T` without runtime checks.
 */
function wrapStatement<T>(stmt: StatementSyncType): PreparedStatement<T> {
    // Single place for the (unchecked) parameter cast used by every method.
    const bind = (params: unknown[]): SqlParam[] => params as SqlParam[];

    return {
        get: (...params: unknown[]): T | undefined =>
            stmt.get(...bind(params)) as T | undefined,

        all: (...params: unknown[]): T[] =>
            stmt.all(...bind(params)) as T[],

        run: (...params: unknown[]): ExecuteResult => {
            const { lastInsertRowid, changes } = stmt.run(...bind(params));
            // `changes` is normalised to a plain number; the row id is passed through as-is.
            return { lastInsertRowid, changes: Number(changes) };
        },

        iterate: (...params: unknown[]): IterableIterator<T> =>
            stmt.iterate(...bind(params)) as IterableIterator<T>,
    };
}
140
+
141
+
142
+ // ── SQLiteAdapter ───────────────────────────────────────────────────
143
+
144
/**
 * `DatabaseAdapter` backed by Node's built-in `node:sqlite` (`DatabaseSync`).
 *
 * Opening an adapter creates the parent directory if needed, applies the
 * connection PRAGMAs (WAL journal, 5 s busy timeout, NORMAL sync, foreign
 * keys on) and then runs the idempotent core schema setup.
 */
export class SQLiteAdapter implements DatabaseAdapter {
    private readonly _db: DatabaseSyncType;

    readonly capabilities: AdapterCapabilities = {
        fts: 'fts5',
        upsert: 'or-replace',
        json: true,
        vectors: false,
    };

    /**
     * @param dbPath Path of the SQLite database file. Its parent directory is
     *               created recursively when it does not exist yet.
     */
    constructor(dbPath: string) {
        // Ensure parent directory exists
        const dir = path.dirname(dbPath);
        if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true });
        }

        this._db = new DatabaseSync(dbPath);
        this._db.exec('PRAGMA journal_mode = WAL');
        this._db.exec('PRAGMA busy_timeout = 5000');
        this._db.exec('PRAGMA synchronous = NORMAL');
        this._db.exec('PRAGMA foreign_keys = ON');

        // Initialize schema
        createSchema(this);
    }

    /** Prepare a reusable statement. */
    prepare<T = unknown>(sql: string): PreparedStatement<T> {
        return wrapStatement<T>(this._db.prepare(sql));
    }

    /** Execute raw SQL (no results). */
    exec(sql: string): void {
        this._db.exec(sql);
    }

    /**
     * Run `fn` between BEGIN and COMMIT; roll back and rethrow if it throws.
     *
     * `fn` must do its work synchronously: COMMIT is issued as soon as `fn`
     * returns, so work scheduled in a returned promise runs outside the
     * transaction. Calls must not be nested — a second BEGIN fails.
     *
     * @returns Whatever `fn` returned.
     * @throws The error raised by `fn` (or by COMMIT).
     */
    transaction<T>(fn: () => T): T {
        this._db.exec('BEGIN');
        let result: T;
        try {
            result = fn();
            this._db.exec('COMMIT');
        } catch (err) {
            try {
                this._db.exec('ROLLBACK');
            } catch {
                // SQLite may already have ended the transaction (some errors roll
                // back automatically). A failing ROLLBACK must not replace the
                // error that actually caused the failure.
            }
            throw err;
        }
        return result;
    }

    /**
     * Run one statement once per row, all inside a single transaction.
     *
     * @param sql  Statement with positional placeholders.
     * @param rows One parameter tuple per execution.
     */
    batch<T extends unknown[]>(sql: string, rows: T[]): void {
        const stmt = this._db.prepare(sql);
        this.transaction(() => {
            for (const row of rows) {
                stmt.run(...(row as SqlParam[]));
            }
        });
    }

    /** Close the database. */
    close(): void {
        this._db.close();
    }

    /**
     * Access the underlying `node:sqlite` DatabaseSync instance.
     *
     * The cast to `T` is unchecked — the caller is responsible for the type.
     *
     * @deprecated Use `DatabaseAdapter` methods instead. This exists
     * only for gradual migration of plugins that depend on driver internals.
     */
    raw<T = unknown>(): T {
        return this._db as unknown as T;
    }
}
@@ -0,0 +1,91 @@
1
+ /**
2
+ * BrainBank — Incremental Tracker
3
+ *
4
+ * Standardized helper for plugins to detect add/update/delete during indexing.
5
+ * Uses a shared `plugin_tracking` table with per-plugin namespacing.
6
+ *
7
+ * Usage in a plugin:
8
+ * const tracker = ctx.createTracker(); // uses plugin name
9
+ * for (const file of files) {
10
+ * const hash = sha256(content);
11
+ * if (tracker.isUnchanged(file, hash)) { skipped++; continue; }
12
+ * indexFile(file, content);
13
+ * tracker.markIndexed(file, hash);
14
+ * }
15
+ * const orphans = tracker.findOrphans(new Set(files));
16
+ * for (const key of orphans) { removeData(key); tracker.remove(key); }
17
+ */
18
+
19
+ import type { DatabaseAdapter } from './adapter.ts';
20
+
21
/**
 * Per-plugin change tracker used during incremental indexing.
 *
 * Remembers the content hash last indexed for every key so a plugin can tell
 * new or changed items from unchanged ones, and detect keys that disappeared.
 */
export interface IncrementalTracker {
    /**
     * Compare `contentHash` with the hash stored for `key`.
     * @returns true when they match, meaning the item can be skipped.
     */
    isUnchanged(key: string, contentHash: string): boolean;

    /** Record `contentHash` as the indexed state of `key`. Call once indexing of the key has succeeded. */
    markIndexed(key: string, contentHash: string): void;

    /**
     * List tracked keys that are absent from `currentKeys`.
     * @returns Keys whose data should be deleted.
     */
    findOrphans(currentKeys: Set<string>): string[];

    /** Forget a single key. Call after the key's data has been cleaned up. */
    remove(key: string): void;

    /** Forget every key tracked for this plugin. */
    clear(): void;
}
38
+
39
/**
 * Ensure the shared `plugin_tracking` table exists.
 *
 * Idempotent — the DDL uses IF NOT EXISTS. Rows are keyed by (plugin, key),
 * which is what scopes each tracker to its own plugin.
 */
export function createTrackingTable(db: DatabaseAdapter): void {
    const trackingDdl = `
        CREATE TABLE IF NOT EXISTS plugin_tracking (
            plugin TEXT NOT NULL,
            key TEXT NOT NULL,
            content_hash TEXT NOT NULL,
            indexed_at INTEGER NOT NULL DEFAULT (unixepoch()),
            PRIMARY KEY (plugin, key)
        );
    `;
    db.exec(trackingDdl);
}
51
+
52
/**
 * Build an `IncrementalTracker` whose every query is scoped to `pluginName`.
 *
 * Statements are prepared on each call, so nothing is read from or written to
 * `plugin_tracking` until a tracker method is invoked.
 *
 * @param db         Database holding the `plugin_tracking` table.
 * @param pluginName Value stored in the `plugin` column for all rows of this tracker.
 */
export function createTracker(db: DatabaseAdapter, pluginName: string): IncrementalTracker {
    const isUnchanged = (key: string, contentHash: string): boolean => {
        const stored = db.prepare(
            'SELECT content_hash FROM plugin_tracking WHERE plugin = ? AND key = ?'
        ).get(pluginName, key) as { content_hash: string } | undefined;
        // An untracked key has no row, so it never counts as unchanged.
        return stored?.content_hash === contentHash;
    };

    const markIndexed = (key: string, contentHash: string): void => {
        // Upsert: first sighting inserts, later ones refresh hash and timestamp.
        const upsert = db.prepare(`
            INSERT INTO plugin_tracking (plugin, key, content_hash)
            VALUES (?, ?, ?)
            ON CONFLICT(plugin, key) DO UPDATE SET
                content_hash = excluded.content_hash,
                indexed_at = unixepoch()
        `);
        upsert.run(pluginName, key, contentHash);
    };

    const findOrphans = (currentKeys: Set<string>): string[] => {
        const tracked = db.prepare(
            'SELECT key FROM plugin_tracking WHERE plugin = ?'
        ).all(pluginName) as { key: string }[];

        const orphans: string[] = [];
        for (const { key } of tracked) {
            if (!currentKeys.has(key)) orphans.push(key);
        }
        return orphans;
    };

    const remove = (key: string): void => {
        db.prepare(
            'DELETE FROM plugin_tracking WHERE plugin = ? AND key = ?'
        ).run(pluginName, key);
    };

    const clear = (): void => {
        db.prepare(
            'DELETE FROM plugin_tracking WHERE plugin = ?'
        ).run(pluginName);
    };

    return { isUnchanged, markIndexed, findOrphans, remove, clear };
}
@@ -0,0 +1,81 @@
1
+ /**
2
+ * BrainBank — Index API
3
+ *
4
+ * Orchestrates indexing across all registered plugins.
5
+ * Plugin-agnostic — uses capability interfaces to discover what can be indexed.
6
+ *
7
+ * After each plugin finishes indexing, bumps its version in `index_state`.
8
+ * Once all plugins are done, saves HNSW indices to disk (with cross-process file locking).
9
+ */
10
+
11
+ import type { DatabaseAdapter } from '@/db/adapter.ts';
12
+ import type { HNSWIndex } from '@/providers/vector/hnsw-index.ts';
13
+ import type { PluginRegistry } from '@/services/plugin-registry.ts';
14
+ import type { IndexResult, StageProgressCallback } from '@/types.ts';
15
+
16
+ import { bumpVersion } from '@/db/metadata.ts';
17
+ import { isIndexable } from '@/plugin.ts';
18
+ import { saveAllHnsw } from '@/providers/vector/hnsw-loader.ts';
19
+
20
/** Dependencies handed to `runIndex` — injected by BrainBank at init time. */
export interface IndexDeps {
    /** Database whose `index_state` version is bumped after each plugin. */
    db: DatabaseAdapter;
    /** Database path, forwarded to `saveAllHnsw` when persisting indices. */
    dbPath: string;
    /** Shared HNSW indices with their vector caches, forwarded to `saveAllHnsw`. */
    sharedHnsw: Map<string, { hnsw: HNSWIndex; vecCache: Map<number, Float32Array> }>;
    /** HNSW index of the KV collections, forwarded to `saveAllHnsw`. */
    kvHnsw: HNSWIndex;
    /** Registry whose `all` list is scanned for indexable plugins. */
    registry: PluginRegistry;
    /** Event sink; `runIndex` emits `'indexed'` with the per-plugin results. */
    emit: (event: string, data: unknown) => void;
}
29
+
30
/**
 * Add the counters of `r` onto an optional accumulator.
 *
 * @param acc Running totals, or undefined for the first result.
 * @param r   Result to fold in.
 * @returns A new object: a shallow copy of `r` when there is no accumulator,
 *          otherwise the field-wise sums (a missing `chunks` counts as 0).
 */
function mergeResult(acc: IndexResult | undefined, r: IndexResult): IndexResult {
    return acc
        ? {
            indexed: acc.indexed + r.indexed,
            skipped: acc.skipped + r.skipped,
            chunks: (acc.chunks ?? 0) + (r.chunks ?? 0),
        }
        : { ...r };
}
39
+
40
/**
 * Index every registered plugin that is indexable, one after another.
 *
 * For each selected plugin: reports "Starting...", awaits `index()`, stores
 * the result under the plugin name and bumps that name's version in the
 * database. After the loop the HNSW indices are saved once and an `'indexed'`
 * event carrying all results is emitted.
 *
 * @param deps    Injected dependencies.
 * @param options `modules` restricts the run to plugins whose `name` is listed;
 *                `pluginOptions` is spread last into the `index()` argument, so
 *                its keys win over `forceReindex` / `onProgress`.
 * @returns Results keyed by plugin name.
 */
export async function runIndex(deps: IndexDeps, options: {
    modules?: string[];
    forceReindex?: boolean;
    onProgress?: StageProgressCallback;
    /** Plugin-specific options forwarded to `IndexablePlugin.index()`. */
    pluginOptions?: Record<string, unknown>;
} = {}): Promise<Record<string, unknown>> {
    const results: Record<string, unknown> = {};
    const wanted = options.modules ? new Set(options.modules) : null;

    for (const plugin of deps.registry.all) {
        const selected = wanted === null || wanted.has(plugin.name);
        if (!selected || !isIndexable(plugin)) continue;

        const stage = plugin.name;
        options.onProgress?.(stage, 'Starting...');

        // Prefix plugin progress with a [current/total] counter under the plugin's stage label.
        const reportProgress = (msg: string, cur: number, total: number) =>
            options.onProgress?.(stage, `[${cur}/${total}] ${msg}`);

        const outcome = await plugin.index({
            forceReindex: options.forceReindex,
            onProgress: reportProgress,
            ...options.pluginOptions,
        });

        const previous = results[plugin.name] as IndexResult | undefined;
        results[plugin.name] = mergeResult(previous, outcome);

        // Bump version per plugin name (= HNSW key) so hot-reload resolves correctly.
        bumpVersion(deps.db, plugin.name);
    }

    // Save HNSW indices with file locking after all plugins complete
    await saveAllHnsw(deps.dbPath, deps.kvHnsw, deps.sharedHnsw, new Map());

    deps.emit('indexed', results);
    return results;
}
@@ -0,0 +1,206 @@
1
+ /**
2
+ * BrainBank — Re-embedding Engine
3
+ *
4
+ * Regenerates all vectors without re-indexing.
5
+ * Reads existing text from SQLite, embeds with the current provider,
6
+ * and replaces vector BLOBs. No file I/O, no git parsing, no re-chunking.
7
+ *
8
+ * Usage:
9
+ * const result = await brain.reembed({ onProgress });
10
+ * // → { counts: { code: 1200, git: 500, docs: 80, kv: 45 }, total: 1825 }
11
+ */
12
+
13
+ import type { DatabaseAdapter, CountRow, VectorRow } from '@/db/adapter.ts';
14
+ import type { Plugin, ReembedTable } from '@/plugin.ts';
15
+ import type { HNSWIndex } from '@/providers/vector/hnsw-index.ts';
16
+ import type { EmbeddingProvider, ProgressCallback } from '@/types.ts';
17
+
18
+ import { setEmbeddingMeta } from '@/db/metadata.ts';
19
+ import { vecToBuffer } from '@/lib/math.ts';
20
+ import { isReembeddable } from '@/plugin.ts';
21
+ import { saveAllHnsw } from '@/providers/vector/hnsw-loader.ts';
22
+
23
+
24
/** Re-embeddable tables owned by the core: the KV store and its vectors. */
const CORE_TABLES: ReembedTable[] = [
    {
        name: 'kv',
        textTable: 'kv_data',
        vectorTable: 'kv_vectors',
        idColumn: 'id',
        fkColumn: 'data_id',
        textBuilder: (r) => String(r.content),
    },
];

/**
 * Gather the re-embed configs of all re-embeddable plugins plus the core tables.
 *
 * Entries are keyed by `vectorTable`; when two share one, the later entry
 * wins, and core tables are added last so they override plugin entries.
 */
function collectTables(plugins: Plugin[]): ReembedTable[] {
    const keyed: [string, ReembedTable][] = [];

    for (const plugin of plugins) {
        if (!isReembeddable(plugin)) continue;
        const pluginTable = plugin.reembedConfig();
        keyed.push([pluginTable.vectorTable, pluginTable]);
    }
    for (const coreTable of CORE_TABLES) {
        keyed.push([coreTable.vectorTable, coreTable]);
    }

    // Map construction keeps first-seen key order while later values replace earlier ones.
    return Array.from(new Map(keyed).values());
}
49
+
50
+
51
/** Outcome of a re-embedding run. */
export interface ReembedResult {
    /** Vectors regenerated per table, keyed by table name (e.g. 'code', 'git', 'docs', 'kv'). */
    counts: Record<string, number>;
    /** Sum of all entries in `counts`. */
    total: number;
}
56
+
57
/** Tuning knobs for a re-embedding run. */
export interface ReembedOptions {
    /** Invoked after every batch as (tableName, current, total). */
    onProgress?: ProgressCallback;
    /** Number of rows passed to each `embedBatch` call. Default: 50 */
    batchSize?: number;
}
63
+
64
+
65
/**
 * Re-embed all existing text with the current embedding provider.
 * Does NOT re-parse files, git, or documents — only replaces vectors.
 *
 * Tables are processed one at a time. A table is skipped when its text or
 * vector table does not exist (plugin not installed). After a table has been
 * re-embedded, its in-memory HNSW index (looked up in `hnswMap` by table name)
 * is rebuilt. Finally the provider metadata is stored and, when `persist` is
 * given, the HNSW indices are written to disk before this function resolves.
 *
 * @param db        Database holding the text and vector tables.
 * @param embedding Provider used to produce the new vectors.
 * @param hnswMap   In-memory indices to rebuild, keyed by table name.
 * @param plugins   Plugins whose re-embed configs are collected.
 * @param options   Batch size (default 50) and progress callback.
 * @param persist   When provided, HNSW indices are saved via `saveAllHnsw`.
 * @returns Per-table counts and their total.
 */
export async function reembedAll(
    db: DatabaseAdapter,
    embedding: EmbeddingProvider,
    hnswMap: Map<string, { hnsw: HNSWIndex; vecs: Map<number, Float32Array> }>,
    plugins: Plugin[],
    options: ReembedOptions = {},
    persist?: {
        dbPath: string;
        kvHnsw: HNSWIndex;
        sharedHnsw: Map<string, { hnsw: HNSWIndex; vecCache: Map<number, Float32Array> }>;
    },
): Promise<ReembedResult> {
    const { batchSize = 50, onProgress } = options;
    const tables = collectTables(plugins);
    const counts: Record<string, number> = {};
    let total = 0;

    for (const table of tables) {
        // Skip tables that don't exist (plugin not installed)
        try {
            const textExists = (db.prepare(
                `SELECT COUNT(*) as c FROM sqlite_master WHERE type='table' AND name=?`
            ).get(table.textTable) as CountRow).c;
            const vecExists = (db.prepare(
                `SELECT COUNT(*) as c FROM sqlite_master WHERE type='table' AND name=?`
            ).get(table.vectorTable) as CountRow).c;
            if (!textExists || !vecExists) continue;
        } catch (e: unknown) {
            if (e instanceof Error && e.message.includes('no such table')) continue;
            throw e;
        }

        const count = await reembedTable(db, embedding, table, batchSize, onProgress);
        counts[table.name] = count;
        total += count;

        // Rebuild HNSW if available
        const entry = hnswMap.get(table.name);
        if (entry && count > 0) {
            await rebuildHnsw(db, table, entry.hnsw, entry.vecs);
        }
    }

    // Persist provider metadata + HNSW indexes to disk
    setEmbeddingMeta(db, embedding);
    if (persist) {
        // Awaited so the caller only sees success once the indices are on disk,
        // and so a save failure rejects this call instead of going unhandled.
        await saveAllHnsw(persist.dbPath, persist.kvHnsw, persist.sharedHnsw, new Map());
    }

    return {
        counts,
        total,
    };
}
123
+
124
/**
 * Re-embed a single table. Returns count of vectors regenerated.
 *
 * Rows are read and embedded batch by batch, so memory stays O(batchSize).
 * New vectors are first written to a scratch table (`_reembed_<vectorTable>`);
 * only after every batch has succeeded is the real vector table replaced, in
 * one transaction. If anything fails earlier, the existing vectors are left
 * untouched and the scratch table is dropped.
 *
 * @param db         Database holding the text and vector tables.
 * @param embedding  Provider used to embed each batch of texts.
 * @param table      Table description (text/vector table names, id and FK columns, text builder).
 * @param batchSize  Rows per `embedBatch` call; must be a positive integer.
 * @param onProgress Called after each batch with (table name, processed, total).
 * @returns Number of rows that were re-embedded.
 * @throws RangeError when `batchSize` is not a positive integer.
 * @throws Error when the provider returns a different number of vectors than texts.
 */
async function reembedTable(
    db: DatabaseAdapter,
    embedding: EmbeddingProvider,
    table: ReembedTable,
    batchSize: number,
    onProgress?: ProgressCallback,
): Promise<number> {
    // A zero or negative step would never advance the offset below.
    if (!Number.isInteger(batchSize) || batchSize < 1) {
        throw new RangeError(`reembed batchSize must be a positive integer, got ${batchSize}`);
    }

    const totalCount = (db.prepare(
        `SELECT COUNT(*) as c FROM ${table.textTable}`
    ).get() as CountRow).c;

    if (totalCount === 0) return 0;

    const tempTable = `_reembed_${table.vectorTable}`;
    let processed = 0;
    try {
        // Phase 1: Build new vectors in a temp table (safe — old data untouched)
        db.exec(`DROP TABLE IF EXISTS ${tempTable}`);
        db.exec(`CREATE TABLE ${tempTable} AS SELECT * FROM ${table.vectorTable} WHERE 0`);

        const insertTemp = db.prepare(
            `INSERT INTO ${tempTable} (${table.fkColumn}, embedding) VALUES (?, ?)`
        );
        // ORDER BY makes LIMIT/OFFSET paging deterministic; without it SQLite may
        // return rows in a different order per query and skip or repeat rows.
        const selectBatch = db.prepare(
            `SELECT * FROM ${table.textTable} ORDER BY ${table.idColumn} LIMIT ? OFFSET ?`
        );

        for (let offset = 0; offset < totalCount; offset += batchSize) {
            const batch = selectBatch.all(batchSize, offset) as Record<string, unknown>[];
            // Fewer rows than counted (deleted meanwhile): nothing left to embed.
            if (batch.length === 0) break;

            const texts = batch.map(r => table.textBuilder(r));
            const vectors = await embedding.embedBatch(texts);
            if (vectors.length !== batch.length) {
                throw new Error(
                    `embedBatch returned ${vectors.length} vectors for ${batch.length} texts (table ${table.name})`
                );
            }

            db.transaction(() => {
                for (let j = 0; j < batch.length; j++) {
                    insertTemp.run(batch[j][table.idColumn], vecToBuffer(vectors[j]));
                }
            });

            processed += batch.length;
            onProgress?.(table.name, processed, totalCount);
        }

        // Phase 2: Atomic swap — all or nothing
        db.transaction(() => {
            db.exec(`DELETE FROM ${table.vectorTable}`);
            db.exec(`INSERT INTO ${table.vectorTable} SELECT * FROM ${tempTable}`);
        });
    } finally {
        // Always clean up temp table — even if embedBatch fails mid-batch
        db.exec(`DROP TABLE IF EXISTS ${tempTable}`);
    }

    return processed;
}
184
+
185
/**
 * Repopulate an in-memory HNSW index and its vector cache from a vector table.
 *
 * Both are emptied first, then every stored row is decoded into a
 * `Float32Array` and registered in the index and the cache under its id.
 *
 * @param db    Database to read vectors from.
 * @param table Supplies the vector table name and its FK column (exposed as `id`).
 * @param hnsw  Index to reinitialise and refill.
 * @param vecs  Vector cache to clear and refill.
 */
async function rebuildHnsw(
    db: DatabaseAdapter,
    table: ReembedTable,
    hnsw: HNSWIndex,
    vecs: Map<number, Float32Array>,
): Promise<void> {
    // Wipe stale vectors before repopulating
    vecs.clear();
    hnsw.reinit();

    const selectVectors = `SELECT ${table.fkColumn} as id, embedding FROM ${table.vectorTable}`;
    const stored = db.prepare(selectVectors).all() as VectorRow[];

    for (const { id, embedding } of stored) {
        // Copy exactly this row's bytes out of the backing buffer; the blob may be
        // a view into a larger buffer, so offset and length must be honoured.
        const start = embedding.byteOffset;
        const bytes = embedding.buffer.slice(start, start + embedding.byteLength);
        const vec = new Float32Array(bytes);
        hnsw.add(vec, id);
        vecs.set(id, vec);
    }
}