brainbank 0.1.3 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -12
- package/dist/{types-Da_zLLOl.d.ts → base-9vfWRHCV.d.ts} +131 -31
- package/dist/{chunk-TW5NTYYZ.js → chunk-6MFTQV3O.js} +909 -685
- package/dist/chunk-6MFTQV3O.js.map +1 -0
- package/dist/chunk-7JCEW7LT.js +266 -0
- package/dist/chunk-7JCEW7LT.js.map +1 -0
- package/dist/{chunk-GOUBW7UA.js → chunk-F6SJ3U4H.js} +98 -34
- package/dist/chunk-F6SJ3U4H.js.map +1 -0
- package/dist/{chunk-MJ3Y24H6.js → chunk-FJJY4H2Y.js} +11 -11
- package/dist/chunk-FJJY4H2Y.js.map +1 -0
- package/dist/{chunk-3GAIDXRW.js → chunk-GUT5MSJT.js} +5 -11
- package/dist/chunk-GUT5MSJT.js.map +1 -0
- package/dist/{chunk-2P3EGY6S.js → chunk-QNHBCOKB.js} +2 -2
- package/dist/chunk-QNHBCOKB.js.map +1 -0
- package/dist/{chunk-4ZKBQ33J.js → chunk-V4UJKXPK.js} +23 -5
- package/dist/chunk-V4UJKXPK.js.map +1 -0
- package/dist/{chunk-RAEBYV75.js → chunk-WR4WXKJT.js} +37 -23
- package/dist/chunk-WR4WXKJT.js.map +1 -0
- package/dist/{chunk-Z5SU54HP.js → chunk-X6645UVR.js} +3 -3
- package/dist/chunk-X6645UVR.js.map +1 -0
- package/dist/cli.js +122 -102
- package/dist/cli.js.map +1 -1
- package/dist/code.d.ts +5 -5
- package/dist/code.js +1 -1
- package/dist/docs.d.ts +4 -6
- package/dist/docs.js +1 -1
- package/dist/git.d.ts +5 -5
- package/dist/git.js +1 -1
- package/dist/index.d.ts +54 -90
- package/dist/index.js +13 -13
- package/dist/memory.d.ts +5 -7
- package/dist/memory.js +9 -12
- package/dist/memory.js.map +1 -1
- package/dist/notes.d.ts +4 -6
- package/dist/notes.js +7 -10
- package/dist/notes.js.map +1 -1
- package/dist/{openai-PCTYLOWI.js → openai-CYDMYX7X.js} +2 -2
- package/package.json +2 -2
- package/dist/chunk-2P3EGY6S.js.map +0 -1
- package/dist/chunk-3GAIDXRW.js.map +0 -1
- package/dist/chunk-4ZKBQ33J.js.map +0 -1
- package/dist/chunk-GOUBW7UA.js.map +0 -1
- package/dist/chunk-MJ3Y24H6.js.map +0 -1
- package/dist/chunk-N6ZMBFDE.js +0 -224
- package/dist/chunk-N6ZMBFDE.js.map +0 -1
- package/dist/chunk-RAEBYV75.js.map +0 -1
- package/dist/chunk-TW5NTYYZ.js.map +0 -1
- package/dist/chunk-Z5SU54HP.js.map +0 -1
- /package/dist/{openai-PCTYLOWI.js.map → openai-CYDMYX7X.js.map} +0 -0
|
@@ -2,18 +2,20 @@ import {
|
|
|
2
2
|
isIgnoredDir,
|
|
3
3
|
isIgnoredFile,
|
|
4
4
|
isSupported
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-WR4WXKJT.js";
|
|
6
6
|
import {
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
normalizeBM25,
|
|
8
|
+
reciprocalRankFusion,
|
|
9
|
+
sanitizeFTS
|
|
10
|
+
} from "./chunk-V4UJKXPK.js";
|
|
9
11
|
import {
|
|
10
12
|
cosineSimilarity
|
|
11
|
-
} from "./chunk-
|
|
13
|
+
} from "./chunk-QNHBCOKB.js";
|
|
12
14
|
import {
|
|
13
15
|
__name
|
|
14
16
|
} from "./chunk-7QVYU63E.js";
|
|
15
17
|
|
|
16
|
-
// src/
|
|
18
|
+
// src/config/defaults.ts
|
|
17
19
|
import * as path from "path";
|
|
18
20
|
var DEFAULTS = {
|
|
19
21
|
repoPath: ".",
|
|
@@ -49,7 +51,262 @@ function resolveConfig(partial = {}) {
|
|
|
49
51
|
}
|
|
50
52
|
__name(resolveConfig, "resolveConfig");
|
|
51
53
|
|
|
52
|
-
// src/
|
|
54
|
+
// src/app/collection.ts
|
|
55
|
+
/**
 * A named collection of text items stored in the `kv_data` / `kv_vectors`
 * tables, searchable by keyword (FTS `bm25`), by vector similarity (HNSW),
 * or by a fusion of both.
 *
 * Collaborators, as used by the methods below:
 *  - `_db`        database handle exposing `prepare(sql)` (statements with
 *                 `run` / `get` / `all`) and `transaction(fn)`.
 *  - `_embedding` provider with `embed(text)` and `embedBatch(texts)`.
 *  - `_hnsw`      vector index with `add(vec, id)`, `remove(id)`,
 *                 `search(vec, k)` and a `size` property.
 *  - `_vecs`      Map-like id → vector cache (`set` / `delete`).
 *  - `_reranker`  optional; `rank(query, documents)` resolves to one score
 *                 per document.
 */
var Collection = class {
  constructor(_name, _db, _embedding, _hnsw, _vecs, _reranker) {
    this._name = _name;
    this._db = _db;
    this._embedding = _embedding;
    this._hnsw = _hnsw;
    this._vecs = _vecs;
    this._reranker = _reranker;
  }
  static {
    __name(this, "Collection");
  }
  /** Collection name. */
  get name() {
    return this._name;
  }
  /**
   * Add an item. Returns its ID.
   *
   * `options` is either `{ metadata?, tags?, ttl? }` or, for backward
   * compatibility, a bare metadata object (any object that has none of the
   * keys `tags`, `ttl`, `metadata`).
   *
   * @param {string} content Text to store and embed.
   * @param {object} [options]
   * @returns {Promise<number>} Row id of the new item.
   * @throws {Error} If `ttl` is not a valid duration string.
   */
  async add(content, options = {}) {
    const opts = "tags" in options || "ttl" in options || "metadata" in options ? options : { metadata: options };
    const metadata = opts.metadata ?? {};
    const tags = opts.tags ?? [];
    const expiresAt = opts.ttl ? Math.floor(Date.now() / 1e3) + parseDuration(opts.ttl) : null;
    // Embed before touching the database so a failed embedding leaves no orphan row.
    const vec = await this._embedding.embed(content);
    const result = this._db.prepare(
      "INSERT INTO kv_data (collection, content, meta_json, tags_json, expires_at) VALUES (?, ?, ?, ?, ?)"
    ).run(this._name, content, JSON.stringify(metadata), JSON.stringify(tags), expiresAt);
    const id = Number(result.lastInsertRowid);
    this._db.prepare(
      "INSERT INTO kv_vectors (data_id, embedding) VALUES (?, ?)"
    ).run(id, this._toBlob(vec));
    this._hnsw.add(vec, id);
    this._vecs.set(id, vec);
    return id;
  }
  /**
   * Add multiple items with a single batched embedding call. Returns their IDs.
   *
   * @param {Array<{content: string, metadata?: object, tags?: string[], ttl?: string}>} items
   * @returns {Promise<number[]>} Row ids, in the same order as `items`.
   */
  async addMany(items) {
    if (items.length === 0) return [];
    const texts = items.map((i) => i.content);
    const vecs = await this._embedding.embedBatch(texts);
    const ids = [];
    const insertData = this._db.prepare(
      "INSERT INTO kv_data (collection, content, meta_json, tags_json, expires_at) VALUES (?, ?, ?, ?, ?)"
    );
    const insertVec = this._db.prepare(
      "INSERT INTO kv_vectors (data_id, embedding) VALUES (?, ?)"
    );
    // NOTE(review): this assumes `_db.transaction(fn)` runs `fn` immediately.
    // If `_db` were a raw better-sqlite3 handle, `transaction` would only
    // return a wrapper that still has to be invoked — confirm against the
    // Database class.
    this._db.transaction(() => {
      for (let i = 0; i < items.length; i++) {
        const item = items[i];
        const expiresAt = item.ttl ? Math.floor(Date.now() / 1e3) + parseDuration(item.ttl) : null;
        const result = insertData.run(
          this._name,
          item.content,
          JSON.stringify(item.metadata ?? {}),
          JSON.stringify(item.tags ?? []),
          expiresAt
        );
        const id = Number(result.lastInsertRowid);
        insertVec.run(id, this._toBlob(vecs[i]));
        ids.push(id);
      }
    });
    // In-memory structures are updated only after the database writes above.
    for (let i = 0; i < ids.length; i++) {
      this._hnsw.add(vecs[i], ids[i]);
      this._vecs.set(ids[i], vecs[i]);
    }
    return ids;
  }
  /**
   * Search this collection.
   *
   * Modes:
   *  - `"keyword"`: BM25 only.
   *  - `"vector"`:  HNSW only.
   *  - `"hybrid"` (default): both result lists are fused with
   *    `reciprocalRankFusion`, cut to `k`, and — when a reranker is
   *    configured and there is more than one hit — re-scored as
   *    60% fused score + 40% reranker score.
   *
   * Expired items are pruned first. The tag filter is applied last, so
   * fewer than `k` items may be returned when `tags` is given.
   *
   * @param {string} query
   * @param {{k?: number, mode?: string, minScore?: number, tags?: string[]}} [options]
   * @returns {Promise<object[]>} Items with a `score` field, best first.
   */
  async search(query, options = {}) {
    const { k = 5, mode = "hybrid", minScore = 0.15, tags } = options;
    this._pruneExpired();
    if (mode === "keyword") return this._filterByTags(this._searchBM25(query, k, minScore), tags);
    if (mode === "vector") return this._filterByTags(await this._searchVector(query, k, minScore), tags);
    // Hybrid: collect unfiltered candidates; minScore is applied to the fused score below.
    const [vectorHits, bm25Hits] = await Promise.all([
      this._searchVector(query, k, 0),
      Promise.resolve(this._searchBM25(query, k, 0))
    ]);
    const fused = reciprocalRankFusion([
      vectorHits.map((h) => ({ type: "document", score: h.score ?? 0, content: h.content, metadata: { id: h.id } })),
      bm25Hits.map((h) => ({ type: "document", score: h.score ?? 0, content: h.content, metadata: { id: h.id } }))
    ]);
    const allById = /* @__PURE__ */ new Map();
    for (const h of [...vectorHits, ...bm25Hits]) allById.set(h.id, h);
    const results = [];
    for (const r of fused) {
      const item = allById.get(r.metadata.id);
      if (!item) continue;
      const scored = { ...item, score: r.score };
      if (scored.score >= minScore) results.push(scored);
      if (results.length >= k) break;
    }
    if (this._reranker && results.length > 1) {
      const documents = results.map((r) => r.content);
      const scores = await this._reranker.rank(query, documents);
      const blended = results.map((r, i) => ({
        ...r,
        score: 0.6 * (r.score ?? 0) + 0.4 * (scores[i] ?? 0)
      }));
      return this._filterByTags(
        blended.sort((a, b) => (b.score ?? 0) - (a.score ?? 0)),
        tags
      );
    }
    return this._filterByTags(results, tags);
  }
  /**
   * List items (newest first). Expired items are pruned first; the tag
   * filter runs after LIMIT/OFFSET, so a page may hold fewer than `limit` items.
   *
   * @param {{limit?: number, offset?: number, tags?: string[]}} [options]
   * @returns {object[]}
   */
  list(options = {}) {
    const { limit = 20, offset = 0, tags } = options;
    this._pruneExpired();
    const rows = this._db.prepare(
      "SELECT * FROM kv_data WHERE collection = ? AND (expires_at IS NULL OR expires_at > ?) ORDER BY created_at DESC, id DESC LIMIT ? OFFSET ?"
    ).all(this._name, Math.floor(Date.now() / 1e3), limit, offset);
    return this._filterByTags(rows.map((r) => this._rowToItem(r)), tags);
  }
  /** Count non-expired items in this collection. */
  count() {
    return this._db.prepare(
      "SELECT COUNT(*) as c FROM kv_data WHERE collection = ? AND (expires_at IS NULL OR expires_at > ?)"
    ).get(this._name, Math.floor(Date.now() / 1e3)).c;
  }
  /**
   * Keep only the N most recent items, remove the rest.
   *
   * @param {{keep: number}} options
   * @returns {Promise<{removed: number}>}
   */
  async trim(options) {
    const before = this.count();
    if (before <= options.keep) return { removed: 0 };
    // LIMIT -1 means "no limit": select everything after the newest `keep` rows.
    const toRemove = this._db.prepare(`
      SELECT id FROM kv_data
      WHERE collection = ?
      ORDER BY created_at DESC, id DESC
      LIMIT -1 OFFSET ?
    `).all(this._name, options.keep);
    for (const row of toRemove) {
      this._removeById(row.id);
    }
    return { removed: toRemove.length };
  }
  /**
   * Remove items older than a duration string (e.g. '30d', '12h').
   *
   * @param {{olderThan: string}} options
   * @returns {Promise<{removed: number}>}
   * @throws {Error} If `olderThan` is not a valid duration string.
   */
  async prune(options) {
    const seconds = parseDuration(options.olderThan);
    const cutoff = Math.floor(Date.now() / 1e3) - seconds;
    const toRemove = this._db.prepare(
      "SELECT id FROM kv_data WHERE collection = ? AND created_at < ?"
    ).all(this._name, cutoff);
    for (const row of toRemove) {
      this._removeById(row.id);
    }
    return { removed: toRemove.length };
  }
  /**
   * Remove a specific item by ID.
   *
   * Does nothing when the id does not belong to this collection, so a
   * caller cannot delete (or evict from the vector index) an item owned by
   * another collection.
   */
  remove(id) {
    const owned = this._db.prepare(
      "SELECT 1 AS ok FROM kv_data WHERE id = ? AND collection = ?"
    ).get(id, this._name);
    if (!owned) return;
    this._removeById(id);
  }
  /** Clear all items in this collection. */
  clear() {
    const rows = this._db.prepare(
      "SELECT id FROM kv_data WHERE collection = ?"
    ).all(this._name);
    for (const row of rows) {
      this._removeById(row.id);
    }
  }
  // ── Private ──────────────────────────────────────
  /**
   * Serialize an embedding for the `kv_vectors.embedding` column.
   * Honors byteOffset/byteLength so that a typed-array view into a larger
   * buffer stores only its own bytes, not the whole backing ArrayBuffer.
   */
  _toBlob(vec) {
    return Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength);
  }
  /**
   * Drop one item from the vector cache, the HNSW index and `kv_data`.
   * Callers pass ids already known to belong to this collection; the
   * DELETE is scoped to the collection as a second line of defense.
   */
  _removeById(id) {
    this._vecs.delete(id);
    this._hnsw.remove(id);
    this._db.prepare("DELETE FROM kv_data WHERE id = ? AND collection = ?").run(id, this._name);
  }
  /** Vector search: over-fetch from HNSW, then keep this collection's rows. */
  async _searchVector(query, k, minScore) {
    if (this._hnsw.size === 0) return [];
    const queryVec = await this._embedding.embed(query);
    const now = Math.floor(Date.now() / 1e3);
    const collectionCount = this._db.prepare(
      "SELECT COUNT(*) as c FROM kv_data WHERE collection = ? AND (expires_at IS NULL OR expires_at > ?)"
    ).get(this._name, now)?.c ?? 0;
    // Over-fetch factor = index size / live rows in this collection, clamped
    // to [3, 50]. Presumably the index also holds other collections' vectors
    // (the SQL below filters by collection) — confirm against the caller.
    const ratio = collectionCount > 0 ? Math.max(3, Math.min(50, Math.ceil(this._hnsw.size / collectionCount))) : 3;
    const searchK = Math.min(k * ratio, this._hnsw.size);
    const hits = this._hnsw.search(queryVec, searchK);
    const ids = hits.map((h) => h.id);
    if (ids.length === 0) return [];
    const scoreMap = new Map(hits.map((h) => [h.id, h.score]));
    // Only "?" placeholders are interpolated; the ids themselves are bound parameters.
    const placeholders = ids.map(() => "?").join(",");
    const rows = this._db.prepare(
      `SELECT * FROM kv_data WHERE id IN (${placeholders}) AND collection = ?`
    ).all(...ids, this._name);
    return rows.map((r) => ({ ...this._rowToItem(r), score: scoreMap.get(r.id) ?? 0 })).filter((r) => r.score >= minScore).sort((a, b) => (b.score ?? 0) - (a.score ?? 0)).slice(0, k);
  }
  /** Keyword search over `fts_kv`; scores are passed through `normalizeBM25`. */
  _searchBM25(query, k, minScore) {
    const ftsQuery = sanitizeFTS(query);
    if (!ftsQuery) return [];
    try {
      const rows = this._db.prepare(`
        SELECT d.*, bm25(fts_kv, 5.0, 1.0) AS score
        FROM fts_kv f
        JOIN kv_data d ON d.id = f.rowid
        WHERE fts_kv MATCH ? AND d.collection = ?
        ORDER BY score ASC
        LIMIT ?
      `).all(ftsQuery, this._name, k);
      return rows.map((r) => ({
        ...this._rowToItem(r),
        score: normalizeBM25(r.score)
      })).filter((r) => (r.score ?? 0) >= minScore);
    } catch {
      // Best effort: any failure of the FTS query yields "no keyword hits".
      return [];
    }
  }
  /** Map a `kv_data` row to the public item shape. */
  _rowToItem(r) {
    return {
      id: r.id,
      collection: r.collection,
      content: r.content,
      metadata: JSON.parse(r.meta_json || "{}"),
      tags: JSON.parse(r.tags_json || "[]"),
      createdAt: r.created_at,
      expiresAt: r.expires_at ?? void 0
    };
  }
  /** Filter results by tags (item must have ALL specified tags). */
  _filterByTags(items, tags) {
    if (!tags || tags.length === 0) return items;
    return items.filter(
      (item) => tags.every((t) => item.tags.includes(t))
    );
  }
  /** Remove expired items (TTL). Called automatically on search/list. */
  _pruneExpired() {
    const now = Math.floor(Date.now() / 1e3);
    const expired = this._db.prepare(
      "SELECT id FROM kv_data WHERE collection = ? AND expires_at IS NOT NULL AND expires_at <= ?"
    ).all(this._name, now);
    for (const row of expired) {
      this._removeById(row.id);
    }
  }
};
|
|
290
|
+
/**
 * Convert a compact duration string into a number of seconds.
 *
 * Accepted form: one or more decimal digits followed by exactly one unit
 * letter — `d` (days), `h` (hours), `m` (minutes) or `s` (seconds),
 * e.g. '30d', '12h', '5m', '45s'.
 *
 * @param {string} s Duration string.
 * @returns {number} Duration in whole seconds.
 * @throws {Error} If `s` is not a string in the accepted form. Non-string
 *   input gets this same descriptive error rather than an opaque TypeError.
 */
function parseDuration(s) {
  const SECONDS_PER_UNIT = { d: 86400, h: 3600, m: 60, s: 1 };
  const match = typeof s === "string" ? s.match(/^(\d+)([dhms])$/) : null;
  if (!match) throw new Error(`Invalid duration: "${String(s)}". Use format like '30d', '12h', '5m'.`);
  // The regex only admits d/h/m/s, so the table lookup cannot miss.
  return Number.parseInt(match[1], 10) * SECONDS_PER_UNIT[match[2]];
}
|
|
307
|
+
__name(parseDuration, "parseDuration");
|
|
308
|
+
|
|
309
|
+
// src/providers/vector/hnsw.ts
|
|
53
310
|
var HNSWIndex = class {
|
|
54
311
|
constructor(_dims, _maxElements = 2e6, _M = 16, _efConstruction = 200, _efSearch = 50) {
|
|
55
312
|
this._dims = _dims;
|
|
@@ -62,18 +319,36 @@ var HNSWIndex = class {
|
|
|
62
319
|
__name(this, "HNSWIndex");
|
|
63
320
|
}
|
|
64
321
|
_index = null;
|
|
65
|
-
|
|
322
|
+
_lib = null;
|
|
323
|
+
_ids = /* @__PURE__ */ new Set();
|
|
66
324
|
/**
|
|
67
325
|
* Initialize the HNSW index.
|
|
68
326
|
* Must be called before add/search.
|
|
69
327
|
*/
|
|
70
328
|
async init() {
|
|
71
|
-
|
|
72
|
-
|
|
329
|
+
this._lib = await import("hnswlib-node");
|
|
330
|
+
this._createIndex();
|
|
331
|
+
return this;
|
|
332
|
+
}
|
|
333
|
+
/**
|
|
334
|
+
* Reinitialize the index in-place, clearing all vectors.
|
|
335
|
+
* Required after reembed or full re-index to avoid duplicate IDs.
|
|
336
|
+
* init() must have been called first.
|
|
337
|
+
*/
|
|
338
|
+
reinit() {
|
|
339
|
+
if (!this._lib) throw new Error("HNSW not initialized \u2014 call init() first");
|
|
340
|
+
this._createIndex();
|
|
341
|
+
}
|
|
342
|
+
_createIndex() {
|
|
343
|
+
const HNSW = this._lib.default?.HierarchicalNSW ?? this._lib.HierarchicalNSW;
|
|
73
344
|
this._index = new HNSW("cosine", this._dims);
|
|
74
345
|
this._index.initIndex(this._maxElements, this._M, this._efConstruction);
|
|
75
346
|
this._index.setEf(this._efSearch);
|
|
76
|
-
|
|
347
|
+
this._ids = /* @__PURE__ */ new Set();
|
|
348
|
+
}
|
|
349
|
+
/** Maximum capacity of this index. */
|
|
350
|
+
get maxElements() {
|
|
351
|
+
return this._maxElements;
|
|
77
352
|
}
|
|
78
353
|
/**
|
|
79
354
|
* Add a vector with an integer ID.
|
|
@@ -81,8 +356,28 @@ var HNSWIndex = class {
|
|
|
81
356
|
*/
|
|
82
357
|
add(vector, id) {
|
|
83
358
|
if (!this._index) throw new Error("HNSW index not initialized \u2014 call init() first");
|
|
359
|
+
if (this._ids.has(id)) return;
|
|
360
|
+
if (this._ids.size >= this._maxElements) {
|
|
361
|
+
throw new Error(
|
|
362
|
+
`HNSW index full (${this._maxElements} elements). Increase maxElements in config or prune old data.`
|
|
363
|
+
);
|
|
364
|
+
}
|
|
84
365
|
this._index.addPoint(Array.from(vector), id);
|
|
85
|
-
this.
|
|
366
|
+
this._ids.add(id);
|
|
367
|
+
}
|
|
368
|
+
/**
|
|
369
|
+
* Mark a vector as deleted so it no longer appears in searches.
|
|
370
|
+
* Uses hnswlib-node markDelete under the hood.
|
|
371
|
+
* Safe to call with an ID that doesn't exist.
|
|
372
|
+
*/
|
|
373
|
+
remove(id) {
|
|
374
|
+
if (!this._index || this._ids.size === 0) return;
|
|
375
|
+
if (!this._ids.has(id)) return;
|
|
376
|
+
try {
|
|
377
|
+
this._index.markDelete(id);
|
|
378
|
+
this._ids.delete(id);
|
|
379
|
+
} catch {
|
|
380
|
+
}
|
|
86
381
|
}
|
|
87
382
|
/**
|
|
88
383
|
* Search for the k nearest neighbors.
|
|
@@ -90,8 +385,8 @@ var HNSWIndex = class {
|
|
|
90
385
|
* Score is 1 - cosine_distance (1.0 = identical).
|
|
91
386
|
*/
|
|
92
387
|
search(query, k) {
|
|
93
|
-
if (!this._index || this.
|
|
94
|
-
const actualK = Math.min(k, this.
|
|
388
|
+
if (!this._index || this._ids.size === 0) return [];
|
|
389
|
+
const actualK = Math.min(k, this._ids.size);
|
|
95
390
|
const result = this._index.searchKnn(Array.from(query), actualK);
|
|
96
391
|
return result.neighbors.map((id, i) => ({
|
|
97
392
|
id,
|
|
@@ -100,11 +395,11 @@ var HNSWIndex = class {
|
|
|
100
395
|
}
|
|
101
396
|
/** Number of vectors in the index. */
|
|
102
397
|
get size() {
|
|
103
|
-
return this.
|
|
398
|
+
return this._ids.size;
|
|
104
399
|
}
|
|
105
400
|
};
|
|
106
401
|
|
|
107
|
-
// src/embeddings/local.ts
|
|
402
|
+
// src/providers/embeddings/local.ts
|
|
108
403
|
var LocalEmbedding = class {
|
|
109
404
|
static {
|
|
110
405
|
__name(this, "LocalEmbedding");
|
|
@@ -117,19 +412,29 @@ var LocalEmbedding = class {
|
|
|
117
412
|
this._modelName = options.model ?? "Xenova/all-MiniLM-L6-v2";
|
|
118
413
|
this._cacheDir = options.cacheDir ?? ".model-cache";
|
|
119
414
|
}
|
|
415
|
+
_pipelinePromise = null;
|
|
120
416
|
/**
|
|
121
417
|
* Lazy-load the transformer pipeline.
|
|
122
418
|
* Singleton — created once and reused.
|
|
419
|
+
* Promise-deduped to prevent concurrent downloads.
|
|
123
420
|
*/
|
|
124
421
|
async _getPipeline() {
|
|
125
422
|
if (this._pipeline) return this._pipeline;
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
423
|
+
if (this._pipelinePromise) return this._pipelinePromise;
|
|
424
|
+
this._pipelinePromise = (async () => {
|
|
425
|
+
const { pipeline, env } = await import("@xenova/transformers");
|
|
426
|
+
env.cacheDir = this._cacheDir;
|
|
427
|
+
env.allowLocalModels = true;
|
|
428
|
+
this._pipeline = await pipeline("feature-extraction", this._modelName, {
|
|
429
|
+
quantized: true
|
|
430
|
+
});
|
|
431
|
+
return this._pipeline;
|
|
432
|
+
})();
|
|
433
|
+
try {
|
|
434
|
+
return await this._pipelinePromise;
|
|
435
|
+
} finally {
|
|
436
|
+
this._pipelinePromise = null;
|
|
437
|
+
}
|
|
133
438
|
}
|
|
134
439
|
/**
|
|
135
440
|
* Embed a single text string.
|
|
@@ -156,7 +461,7 @@ var LocalEmbedding = class {
|
|
|
156
461
|
}
|
|
157
462
|
};
|
|
158
463
|
|
|
159
|
-
// src/vector/mmr.ts
|
|
464
|
+
// src/search/vector/mmr.ts
|
|
160
465
|
function searchMMR(index, query, vectorCache, k, lambda = 0.7) {
|
|
161
466
|
const candidates = index.search(query, k * 3);
|
|
162
467
|
if (candidates.length <= k) return candidates;
|
|
@@ -188,14 +493,14 @@ function searchMMR(index, query, vectorCache, k, lambda = 0.7) {
|
|
|
188
493
|
}
|
|
189
494
|
__name(searchMMR, "searchMMR");
|
|
190
495
|
|
|
191
|
-
// src/
|
|
192
|
-
var
|
|
496
|
+
// src/search/vector/multi-index.ts
|
|
497
|
+
var MultiIndexSearch = class {
|
|
193
498
|
static {
|
|
194
|
-
__name(this, "
|
|
499
|
+
__name(this, "MultiIndexSearch");
|
|
195
500
|
}
|
|
196
|
-
|
|
197
|
-
constructor(
|
|
198
|
-
this.
|
|
501
|
+
_config;
|
|
502
|
+
constructor(config) {
|
|
503
|
+
this._config = config;
|
|
199
504
|
}
|
|
200
505
|
/**
|
|
201
506
|
* Search across all indices.
|
|
@@ -205,20 +510,20 @@ var UnifiedSearch = class {
|
|
|
205
510
|
const {
|
|
206
511
|
codeK = 6,
|
|
207
512
|
gitK = 5,
|
|
208
|
-
|
|
513
|
+
patternK = 4,
|
|
209
514
|
minScore = 0.25,
|
|
210
515
|
useMMR = true,
|
|
211
516
|
mmrLambda = 0.7
|
|
212
517
|
} = options;
|
|
213
|
-
const queryVec = await this.
|
|
518
|
+
const queryVec = await this._config.embedding.embed(query);
|
|
214
519
|
const results = [];
|
|
215
|
-
if (this.
|
|
216
|
-
const hits = useMMR ? searchMMR(this.
|
|
520
|
+
if (this._config.codeHnsw && this._config.codeHnsw.size > 0) {
|
|
521
|
+
const hits = useMMR ? searchMMR(this._config.codeHnsw, queryVec, this._config.codeVecs, codeK, mmrLambda) : this._config.codeHnsw.search(queryVec, codeK);
|
|
217
522
|
if (hits.length > 0) {
|
|
218
523
|
const ids = hits.map((h) => h.id);
|
|
219
524
|
const scoreMap = new Map(hits.map((h) => [h.id, h.score]));
|
|
220
525
|
const placeholders = ids.map(() => "?").join(",");
|
|
221
|
-
const rows = this.
|
|
526
|
+
const rows = this._config.db.prepare(
|
|
222
527
|
`SELECT * FROM code_chunks WHERE id IN (${placeholders})`
|
|
223
528
|
).all(...ids);
|
|
224
529
|
for (const r of rows) {
|
|
@@ -241,13 +546,13 @@ var UnifiedSearch = class {
|
|
|
241
546
|
}
|
|
242
547
|
}
|
|
243
548
|
}
|
|
244
|
-
if (this.
|
|
245
|
-
const hits = this.
|
|
549
|
+
if (this._config.gitHnsw && this._config.gitHnsw.size > 0) {
|
|
550
|
+
const hits = this._config.gitHnsw.search(queryVec, gitK * 2);
|
|
246
551
|
if (hits.length > 0) {
|
|
247
552
|
const ids = hits.map((h) => h.id);
|
|
248
553
|
const scoreMap = new Map(hits.map((h) => [h.id, h.score]));
|
|
249
554
|
const placeholders = ids.map(() => "?").join(",");
|
|
250
|
-
const rows = this.
|
|
555
|
+
const rows = this._config.db.prepare(
|
|
251
556
|
`SELECT * FROM git_commits WHERE id IN (${placeholders}) AND is_merge = 0`
|
|
252
557
|
).all(...ids);
|
|
253
558
|
for (const r of rows) {
|
|
@@ -272,13 +577,13 @@ var UnifiedSearch = class {
|
|
|
272
577
|
}
|
|
273
578
|
}
|
|
274
579
|
}
|
|
275
|
-
if (this.
|
|
276
|
-
const hits = useMMR ? searchMMR(this.
|
|
580
|
+
if (this._config.patternHnsw && this._config.patternHnsw.size > 0) {
|
|
581
|
+
const hits = useMMR ? searchMMR(this._config.patternHnsw, queryVec, this._config.patternVecs, patternK, mmrLambda) : this._config.patternHnsw.search(queryVec, patternK);
|
|
277
582
|
if (hits.length > 0) {
|
|
278
583
|
const ids = hits.map((h) => h.id);
|
|
279
584
|
const scoreMap = new Map(hits.map((h) => [h.id, h.score]));
|
|
280
585
|
const placeholders = ids.map(() => "?").join(",");
|
|
281
|
-
const rows = this.
|
|
586
|
+
const rows = this._config.db.prepare(
|
|
282
587
|
`SELECT * FROM memory_patterns WHERE id IN (${placeholders}) AND success_rate >= 0.5`
|
|
283
588
|
).all(...ids);
|
|
284
589
|
for (const r of rows) {
|
|
@@ -301,7 +606,7 @@ var UnifiedSearch = class {
|
|
|
301
606
|
}
|
|
302
607
|
}
|
|
303
608
|
results.sort((a, b) => b.score - a.score);
|
|
304
|
-
if (this.
|
|
609
|
+
if (this._config.reranker && results.length > 1) {
|
|
305
610
|
return this._rerank(query, results);
|
|
306
611
|
}
|
|
307
612
|
return results;
|
|
@@ -314,7 +619,7 @@ var UnifiedSearch = class {
|
|
|
314
619
|
* Top 11+: 40% retrieval / 60% reranker (trust reranker more)
|
|
315
620
|
*/
|
|
316
621
|
async _rerank(query, results) {
|
|
317
|
-
const reranker = this.
|
|
622
|
+
const reranker = this._config.reranker;
|
|
318
623
|
const documents = results.map((r) => r.content);
|
|
319
624
|
const scores = await reranker.rank(query, documents);
|
|
320
625
|
const blended = results.map((r, i) => {
|
|
@@ -329,21 +634,7 @@ var UnifiedSearch = class {
|
|
|
329
634
|
}
|
|
330
635
|
};
|
|
331
636
|
|
|
332
|
-
// src/
|
|
333
|
-
function sanitizeFTS(query) {
|
|
334
|
-
const clean = query.replace(/[{}[\]()^~*:]/g, " ").replace(/\bAND\b|\bOR\b|\bNOT\b|\bNEAR\b/gi, "").trim();
|
|
335
|
-
const words = clean.split(/\s+/).filter((w) => w.length > 1);
|
|
336
|
-
if (words.length === 0) return "";
|
|
337
|
-
return words.map((w) => `"${w}"`).join(" ");
|
|
338
|
-
}
|
|
339
|
-
__name(sanitizeFTS, "sanitizeFTS");
|
|
340
|
-
function normalizeBM25(rawScore) {
|
|
341
|
-
const abs = Math.abs(rawScore);
|
|
342
|
-
return 1 / (1 + Math.exp(-0.3 * (abs - 5)));
|
|
343
|
-
}
|
|
344
|
-
__name(normalizeBM25, "normalizeBM25");
|
|
345
|
-
|
|
346
|
-
// src/query/bm25.ts
|
|
637
|
+
// src/search/keyword/bm25.ts
|
|
347
638
|
var BM25Search = class {
|
|
348
639
|
constructor(_db) {
|
|
349
640
|
this._db = _db;
|
|
@@ -357,7 +648,7 @@ var BM25Search = class {
|
|
|
357
648
|
* Query syntax: simple words, OR, NOT, "exact phrases", prefix*
|
|
358
649
|
*/
|
|
359
650
|
search(query, options = {}) {
|
|
360
|
-
const { codeK = 8, gitK = 5,
|
|
651
|
+
const { codeK = 8, gitK = 5, patternK = 4 } = options;
|
|
361
652
|
const results = [];
|
|
362
653
|
const ftsQuery = sanitizeFTS(query);
|
|
363
654
|
if (!ftsQuery) return [];
|
|
@@ -424,7 +715,7 @@ var BM25Search = class {
|
|
|
424
715
|
} catch {
|
|
425
716
|
}
|
|
426
717
|
}
|
|
427
|
-
if (
|
|
718
|
+
if (patternK > 0) {
|
|
428
719
|
try {
|
|
429
720
|
const rows = this._db.prepare(`
|
|
430
721
|
SELECT p.id, p.task_type, p.task, p.approach, p.outcome,
|
|
@@ -435,7 +726,7 @@ var BM25Search = class {
|
|
|
435
726
|
WHERE fts_patterns MATCH ? AND p.success_rate >= 0.5
|
|
436
727
|
ORDER BY score ASC
|
|
437
728
|
LIMIT ?
|
|
438
|
-
`).all(ftsQuery,
|
|
729
|
+
`).all(ftsQuery, patternK);
|
|
439
730
|
for (const r of rows) {
|
|
440
731
|
results.push({
|
|
441
732
|
type: "pattern",
|
|
@@ -470,7 +761,7 @@ var BM25Search = class {
|
|
|
470
761
|
}
|
|
471
762
|
};
|
|
472
763
|
|
|
473
|
-
// src/
|
|
764
|
+
// src/app/context-builder.ts
|
|
474
765
|
var ContextBuilder = class {
|
|
475
766
|
constructor(_search, _coEdits) {
|
|
476
767
|
this._search = _search;
|
|
@@ -487,7 +778,7 @@ var ContextBuilder = class {
|
|
|
487
778
|
const {
|
|
488
779
|
codeResults = 6,
|
|
489
780
|
gitResults = 5,
|
|
490
|
-
|
|
781
|
+
patternResults = 4,
|
|
491
782
|
affectedFiles = [],
|
|
492
783
|
minScore = 0.25,
|
|
493
784
|
useMMR = true,
|
|
@@ -496,7 +787,7 @@ var ContextBuilder = class {
|
|
|
496
787
|
const results = await this._search.search(task, {
|
|
497
788
|
codeK: codeResults,
|
|
498
789
|
gitK: gitResults,
|
|
499
|
-
|
|
790
|
+
patternK: patternResults,
|
|
500
791
|
minScore,
|
|
501
792
|
useMMR,
|
|
502
793
|
mmrLambda
|
|
@@ -544,7 +835,7 @@ var ContextBuilder = class {
|
|
|
544
835
|
parts.push("");
|
|
545
836
|
}
|
|
546
837
|
}
|
|
547
|
-
if (affectedFiles.length > 0) {
|
|
838
|
+
if (affectedFiles.length > 0 && this._coEdits) {
|
|
548
839
|
const coEditLines = [];
|
|
549
840
|
for (const file of affectedFiles.slice(0, 3)) {
|
|
550
841
|
const suggestions = this._coEdits.suggest(file, 4);
|
|
@@ -560,7 +851,7 @@ var ContextBuilder = class {
|
|
|
560
851
|
parts.push("");
|
|
561
852
|
}
|
|
562
853
|
}
|
|
563
|
-
const memHits = results.filter((r) => r.type === "pattern").slice(0,
|
|
854
|
+
const memHits = results.filter((r) => r.type === "pattern").slice(0, patternResults);
|
|
564
855
|
if (memHits.length > 0) {
|
|
565
856
|
parts.push("## Learned Patterns\n");
|
|
566
857
|
for (const p of memHits) {
|
|
@@ -578,241 +869,97 @@ var ContextBuilder = class {
|
|
|
578
869
|
}
|
|
579
870
|
};
|
|
580
871
|
|
|
581
|
-
// src/
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
this._hnsw = _hnsw;
|
|
588
|
-
this._vecs = _vecs;
|
|
589
|
-
this._reranker = _reranker;
|
|
590
|
-
}
|
|
872
|
+
// src/app/brainbank.ts
|
|
873
|
+
import { EventEmitter } from "events";
|
|
874
|
+
|
|
875
|
+
// src/app/registry.ts
|
|
876
|
+
var ALIASES = {};
|
|
877
|
+
var IndexerRegistry = class {
|
|
591
878
|
static {
|
|
592
|
-
__name(this, "
|
|
593
|
-
}
|
|
594
|
-
/** Collection name. */
|
|
595
|
-
get name() {
|
|
596
|
-
return this._name;
|
|
597
|
-
}
|
|
598
|
-
/** Add an item. Returns its ID. */
|
|
599
|
-
async add(content, options = {}) {
|
|
600
|
-
const opts = "tags" in options || "ttl" in options || "metadata" in options ? options : { metadata: options };
|
|
601
|
-
const metadata = opts.metadata ?? {};
|
|
602
|
-
const tags = opts.tags ?? [];
|
|
603
|
-
const expiresAt = opts.ttl ? Math.floor(Date.now() / 1e3) + parseDuration(opts.ttl) : null;
|
|
604
|
-
const result = this._db.prepare(
|
|
605
|
-
"INSERT INTO kv_data (collection, content, meta_json, tags_json, expires_at) VALUES (?, ?, ?, ?, ?)"
|
|
606
|
-
).run(this._name, content, JSON.stringify(metadata), JSON.stringify(tags), expiresAt);
|
|
607
|
-
const id = Number(result.lastInsertRowid);
|
|
608
|
-
const vec = await this._embedding.embed(content);
|
|
609
|
-
this._db.prepare(
|
|
610
|
-
"INSERT INTO kv_vectors (data_id, embedding) VALUES (?, ?)"
|
|
611
|
-
).run(id, Buffer.from(vec.buffer));
|
|
612
|
-
this._hnsw.add(vec, id);
|
|
613
|
-
this._vecs.set(id, vec);
|
|
614
|
-
return id;
|
|
615
|
-
}
|
|
616
|
-
/** Add multiple items. Returns their IDs. */
|
|
617
|
-
async addMany(items) {
|
|
618
|
-
const ids = [];
|
|
619
|
-
for (const item of items) {
|
|
620
|
-
ids.push(await this.add(item.content, {
|
|
621
|
-
metadata: item.metadata,
|
|
622
|
-
tags: item.tags,
|
|
623
|
-
ttl: item.ttl
|
|
624
|
-
}));
|
|
625
|
-
}
|
|
626
|
-
return ids;
|
|
627
|
-
}
|
|
628
|
-
/** Search this collection. */
|
|
629
|
-
async search(query, options = {}) {
|
|
630
|
-
const { k = 5, mode = "hybrid", minScore = 0.15, tags } = options;
|
|
631
|
-
this._pruneExpired();
|
|
632
|
-
if (mode === "keyword") return this._filterByTags(this._searchBM25(query, k, minScore), tags);
|
|
633
|
-
if (mode === "vector") return this._filterByTags(await this._searchVector(query, k, minScore), tags);
|
|
634
|
-
const [vectorHits, bm25Hits] = await Promise.all([
|
|
635
|
-
this._searchVector(query, k, 0),
|
|
636
|
-
Promise.resolve(this._searchBM25(query, k, 0))
|
|
637
|
-
]);
|
|
638
|
-
const fused = reciprocalRankFusion([
|
|
639
|
-
vectorHits.map((h) => ({ type: "document", score: h.score ?? 0, content: h.content, metadata: { id: h.id } })),
|
|
640
|
-
bm25Hits.map((h) => ({ type: "document", score: h.score ?? 0, content: h.content, metadata: { id: h.id } }))
|
|
641
|
-
]);
|
|
642
|
-
const allById = /* @__PURE__ */ new Map();
|
|
643
|
-
for (const h of [...vectorHits, ...bm25Hits]) allById.set(h.id, h);
|
|
644
|
-
const results = [];
|
|
645
|
-
for (const r of fused) {
|
|
646
|
-
const item = allById.get(r.metadata.id);
|
|
647
|
-
if (!item) continue;
|
|
648
|
-
const scored = { ...item, score: r.score };
|
|
649
|
-
if (scored.score >= minScore) results.push(scored);
|
|
650
|
-
if (results.length >= k) break;
|
|
651
|
-
}
|
|
652
|
-
if (this._reranker && results.length > 1) {
|
|
653
|
-
const documents = results.map((r) => r.content);
|
|
654
|
-
const scores = await this._reranker.rank(query, documents);
|
|
655
|
-
const blended = results.map((r, i) => ({
|
|
656
|
-
...r,
|
|
657
|
-
score: 0.6 * (r.score ?? 0) + 0.4 * (scores[i] ?? 0)
|
|
658
|
-
}));
|
|
659
|
-
return this._filterByTags(
|
|
660
|
-
blended.sort((a, b) => (b.score ?? 0) - (a.score ?? 0)),
|
|
661
|
-
tags
|
|
662
|
-
);
|
|
663
|
-
}
|
|
664
|
-
return this._filterByTags(results, tags);
|
|
665
|
-
}
|
|
666
|
-
/** List items (newest first). */
|
|
667
|
-
list(options = {}) {
|
|
668
|
-
const { limit = 20, offset = 0, tags } = options;
|
|
669
|
-
this._pruneExpired();
|
|
670
|
-
const rows = this._db.prepare(
|
|
671
|
-
"SELECT * FROM kv_data WHERE collection = ? AND (expires_at IS NULL OR expires_at > ?) ORDER BY created_at DESC, id DESC LIMIT ? OFFSET ?"
|
|
672
|
-
).all(this._name, Math.floor(Date.now() / 1e3), limit, offset);
|
|
673
|
-
return this._filterByTags(rows.map((r) => this._rowToItem(r)), tags);
|
|
879
|
+
__name(this, "IndexerRegistry");
|
|
674
880
|
}
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
881
|
+
_map = /* @__PURE__ */ new Map();
|
|
882
|
+
// ── Registration ────────────────────────────────
|
|
883
|
+
/** Store an indexer. Duplicate names silently overwrite. */
|
|
884
|
+
register(indexer) {
|
|
885
|
+
this._map.set(indexer.name, indexer);
|
|
680
886
|
}
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
for (const row of toRemove) {
|
|
692
|
-
this._removeById(row.id);
|
|
887
|
+
// ── Lookup ──────────────────────────────────────
|
|
888
|
+
/**
|
|
889
|
+
* Check whether an indexer is registered.
|
|
890
|
+
* Supports type-prefix matching: `has('code')` returns true if
|
|
891
|
+
* 'code', 'code:frontend', or 'code:backend' is registered.
|
|
892
|
+
*/
|
|
893
|
+
has(name) {
|
|
894
|
+
if (this._map.has(name)) return true;
|
|
895
|
+
for (const key of this._map.keys()) {
|
|
896
|
+
if (key.startsWith(name + ":")) return true;
|
|
693
897
|
}
|
|
694
|
-
return
|
|
898
|
+
return false;
|
|
695
899
|
}
|
|
696
|
-
/**
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
900
|
+
/**
|
|
901
|
+
* Get an indexer by name. Throws a descriptive error if not found.
|
|
902
|
+
*
|
|
903
|
+
* Resolution order:
|
|
904
|
+
* 1. Alias map (currently empty)
|
|
905
|
+
* 2. Exact match
|
|
906
|
+
* 3. First type-prefix match ('code' → 'code:frontend')
|
|
907
|
+
*/
|
|
908
|
+
get(name) {
|
|
909
|
+
const resolved = ALIASES[name] ?? name;
|
|
910
|
+
const exact = this._map.get(resolved);
|
|
911
|
+
if (exact) return exact;
|
|
912
|
+
const prefixed = this.firstByType(name);
|
|
913
|
+
if (prefixed) return prefixed;
|
|
914
|
+
throw new Error(
|
|
915
|
+
`BrainBank: Indexer '${name}' is not loaded. Add .use(${name}()) to your BrainBank instance.`
|
|
916
|
+
);
|
|
707
917
|
}
|
|
708
|
-
/**
|
|
709
|
-
|
|
710
|
-
|
|
918
|
+
/**
|
|
919
|
+
* Return every indexer whose name equals `type` or starts with `type + ':'`.
|
|
920
|
+
* Example: allByType('code') → [code, code:frontend, code:backend]
|
|
921
|
+
*/
|
|
922
|
+
allByType(type) {
|
|
923
|
+
return [...this._map.values()].filter(
|
|
924
|
+
(m) => m.name === type || m.name.startsWith(type + ":")
|
|
925
|
+
);
|
|
711
926
|
}
|
|
712
|
-
/**
|
|
713
|
-
|
|
714
|
-
const
|
|
715
|
-
|
|
716
|
-
).all(this._name);
|
|
717
|
-
for (const row of rows) {
|
|
718
|
-
this._removeById(row.id);
|
|
927
|
+
/** Return the first indexer that matches the type prefix, or undefined. */
|
|
928
|
+
firstByType(type) {
|
|
929
|
+
for (const m of this._map.values()) {
|
|
930
|
+
if (m.name === type || m.name.startsWith(type + ":")) return m;
|
|
719
931
|
}
|
|
932
|
+
return void 0;
|
|
720
933
|
}
|
|
721
|
-
// ──
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
this.
|
|
934
|
+
// ── Accessors ───────────────────────────────────
|
|
935
|
+
/** All registered indexer names (insertion order). */
|
|
936
|
+
get names() {
|
|
937
|
+
return [...this._map.keys()];
|
|
725
938
|
}
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
const hits = this._hnsw.search(queryVec, k * 3);
|
|
730
|
-
const ids = hits.map((h) => h.id);
|
|
731
|
-
if (ids.length === 0) return [];
|
|
732
|
-
const scoreMap = new Map(hits.map((h) => [h.id, h.score]));
|
|
733
|
-
const placeholders = ids.map(() => "?").join(",");
|
|
734
|
-
const rows = this._db.prepare(
|
|
735
|
-
`SELECT * FROM kv_data WHERE id IN (${placeholders}) AND collection = ?`
|
|
736
|
-
).all(...ids, this._name);
|
|
737
|
-
return rows.map((r) => ({ ...this._rowToItem(r), score: scoreMap.get(r.id) ?? 0 })).filter((r) => r.score >= minScore).sort((a, b) => (b.score ?? 0) - (a.score ?? 0)).slice(0, k);
|
|
939
|
+
/** All registered indexer instances (insertion order). */
|
|
940
|
+
get all() {
|
|
941
|
+
return [...this._map.values()];
|
|
738
942
|
}
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
FROM fts_kv f
|
|
746
|
-
JOIN kv_data d ON d.id = f.rowid
|
|
747
|
-
WHERE fts_kv MATCH ? AND d.collection = ?
|
|
748
|
-
ORDER BY score ASC
|
|
749
|
-
LIMIT ?
|
|
750
|
-
`).all(ftsQuery, this._name, k);
|
|
751
|
-
return rows.map((r) => ({
|
|
752
|
-
...this._rowToItem(r),
|
|
753
|
-
score: normalizeBM25(r.score)
|
|
754
|
-
})).filter((r) => (r.score ?? 0) >= minScore);
|
|
755
|
-
} catch {
|
|
756
|
-
return [];
|
|
757
|
-
}
|
|
758
|
-
}
|
|
759
|
-
_rowToItem(r) {
|
|
760
|
-
return {
|
|
761
|
-
id: r.id,
|
|
762
|
-
collection: r.collection,
|
|
763
|
-
content: r.content,
|
|
764
|
-
metadata: JSON.parse(r.meta_json || "{}"),
|
|
765
|
-
tags: JSON.parse(r.tags_json || "[]"),
|
|
766
|
-
createdAt: r.created_at,
|
|
767
|
-
expiresAt: r.expires_at ?? void 0
|
|
768
|
-
};
|
|
769
|
-
}
|
|
770
|
-
/** Filter results by tags (item must have ALL specified tags). */
|
|
771
|
-
_filterByTags(items, tags) {
|
|
772
|
-
if (!tags || tags.length === 0) return items;
|
|
773
|
-
return items.filter(
|
|
774
|
-
(item) => tags.every((t) => item.tags.includes(t))
|
|
775
|
-
);
|
|
943
|
+
/**
|
|
944
|
+
* Underlying Map.
|
|
945
|
+
* Prefer `all`, `allByType`, or `firstByType` everywhere else.
|
|
946
|
+
*/
|
|
947
|
+
get raw() {
|
|
948
|
+
return this._map;
|
|
776
949
|
}
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
"SELECT id FROM kv_data WHERE collection = ? AND expires_at IS NOT NULL AND expires_at <= ?"
|
|
782
|
-
).all(this._name, now);
|
|
783
|
-
for (const row of expired) {
|
|
784
|
-
this._removeById(row.id);
|
|
785
|
-
}
|
|
950
|
+
// ── Lifecycle ───────────────────────────────────
|
|
951
|
+
/** Remove all registered indexers. Called by BrainBank.close(). */
|
|
952
|
+
clear() {
|
|
953
|
+
this._map.clear();
|
|
786
954
|
}
|
|
787
955
|
};
|
|
788
|
-
function parseDuration(s) {
|
|
789
|
-
const match = s.match(/^(\d+)([dhms])$/);
|
|
790
|
-
if (!match) throw new Error(`Invalid duration: "${s}". Use format like '30d', '12h', '5m'.`);
|
|
791
|
-
const n = parseInt(match[1], 10);
|
|
792
|
-
switch (match[2]) {
|
|
793
|
-
case "d":
|
|
794
|
-
return n * 86400;
|
|
795
|
-
case "h":
|
|
796
|
-
return n * 3600;
|
|
797
|
-
case "m":
|
|
798
|
-
return n * 60;
|
|
799
|
-
case "s":
|
|
800
|
-
return n;
|
|
801
|
-
default:
|
|
802
|
-
return n;
|
|
803
|
-
}
|
|
804
|
-
}
|
|
805
|
-
__name(parseDuration, "parseDuration");
|
|
806
|
-
|
|
807
|
-
// src/core/brainbank.ts
|
|
808
|
-
import { EventEmitter } from "events";
|
|
809
956
|
|
|
810
|
-
// src/
|
|
957
|
+
// src/db/database.ts
|
|
811
958
|
import BetterSqlite3 from "better-sqlite3";
|
|
812
959
|
import * as fs from "fs";
|
|
813
960
|
import * as path2 from "path";
|
|
814
961
|
|
|
815
|
-
// src/
|
|
962
|
+
// src/db/schema.ts
|
|
816
963
|
var SCHEMA_VERSION = 4;
|
|
817
964
|
function createSchema(db) {
|
|
818
965
|
db.exec(`
|
|
@@ -1123,7 +1270,7 @@ function createSchema(db) {
|
|
|
1123
1270
|
}
|
|
1124
1271
|
__name(createSchema, "createSchema");
|
|
1125
1272
|
|
|
1126
|
-
// src/
|
|
1273
|
+
// src/db/database.ts
|
|
1127
1274
|
var Database = class {
|
|
1128
1275
|
static {
|
|
1129
1276
|
__name(this, "Database");
|
|
@@ -1175,7 +1322,7 @@ var Database = class {
|
|
|
1175
1322
|
}
|
|
1176
1323
|
};
|
|
1177
1324
|
|
|
1178
|
-
// src/
|
|
1325
|
+
// src/services/reembed.ts
|
|
1179
1326
|
var TABLES = [
|
|
1180
1327
|
{
|
|
1181
1328
|
name: "code",
|
|
@@ -1195,9 +1342,14 @@ var TABLES = [
|
|
|
1195
1342
|
vectorTable: "git_vectors",
|
|
1196
1343
|
idColumn: "id",
|
|
1197
1344
|
fkColumn: "commit_id",
|
|
1345
|
+
// Must match git-engine.ts:119-125 exactly
|
|
1198
1346
|
textBuilder: /* @__PURE__ */ __name((r) => [
|
|
1199
|
-
r.message
|
|
1200
|
-
r.
|
|
1347
|
+
`Commit: ${r.message}`,
|
|
1348
|
+
`Author: ${r.author}`,
|
|
1349
|
+
`Date: ${r.date}`,
|
|
1350
|
+
r.files_json && r.files_json !== "[]" ? `Files: ${JSON.parse(r.files_json).join(", ")}` : "",
|
|
1351
|
+
r.diff ? `Changes:
|
|
1352
|
+
${r.diff.slice(0, 2e3)}` : ""
|
|
1201
1353
|
].filter(Boolean).join("\n"), "textBuilder")
|
|
1202
1354
|
},
|
|
1203
1355
|
{
|
|
@@ -1206,12 +1358,8 @@ var TABLES = [
|
|
|
1206
1358
|
vectorTable: "memory_vectors",
|
|
1207
1359
|
idColumn: "id",
|
|
1208
1360
|
fkColumn: "pattern_id",
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
r.task,
|
|
1212
|
-
r.approach,
|
|
1213
|
-
r.outcome ?? ""
|
|
1214
|
-
].filter(Boolean).join("\n"), "textBuilder")
|
|
1361
|
+
// Must match memory/pattern-store.ts:49 exactly
|
|
1362
|
+
textBuilder: /* @__PURE__ */ __name((r) => `${r.task_type} ${r.task} ${r.approach}`, "textBuilder")
|
|
1215
1363
|
},
|
|
1216
1364
|
{
|
|
1217
1365
|
name: "notes",
|
|
@@ -1219,12 +1367,15 @@ var TABLES = [
|
|
|
1219
1367
|
vectorTable: "note_vectors",
|
|
1220
1368
|
idColumn: "id",
|
|
1221
1369
|
fkColumn: "note_id",
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
r.
|
|
1225
|
-
r.
|
|
1226
|
-
|
|
1227
|
-
|
|
1370
|
+
// Must match notes/engine.ts:90 exactly
|
|
1371
|
+
textBuilder: /* @__PURE__ */ __name((r) => {
|
|
1372
|
+
const decisions = JSON.parse(r.decisions_json || "[]").join(". ");
|
|
1373
|
+
const patterns = JSON.parse(r.patterns_json || "[]").join(". ");
|
|
1374
|
+
return `${r.title}
|
|
1375
|
+
${r.summary}
|
|
1376
|
+
${decisions}
|
|
1377
|
+
${patterns}`;
|
|
1378
|
+
}, "textBuilder")
|
|
1228
1379
|
},
|
|
1229
1380
|
{
|
|
1230
1381
|
name: "docs",
|
|
@@ -1232,10 +1383,8 @@ var TABLES = [
|
|
|
1232
1383
|
vectorTable: "doc_vectors",
|
|
1233
1384
|
idColumn: "id",
|
|
1234
1385
|
fkColumn: "chunk_id",
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
r.content
|
|
1238
|
-
].filter(Boolean).join("\n"), "textBuilder")
|
|
1386
|
+
// Must match docs-engine.ts:160 exactly
|
|
1387
|
+
textBuilder: /* @__PURE__ */ __name((r) => `title: ${r.title ?? ""} | text: ${r.content}`, "textBuilder")
|
|
1239
1388
|
},
|
|
1240
1389
|
{
|
|
1241
1390
|
name: "kv",
|
|
@@ -1282,39 +1431,45 @@ async function reembedAll(db, embedding, hnswMap, options = {}) {
|
|
|
1282
1431
|
}
|
|
1283
1432
|
__name(reembedAll, "reembedAll");
|
|
1284
1433
|
async function reembedTable(db, embedding, table, batchSize, onProgress) {
|
|
1285
|
-
const
|
|
1286
|
-
`SELECT * FROM ${table.textTable}`
|
|
1287
|
-
).
|
|
1288
|
-
if (
|
|
1289
|
-
|
|
1290
|
-
const insertVec = db.prepare(
|
|
1291
|
-
`INSERT INTO ${table.vectorTable} (${table.fkColumn}, embedding) VALUES (?, ?)`
|
|
1292
|
-
);
|
|
1434
|
+
const totalCount = db.prepare(
|
|
1435
|
+
`SELECT COUNT(*) as c FROM ${table.textTable}`
|
|
1436
|
+
).get().c;
|
|
1437
|
+
if (totalCount === 0) return 0;
|
|
1438
|
+
const allNewVectors = [];
|
|
1293
1439
|
let processed = 0;
|
|
1294
|
-
for (let
|
|
1295
|
-
const batch =
|
|
1440
|
+
for (let offset = 0; offset < totalCount; offset += batchSize) {
|
|
1441
|
+
const batch = db.prepare(
|
|
1442
|
+
`SELECT * FROM ${table.textTable} LIMIT ? OFFSET ?`
|
|
1443
|
+
).all(batchSize, offset);
|
|
1296
1444
|
const texts = batch.map((r) => table.textBuilder(r));
|
|
1297
1445
|
const vectors = await embedding.embedBatch(texts);
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
const vec = vectors[j];
|
|
1302
|
-
insertVec.run(id, Buffer.from(vec.buffer));
|
|
1303
|
-
}
|
|
1304
|
-
});
|
|
1446
|
+
for (let j = 0; j < batch.length; j++) {
|
|
1447
|
+
allNewVectors.push({ id: batch[j][table.idColumn], vec: vectors[j] });
|
|
1448
|
+
}
|
|
1305
1449
|
processed += batch.length;
|
|
1306
|
-
onProgress?.(table.name, processed,
|
|
1450
|
+
onProgress?.(table.name, processed, totalCount);
|
|
1307
1451
|
}
|
|
1452
|
+
const insertVec = db.prepare(
|
|
1453
|
+
`INSERT INTO ${table.vectorTable} (${table.fkColumn}, embedding) VALUES (?, ?)`
|
|
1454
|
+
);
|
|
1455
|
+
db.transaction(() => {
|
|
1456
|
+
db.prepare(`DELETE FROM ${table.vectorTable}`).run();
|
|
1457
|
+
for (const { id, vec } of allNewVectors) {
|
|
1458
|
+
insertVec.run(id, Buffer.from(vec.buffer));
|
|
1459
|
+
}
|
|
1460
|
+
});
|
|
1308
1461
|
return processed;
|
|
1309
1462
|
}
|
|
1310
1463
|
__name(reembedTable, "reembedTable");
|
|
1311
1464
|
async function rebuildHnsw(db, table, hnsw, vecs) {
|
|
1312
1465
|
vecs.clear();
|
|
1466
|
+
hnsw.reinit();
|
|
1313
1467
|
const rows = db.prepare(
|
|
1314
1468
|
`SELECT ${table.fkColumn} as id, embedding FROM ${table.vectorTable}`
|
|
1315
1469
|
).all();
|
|
1316
1470
|
for (const row of rows) {
|
|
1317
|
-
const
|
|
1471
|
+
const buf = Buffer.from(row.embedding);
|
|
1472
|
+
const vec = new Float32Array(buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength));
|
|
1318
1473
|
hnsw.add(vec, row.id);
|
|
1319
1474
|
vecs.set(row.id, vec);
|
|
1320
1475
|
}
|
|
@@ -1357,7 +1512,296 @@ function detectProviderMismatch(db, embedding) {
|
|
|
1357
1512
|
}
|
|
1358
1513
|
__name(detectProviderMismatch, "detectProviderMismatch");
|
|
1359
1514
|
|
|
1360
|
-
// src/
|
|
1515
|
+
// src/app/initializer.ts
|
|
1516
|
+
async function earlyInit(config, emit) {
|
|
1517
|
+
const db = new Database(config.dbPath);
|
|
1518
|
+
const embedding = config.embeddingProvider ?? new LocalEmbedding();
|
|
1519
|
+
const mismatch = detectProviderMismatch(db, embedding);
|
|
1520
|
+
const skipVectorLoad = !!mismatch?.mismatch;
|
|
1521
|
+
if (skipVectorLoad) {
|
|
1522
|
+
emit("warning", {
|
|
1523
|
+
type: "provider_mismatch",
|
|
1524
|
+
previous: mismatch.stored,
|
|
1525
|
+
current: mismatch.current,
|
|
1526
|
+
message: "Embedding provider changed \u2014 vectors not loaded. Run brain.reembed() to regenerate."
|
|
1527
|
+
});
|
|
1528
|
+
}
|
|
1529
|
+
setEmbeddingMeta(db, embedding);
|
|
1530
|
+
const kvHnsw = new HNSWIndex(
|
|
1531
|
+
config.embeddingDims,
|
|
1532
|
+
config.maxElements ?? 5e5,
|
|
1533
|
+
config.hnswM,
|
|
1534
|
+
config.hnswEfConstruction,
|
|
1535
|
+
config.hnswEfSearch
|
|
1536
|
+
);
|
|
1537
|
+
await kvHnsw.init();
|
|
1538
|
+
return { db, embedding, kvHnsw, skipVectorLoad };
|
|
1539
|
+
}
|
|
1540
|
+
__name(earlyInit, "earlyInit");
|
|
1541
|
+
async function lateInit(early, config, registry, sharedHnsw, kvVecs, getCollection) {
|
|
1542
|
+
const { db, embedding, kvHnsw, skipVectorLoad } = early;
|
|
1543
|
+
if (!skipVectorLoad) {
|
|
1544
|
+
loadVectors(db, "kv_vectors", "data_id", kvHnsw, kvVecs);
|
|
1545
|
+
}
|
|
1546
|
+
const ctx = {
|
|
1547
|
+
db,
|
|
1548
|
+
embedding,
|
|
1549
|
+
config,
|
|
1550
|
+
createHnsw: /* @__PURE__ */ __name((maxElements) => new HNSWIndex(
|
|
1551
|
+
config.embeddingDims,
|
|
1552
|
+
maxElements ?? config.maxElements,
|
|
1553
|
+
config.hnswM,
|
|
1554
|
+
config.hnswEfConstruction,
|
|
1555
|
+
config.hnswEfSearch
|
|
1556
|
+
).init(), "createHnsw"),
|
|
1557
|
+
loadVectors: /* @__PURE__ */ __name((table, idCol, hnsw, cache) => {
|
|
1558
|
+
if (skipVectorLoad) return;
|
|
1559
|
+
loadVectors(db, table, idCol, hnsw, cache);
|
|
1560
|
+
}, "loadVectors"),
|
|
1561
|
+
getOrCreateSharedHnsw: /* @__PURE__ */ __name(async (type, maxElements) => {
|
|
1562
|
+
const existing = sharedHnsw.get(type);
|
|
1563
|
+
if (existing) return { ...existing, isNew: false };
|
|
1564
|
+
const hnsw = await new HNSWIndex(
|
|
1565
|
+
config.embeddingDims,
|
|
1566
|
+
maxElements ?? config.maxElements,
|
|
1567
|
+
config.hnswM,
|
|
1568
|
+
config.hnswEfConstruction,
|
|
1569
|
+
config.hnswEfSearch
|
|
1570
|
+
).init();
|
|
1571
|
+
const vecCache = /* @__PURE__ */ new Map();
|
|
1572
|
+
sharedHnsw.set(type, { hnsw, vecCache });
|
|
1573
|
+
return { hnsw, vecCache, isNew: true };
|
|
1574
|
+
}, "getOrCreateSharedHnsw"),
|
|
1575
|
+
collection: getCollection
|
|
1576
|
+
};
|
|
1577
|
+
for (const mod of registry.all) {
|
|
1578
|
+
await mod.initialize(ctx);
|
|
1579
|
+
}
|
|
1580
|
+
const codeMod = sharedHnsw.get("code");
|
|
1581
|
+
const gitMod = sharedHnsw.get("git");
|
|
1582
|
+
const memMod = registry.firstByType("memory");
|
|
1583
|
+
let search;
|
|
1584
|
+
let bm25;
|
|
1585
|
+
let contextBuilder;
|
|
1586
|
+
if (codeMod || gitMod || memMod) {
|
|
1587
|
+
search = new MultiIndexSearch({
|
|
1588
|
+
db,
|
|
1589
|
+
codeHnsw: codeMod?.hnsw,
|
|
1590
|
+
gitHnsw: gitMod?.hnsw,
|
|
1591
|
+
patternHnsw: memMod?.hnsw,
|
|
1592
|
+
codeVecs: codeMod?.vecCache ?? /* @__PURE__ */ new Map(),
|
|
1593
|
+
gitVecs: gitMod?.vecCache ?? /* @__PURE__ */ new Map(),
|
|
1594
|
+
patternVecs: memMod?.vecCache ?? /* @__PURE__ */ new Map(),
|
|
1595
|
+
embedding,
|
|
1596
|
+
reranker: config.reranker
|
|
1597
|
+
});
|
|
1598
|
+
bm25 = new BM25Search(db);
|
|
1599
|
+
}
|
|
1600
|
+
if (search) {
|
|
1601
|
+
const firstGit = registry.firstByType("git");
|
|
1602
|
+
contextBuilder = new ContextBuilder(search, firstGit?.coEdits);
|
|
1603
|
+
}
|
|
1604
|
+
return { search, bm25, contextBuilder };
|
|
1605
|
+
}
|
|
1606
|
+
__name(lateInit, "lateInit");
|
|
1607
|
+
function loadVectors(db, table, idCol, hnsw, cache) {
|
|
1608
|
+
const rows = db.prepare(`SELECT ${idCol}, embedding FROM ${table}`).all();
|
|
1609
|
+
for (const row of rows) {
|
|
1610
|
+
const vec = new Float32Array(
|
|
1611
|
+
row.embedding.buffer.slice(
|
|
1612
|
+
row.embedding.byteOffset,
|
|
1613
|
+
row.embedding.byteOffset + row.embedding.byteLength
|
|
1614
|
+
)
|
|
1615
|
+
);
|
|
1616
|
+
hnsw.add(vec, row[idCol]);
|
|
1617
|
+
cache.set(row[idCol], vec);
|
|
1618
|
+
}
|
|
1619
|
+
}
|
|
1620
|
+
__name(loadVectors, "loadVectors");
|
|
1621
|
+
|
|
1622
|
+
// src/app/search-api.ts
|
|
1623
|
+
var SearchAPI = class {
|
|
1624
|
+
constructor(_d) {
|
|
1625
|
+
this._d = _d;
|
|
1626
|
+
}
|
|
1627
|
+
static {
|
|
1628
|
+
__name(this, "SearchAPI");
|
|
1629
|
+
}
|
|
1630
|
+
// ── Vector ──────────────────────────────────────
|
|
1631
|
+
async search(query, options) {
|
|
1632
|
+
if (!this._d.search) {
|
|
1633
|
+
return this._d.registry.has("docs") ? this._d.searchDocs(query, { k: 8 }) : [];
|
|
1634
|
+
}
|
|
1635
|
+
return this._d.search.search(query, options);
|
|
1636
|
+
}
|
|
1637
|
+
async searchCode(query, k = 8) {
|
|
1638
|
+
if (!this._d.registry.firstByType("code"))
|
|
1639
|
+
throw new Error("BrainBank: Indexer 'code' is not loaded. Add .use(code()) to your BrainBank instance.");
|
|
1640
|
+
if (!this._d.search)
|
|
1641
|
+
throw new Error("BrainBank: MultiIndexSearch not available. Ensure code indexer is loaded.");
|
|
1642
|
+
return this._d.search.search(query, { codeK: k, gitK: 0, patternK: 0 });
|
|
1643
|
+
}
|
|
1644
|
+
async searchCommits(query, k = 8) {
|
|
1645
|
+
if (!this._d.registry.firstByType("git"))
|
|
1646
|
+
throw new Error("BrainBank: Indexer 'git' is not loaded. Add .use(git()) to your BrainBank instance.");
|
|
1647
|
+
if (!this._d.search)
|
|
1648
|
+
throw new Error("BrainBank: MultiIndexSearch not available. Ensure git indexer is loaded.");
|
|
1649
|
+
return this._d.search.search(query, { codeK: 0, gitK: k, patternK: 0 });
|
|
1650
|
+
}
|
|
1651
|
+
// ── Hybrid ──────────────────────────────────────
|
|
1652
|
+
async hybridSearch(query, options) {
|
|
1653
|
+
const cols = options?.collections ?? {};
|
|
1654
|
+
const codeK = cols.code ?? options?.codeK ?? 6;
|
|
1655
|
+
const gitK = cols.git ?? options?.gitK ?? 5;
|
|
1656
|
+
const docsK = cols.docs ?? 8;
|
|
1657
|
+
const resultLists = [];
|
|
1658
|
+
if (this._d.search) {
|
|
1659
|
+
const [vec, kw] = await Promise.all([
|
|
1660
|
+
this._d.search.search(query, { ...options, codeK, gitK }),
|
|
1661
|
+
Promise.resolve(this._d.bm25.search(query, { codeK, gitK }))
|
|
1662
|
+
]);
|
|
1663
|
+
resultLists.push(vec, kw);
|
|
1664
|
+
}
|
|
1665
|
+
if (this._d.registry.has("docs")) {
|
|
1666
|
+
const docs = await this._d.searchDocs(query, { k: docsK });
|
|
1667
|
+
if (docs.length > 0) resultLists.push(docs);
|
|
1668
|
+
}
|
|
1669
|
+
const reserved = /* @__PURE__ */ new Set(["code", "git", "docs"]);
|
|
1670
|
+
for (const [name, k] of Object.entries(cols)) {
|
|
1671
|
+
if (reserved.has(name)) continue;
|
|
1672
|
+
const hits = await this._d.collection(name).search(query, { k });
|
|
1673
|
+
if (hits.length > 0) {
|
|
1674
|
+
resultLists.push(hits.map((h) => ({
|
|
1675
|
+
type: "collection",
|
|
1676
|
+
score: h.score ?? 0,
|
|
1677
|
+
content: h.content,
|
|
1678
|
+
metadata: { collection: name, id: h.id, ...h.metadata }
|
|
1679
|
+
})));
|
|
1680
|
+
}
|
|
1681
|
+
}
|
|
1682
|
+
if (resultLists.length === 0) return [];
|
|
1683
|
+
const fused = reciprocalRankFusion(resultLists);
|
|
1684
|
+
if (this._d.config.reranker && fused.length > 1) {
|
|
1685
|
+
const scores = await this._d.config.reranker.rank(query, fused.map((r) => r.content));
|
|
1686
|
+
return fused.map((r, i) => {
|
|
1687
|
+
const w = i < 3 ? 0.75 : i < 10 ? 0.6 : 0.4;
|
|
1688
|
+
return { ...r, score: w * r.score + (1 - w) * (scores[i] ?? 0) };
|
|
1689
|
+
}).sort((a, b) => b.score - a.score);
|
|
1690
|
+
}
|
|
1691
|
+
return fused;
|
|
1692
|
+
}
|
|
1693
|
+
// ── Keyword ─────────────────────────────────────
|
|
1694
|
+
searchBM25(query, options) {
|
|
1695
|
+
return this._d.bm25?.search(query, options) ?? [];
|
|
1696
|
+
}
|
|
1697
|
+
rebuildFTS() {
|
|
1698
|
+
this._d.bm25?.rebuild();
|
|
1699
|
+
}
|
|
1700
|
+
// ── Context ─────────────────────────────────────
|
|
1701
|
+
async getContext(task, options = {}) {
|
|
1702
|
+
const sections = [];
|
|
1703
|
+
if (this._d.contextBuilder) {
|
|
1704
|
+
const core = await this._d.contextBuilder.build(task, options);
|
|
1705
|
+
if (core) sections.push(core);
|
|
1706
|
+
}
|
|
1707
|
+
if (this._d.registry.has("docs")) {
|
|
1708
|
+
const docs = await this._d.searchDocs(task, { k: options.codeResults ?? 4 });
|
|
1709
|
+
if (docs.length > 0) {
|
|
1710
|
+
const body = docs.map((r) => {
|
|
1711
|
+
const m = r.metadata;
|
|
1712
|
+
const h = r.context ? `**[${m.collection}]** ${m.title} \u2014 _${r.context}_` : `**[${m.collection}]** ${m.title}`;
|
|
1713
|
+
return `${h}
|
|
1714
|
+
|
|
1715
|
+
${r.content}`;
|
|
1716
|
+
}).join("\n\n---\n\n");
|
|
1717
|
+
sections.push(`## Relevant Documents
|
|
1718
|
+
|
|
1719
|
+
${body}`);
|
|
1720
|
+
}
|
|
1721
|
+
}
|
|
1722
|
+
return sections.join("\n\n");
|
|
1723
|
+
}
|
|
1724
|
+
};
|
|
1725
|
+
|
|
1726
|
+
// src/app/index-api.ts
|
|
1727
|
+
var IndexAPI = class {
|
|
1728
|
+
constructor(_d) {
|
|
1729
|
+
this._d = _d;
|
|
1730
|
+
}
|
|
1731
|
+
static {
|
|
1732
|
+
__name(this, "IndexAPI");
|
|
1733
|
+
}
|
|
1734
|
+
async index(options = {}) {
|
|
1735
|
+
const want = new Set(options.modules ?? ["code", "git", "docs"]);
|
|
1736
|
+
const result = {};
|
|
1737
|
+
if (want.has("code")) {
|
|
1738
|
+
for (const mod of this._d.registry.allByType("code")) {
|
|
1739
|
+
const label = mod.name === "code" ? "code" : mod.name;
|
|
1740
|
+
options.onProgress?.(label, "Starting...");
|
|
1741
|
+
const r = await mod.index({
|
|
1742
|
+
forceReindex: options.forceReindex,
|
|
1743
|
+
onProgress: /* @__PURE__ */ __name((f, i, t) => options.onProgress?.(label, `[${i}/${t}] ${f}`), "onProgress")
|
|
1744
|
+
});
|
|
1745
|
+
if (result.code) {
|
|
1746
|
+
result.code.indexed += r.indexed;
|
|
1747
|
+
result.code.skipped += r.skipped;
|
|
1748
|
+
result.code.chunks = (result.code.chunks ?? 0) + (r.chunks ?? 0);
|
|
1749
|
+
} else {
|
|
1750
|
+
result.code = r;
|
|
1751
|
+
}
|
|
1752
|
+
}
|
|
1753
|
+
}
|
|
1754
|
+
if (want.has("git")) {
|
|
1755
|
+
for (const mod of this._d.registry.allByType("git")) {
|
|
1756
|
+
const label = mod.name === "git" ? "git" : mod.name;
|
|
1757
|
+
options.onProgress?.(label, "Starting...");
|
|
1758
|
+
const r = await mod.index({
|
|
1759
|
+
depth: options.gitDepth ?? this._d.gitDepth,
|
|
1760
|
+
onProgress: /* @__PURE__ */ __name((f, i, t) => options.onProgress?.(label, `[${i}/${t}] ${f}`), "onProgress")
|
|
1761
|
+
});
|
|
1762
|
+
if (result.git) {
|
|
1763
|
+
result.git.indexed += r.indexed;
|
|
1764
|
+
result.git.skipped += r.skipped;
|
|
1765
|
+
} else {
|
|
1766
|
+
result.git = r;
|
|
1767
|
+
}
|
|
1768
|
+
}
|
|
1769
|
+
}
|
|
1770
|
+
if (want.has("docs") && this._d.registry.has("docs")) {
|
|
1771
|
+
options.onProgress?.("docs", "Starting...");
|
|
1772
|
+
result.docs = await this._d.registry.get("docs").indexCollections({
|
|
1773
|
+
onProgress: /* @__PURE__ */ __name((coll, file, cur, total) => options.onProgress?.("docs", `[${coll}] ${cur}/${total}: ${file}`), "onProgress")
|
|
1774
|
+
});
|
|
1775
|
+
}
|
|
1776
|
+
this._d.emit("indexed", result);
|
|
1777
|
+
return result;
|
|
1778
|
+
}
|
|
1779
|
+
async indexCode(options = {}) {
|
|
1780
|
+
const mods = this._d.registry.allByType("code");
|
|
1781
|
+
if (!mods.length) throw new Error("BrainBank: Indexer 'code' is not loaded. Add .use(code()) to your BrainBank instance.");
|
|
1782
|
+
const acc = { indexed: 0, skipped: 0, chunks: 0 };
|
|
1783
|
+
for (const mod of mods) {
|
|
1784
|
+
const r = await mod.index(options);
|
|
1785
|
+
acc.indexed += r.indexed;
|
|
1786
|
+
acc.skipped += r.skipped;
|
|
1787
|
+
acc.chunks = (acc.chunks ?? 0) + (r.chunks ?? 0);
|
|
1788
|
+
}
|
|
1789
|
+
return acc;
|
|
1790
|
+
}
|
|
1791
|
+
async indexGit(options = {}) {
|
|
1792
|
+
const mods = this._d.registry.allByType("git");
|
|
1793
|
+
if (!mods.length) throw new Error("BrainBank: Indexer 'git' is not loaded. Add .use(git()) to your BrainBank instance.");
|
|
1794
|
+
const acc = { indexed: 0, skipped: 0 };
|
|
1795
|
+
for (const mod of mods) {
|
|
1796
|
+
const r = await mod.index(options);
|
|
1797
|
+
acc.indexed += r.indexed;
|
|
1798
|
+
acc.skipped += r.skipped;
|
|
1799
|
+
}
|
|
1800
|
+
return acc;
|
|
1801
|
+
}
|
|
1802
|
+
};
|
|
1803
|
+
|
|
1804
|
+
// src/services/watch.ts
|
|
1361
1805
|
import * as fs2 from "fs";
|
|
1362
1806
|
import * as path3 from "path";
|
|
1363
1807
|
function createWatcher(reindexFn, indexers, repoPath, options = {}) {
|
|
@@ -1457,7 +1901,8 @@ function createWatcher(reindexFn, indexers, repoPath, options = {}) {
|
|
|
1457
1901
|
for (const watchPath of paths) {
|
|
1458
1902
|
const resolved = path3.resolve(watchPath);
|
|
1459
1903
|
try {
|
|
1460
|
-
const
|
|
1904
|
+
const supportsRecursive = process.platform === "darwin" || process.platform === "win32";
|
|
1905
|
+
const watcher = fs2.watch(resolved, { recursive: supportsRecursive }, (_event, filename) => {
|
|
1461
1906
|
if (!active || !filename) return;
|
|
1462
1907
|
if (!shouldWatch(filename)) return;
|
|
1463
1908
|
pending.add(filename);
|
|
@@ -1486,92 +1931,56 @@ function createWatcher(reindexFn, indexers, repoPath, options = {}) {
|
|
|
1486
1931
|
}
|
|
1487
1932
|
__name(createWatcher, "createWatcher");
|
|
1488
1933
|
|
|
1489
|
-
// src/
|
|
1934
|
+
// src/app/brainbank.ts
|
|
1490
1935
|
var BrainBank = class extends EventEmitter {
|
|
1491
1936
|
static {
|
|
1492
1937
|
__name(this, "BrainBank");
|
|
1493
1938
|
}
|
|
1939
|
+
// ── State ───────────────────────────────────────
|
|
1494
1940
|
_config;
|
|
1495
1941
|
_db;
|
|
1496
1942
|
_embedding;
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
_bm25;
|
|
1501
|
-
_contextBuilder;
|
|
1943
|
+
_registry = new IndexerRegistry();
|
|
1944
|
+
_searchAPI;
|
|
1945
|
+
_indexAPI;
|
|
1502
1946
|
_initialized = false;
|
|
1947
|
+
_initPromise = null;
|
|
1503
1948
|
_watcher;
|
|
1504
|
-
// Collections
|
|
1949
|
+
// Collections (KV store)
|
|
1505
1950
|
_collections = /* @__PURE__ */ new Map();
|
|
1506
1951
|
_kvHnsw;
|
|
1507
1952
|
_kvVecs = /* @__PURE__ */ new Map();
|
|
1508
|
-
// Shared HNSW pool
|
|
1953
|
+
// Shared HNSW pool — code:frontend + code:backend share one index
|
|
1509
1954
|
_sharedHnsw = /* @__PURE__ */ new Map();
|
|
1510
1955
|
constructor(config = {}) {
|
|
1511
1956
|
super();
|
|
1512
1957
|
this._config = resolveConfig(config);
|
|
1513
1958
|
}
|
|
1514
|
-
// ── Indexer
|
|
1959
|
+
// ── Indexer registration ─────────────────────────
|
|
1515
1960
|
/**
|
|
1516
1961
|
* Register an indexer. Chainable.
|
|
1517
|
-
*
|
|
1962
|
+
*
|
|
1518
1963
|
* brain.use(code({ repoPath: '.' })).use(docs());
|
|
1519
1964
|
*/
|
|
1520
1965
|
use(indexer) {
|
|
1521
|
-
if (this._initialized)
|
|
1522
|
-
throw new Error(
|
|
1523
|
-
|
|
1524
|
-
);
|
|
1525
|
-
}
|
|
1526
|
-
this._modules.set(indexer.name, indexer);
|
|
1966
|
+
if (this._initialized)
|
|
1967
|
+
throw new Error(`BrainBank: Cannot add indexer '${indexer.name}' after initialization. Call .use() before any operations.`);
|
|
1968
|
+
this._registry.register(indexer);
|
|
1527
1969
|
return this;
|
|
1528
1970
|
}
|
|
1529
1971
|
/** Get the list of registered indexer names. */
|
|
1530
1972
|
get indexers() {
|
|
1531
|
-
return
|
|
1532
|
-
}
|
|
1533
|
-
/** @deprecated Use .indexers instead. */
|
|
1534
|
-
get modules() {
|
|
1535
|
-
return this.indexers;
|
|
1973
|
+
return this._registry.names;
|
|
1536
1974
|
}
|
|
1537
1975
|
/** Check if an indexer is loaded. Also matches type prefix (e.g. 'code' matches 'code:frontend'). */
|
|
1538
1976
|
has(name) {
|
|
1539
|
-
|
|
1540
|
-
for (const key of this._modules.keys()) {
|
|
1541
|
-
if (key.startsWith(name + ":")) return true;
|
|
1542
|
-
}
|
|
1543
|
-
return false;
|
|
1977
|
+
return this._registry.has(name);
|
|
1544
1978
|
}
|
|
1545
1979
|
/** Get an indexer instance. Throws if not loaded. */
|
|
1546
|
-
indexer(
|
|
1547
|
-
|
|
1548
|
-
if (!mod) {
|
|
1549
|
-
const first = this._findFirstByType(name);
|
|
1550
|
-
if (first) return first;
|
|
1551
|
-
throw new Error(
|
|
1552
|
-
`BrainBank: Indexer '${name}' is not loaded. Add .use(${name}()) to your BrainBank instance.`
|
|
1553
|
-
);
|
|
1554
|
-
}
|
|
1555
|
-
return mod;
|
|
1556
|
-
}
|
|
1557
|
-
/** @deprecated Use .indexer() instead. */
|
|
1558
|
-
module(name) {
|
|
1559
|
-
return this.indexer(name);
|
|
1560
|
-
}
|
|
1561
|
-
/** Find all indexers whose name equals or starts with the type prefix. */
|
|
1562
|
-
_findAllByType(type) {
|
|
1563
|
-
return [...this._modules.values()].filter(
|
|
1564
|
-
(m) => m.name === type || m.name.startsWith(type + ":")
|
|
1565
|
-
);
|
|
1566
|
-
}
|
|
1567
|
-
/** Find the first indexer that matches the type. */
|
|
1568
|
-
_findFirstByType(type) {
|
|
1569
|
-
for (const m of this._modules.values()) {
|
|
1570
|
-
if (m.name === type || m.name.startsWith(type + ":")) return m;
|
|
1571
|
-
}
|
|
1572
|
-
return void 0;
|
|
1980
|
+
indexer(n) {
|
|
1981
|
+
return this._registry.get(n);
|
|
1573
1982
|
}
|
|
1574
|
-
// ── Initialization
|
|
1983
|
+
// ── Initialization ───────────────────────────────
|
|
1575
1984
|
/**
|
|
1576
1985
|
* Initialize database, HNSW indices, and load existing vectors.
|
|
1577
1986
|
* Only initializes registered modules.
|
|
@@ -1579,339 +1988,184 @@ var BrainBank = class extends EventEmitter {
|
|
|
1579
1988
|
*/
|
|
1580
1989
|
async initialize() {
|
|
1581
1990
|
if (this._initialized) return;
|
|
1582
|
-
|
|
1583
|
-
this.
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1991
|
+
if (this._initPromise) return this._initPromise;
|
|
1992
|
+
this._initPromise = this._runInitialize().catch((err) => {
|
|
1993
|
+
for (const { hnsw } of this._sharedHnsw.values()) try {
|
|
1994
|
+
hnsw.reinit();
|
|
1995
|
+
} catch {
|
|
1996
|
+
}
|
|
1997
|
+
this._kvVecs.clear();
|
|
1998
|
+
if (this._kvHnsw) try {
|
|
1999
|
+
this._kvHnsw.reinit();
|
|
2000
|
+
} catch {
|
|
2001
|
+
}
|
|
2002
|
+
try {
|
|
2003
|
+
this._db?.close();
|
|
2004
|
+
} catch {
|
|
2005
|
+
}
|
|
2006
|
+
this._db = void 0;
|
|
2007
|
+
this._kvHnsw = void 0;
|
|
2008
|
+
this._searchAPI = void 0;
|
|
2009
|
+
this._indexAPI = void 0;
|
|
2010
|
+
throw err;
|
|
2011
|
+
}).finally(() => {
|
|
2012
|
+
this._initPromise = null;
|
|
2013
|
+
});
|
|
2014
|
+
return this._initPromise;
|
|
2015
|
+
}
|
|
2016
|
+
async _runInitialize() {
|
|
2017
|
+
if (this._initialized) return;
|
|
2018
|
+
const early = await earlyInit(this._config, (e, d) => this.emit(e, d));
|
|
2019
|
+
this._db = early.db;
|
|
2020
|
+
this._embedding = early.embedding;
|
|
2021
|
+
this._kvHnsw = early.kvHnsw;
|
|
2022
|
+
const late = await lateInit(
|
|
2023
|
+
early,
|
|
2024
|
+
this._config,
|
|
2025
|
+
this._registry,
|
|
2026
|
+
this._sharedHnsw,
|
|
2027
|
+
this._kvVecs,
|
|
2028
|
+
(name) => this.collection(name)
|
|
1591
2029
|
);
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
config.hnswEfSearch
|
|
1605
|
-
).init();
|
|
1606
|
-
}, "createHnsw"),
|
|
1607
|
-
loadVectors: /* @__PURE__ */ __name((table, idCol, hnsw, cache) => {
|
|
1608
|
-
this._loadVectors(table, idCol, hnsw, cache);
|
|
1609
|
-
}, "loadVectors"),
|
|
1610
|
-
getOrCreateSharedHnsw: /* @__PURE__ */ __name(async (type, maxElements) => {
|
|
1611
|
-
const existing = this._sharedHnsw.get(type);
|
|
1612
|
-
if (existing) return { ...existing, isNew: false };
|
|
1613
|
-
const hnsw = await new HNSWIndex(
|
|
1614
|
-
config.embeddingDims,
|
|
1615
|
-
maxElements ?? config.maxElements,
|
|
1616
|
-
config.hnswM,
|
|
1617
|
-
config.hnswEfConstruction,
|
|
1618
|
-
config.hnswEfSearch
|
|
1619
|
-
).init();
|
|
1620
|
-
const vecCache = /* @__PURE__ */ new Map();
|
|
1621
|
-
this._sharedHnsw.set(type, { hnsw, vecCache });
|
|
1622
|
-
return { hnsw, vecCache, isNew: true };
|
|
1623
|
-
}, "getOrCreateSharedHnsw"),
|
|
1624
|
-
collection: /* @__PURE__ */ __name((name) => this.collection(name), "collection")
|
|
1625
|
-
};
|
|
1626
|
-
for (const mod of this._modules.values()) {
|
|
1627
|
-
await mod.initialize(ctx);
|
|
1628
|
-
}
|
|
1629
|
-
const codeShared = this._sharedHnsw.get("code");
|
|
1630
|
-
const gitShared = this._sharedHnsw.get("git");
|
|
1631
|
-
const memMod = this._modules.get("memory");
|
|
1632
|
-
if (codeShared || gitShared || memMod) {
|
|
1633
|
-
this._search = new UnifiedSearch({
|
|
1634
|
-
db: this._db,
|
|
1635
|
-
codeHnsw: codeShared?.hnsw,
|
|
1636
|
-
gitHnsw: gitShared?.hnsw,
|
|
1637
|
-
memHnsw: memMod?.hnsw,
|
|
1638
|
-
codeVecs: codeShared?.vecCache ?? /* @__PURE__ */ new Map(),
|
|
1639
|
-
gitVecs: gitShared?.vecCache ?? /* @__PURE__ */ new Map(),
|
|
1640
|
-
memVecs: memMod?.vecCache ?? /* @__PURE__ */ new Map(),
|
|
1641
|
-
embedding: this._embedding,
|
|
1642
|
-
reranker: this._config.reranker
|
|
1643
|
-
});
|
|
1644
|
-
this._bm25 = new BM25Search(this._db);
|
|
1645
|
-
}
|
|
1646
|
-
if (this._search) {
|
|
1647
|
-
const firstGit = this._findFirstByType("git");
|
|
1648
|
-
this._contextBuilder = new ContextBuilder(this._search, firstGit?.coEdits);
|
|
1649
|
-
}
|
|
1650
|
-
setEmbeddingMeta(this._db, this._embedding);
|
|
1651
|
-
const mismatch = detectProviderMismatch(this._db, this._embedding);
|
|
1652
|
-
if (mismatch?.mismatch) {
|
|
1653
|
-
this.emit("warning", {
|
|
1654
|
-
type: "provider_mismatch",
|
|
1655
|
-
message: `Embedding provider changed (${mismatch.stored} \u2192 ${mismatch.current}). Run brain.reembed() to regenerate vectors.`
|
|
1656
|
-
});
|
|
1657
|
-
}
|
|
2030
|
+
this._searchAPI = new SearchAPI({
|
|
2031
|
+
...late,
|
|
2032
|
+
registry: this._registry,
|
|
2033
|
+
config: this._config,
|
|
2034
|
+
searchDocs: /* @__PURE__ */ __name((q, o) => this.searchDocs(q, o), "searchDocs"),
|
|
2035
|
+
collection: /* @__PURE__ */ __name((n) => this.collection(n), "collection")
|
|
2036
|
+
});
|
|
2037
|
+
this._indexAPI = new IndexAPI({
|
|
2038
|
+
registry: this._registry,
|
|
2039
|
+
gitDepth: this._config.gitDepth,
|
|
2040
|
+
emit: /* @__PURE__ */ __name((e, d) => this.emit(e, d), "emit")
|
|
2041
|
+
});
|
|
1658
2042
|
this._initialized = true;
|
|
1659
2043
|
this.emit("initialized", { indexers: this.indexers });
|
|
1660
2044
|
}
|
|
1661
|
-
// ── Collections
|
|
2045
|
+
// ── Collections (KV) ────────────────────────────
|
|
1662
2046
|
/**
|
|
1663
2047
|
* Get or create a dynamic collection.
|
|
1664
2048
|
* Collections are the universal data primitive — store anything, search semantically.
|
|
1665
|
-
*
|
|
2049
|
+
*
|
|
1666
2050
|
* const errors = brain.collection('debug_errors');
|
|
1667
2051
|
* await errors.add('Fixed null check', { file: 'api.ts' });
|
|
1668
2052
|
* const hits = await errors.search('null pointer');
|
|
1669
2053
|
*/
|
|
1670
2054
|
collection(name) {
|
|
1671
|
-
|
|
1672
|
-
if (
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
"BrainBank: Must call initialize() before using collections. Or use await brain.collection() after an async operation."
|
|
1676
|
-
);
|
|
1677
|
-
}
|
|
1678
|
-
if (!this._kvHnsw) {
|
|
1679
|
-
throw new Error("BrainBank: Collections HNSW not initialized. Call initialize() first.");
|
|
1680
|
-
}
|
|
1681
|
-
coll = new Collection(name, this._db, this._embedding, this._kvHnsw, this._kvVecs, this._config.reranker);
|
|
2055
|
+
if (this._collections.has(name)) return this._collections.get(name);
|
|
2056
|
+
if (!this._kvHnsw)
|
|
2057
|
+
throw new Error("BrainBank: Collections not ready. Call await brain.initialize() first.");
|
|
2058
|
+
const coll = new Collection(name, this._db, this._embedding, this._kvHnsw, this._kvVecs, this._config.reranker);
|
|
1682
2059
|
this._collections.set(name, coll);
|
|
1683
2060
|
return coll;
|
|
1684
2061
|
}
|
|
1685
2062
|
/** List all collection names that have data. */
|
|
1686
2063
|
listCollectionNames() {
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
).all();
|
|
1690
|
-
return rows.map((r) => r.collection);
|
|
2064
|
+
this._requireInit("listCollectionNames");
|
|
2065
|
+
return this._db.prepare("SELECT DISTINCT collection FROM kv_data ORDER BY collection").all().map((r) => r.collection);
|
|
1691
2066
|
}
|
|
1692
|
-
// ── Indexing
|
|
1693
|
-
/**
|
|
1694
|
-
* Index code, git, and/or docs in one call.
|
|
1695
|
-
* Incremental — only processes changes since last run.
|
|
1696
|
-
* @param modules - Which modules to index. Default: all available (['code', 'git', 'docs'])
|
|
1697
|
-
*/
|
|
2067
|
+
// ── Indexing (delegated to IndexAPI) ─────────────
|
|
1698
2068
|
async index(options = {}) {
|
|
1699
2069
|
await this.initialize();
|
|
1700
|
-
|
|
1701
|
-
const result = {};
|
|
1702
|
-
if (want.has("code")) {
|
|
1703
|
-
const codeMods = this._findAllByType("code");
|
|
1704
|
-
for (const mod of codeMods) {
|
|
1705
|
-
const label = mod.name === "code" ? "code" : mod.name;
|
|
1706
|
-
options.onProgress?.(label, "Starting...");
|
|
1707
|
-
const r = await mod.index({
|
|
1708
|
-
forceReindex: options.forceReindex,
|
|
1709
|
-
onProgress: /* @__PURE__ */ __name((f, i, t) => options.onProgress?.(label, `[${i}/${t}] ${f}`), "onProgress")
|
|
1710
|
-
});
|
|
1711
|
-
if (result.code) {
|
|
1712
|
-
result.code.indexed += r.indexed;
|
|
1713
|
-
result.code.skipped += r.skipped;
|
|
1714
|
-
result.code.chunks = (result.code.chunks ?? 0) + (r.chunks ?? 0);
|
|
1715
|
-
} else {
|
|
1716
|
-
result.code = r;
|
|
1717
|
-
}
|
|
1718
|
-
}
|
|
1719
|
-
}
|
|
1720
|
-
if (want.has("git")) {
|
|
1721
|
-
const gitMods = this._findAllByType("git");
|
|
1722
|
-
for (const mod of gitMods) {
|
|
1723
|
-
const label = mod.name === "git" ? "git" : mod.name;
|
|
1724
|
-
options.onProgress?.(label, "Starting...");
|
|
1725
|
-
const r = await mod.index({
|
|
1726
|
-
depth: options.gitDepth ?? this._config.gitDepth,
|
|
1727
|
-
onProgress: /* @__PURE__ */ __name((f, i, t) => options.onProgress?.(label, `[${i}/${t}] ${f}`), "onProgress")
|
|
1728
|
-
});
|
|
1729
|
-
if (result.git) {
|
|
1730
|
-
result.git.indexed += r.indexed;
|
|
1731
|
-
result.git.skipped += r.skipped;
|
|
1732
|
-
} else {
|
|
1733
|
-
result.git = r;
|
|
1734
|
-
}
|
|
1735
|
-
}
|
|
1736
|
-
}
|
|
1737
|
-
if (want.has("docs") && this._modules.has("docs")) {
|
|
1738
|
-
options.onProgress?.("docs", "Starting...");
|
|
1739
|
-
const docsResults = await this.module("docs").indexCollections({
|
|
1740
|
-
onProgress: /* @__PURE__ */ __name((coll, file, cur, total) => options.onProgress?.("docs", `[${coll}] ${cur}/${total}: ${file}`), "onProgress")
|
|
1741
|
-
});
|
|
1742
|
-
result.docs = docsResults;
|
|
1743
|
-
}
|
|
1744
|
-
this.emit("indexed", result);
|
|
1745
|
-
return result;
|
|
2070
|
+
return this._indexAPI.index(options);
|
|
1746
2071
|
}
|
|
1747
|
-
/** Index only code files. */
|
|
2072
|
+
/** Index only code files (all repos in multi-repo mode). */
|
|
1748
2073
|
async indexCode(options = {}) {
|
|
1749
2074
|
await this.initialize();
|
|
1750
|
-
return this.
|
|
2075
|
+
return this._indexAPI.indexCode(options);
|
|
1751
2076
|
}
|
|
1752
|
-
/** Index only git history. */
|
|
2077
|
+
/** Index only git history (all repos in multi-repo mode). */
|
|
1753
2078
|
async indexGit(options = {}) {
|
|
1754
2079
|
await this.initialize();
|
|
1755
|
-
return this.
|
|
2080
|
+
return this._indexAPI.indexGit(options);
|
|
1756
2081
|
}
|
|
1757
|
-
// ── Document
|
|
2082
|
+
// ── Document collections ─────────────────────────
|
|
1758
2083
|
/** Register a document collection. */
|
|
1759
2084
|
async addCollection(collection) {
|
|
1760
2085
|
await this.initialize();
|
|
1761
|
-
this.
|
|
2086
|
+
this.indexer("docs").addCollection(collection);
|
|
1762
2087
|
}
|
|
1763
2088
|
/** Remove a collection and all its indexed data. */
|
|
1764
2089
|
async removeCollection(name) {
|
|
1765
2090
|
await this.initialize();
|
|
1766
|
-
this.
|
|
2091
|
+
this.indexer("docs").removeCollection(name);
|
|
1767
2092
|
}
|
|
1768
2093
|
/** List all registered collections. */
|
|
1769
2094
|
listCollections() {
|
|
1770
|
-
|
|
2095
|
+
this._requireInit("listCollections");
|
|
2096
|
+
return this.indexer("docs").listCollections();
|
|
1771
2097
|
}
|
|
1772
2098
|
/** Index all (or specific) document collections. */
|
|
1773
2099
|
async indexDocs(options = {}) {
|
|
1774
2100
|
await this.initialize();
|
|
1775
|
-
const results = await this.
|
|
2101
|
+
const results = await this.indexer("docs").indexCollections(options);
|
|
1776
2102
|
this.emit("docsIndexed", results);
|
|
1777
2103
|
return results;
|
|
1778
2104
|
}
|
|
1779
2105
|
/** Search documents only. */
|
|
1780
2106
|
async searchDocs(query, options) {
|
|
1781
2107
|
await this.initialize();
|
|
1782
|
-
return this.
|
|
2108
|
+
return this.indexer("docs").search(query, options);
|
|
1783
2109
|
}
|
|
1784
|
-
// ── Context
|
|
2110
|
+
// ── Context metadata ─────────────────────────────
|
|
1785
2111
|
/** Add context description for a collection path. */
|
|
1786
2112
|
addContext(collection, path4, context) {
|
|
1787
|
-
this.
|
|
2113
|
+
this.indexer("docs").addContext(collection, path4, context);
|
|
1788
2114
|
}
|
|
1789
2115
|
/** Remove context for a collection path. */
|
|
1790
2116
|
removeContext(collection, path4) {
|
|
1791
|
-
this.
|
|
2117
|
+
this.indexer("docs").removeContext(collection, path4);
|
|
1792
2118
|
}
|
|
1793
2119
|
/** List all context entries. */
|
|
1794
2120
|
listContexts() {
|
|
1795
|
-
return this.
|
|
2121
|
+
return this.indexer("docs").listContexts();
|
|
1796
2122
|
}
|
|
1797
|
-
// ──
|
|
2123
|
+
// ── Search (delegated to SearchAPI) ─────────────
|
|
1798
2124
|
/**
|
|
1799
2125
|
* Get formatted context for a task.
|
|
1800
2126
|
* Returns markdown ready for system prompt injection.
|
|
1801
2127
|
*/
|
|
1802
2128
|
async getContext(task, options = {}) {
|
|
1803
2129
|
await this.initialize();
|
|
1804
|
-
|
|
1805
|
-
if (this._contextBuilder) {
|
|
1806
|
-
const coreContext = await this._contextBuilder.build(task, options);
|
|
1807
|
-
if (coreContext) sections.push(coreContext);
|
|
1808
|
-
}
|
|
1809
|
-
if (this.has("docs")) {
|
|
1810
|
-
const docResults = await this.searchDocs(task, { k: options.codeResults ?? 4 });
|
|
1811
|
-
if (docResults.length > 0) {
|
|
1812
|
-
const docSection = docResults.map((r) => {
|
|
1813
|
-
const header = r.context ? `**[${r.metadata.collection}]** ${r.metadata.title} \u2014 _${r.context}_` : `**[${r.metadata.collection}]** ${r.metadata.title}`;
|
|
1814
|
-
return `${header}
|
|
1815
|
-
|
|
1816
|
-
${r.content}`;
|
|
1817
|
-
}).join("\n\n---\n\n");
|
|
1818
|
-
sections.push(`## Relevant Documents
|
|
1819
|
-
|
|
1820
|
-
${docSection}`);
|
|
1821
|
-
}
|
|
1822
|
-
}
|
|
1823
|
-
return sections.join("\n\n");
|
|
2130
|
+
return this._searchAPI.getContext(task, options);
|
|
1824
2131
|
}
|
|
1825
|
-
// ── Search ──────────────────────────────────────
|
|
1826
2132
|
/** Semantic search across all loaded modules. */
|
|
1827
2133
|
async search(query, options) {
|
|
1828
2134
|
await this.initialize();
|
|
1829
|
-
|
|
1830
|
-
if (this.has("docs")) return this.searchDocs(query, { k: 8 });
|
|
1831
|
-
return [];
|
|
1832
|
-
}
|
|
1833
|
-
return this._search.search(query, options);
|
|
2135
|
+
return this._searchAPI.search(query, options);
|
|
1834
2136
|
}
|
|
1835
2137
|
/** Semantic search over code only. */
|
|
1836
2138
|
async searchCode(query, k = 8) {
|
|
1837
|
-
this.module("code");
|
|
1838
2139
|
await this.initialize();
|
|
1839
|
-
return this.
|
|
2140
|
+
return this._searchAPI.searchCode(query, k);
|
|
1840
2141
|
}
|
|
1841
2142
|
/** Semantic search over commits only. */
|
|
1842
2143
|
async searchCommits(query, k = 8) {
|
|
1843
|
-
this.module("git");
|
|
1844
2144
|
await this.initialize();
|
|
1845
|
-
return this.
|
|
2145
|
+
return this._searchAPI.searchCommits(query, k);
|
|
1846
2146
|
}
|
|
1847
|
-
// ── Hybrid Search ───────────────────────────────
|
|
1848
2147
|
/**
|
|
1849
2148
|
* Hybrid search: vector + BM25 fused with Reciprocal Rank Fusion.
|
|
1850
2149
|
* Best quality — catches both exact keyword matches and conceptual similarities.
|
|
1851
2150
|
*/
|
|
1852
2151
|
async hybridSearch(query, options) {
|
|
1853
2152
|
await this.initialize();
|
|
1854
|
-
|
|
1855
|
-
const codeK = cols.code ?? options?.codeK ?? 6;
|
|
1856
|
-
const gitK = cols.git ?? options?.gitK ?? 5;
|
|
1857
|
-
const docsK = cols.docs ?? 8;
|
|
1858
|
-
const resultLists = [];
|
|
1859
|
-
if (this._search) {
|
|
1860
|
-
const searchOpts = { ...options, codeK, gitK };
|
|
1861
|
-
const [vectorResults, bm25Results] = await Promise.all([
|
|
1862
|
-
this._search.search(query, searchOpts),
|
|
1863
|
-
Promise.resolve(this._bm25.search(query, searchOpts))
|
|
1864
|
-
]);
|
|
1865
|
-
resultLists.push(vectorResults, bm25Results);
|
|
1866
|
-
}
|
|
1867
|
-
if (this.has("docs")) {
|
|
1868
|
-
const docResults = await this.searchDocs(query, { k: docsK });
|
|
1869
|
-
if (docResults.length > 0) resultLists.push(docResults);
|
|
1870
|
-
}
|
|
1871
|
-
const reserved = /* @__PURE__ */ new Set(["code", "git", "docs"]);
|
|
1872
|
-
for (const [name, k] of Object.entries(cols)) {
|
|
1873
|
-
if (reserved.has(name)) continue;
|
|
1874
|
-
const col = this.collection(name);
|
|
1875
|
-
const hits = await col.search(query, { k });
|
|
1876
|
-
if (hits.length > 0) {
|
|
1877
|
-
resultLists.push(hits.map((h) => ({
|
|
1878
|
-
type: "collection",
|
|
1879
|
-
score: h.score ?? 0,
|
|
1880
|
-
content: h.content,
|
|
1881
|
-
metadata: { collection: name, id: h.id, ...h.metadata }
|
|
1882
|
-
})));
|
|
1883
|
-
}
|
|
1884
|
-
}
|
|
1885
|
-
if (resultLists.length === 0) return [];
|
|
1886
|
-
const fused = reciprocalRankFusion(resultLists);
|
|
1887
|
-
if (this._config.reranker && fused.length > 1) {
|
|
1888
|
-
const documents = fused.map((r) => r.content);
|
|
1889
|
-
const scores = await this._config.reranker.rank(query, documents);
|
|
1890
|
-
const blended = fused.map((r, i) => {
|
|
1891
|
-
const pos = i + 1;
|
|
1892
|
-
const rrfWeight = pos <= 3 ? 0.75 : pos <= 10 ? 0.6 : 0.4;
|
|
1893
|
-
return {
|
|
1894
|
-
...r,
|
|
1895
|
-
score: rrfWeight * r.score + (1 - rrfWeight) * (scores[i] ?? 0)
|
|
1896
|
-
};
|
|
1897
|
-
});
|
|
1898
|
-
return blended.sort((a, b) => b.score - a.score);
|
|
1899
|
-
}
|
|
1900
|
-
return fused;
|
|
2153
|
+
return this._searchAPI.hybridSearch(query, options);
|
|
1901
2154
|
}
|
|
1902
2155
|
/** BM25 keyword search only (no embeddings needed). */
|
|
1903
2156
|
searchBM25(query, options) {
|
|
1904
|
-
|
|
1905
|
-
return this.
|
|
2157
|
+
this._requireInit("searchBM25");
|
|
2158
|
+
return this._searchAPI.searchBM25(query, options);
|
|
1906
2159
|
}
|
|
1907
2160
|
/** Rebuild FTS5 indices. */
|
|
1908
2161
|
rebuildFTS() {
|
|
1909
|
-
this.
|
|
2162
|
+
this._requireInit("rebuildFTS");
|
|
2163
|
+
this._searchAPI.rebuildFTS();
|
|
1910
2164
|
}
|
|
1911
|
-
// ──
|
|
2165
|
+
// ── Queries ──────────────────────────────────────
|
|
1912
2166
|
/** Get git history for a specific file. */
|
|
1913
2167
|
async fileHistory(filePath, limit = 20) {
|
|
1914
|
-
this.
|
|
2168
|
+
this.indexer("git");
|
|
1915
2169
|
await this.initialize();
|
|
1916
2170
|
return this._db.prepare(`
|
|
1917
2171
|
SELECT c.short_hash, c.message, c.author, c.date, c.additions, c.deletions
|
|
@@ -1923,97 +2177,71 @@ ${docSection}`);
|
|
|
1923
2177
|
}
|
|
1924
2178
|
/** Get co-edit suggestions for a file. */
|
|
1925
2179
|
coEdits(filePath, limit = 5) {
|
|
1926
|
-
|
|
1927
|
-
return
|
|
2180
|
+
this._requireInit("coEdits");
|
|
2181
|
+
return this.indexer("git").suggestCoEdits(filePath, limit);
|
|
1928
2182
|
}
|
|
1929
|
-
// ── Stats
|
|
2183
|
+
// ── Stats ────────────────────────────────────────
|
|
1930
2184
|
/** Get statistics for all loaded modules. */
|
|
1931
2185
|
stats() {
|
|
2186
|
+
this._requireInit("stats");
|
|
1932
2187
|
const result = {};
|
|
1933
2188
|
if (this.has("code")) {
|
|
1934
|
-
const
|
|
2189
|
+
const sh = this._sharedHnsw.get("code");
|
|
1935
2190
|
result.code = {
|
|
1936
2191
|
files: this._db.prepare("SELECT COUNT(DISTINCT file_path) as c FROM code_chunks").get().c,
|
|
1937
2192
|
chunks: this._db.prepare("SELECT COUNT(*) as c FROM code_chunks").get().c,
|
|
1938
|
-
hnswSize:
|
|
2193
|
+
hnswSize: sh?.hnsw.size ?? 0
|
|
1939
2194
|
};
|
|
1940
2195
|
}
|
|
1941
2196
|
if (this.has("git")) {
|
|
1942
|
-
const
|
|
2197
|
+
const sh = this._sharedHnsw.get("git");
|
|
1943
2198
|
result.git = {
|
|
1944
2199
|
commits: this._db.prepare("SELECT COUNT(*) as c FROM git_commits").get().c,
|
|
1945
2200
|
filesTracked: this._db.prepare("SELECT COUNT(DISTINCT file_path) as c FROM commit_files").get().c,
|
|
1946
2201
|
coEdits: this._db.prepare("SELECT COUNT(*) as c FROM co_edits").get().c,
|
|
1947
|
-
hnswSize:
|
|
2202
|
+
hnswSize: sh?.hnsw.size ?? 0
|
|
1948
2203
|
};
|
|
1949
2204
|
}
|
|
1950
2205
|
if (this.has("docs")) {
|
|
1951
|
-
|
|
1952
|
-
result.documents = mod.stats();
|
|
2206
|
+
result.documents = this.indexer("docs").stats();
|
|
1953
2207
|
}
|
|
1954
2208
|
return result;
|
|
1955
2209
|
}
|
|
1956
|
-
// ── Watch
|
|
2210
|
+
// ── Watch ────────────────────────────────────────
|
|
1957
2211
|
/**
|
|
1958
2212
|
* Start watching for file changes and auto-re-index.
|
|
1959
2213
|
* Works with built-in and custom indexers.
|
|
1960
|
-
*
|
|
1961
|
-
* const watcher = brain.watch({
|
|
1962
|
-
* onIndex: (file, indexer) => console.log(`${indexer}: ${file}`),
|
|
1963
|
-
* });
|
|
1964
|
-
* // later: watcher.close();
|
|
1965
2214
|
*/
|
|
1966
2215
|
watch(options = {}) {
|
|
1967
|
-
|
|
1968
|
-
throw new Error("BrainBank: Not initialized. Call initialize() before watch().");
|
|
1969
|
-
}
|
|
2216
|
+
this._requireInit("watch");
|
|
1970
2217
|
this._watcher?.close();
|
|
1971
2218
|
this._watcher = createWatcher(
|
|
1972
2219
|
async () => {
|
|
1973
2220
|
await this.index();
|
|
1974
2221
|
},
|
|
1975
|
-
this.
|
|
2222
|
+
this._registry.raw,
|
|
1976
2223
|
this._config.repoPath,
|
|
1977
2224
|
options
|
|
1978
2225
|
);
|
|
1979
2226
|
return this._watcher;
|
|
1980
2227
|
}
|
|
1981
|
-
// ── Re-
|
|
2228
|
+
// ── Re-embed ─────────────────────────────────────
|
|
1982
2229
|
/**
|
|
1983
2230
|
* Re-embed all existing text with the current embedding provider.
|
|
1984
2231
|
* Use this when switching providers (e.g. Local → OpenAI).
|
|
1985
|
-
* Does NOT re-parse files, git history, or documents — only regenerates vectors.
|
|
1986
|
-
*
|
|
1987
|
-
* @example
|
|
1988
|
-
* const brain = new BrainBank({ embeddingProvider: new OpenAIEmbedding() });
|
|
1989
|
-
* await brain.initialize();
|
|
1990
|
-
* const result = await brain.reembed();
|
|
1991
|
-
* // → { code: 1200, git: 500, docs: 80, kv: 45, notes: 12, total: 1837 }
|
|
1992
2232
|
*/
|
|
1993
2233
|
async reembed(options = {}) {
|
|
1994
|
-
|
|
1995
|
-
throw new Error("BrainBank: Not initialized. Call initialize() before reembed().");
|
|
1996
|
-
}
|
|
2234
|
+
this._requireInit("reembed");
|
|
1997
2235
|
const hnswMap = /* @__PURE__ */ new Map();
|
|
1998
|
-
if (this._kvHnsw) {
|
|
1999
|
-
|
|
2236
|
+
if (this._kvHnsw) hnswMap.set("kv", { hnsw: this._kvHnsw, vecs: this._kvVecs });
|
|
2237
|
+
for (const [type, shared] of this._sharedHnsw) {
|
|
2238
|
+
hnswMap.set(type, { hnsw: shared.hnsw, vecs: shared.vecCache });
|
|
2000
2239
|
}
|
|
2001
|
-
const
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
|
|
2005
|
-
const
|
|
2006
|
-
if (codeMod?.hnsw) hnswMap.set("code", { hnsw: codeMod.hnsw, vecs: codeMod.vecCache });
|
|
2007
|
-
if (gitMod?.hnsw) hnswMap.set("git", { hnsw: gitMod.hnsw, vecs: gitMod.vecCache });
|
|
2008
|
-
if (memMod?.hnsw) hnswMap.set("memory", { hnsw: memMod.hnsw, vecs: memMod.vecCache });
|
|
2009
|
-
if (notesMod?.hnsw) hnswMap.set("notes", { hnsw: notesMod.hnsw, vecs: notesMod.vecCache });
|
|
2010
|
-
if (docsMod?.hnsw) hnswMap.set("docs", { hnsw: docsMod.hnsw, vecs: docsMod.vecCache });
|
|
2011
|
-
const result = await reembedAll(
|
|
2012
|
-
this._db,
|
|
2013
|
-
this._embedding,
|
|
2014
|
-
hnswMap,
|
|
2015
|
-
options
|
|
2016
|
-
);
|
|
2240
|
+
for (const type of ["memory", "notes", "docs"]) {
|
|
2241
|
+
const mod = this._registry.firstByType(type);
|
|
2242
|
+
if (mod?.hnsw) hnswMap.set(type, { hnsw: mod.hnsw, vecs: mod.vecCache });
|
|
2243
|
+
}
|
|
2244
|
+
const result = await reembedAll(this._db, this._embedding, hnswMap, options);
|
|
2017
2245
|
this.emit("reembedded", result);
|
|
2018
2246
|
return result;
|
|
2019
2247
|
}
|
|
@@ -2021,12 +2249,16 @@ ${docSection}`);
|
|
|
2021
2249
|
/** Close database and release resources. */
|
|
2022
2250
|
close() {
|
|
2023
2251
|
this._watcher?.close();
|
|
2024
|
-
for (const indexer of this.
|
|
2025
|
-
|
|
2026
|
-
}
|
|
2027
|
-
if (this._db) this._db.close();
|
|
2252
|
+
for (const indexer of this._registry.all) indexer.close?.();
|
|
2253
|
+
this._db?.close();
|
|
2028
2254
|
this._initialized = false;
|
|
2029
2255
|
this._collections.clear();
|
|
2256
|
+
this._sharedHnsw.clear();
|
|
2257
|
+
this._kvVecs.clear();
|
|
2258
|
+
this._kvHnsw = void 0;
|
|
2259
|
+
this._searchAPI = void 0;
|
|
2260
|
+
this._indexAPI = void 0;
|
|
2261
|
+
this._registry.clear();
|
|
2030
2262
|
}
|
|
2031
2263
|
/** Whether the brainbank has been initialized. */
|
|
2032
2264
|
get isInitialized() {
|
|
@@ -2036,31 +2268,23 @@ ${docSection}`);
|
|
|
2036
2268
|
get config() {
|
|
2037
2269
|
return this._config;
|
|
2038
2270
|
}
|
|
2039
|
-
// ──
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
for (const row of rows) {
|
|
2044
|
-
const vec = new Float32Array(row.embedding.buffer.slice(
|
|
2045
|
-
row.embedding.byteOffset,
|
|
2046
|
-
row.embedding.byteOffset + row.embedding.byteLength
|
|
2047
|
-
));
|
|
2048
|
-
hnsw.add(vec, row[idCol]);
|
|
2049
|
-
cache.set(row[idCol], vec);
|
|
2050
|
-
}
|
|
2271
|
+
// ── Internal guard ───────────────────────────────
|
|
2272
|
+
_requireInit(method) {
|
|
2273
|
+
if (!this._initialized)
|
|
2274
|
+
throw new Error(`BrainBank: Not initialized. Call await brain.initialize() before ${method}().`);
|
|
2051
2275
|
}
|
|
2052
2276
|
};
|
|
2053
2277
|
|
|
2054
2278
|
export {
|
|
2055
2279
|
DEFAULTS,
|
|
2056
2280
|
resolveConfig,
|
|
2281
|
+
Collection,
|
|
2057
2282
|
HNSWIndex,
|
|
2058
2283
|
LocalEmbedding,
|
|
2059
2284
|
searchMMR,
|
|
2060
|
-
|
|
2285
|
+
MultiIndexSearch,
|
|
2061
2286
|
BM25Search,
|
|
2062
2287
|
ContextBuilder,
|
|
2063
|
-
Collection,
|
|
2064
2288
|
BrainBank
|
|
2065
2289
|
};
|
|
2066
|
-
//# sourceMappingURL=chunk-
|
|
2290
|
+
//# sourceMappingURL=chunk-6MFTQV3O.js.map
|