brainbank 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +19 -9
  2. package/dist/{base-9vfWRHCV.d.ts → base-4SUgeRWT.d.ts} +25 -2
  3. package/dist/{chunk-6MFTQV3O.js → chunk-2BEWWQL2.js} +435 -386
  4. package/dist/chunk-2BEWWQL2.js.map +1 -0
  5. package/dist/{chunk-FJJY4H2Y.js → chunk-5VUYPNH3.js} +47 -3
  6. package/dist/chunk-5VUYPNH3.js.map +1 -0
  7. package/dist/chunk-CCXVL56V.js +120 -0
  8. package/dist/chunk-CCXVL56V.js.map +1 -0
  9. package/dist/{chunk-V4UJKXPK.js → chunk-E6WQM4DN.js} +9 -4
  10. package/dist/chunk-E6WQM4DN.js.map +1 -0
  11. package/dist/chunk-FI7GWG4W.js +309 -0
  12. package/dist/chunk-FI7GWG4W.js.map +1 -0
  13. package/dist/{chunk-X6645UVR.js → chunk-FINIFKAY.js} +136 -4
  14. package/dist/chunk-FINIFKAY.js.map +1 -0
  15. package/dist/{chunk-WR4WXKJT.js → chunk-MGIFEPYZ.js} +62 -42
  16. package/dist/chunk-MGIFEPYZ.js.map +1 -0
  17. package/dist/{chunk-F6SJ3U4H.js → chunk-Y3JKI6QN.js} +152 -141
  18. package/dist/chunk-Y3JKI6QN.js.map +1 -0
  19. package/dist/cli.js +61 -32
  20. package/dist/cli.js.map +1 -1
  21. package/dist/code.d.ts +1 -1
  22. package/dist/code.js +1 -1
  23. package/dist/docs.d.ts +1 -1
  24. package/dist/docs.js +1 -1
  25. package/dist/git.d.ts +1 -1
  26. package/dist/git.js +1 -1
  27. package/dist/index.d.ts +121 -82
  28. package/dist/index.js +66 -15
  29. package/dist/index.js.map +1 -1
  30. package/dist/memory.d.ts +1 -1
  31. package/dist/memory.js +3 -137
  32. package/dist/memory.js.map +1 -1
  33. package/dist/notes.d.ts +1 -1
  34. package/dist/notes.js +4 -49
  35. package/dist/notes.js.map +1 -1
  36. package/dist/{openai-CYDMYX7X.js → openai-embedding-VQZCZQYT.js} +2 -2
  37. package/package.json +1 -1
  38. package/dist/chunk-6MFTQV3O.js.map +0 -1
  39. package/dist/chunk-7JCEW7LT.js +0 -266
  40. package/dist/chunk-7JCEW7LT.js.map +0 -1
  41. package/dist/chunk-F6SJ3U4H.js.map +0 -1
  42. package/dist/chunk-FJJY4H2Y.js.map +0 -1
  43. package/dist/chunk-GUT5MSJT.js +0 -99
  44. package/dist/chunk-GUT5MSJT.js.map +0 -1
  45. package/dist/chunk-V4UJKXPK.js.map +0 -1
  46. package/dist/chunk-WR4WXKJT.js.map +0 -1
  47. package/dist/chunk-X6645UVR.js.map +0 -1
  48. package/dist/{openai-CYDMYX7X.js.map → openai-embedding-VQZCZQYT.js.map} +0 -0
package/dist/{chunk-F6SJ3U4H.js → chunk-Y3JKI6QN.js} RENAMED
@@ -33,10 +33,25 @@ var BREAK_SCORES = [
  var TARGET_CHARS = 3e3;
  var WINDOW_CHARS = 600;
  var MIN_CHUNK_CHARS = 200;
- function escapeRegex(s) {
-   return s.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
- }
- __name(escapeRegex, "escapeRegex");
+ var IGNORED_DOC_DIRS = /* @__PURE__ */ new Set([
+   "node_modules",
+   ".git",
+   ".hg",
+   ".svn",
+   "dist",
+   "build",
+   "out",
+   "coverage",
+   ".next",
+   "__pycache__",
+   ".tox",
+   ".venv",
+   "venv",
+   "vendor",
+   "target",
+   ".cache",
+   ".turbo"
+ ]);
  var DocsIndexer = class {
    constructor(_db, _embedding, _hnsw, _vecCache) {
      this._db = _db;
@@ -56,9 +71,30 @@ var DocsIndexer = class {
      if (!fs.existsSync(absDir)) {
        throw new Error(`Collection path does not exist: ${absDir}`);
      }
+     const files = this._walkFiles(absDir, pattern, options.ignore);
+     let indexed = 0, skipped = 0, totalChunks = 0;
+     for (let i = 0; i < files.length; i++) {
+       const relPath = files[i];
+       options.onProgress?.(relPath, i + 1, files.length);
+       const absPath = path.join(absDir, relPath);
+       const content = fs.readFileSync(absPath, "utf-8");
+       const hash = createHash("sha256").update(content).digest("hex").slice(0, 16);
+       if (this._isUnchanged(collection, relPath, hash)) {
+         skipped++;
+         continue;
+       }
+       this._removeOldChunks(collection, relPath);
+       const chunkCount = await this._indexFile(collection, relPath, content, hash);
+       indexed++;
+       totalChunks += chunkCount;
+     }
+     return { indexed, skipped, chunks: totalChunks };
+   }
+   /** Walk directory tree and collect matching files. */
+   _walkFiles(absDir, pattern, ignore) {
      const patternExt = pattern.match(/\.([\w]+)$/)?.[1];
      const files = [];
-     const walkDir = /* @__PURE__ */ __name((dir, base) => {
+     const walk = /* @__PURE__ */ __name((dir, base) => {
        let entries;
        try {
          entries = fs.readdirSync(dir, { withFileTypes: true });
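The rewritten loop above skips files whose stored chunks already match a 16-hex-character (64-bit) prefix of a SHA-256 over the file contents. A minimal standalone sketch of that fingerprint, using only Node's built-in crypto (the helper name is illustrative, not part of the package):

```ts
import { createHash } from "node:crypto";

// The same fingerprint indexCollection computes: full SHA-256 of the
// file contents, truncated to the first 16 hex chars (64 bits).
function contentFingerprint(content: string): string {
  return createHash("sha256").update(content).digest("hex").slice(0, 16);
}

// Identical content always yields an identical fingerprint, so a file is
// re-chunked and re-embedded only when its bytes actually change.
```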
@@ -68,93 +104,89 @@ var DocsIndexer = class {
        for (const e of entries) {
          const rel = base ? `${base}/${e.name}` : e.name;
          if (e.isDirectory()) {
-           if (this._isIgnoredDocDir(e.name)) continue;
-           walkDir(path.join(dir, e.name), rel);
+           if (IGNORED_DOC_DIRS.has(e.name)) continue;
+           walk(path.join(dir, e.name), rel);
          } else if (e.isFile()) {
-           const shouldIgnore = options.ignore?.some((ig) => {
-             const escaped = escapeRegex(ig).replace(/\\\*\\\*/g, ".*").replace(/\\\*/g, "[^/]*");
-             return new RegExp(escaped).test(rel);
-           });
+           if (this._isIgnoredFile(rel, ignore)) continue;
            const ext = path.extname(e.name).slice(1);
-           if (!shouldIgnore && (!patternExt || ext === patternExt)) {
-             files.push(rel);
-           }
+           if (!patternExt || ext === patternExt) files.push(rel);
          }
        }
-     }, "walkDir");
-     walkDir(absDir, "");
-     let indexed = 0;
-     let skipped = 0;
-     let totalChunks = 0;
-     for (let i = 0; i < files.length; i++) {
-       const relPath = files[i];
-       const absPath = path.join(absDir, relPath);
-       options.onProgress?.(relPath, i + 1, files.length);
-       const content = fs.readFileSync(absPath, "utf-8");
-       const hash = createHash("sha256").update(content).digest("hex").slice(0, 16);
-       const existingChunks = this._db.prepare(
-         `SELECT dc.id, dc.content_hash, dv.chunk_id AS has_vector
-          FROM doc_chunks dc
-          LEFT JOIN doc_vectors dv ON dv.chunk_id = dc.id
-          WHERE dc.collection = ? AND dc.file_path = ?`
-       ).all(collection, relPath);
-       const allMatch = existingChunks.length > 0 && existingChunks.every((c) => c.content_hash === hash && c.has_vector != null);
-       if (allMatch) {
-         skipped++;
-         continue;
-       }
-       for (const old of existingChunks) {
-         this._hnsw.remove(old.id);
-         this._vecCache.delete(old.id);
+     }, "walk");
+     walk(absDir, "");
+     return files;
+   }
+   /** Check if a file matches any ignore patterns. */
+   _isIgnoredFile(relPath, ignore) {
+     if (!ignore) return false;
+     return ignore.some((ig) => {
+       const regex = ig.replace(/\*\*/g, "{{GLOBSTAR}}").replace(/\*/g, "{{STAR}}").replace(/[.+?^${}()|[\]\\]/g, "\\$&").replace(/\{\{GLOBSTAR\}\}/g, ".*").replace(/\{\{STAR\}\}/g, "[^/]*");
+       return new RegExp(regex).test(relPath);
+     });
+   }
+   /** Check if all chunks for a file match the current hash and have vectors. */
+   _isUnchanged(collection, relPath, hash) {
+     const existing = this._db.prepare(
+       `SELECT dc.id, dc.content_hash, dv.chunk_id AS has_vector
+        FROM doc_chunks dc
+        LEFT JOIN doc_vectors dv ON dv.chunk_id = dc.id
+        WHERE dc.collection = ? AND dc.file_path = ?`
+     ).all(collection, relPath);
+     return existing.length > 0 && existing.every((c) => c.content_hash === hash && c.has_vector != null);
+   }
+   /** Remove old chunks and their HNSW vectors for a file. */
+   _removeOldChunks(collection, relPath) {
+     const oldChunks = this._db.prepare(
+       "SELECT id FROM doc_chunks WHERE collection = ? AND file_path = ?"
+     ).all(collection, relPath);
+     for (const old of oldChunks) {
+       this._hnsw.remove(old.id);
+       this._vecCache.delete(old.id);
      }
+     this._db.prepare(
+       "DELETE FROM doc_chunks WHERE collection = ? AND file_path = ?"
+     ).run(collection, relPath);
+   }
+   /** Index a single file: chunk, embed, store in DB + HNSW. */
+   async _indexFile(collection, relPath, content, hash) {
+     const title = this._extractTitle(content, relPath);
+     const chunks = this._smartChunk(content);
+     const insertChunk = this._db.prepare(`
+       INSERT INTO doc_chunks (collection, file_path, title, content, seq, pos, content_hash)
+       VALUES (?, ?, ?, ?, ?, ?, ?)
+     `);
+     const chunkIds = [];
+     this._db.transaction(() => {
+       for (let seq = 0; seq < chunks.length; seq++) {
+         const result = insertChunk.run(
+           collection,
+           relPath,
+           title,
+           chunks[seq].text,
+           seq,
+           chunks[seq].pos,
+           hash
+         );
+         chunkIds.push(Number(result.lastInsertRowid));
        }
-     this._db.prepare(
-       "DELETE FROM doc_chunks WHERE collection = ? AND file_path = ?"
-     ).run(collection, relPath);
-     const title = this._extractTitle(content, relPath);
-     const chunks = this._smartChunk(content);
-     const insertChunk = this._db.prepare(`
-       INSERT INTO doc_chunks (collection, file_path, title, content, seq, pos, content_hash)
-       VALUES (?, ?, ?, ?, ?, ?, ?)
-     `);
-     const chunkIds = [];
-     this._db.transaction(() => {
-       for (let seq = 0; seq < chunks.length; seq++) {
-         const chunk = chunks[seq];
-         const result = insertChunk.run(
-           collection,
-           relPath,
-           title,
-           chunk.text,
-           seq,
-           chunk.pos,
-           hash
-         );
-         chunkIds.push(Number(result.lastInsertRowid));
-       }
-     });
-     const texts = chunks.map((c) => `title: ${title} | text: ${c.text}`);
-     const embeddings = await this._embedding.embedBatch(texts);
-     const insertVec = this._db.prepare(
-       "INSERT OR REPLACE INTO doc_vectors (chunk_id, embedding) VALUES (?, ?)"
-     );
-     this._db.transaction(() => {
-       for (let j = 0; j < chunkIds.length; j++) {
-         const buf = Buffer.from(embeddings[j].buffer);
-         insertVec.run(chunkIds[j], buf);
-       }
-     });
+     });
+     const texts = chunks.map((c) => `title: ${title} | text: ${c.text}`);
+     const embeddings = await this._embedding.embedBatch(texts);
+     const insertVec = this._db.prepare(
+       "INSERT OR REPLACE INTO doc_vectors (chunk_id, embedding) VALUES (?, ?)"
+     );
+     this._db.transaction(() => {
        for (let j = 0; j < chunkIds.length; j++) {
-         this._hnsw.add(embeddings[j], chunkIds[j]);
-         this._vecCache.set(chunkIds[j], embeddings[j]);
+         insertVec.run(chunkIds[j], Buffer.from(embeddings[j].buffer));
        }
-     indexed++;
-     totalChunks += chunks.length;
+     });
+     for (let j = 0; j < chunkIds.length; j++) {
+       this._hnsw.add(embeddings[j], chunkIds[j]);
+       this._vecCache.set(chunkIds[j], embeddings[j]);
      }
-     return { indexed, skipped, chunks: totalChunks };
+     return chunks.length;
    }
-   /**
-    * Remove all indexed data for a collection.
-    */
+   /** Remove all indexed data for a collection. */
    removeCollection(collection) {
      const chunks = this._db.prepare(
        "SELECT id FROM doc_chunks WHERE collection = ?"
@@ -168,10 +200,7 @@ var DocsIndexer = class {
    this._db.prepare("DELETE FROM path_contexts WHERE collection = ?").run(collection);
  }
  // ── Smart Chunking ──────────────────────────────
- /**
-  * Split document into chunks at natural markdown boundaries.
-  * Uses heading-aware scoring like qmd.
-  */
+ /** Split document into chunks at natural markdown boundaries. */
  _smartChunk(text) {
    if (text.length <= TARGET_CHARS) {
      return [{ text, pos: 0 }];
@@ -183,32 +212,10 @@ var DocsIndexer = class {
    while (chunkStart < text.length) {
      const remaining = text.length - chunkStart;
      if (remaining <= TARGET_CHARS + WINDOW_CHARS) {
-       const lastText = text.slice(chunkStart).trim();
-       if (lastText.length >= MIN_CHUNK_CHARS) {
-         chunks.push({ text: lastText, pos: chunkStart });
-       } else if (chunks.length > 0) {
-         chunks[chunks.length - 1].text += "\n" + lastText;
-       } else {
-         chunks.push({ text: lastText, pos: chunkStart });
-       }
+       this._pushLastChunk(text, chunkStart, chunks);
        break;
      }
-     const targetEnd = chunkStart + TARGET_CHARS;
-     const windowStart = targetEnd - WINDOW_CHARS;
-     let bestBreak = targetEnd;
-     let bestScore = 0;
-     for (const bp of breakPoints) {
-       if (bp.pos <= chunkStart) continue;
-       if (bp.pos > targetEnd + WINDOW_CHARS / 2) break;
-       if (bp.pos < windowStart) continue;
-       const distance = Math.abs(bp.pos - targetEnd);
-       const decay = 1 - (distance / WINDOW_CHARS) ** 2 * 0.7;
-       const finalScore = bp.score * decay;
-       if (finalScore > bestScore) {
-         bestScore = finalScore;
-         bestBreak = bp.pos;
-       }
-     }
+     const bestBreak = this._findBestBreak(chunkStart, breakPoints);
      const chunkText = text.slice(chunkStart, bestBreak).trim();
      if (chunkText.length >= MIN_CHUNK_CHARS) {
        chunks.push({ text: chunkText, pos: chunkStart });
@@ -217,9 +224,38 @@ var DocsIndexer = class {
      }
      return chunks;
    }
-   /**
-    * Find all potential break points in the document with scores.
-    */
+   /** Handle the last chunk: merge if too small, otherwise push. */
+   _pushLastChunk(text, chunkStart, chunks) {
+     const lastText = text.slice(chunkStart).trim();
+     if (lastText.length >= MIN_CHUNK_CHARS) {
+       chunks.push({ text: lastText, pos: chunkStart });
+     } else if (chunks.length > 0) {
+       chunks[chunks.length - 1].text += "\n" + lastText;
+     } else {
+       chunks.push({ text: lastText, pos: chunkStart });
+     }
+   }
+   /** Find the best break position within the target window. */
+   _findBestBreak(chunkStart, breakPoints) {
+     const targetEnd = chunkStart + TARGET_CHARS;
+     const windowStart = targetEnd - WINDOW_CHARS;
+     let bestBreak = targetEnd;
+     let bestScore = 0;
+     for (const bp of breakPoints) {
+       if (bp.pos <= chunkStart) continue;
+       if (bp.pos > targetEnd + WINDOW_CHARS / 2) break;
+       if (bp.pos < windowStart) continue;
+       const distance = Math.abs(bp.pos - targetEnd);
+       const decay = 1 - (distance / WINDOW_CHARS) ** 2 * 0.7;
+       const finalScore = bp.score * decay;
+       if (finalScore > bestScore) {
+         bestScore = finalScore;
+         bestBreak = bp.pos;
+       }
+     }
+     return bestBreak;
+   }
+   /** Find all potential break points in the document with scores. */
    _findBreakPoints(lines) {
      const points = [];
      let charPos = 0;
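`_findBestBreak` prefers high-scoring markdown boundaries (H1 = 100 down to list items = 5) but decays each score quadratically with distance from the 3000-character target. A small worked example of that trade-off (standalone sketch; constants and formula copied from the bundle above):

```ts
const TARGET_CHARS = 3000;
const WINDOW_CHARS = 600;

// Same scoring as _findBestBreak: quadratic decay, bottoming out at 0.3 of
// the raw score when a candidate sits a full 600-char window from target.
function decayedScore(pos: number, chunkStart: number, rawScore: number): number {
  const targetEnd = chunkStart + TARGET_CHARS;
  const distance = Math.abs(pos - targetEnd);
  return rawScore * (1 - (distance / WINDOW_CHARS) ** 2 * 0.7);
}

decayedScore(3300, 0, 90); // H2 heading 300 chars past target: 90 * 0.825 = 74.25
decayedScore(3000, 0, 20); // blank line exactly on target: 20 * 1 = 20
// The off-target H2 still wins, so chunks tend to end at heading boundaries.
```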
@@ -247,37 +283,12 @@ var DocsIndexer = class {
      }
      return points;
    }
-   /**
-    * Extract document title from first heading or filename.
-    */
+   /** Extract document title from first heading or filename. */
    _extractTitle(content, filePath) {
      const match = content.match(/^#{1,3}\s+(.+)$/m);
      if (match) return match[1].trim();
      return path.basename(filePath, path.extname(filePath));
    }
-   /** Skip well-known output/vendor directories when walking docs. */
-   _isIgnoredDocDir(name) {
-     const IGNORED = /* @__PURE__ */ new Set([
-       "node_modules",
-       ".git",
-       ".hg",
-       ".svn",
-       "dist",
-       "build",
-       "out",
-       "coverage",
-       ".next",
-       "__pycache__",
-       ".tox",
-       ".venv",
-       "venv",
-       "vendor",
-       "target",
-       ".cache",
-       ".turbo"
-     ]);
-     return IGNORED.has(name);
-   }
  };
  
  // src/indexers/docs/docs-plugin.ts
@@ -434,4 +445,4 @@ export {
    DocsIndexer,
    docs
  };
- //# sourceMappingURL=chunk-F6SJ3U4H.js.map
+ //# sourceMappingURL=chunk-Y3JKI6QN.js.map
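The header comment of src/indexers/docs/docs-indexer.ts (embedded in the new source map below) sketches the intended usage of the class diffed above. Expanded slightly here as a hedged sketch: the import path is assumed, and the four collaborators are left abstract since the docs plugin normally wires them up.

```ts
import { DocsIndexer } from "brainbank/docs"; // assumed export path

declare const db: any;        // Database
declare const embedding: any; // EmbeddingProvider
declare const hnsw: any;      // HNSWIndex
declare const vecCache: Map<number, Float32Array>;

const indexer = new DocsIndexer(db, embedding, hnsw, vecCache);

// Incremental: files whose content hash is unchanged count as "skipped".
const { indexed, skipped, chunks } = await indexer.indexCollection(
  "notes",
  "/path/to/notes",
  "**/*.md",
  {
    ignore: ["drafts/**"],
    onProgress: (file, cur, total) => console.log(`${cur}/${total} ${file}`),
  },
);
```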
package/dist/chunk-Y3JKI6QN.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"sources":["../src/indexers/docs/docs-indexer.ts","../src/indexers/docs/docs-plugin.ts"],"sourcesContent":["…","…"],"mappings":"…","names":["path"]}
package/dist/cli.js CHANGED
@@ -1,17 +1,17 @@
  #!/usr/bin/env node
  import {
    BrainBank
- } from "./chunk-6MFTQV3O.js";
+ } from "./chunk-2BEWWQL2.js";
  import {
    code
- } from "./chunk-WR4WXKJT.js";
+ } from "./chunk-MGIFEPYZ.js";
  import {
    git
- } from "./chunk-7JCEW7LT.js";
+ } from "./chunk-FI7GWG4W.js";
  import {
    docs
- } from "./chunk-F6SJ3U4H.js";
- import "./chunk-V4UJKXPK.js";
+ } from "./chunk-Y3JKI6QN.js";
+ import "./chunk-E6WQM4DN.js";
  import "./chunk-QNHBCOKB.js";
  import {
    __name
@@ -37,6 +37,36 @@ function hasFlag(name) {
    return args.includes(`--${name}`);
  }
  __name(hasFlag, "hasFlag");
+ var VALUE_FLAGS = /* @__PURE__ */ new Set([
+   "repo",
+   "depth",
+   "collection",
+   "pattern",
+   "context",
+   "name",
+   "keep",
+   "reranker",
+   "only",
+   "docs",
+   "ignore",
+   "meta",
+   "k",
+   "mode",
+   "limit"
+ ]);
+ function stripFlags(argv) {
+   const result = [];
+   for (let i = 0; i < argv.length; i++) {
+     if (argv[i].startsWith("--")) {
+       const name = argv[i].slice(2);
+       if (VALUE_FLAGS.has(name)) i++;
+       continue;
+     }
+     result.push(argv[i]);
+   }
+   return result;
+ }
+ __name(stripFlags, "stripFlags");
  function printResults(results) {
    if (results.length === 0) {
      console.log(c.yellow(" No results found."));
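`stripFlags` drops every `--flag` from `argv` and additionally swallows the following token when the flag is listed in `VALUE_FLAGS`, so flag arguments no longer leak into search queries. A quick illustration (`--json` here is an illustrative boolean flag, not necessarily one the CLI defines):

```ts
stripFlags(["dsearch", "--collection", "notes", "--json", "vector", "recall"]);
// -> ["dsearch", "vector", "recall"]
// "--collection" is in VALUE_FLAGS, so it consumes "notes";
// "--json" is not, so only the flag itself is removed.
// cmdDocSearch then joins the rest: query === "vector recall".
```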
@@ -145,49 +175,48 @@ async function createBrain(repoPath) {
    const config = await loadConfig();
    const folderIndexers = await discoverFolderIndexers();
    const brainOpts = { repoPath: rp, ...config?.brainbank ?? {} };
+   await setupProviders(brainOpts);
+   const brain = new BrainBank(brainOpts);
+   const builtins = config?.builtins ?? ["code", "git", "docs"];
+   registerBuiltins(brain, rp, builtins);
+   for (const indexer of folderIndexers) brain.use(indexer);
+   if (config?.indexers) {
+     for (const indexer of config.indexers) brain.use(indexer);
+   }
+   return brain;
+ }
+ __name(createBrain, "createBrain");
+ async function setupProviders(brainOpts) {
    const rerankerFlag = getFlag("reranker");
    if (rerankerFlag === "qwen3") {
      const { Qwen3Reranker } = await import("@brainbank/reranker");
      brainOpts.reranker = new Qwen3Reranker();
    }
-   const embeddingEnv = process.env.BRAINBANK_EMBEDDING;
-   if (embeddingEnv === "openai") {
-     const { OpenAIEmbedding } = await import("./openai-CYDMYX7X.js");
+   if (process.env.BRAINBANK_EMBEDDING === "openai") {
+     const { OpenAIEmbedding } = await import("./openai-embedding-VQZCZQYT.js");
      const provider = new OpenAIEmbedding();
      brainOpts.embeddingProvider = provider;
      brainOpts.embeddingDims = provider.dims;
    }
-   const brain = new BrainBank(brainOpts);
-   const builtins = config?.builtins ?? ["code", "git", "docs"];
+ }
+ __name(setupProviders, "setupProviders");
+ function registerBuiltins(brain, rp, builtins) {
    const resolvedRp = path.resolve(rp);
    const hasRootGit = fs.existsSync(path.join(resolvedRp, ".git"));
    const gitSubdirs = !hasRootGit ? detectGitSubdirs(resolvedRp) : [];
    if (gitSubdirs.length > 0 && (builtins.includes("code") || builtins.includes("git"))) {
      console.log(c.cyan(` Multi-repo: found ${gitSubdirs.length} git repos: ${gitSubdirs.map((d) => d.name).join(", ")}`));
      for (const sub of gitSubdirs) {
-       if (builtins.includes("code")) {
-         brain.use(code({ repoPath: sub.path, name: `code:${sub.name}` }));
-       }
-       if (builtins.includes("git")) {
-         brain.use(git({ repoPath: sub.path, name: `git:${sub.name}` }));
-       }
+       if (builtins.includes("code")) brain.use(code({ repoPath: sub.path, name: `code:${sub.name}` }));
+       if (builtins.includes("git")) brain.use(git({ repoPath: sub.path, name: `git:${sub.name}` }));
      }
    } else {
      if (builtins.includes("code")) brain.use(code({ repoPath: rp }));
      if (builtins.includes("git")) brain.use(git());
    }
    if (builtins.includes("docs")) brain.use(docs());
-   for (const indexer of folderIndexers) {
-     brain.use(indexer);
-   }
-   if (config?.indexers) {
-     for (const indexer of config.indexers) {
-       brain.use(indexer);
-     }
-   }
-   return brain;
  }
- __name(createBrain, "createBrain");
+ __name(registerBuiltins, "registerBuiltins");
  
  // src/cli/commands/index-cmd.ts
  async function cmdIndex() {
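`createBrain` now delegates to `setupProviders` and `registerBuiltins` but still reads the same three config fields. A hedged sketch of the config shape those reads imply (the config file's name and loading mechanism are not shown in this diff, and the exact option types are assumptions):

```ts
// Fields read by createBrain: config.brainbank, config.builtins, config.indexers.
export default {
  brainbank: { /* options spread into the BrainBank constructor */ },
  builtins: ["code", "docs"], // defaults to ["code", "git", "docs"] when omitted
  indexers: [],               // extra custom indexers, each passed to brain.use()
};
```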
@@ -454,7 +483,7 @@ async function cmdDocs() {
  }
  __name(cmdDocs, "cmdDocs");
  async function cmdDocSearch() {
-   const query = args.slice(1).filter((a) => !a.startsWith("--")).join(" ");
+   const query = stripFlags(args).slice(1).join(" ");
    if (!query) {
      console.log(c.red("Usage: brainbank dsearch <query>"));
      process.exit(1);
@@ -485,7 +514,7 @@ __name(cmdDocSearch, "cmdDocSearch");
  
  // src/cli/commands/search.ts
  async function cmdSearch() {
-   const query = args.slice(1).join(" ");
+   const query = stripFlags(args).slice(1).join(" ");
    if (!query) {
      console.log(c.red("Usage: brainbank search <query>"));
      process.exit(1);
@@ -500,7 +529,7 @@ async function cmdSearch() {
  }
  __name(cmdSearch, "cmdSearch");
  async function cmdHybridSearch() {
-   const query = args.slice(1).filter((a) => !a.startsWith("--")).join(" ");
+   const query = stripFlags(args).slice(1).join(" ");
    if (!query) {
      console.log(c.red("Usage: brainbank hsearch <query>"));
      process.exit(1);
@@ -516,7 +545,7 @@ async function cmdHybridSearch() {
  }
  __name(cmdHybridSearch, "cmdHybridSearch");
  async function cmdKeywordSearch() {
-   const query = args.slice(1).filter((a) => !a.startsWith("--")).join(" ");
+   const query = stripFlags(args).slice(1).join(" ");
    if (!query) {
      console.log(c.red("Usage: brainbank ksearch <query>"));
      process.exit(1);
@@ -527,7 +556,7 @@ async function cmdKeywordSearch() {
  \u2501\u2501\u2501 BrainBank Keyword Search: "${query}" \u2501\u2501\u2501`));
    console.log(c.dim(` Mode: BM25 full-text (instant)
  `));
-   const results = brain.searchBM25(query);
+   const results = await brain.searchBM25(query);
    printResults(results);
    brain.close();
  }
@@ -566,7 +595,7 @@ async function cmdContext() {
      brain2.close();
      return;
    }
-   const task = args.slice(1).join(" ");
+   const task = stripFlags(args).slice(1).join(" ");
    if (!task) {
      console.log(c.red("Usage: brainbank context <task description>"));
      console.log(c.dim(" brainbank context add <collection> <path> <description>"));