brainbank 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -1107
- package/assets/architecture.png +0 -0
- package/bin/brainbank +8 -1
- package/bin/brainbank-mcp +19 -0
- package/dist/chunk-3UIWA32X.js +3341 -0
- package/dist/chunk-3UIWA32X.js.map +1 -0
- package/dist/chunk-3YBCD6DI.js +117 -0
- package/dist/chunk-3YBCD6DI.js.map +1 -0
- package/dist/chunk-DAGVUEXL.js +258 -0
- package/dist/chunk-DAGVUEXL.js.map +1 -0
- package/dist/chunk-DMFMTOHF.js +123 -0
- package/dist/chunk-DMFMTOHF.js.map +1 -0
- package/dist/chunk-FQYKWB2Q.js +136 -0
- package/dist/chunk-FQYKWB2Q.js.map +1 -0
- package/dist/chunk-IMJJ2VEM.js +74 -0
- package/dist/chunk-IMJJ2VEM.js.map +1 -0
- package/dist/chunk-M744PCJQ.js +43 -0
- package/dist/chunk-M744PCJQ.js.map +1 -0
- package/dist/chunk-NNDY7P2R.js +211 -0
- package/dist/chunk-NNDY7P2R.js.map +1 -0
- package/dist/chunk-O3J6ZIXK.js +82 -0
- package/dist/chunk-O3J6ZIXK.js.map +1 -0
- package/dist/chunk-RDQYDLYZ.js +69 -0
- package/dist/chunk-RDQYDLYZ.js.map +1 -0
- package/dist/chunk-WCQVDF3K.js +14 -0
- package/dist/cli.js +2713 -325
- package/dist/cli.js.map +1 -1
- package/dist/haiku-pruner-5KVT5AI2.js +8 -0
- package/dist/http-server-2ZQ6I43B.js +9 -0
- package/dist/index.d.ts +1886 -626
- package/dist/index.js +319 -46
- package/dist/index.js.map +1 -1
- package/dist/local-embedding-NZQTILGV.js +8 -0
- package/dist/mcp.d.ts +2 -0
- package/dist/mcp.js +386 -0
- package/dist/mcp.js.map +1 -0
- package/dist/openai-embedding-ZP5TSUJG.js +8 -0
- package/dist/perplexity-context-embedding-GI5PHE6X.js +9 -0
- package/dist/perplexity-context-embedding-GI5PHE6X.js.map +1 -0
- package/dist/perplexity-embedding-KZRYGJRC.js +10 -0
- package/dist/perplexity-embedding-KZRYGJRC.js.map +1 -0
- package/dist/plugin-IKQ6IRSJ.js +32 -0
- package/dist/plugin-IKQ6IRSJ.js.map +1 -0
- package/dist/resolve-ASGLBNUC.js +10 -0
- package/dist/resolve-ASGLBNUC.js.map +1 -0
- package/dist/stats-tui-AD3AMYGV.js +1904 -0
- package/dist/stats-tui-AD3AMYGV.js.map +1 -0
- package/package.json +38 -53
- package/src/brainbank.ts +617 -0
- package/src/cli/commands/collection.ts +77 -0
- package/src/cli/commands/context.ts +59 -0
- package/src/cli/commands/daemon.ts +100 -0
- package/src/cli/commands/docs.ts +71 -0
- package/src/cli/commands/files.ts +69 -0
- package/src/cli/commands/help.ts +82 -0
- package/src/cli/commands/index.ts +478 -0
- package/src/cli/commands/kv.ts +140 -0
- package/src/cli/commands/mcp-export.ts +273 -0
- package/src/cli/commands/mcp.ts +6 -0
- package/src/cli/commands/query.ts +167 -0
- package/src/cli/commands/reembed.ts +30 -0
- package/src/cli/commands/reindex.ts +40 -0
- package/src/cli/commands/scan.ts +336 -0
- package/src/cli/commands/search.ts +203 -0
- package/src/cli/commands/stats.ts +68 -0
- package/src/cli/commands/status.ts +47 -0
- package/src/cli/commands/watch.ts +47 -0
- package/src/cli/factory/brain-context.ts +43 -0
- package/src/cli/factory/builtin-registration.ts +87 -0
- package/src/cli/factory/config-loader.ts +77 -0
- package/src/cli/factory/index.ts +69 -0
- package/src/cli/factory/plugin-loader.ts +324 -0
- package/src/cli/index.ts +76 -0
- package/src/cli/server-client.ts +186 -0
- package/src/cli/tui/index-tui.tsx +667 -0
- package/src/cli/tui/stats-data.ts +523 -0
- package/src/cli/tui/stats-search.ts +262 -0
- package/src/cli/tui/stats-tui.tsx +1465 -0
- package/src/cli/tui/tree-scanner.ts +650 -0
- package/src/cli/utils.ts +137 -0
- package/src/config.ts +48 -0
- package/src/constants.ts +21 -0
- package/src/db/adapter.ts +112 -0
- package/src/db/metadata.ts +130 -0
- package/src/db/migrations.ts +66 -0
- package/src/db/sqlite-adapter.ts +218 -0
- package/src/db/tracker.ts +91 -0
- package/src/engine/index-api.ts +81 -0
- package/src/engine/reembed.ts +206 -0
- package/src/engine/search-api.ts +218 -0
- package/src/index.ts +150 -0
- package/src/lib/fts.ts +57 -0
- package/src/lib/languages.ts +179 -0
- package/src/lib/logger.ts +126 -0
- package/src/lib/math.ts +87 -0
- package/src/lib/provider-key.ts +20 -0
- package/src/lib/prune.ts +72 -0
- package/src/lib/rrf.ts +133 -0
- package/src/lib/write-lock.ts +108 -0
- package/src/mcp/mcp-server.ts +268 -0
- package/src/mcp/workspace-factory.ts +68 -0
- package/src/mcp/workspace-pool.ts +224 -0
- package/src/plugin.ts +381 -0
- package/src/providers/embeddings/embedding-worker-thread.ts +95 -0
- package/src/providers/embeddings/embedding-worker.ts +141 -0
- package/src/providers/embeddings/local-embedding.ts +115 -0
- package/src/providers/embeddings/openai-embedding.ts +167 -0
- package/src/providers/embeddings/perplexity-context-embedding.ts +195 -0
- package/src/providers/embeddings/perplexity-embedding.ts +165 -0
- package/src/providers/embeddings/resolve.ts +34 -0
- package/src/providers/pruners/haiku-expander.ts +178 -0
- package/src/providers/pruners/haiku-pruner.ts +263 -0
- package/src/providers/vector/hnsw-index.ts +174 -0
- package/src/providers/vector/hnsw-loader.ts +129 -0
- package/src/search/bm25-boost.ts +76 -0
- package/src/search/context-builder.ts +209 -0
- package/src/search/keyword/composite-bm25-search.ts +47 -0
- package/src/search/query-decomposer.ts +124 -0
- package/src/search/types.ts +37 -0
- package/src/search/vector/composite-vector-search.ts +105 -0
- package/src/search/vector/mmr.ts +64 -0
- package/src/services/collection.ts +384 -0
- package/src/services/daemon.ts +87 -0
- package/src/services/http-server.ts +344 -0
- package/src/services/kv-service.ts +64 -0
- package/src/services/plugin-registry.ts +77 -0
- package/src/services/watch.ts +340 -0
- package/src/services/webhook-server.ts +100 -0
- package/src/types.ts +509 -0
- package/dist/chunk-2P3EGY6S.js +0 -37
- package/dist/chunk-2P3EGY6S.js.map +0 -1
- package/dist/chunk-3GAIDXRW.js +0 -105
- package/dist/chunk-3GAIDXRW.js.map +0 -1
- package/dist/chunk-4ZKBQ33J.js +0 -56
- package/dist/chunk-4ZKBQ33J.js.map +0 -1
- package/dist/chunk-7QVYU63E.js +0 -7
- package/dist/chunk-GOUBW7UA.js +0 -373
- package/dist/chunk-GOUBW7UA.js.map +0 -1
- package/dist/chunk-MJ3Y24H6.js +0 -185
- package/dist/chunk-MJ3Y24H6.js.map +0 -1
- package/dist/chunk-N6ZMBFDE.js +0 -224
- package/dist/chunk-N6ZMBFDE.js.map +0 -1
- package/dist/chunk-RAEBYV75.js +0 -709
- package/dist/chunk-RAEBYV75.js.map +0 -1
- package/dist/chunk-TW5NTYYZ.js +0 -2066
- package/dist/chunk-TW5NTYYZ.js.map +0 -1
- package/dist/chunk-Z5SU54HP.js +0 -171
- package/dist/chunk-Z5SU54HP.js.map +0 -1
- package/dist/code.d.ts +0 -31
- package/dist/code.js +0 -8
- package/dist/docs.d.ts +0 -19
- package/dist/docs.js +0 -8
- package/dist/git.d.ts +0 -31
- package/dist/git.js +0 -8
- package/dist/memory.d.ts +0 -19
- package/dist/memory.js +0 -146
- package/dist/memory.js.map +0 -1
- package/dist/notes.d.ts +0 -19
- package/dist/notes.js +0 -57
- package/dist/notes.js.map +0 -1
- package/dist/openai-PCTYLOWI.js +0 -8
- package/dist/types-Da_zLLOl.d.ts +0 -474
- /package/dist/{chunk-7QVYU63E.js.map → chunk-WCQVDF3K.js.map} +0 -0
- /package/dist/{code.js.map → haiku-pruner-5KVT5AI2.js.map} +0 -0
- /package/dist/{docs.js.map → http-server-2ZQ6I43B.js.map} +0 -0
- /package/dist/{git.js.map → local-embedding-NZQTILGV.js.map} +0 -0
- /package/dist/{openai-PCTYLOWI.js.map → openai-embedding-ZP5TSUJG.js.map} +0 -0
package/dist/chunk-4ZKBQ33J.js
DELETED
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
__name
|
|
3
|
-
} from "./chunk-7QVYU63E.js";
|
|
4
|
-
|
|
5
|
-
// src/query/rrf.ts
|
|
6
|
-
/**
 * Fuse several ranked result lists into a single ranked list using
 * Reciprocal Rank Fusion (RRF).
 *
 * Every appearance of a result contributes `1 / (k + rank + 1)`, where `rank`
 * is the zero-based position inside its list. Contributions of results that
 * share the same `resultKey` are summed, and the copy with the highest
 * original `score` is the one kept. The returned `score` is the RRF score
 * divided by the best RRF score in the output (so the top hit is 1); the raw
 * value is exposed as `metadata.rrfScore`.
 *
 * @param {Array<Array<object>|null|undefined>} resultSets - Ranked lists, best
 *   result first. Null/undefined lists are skipped.
 * @param {number} [k=60] - Smoothing constant; higher values flatten the
 *   advantage of top ranks.
 * @param {number} [maxResults=15] - Maximum number of results returned.
 *   Values below zero are treated as zero.
 * @returns {Array<object>} Shallow copies of the fused results, best first.
 */
function reciprocalRankFusion(resultSets, k = 60, maxResults = 15) {
  // key -> { result: best-scoring copy seen so far, rrfScore: summed contributions }
  const fused = /* @__PURE__ */ new Map();

  for (const results of resultSets ?? []) {
    // A search backend with nothing to report may hand back null/undefined;
    // that must not abort the fusion of the remaining lists.
    if (results == null) continue;

    for (let rank = 0; rank < results.length; rank++) {
      const r = results[rank];
      const key = resultKey(r);
      const rrfContribution = 1 / (k + rank + 1);
      const existing = fused.get(key);

      if (!existing) {
        fused.set(key, { result: { ...r }, rrfScore: rrfContribution });
        continue;
      }

      existing.rrfScore += rrfContribution;
      // Keep the representation that carried the higher original score.
      if (r.score > existing.result.score) {
        existing.result = { ...r };
      }
    }
  }

  // Clamp the limit: a negative end index would make slice() drop entries
  // from the END of the list instead of limiting the result count.
  const limit = Math.max(0, maxResults);
  const sorted = Array.from(fused.values())
    .sort((a, b) => b.rrfScore - a.rrfScore)
    .slice(0, limit);

  // Normalise against the best fused score so output scores are relative to the top hit.
  const maxRRF = sorted[0]?.rrfScore ?? 1;
  return sorted.map((entry) => ({
    ...entry.result,
    score: entry.rrfScore / maxRRF,
    metadata: {
      ...entry.result.metadata,
      rrfScore: entry.rrfScore
    }
  }));
}
|
|
38
|
-
__name(reciprocalRankFusion, "reciprocalRankFusion");
|
|
39
|
-
/**
 * Build the de-duplication key used to recognise the same search result
 * coming from different ranked lists.
 *
 * - `code`    → file path plus line range
 * - `commit`  → full hash, else short hash
 * - `pattern` → task type plus the first 60 characters of the content
 * - anything else → type plus the first 80 characters of the content
 *
 * When a `code` or `commit` result lacks its identifying metadata, the
 * generic content-prefix key is used instead. Previously a missing `metadata`
 * object threw, and missing fields produced keys such as `commit:undefined`
 * that made unrelated results collapse into one entry.
 *
 * @param {object} r - Search result with `type` and optionally `filePath`,
 *   `content` and `metadata`.
 * @returns {string} Key that is equal for results considered identical.
 */
function resultKey(r) {
  const meta = r.metadata ?? {};

  switch (r.type) {
    case "code":
      if (meta.startLine != null && meta.endLine != null) {
        return `code:${r.filePath}:${meta.startLine}-${meta.endLine}`;
      }
      break;
    case "commit": {
      const hash = meta.hash || meta.shortHash;
      if (hash) {
        return `commit:${hash}`;
      }
      break;
    }
    case "pattern":
      return `pattern:${meta.taskType}:${r.content?.slice(0, 60)}`;
    default:
      break;
  }

  // Generic key; also the fallback when identifying metadata is unavailable.
  return `${r.type}:${r.content?.slice(0, 80)}`;
}
|
|
51
|
-
__name(resultKey, "resultKey");
|
|
52
|
-
|
|
53
|
-
export {
|
|
54
|
-
reciprocalRankFusion
|
|
55
|
-
};
|
|
56
|
-
//# sourceMappingURL=chunk-4ZKBQ33J.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/query/rrf.ts"],"sourcesContent":["/**\n * BrainBank — Reciprocal Rank Fusion (RRF)\n * \n * Combines results from multiple search systems (vector + BM25)\n * using the RRF algorithm: score = Σ 1/(k + rank_i)\n * \n * This is the same algorithm used by Elasticsearch, QMD, and most\n * production hybrid search systems. Simple but very effective.\n * \n * Reference: Cormack et al., \"Reciprocal Rank Fusion outperforms\n * Condorcet and individual Rank Learning Methods\" (2009)\n */\n\nimport type { SearchResult } from '../types.ts';\n\n/**\n * Fuse ranked lists from different search systems into a single ranked list.\n * \n * @param resultSets - Arrays of SearchResult from different systems (e.g. vector, BM25)\n * @param k - Smoothing constant. Default: 60 (standard value). Higher = less emphasis on top ranks.\n * @param maxResults - Maximum results to return.\n */\nexport function reciprocalRankFusion(\n resultSets: SearchResult[][],\n k: number = 60,\n maxResults: number = 15,\n): SearchResult[] {\n // Build a map: unique key → { bestResult, rrfScore }\n const fused = new Map<string, { result: SearchResult; rrfScore: number }>();\n\n for (const results of resultSets) {\n for (let rank = 0; rank < results.length; rank++) {\n const r = results[rank];\n const key = resultKey(r);\n const rrfContribution = 1.0 / (k + rank + 1);\n\n const existing = fused.get(key);\n if (existing) {\n existing.rrfScore += rrfContribution;\n // Keep the result with the higher original score\n if (r.score > existing.result.score) {\n existing.result = { ...r };\n }\n } else {\n fused.set(key, {\n result: { ...r },\n rrfScore: rrfContribution,\n });\n }\n }\n }\n\n // Sort by RRF score descending, normalize, and return\n const sorted = Array.from(fused.values())\n .sort((a, b) => b.rrfScore - a.rrfScore)\n .slice(0, maxResults);\n\n // Normalize RRF scores to 0..1 range\n const maxRRF = sorted[0]?.rrfScore ?? 
1;\n return sorted.map(entry => ({\n ...entry.result,\n score: entry.rrfScore / maxRRF,\n metadata: {\n ...entry.result.metadata,\n rrfScore: entry.rrfScore,\n },\n }));\n}\n\n/**\n * Generate a unique key for a search result to detect duplicates across systems.\n */\nfunction resultKey(r: SearchResult): string {\n switch (r.type) {\n case 'code':\n return `code:${r.filePath}:${r.metadata.startLine}-${r.metadata.endLine}`;\n case 'commit':\n return `commit:${r.metadata.hash || r.metadata.shortHash}`;\n case 'pattern':\n return `pattern:${r.metadata.taskType}:${r.content?.slice(0, 60)}`;\n default:\n return `${r.type}:${r.content?.slice(0, 80)}`;\n }\n}\n"],"mappings":";;;;;AAsBO,SAAS,qBACZ,YACA,IAAY,IACZ,aAAqB,IACP;AAEd,QAAM,QAAQ,oBAAI,IAAwD;AAE1E,aAAW,WAAW,YAAY;AAC9B,aAAS,OAAO,GAAG,OAAO,QAAQ,QAAQ,QAAQ;AAC9C,YAAM,IAAI,QAAQ,IAAI;AACtB,YAAM,MAAM,UAAU,CAAC;AACvB,YAAM,kBAAkB,KAAO,IAAI,OAAO;AAE1C,YAAM,WAAW,MAAM,IAAI,GAAG;AAC9B,UAAI,UAAU;AACV,iBAAS,YAAY;AAErB,YAAI,EAAE,QAAQ,SAAS,OAAO,OAAO;AACjC,mBAAS,SAAS,EAAE,GAAG,EAAE;AAAA,QAC7B;AAAA,MACJ,OAAO;AACH,cAAM,IAAI,KAAK;AAAA,UACX,QAAQ,EAAE,GAAG,EAAE;AAAA,UACf,UAAU;AAAA,QACd,CAAC;AAAA,MACL;AAAA,IACJ;AAAA,EACJ;AAGA,QAAM,SAAS,MAAM,KAAK,MAAM,OAAO,CAAC,EACnC,KAAK,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE,QAAQ,EACtC,MAAM,GAAG,UAAU;AAGxB,QAAM,SAAS,OAAO,CAAC,GAAG,YAAY;AACtC,SAAO,OAAO,IAAI,YAAU;AAAA,IACxB,GAAG,MAAM;AAAA,IACT,OAAO,MAAM,WAAW;AAAA,IACxB,UAAU;AAAA,MACN,GAAG,MAAM,OAAO;AAAA,MAChB,UAAU,MAAM;AAAA,IACpB;AAAA,EACJ,EAAE;AACN;AA7CgB;AAkDhB,SAAS,UAAU,GAAyB;AACxC,UAAQ,EAAE,MAAM;AAAA,IACZ,KAAK;AACD,aAAO,QAAQ,EAAE,QAAQ,IAAI,EAAE,SAAS,SAAS,IAAI,EAAE,SAAS,OAAO;AAAA,IAC3E,KAAK;AACD,aAAO,UAAU,EAAE,SAAS,QAAQ,EAAE,SAAS,SAAS;AAAA,IAC5D,KAAK;AACD,aAAO,WAAW,EAAE,SAAS,QAAQ,IAAI,EAAE,SAAS,MAAM,GAAG,EAAE,CAAC;AAAA,IACpE;AACI,aAAO,GAAG,EAAE,IAAI,IAAI,EAAE,SAAS,MAAM,GAAG,EAAE,CAAC;AAAA,EACnD;AACJ;AAXS;","names":[]}
|
package/dist/chunk-7QVYU63E.js
DELETED
package/dist/chunk-GOUBW7UA.js
DELETED
|
@@ -1,373 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
__name
|
|
3
|
-
} from "./chunk-7QVYU63E.js";
|
|
4
|
-
|
|
5
|
-
// src/indexers/doc-indexer.ts
|
|
6
|
-
import * as fs from "fs";
|
|
7
|
-
import * as path from "path";
|
|
8
|
-
import { createHash } from "crypto";
|
|
9
|
-
import { glob } from "fs/promises";
|
|
10
|
-
// Break-point scoring table: [pattern, score] pairs tested in order against a
// trimmed line; the first matching pattern supplies the score (higher = a
// better place to split a document).
const BREAK_SCORES = [
  [/^# /, 100], // H1
  [/^## /, 90], // H2
  [/^### /, 80], // H3
  [/^#### /, 70], // H4
  [/^##### /, 60], // H5
  [/^###### /, 50], // H6
  [/^```/, 80], // Code fence
  [/^---$/, 60], // Horizontal rule
  [/^\*\*\*$/, 60], // Horizontal rule alt
  [/^$/, 20], // Blank line (paragraph break)
  [/^[-*+] /, 5] // List item
];

// Chunk sizing, in characters.
const TARGET_CHARS = 3000; // preferred chunk length (~900 tokens)
const WINDOW_CHARS = 600; // search window before the cutoff
const MIN_CHUNK_CHARS = 200; // don't create tiny chunks
|
|
37
|
-
/**
 * Indexes document collections: splits each file into chunks at natural
 * markdown boundaries, stores the chunks in `doc_chunks`, embeds them and
 * records the vectors in `doc_vectors`, the HNSW index and the vector cache.
 */
var DocIndexer = class {
  /**
   * @param _db        Database handle; `prepare(sql)` and `transaction(fn)` are used.
   * @param _embedding Embedding provider; `embedBatch(texts)` is awaited.
   * @param _hnsw      Vector index; receives `add(vector, chunkId)`.
   * @param _vecCache  Map-like cache; receives `set(chunkId, vector)`.
   */
  constructor(_db, _embedding, _hnsw, _vecCache) {
    this._db = _db;
    this._embedding = _embedding;
    this._hnsw = _hnsw;
    this._vecCache = _vecCache;
  }
  static {
    __name(this, "DocIndexer");
  }
  /**
   * Index all documents in a collection.
   * Incremental — a file is skipped when a chunk with the same content hash
   * (first 16 hex characters of its SHA-256) is already stored for it.
   *
   * @param collection Collection name stored with every chunk.
   * @param dirPath    Directory to scan; resolved to an absolute path.
   * @param pattern    Glob pattern, relative to `dirPath`.
   * @param options    `ignore`: glob-like patterns matched anywhere in the
   *                   relative path; `onProgress(relPath, current, total)`.
   * @returns `{ indexed, skipped, chunks }` counters for this run.
   * @throws Error when `dirPath` does not exist or the embedding provider
   *         returns fewer vectors than chunks.
   */
  async indexCollection(collection, dirPath, pattern = "**/*.md", options = {}) {
    const absDir = path.resolve(dirPath);
    if (!fs.existsSync(absDir)) {
      throw new Error(`Collection path does not exist: ${absDir}`);
    }

    // Compile the ignore patterns once instead of once per file and pattern.
    const ignoreMatchers = this._compileIgnorePatterns(options.ignore);
    const files = [];
    for await (const entry of glob(pattern, { cwd: absDir })) {
      const fullPath = path.join(absDir, entry);
      if (!fs.statSync(fullPath).isFile()) continue;
      if (ignoreMatchers.some((matcher) => matcher.test(entry))) continue;
      files.push(entry);
    }

    // Statements are loop-invariant, so prepare them once.
    const findUnchanged = this._db.prepare(
      "SELECT id FROM doc_chunks WHERE collection = ? AND file_path = ? AND content_hash = ? LIMIT 1"
    );
    const deleteChunks = this._db.prepare(
      "DELETE FROM doc_chunks WHERE collection = ? AND file_path = ?"
    );
    const insertChunk = this._db.prepare(`
      INSERT INTO doc_chunks (collection, file_path, title, content, seq, pos, content_hash)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `);
    const insertVec = this._db.prepare(
      "INSERT OR REPLACE INTO doc_vectors (chunk_id, embedding) VALUES (?, ?)"
    );

    let indexed = 0;
    let skipped = 0;
    let totalChunks = 0;
    for (let i = 0; i < files.length; i++) {
      const relPath = files[i];
      const absPath = path.join(absDir, relPath);
      options.onProgress?.(relPath, i + 1, files.length);

      const content = fs.readFileSync(absPath, "utf-8");
      const hash = createHash("sha256").update(content).digest("hex").slice(0, 16);
      if (findUnchanged.get(collection, relPath, hash)) {
        skipped++;
        continue;
      }

      const title = this._extractTitle(content, relPath);
      const chunks = this._smartChunk(content);

      // Embed BEFORE touching the database. If embedding fails, the previously
      // indexed chunks stay in place and the file is retried on the next run,
      // instead of leaving hash-stamped chunks without vectors that every
      // later run would skip as "unchanged".
      const texts = chunks.map((c) => `title: ${title} | text: ${c.text}`);
      const embeddings = await this._embedding.embedBatch(texts);
      if (embeddings.length < chunks.length) {
        throw new Error(
          `Embedding provider returned ${embeddings.length} vectors for ${chunks.length} chunks: ${relPath}`
        );
      }

      deleteChunks.run(collection, relPath);
      const chunkIds = [];
      this._db.transaction(() => {
        for (let seq = 0; seq < chunks.length; seq++) {
          const chunk = chunks[seq];
          const result = insertChunk.run(
            collection,
            relPath,
            title,
            chunk.text,
            seq,
            chunk.pos,
            hash
          );
          chunkIds.push(Number(result.lastInsertRowid));
        }
      });
      this._db.transaction(() => {
        for (let j = 0; j < chunkIds.length; j++) {
          const vector = embeddings[j];
          // Respect byteOffset/byteLength: a typed array may be a view into a
          // larger shared ArrayBuffer, and wrapping `.buffer` alone would
          // persist the whole backing store rather than this vector.
          const buf = Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength);
          insertVec.run(chunkIds[j], buf);
          this._hnsw.add(vector, chunkIds[j]);
          this._vecCache.set(chunkIds[j], vector);
        }
      });
      indexed++;
      totalChunks += chunks.length;
    }
    return { indexed, skipped, chunks: totalChunks };
  }
  /**
   * Remove all indexed data for a collection: its chunks, its `collections`
   * row and its path contexts.
   */
  removeCollection(collection) {
    // NOTE(review): rows in doc_vectors and entries in the HNSW index / vector
    // cache are not touched here — confirm they are cleaned up elsewhere.
    this._db.prepare("DELETE FROM doc_chunks WHERE collection = ?").run(collection);
    this._db.prepare("DELETE FROM collections WHERE name = ?").run(collection);
    this._db.prepare("DELETE FROM path_contexts WHERE collection = ?").run(collection);
  }
  /**
   * Translate glob-like ignore patterns into regular expressions.
   * `**` matches any characters (including `/`), `*` matches any characters
   * except `/`, everything else is matched literally. The expressions are
   * unanchored, so a pattern matches when it occurs anywhere in the path.
   *
   * @param patterns Array of patterns, or null/undefined for none.
   * @returns Array of RegExp objects, one per pattern.
   */
  _compileIgnorePatterns(patterns) {
    const escapeLiteral = (text) => text.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
    return (patterns ?? []).map((pattern) => {
      // Split on `**` first so its `.*` translation cannot be rewritten by the
      // single-`*` translation applied to the remaining pieces.
      const source = pattern
        .split("**")
        .map((piece) => piece.split("*").map(escapeLiteral).join("[^/]*"))
        .join(".*");
      return new RegExp(source);
    });
  }
  // ── Smart Chunking ──────────────────────────────
  /**
   * Split a document into chunks at natural markdown boundaries.
   * Texts of at most TARGET_CHARS are returned as a single chunk. Otherwise
   * each cut is placed at the best-scoring break point between WINDOW_CHARS
   * before and WINDOW_CHARS / 2 after the target length; without a candidate
   * the cut falls exactly at the target length.
   *
   * @param text Full document text.
   * @returns Array of `{ text, pos }`; `pos` is the character offset at which
   *          the chunk's (untrimmed) slice starts.
   */
  _smartChunk(text) {
    if (text.length <= TARGET_CHARS) {
      return [{ text, pos: 0 }];
    }
    const breakPoints = this._findBreakPoints(text.split("\n"));
    const chunks = [];
    let chunkStart = 0;
    while (chunkStart < text.length) {
      const remaining = text.length - chunkStart;
      if (remaining <= TARGET_CHARS + WINDOW_CHARS) {
        // The tail fits in one chunk; a tail that is too small is merged into
        // the previous chunk rather than stored as a tiny chunk of its own.
        const lastText = text.slice(chunkStart).trim();
        if (lastText.length >= MIN_CHUNK_CHARS || chunks.length === 0) {
          chunks.push({ text: lastText, pos: chunkStart });
        } else {
          chunks[chunks.length - 1].text += "\n" + lastText;
        }
        break;
      }
      const targetEnd = chunkStart + TARGET_CHARS;
      const windowStart = targetEnd - WINDOW_CHARS;
      let bestBreak = targetEnd;
      let bestScore = 0;
      for (const bp of breakPoints) {
        if (bp.pos <= chunkStart) continue;
        if (bp.pos > targetEnd + WINDOW_CHARS / 2) break;
        if (bp.pos < windowStart) continue;
        // Quadratic decay: break points far from the target lose up to 70% of their score.
        const distance = Math.abs(bp.pos - targetEnd);
        const decay = 1 - (distance / WINDOW_CHARS) ** 2 * 0.7;
        const finalScore = bp.score * decay;
        if (finalScore > bestScore) {
          bestScore = finalScore;
          bestBreak = bp.pos;
        }
      }
      const chunkText = text.slice(chunkStart, bestBreak).trim();
      if (chunkText.length >= MIN_CHUNK_CHARS) {
        chunks.push({ text: chunkText, pos: chunkStart });
      }
      chunkStart = bestBreak;
    }
    return chunks;
  }
  /**
   * Find all potential break points in the document with scores.
   * Lines inside a fenced code block never produce a break point; the line
   * after a closing fence does (score 80).
   *
   * @param lines Document lines, as produced by splitting on "\n".
   * @returns Array of `{ pos, score }` in ascending `pos` order, where `pos`
   *          is the character offset of the line start.
   */
  _findBreakPoints(lines) {
    const points = [];
    let charPos = 0;
    let inCodeBlock = false;
    for (const line of lines) {
      // +1 accounts for the "\n" removed by the split.
      const nextPos = charPos + line.length + 1;
      if (line.trimStart().startsWith("```")) {
        inCodeBlock = !inCodeBlock;
        if (!inCodeBlock) {
          points.push({ pos: nextPos, score: 80 });
        }
      } else if (!inCodeBlock) {
        const trimmed = line.trim();
        for (const [pattern, score] of BREAK_SCORES) {
          if (pattern.test(trimmed)) {
            points.push({ pos: charPos, score });
            break;
          }
        }
      }
      charPos = nextPos;
    }
    return points;
  }
  /**
   * Extract the document title: the text of the first level 1–3 heading, or
   * the file name without its extension when there is no such heading.
   */
  _extractTitle(content, filePath) {
    const match = content.match(/^#{1,3}\s+(.+)$/m);
    if (match) return match[1].trim();
    return path.basename(filePath, path.extname(filePath));
  }
};
|
|
227
|
-
|
|
228
|
-
// src/plugins/docs.ts
|
|
229
|
-
/**
 * "docs" plugin: registers document collections, indexes them through a
 * DocIndexer and answers vector searches over the indexed chunks.
 */
var DocsModuleImpl = class {
  /** @param opts Plugin options; stored on the instance as `opts`. */
  constructor(opts = {}) {
    this.opts = opts;
  }
  static {
    __name(this, "DocsModuleImpl");
  }
  name = "docs";
  hnsw;
  indexer;
  vecCache = /* @__PURE__ */ new Map();
  _db;
  _embedding;
  /**
   * Wire the plugin to its runtime context: keeps the database and embedding
   * provider, creates the HNSW index, asks the context to load the
   * `doc_vectors` table into the index and cache, and builds the indexer.
   * Must complete before any other method is used.
   */
  async initialize(ctx) {
    this._db = ctx.db;
    this._embedding = ctx.embedding;
    this.hnsw = await ctx.createHnsw();
    ctx.loadVectors("doc_vectors", "chunk_id", this.hnsw, this.vecCache);
    this.indexer = new DocIndexer(ctx.db, ctx.embedding, this.hnsw, this.vecCache);
  }
  /**
   * Register a document collection (replaces an existing row with the same name).
   * `pattern` defaults to "**\/*.md", `ignore` to an empty list, `context` to null.
   */
  addCollection(collection) {
    this._db.prepare(`
      INSERT OR REPLACE INTO collections (name, path, pattern, ignore_json, context)
      VALUES (?, ?, ?, ?, ?)
    `).run(
      collection.name,
      collection.path,
      collection.pattern ?? "**/*.md",
      JSON.stringify(collection.ignore ?? []),
      collection.context ?? null
    );
  }
  /** Remove a collection and its indexed data. */
  removeCollection(name) {
    this.indexer.removeCollection(name);
  }
  /** List all registered collections, with `ignore_json` parsed into `ignore`. */
  listCollections() {
    return this._db.prepare("SELECT * FROM collections").all().map((row) => ({
      name: row.name,
      path: row.path,
      pattern: row.pattern,
      ignore: JSON.parse(row.ignore_json),
      context: row.context
    }));
  }
  /**
   * Index all collections, or only those named in `options.collections`.
   * Incremental. `options.onProgress(collection, file, current, total)` is
   * called for every file.
   *
   * @returns Object mapping collection name to the indexer's result for it.
   */
  async indexCollections(options = {}) {
    const allCollections = this.listCollections();
    const toIndex = options.collections ? allCollections.filter((c) => options.collections.includes(c.name)) : allCollections;
    const results = {};
    for (const coll of toIndex) {
      results[coll.name] = await this.indexer.indexCollection(
        coll.name,
        coll.path,
        coll.pattern,
        {
          ignore: coll.ignore,
          onProgress: /* @__PURE__ */ __name((file, cur, total) => options.onProgress?.(coll.name, file, cur, total), "onProgress")
        }
      );
    }
    return results;
  }
  /**
   * Search documents only.
   *
   * @param query   Text to embed and look up in the HNSW index.
   * @param options `k` (default 8): number of hits requested from the index;
   *                `minScore`: drop hits scoring below it (0 is honoured);
   *                `collection`: keep only hits from that collection. The
   *                filters run after the k-nearest lookup, so fewer than `k`
   *                results can be returned.
   * @returns Array of `document` results with `score`, `filePath`, `content`,
   *          `context` and `metadata` (`collection`, `title`, `seq`).
   */
  async search(query, options) {
    const k = options?.k ?? 8;
    const minScore = options?.minScore;
    const queryVec = await this._embedding.embed(query);
    const hits = this.hnsw.search(queryVec, k);
    // Loop-invariant statement: prepare once, not once per hit.
    const findChunk = this._db.prepare(
      "SELECT * FROM doc_chunks WHERE id = ?"
    );
    const results = [];
    for (const hit of hits) {
      // Nullish check rather than truthiness, so an explicit `minScore: 0` still filters.
      if (minScore != null && hit.score < minScore) continue;
      const chunk = findChunk.get(hit.id);
      if (!chunk) continue;
      if (options?.collection && chunk.collection !== options.collection) continue;
      results.push({
        type: "document",
        score: hit.score,
        filePath: chunk.file_path,
        content: chunk.content,
        context: this._getDocContext(chunk.collection, chunk.file_path),
        metadata: {
          collection: chunk.collection,
          title: chunk.title,
          seq: chunk.seq
        }
      });
    }
    return results;
  }
  /** Add (or replace) the context description for a path in a collection. */
  addContext(collection, path2, context) {
    this._db.prepare(`
      INSERT OR REPLACE INTO path_contexts (collection, path, context)
      VALUES (?, ?, ?)
    `).run(collection, path2, context);
  }
  /** Remove context for a path. */
  removeContext(collection, path2) {
    this._db.prepare(
      "DELETE FROM path_contexts WHERE collection = ? AND path = ?"
    ).run(collection, path2);
  }
  /** List all context entries. */
  listContexts() {
    return this._db.prepare("SELECT * FROM path_contexts").all();
  }
  /** Row counts for collections, distinct documents and chunks, plus the HNSW index size. */
  stats() {
    return {
      collections: this._db.prepare("SELECT COUNT(*) as c FROM collections").get().c,
      documents: this._db.prepare("SELECT COUNT(DISTINCT file_path) as c FROM doc_chunks").get().c,
      chunks: this._db.prepare("SELECT COUNT(*) as c FROM doc_chunks").get().c,
      hnswSize: this.hnsw.size
    };
  }
  /**
   * Resolve context for a document: tries `path_contexts` for the file path
   * itself, then each parent directory up to "/", and finally falls back to
   * the collection's own context.
   *
   * @returns The context string, or undefined when none is defined.
   */
  _getDocContext(collection, filePath) {
    const parts = filePath.split("/");
    // Loop-invariant statement: prepare once, not once per path level.
    const findPathContext = this._db.prepare(
      "SELECT context FROM path_contexts WHERE collection = ? AND path = ?"
    );
    for (let i = parts.length; i >= 0; i--) {
      const checkPath = i === 0 ? "/" : "/" + parts.slice(0, i).join("/");
      const ctx = findPathContext.get(collection, checkPath);
      if (ctx) return ctx.context;
    }
    const coll = this._db.prepare(
      "SELECT context FROM collections WHERE name = ?"
    ).get(collection);
    return coll?.context ?? void 0;
  }
};
|
|
364
|
-
/**
 * Factory for the docs plugin.
 *
 * @param opts Options forwarded unchanged to the DocsModuleImpl constructor.
 * @returns A new DocsModuleImpl instance.
 */
function docs(opts) {
  const plugin = new DocsModuleImpl(opts);
  return plugin;
}
|
|
367
|
-
__name(docs, "docs");
|
|
368
|
-
|
|
369
|
-
export {
|
|
370
|
-
DocIndexer,
|
|
371
|
-
docs
|
|
372
|
-
};
|
|
373
|
-
//# sourceMappingURL=chunk-GOUBW7UA.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/indexers/doc-indexer.ts","../src/plugins/docs.ts"],"sourcesContent":["/**\n * BrainBank — Document Indexer\n * \n * Indexes generic document collections (markdown, text, etc.)\n * with heading-aware smart chunking, inspired by qmd.\n * \n * const indexer = new DocIndexer(db, embedding, hnsw, vecCache);\n * await indexer.indexCollection('notes', '/path/to/notes', '**\\/*.md');\n */\n\nimport * as fs from 'node:fs';\nimport * as path from 'node:path';\nimport { createHash } from 'node:crypto';\nimport { glob } from 'node:fs/promises';\nimport type { Database } from '../storage/database.ts';\nimport type { EmbeddingProvider, VectorIndex } from '../types.ts';\nimport type { HNSWIndex } from '../vector/hnsw.ts';\n\n// ── Break Point Scoring (qmd-inspired) ──────────────\n\ninterface BreakPoint {\n pos: number; // character position\n score: number; // break quality (higher = better)\n}\n\nconst BREAK_SCORES: [RegExp, number][] = [\n [/^# /, 100], // H1\n [/^## /, 90], // H2\n [/^### /, 80], // H3\n [/^#### /, 70], // H4\n [/^##### /, 60], // H5\n [/^###### /, 50], // H6\n [/^```/, 80], // Code fence\n [/^---$/, 60], // Horizontal rule\n [/^\\*\\*\\*$/, 60], // Horizontal rule alt\n [/^$/, 20], // Blank line (paragraph break)\n [/^[-*+] /, 5], // List item\n];\n\n// ── Chunk Target ────────────────────────────────────\n\nconst TARGET_CHARS = 3000; // ~900 tokens\nconst WINDOW_CHARS = 600; // search window before cutoff\nconst MIN_CHUNK_CHARS = 200; // don't create tiny chunks\n\n// ── DocIndexer ──────────────────────────────────────\n\nexport class DocIndexer {\n constructor(\n private _db: Database,\n private _embedding: EmbeddingProvider,\n private _hnsw: HNSWIndex,\n private _vecCache: Map<number, Float32Array>,\n ) {}\n\n /**\n * Index all documents in a collection.\n * Incremental — skips unchanged files (by content hash).\n */\n async indexCollection(\n collection: string,\n dirPath: string,\n pattern: string = '**/*.md',\n options: 
{\n ignore?: string[];\n onProgress?: (file: string, current: number, total: number) => void;\n } = {},\n ): Promise<{ indexed: number; skipped: number; chunks: number }> {\n // Resolve absolute path\n const absDir = path.resolve(dirPath);\n if (!fs.existsSync(absDir)) {\n throw new Error(`Collection path does not exist: ${absDir}`);\n }\n\n // Find files matching pattern\n const files: string[] = [];\n for await (const entry of glob(pattern, { cwd: absDir })) {\n const fullPath = path.join(absDir, entry);\n const stat = fs.statSync(fullPath);\n if (stat.isFile()) {\n // Check ignore patterns\n const shouldIgnore = options.ignore?.some(ig => {\n const igRegex = new RegExp(ig.replace(/\\*\\*/g, '.*').replace(/\\*/g, '[^/]*'));\n return igRegex.test(entry);\n });\n if (!shouldIgnore) {\n files.push(entry); // relative path\n }\n }\n }\n\n let indexed = 0;\n let skipped = 0;\n let totalChunks = 0;\n\n for (let i = 0; i < files.length; i++) {\n const relPath = files[i];\n const absPath = path.join(absDir, relPath);\n\n options.onProgress?.(relPath, i + 1, files.length);\n\n // Read content and hash\n const content = fs.readFileSync(absPath, 'utf-8');\n const hash = createHash('sha256').update(content).digest('hex').slice(0, 16);\n\n // Check if already indexed with same hash\n const existing = this._db.prepare(\n 'SELECT id FROM doc_chunks WHERE collection = ? AND file_path = ? AND content_hash = ? LIMIT 1'\n ).get(collection, relPath, hash) as any;\n\n if (existing) {\n skipped++;\n continue;\n }\n\n // Remove old chunks for this file\n this._db.prepare(\n 'DELETE FROM doc_chunks WHERE collection = ? 
AND file_path = ?'\n ).run(collection, relPath);\n\n // Extract title and chunk\n const title = this._extractTitle(content, relPath);\n const chunks = this._smartChunk(content);\n\n // Insert chunks\n const insertChunk = this._db.prepare(`\n INSERT INTO doc_chunks (collection, file_path, title, content, seq, pos, content_hash)\n VALUES (?, ?, ?, ?, ?, ?, ?)\n `);\n\n const chunkIds: number[] = [];\n\n this._db.transaction(() => {\n for (let seq = 0; seq < chunks.length; seq++) {\n const chunk = chunks[seq];\n const result = insertChunk.run(\n collection, relPath, title, chunk.text, seq, chunk.pos, hash,\n );\n chunkIds.push(Number(result.lastInsertRowid));\n }\n });\n\n // Generate embeddings\n const texts = chunks.map(c => `title: ${title} | text: ${c.text}`);\n const embeddings = await this._embedding.embedBatch(texts);\n\n // Store vectors\n const insertVec = this._db.prepare(\n 'INSERT OR REPLACE INTO doc_vectors (chunk_id, embedding) VALUES (?, ?)'\n );\n\n this._db.transaction(() => {\n for (let j = 0; j < chunkIds.length; j++) {\n const buf = Buffer.from(embeddings[j].buffer);\n insertVec.run(chunkIds[j], buf);\n this._hnsw.add(embeddings[j], chunkIds[j]);\n this._vecCache.set(chunkIds[j], embeddings[j]);\n }\n });\n\n indexed++;\n totalChunks += chunks.length;\n }\n\n return { indexed, skipped, chunks: totalChunks };\n }\n\n /**\n * Remove all indexed data for a collection.\n */\n removeCollection(collection: string): void {\n this._db.prepare('DELETE FROM doc_chunks WHERE collection = ?').run(collection);\n this._db.prepare('DELETE FROM collections WHERE name = ?').run(collection);\n this._db.prepare('DELETE FROM path_contexts WHERE collection = ?').run(collection);\n }\n\n // ── Smart Chunking ──────────────────────────────\n\n /**\n * Split document into chunks at natural markdown boundaries.\n * Uses heading-aware scoring like qmd.\n */\n private _smartChunk(text: string): { text: string; pos: number }[] {\n if (text.length <= TARGET_CHARS) {\n return 
[{ text, pos: 0 }];\n }\n\n const lines = text.split('\\n');\n const breakPoints = this._findBreakPoints(lines);\n const chunks: { text: string; pos: number }[] = [];\n\n let chunkStart = 0; // char position\n let lineStart = 0; // line index\n\n while (chunkStart < text.length) {\n const remaining = text.length - chunkStart;\n if (remaining <= TARGET_CHARS + WINDOW_CHARS) {\n // Last chunk — take everything\n const lastText = text.slice(chunkStart).trim();\n if (lastText.length >= MIN_CHUNK_CHARS) {\n chunks.push({ text: lastText, pos: chunkStart });\n } else if (chunks.length > 0) {\n // Merge with previous chunk\n chunks[chunks.length - 1].text += '\\n' + lastText;\n } else {\n chunks.push({ text: lastText, pos: chunkStart });\n }\n break;\n }\n\n // Find best break point in window\n const targetEnd = chunkStart + TARGET_CHARS;\n const windowStart = targetEnd - WINDOW_CHARS;\n\n let bestBreak = targetEnd;\n let bestScore = 0;\n\n for (const bp of breakPoints) {\n if (bp.pos <= chunkStart) continue;\n if (bp.pos > targetEnd + WINDOW_CHARS / 2) break;\n if (bp.pos < windowStart) continue;\n\n // Score decay: prefer closer break points\n const distance = Math.abs(bp.pos - targetEnd);\n const decay = 1 - (distance / WINDOW_CHARS) ** 2 * 0.7;\n const finalScore = bp.score * decay;\n\n if (finalScore > bestScore) {\n bestScore = finalScore;\n bestBreak = bp.pos;\n }\n }\n\n const chunkText = text.slice(chunkStart, bestBreak).trim();\n if (chunkText.length >= MIN_CHUNK_CHARS) {\n chunks.push({ text: chunkText, pos: chunkStart });\n }\n\n chunkStart = bestBreak;\n }\n\n return chunks;\n }\n\n /**\n * Find all potential break points in the document with scores.\n */\n private _findBreakPoints(lines: string[]): BreakPoint[] {\n const points: BreakPoint[] = [];\n let charPos = 0;\n let inCodeBlock = false;\n\n for (const line of lines) {\n // Track code fences\n if (line.trimStart().startsWith('```')) {\n inCodeBlock = !inCodeBlock;\n if (!inCodeBlock) {\n // End of code 
block is a good break point\n points.push({ pos: charPos + line.length + 1, score: 80 });\n }\n charPos += line.length + 1;\n continue;\n }\n\n // Skip break points inside code blocks\n if (inCodeBlock) {\n charPos += line.length + 1;\n continue;\n }\n\n // Score this line as a potential break point\n for (const [pattern, score] of BREAK_SCORES) {\n if (pattern.test(line.trim())) {\n points.push({ pos: charPos, score });\n break;\n }\n }\n\n charPos += line.length + 1;\n }\n\n return points;\n }\n\n /**\n * Extract document title from first heading or filename.\n */\n private _extractTitle(content: string, filePath: string): string {\n const match = content.match(/^#{1,3}\\s+(.+)$/m);\n if (match) return match[1].trim();\n return path.basename(filePath, path.extname(filePath));\n }\n}\n","/**\n * BrainBank — Docs Module\n * \n * Index any folder of markdown/text files (notes, docs, wikis).\n * Heading-aware smart chunking inspired by qmd.\n * \n * import { docs } from 'brainbank/docs';\n * brain.use(docs());\n */\n\nimport type { BrainBankModule, ModuleContext } from './types.ts';\nimport type { HNSWIndex } from '../vector/hnsw.ts';\nimport type { Database } from '../storage/database.ts';\nimport type { EmbeddingProvider, DocumentCollection, SearchResult } from '../types.ts';\nimport { DocIndexer } from '../indexers/doc-indexer.ts';\n\nexport interface DocsModuleOptions {}\n\nclass DocsModuleImpl implements BrainBankModule {\n readonly name = 'docs';\n hnsw!: HNSWIndex;\n indexer!: DocIndexer;\n vecCache = new Map<number, Float32Array>();\n private _db!: Database;\n private _embedding!: EmbeddingProvider;\n\n constructor(private opts: DocsModuleOptions = {}) {}\n\n async initialize(ctx: ModuleContext): Promise<void> {\n this._db = ctx.db;\n this._embedding = ctx.embedding;\n this.hnsw = await ctx.createHnsw();\n ctx.loadVectors('doc_vectors', 'chunk_id', this.hnsw, this.vecCache);\n this.indexer = new DocIndexer(ctx.db, ctx.embedding, this.hnsw, this.vecCache);\n 
}\n\n /** Register a document collection. */\n addCollection(collection: DocumentCollection): void {\n this._db.prepare(`\n INSERT OR REPLACE INTO collections (name, path, pattern, ignore_json, context)\n VALUES (?, ?, ?, ?, ?)\n `).run(\n collection.name,\n collection.path,\n collection.pattern ?? '**/*.md',\n JSON.stringify(collection.ignore ?? []),\n collection.context ?? null,\n );\n }\n\n /** Remove a collection and its indexed data. */\n removeCollection(name: string): void {\n this.indexer.removeCollection(name);\n }\n\n /** List all registered collections. */\n listCollections(): DocumentCollection[] {\n return (this._db.prepare('SELECT * FROM collections').all() as any[]).map(row => ({\n name: row.name,\n path: row.path,\n pattern: row.pattern,\n ignore: JSON.parse(row.ignore_json),\n context: row.context,\n }));\n }\n\n /** Index all (or specific) collections. Incremental. */\n async indexCollections(options: {\n collections?: string[];\n onProgress?: (collection: string, file: string, current: number, total: number) => void;\n } = {}): Promise<Record<string, { indexed: number; skipped: number; chunks: number }>> {\n const allCollections = this.listCollections();\n const toIndex = options.collections\n ? allCollections.filter(c => options.collections!.includes(c.name))\n : allCollections;\n\n const results: Record<string, { indexed: number; skipped: number; chunks: number }> = {};\n\n for (const coll of toIndex) {\n results[coll.name] = await this.indexer.indexCollection(\n coll.name,\n coll.path,\n coll.pattern,\n {\n ignore: coll.ignore,\n onProgress: (file, cur, total) => options.onProgress?.(coll.name, file, cur, total),\n },\n );\n }\n\n return results;\n }\n\n /** Search documents only. */\n async search(query: string, options?: {\n collection?: string;\n k?: number;\n minScore?: number;\n }): Promise<SearchResult[]> {\n const k = options?.k ?? 
8;\n const queryVec = await this._embedding.embed(query);\n const hits = this.hnsw.search(queryVec, k);\n\n const results: SearchResult[] = [];\n for (const hit of hits) {\n if (options?.minScore && hit.score < options.minScore) continue;\n\n const chunk = this._db.prepare(\n 'SELECT * FROM doc_chunks WHERE id = ?'\n ).get(hit.id) as any;\n\n if (!chunk) continue;\n if (options?.collection && chunk.collection !== options.collection) continue;\n\n const ctx = this._getDocContext(chunk.collection, chunk.file_path);\n\n results.push({\n type: 'document',\n score: hit.score,\n filePath: chunk.file_path,\n content: chunk.content,\n context: ctx,\n metadata: {\n collection: chunk.collection,\n title: chunk.title,\n seq: chunk.seq,\n },\n });\n }\n\n return results;\n }\n\n /** Add context description for a document path. */\n addContext(collection: string, path: string, context: string): void {\n this._db.prepare(`\n INSERT OR REPLACE INTO path_contexts (collection, path, context)\n VALUES (?, ?, ?)\n `).run(collection, path, context);\n }\n\n /** Remove context for a path. */\n removeContext(collection: string, path: string): void {\n this._db.prepare(\n 'DELETE FROM path_contexts WHERE collection = ? AND path = ?'\n ).run(collection, path);\n }\n\n /** List all context entries. */\n listContexts(): { collection: string; path: string; context: string }[] {\n return this._db.prepare('SELECT * FROM path_contexts').all() as any[];\n }\n\n stats(): Record<string, any> {\n return {\n collections: (this._db.prepare('SELECT COUNT(*) as c FROM collections').get() as any).c,\n documents: (this._db.prepare('SELECT COUNT(DISTINCT file_path) as c FROM doc_chunks').get() as any).c,\n chunks: (this._db.prepare('SELECT COUNT(*) as c FROM doc_chunks').get() as any).c,\n hnswSize: this.hnsw.size,\n };\n }\n\n /** Resolve context for a document (checks path_contexts tree → collection context). 
*/\n private _getDocContext(collection: string, filePath: string): string | undefined {\n const parts = filePath.split('/');\n for (let i = parts.length; i >= 0; i--) {\n const checkPath = i === 0 ? '/' : '/' + parts.slice(0, i).join('/');\n const ctx = this._db.prepare(\n 'SELECT context FROM path_contexts WHERE collection = ? AND path = ?'\n ).get(collection, checkPath) as any;\n if (ctx) return ctx.context;\n }\n\n const coll = this._db.prepare(\n 'SELECT context FROM collections WHERE name = ?'\n ).get(collection) as any;\n return coll?.context ?? undefined;\n }\n}\n\n/** Create a document collections module. */\nexport function docs(opts?: DocsModuleOptions): BrainBankModule {\n return new DocsModuleImpl(opts);\n}\n"],"mappings":";;;;;AAUA,YAAY,QAAQ;AACpB,YAAY,UAAU;AACtB,SAAS,kBAAkB;AAC3B,SAAS,YAAY;AAYrB,IAAM,eAAmC;AAAA,EACrC,CAAC,OAAY,GAAG;AAAA;AAAA,EAChB,CAAC,QAAa,EAAE;AAAA;AAAA,EAChB,CAAC,SAAa,EAAE;AAAA;AAAA,EAChB,CAAC,UAAa,EAAE;AAAA;AAAA,EAChB,CAAC,WAAa,EAAE;AAAA;AAAA,EAChB,CAAC,YAAa,EAAE;AAAA;AAAA,EAChB,CAAC,QAAa,EAAE;AAAA;AAAA,EAChB,CAAC,SAAa,EAAE;AAAA;AAAA,EAChB,CAAC,YAAa,EAAE;AAAA;AAAA,EAChB,CAAC,MAAa,EAAE;AAAA;AAAA,EAChB,CAAC,WAAc,CAAC;AAAA;AACpB;AAIA,IAAM,eAAe;AACrB,IAAM,eAAe;AACrB,IAAM,kBAAkB;AAIjB,IAAM,aAAN,MAAiB;AAAA,EACpB,YACY,KACA,YACA,OACA,WACV;AAJU;AACA;AACA;AACA;AAAA,EACT;AAAA,EArDP,OA+CwB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYpB,MAAM,gBACF,YACA,SACA,UAAkB,WAClB,UAGI,CAAC,GACwD;AAE7D,UAAM,SAAc,aAAQ,OAAO;AACnC,QAAI,CAAI,cAAW,MAAM,GAAG;AACxB,YAAM,IAAI,MAAM,mCAAmC,MAAM,EAAE;AAAA,IAC/D;AAGA,UAAM,QAAkB,CAAC;AACzB,qBAAiB,SAAS,KAAK,SAAS,EAAE,KAAK,OAAO,CAAC,GAAG;AACtD,YAAM,WAAgB,UAAK,QAAQ,KAAK;AACxC,YAAM,OAAU,YAAS,QAAQ;AACjC,UAAI,KAAK,OAAO,GAAG;AAEf,cAAM,eAAe,QAAQ,QAAQ,KAAK,QAAM;AAC5C,gBAAM,UAAU,IAAI,OAAO,GAAG,QAAQ,SAAS,IAAI,EAAE,QAAQ,OAAO,OAAO,CAAC;AAC5E,iBAAO,QAAQ,KAAK,KAAK;AAAA,QAC7B,CAAC;AACD,YAAI,CAAC,cAAc;AACf,gBAAM,KAAK,KAAK;AAAA,QACpB;AAAA,MACJ;AAAA,IACJ;AAEA,QAAI,UAAU;AACd,QAAI,UAAU;AACd,QAAI,cAAc;AAElB,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACn
C,YAAM,UAAU,MAAM,CAAC;AACvB,YAAM,UAAe,UAAK,QAAQ,OAAO;AAEzC,cAAQ,aAAa,SAAS,IAAI,GAAG,MAAM,MAAM;AAGjD,YAAM,UAAa,gBAAa,SAAS,OAAO;AAChD,YAAM,OAAO,WAAW,QAAQ,EAAE,OAAO,OAAO,EAAE,OAAO,KAAK,EAAE,MAAM,GAAG,EAAE;AAG3E,YAAM,WAAW,KAAK,IAAI;AAAA,QACtB;AAAA,MACJ,EAAE,IAAI,YAAY,SAAS,IAAI;AAE/B,UAAI,UAAU;AACV;AACA;AAAA,MACJ;AAGA,WAAK,IAAI;AAAA,QACL;AAAA,MACJ,EAAE,IAAI,YAAY,OAAO;AAGzB,YAAM,QAAQ,KAAK,cAAc,SAAS,OAAO;AACjD,YAAM,SAAS,KAAK,YAAY,OAAO;AAGvC,YAAM,cAAc,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA,aAGpC;AAED,YAAM,WAAqB,CAAC;AAE5B,WAAK,IAAI,YAAY,MAAM;AACvB,iBAAS,MAAM,GAAG,MAAM,OAAO,QAAQ,OAAO;AAC1C,gBAAM,QAAQ,OAAO,GAAG;AACxB,gBAAM,SAAS,YAAY;AAAA,YACvB;AAAA,YAAY;AAAA,YAAS;AAAA,YAAO,MAAM;AAAA,YAAM;AAAA,YAAK,MAAM;AAAA,YAAK;AAAA,UAC5D;AACA,mBAAS,KAAK,OAAO,OAAO,eAAe,CAAC;AAAA,QAChD;AAAA,MACJ,CAAC;AAGD,YAAM,QAAQ,OAAO,IAAI,OAAK,UAAU,KAAK,YAAY,EAAE,IAAI,EAAE;AACjE,YAAM,aAAa,MAAM,KAAK,WAAW,WAAW,KAAK;AAGzD,YAAM,YAAY,KAAK,IAAI;AAAA,QACvB;AAAA,MACJ;AAEA,WAAK,IAAI,YAAY,MAAM;AACvB,iBAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACtC,gBAAM,MAAM,OAAO,KAAK,WAAW,CAAC,EAAE,MAAM;AAC5C,oBAAU,IAAI,SAAS,CAAC,GAAG,GAAG;AAC9B,eAAK,MAAM,IAAI,WAAW,CAAC,GAAG,SAAS,CAAC,CAAC;AACzC,eAAK,UAAU,IAAI,SAAS,CAAC,GAAG,WAAW,CAAC,CAAC;AAAA,QACjD;AAAA,MACJ,CAAC;AAED;AACA,qBAAe,OAAO;AAAA,IAC1B;AAEA,WAAO,EAAE,SAAS,SAAS,QAAQ,YAAY;AAAA,EACnD;AAAA;AAAA;AAAA;AAAA,EAKA,iBAAiB,YAA0B;AACvC,SAAK,IAAI,QAAQ,6CAA6C,EAAE,IAAI,UAAU;AAC9E,SAAK,IAAI,QAAQ,wCAAwC,EAAE,IAAI,UAAU;AACzE,SAAK,IAAI,QAAQ,gDAAgD,EAAE,IAAI,UAAU;AAAA,EACrF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQQ,YAAY,MAA+C;AAC/D,QAAI,KAAK,UAAU,cAAc;AAC7B,aAAO,CAAC,EAAE,MAAM,KAAK,EAAE,CAAC;AAAA,IAC5B;AAEA,UAAM,QAAQ,KAAK,MAAM,IAAI;AAC7B,UAAM,cAAc,KAAK,iBAAiB,KAAK;AAC/C,UAAM,SAA0C,CAAC;AAEjD,QAAI,aAAa;AACjB,QAAI,YAAY;AAEhB,WAAO,aAAa,KAAK,QAAQ;AAC7B,YAAM,YAAY,KAAK,SAAS;AAChC,UAAI,aAAa,eAAe,cAAc;AAE1C,cAAM,WAAW,KAAK,MAAM,UAAU,EAAE,KAAK;AAC7C,YAAI,SAAS,UAAU,iBAAiB;AACpC,iBAAO,KAAK,EAAE,MAAM,UAAU,KAAK,WAAW,CAAC;AAAA,QACnD,WAAW,OAAO,SAAS,GAAG;AAE1B,iBAAO,OAAO,SAAS,CAAC,EAAE,QAAQ,OAAO;AAAA,QAC7C,OAAO;AACH,iBAAO,KAAK,E
AAE,MAAM,UAAU,KAAK,WAAW,CAAC;AAAA,QACnD;AACA;AAAA,MACJ;AAGA,YAAM,YAAY,aAAa;AAC/B,YAAM,cAAc,YAAY;AAEhC,UAAI,YAAY;AAChB,UAAI,YAAY;AAEhB,iBAAW,MAAM,aAAa;AAC1B,YAAI,GAAG,OAAO,WAAY;AAC1B,YAAI,GAAG,MAAM,YAAY,eAAe,EAAG;AAC3C,YAAI,GAAG,MAAM,YAAa;AAG1B,cAAM,WAAW,KAAK,IAAI,GAAG,MAAM,SAAS;AAC5C,cAAM,QAAQ,KAAK,WAAW,iBAAiB,IAAI;AACnD,cAAM,aAAa,GAAG,QAAQ;AAE9B,YAAI,aAAa,WAAW;AACxB,sBAAY;AACZ,sBAAY,GAAG;AAAA,QACnB;AAAA,MACJ;AAEA,YAAM,YAAY,KAAK,MAAM,YAAY,SAAS,EAAE,KAAK;AACzD,UAAI,UAAU,UAAU,iBAAiB;AACrC,eAAO,KAAK,EAAE,MAAM,WAAW,KAAK,WAAW,CAAC;AAAA,MACpD;AAEA,mBAAa;AAAA,IACjB;AAEA,WAAO;AAAA,EACX;AAAA;AAAA;AAAA;AAAA,EAKQ,iBAAiB,OAA+B;AACpD,UAAM,SAAuB,CAAC;AAC9B,QAAI,UAAU;AACd,QAAI,cAAc;AAElB,eAAW,QAAQ,OAAO;AAEtB,UAAI,KAAK,UAAU,EAAE,WAAW,KAAK,GAAG;AACpC,sBAAc,CAAC;AACf,YAAI,CAAC,aAAa;AAEd,iBAAO,KAAK,EAAE,KAAK,UAAU,KAAK,SAAS,GAAG,OAAO,GAAG,CAAC;AAAA,QAC7D;AACA,mBAAW,KAAK,SAAS;AACzB;AAAA,MACJ;AAGA,UAAI,aAAa;AACb,mBAAW,KAAK,SAAS;AACzB;AAAA,MACJ;AAGA,iBAAW,CAAC,SAAS,KAAK,KAAK,cAAc;AACzC,YAAI,QAAQ,KAAK,KAAK,KAAK,CAAC,GAAG;AAC3B,iBAAO,KAAK,EAAE,KAAK,SAAS,MAAM,CAAC;AACnC;AAAA,QACJ;AAAA,MACJ;AAEA,iBAAW,KAAK,SAAS;AAAA,IAC7B;AAEA,WAAO;AAAA,EACX;AAAA;AAAA;AAAA;AAAA,EAKQ,cAAc,SAAiB,UAA0B;AAC7D,UAAM,QAAQ,QAAQ,MAAM,kBAAkB;AAC9C,QAAI,MAAO,QAAO,MAAM,CAAC,EAAE,KAAK;AAChC,WAAY,cAAS,UAAe,aAAQ,QAAQ,CAAC;AAAA,EACzD;AACJ;;;AClRA,IAAM,iBAAN,MAAgD;AAAA,EAQ5C,YAAoB,OAA0B,CAAC,GAAG;AAA9B;AAAA,EAA+B;AAAA,EA1BvD,OAkBgD;AAAA;AAAA;AAAA,EACnC,OAAO;AAAA,EAChB;AAAA,EACA;AAAA,EACA,WAAW,oBAAI,IAA0B;AAAA,EACjC;AAAA,EACA;AAAA,EAIR,MAAM,WAAW,KAAmC;AAChD,SAAK,MAAM,IAAI;AACf,SAAK,aAAa,IAAI;AACtB,SAAK,OAAO,MAAM,IAAI,WAAW;AACjC,QAAI,YAAY,eAAe,YAAY,KAAK,MAAM,KAAK,QAAQ;AACnE,SAAK,UAAU,IAAI,WAAW,IAAI,IAAI,IAAI,WAAW,KAAK,MAAM,KAAK,QAAQ;AAAA,EACjF;AAAA;AAAA,EAGA,cAAc,YAAsC;AAChD,SAAK,IAAI,QAAQ;AAAA;AAAA;AAAA,SAGhB,EAAE;AAAA,MACC,WAAW;AAAA,MACX,WAAW;AAAA,MACX,WAAW,WAAW;AAAA,MACtB,KAAK,UAAU,WAAW,UAAU,CAAC,CAAC;AAAA,MACtC,WAAW,WAAW;AAAA,IAC1B;AAAA,EACJ;AAAA;AAAA,EAGA,iBAAiB,MAAoB;AACjC,SAAK,QAAQ,iBAAiB,IAAI;AAAA,EACtC;AAAA;AAAA,EAGA,k
BAAwC;AACpC,WAAQ,KAAK,IAAI,QAAQ,2BAA2B,EAAE,IAAI,EAAY,IAAI,UAAQ;AAAA,MAC9E,MAAM,IAAI;AAAA,MACV,MAAM,IAAI;AAAA,MACV,SAAS,IAAI;AAAA,MACb,QAAQ,KAAK,MAAM,IAAI,WAAW;AAAA,MAClC,SAAS,IAAI;AAAA,IACjB,EAAE;AAAA,EACN;AAAA;AAAA,EAGA,MAAM,iBAAiB,UAGnB,CAAC,GAAkF;AACnF,UAAM,iBAAiB,KAAK,gBAAgB;AAC5C,UAAM,UAAU,QAAQ,cAClB,eAAe,OAAO,OAAK,QAAQ,YAAa,SAAS,EAAE,IAAI,CAAC,IAChE;AAEN,UAAM,UAAgF,CAAC;AAEvF,eAAW,QAAQ,SAAS;AACxB,cAAQ,KAAK,IAAI,IAAI,MAAM,KAAK,QAAQ;AAAA,QACpC,KAAK;AAAA,QACL,KAAK;AAAA,QACL,KAAK;AAAA,QACL;AAAA,UACI,QAAQ,KAAK;AAAA,UACb,YAAY,wBAAC,MAAM,KAAK,UAAU,QAAQ,aAAa,KAAK,MAAM,MAAM,KAAK,KAAK,GAAtE;AAAA,QAChB;AAAA,MACJ;AAAA,IACJ;AAEA,WAAO;AAAA,EACX;AAAA;AAAA,EAGA,MAAM,OAAO,OAAe,SAIA;AACxB,UAAM,IAAI,SAAS,KAAK;AACxB,UAAM,WAAW,MAAM,KAAK,WAAW,MAAM,KAAK;AAClD,UAAM,OAAO,KAAK,KAAK,OAAO,UAAU,CAAC;AAEzC,UAAM,UAA0B,CAAC;AACjC,eAAW,OAAO,MAAM;AACpB,UAAI,SAAS,YAAY,IAAI,QAAQ,QAAQ,SAAU;AAEvD,YAAM,QAAQ,KAAK,IAAI;AAAA,QACnB;AAAA,MACJ,EAAE,IAAI,IAAI,EAAE;AAEZ,UAAI,CAAC,MAAO;AACZ,UAAI,SAAS,cAAc,MAAM,eAAe,QAAQ,WAAY;AAEpE,YAAM,MAAM,KAAK,eAAe,MAAM,YAAY,MAAM,SAAS;AAEjE,cAAQ,KAAK;AAAA,QACT,MAAM;AAAA,QACN,OAAO,IAAI;AAAA,QACX,UAAU,MAAM;AAAA,QAChB,SAAS,MAAM;AAAA,QACf,SAAS;AAAA,QACT,UAAU;AAAA,UACN,YAAY,MAAM;AAAA,UAClB,OAAO,MAAM;AAAA,UACb,KAAK,MAAM;AAAA,QACf;AAAA,MACJ,CAAC;AAAA,IACL;AAEA,WAAO;AAAA,EACX;AAAA;AAAA,EAGA,WAAW,YAAoBA,OAAc,SAAuB;AAChE,SAAK,IAAI,QAAQ;AAAA;AAAA;AAAA,SAGhB,EAAE,IAAI,YAAYA,OAAM,OAAO;AAAA,EACpC;AAAA;AAAA,EAGA,cAAc,YAAoBA,OAAoB;AAClD,SAAK,IAAI;AAAA,MACL;AAAA,IACJ,EAAE,IAAI,YAAYA,KAAI;AAAA,EAC1B;AAAA;AAAA,EAGA,eAAwE;AACpE,WAAO,KAAK,IAAI,QAAQ,6BAA6B,EAAE,IAAI;AAAA,EAC/D;AAAA,EAEA,QAA6B;AACzB,WAAO;AAAA,MACH,aAAc,KAAK,IAAI,QAAQ,uCAAuC,EAAE,IAAI,EAAU;AAAA,MACtF,WAAY,KAAK,IAAI,QAAQ,uDAAuD,EAAE,IAAI,EAAU;AAAA,MACpG,QAAS,KAAK,IAAI,QAAQ,sCAAsC,EAAE,IAAI,EAAU;AAAA,MAChF,UAAU,KAAK,KAAK;AAAA,IACxB;AAAA,EACJ;AAAA;AAAA,EAGQ,eAAe,YAAoB,UAAsC;AAC7E,UAAM,QAAQ,SAAS,MAAM,GAAG;AAChC,aAAS,IAAI,MAAM,QAAQ,KAAK,GAAG,KAAK;AACpC,YAAM,YAAY,MAAM,IAAI,MAAM,MAAM,MAAM,MAAM,GAAG,CAAC,EAAE,KAAK,GAAG;AAClE,YAAM,MAAM,
KAAK,IAAI;AAAA,QACjB;AAAA,MACJ,EAAE,IAAI,YAAY,SAAS;AAC3B,UAAI,IAAK,QAAO,IAAI;AAAA,IACxB;AAEA,UAAM,OAAO,KAAK,IAAI;AAAA,MAClB;AAAA,IACJ,EAAE,IAAI,UAAU;AAChB,WAAO,MAAM,WAAW;AAAA,EAC5B;AACJ;AAGO,SAAS,KAAK,MAA2C;AAC5D,SAAO,IAAI,eAAe,IAAI;AAClC;AAFgB;","names":["path"]}
|