brainbank 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +149 -16
- package/dist/{types-Da_zLLOl.d.ts → base-9vfWRHCV.d.ts} +131 -31
- package/dist/{chunk-YGSEUWLV.js → chunk-6MFTQV3O.js} +911 -674
- package/dist/chunk-6MFTQV3O.js.map +1 -0
- package/dist/chunk-7JCEW7LT.js +266 -0
- package/dist/chunk-7JCEW7LT.js.map +1 -0
- package/dist/{chunk-GOUBW7UA.js → chunk-F6SJ3U4H.js} +98 -34
- package/dist/chunk-F6SJ3U4H.js.map +1 -0
- package/dist/{chunk-MJ3Y24H6.js → chunk-FJJY4H2Y.js} +11 -11
- package/dist/chunk-FJJY4H2Y.js.map +1 -0
- package/dist/{chunk-3GAIDXRW.js → chunk-GUT5MSJT.js} +5 -11
- package/dist/chunk-GUT5MSJT.js.map +1 -0
- package/dist/{chunk-2P3EGY6S.js → chunk-QNHBCOKB.js} +2 -2
- package/dist/chunk-QNHBCOKB.js.map +1 -0
- package/dist/{chunk-4ZKBQ33J.js → chunk-V4UJKXPK.js} +23 -5
- package/dist/chunk-V4UJKXPK.js.map +1 -0
- package/dist/chunk-WR4WXKJT.js +723 -0
- package/dist/chunk-WR4WXKJT.js.map +1 -0
- package/dist/{chunk-Z5SU54HP.js → chunk-X6645UVR.js} +3 -3
- package/dist/chunk-X6645UVR.js.map +1 -0
- package/dist/cli.js +150 -100
- package/dist/cli.js.map +1 -1
- package/dist/code.d.ts +5 -5
- package/dist/code.js +1 -1
- package/dist/docs.d.ts +4 -6
- package/dist/docs.js +1 -1
- package/dist/git.d.ts +5 -5
- package/dist/git.js +1 -1
- package/dist/index.d.ts +95 -104
- package/dist/index.js +13 -13
- package/dist/memory.d.ts +5 -7
- package/dist/memory.js +9 -12
- package/dist/memory.js.map +1 -1
- package/dist/notes.d.ts +4 -6
- package/dist/notes.js +7 -10
- package/dist/notes.js.map +1 -1
- package/dist/{openai-PCTYLOWI.js → openai-CYDMYX7X.js} +2 -2
- package/package.json +24 -4
- package/dist/chunk-2P3EGY6S.js.map +0 -1
- package/dist/chunk-3GAIDXRW.js.map +0 -1
- package/dist/chunk-4ZKBQ33J.js.map +0 -1
- package/dist/chunk-EDKSKLX4.js +0 -490
- package/dist/chunk-EDKSKLX4.js.map +0 -1
- package/dist/chunk-GOUBW7UA.js.map +0 -1
- package/dist/chunk-MJ3Y24H6.js.map +0 -1
- package/dist/chunk-N6ZMBFDE.js +0 -224
- package/dist/chunk-N6ZMBFDE.js.map +0 -1
- package/dist/chunk-YGSEUWLV.js.map +0 -1
- package/dist/chunk-Z5SU54HP.js.map +0 -1
- /package/dist/{openai-PCTYLOWI.js.map → openai-CYDMYX7X.js.map} +0 -0
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import {
|
|
2
|
+
__name
|
|
3
|
+
} from "./chunk-7QVYU63E.js";
|
|
4
|
+
|
|
5
|
+
// src/indexers/git/git-indexer.ts
|
|
6
|
+
var GitIndexer = class {
  static {
    __name(this, "GitIndexer");
  }
  _deps;
  _repoPath;
  _maxDiffBytes;
  /**
   * @param {string} repoPath Path handed to simple-git as the repository to read.
   * @param {object} deps Collaborators used by the indexer: `db`, `hnsw`,
   *   `vectorCache` and `embedding`.
   * @param {number} [maxDiffBytes=8192] Longest diff kept per commit. The limit is
   *   compared against `String#length`, so it counts UTF-16 code units.
   */
  constructor(repoPath, deps, maxDiffBytes = 8192) {
    this._deps = deps;
    this._repoPath = repoPath;
    this._maxDiffBytes = maxDiffBytes;
  }
  /**
   * Index git history.
   * Only processes commits not already in the database.
   *
   * @param {object} [options]
   * @param {number} [options.depth=500] Passed to `git log` as `maxCount`.
   * @param {Function} [options.onProgress] Called once per commit with
   *   `(label, position, total)` before the commit is examined.
   * @returns {Promise<{indexed: number, skipped: number}>} Counts of commits
   *   written and commits left alone. Both are 0 when simple-git cannot be
   *   loaded or the log cannot be read.
   */
  async index(options = {}) {
    const { depth = 500, onProgress } = options;
    let git2;
    try {
      const simpleGit = (await import("simple-git")).default;
      git2 = simpleGit(this._repoPath);
    } catch {
      // simple-git could not be loaded or rejected the path: nothing to index.
      return { indexed: 0, skipped: 0 };
    }
    let log;
    try {
      log = await git2.log({ maxCount: depth });
    } catch {
      // Reading the log failed; reported as "nothing indexed" rather than thrown.
      return { indexed: 0, skipped: 0 };
    }
    const commits = log.all;
    let indexed = 0;
    let skipped = 0;
    const db = this._deps.db;
    // Statements are prepared once and reused for every commit.
    const stmtCheck = db.prepare(
      `SELECT gc.id, gv.commit_id AS has_vector
       FROM git_commits gc
       LEFT JOIN git_vectors gv ON gv.commit_id = gc.id
       WHERE gc.hash = ?`
    );
    const stmtDeleteFiles = db.prepare("DELETE FROM commit_files WHERE commit_id = ?");
    const stmtDeleteCommit = db.prepare("DELETE FROM git_commits WHERE id = ?");
    const stmtInsertCommit = db.prepare(`
      INSERT OR IGNORE INTO git_commits (hash, short_hash, message, author, date, timestamp, files_json, diff, additions, deletions, is_merge)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    const stmtInsertFile = db.prepare(
      "INSERT INTO commit_files (commit_id, file_path) VALUES (?, ?)"
    );
    const stmtInsertVec = db.prepare(
      "INSERT OR IGNORE INTO git_vectors (commit_id, embedding) VALUES (?, ?)"
    );

    // Phase 1: gather per-commit data (async git calls).
    const toProcess = [];
    for (let i = 0; i < commits.length; i++) {
      const c = commits[i];
      onProgress?.(`[${c.hash.slice(0, 7)}] ${c.message.slice(0, 50)}`, i + 1, commits.length);
      const exists = stmtCheck.get(c.hash);
      if (exists?.has_vector) {
        skipped++;
        continue;
      }
      if (exists) {
        // Commit row without a vector: remove it so it is re-indexed from scratch.
        stmtDeleteFiles.run(exists.id);
        stmtDeleteCommit.run(exists.id);
      }
      let diff = "";
      let additions = 0;
      let deletions = 0;
      let filesChanged = [];
      try {
        const numstat = await git2.raw(["show", "--numstat", "--format=", c.hash]);
        ({ filesChanged, additions, deletions } = this._parseNumstat(numstat));
        const rawDiff = await git2.raw(["show", "--format=", "--unified=3", "--no-color", c.hash]);
        diff = rawDiff.length > this._maxDiffBytes
          ? rawDiff.slice(0, this._maxDiffBytes) + "\n... [truncated]"
          : rawDiff;
      } catch {
        // Best-effort: when `git show` fails the commit is still indexed with
        // whatever was collected before the failure.
      }
      const isMerge = /^(Merge|merge)\s+(branch|pull|remote|tag)\b/.test(c.message);
      // Only the first 2000 characters of the diff go into the embedded text.
      const text = [
        `Commit: ${c.message}`,
        `Author: ${c.author_name}`,
        `Date: ${c.date}`,
        filesChanged.length > 0 ? `Files: ${filesChanged.join(", ")}` : "",
        diff ? `Changes:\n${diff.slice(0, 2000)}` : ""
      ].filter(Boolean).join("\n");
      toProcess.push({ commit: c, diff, additions, deletions, filesChanged, isMerge, text });
    }
    if (toProcess.length === 0) return { indexed, skipped };

    // Phase 2: embed every commit text in a single batch.
    const embedTexts = toProcess.map((d) => d.text);
    const vecs = await this._deps.embedding.embedBatch(embedTexts);

    // Phase 3: write rows. INSERT OR IGNORE may skip a row, so the ids that were
    // really inserted are tracked separately.
    // NOTE(review): assumes db.transaction(fn) runs fn before returning, since
    // `inserted` is read immediately afterwards; confirm against the Database wrapper.
    const inserted = [];
    db.transaction(() => {
      for (let i = 0; i < toProcess.length; i++) {
        const d = toProcess[i];
        const c = d.commit;
        const ts = Math.floor(new Date(c.date).getTime() / 1e3);
        const result = stmtInsertCommit.run(
          c.hash,
          c.hash.slice(0, 7),
          c.message,
          c.author_name,
          c.date,
          ts,
          JSON.stringify(d.filesChanged),
          d.diff || null,
          d.additions,
          d.deletions,
          d.isMerge ? 1 : 0
        );
        if (result.changes === 0) {
          skipped++;
          continue;
        }
        const commitId = Number(result.lastInsertRowid);
        for (const f of d.filesChanged) {
          stmtInsertFile.run(commitId, f);
        }
        stmtInsertVec.run(commitId, this._vectorToBuffer(vecs[i]));
        inserted.push({ commitId, vecIndex: i });
        indexed++;
      }
    });

    // Phase 4: update the in-memory index and cache after the database writes.
    const newCommitIds = [];
    for (const { commitId, vecIndex } of inserted) {
      this._deps.hnsw.add(vecs[vecIndex], commitId);
      this._deps.vectorCache.set(commitId, vecs[vecIndex]);
      newCommitIds.push(commitId);
    }
    if (newCommitIds.length > 0) {
      this._computeCoEdits(newCommitIds);
    }
    return { indexed, skipped };
  }
  /**
   * Parse `git show --numstat` output: one `added<TAB>deleted<TAB>path` record per line.
   * Counts that are not numeric are left out of the totals, but the path is still recorded.
   *
   * @param {string} numstat Raw command output.
   * @returns {{filesChanged: string[], additions: number, deletions: number}}
   */
  _parseNumstat(numstat) {
    const filesChanged = [];
    let additions = 0;
    let deletions = 0;
    for (const line of numstat.trim().split("\n")) {
      if (!line.trim()) continue;
      // Explicit tab escape: the fields are tab-separated and paths may contain spaces.
      const parts = line.split("\t");
      if (parts.length < 3) continue;
      const add = Number.parseInt(parts[0], 10);
      const del = Number.parseInt(parts[1], 10);
      const file = parts[2].trim();
      if (!file) continue;
      filesChanged.push(file);
      if (!Number.isNaN(add)) additions += add;
      if (!Number.isNaN(del)) deletions += del;
    }
    return { filesChanged, additions, deletions };
  }
  /**
   * Wrap exactly the bytes of a typed-array vector in a Buffer.
   * `vec.buffer` alone would expose the entire backing ArrayBuffer, which is
   * larger than the vector whenever `vec` is a view (for example a `subarray`).
   *
   * @param {Float32Array} vec Embedding vector.
   * @returns {Buffer} Buffer over the vector's own byte range (no copy).
   */
  _vectorToBuffer(vec) {
    return Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength);
  }
  /**
   * Compute which files tend to be edited together.
   * Stored in the co_edits table for later suggestion.
   *
   * @param {number[]} newCommitIds Ids of the commits whose file lists are counted.
   */
  _computeCoEdits(newCommitIds) {
    if (newCommitIds.length === 0) return;
    const db = this._deps.db;
    // Ids are bound in groups of 500 so a single statement never carries more
    // placeholders than that.
    const CHUNK_SIZE = 500;
    const byCommit = /* @__PURE__ */ new Map();
    for (let i = 0; i < newCommitIds.length; i += CHUNK_SIZE) {
      const chunk = newCommitIds.slice(i, i + CHUNK_SIZE);
      const placeholders = chunk.map(() => "?").join(",");
      const rows = db.prepare(
        `SELECT commit_id, file_path FROM commit_files WHERE commit_id IN (${placeholders}) ORDER BY commit_id`
      ).all(...chunk);
      // Grouped row by row: the row count is unbounded, so it is never spread
      // into a single call's argument list.
      for (const r of rows) {
        let files = byCommit.get(r.commit_id);
        if (!files) {
          files = [];
          byCommit.set(r.commit_id, files);
        }
        files.push(r.file_path);
      }
    }
    const upsert = db.prepare(
      `INSERT INTO co_edits (file_a, file_b, count)
       VALUES (?, ?, 1)
       ON CONFLICT(file_a, file_b) DO UPDATE SET count = count + 1`
    );
    db.transaction(() => {
      for (const files of byCommit.values()) {
        // Single-file commits have no pairs; commits touching more than 20 files are ignored.
        if (files.length < 2 || files.length > 20) continue;
        for (let i = 0; i < files.length; i++) {
          for (let j = i + 1; j < files.length; j++) {
            // Sorted so each unordered pair maps to one (file_a, file_b) key.
            const [a, b] = [files[i], files[j]].sort();
            upsert.run(a, b);
          }
        }
      }
    });
  }
};
|
|
189
|
+
|
|
190
|
+
// src/indexers/git/co-edit-analyzer.ts
|
|
191
|
+
var CoEditAnalyzer = class {
  static {
    __name(this, "CoEditAnalyzer");
  }
  /**
   * @param {object} _db Database handle; only `prepare(sql).all(...)` is used here.
   */
  constructor(_db) {
    this._db = _db;
  }
  /**
   * Look up the files recorded in `co_edits` as changing together with `filePath`.
   *
   * @param {string} filePath File to find companions for; matched against both
   *   columns of the pair.
   * @param {number} [limit=5] Maximum number of rows requested.
   * @returns {{file: string, count: number}[]} Companion files, ordered by the
   *   query from highest co-edit count down.
   */
  suggest(filePath, limit = 5) {
    const statement = this._db.prepare(`
      SELECT
        CASE WHEN file_a = ? THEN file_b ELSE file_a END AS file,
        count
      FROM co_edits
      WHERE file_a = ? OR file_b = ?
      ORDER BY count DESC
      LIMIT ?
    `);
    // The path is bound three times: once for the CASE and once per WHERE operand.
    const matches = statement.all(filePath, filePath, filePath, limit);
    const suggestions = [];
    for (const match of matches) {
      suggestions.push({ file: match.file, count: match.count });
    }
    return suggestions;
  }
};
|
|
215
|
+
|
|
216
|
+
// src/indexers/git/git-plugin.ts
|
|
217
|
+
var GitPlugin = class {
  /**
   * @param {object} [opts] Plugin options: `repoPath`, `depth`, `maxDiffBytes`, `name`.
   */
  constructor(opts = {}) {
    this.opts = opts;
    this.name = opts.name ?? "git";
  }
  static {
    __name(this, "GitPlugin");
  }
  name;
  hnsw;
  indexer;
  coEdits;
  vecCache = /* @__PURE__ */ new Map();
  /**
   * Wire the plugin to the host context: obtain the shared "git" HNSW index and
   * vector cache, load stored vectors when the shared index was just created,
   * then build the indexer and the co-edit analyzer.
   *
   * @param {object} ctx Host context providing `getOrCreateSharedHnsw`,
   *   `loadVectors`, `config`, `db` and `embedding`.
   * @returns {Promise<void>}
   */
  async initialize(ctx) {
    const shared = await ctx.getOrCreateSharedHnsw("git", 5e5);
    this.hnsw = shared.hnsw;
    this.vecCache = shared.vecCache;
    if (shared.isNew) {
      ctx.loadVectors("git_vectors", "commit_id", this.hnsw, this.vecCache);
    }
    // Plugin options win over the host configuration.
    const repoPath = this.opts.repoPath ?? ctx.config.repoPath;
    this.indexer = new GitIndexer(repoPath, {
      db: ctx.db,
      hnsw: this.hnsw,
      vectorCache: this.vecCache,
      embedding: ctx.embedding
    }, this.opts.maxDiffBytes ?? ctx.config.maxDiffBytes);
    this.coEdits = new CoEditAnalyzer(ctx.db);
  }
  /**
   * Run the git indexer.
   *
   * @param {object} [options]
   * @param {number} [options.depth] Commit limit for this run. When omitted, the
   *   `depth` given to the plugin constructor is used; when neither is set the
   *   options are forwarded untouched.
   * @param {Function} [options.onProgress] Progress callback forwarded to the indexer.
   * @returns {Promise<object>} Whatever the indexer's `index` resolves to.
   */
  async index(options = {}) {
    // Previously the constructor's `depth` option was accepted but never forwarded.
    const depth = options.depth ?? this.opts.depth;
    return this.indexer.index(depth == null ? options : { ...options, depth });
  }
  /**
   * @param {string} filePath File to find companions for.
   * @param {number} [limit=5] Maximum number of suggestions.
   * @returns {object[]} Result of the co-edit analyzer's `suggest`.
   */
  suggestCoEdits(filePath, limit = 5) {
    return this.coEdits.suggest(filePath, limit);
  }
  /**
   * @returns {{hnswSize: number}} Current `size` of the HNSW index.
   */
  stats() {
    return { hnswSize: this.hnsw.size };
  }
};
|
|
256
|
+
/**
 * Factory for the git history plugin.
 *
 * @param {object} [opts] Options passed straight to the GitPlugin constructor.
 * @returns {object} A new GitPlugin instance.
 */
function git(opts) {
  const plugin = new GitPlugin(opts);
  return plugin;
}
__name(git, "git");
|
|
260
|
+
|
|
261
|
+
export {
|
|
262
|
+
GitIndexer,
|
|
263
|
+
CoEditAnalyzer,
|
|
264
|
+
git
|
|
265
|
+
};
|
|
266
|
+
//# sourceMappingURL=chunk-7JCEW7LT.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/indexers/git/git-indexer.ts","../src/indexers/git/co-edit-analyzer.ts","../src/indexers/git/git-plugin.ts"],"sourcesContent":["/**\n * BrainBank — Git Indexer\n * \n * Reads git history, embeds commit messages + diffs,\n * and computes file co-edit relationships.\n * Incremental: only processes new commits.\n */\n\nimport type { Database } from '../../db/database.ts';\nimport type { EmbeddingProvider, ProgressCallback, IndexResult } from '../../types.ts';\nimport type { HNSWIndex } from '../../providers/vector/hnsw.ts';\n\nexport interface GitIndexerDeps {\n db: Database;\n hnsw: HNSWIndex;\n vectorCache: Map<number, Float32Array>;\n embedding: EmbeddingProvider;\n}\n\nexport interface GitIndexOptions {\n depth?: number;\n onProgress?: ProgressCallback;\n}\n\nexport class GitIndexer {\n private _deps: GitIndexerDeps;\n private _repoPath: string;\n private _maxDiffBytes: number;\n\n constructor(repoPath: string, deps: GitIndexerDeps, maxDiffBytes: number = 8192) {\n this._deps = deps;\n this._repoPath = repoPath;\n this._maxDiffBytes = maxDiffBytes;\n }\n\n /**\n * Index git history.\n * Only processes commits not already in the database.\n */\n async index(options: GitIndexOptions = {}): Promise<IndexResult> {\n const { depth = 500, onProgress } = options;\n\n let git: any;\n try {\n const simpleGit = (await import('simple-git')).default;\n git = simpleGit(this._repoPath);\n } catch {\n return { indexed: 0, skipped: 0 };\n }\n\n let log: any;\n try { log = await git.log({ maxCount: depth }); }\n catch { return { indexed: 0, skipped: 0 }; }\n\n const commits = log.all;\n let indexed = 0, skipped = 0;\n\n // ── Prepared statements (hoisted outside loop) ──────────────\n const stmtCheck = this._deps.db.prepare(\n `SELECT gc.id, gv.commit_id AS has_vector\n FROM git_commits gc\n LEFT JOIN git_vectors gv ON gv.commit_id = gc.id\n WHERE gc.hash = ?`\n );\n const stmtDeleteFiles = this._deps.db.prepare('DELETE FROM commit_files WHERE 
commit_id = ?');\n const stmtDeleteCommit = this._deps.db.prepare('DELETE FROM git_commits WHERE id = ?');\n const stmtInsertCommit = this._deps.db.prepare(`\n INSERT OR IGNORE INTO git_commits (hash, short_hash, message, author, date, timestamp, files_json, diff, additions, deletions, is_merge)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n `);\n const stmtInsertFile = this._deps.db.prepare(\n 'INSERT INTO commit_files (commit_id, file_path) VALUES (?, ?)'\n );\n const stmtInsertVec = this._deps.db.prepare(\n 'INSERT OR IGNORE INTO git_vectors (commit_id, embedding) VALUES (?, ?)'\n );\n\n // ── Phase 1: Collect commit data (async git calls) ──────────\n interface CommitData {\n commit: any;\n diff: string;\n additions: number;\n deletions: number;\n filesChanged: string[];\n isMerge: boolean;\n text: string;\n }\n const toProcess: CommitData[] = [];\n\n for (let i = 0; i < commits.length; i++) {\n const c = commits[i];\n onProgress?.(`[${c.hash.slice(0, 7)}] ${c.message.slice(0, 50)}`, i + 1, commits.length);\n\n // Skip if already indexed WITH vector\n const exists = stmtCheck.get(c.hash) as any;\n if (exists?.has_vector) { skipped++; continue; }\n\n // Zombie commit (data exists but vector missing) — clean up\n if (exists && !exists.has_vector) {\n stmtDeleteFiles.run(exists.id);\n stmtDeleteCommit.run(exists.id);\n }\n\n // Get diff and stat (async git calls)\n let diff = '';\n let additions = 0, deletions = 0;\n const filesChanged: string[] = [];\n\n try {\n const numstat = await git.raw(['show', '--numstat', '--format=', c.hash]);\n for (const line of numstat.trim().split('\\n')) {\n if (!line.trim()) continue;\n const parts = line.split('\\t');\n if (parts.length < 3) continue;\n const add = parseInt(parts[0], 10);\n const del = parseInt(parts[1], 10);\n const file = parts[2].trim();\n if (file) {\n filesChanged.push(file);\n if (!isNaN(add)) additions += add;\n if (!isNaN(del)) deletions += del;\n }\n }\n\n const rawDiff = await git.raw(['show', '--format=', 
'--unified=3', '--no-color', c.hash]);\n diff = rawDiff.length > this._maxDiffBytes\n ? rawDiff.slice(0, this._maxDiffBytes) + '\\n... [truncated]'\n : rawDiff;\n } catch {}\n\n const isMerge = /^(Merge|merge)\\s+(branch|pull|remote|tag)\\b/.test(c.message);\n const text = [\n `Commit: ${c.message}`,\n `Author: ${c.author_name}`,\n `Date: ${c.date}`,\n filesChanged.length > 0 ? `Files: ${filesChanged.join(', ')}` : '',\n diff ? `Changes:\\n${diff.slice(0, 2000)}` : '',\n ].filter(Boolean).join('\\n');\n\n toProcess.push({ commit: c, diff, additions, deletions, filesChanged, isMerge, text });\n }\n\n if (toProcess.length === 0) return { indexed, skipped };\n\n // ── Phase 2: Batch embed all commit texts at once ───────────\n const embedTexts = toProcess.map(d => d.text);\n const vecs = await this._deps.embedding.embedBatch(embedTexts);\n\n // ── Phase 3: Insert all data in a single transaction ────────\n // Track which items actually got inserted (INSERT OR IGNORE may skip)\n const inserted: { commitId: number; vecIndex: number }[] = [];\n\n this._deps.db.transaction(() => {\n for (let i = 0; i < toProcess.length; i++) {\n const d = toProcess[i];\n const c = d.commit;\n const ts = Math.floor(new Date(c.date).getTime() / 1000);\n\n const result = stmtInsertCommit.run(\n c.hash, c.hash.slice(0, 7), c.message, c.author_name, c.date,\n ts, JSON.stringify(d.filesChanged), d.diff || null,\n d.additions, d.deletions, d.isMerge ? 
1 : 0,\n );\n\n if (result.changes === 0) { skipped++; continue; }\n const commitId = Number(result.lastInsertRowid);\n\n for (const f of d.filesChanged) {\n stmtInsertFile.run(commitId, f);\n }\n\n stmtInsertVec.run(commitId, Buffer.from(vecs[i].buffer));\n inserted.push({ commitId, vecIndex: i });\n indexed++;\n }\n });\n\n // ── Phase 4: HNSW updated AFTER transaction commit ──────────\n const newCommitIds: number[] = [];\n for (const { commitId, vecIndex } of inserted) {\n this._deps.hnsw.add(vecs[vecIndex], commitId);\n this._deps.vectorCache.set(commitId, vecs[vecIndex]);\n newCommitIds.push(commitId);\n }\n\n // Compute co-edits only for new commits\n if (newCommitIds.length > 0) {\n this._computeCoEdits(newCommitIds);\n }\n\n return { indexed, skipped };\n }\n\n /**\n * Compute which files tend to be edited together.\n * Stored in the co_edits table for later suggestion.\n */\n private _computeCoEdits(newCommitIds: number[]): void {\n if (newCommitIds.length === 0) return;\n\n // Chunk queries to stay under SQLite's 999-variable limit\n const CHUNK_SIZE = 500;\n const allRows: any[] = [];\n for (let i = 0; i < newCommitIds.length; i += CHUNK_SIZE) {\n const chunk = newCommitIds.slice(i, i + CHUNK_SIZE);\n const placeholders = chunk.map(() => '?').join(',');\n const rows = this._deps.db.prepare(\n `SELECT commit_id, file_path FROM commit_files WHERE commit_id IN (${placeholders}) ORDER BY commit_id`\n ).all(...chunk) as any[];\n allRows.push(...rows);\n }\n const rows = allRows;\n\n const byCommit = new Map<number, string[]>();\n for (const r of rows) {\n if (!byCommit.has(r.commit_id)) byCommit.set(r.commit_id, []);\n byCommit.get(r.commit_id)!.push(r.file_path);\n }\n\n const upsert = this._deps.db.prepare(\n `INSERT INTO co_edits (file_a, file_b, count)\n VALUES (?, ?, 1)\n ON CONFLICT(file_a, file_b) DO UPDATE SET count = count + 1`\n );\n\n this._deps.db.transaction(() => {\n for (const files of byCommit.values()) {\n // Skip very small or very large 
changesets\n if (files.length < 2 || files.length > 20) continue;\n for (let i = 0; i < files.length; i++) {\n for (let j = i + 1; j < files.length; j++) {\n const [a, b] = [files[i], files[j]].sort();\n upsert.run(a, b);\n }\n }\n }\n });\n }\n}\n","/**\n * BrainBank — Co-Edit Analyzer\n * \n * Suggests files that historically change together.\n * Based on git commit co-occurrence analysis.\n */\n\nimport type { Database } from '../../db/database.ts';\nimport type { CoEditSuggestion } from '../../types.ts';\n\nexport class CoEditAnalyzer {\n constructor(private _db: Database) {}\n\n /**\n * Get files that frequently change alongside the given file.\n * Returns sorted by co-edit count (highest first).\n */\n suggest(filePath: string, limit: number = 5): CoEditSuggestion[] {\n const rows = this._db.prepare(`\n SELECT\n CASE WHEN file_a = ? THEN file_b ELSE file_a END AS file,\n count\n FROM co_edits\n WHERE file_a = ? OR file_b = ?\n ORDER BY count DESC\n LIMIT ?\n `).all(filePath, filePath, filePath, limit) as any[];\n\n return rows.map(r => ({ file: r.file, count: r.count }));\n }\n}\n","/**\n * BrainBank — Git Module\n * \n * Git history indexing with co-edit relationships.\n * \n * import { git } from 'brainbank/git';\n * brain.use(git({ depth: 500 }));\n * \n * // Multi-repo: namespace to avoid key collisions\n * brain\n * .use(git({ repoPath: './frontend', name: 'git:frontend' }))\n * .use(git({ repoPath: './backend', name: 'git:backend' }));\n */\n\nimport type { Indexer, IndexerContext } from '../base.ts';\nimport type { HNSWIndex } from '../../providers/vector/hnsw.ts';\nimport { GitIndexer } from './git-indexer.ts';\nimport { CoEditAnalyzer } from './co-edit-analyzer.ts';\nimport type { IndexResult, ProgressCallback, CoEditSuggestion } from '../../types.ts';\n\nexport interface GitPluginOptions {\n /** Repository path. Default: from config */\n repoPath?: string;\n /** Max commits to index. Default: from config */\n depth?: number;\n /** Max diff bytes. 
Default: from config */\n maxDiffBytes?: number;\n /** Custom indexer name for multi-repo (e.g. 'git:frontend'). Default: 'git' */\n name?: string;\n}\n\nclass GitPlugin implements Indexer {\n readonly name: string;\n hnsw!: HNSWIndex;\n indexer!: GitIndexer;\n coEdits!: CoEditAnalyzer;\n vecCache = new Map<number, Float32Array>();\n\n constructor(private opts: GitPluginOptions = {}) {\n this.name = opts.name ?? 'git';\n }\n\n async initialize(ctx: IndexerContext): Promise<void> {\n // Use shared HNSW so all git indexers share one index\n const shared = await ctx.getOrCreateSharedHnsw('git', 500_000);\n this.hnsw = shared.hnsw;\n this.vecCache = shared.vecCache;\n\n if (shared.isNew) {\n ctx.loadVectors('git_vectors', 'commit_id', this.hnsw, this.vecCache);\n }\n\n const repoPath = this.opts.repoPath ?? ctx.config.repoPath;\n this.indexer = new GitIndexer(repoPath, {\n db: ctx.db,\n hnsw: this.hnsw,\n vectorCache: this.vecCache,\n embedding: ctx.embedding,\n }, this.opts.maxDiffBytes ?? ctx.config.maxDiffBytes);\n\n this.coEdits = new CoEditAnalyzer(ctx.db);\n }\n\n async index(options: {\n depth?: number;\n onProgress?: ProgressCallback;\n } = {}): Promise<IndexResult> {\n return this.indexer.index(options);\n }\n\n suggestCoEdits(filePath: string, limit: number = 5): CoEditSuggestion[] {\n return this.coEdits.suggest(filePath, limit);\n }\n\n stats(): Record<string, any> {\n return { hnswSize: this.hnsw.size };\n }\n}\n\n/** Create a git history plugin. 
*/\nexport function git(opts?: GitPluginOptions): Indexer {\n return new GitPlugin(opts);\n}\n"],"mappings":";;;;;AAwBO,IAAM,aAAN,MAAiB;AAAA,EAxBxB,OAwBwB;AAAA;AAAA;AAAA,EACZ;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,UAAkB,MAAsB,eAAuB,MAAM;AAC7E,SAAK,QAAQ;AACb,SAAK,YAAY;AACjB,SAAK,gBAAgB;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,MAAM,UAA2B,CAAC,GAAyB;AAC7D,UAAM,EAAE,QAAQ,KAAK,WAAW,IAAI;AAEpC,QAAIA;AACJ,QAAI;AACA,YAAM,aAAa,MAAM,OAAO,YAAY,GAAG;AAC/C,MAAAA,OAAM,UAAU,KAAK,SAAS;AAAA,IAClC,QAAQ;AACJ,aAAO,EAAE,SAAS,GAAG,SAAS,EAAE;AAAA,IACpC;AAEA,QAAI;AACJ,QAAI;AAAE,YAAM,MAAMA,KAAI,IAAI,EAAE,UAAU,MAAM,CAAC;AAAA,IAAG,QAC1C;AAAE,aAAO,EAAE,SAAS,GAAG,SAAS,EAAE;AAAA,IAAG;AAE3C,UAAM,UAAU,IAAI;AACpB,QAAI,UAAU,GAAG,UAAU;AAG3B,UAAM,YAAY,KAAK,MAAM,GAAG;AAAA,MAC5B;AAAA;AAAA;AAAA;AAAA,IAIJ;AACA,UAAM,kBAAkB,KAAK,MAAM,GAAG,QAAQ,8CAA8C;AAC5F,UAAM,mBAAmB,KAAK,MAAM,GAAG,QAAQ,sCAAsC;AACrF,UAAM,mBAAmB,KAAK,MAAM,GAAG,QAAQ;AAAA;AAAA;AAAA,SAG9C;AACD,UAAM,iBAAiB,KAAK,MAAM,GAAG;AAAA,MACjC;AAAA,IACJ;AACA,UAAM,gBAAgB,KAAK,MAAM,GAAG;AAAA,MAChC;AAAA,IACJ;AAYA,UAAM,YAA0B,CAAC;AAEjC,aAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACrC,YAAM,IAAI,QAAQ,CAAC;AACnB,mBAAa,IAAI,EAAE,KAAK,MAAM,GAAG,CAAC,CAAC,KAAK,EAAE,QAAQ,MAAM,GAAG,EAAE,CAAC,IAAI,IAAI,GAAG,QAAQ,MAAM;AAGvF,YAAM,SAAS,UAAU,IAAI,EAAE,IAAI;AACnC,UAAI,QAAQ,YAAY;AAAE;AAAW;AAAA,MAAU;AAG/C,UAAI,UAAU,CAAC,OAAO,YAAY;AAC9B,wBAAgB,IAAI,OAAO,EAAE;AAC7B,yBAAiB,IAAI,OAAO,EAAE;AAAA,MAClC;AAGA,UAAI,OAAO;AACX,UAAI,YAAY,GAAG,YAAY;AAC/B,YAAM,eAAyB,CAAC;AAEhC,UAAI;AACA,cAAM,UAAU,MAAMA,KAAI,IAAI,CAAC,QAAQ,aAAa,aAAa,EAAE,IAAI,CAAC;AACxE,mBAAW,QAAQ,QAAQ,KAAK,EAAE,MAAM,IAAI,GAAG;AAC3C,cAAI,CAAC,KAAK,KAAK,EAAG;AAClB,gBAAM,QAAQ,KAAK,MAAM,GAAI;AAC7B,cAAI,MAAM,SAAS,EAAG;AACtB,gBAAM,MAAM,SAAS,MAAM,CAAC,GAAG,EAAE;AACjC,gBAAM,MAAM,SAAS,MAAM,CAAC,GAAG,EAAE;AACjC,gBAAM,OAAO,MAAM,CAAC,EAAE,KAAK;AAC3B,cAAI,MAAM;AACN,yBAAa,KAAK,IAAI;AACtB,gBAAI,CAAC,MAAM,GAAG,EAAG,cAAa;AAC9B,gBAAI,CAAC,MAAM,GAAG,EAAG,cAAa;AAAA,UAClC;AAAA,QACJ;AAEA,cAAM,UAAU,MAAMA,KAAI,IAAI,CAAC,QAAQ,aAAa,eAAe,cAAc,EAAE,IAAI,CAAC
;AACxF,eAAO,QAAQ,SAAS,KAAK,gBACvB,QAAQ,MAAM,GAAG,KAAK,aAAa,IAAI,sBACvC;AAAA,MACV,QAAQ;AAAA,MAAC;AAET,YAAM,UAAU,8CAA8C,KAAK,EAAE,OAAO;AAC5E,YAAM,OAAO;AAAA,QACT,WAAW,EAAE,OAAO;AAAA,QACpB,WAAW,EAAE,WAAW;AAAA,QACxB,SAAS,EAAE,IAAI;AAAA,QACf,aAAa,SAAS,IAAI,UAAU,aAAa,KAAK,IAAI,CAAC,KAAK;AAAA,QAChE,OAAO;AAAA,EAAa,KAAK,MAAM,GAAG,GAAI,CAAC,KAAK;AAAA,MAChD,EAAE,OAAO,OAAO,EAAE,KAAK,IAAI;AAE3B,gBAAU,KAAK,EAAE,QAAQ,GAAG,MAAM,WAAW,WAAW,cAAc,SAAS,KAAK,CAAC;AAAA,IACzF;AAEA,QAAI,UAAU,WAAW,EAAG,QAAO,EAAE,SAAS,QAAQ;AAGtD,UAAM,aAAa,UAAU,IAAI,OAAK,EAAE,IAAI;AAC5C,UAAM,OAAO,MAAM,KAAK,MAAM,UAAU,WAAW,UAAU;AAI7D,UAAM,WAAqD,CAAC;AAE5D,SAAK,MAAM,GAAG,YAAY,MAAM;AAC5B,eAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACvC,cAAM,IAAI,UAAU,CAAC;AACrB,cAAM,IAAI,EAAE;AACZ,cAAM,KAAK,KAAK,MAAM,IAAI,KAAK,EAAE,IAAI,EAAE,QAAQ,IAAI,GAAI;AAEvD,cAAM,SAAS,iBAAiB;AAAA,UAC5B,EAAE;AAAA,UAAM,EAAE,KAAK,MAAM,GAAG,CAAC;AAAA,UAAG,EAAE;AAAA,UAAS,EAAE;AAAA,UAAa,EAAE;AAAA,UACxD;AAAA,UAAI,KAAK,UAAU,EAAE,YAAY;AAAA,UAAG,EAAE,QAAQ;AAAA,UAC9C,EAAE;AAAA,UAAW,EAAE;AAAA,UAAW,EAAE,UAAU,IAAI;AAAA,QAC9C;AAEA,YAAI,OAAO,YAAY,GAAG;AAAE;AAAW;AAAA,QAAU;AACjD,cAAM,WAAW,OAAO,OAAO,eAAe;AAE9C,mBAAW,KAAK,EAAE,cAAc;AAC5B,yBAAe,IAAI,UAAU,CAAC;AAAA,QAClC;AAEA,sBAAc,IAAI,UAAU,OAAO,KAAK,KAAK,CAAC,EAAE,MAAM,CAAC;AACvD,iBAAS,KAAK,EAAE,UAAU,UAAU,EAAE,CAAC;AACvC;AAAA,MACJ;AAAA,IACJ,CAAC;AAGD,UAAM,eAAyB,CAAC;AAChC,eAAW,EAAE,UAAU,SAAS,KAAK,UAAU;AAC3C,WAAK,MAAM,KAAK,IAAI,KAAK,QAAQ,GAAG,QAAQ;AAC5C,WAAK,MAAM,YAAY,IAAI,UAAU,KAAK,QAAQ,CAAC;AACnD,mBAAa,KAAK,QAAQ;AAAA,IAC9B;AAGA,QAAI,aAAa,SAAS,GAAG;AACzB,WAAK,gBAAgB,YAAY;AAAA,IACrC;AAEA,WAAO,EAAE,SAAS,QAAQ;AAAA,EAC9B;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,gBAAgB,cAA8B;AAClD,QAAI,aAAa,WAAW,EAAG;AAG/B,UAAM,aAAa;AACnB,UAAM,UAAiB,CAAC;AACxB,aAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK,YAAY;AACtD,YAAM,QAAQ,aAAa,MAAM,GAAG,IAAI,UAAU;AAClD,YAAM,eAAe,MAAM,IAAI,MAAM,GAAG,EAAE,KAAK,GAAG;AAClD,YAAMC,QAAO,KAAK,MAAM,GAAG;AAAA,QACvB,qEAAqE,YAAY;AAAA,MACrF,EAAE,IAAI,GAAG,KAAK;AACd,cAAQ,KAAK,GAAGA,KAAI;AAAA,IACxB;AACA,UAAM,OAAO;AAEb,UAAM,WAAW,oBAAI,IAAsB;AA
C3C,eAAW,KAAK,MAAM;AAClB,UAAI,CAAC,SAAS,IAAI,EAAE,SAAS,EAAG,UAAS,IAAI,EAAE,WAAW,CAAC,CAAC;AAC5D,eAAS,IAAI,EAAE,SAAS,EAAG,KAAK,EAAE,SAAS;AAAA,IAC/C;AAEA,UAAM,SAAS,KAAK,MAAM,GAAG;AAAA,MACzB;AAAA;AAAA;AAAA,IAGJ;AAEA,SAAK,MAAM,GAAG,YAAY,MAAM;AAC5B,iBAAW,SAAS,SAAS,OAAO,GAAG;AAEnC,YAAI,MAAM,SAAS,KAAK,MAAM,SAAS,GAAI;AAC3C,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACnC,mBAAS,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACvC,kBAAM,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,EAAE,KAAK;AACzC,mBAAO,IAAI,GAAG,CAAC;AAAA,UACnB;AAAA,QACJ;AAAA,MACJ;AAAA,IACJ,CAAC;AAAA,EACL;AACJ;;;ACpOO,IAAM,iBAAN,MAAqB;AAAA,EACxB,YAAoB,KAAe;AAAf;AAAA,EAAgB;AAAA,EAXxC,OAU4B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOxB,QAAQ,UAAkB,QAAgB,GAAuB;AAC7D,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,SAQ7B,EAAE,IAAI,UAAU,UAAU,UAAU,KAAK;AAE1C,WAAO,KAAK,IAAI,QAAM,EAAE,MAAM,EAAE,MAAM,OAAO,EAAE,MAAM,EAAE;AAAA,EAC3D;AACJ;;;ACCA,IAAM,YAAN,MAAmC;AAAA,EAO/B,YAAoB,OAAyB,CAAC,GAAG;AAA7B;AAChB,SAAK,OAAO,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAxCJ,OA+BmC;AAAA;AAAA;AAAA,EACtB;AAAA,EACT;AAAA,EACA;AAAA,EACA;AAAA,EACA,WAAW,oBAAI,IAA0B;AAAA,EAMzC,MAAM,WAAW,KAAoC;AAEjD,UAAM,SAAS,MAAM,IAAI,sBAAsB,OAAO,GAAO;AAC7D,SAAK,OAAO,OAAO;AACnB,SAAK,WAAW,OAAO;AAEvB,QAAI,OAAO,OAAO;AACd,UAAI,YAAY,eAAe,aAAa,KAAK,MAAM,KAAK,QAAQ;AAAA,IACxE;AAEA,UAAM,WAAW,KAAK,KAAK,YAAY,IAAI,OAAO;AAClD,SAAK,UAAU,IAAI,WAAW,UAAU;AAAA,MACpC,IAAI,IAAI;AAAA,MACR,MAAM,KAAK;AAAA,MACX,aAAa,KAAK;AAAA,MAClB,WAAW,IAAI;AAAA,IACnB,GAAG,KAAK,KAAK,gBAAgB,IAAI,OAAO,YAAY;AAEpD,SAAK,UAAU,IAAI,eAAe,IAAI,EAAE;AAAA,EAC5C;AAAA,EAEA,MAAM,MAAM,UAGR,CAAC,GAAyB;AAC1B,WAAO,KAAK,QAAQ,MAAM,OAAO;AAAA,EACrC;AAAA,EAEA,eAAe,UAAkB,QAAgB,GAAuB;AACpE,WAAO,KAAK,QAAQ,QAAQ,UAAU,KAAK;AAAA,EAC/C;AAAA,EAEA,QAA6B;AACzB,WAAO,EAAE,UAAU,KAAK,KAAK,KAAK;AAAA,EACtC;AACJ;AAGO,SAAS,IAAI,MAAkC;AAClD,SAAO,IAAI,UAAU,IAAI;AAC7B;AAFgB;","names":["git","rows"]}
|
|
@@ -2,11 +2,10 @@ import {
|
|
|
2
2
|
__name
|
|
3
3
|
} from "./chunk-7QVYU63E.js";
|
|
4
4
|
|
|
5
|
-
// src/indexers/
|
|
5
|
+
// src/indexers/docs/docs-indexer.ts
|
|
6
6
|
import * as fs from "fs";
|
|
7
7
|
import * as path from "path";
|
|
8
8
|
import { createHash } from "crypto";
|
|
9
|
-
import { glob } from "fs/promises";
|
|
10
9
|
var BREAK_SCORES = [
|
|
11
10
|
[/^# /, 100],
|
|
12
11
|
// H1
|
|
@@ -34,7 +33,11 @@ var BREAK_SCORES = [
|
|
|
34
33
|
var TARGET_CHARS = 3e3;
|
|
35
34
|
var WINDOW_CHARS = 600;
|
|
36
35
|
var MIN_CHUNK_CHARS = 200;
|
|
37
|
-
|
|
36
|
+
/**
 * Escape every regular-expression metacharacter in `s` with a backslash so the
 * result matches `s` literally when passed to `new RegExp`.
 *
 * `*` is part of the escaped set: the glob conversion in this file looks for
 * the escaped forms `\*\*` and `\*`, and an unescaped leading `*` would make
 * `new RegExp` throw.
 *
 * @param {string} s Text to escape.
 * @returns {string} `s` with `. * + ? ^ $ { } ( ) | [ ] \` each preceded by a backslash.
 */
function escapeRegex(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|
|
39
|
+
__name(escapeRegex, "escapeRegex");
|
|
40
|
+
var DocsIndexer = class {
|
|
38
41
|
constructor(_db, _embedding, _hnsw, _vecCache) {
|
|
39
42
|
this._db = _db;
|
|
40
43
|
this._embedding = _embedding;
|
|
@@ -42,7 +45,7 @@ var DocIndexer = class {
|
|
|
42
45
|
this._vecCache = _vecCache;
|
|
43
46
|
}
|
|
44
47
|
static {
|
|
45
|
-
__name(this, "
|
|
48
|
+
__name(this, "DocsIndexer");
|
|
46
49
|
}
|
|
47
50
|
/**
|
|
48
51
|
* Index all documents in a collection.
|
|
@@ -53,20 +56,33 @@ var DocIndexer = class {
|
|
|
53
56
|
if (!fs.existsSync(absDir)) {
|
|
54
57
|
throw new Error(`Collection path does not exist: ${absDir}`);
|
|
55
58
|
}
|
|
59
|
+
const patternExt = pattern.match(/\.([\w]+)$/)?.[1];
|
|
56
60
|
const files = [];
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
61
|
+
const walkDir = /* @__PURE__ */ __name((dir, base) => {
|
|
62
|
+
let entries;
|
|
63
|
+
try {
|
|
64
|
+
entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
65
|
+
} catch {
|
|
66
|
+
return;
|
|
67
|
+
}
|
|
68
|
+
for (const e of entries) {
|
|
69
|
+
const rel = base ? `${base}/${e.name}` : e.name;
|
|
70
|
+
if (e.isDirectory()) {
|
|
71
|
+
if (this._isIgnoredDocDir(e.name)) continue;
|
|
72
|
+
walkDir(path.join(dir, e.name), rel);
|
|
73
|
+
} else if (e.isFile()) {
|
|
74
|
+
const shouldIgnore = options.ignore?.some((ig) => {
|
|
75
|
+
const escaped = escapeRegex(ig).replace(/\\\*\\\*/g, ".*").replace(/\\\*/g, "[^/]*");
|
|
76
|
+
return new RegExp(escaped).test(rel);
|
|
77
|
+
});
|
|
78
|
+
const ext = path.extname(e.name).slice(1);
|
|
79
|
+
if (!shouldIgnore && (!patternExt || ext === patternExt)) {
|
|
80
|
+
files.push(rel);
|
|
81
|
+
}
|
|
67
82
|
}
|
|
68
83
|
}
|
|
69
|
-
}
|
|
84
|
+
}, "walkDir");
|
|
85
|
+
walkDir(absDir, "");
|
|
70
86
|
let indexed = 0;
|
|
71
87
|
let skipped = 0;
|
|
72
88
|
let totalChunks = 0;
|
|
@@ -76,13 +92,21 @@ var DocIndexer = class {
|
|
|
76
92
|
options.onProgress?.(relPath, i + 1, files.length);
|
|
77
93
|
const content = fs.readFileSync(absPath, "utf-8");
|
|
78
94
|
const hash = createHash("sha256").update(content).digest("hex").slice(0, 16);
|
|
79
|
-
const
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
95
|
+
const existingChunks = this._db.prepare(
|
|
96
|
+
`SELECT dc.id, dc.content_hash, dv.chunk_id AS has_vector
|
|
97
|
+
FROM doc_chunks dc
|
|
98
|
+
LEFT JOIN doc_vectors dv ON dv.chunk_id = dc.id
|
|
99
|
+
WHERE dc.collection = ? AND dc.file_path = ?`
|
|
100
|
+
).all(collection, relPath);
|
|
101
|
+
const allMatch = existingChunks.length > 0 && existingChunks.every((c) => c.content_hash === hash && c.has_vector != null);
|
|
102
|
+
if (allMatch) {
|
|
83
103
|
skipped++;
|
|
84
104
|
continue;
|
|
85
105
|
}
|
|
106
|
+
for (const old of existingChunks) {
|
|
107
|
+
this._hnsw.remove(old.id);
|
|
108
|
+
this._vecCache.delete(old.id);
|
|
109
|
+
}
|
|
86
110
|
this._db.prepare(
|
|
87
111
|
"DELETE FROM doc_chunks WHERE collection = ? AND file_path = ?"
|
|
88
112
|
).run(collection, relPath);
|
|
@@ -117,10 +141,12 @@ var DocIndexer = class {
|
|
|
117
141
|
for (let j = 0; j < chunkIds.length; j++) {
|
|
118
142
|
const buf = Buffer.from(embeddings[j].buffer);
|
|
119
143
|
insertVec.run(chunkIds[j], buf);
|
|
120
|
-
this._hnsw.add(embeddings[j], chunkIds[j]);
|
|
121
|
-
this._vecCache.set(chunkIds[j], embeddings[j]);
|
|
122
144
|
}
|
|
123
145
|
});
|
|
146
|
+
for (let j = 0; j < chunkIds.length; j++) {
|
|
147
|
+
this._hnsw.add(embeddings[j], chunkIds[j]);
|
|
148
|
+
this._vecCache.set(chunkIds[j], embeddings[j]);
|
|
149
|
+
}
|
|
124
150
|
indexed++;
|
|
125
151
|
totalChunks += chunks.length;
|
|
126
152
|
}
|
|
@@ -130,6 +156,13 @@ var DocIndexer = class {
|
|
|
130
156
|
* Remove all indexed data for a collection.
|
|
131
157
|
*/
|
|
132
158
|
removeCollection(collection) {
|
|
159
|
+
const chunks = this._db.prepare(
|
|
160
|
+
"SELECT id FROM doc_chunks WHERE collection = ?"
|
|
161
|
+
).all(collection);
|
|
162
|
+
for (const chunk of chunks) {
|
|
163
|
+
this._hnsw.remove(chunk.id);
|
|
164
|
+
this._vecCache.delete(chunk.id);
|
|
165
|
+
}
|
|
133
166
|
this._db.prepare("DELETE FROM doc_chunks WHERE collection = ?").run(collection);
|
|
134
167
|
this._db.prepare("DELETE FROM collections WHERE name = ?").run(collection);
|
|
135
168
|
this._db.prepare("DELETE FROM path_contexts WHERE collection = ?").run(collection);
|
|
@@ -147,7 +180,6 @@ var DocIndexer = class {
|
|
|
147
180
|
const breakPoints = this._findBreakPoints(lines);
|
|
148
181
|
const chunks = [];
|
|
149
182
|
let chunkStart = 0;
|
|
150
|
-
let lineStart = 0;
|
|
151
183
|
while (chunkStart < text.length) {
|
|
152
184
|
const remaining = text.length - chunkStart;
|
|
153
185
|
if (remaining <= TARGET_CHARS + WINDOW_CHARS) {
|
|
@@ -223,15 +255,35 @@ var DocIndexer = class {
|
|
|
223
255
|
if (match) return match[1].trim();
|
|
224
256
|
return path.basename(filePath, path.extname(filePath));
|
|
225
257
|
}
|
|
258
|
+
/** Skip well-known output/vendor directories when walking docs. */
|
|
259
|
+
_isIgnoredDocDir(name) {
|
|
260
|
+
const IGNORED = /* @__PURE__ */ new Set([
|
|
261
|
+
"node_modules",
|
|
262
|
+
".git",
|
|
263
|
+
".hg",
|
|
264
|
+
".svn",
|
|
265
|
+
"dist",
|
|
266
|
+
"build",
|
|
267
|
+
"out",
|
|
268
|
+
"coverage",
|
|
269
|
+
".next",
|
|
270
|
+
"__pycache__",
|
|
271
|
+
".tox",
|
|
272
|
+
".venv",
|
|
273
|
+
"venv",
|
|
274
|
+
"vendor",
|
|
275
|
+
"target",
|
|
276
|
+
".cache",
|
|
277
|
+
".turbo"
|
|
278
|
+
]);
|
|
279
|
+
return IGNORED.has(name);
|
|
280
|
+
}
|
|
226
281
|
};
|
|
227
282
|
|
|
228
|
-
// src/
|
|
229
|
-
var
|
|
230
|
-
constructor(opts = {}) {
|
|
231
|
-
this.opts = opts;
|
|
232
|
-
}
|
|
283
|
+
// src/indexers/docs/docs-plugin.ts
|
|
284
|
+
var DocsPlugin = class {
|
|
233
285
|
static {
|
|
234
|
-
__name(this, "
|
|
286
|
+
__name(this, "DocsPlugin");
|
|
235
287
|
}
|
|
236
288
|
name = "docs";
|
|
237
289
|
hnsw;
|
|
@@ -244,7 +296,7 @@ var DocsModuleImpl = class {
|
|
|
244
296
|
this._embedding = ctx.embedding;
|
|
245
297
|
this.hnsw = await ctx.createHnsw();
|
|
246
298
|
ctx.loadVectors("doc_vectors", "chunk_id", this.hnsw, this.vecCache);
|
|
247
|
-
this.indexer = new
|
|
299
|
+
this.indexer = new DocsIndexer(ctx.db, ctx.embedding, this.hnsw, this.vecCache);
|
|
248
300
|
}
|
|
249
301
|
/** Register a document collection. */
|
|
250
302
|
addCollection(collection) {
|
|
@@ -295,7 +347,18 @@ var DocsModuleImpl = class {
|
|
|
295
347
|
async search(query, options) {
|
|
296
348
|
const k = options?.k ?? 8;
|
|
297
349
|
const queryVec = await this._embedding.embed(query);
|
|
298
|
-
|
|
350
|
+
let searchK = k;
|
|
351
|
+
if (options?.collection && this.hnsw.size > 0) {
|
|
352
|
+
const collectionCount = this._db.prepare(
|
|
353
|
+
"SELECT COUNT(*) as c FROM doc_chunks WHERE collection = ?"
|
|
354
|
+
).get(options.collection)?.c ?? 0;
|
|
355
|
+
const totalChunks = this._db.prepare(
|
|
356
|
+
"SELECT COUNT(*) as c FROM doc_chunks"
|
|
357
|
+
).get()?.c ?? 1;
|
|
358
|
+
const ratio = collectionCount > 0 ? Math.max(3, Math.min(50, Math.ceil(totalChunks / collectionCount))) : 3;
|
|
359
|
+
searchK = Math.min(k * ratio, this.hnsw.size);
|
|
360
|
+
}
|
|
361
|
+
const hits = this.hnsw.search(queryVec, searchK);
|
|
299
362
|
const results = [];
|
|
300
363
|
for (const hit of hits) {
|
|
301
364
|
if (options?.minScore && hit.score < options.minScore) continue;
|
|
@@ -317,6 +380,7 @@ var DocsModuleImpl = class {
|
|
|
317
380
|
seq: chunk.seq
|
|
318
381
|
}
|
|
319
382
|
});
|
|
383
|
+
if (results.length >= k) break;
|
|
320
384
|
}
|
|
321
385
|
return results;
|
|
322
386
|
}
|
|
@@ -361,13 +425,13 @@ var DocsModuleImpl = class {
|
|
|
361
425
|
return coll?.context ?? void 0;
|
|
362
426
|
}
|
|
363
427
|
};
|
|
364
|
-
function docs(
|
|
365
|
-
return new
|
|
428
|
+
function docs() {
|
|
429
|
+
return new DocsPlugin();
|
|
366
430
|
}
|
|
367
431
|
__name(docs, "docs");
|
|
368
432
|
|
|
369
433
|
export {
|
|
370
|
-
|
|
434
|
+
DocsIndexer,
|
|
371
435
|
docs
|
|
372
436
|
};
|
|
373
|
-
//# sourceMappingURL=chunk-
|
|
437
|
+
//# sourceMappingURL=chunk-F6SJ3U4H.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/indexers/docs/docs-indexer.ts","../src/indexers/docs/docs-plugin.ts"],"sourcesContent":["/**\n * BrainBank — Document Indexer\n * \n * Indexes generic document collections (markdown, text, etc.)\n * with heading-aware smart chunking, inspired by qmd.\n * \n * const indexer = new DocsIndexer(db, embedding, hnsw, vecCache);\n * await indexer.indexCollection('notes', '/path/to/notes', '**\\/*.md');\n */\n\nimport * as fs from 'node:fs';\nimport * as path from 'node:path';\nimport { createHash } from 'node:crypto';\n\nimport type { Database } from '../../db/database.ts';\nimport type { EmbeddingProvider, VectorIndex } from '../../types.ts';\nimport type { HNSWIndex } from '../../providers/vector/hnsw.ts';\n\n// ── Break Point Scoring (qmd-inspired) ──────────────\n\ninterface BreakPoint {\n pos: number; // character position\n score: number; // break quality (higher = better)\n}\n\nconst BREAK_SCORES: [RegExp, number][] = [\n [/^# /, 100], // H1\n [/^## /, 90], // H2\n [/^### /, 80], // H3\n [/^#### /, 70], // H4\n [/^##### /, 60], // H5\n [/^###### /, 50], // H6\n [/^```/, 80], // Code fence\n [/^---$/, 60], // Horizontal rule\n [/^\\*\\*\\*$/, 60], // Horizontal rule alt\n [/^$/, 20], // Blank line (paragraph break)\n [/^[-*+] /, 5], // List item\n];\n\n// ── Chunk Target ────────────────────────────────────\n\nconst TARGET_CHARS = 3000; // ~900 tokens\nconst WINDOW_CHARS = 600; // search window before cutoff\nconst MIN_CHUNK_CHARS = 200; // don't create tiny chunks\n\n/** Escape special regex characters so user-provided patterns behave as literals. 
*/\nfunction escapeRegex(s: string): string {\n return s.replace(/[.+?^${}()|[\\]\\\\]/g, '\\\\$&');\n}\n\n// ── DocsIndexer ──────────────────────────────────────\n\nexport class DocsIndexer {\n constructor(\n private _db: Database,\n private _embedding: EmbeddingProvider,\n private _hnsw: HNSWIndex,\n private _vecCache: Map<number, Float32Array>,\n ) {}\n\n /**\n * Index all documents in a collection.\n * Incremental — skips unchanged files (by content hash).\n */\n async indexCollection(\n collection: string,\n dirPath: string,\n pattern: string = '**/*.md',\n options: {\n ignore?: string[];\n onProgress?: (file: string, current: number, total: number) => void;\n } = {},\n ): Promise<{ indexed: number; skipped: number; chunks: number }> {\n // Resolve absolute path\n const absDir = path.resolve(dirPath);\n if (!fs.existsSync(absDir)) {\n throw new Error(`Collection path does not exist: ${absDir}`);\n }\n\n // Find files matching pattern\n const patternExt = pattern.match(/\\.([\\w]+)$/)?.[1];\n const files: string[] = [];\n\n const walkDir = (dir: string, base: string): void => {\n let entries: fs.Dirent[];\n try { entries = fs.readdirSync(dir, { withFileTypes: true }); }\n catch { return; }\n for (const e of entries) {\n const rel = base ? 
`${base}/${e.name}` : e.name;\n if (e.isDirectory()) {\n // Skip ignored directories (node_modules, .git, etc.)\n if (this._isIgnoredDocDir(e.name)) continue;\n walkDir(path.join(dir, e.name), rel);\n } else if (e.isFile()) {\n const shouldIgnore = options.ignore?.some(ig => {\n const escaped = escapeRegex(ig)\n .replace(/\\\\\\*\\\\\\*/g, '.*')\n .replace(/\\\\\\*/g, '[^/]*');\n return new RegExp(escaped).test(rel);\n });\n const ext = path.extname(e.name).slice(1);\n if (!shouldIgnore && (!patternExt || ext === patternExt)) {\n files.push(rel);\n }\n }\n }\n };\n walkDir(absDir, '');\n\n let indexed = 0;\n let skipped = 0;\n let totalChunks = 0;\n\n for (let i = 0; i < files.length; i++) {\n const relPath = files[i];\n const absPath = path.join(absDir, relPath);\n\n options.onProgress?.(relPath, i + 1, files.length);\n\n // Read content and hash\n const content = fs.readFileSync(absPath, 'utf-8');\n const hash = createHash('sha256').update(content).digest('hex').slice(0, 16);\n\n // Check if already indexed with same hash AND vectors exist\n const existingChunks = this._db.prepare(\n `SELECT dc.id, dc.content_hash, dv.chunk_id AS has_vector\n FROM doc_chunks dc\n LEFT JOIN doc_vectors dv ON dv.chunk_id = dc.id\n WHERE dc.collection = ? AND dc.file_path = ?`\n ).all(collection, relPath) as any[];\n\n const allMatch = existingChunks.length > 0 &&\n existingChunks.every((c: any) => c.content_hash === hash && c.has_vector != null);\n\n if (allMatch) {\n skipped++;\n continue;\n }\n\n // Remove old chunks + their HNSW vectors\n for (const old of existingChunks) {\n this._hnsw.remove(old.id);\n this._vecCache.delete(old.id);\n }\n this._db.prepare(\n 'DELETE FROM doc_chunks WHERE collection = ? 
AND file_path = ?'\n ).run(collection, relPath);\n\n // Extract title and chunk\n const title = this._extractTitle(content, relPath);\n const chunks = this._smartChunk(content);\n\n // Insert chunks\n const insertChunk = this._db.prepare(`\n INSERT INTO doc_chunks (collection, file_path, title, content, seq, pos, content_hash)\n VALUES (?, ?, ?, ?, ?, ?, ?)\n `);\n\n const chunkIds: number[] = [];\n\n this._db.transaction(() => {\n for (let seq = 0; seq < chunks.length; seq++) {\n const chunk = chunks[seq];\n const result = insertChunk.run(\n collection, relPath, title, chunk.text, seq, chunk.pos, hash,\n );\n chunkIds.push(Number(result.lastInsertRowid));\n }\n });\n\n // Generate embeddings\n const texts = chunks.map(c => `title: ${title} | text: ${c.text}`);\n const embeddings = await this._embedding.embedBatch(texts);\n\n // Store vectors — DB transaction commits first, then HNSW is updated.\n const insertVec = this._db.prepare(\n 'INSERT OR REPLACE INTO doc_vectors (chunk_id, embedding) VALUES (?, ?)'\n );\n\n this._db.transaction(() => {\n for (let j = 0; j < chunkIds.length; j++) {\n const buf = Buffer.from(embeddings[j].buffer);\n insertVec.run(chunkIds[j], buf);\n }\n });\n\n // Reached only if the transaction committed successfully — no orphan risk.\n for (let j = 0; j < chunkIds.length; j++) {\n this._hnsw.add(embeddings[j], chunkIds[j]);\n this._vecCache.set(chunkIds[j], embeddings[j]);\n }\n\n indexed++;\n totalChunks += chunks.length;\n }\n\n return { indexed, skipped, chunks: totalChunks };\n }\n\n /**\n * Remove all indexed data for a collection.\n */\n removeCollection(collection: string): void {\n // Clean HNSW entries before deleting DB rows\n const chunks = this._db.prepare(\n 'SELECT id FROM doc_chunks WHERE collection = ?'\n ).all(collection) as any[];\n for (const chunk of chunks) {\n this._hnsw.remove(chunk.id);\n this._vecCache.delete(chunk.id);\n }\n\n this._db.prepare('DELETE FROM doc_chunks WHERE collection = ?').run(collection);\n 
this._db.prepare('DELETE FROM collections WHERE name = ?').run(collection);\n this._db.prepare('DELETE FROM path_contexts WHERE collection = ?').run(collection);\n }\n\n // ── Smart Chunking ──────────────────────────────\n\n /**\n * Split document into chunks at natural markdown boundaries.\n * Uses heading-aware scoring like qmd.\n */\n private _smartChunk(text: string): { text: string; pos: number }[] {\n if (text.length <= TARGET_CHARS) {\n return [{ text, pos: 0 }];\n }\n\n const lines = text.split('\\n');\n const breakPoints = this._findBreakPoints(lines);\n const chunks: { text: string; pos: number }[] = [];\n\n let chunkStart = 0; // char position\n\n while (chunkStart < text.length) {\n const remaining = text.length - chunkStart;\n if (remaining <= TARGET_CHARS + WINDOW_CHARS) {\n // Last chunk — take everything\n const lastText = text.slice(chunkStart).trim();\n if (lastText.length >= MIN_CHUNK_CHARS) {\n chunks.push({ text: lastText, pos: chunkStart });\n } else if (chunks.length > 0) {\n // Merge with previous chunk\n chunks[chunks.length - 1].text += '\\n' + lastText;\n } else {\n chunks.push({ text: lastText, pos: chunkStart });\n }\n break;\n }\n\n // Find best break point in window\n const targetEnd = chunkStart + TARGET_CHARS;\n const windowStart = targetEnd - WINDOW_CHARS;\n\n let bestBreak = targetEnd;\n let bestScore = 0;\n\n for (const bp of breakPoints) {\n if (bp.pos <= chunkStart) continue;\n if (bp.pos > targetEnd + WINDOW_CHARS / 2) break;\n if (bp.pos < windowStart) continue;\n\n // Score decay: prefer closer break points\n const distance = Math.abs(bp.pos - targetEnd);\n const decay = 1 - (distance / WINDOW_CHARS) ** 2 * 0.7;\n const finalScore = bp.score * decay;\n\n if (finalScore > bestScore) {\n bestScore = finalScore;\n bestBreak = bp.pos;\n }\n }\n\n const chunkText = text.slice(chunkStart, bestBreak).trim();\n if (chunkText.length >= MIN_CHUNK_CHARS) {\n chunks.push({ text: chunkText, pos: chunkStart });\n }\n\n chunkStart = 
bestBreak;\n }\n\n return chunks;\n }\n\n /**\n * Find all potential break points in the document with scores.\n */\n private _findBreakPoints(lines: string[]): BreakPoint[] {\n const points: BreakPoint[] = [];\n let charPos = 0;\n let inCodeBlock = false;\n\n for (const line of lines) {\n // Track code fences\n if (line.trimStart().startsWith('```')) {\n inCodeBlock = !inCodeBlock;\n if (!inCodeBlock) {\n // End of code block is a good break point\n points.push({ pos: charPos + line.length + 1, score: 80 });\n }\n charPos += line.length + 1;\n continue;\n }\n\n // Skip break points inside code blocks\n if (inCodeBlock) {\n charPos += line.length + 1;\n continue;\n }\n\n // Score this line as a potential break point\n for (const [pattern, score] of BREAK_SCORES) {\n if (pattern.test(line.trim())) {\n points.push({ pos: charPos, score });\n break;\n }\n }\n\n charPos += line.length + 1;\n }\n\n return points;\n }\n\n /**\n * Extract document title from first heading or filename.\n */\n private _extractTitle(content: string, filePath: string): string {\n const match = content.match(/^#{1,3}\\s+(.+)$/m);\n if (match) return match[1].trim();\n return path.basename(filePath, path.extname(filePath));\n }\n\n /** Skip well-known output/vendor directories when walking docs. 
*/\n private _isIgnoredDocDir(name: string): boolean {\n const IGNORED = new Set([\n 'node_modules', '.git', '.hg', '.svn',\n 'dist', 'build', 'out', 'coverage', '.next',\n '__pycache__', '.tox', '.venv', 'venv',\n 'vendor', 'target', '.cache', '.turbo',\n ]);\n return IGNORED.has(name);\n }\n}\n","/**\n * BrainBank — Docs Module\n * \n * Index any folder of markdown/text files (notes, docs, wikis).\n * Heading-aware smart chunking inspired by qmd.\n * \n * import { docs } from 'brainbank/docs';\n * brain.use(docs());\n */\n\nimport type { Indexer, IndexerContext } from '../base.ts';\nimport type { HNSWIndex } from '../../providers/vector/hnsw.ts';\nimport type { Database } from '../../db/database.ts';\nimport type { EmbeddingProvider, DocumentCollection, SearchResult } from '../../types.ts';\nimport { DocsIndexer } from './docs-indexer.ts';\n\nclass DocsPlugin implements Indexer {\n readonly name = 'docs';\n hnsw!: HNSWIndex;\n indexer!: DocsIndexer;\n vecCache = new Map<number, Float32Array>();\n private _db!: Database;\n private _embedding!: EmbeddingProvider;\n\n async initialize(ctx: IndexerContext): Promise<void> {\n this._db = ctx.db;\n this._embedding = ctx.embedding;\n this.hnsw = await ctx.createHnsw();\n ctx.loadVectors('doc_vectors', 'chunk_id', this.hnsw, this.vecCache);\n this.indexer = new DocsIndexer(ctx.db, ctx.embedding, this.hnsw, this.vecCache);\n }\n\n /** Register a document collection. */\n addCollection(collection: DocumentCollection): void {\n this._db.prepare(`\n INSERT OR REPLACE INTO collections (name, path, pattern, ignore_json, context)\n VALUES (?, ?, ?, ?, ?)\n `).run(\n collection.name,\n collection.path,\n collection.pattern ?? '**/*.md',\n JSON.stringify(collection.ignore ?? []),\n collection.context ?? null,\n );\n }\n\n /** Remove a collection and its indexed data. */\n removeCollection(name: string): void {\n this.indexer.removeCollection(name);\n }\n\n /** List all registered collections. 
*/\n listCollections(): DocumentCollection[] {\n return (this._db.prepare('SELECT * FROM collections').all() as any[]).map(row => ({\n name: row.name,\n path: row.path,\n pattern: row.pattern,\n ignore: JSON.parse(row.ignore_json),\n context: row.context,\n }));\n }\n\n /** Index all (or specific) collections. Incremental. */\n async indexCollections(options: {\n collections?: string[];\n onProgress?: (collection: string, file: string, current: number, total: number) => void;\n } = {}): Promise<Record<string, { indexed: number; skipped: number; chunks: number }>> {\n const allCollections = this.listCollections();\n const toIndex = options.collections\n ? allCollections.filter(c => options.collections!.includes(c.name))\n : allCollections;\n\n const results: Record<string, { indexed: number; skipped: number; chunks: number }> = {};\n\n for (const coll of toIndex) {\n results[coll.name] = await this.indexer.indexCollection(\n coll.name,\n coll.path,\n coll.pattern,\n {\n ignore: coll.ignore,\n onProgress: (file, cur, total) => options.onProgress?.(coll.name, file, cur, total),\n },\n );\n }\n\n return results;\n }\n\n /** Search documents only. */\n async search(query: string, options?: {\n collection?: string;\n k?: number;\n minScore?: number;\n }): Promise<SearchResult[]> {\n const k = options?.k ?? 8;\n const queryVec = await this._embedding.embed(query);\n\n // Over-fetch from shared HNSW when filtering by collection\n // (same pattern as collection.ts ratio scaling)\n let searchK = k;\n if (options?.collection && this.hnsw.size > 0) {\n const collectionCount = (this._db.prepare(\n 'SELECT COUNT(*) as c FROM doc_chunks WHERE collection = ?'\n ).get(options.collection) as any)?.c ?? 0;\n const totalChunks = (this._db.prepare(\n 'SELECT COUNT(*) as c FROM doc_chunks'\n ).get() as any)?.c ?? 1;\n const ratio = collectionCount > 0\n ? 
Math.max(3, Math.min(50, Math.ceil(totalChunks / collectionCount)))\n : 3;\n searchK = Math.min(k * ratio, this.hnsw.size);\n }\n\n const hits = this.hnsw.search(queryVec, searchK);\n\n const results: SearchResult[] = [];\n for (const hit of hits) {\n if (options?.minScore && hit.score < options.minScore) continue;\n\n const chunk = this._db.prepare(\n 'SELECT * FROM doc_chunks WHERE id = ?'\n ).get(hit.id) as any;\n\n if (!chunk) continue;\n if (options?.collection && chunk.collection !== options.collection) continue;\n\n const ctx = this._getDocContext(chunk.collection, chunk.file_path);\n\n results.push({\n type: 'document',\n score: hit.score,\n filePath: chunk.file_path,\n content: chunk.content,\n context: ctx,\n metadata: {\n collection: chunk.collection,\n title: chunk.title,\n seq: chunk.seq,\n },\n });\n\n // Stop once we have enough results\n if (results.length >= k) break;\n }\n\n return results;\n }\n\n /** Add context description for a document path. */\n addContext(collection: string, path: string, context: string): void {\n this._db.prepare(`\n INSERT OR REPLACE INTO path_contexts (collection, path, context)\n VALUES (?, ?, ?)\n `).run(collection, path, context);\n }\n\n /** Remove context for a path. */\n removeContext(collection: string, path: string): void {\n this._db.prepare(\n 'DELETE FROM path_contexts WHERE collection = ? AND path = ?'\n ).run(collection, path);\n }\n\n /** List all context entries. 
*/\n listContexts(): { collection: string; path: string; context: string }[] {\n return this._db.prepare('SELECT * FROM path_contexts').all() as any[];\n }\n\n stats(): Record<string, any> {\n return {\n collections: (this._db.prepare('SELECT COUNT(*) as c FROM collections').get() as any).c,\n documents: (this._db.prepare('SELECT COUNT(DISTINCT file_path) as c FROM doc_chunks').get() as any).c,\n chunks: (this._db.prepare('SELECT COUNT(*) as c FROM doc_chunks').get() as any).c,\n hnswSize: this.hnsw.size,\n };\n }\n\n /** Resolve context for a document (checks path_contexts tree → collection context). */\n private _getDocContext(collection: string, filePath: string): string | undefined {\n const parts = filePath.split('/');\n for (let i = parts.length; i >= 0; i--) {\n const checkPath = i === 0 ? '/' : '/' + parts.slice(0, i).join('/');\n const ctx = this._db.prepare(\n 'SELECT context FROM path_contexts WHERE collection = ? AND path = ?'\n ).get(collection, checkPath) as any;\n if (ctx) return ctx.context;\n }\n\n const coll = this._db.prepare(\n 'SELECT context FROM collections WHERE name = ?'\n ).get(collection) as any;\n return coll?.context ?? undefined;\n }\n}\n\n/** Create a document collections plugin. 
*/\nexport function docs(): Indexer {\n return new DocsPlugin();\n}\n"],"mappings":";;;;;AAUA,YAAY,QAAQ;AACpB,YAAY,UAAU;AACtB,SAAS,kBAAkB;AAa3B,IAAM,eAAmC;AAAA,EACrC,CAAC,OAAY,GAAG;AAAA;AAAA,EAChB,CAAC,QAAa,EAAE;AAAA;AAAA,EAChB,CAAC,SAAa,EAAE;AAAA;AAAA,EAChB,CAAC,UAAa,EAAE;AAAA;AAAA,EAChB,CAAC,WAAa,EAAE;AAAA;AAAA,EAChB,CAAC,YAAa,EAAE;AAAA;AAAA,EAChB,CAAC,QAAa,EAAE;AAAA;AAAA,EAChB,CAAC,SAAa,EAAE;AAAA;AAAA,EAChB,CAAC,YAAa,EAAE;AAAA;AAAA,EAChB,CAAC,MAAa,EAAE;AAAA;AAAA,EAChB,CAAC,WAAc,CAAC;AAAA;AACpB;AAIA,IAAM,eAAe;AACrB,IAAM,eAAe;AACrB,IAAM,kBAAkB;AAGxB,SAAS,YAAY,GAAmB;AACpC,SAAO,EAAE,QAAQ,sBAAsB,MAAM;AACjD;AAFS;AAMF,IAAM,cAAN,MAAkB;AAAA,EACrB,YACY,KACA,YACA,OACA,WACV;AAJU;AACA;AACA;AACA;AAAA,EACT;AAAA,EA1DP,OAoDyB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYrB,MAAM,gBACF,YACA,SACA,UAAkB,WAClB,UAGI,CAAC,GACwD;AAE7D,UAAM,SAAc,aAAQ,OAAO;AACnC,QAAI,CAAI,cAAW,MAAM,GAAG;AACxB,YAAM,IAAI,MAAM,mCAAmC,MAAM,EAAE;AAAA,IAC/D;AAGA,UAAM,aAAa,QAAQ,MAAM,YAAY,IAAI,CAAC;AAClD,UAAM,QAAkB,CAAC;AAEzB,UAAM,UAAU,wBAAC,KAAa,SAAuB;AACjD,UAAI;AACJ,UAAI;AAAE,kBAAa,eAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,MAAG,QACxD;AAAE;AAAA,MAAQ;AAChB,iBAAW,KAAK,SAAS;AACrB,cAAM,MAAM,OAAO,GAAG,IAAI,IAAI,EAAE,IAAI,KAAK,EAAE;AAC3C,YAAI,EAAE,YAAY,GAAG;AAEjB,cAAI,KAAK,iBAAiB,EAAE,IAAI,EAAG;AACnC,kBAAa,UAAK,KAAK,EAAE,IAAI,GAAG,GAAG;AAAA,QACvC,WAAW,EAAE,OAAO,GAAG;AACnB,gBAAM,eAAe,QAAQ,QAAQ,KAAK,QAAM;AAC5C,kBAAM,UAAU,YAAY,EAAE,EACzB,QAAQ,aAAa,IAAI,EACzB,QAAQ,SAAS,OAAO;AAC7B,mBAAO,IAAI,OAAO,OAAO,EAAE,KAAK,GAAG;AAAA,UACvC,CAAC;AACD,gBAAM,MAAW,aAAQ,EAAE,IAAI,EAAE,MAAM,CAAC;AACxC,cAAI,CAAC,iBAAiB,CAAC,cAAc,QAAQ,aAAa;AACtD,kBAAM,KAAK,GAAG;AAAA,UAClB;AAAA,QACJ;AAAA,MACJ;AAAA,IACJ,GAvBgB;AAwBhB,YAAQ,QAAQ,EAAE;AAElB,QAAI,UAAU;AACd,QAAI,UAAU;AACd,QAAI,cAAc;AAElB,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACnC,YAAM,UAAU,MAAM,CAAC;AACvB,YAAM,UAAe,UAAK,QAAQ,OAAO;AAEzC,cAAQ,aAAa,SAAS,IAAI,GAAG,MAAM,MAAM;AAGjD,YAAM,UAAa,gBAAa,SAAS,OAAO;AAChD,YAAM,OAAO,WAAW,QAAQ,EAAE,OAAO,OAAO,EAAE,OAAO,KAAK,EAAE,MAAM,GAAG,EAAE;AAG3E,YAAM,iBAAiB,KAAK,IAAI;AAAA,QAC5B;AA
AA;AAAA;AAAA;AAAA,MAIJ,EAAE,IAAI,YAAY,OAAO;AAEzB,YAAM,WAAW,eAAe,SAAS,KACrC,eAAe,MAAM,CAAC,MAAW,EAAE,iBAAiB,QAAQ,EAAE,cAAc,IAAI;AAEpF,UAAI,UAAU;AACV;AACA;AAAA,MACJ;AAGA,iBAAW,OAAO,gBAAgB;AAC9B,aAAK,MAAM,OAAO,IAAI,EAAE;AACxB,aAAK,UAAU,OAAO,IAAI,EAAE;AAAA,MAChC;AACA,WAAK,IAAI;AAAA,QACL;AAAA,MACJ,EAAE,IAAI,YAAY,OAAO;AAGzB,YAAM,QAAQ,KAAK,cAAc,SAAS,OAAO;AACjD,YAAM,SAAS,KAAK,YAAY,OAAO;AAGvC,YAAM,cAAc,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA,aAGpC;AAED,YAAM,WAAqB,CAAC;AAE5B,WAAK,IAAI,YAAY,MAAM;AACvB,iBAAS,MAAM,GAAG,MAAM,OAAO,QAAQ,OAAO;AAC1C,gBAAM,QAAQ,OAAO,GAAG;AACxB,gBAAM,SAAS,YAAY;AAAA,YACvB;AAAA,YAAY;AAAA,YAAS;AAAA,YAAO,MAAM;AAAA,YAAM;AAAA,YAAK,MAAM;AAAA,YAAK;AAAA,UAC5D;AACA,mBAAS,KAAK,OAAO,OAAO,eAAe,CAAC;AAAA,QAChD;AAAA,MACJ,CAAC;AAGD,YAAM,QAAQ,OAAO,IAAI,OAAK,UAAU,KAAK,YAAY,EAAE,IAAI,EAAE;AACjE,YAAM,aAAa,MAAM,KAAK,WAAW,WAAW,KAAK;AAGzD,YAAM,YAAY,KAAK,IAAI;AAAA,QACvB;AAAA,MACJ;AAEA,WAAK,IAAI,YAAY,MAAM;AACvB,iBAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACtC,gBAAM,MAAM,OAAO,KAAK,WAAW,CAAC,EAAE,MAAM;AAC5C,oBAAU,IAAI,SAAS,CAAC,GAAG,GAAG;AAAA,QAClC;AAAA,MACJ,CAAC;AAGD,eAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACtC,aAAK,MAAM,IAAI,WAAW,CAAC,GAAG,SAAS,CAAC,CAAC;AACzC,aAAK,UAAU,IAAI,SAAS,CAAC,GAAG,WAAW,CAAC,CAAC;AAAA,MACjD;AAEA;AACA,qBAAe,OAAO;AAAA,IAC1B;AAEA,WAAO,EAAE,SAAS,SAAS,QAAQ,YAAY;AAAA,EACnD;AAAA;AAAA;AAAA;AAAA,EAKA,iBAAiB,YAA0B;AAEvC,UAAM,SAAS,KAAK,IAAI;AAAA,MACpB;AAAA,IACJ,EAAE,IAAI,UAAU;AAChB,eAAW,SAAS,QAAQ;AACxB,WAAK,MAAM,OAAO,MAAM,EAAE;AAC1B,WAAK,UAAU,OAAO,MAAM,EAAE;AAAA,IAClC;AAEA,SAAK,IAAI,QAAQ,6CAA6C,EAAE,IAAI,UAAU;AAC9E,SAAK,IAAI,QAAQ,wCAAwC,EAAE,IAAI,UAAU;AACzE,SAAK,IAAI,QAAQ,gDAAgD,EAAE,IAAI,UAAU;AAAA,EACrF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQQ,YAAY,MAA+C;AAC/D,QAAI,KAAK,UAAU,cAAc;AAC7B,aAAO,CAAC,EAAE,MAAM,KAAK,EAAE,CAAC;AAAA,IAC5B;AAEA,UAAM,QAAQ,KAAK,MAAM,IAAI;AAC7B,UAAM,cAAc,KAAK,iBAAiB,KAAK;AAC/C,UAAM,SAA0C,CAAC;AAEjD,QAAI,aAAa;AAEjB,WAAO,aAAa,KAAK,QAAQ;AAC7B,YAAM,YAAY,KAAK,SAAS;AAChC,UAAI,aAAa,eAAe,cAAc;AAE1C,cAAM,WAAW,KAAK,MAAM,UAAU,EAAE,KAAK;AAC7C,YAAI,SAAS,UAAU,iBAAiB;AACpC,iBAAO,KA
AK,EAAE,MAAM,UAAU,KAAK,WAAW,CAAC;AAAA,QACnD,WAAW,OAAO,SAAS,GAAG;AAE1B,iBAAO,OAAO,SAAS,CAAC,EAAE,QAAQ,OAAO;AAAA,QAC7C,OAAO;AACH,iBAAO,KAAK,EAAE,MAAM,UAAU,KAAK,WAAW,CAAC;AAAA,QACnD;AACA;AAAA,MACJ;AAGA,YAAM,YAAY,aAAa;AAC/B,YAAM,cAAc,YAAY;AAEhC,UAAI,YAAY;AAChB,UAAI,YAAY;AAEhB,iBAAW,MAAM,aAAa;AAC1B,YAAI,GAAG,OAAO,WAAY;AAC1B,YAAI,GAAG,MAAM,YAAY,eAAe,EAAG;AAC3C,YAAI,GAAG,MAAM,YAAa;AAG1B,cAAM,WAAW,KAAK,IAAI,GAAG,MAAM,SAAS;AAC5C,cAAM,QAAQ,KAAK,WAAW,iBAAiB,IAAI;AACnD,cAAM,aAAa,GAAG,QAAQ;AAE9B,YAAI,aAAa,WAAW;AACxB,sBAAY;AACZ,sBAAY,GAAG;AAAA,QACnB;AAAA,MACJ;AAEA,YAAM,YAAY,KAAK,MAAM,YAAY,SAAS,EAAE,KAAK;AACzD,UAAI,UAAU,UAAU,iBAAiB;AACrC,eAAO,KAAK,EAAE,MAAM,WAAW,KAAK,WAAW,CAAC;AAAA,MACpD;AAEA,mBAAa;AAAA,IACjB;AAEA,WAAO;AAAA,EACX;AAAA;AAAA;AAAA;AAAA,EAKQ,iBAAiB,OAA+B;AACpD,UAAM,SAAuB,CAAC;AAC9B,QAAI,UAAU;AACd,QAAI,cAAc;AAElB,eAAW,QAAQ,OAAO;AAEtB,UAAI,KAAK,UAAU,EAAE,WAAW,KAAK,GAAG;AACpC,sBAAc,CAAC;AACf,YAAI,CAAC,aAAa;AAEd,iBAAO,KAAK,EAAE,KAAK,UAAU,KAAK,SAAS,GAAG,OAAO,GAAG,CAAC;AAAA,QAC7D;AACA,mBAAW,KAAK,SAAS;AACzB;AAAA,MACJ;AAGA,UAAI,aAAa;AACb,mBAAW,KAAK,SAAS;AACzB;AAAA,MACJ;AAGA,iBAAW,CAAC,SAAS,KAAK,KAAK,cAAc;AACzC,YAAI,QAAQ,KAAK,KAAK,KAAK,CAAC,GAAG;AAC3B,iBAAO,KAAK,EAAE,KAAK,SAAS,MAAM,CAAC;AACnC;AAAA,QACJ;AAAA,MACJ;AAEA,iBAAW,KAAK,SAAS;AAAA,IAC7B;AAEA,WAAO;AAAA,EACX;AAAA;AAAA;AAAA;AAAA,EAKQ,cAAc,SAAiB,UAA0B;AAC7D,UAAM,QAAQ,QAAQ,MAAM,kBAAkB;AAC9C,QAAI,MAAO,QAAO,MAAM,CAAC,EAAE,KAAK;AAChC,WAAY,cAAS,UAAe,aAAQ,QAAQ,CAAC;AAAA,EACzD;AAAA;AAAA,EAGQ,iBAAiB,MAAuB;AAC5C,UAAM,UAAU,oBAAI,IAAI;AAAA,MACpB;AAAA,MAAgB;AAAA,MAAQ;AAAA,MAAO;AAAA,MAC/B;AAAA,MAAQ;AAAA,MAAS;AAAA,MAAO;AAAA,MAAY;AAAA,MACpC;AAAA,MAAe;AAAA,MAAQ;AAAA,MAAS;AAAA,MAChC;AAAA,MAAU;AAAA,MAAU;AAAA,MAAU;AAAA,IAClC,CAAC;AACD,WAAO,QAAQ,IAAI,IAAI;AAAA,EAC3B;AACJ;;;ACvUA,IAAM,aAAN,MAAoC;AAAA,EAhBpC,OAgBoC;AAAA;AAAA;AAAA,EACvB,OAAO;AAAA,EAChB;AAAA,EACA;AAAA,EACA,WAAW,oBAAI,IAA0B;AAAA,EACjC;AAAA,EACA;AAAA,EAER,MAAM,WAAW,KAAoC;AACjD,SAAK,MAAM,IAAI;AACf,SAAK,aAAa,IAAI;AACtB,SAAK,OAAO,MAAM,IAAI,WAAW;AACjC,QAAI,YAAY,eAAe,YAAY,KAAK,MAAM,KAAK,Q
AAQ;AACnE,SAAK,UAAU,IAAI,YAAY,IAAI,IAAI,IAAI,WAAW,KAAK,MAAM,KAAK,QAAQ;AAAA,EAClF;AAAA;AAAA,EAGA,cAAc,YAAsC;AAChD,SAAK,IAAI,QAAQ;AAAA;AAAA;AAAA,SAGhB,EAAE;AAAA,MACC,WAAW;AAAA,MACX,WAAW;AAAA,MACX,WAAW,WAAW;AAAA,MACtB,KAAK,UAAU,WAAW,UAAU,CAAC,CAAC;AAAA,MACtC,WAAW,WAAW;AAAA,IAC1B;AAAA,EACJ;AAAA;AAAA,EAGA,iBAAiB,MAAoB;AACjC,SAAK,QAAQ,iBAAiB,IAAI;AAAA,EACtC;AAAA;AAAA,EAGA,kBAAwC;AACpC,WAAQ,KAAK,IAAI,QAAQ,2BAA2B,EAAE,IAAI,EAAY,IAAI,UAAQ;AAAA,MAC9E,MAAM,IAAI;AAAA,MACV,MAAM,IAAI;AAAA,MACV,SAAS,IAAI;AAAA,MACb,QAAQ,KAAK,MAAM,IAAI,WAAW;AAAA,MAClC,SAAS,IAAI;AAAA,IACjB,EAAE;AAAA,EACN;AAAA;AAAA,EAGA,MAAM,iBAAiB,UAGnB,CAAC,GAAkF;AACnF,UAAM,iBAAiB,KAAK,gBAAgB;AAC5C,UAAM,UAAU,QAAQ,cAClB,eAAe,OAAO,OAAK,QAAQ,YAAa,SAAS,EAAE,IAAI,CAAC,IAChE;AAEN,UAAM,UAAgF,CAAC;AAEvF,eAAW,QAAQ,SAAS;AACxB,cAAQ,KAAK,IAAI,IAAI,MAAM,KAAK,QAAQ;AAAA,QACpC,KAAK;AAAA,QACL,KAAK;AAAA,QACL,KAAK;AAAA,QACL;AAAA,UACI,QAAQ,KAAK;AAAA,UACb,YAAY,wBAAC,MAAM,KAAK,UAAU,QAAQ,aAAa,KAAK,MAAM,MAAM,KAAK,KAAK,GAAtE;AAAA,QAChB;AAAA,MACJ;AAAA,IACJ;AAEA,WAAO;AAAA,EACX;AAAA;AAAA,EAGA,MAAM,OAAO,OAAe,SAIA;AACxB,UAAM,IAAI,SAAS,KAAK;AACxB,UAAM,WAAW,MAAM,KAAK,WAAW,MAAM,KAAK;AAIlD,QAAI,UAAU;AACd,QAAI,SAAS,cAAc,KAAK,KAAK,OAAO,GAAG;AAC3C,YAAM,kBAAmB,KAAK,IAAI;AAAA,QAC9B;AAAA,MACJ,EAAE,IAAI,QAAQ,UAAU,GAAW,KAAK;AACxC,YAAM,cAAe,KAAK,IAAI;AAAA,QAC1B;AAAA,MACJ,EAAE,IAAI,GAAW,KAAK;AACtB,YAAM,QAAQ,kBAAkB,IAC1B,KAAK,IAAI,GAAG,KAAK,IAAI,IAAI,KAAK,KAAK,cAAc,eAAe,CAAC,CAAC,IAClE;AACN,gBAAU,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,IAAI;AAAA,IAChD;AAEA,UAAM,OAAO,KAAK,KAAK,OAAO,UAAU,OAAO;AAE/C,UAAM,UAA0B,CAAC;AACjC,eAAW,OAAO,MAAM;AACpB,UAAI,SAAS,YAAY,IAAI,QAAQ,QAAQ,SAAU;AAEvD,YAAM,QAAQ,KAAK,IAAI;AAAA,QACnB;AAAA,MACJ,EAAE,IAAI,IAAI,EAAE;AAEZ,UAAI,CAAC,MAAO;AACZ,UAAI,SAAS,cAAc,MAAM,eAAe,QAAQ,WAAY;AAEpE,YAAM,MAAM,KAAK,eAAe,MAAM,YAAY,MAAM,SAAS;AAEjE,cAAQ,KAAK;AAAA,QACT,MAAM;AAAA,QACN,OAAO,IAAI;AAAA,QACX,UAAU,MAAM;AAAA,QAChB,SAAS,MAAM;AAAA,QACf,SAAS;AAAA,QACT,UAAU;AAAA,UACN,YAAY,MAAM;AAAA,UAClB,OAAO,MAAM;AAAA,UACb,KAAK,MAAM;AAAA,QACf;AAAA,MACJ,CAAC;AAGD,UAAI,QAAQ,UAAU,EAA
G;AAAA,IAC7B;AAEA,WAAO;AAAA,EACX;AAAA;AAAA,EAGA,WAAW,YAAoBA,OAAc,SAAuB;AAChE,SAAK,IAAI,QAAQ;AAAA;AAAA;AAAA,SAGhB,EAAE,IAAI,YAAYA,OAAM,OAAO;AAAA,EACpC;AAAA;AAAA,EAGA,cAAc,YAAoBA,OAAoB;AAClD,SAAK,IAAI;AAAA,MACL;AAAA,IACJ,EAAE,IAAI,YAAYA,KAAI;AAAA,EAC1B;AAAA;AAAA,EAGA,eAAwE;AACpE,WAAO,KAAK,IAAI,QAAQ,6BAA6B,EAAE,IAAI;AAAA,EAC/D;AAAA,EAEA,QAA6B;AACzB,WAAO;AAAA,MACH,aAAc,KAAK,IAAI,QAAQ,uCAAuC,EAAE,IAAI,EAAU;AAAA,MACtF,WAAY,KAAK,IAAI,QAAQ,uDAAuD,EAAE,IAAI,EAAU;AAAA,MACpG,QAAS,KAAK,IAAI,QAAQ,sCAAsC,EAAE,IAAI,EAAU;AAAA,MAChF,UAAU,KAAK,KAAK;AAAA,IACxB;AAAA,EACJ;AAAA;AAAA,EAGQ,eAAe,YAAoB,UAAsC;AAC7E,UAAM,QAAQ,SAAS,MAAM,GAAG;AAChC,aAAS,IAAI,MAAM,QAAQ,KAAK,GAAG,KAAK;AACpC,YAAM,YAAY,MAAM,IAAI,MAAM,MAAM,MAAM,MAAM,GAAG,CAAC,EAAE,KAAK,GAAG;AAClE,YAAM,MAAM,KAAK,IAAI;AAAA,QACjB;AAAA,MACJ,EAAE,IAAI,YAAY,SAAS;AAC3B,UAAI,IAAK,QAAO,IAAI;AAAA,IACxB;AAEA,UAAM,OAAO,KAAK,IAAI;AAAA,MAClB;AAAA,IACJ,EAAE,IAAI,UAAU;AAChB,WAAO,MAAM,WAAW;AAAA,EAC5B;AACJ;AAGO,SAAS,OAAgB;AAC5B,SAAO,IAAI,WAAW;AAC1B;AAFgB;","names":["path"]}
|