wicked-brain 0.9.2 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -19
- package/package.json +1 -1
- package/server/bin/onboard-wiki.mjs +36 -0
- package/server/bin/wicked-brain-server.mjs +82 -2
- package/server/lib/brain-walker.mjs +78 -0
- package/server/lib/canonical-registry.mjs +128 -0
- package/server/lib/detect-mode.mjs +233 -0
- package/server/lib/frontmatter.mjs +204 -0
- package/server/lib/gen-contract-api.mjs +178 -0
- package/server/lib/gen-contract-schema.mjs +200 -0
- package/server/lib/gen-file-map.mjs +121 -0
- package/server/lib/lint-wiki.mjs +168 -0
- package/server/lib/mode-config.mjs +120 -0
- package/server/lib/mode.schema.json +53 -0
- package/server/lib/onboard-wiki.mjs +97 -0
- package/server/lib/sqlite-search.mjs +334 -7
- package/server/lib/stamp-pointer.mjs +103 -0
- package/server/lib/viewer-page.mjs +1096 -0
- package/server/package.json +8 -3
- package/skills/wicked-brain-agent/agents/onboard.md +161 -50
- package/skills/wicked-brain-compile/SKILL.md +42 -0
- package/skills/wicked-brain-ui/SKILL.md +137 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Onboard-wiki orchestrator.
|
|
3
|
+
*
|
|
4
|
+
* Single function that does everything the wiki stack needs at onboard time:
|
|
5
|
+
* 1. Detect repo mode.
|
|
6
|
+
* 2. Write `.wicked-brain/mode.json` (unless override blocks it).
|
|
7
|
+
* 3. Stamp `Contributor wiki: <path>` into CLAUDE.md and/or AGENTS.md if
|
|
8
|
+
* either exists. Never creates them — that's too opinionated.
|
|
9
|
+
*
|
|
10
|
+
* Returns a structured summary so the CLI can print a human-readable report
|
|
11
|
+
* and skills can branch on mode.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import fs from "node:fs/promises";
|
|
15
|
+
import path from "node:path";
|
|
16
|
+
import { detectRepoMode } from "./detect-mode.mjs";
|
|
17
|
+
import { readModeFile, writeModeFile, diffMode } from "./mode-config.mjs";
|
|
18
|
+
import { stampWikiPointer } from "./stamp-pointer.mjs";
|
|
19
|
+
|
|
20
|
+
// Agent-config files checked in the repo root for a wiki pointer. Only files
// that can be read are stamped; missing ones are reported, never created.
const AGENT_CONFIG_FILES = ["CLAUDE.md", "AGENTS.md"];
|
|
21
|
+
|
|
22
|
+
/**
 * Run the onboard-time wiki setup for a repository.
 *
 * Steps: detect the repo mode, write the mode file (unless an existing file
 * carries `override: true` and `force` is not set), then stamp the wiki
 * pointer into each agent-config file that already exists.
 *
 * @param {string} repoRoot - Repository root; resolved to an absolute path.
 * @param {{ force?: boolean }} [options] - `force` overwrites a mode file
 *   that is marked `override: true`.
 * @returns {Promise<{repo_root: string, detection: object, mode_write: object,
 *   stamps: Array<{file: string, action: string, reason?: string}>,
 *   wiki_root: string}>} Structured summary of what was done.
 */
export async function runOnboardWiki(repoRoot, { force = false } = {}) {
  const resolvedRoot = path.resolve(repoRoot);
  const detection = await detectRepoMode(resolvedRoot);

  const existing = await readModeFile(resolvedRoot);
  const diff = diffMode(existing, detection);

  let modeWrite;
  if (existing?.override === true && !force) {
    // A hand-pinned mode file wins unless the caller explicitly forces.
    modeWrite = {
      action: "skipped",
      reason: "override:true — not overwriting without --force",
      mode: existing.mode,
      wiki_root: existing.wiki_root,
    };
  } else {
    const write = await writeModeFile(resolvedRoot, detection, { override: force });
    modeWrite = {
      action: write.written ? (existing ? "updated" : "created") : "skipped",
      reason: write.reason ?? null,
      mode: detection.mode,
      wiki_root: detection.wiki_root,
      diff_fields: diff.fields,
    };
  }

  // Use whatever wiki_root is live on disk after the write step — that's
  // authoritative. Stamping needs the pointer path; we prefix with `./` for
  // clarity in the stamped line.
  const liveMode = await readModeFile(resolvedRoot);
  const wikiRoot = liveMode?.wiki_root ?? detection.wiki_root;
  const pointerPath = wikiRoot.startsWith("./") ? wikiRoot : `./${wikiRoot}`;

  const stamps = [];
  for (const name of AGENT_CONFIG_FILES) {
    const abs = path.join(resolvedRoot, name);
    let raw;
    try {
      raw = await fs.readFile(abs, "utf8");
    } catch (err) {
      // Only a missing file is "absent". Any other failure (permissions, the
      // path being a directory, ...) is reported as such instead of being
      // disguised as absence; the run still continues with the next file.
      if (err?.code === "ENOENT") {
        stamps.push({ file: name, action: "absent" });
      } else {
        stamps.push({
          file: name,
          action: "unreadable",
          reason: err?.code ?? String(err),
        });
      }
      continue;
    }
    const { content, changed } = stampWikiPointer(raw, pointerPath);
    if (!changed) {
      stamps.push({ file: name, action: "already-current" });
      continue;
    }
    await fs.writeFile(abs, content, "utf8");
    stamps.push({ file: name, action: "stamped" });
  }

  return {
    repo_root: resolvedRoot,
    detection,
    mode_write: modeWrite,
    stamps,
    wiki_root: wikiRoot,
  };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Render an onboard result as newline-separated text for CLI output.
 *
 * @param {object} result - Summary object as returned by `runOnboardWiki`.
 * @returns {string} One line per summary field, then one line per stamp.
 */
export function formatOnboardResult(result) {
  const { repo_root, detection, wiki_root, mode_write, stamps } = result;

  // The reason is only shown when the mode-file step supplied one.
  const reasonSuffix = mode_write.reason ? ` (${mode_write.reason})` : "";

  const summaryLines = [
    `repo: ${repo_root}`,
    `mode: ${detection.mode}`,
    `wiki_root: ${wiki_root}`,
    `score: code=${detection.score.code}, content=${detection.score.content}`,
    `mode.json: ${mode_write.action}${reasonSuffix}`,
  ];
  const stampLines = stamps.map(
    ({ file, action }) => `${file.padEnd(10)}: ${action}`,
  );

  return [...summaryLines, ...stampLines].join("\n");
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import Database from "better-sqlite3";
|
|
2
2
|
import { parseWikilinks } from "./wikilinks.mjs";
|
|
3
|
+
import { parseFrontmatterBlock } from "./frontmatter.mjs";
|
|
3
4
|
import { statSync } from "node:fs";
|
|
4
5
|
|
|
5
6
|
/**
|
|
@@ -25,6 +26,148 @@ export function deriveSourceType(path) {
|
|
|
25
26
|
return "chunk";
|
|
26
27
|
}
|
|
27
28
|
|
|
29
|
+
/**
 * Merge rows that represent the same thing so each appears once in the hits.
 *
 * Rows are grouped with a union-find over the keys produced by
 * `collapseKeysFor`: any two rows sharing a key end up in the same group,
 * transitively. Each group's root is its lowest index, so with input sorted
 * best-first the survivor is the best-scoring member.
 *
 * Note: survivors are mutated in place — each gets an `also_found_in` array
 * describing the rows it absorbed (at most `maxAlsoFoundIn` entries).
 *
 * @param {object[]} rows - Rows pre-sorted best-to-worst.
 * @param {{ maxAlsoFoundIn?: number }} [options] - Cap on `also_found_in`.
 * @returns {object[]} Surviving rows, in their original relative order.
 */
function collapseDuplicates(rows, { maxAlsoFoundIn = 5 } = {}) {
  const parent = Array.from(rows, (_, idx) => idx);

  // Find with path halving.
  function rootOf(idx) {
    let cur = idx;
    while (parent[cur] !== cur) {
      parent[cur] = parent[parent[cur]];
      cur = parent[cur];
    }
    return cur;
  }

  // Always hang the higher root under the lower one so a group's root is its
  // lowest (best-scoring) index.
  function merge(a, b) {
    const rootA = rootOf(a);
    const rootB = rootOf(b);
    if (rootA === rootB) return;
    const [low, high] = rootA < rootB ? [rootA, rootB] : [rootB, rootA];
    parent[high] = low;
  }

  // Pass 1: link every row to the first row that carried the same key.
  const firstHolder = new Map();
  rows.forEach((row, idx) => {
    for (const key of collapseKeysFor(row)) {
      const holder = firstHolder.get(key);
      if (holder === undefined) {
        firstHolder.set(key, idx);
      } else {
        merge(idx, holder);
      }
    }
  });

  // Pass 2: the first row seen for a root survives; later members are
  // recorded on it until the cap is reached.
  const survivors = [];
  const survivorOf = new Map();
  rows.forEach((row, idx) => {
    const root = rootOf(idx);
    const winner = survivorOf.get(root);
    if (winner === undefined) {
      row.also_found_in = [];
      survivorOf.set(root, row);
      survivors.push(row);
      return;
    }
    if (winner.also_found_in.length >= maxAlsoFoundIn) return;
    winner.also_found_in.push({
      id: row.id,
      path: row.path,
      brain_id: row.brain_id,
      score: row.boosted_score,
    });
  });

  return survivors;
}
|
|
100
|
+
|
|
101
|
+
/**
 * List the collapse keys a row participates in.
 *
 * Key families, in output order:
 *   - `hash:<content_hash>` when the row has a content hash;
 *   - `canon:<id>` for each entry of the JSON array in `canonical_for`
 *     (malformed JSON or a non-array value contributes nothing);
 *   - `trans:<anchor>` and `ver:<anchor>`, where the anchor is the declared
 *     `translation_of` / `version_of`, or the row's own path when it declares
 *     none — so an original and the docs pointing at it share a key.
 *
 * @param {object} row - Search row.
 * @returns {string[]} Collapse keys for the row.
 */
function collapseKeysFor(row) {
  const hashKeys = row.content_hash ? [`hash:${row.content_hash}`] : [];

  let canonKeys = [];
  if (row.canonical_for) {
    try {
      const parsed = JSON.parse(row.canonical_for);
      if (Array.isArray(parsed)) {
        canonKeys = parsed.map((id) => `canon:${id}`);
      }
    } catch {
      // Malformed JSON — skip canonical collapse for this row.
    }
  }

  // Declared anchor wins; otherwise the row anchors on its own path.
  const anchorKeys = (prefix, declared) => {
    if (declared) return [`${prefix}:${declared}`];
    return row.path ? [`${prefix}:${row.path}`] : [];
  };

  return [
    ...hashKeys,
    ...canonKeys,
    ...anchorKeys("trans", row.translation_of),
    ...anchorKeys("ver", row.version_of),
  ];
}
|
|
130
|
+
|
|
131
|
+
/**
 * Translate a list of source types into a SQL filter on `path`.
 *
 * The source type is derived from the path prefix rather than stored, so
 * each type maps to LIKE patterns; "chunk" is everything that is neither
 * wiki nor memory and therefore becomes a negated conjunction. Fragments are
 * emitted in the fixed order wiki, memory, chunk regardless of input order.
 *
 * @param {string[]|null|undefined} types - Requested source types.
 * @returns {{ where: string|null, params: string[] }} Parenthesised OR
 *   clause plus its bind values, or `where: null` when nothing applies.
 */
function buildSourceTypeClauses(types) {
  if (!types || !types.length) return { where: null, params: [] };

  const requested = new Set(types.map(String));

  // [type, SQL fragments, bind values] in emission order.
  const rules = [
    ["wiki", ["path LIKE ?"], ["wiki/%"]],
    ["memory", ["path LIKE ?", "path LIKE ?"], ["memory/%", "memories/%"]],
    [
      "chunk",
      // Anything that's NOT wiki/ memory/ memories/.
      ["(path NOT LIKE ? AND path NOT LIKE ? AND path NOT LIKE ?)"],
      ["wiki/%", "memory/%", "memories/%"],
    ],
  ];

  const fragments = [];
  const bindings = [];
  for (const [type, sqlParts, values] of rules) {
    if (!requested.has(type)) continue;
    fragments.push(...sqlParts);
    bindings.push(...values);
  }

  if (fragments.length === 0) return { where: null, params: [] };
  return { where: `(${fragments.join(" OR ")})`, params: bindings };
}
|
|
163
|
+
|
|
164
|
+
/**
 * Coerce a frontmatter value into a list of non-empty strings.
 *
 * Accepts either a single scalar or an array of scalars. Scalars (string,
 * number, boolean, bigint) are stringified; `null`/`undefined` entries and
 * non-scalar entries (objects, nested arrays) are dropped so they cannot
 * turn into bogus IDs such as "null" or "[object Object]". A bare scalar is
 * treated the same way as a one-element array.
 *
 * @param {unknown} value - Parsed frontmatter value.
 * @returns {string[]} Normalized list; empty when nothing usable is present.
 */
function normalizeFrontmatterList(value) {
  if (value == null) return [];

  const scalarTypes = new Set(["string", "number", "boolean", "bigint"]);
  const candidates = Array.isArray(value) ? value : [value];

  return candidates
    .filter((item) => scalarTypes.has(typeof item))
    .map(String)
    .filter((text) => text.length > 0);
}
|
|
170
|
+
|
|
28
171
|
function escapeFtsQuery(query) {
|
|
29
172
|
return query
|
|
30
173
|
.trim()
|
|
@@ -111,9 +254,21 @@ export class SqliteSearch {
|
|
|
111
254
|
frontmatter TEXT,
|
|
112
255
|
brain_id TEXT NOT NULL,
|
|
113
256
|
indexed_at INTEGER NOT NULL,
|
|
114
|
-
content_hash TEXT
|
|
257
|
+
content_hash TEXT,
|
|
258
|
+
canonical_for TEXT,
|
|
259
|
+
refs TEXT,
|
|
260
|
+
translation_of TEXT,
|
|
261
|
+
version_of TEXT
|
|
115
262
|
);
|
|
116
263
|
|
|
264
|
+
CREATE TABLE IF NOT EXISTS canonical_ownership (
|
|
265
|
+
canonical_id TEXT PRIMARY KEY,
|
|
266
|
+
doc_id TEXT NOT NULL,
|
|
267
|
+
path TEXT NOT NULL,
|
|
268
|
+
brain_id TEXT NOT NULL
|
|
269
|
+
);
|
|
270
|
+
CREATE INDEX IF NOT EXISTS idx_canonical_doc ON canonical_ownership(doc_id);
|
|
271
|
+
|
|
117
272
|
CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
|
|
118
273
|
id,
|
|
119
274
|
path,
|
|
@@ -214,6 +369,39 @@ export class SqliteSearch {
|
|
|
214
369
|
currentVersion = 3;
|
|
215
370
|
}
|
|
216
371
|
|
|
372
|
+
// Migration 4: add canonical_for + refs columns, canonical_ownership table
|
|
373
|
+
// (renamed from 'references' because it's a SQL reserved word)
|
|
374
|
+
if (currentVersion < 4) {
|
|
375
|
+
try { this.#db.prepare(`SELECT canonical_for FROM documents LIMIT 0`).get(); } catch {
|
|
376
|
+
this.#db.exec(`ALTER TABLE documents ADD COLUMN canonical_for TEXT`);
|
|
377
|
+
}
|
|
378
|
+
try { this.#db.prepare(`SELECT refs FROM documents LIMIT 0`).get(); } catch {
|
|
379
|
+
this.#db.exec(`ALTER TABLE documents ADD COLUMN refs TEXT`);
|
|
380
|
+
}
|
|
381
|
+
this.#db.exec(`
|
|
382
|
+
CREATE TABLE IF NOT EXISTS canonical_ownership (
|
|
383
|
+
canonical_id TEXT PRIMARY KEY,
|
|
384
|
+
doc_id TEXT NOT NULL,
|
|
385
|
+
path TEXT NOT NULL,
|
|
386
|
+
brain_id TEXT NOT NULL
|
|
387
|
+
);
|
|
388
|
+
CREATE INDEX IF NOT EXISTS idx_canonical_doc ON canonical_ownership(doc_id);
|
|
389
|
+
`);
|
|
390
|
+
currentVersion = 4;
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
// Migration 5: add translation_of + version_of columns for locale/version
|
|
394
|
+
// collapse in content-mode repos.
|
|
395
|
+
if (currentVersion < 5) {
|
|
396
|
+
try { this.#db.prepare(`SELECT translation_of FROM documents LIMIT 0`).get(); } catch {
|
|
397
|
+
this.#db.exec(`ALTER TABLE documents ADD COLUMN translation_of TEXT`);
|
|
398
|
+
}
|
|
399
|
+
try { this.#db.prepare(`SELECT version_of FROM documents LIMIT 0`).get(); } catch {
|
|
400
|
+
this.#db.exec(`ALTER TABLE documents ADD COLUMN version_of TEXT`);
|
|
401
|
+
}
|
|
402
|
+
currentVersion = 5;
|
|
403
|
+
}
|
|
404
|
+
|
|
217
405
|
// Persist the current version
|
|
218
406
|
this.#db.exec(`DELETE FROM _schema_version`);
|
|
219
407
|
this.#db.prepare(`INSERT INTO _schema_version (version) VALUES (?)`).run(currentVersion);
|
|
@@ -252,16 +440,36 @@ export class SqliteSearch {
|
|
|
252
440
|
const indexedAt = Date.now();
|
|
253
441
|
const contentHash = this.#extractFrontmatterField(frontmatter, "content_hash");
|
|
254
442
|
|
|
443
|
+
// Parse frontmatter once (safely) for structured fields like canonical_for/references.
|
|
444
|
+
// Falls back to empty data on parse errors — malformed frontmatter should not
|
|
445
|
+
// block ingest of the rest of the document.
|
|
446
|
+
let fmData = {};
|
|
447
|
+
if (frontmatter) {
|
|
448
|
+
try { fmData = parseFrontmatterBlock(frontmatter); } catch { fmData = {}; }
|
|
449
|
+
}
|
|
450
|
+
const canonicalFor = normalizeFrontmatterList(fmData.canonical_for);
|
|
451
|
+
const refs = normalizeFrontmatterList(fmData.references);
|
|
452
|
+
const canonicalForJson = canonicalFor.length ? JSON.stringify(canonicalFor) : null;
|
|
453
|
+
const refsJson = refs.length ? JSON.stringify(refs) : null;
|
|
454
|
+
const translationOf = typeof fmData.translation_of === "string" && fmData.translation_of.length
|
|
455
|
+
? fmData.translation_of : null;
|
|
456
|
+
const versionOf = typeof fmData.version_of === "string" && fmData.version_of.length
|
|
457
|
+
? fmData.version_of : null;
|
|
458
|
+
|
|
255
459
|
const upsertDoc = this.#db.prepare(`
|
|
256
|
-
INSERT INTO documents (id, path, content, frontmatter, brain_id, indexed_at, content_hash)
|
|
257
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
460
|
+
INSERT INTO documents (id, path, content, frontmatter, brain_id, indexed_at, content_hash, canonical_for, refs, translation_of, version_of)
|
|
461
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
258
462
|
ON CONFLICT(id) DO UPDATE SET
|
|
259
463
|
path = excluded.path,
|
|
260
464
|
content = excluded.content,
|
|
261
465
|
frontmatter = excluded.frontmatter,
|
|
262
466
|
brain_id = excluded.brain_id,
|
|
263
467
|
indexed_at = excluded.indexed_at,
|
|
264
|
-
content_hash = excluded.content_hash
|
|
468
|
+
content_hash = excluded.content_hash,
|
|
469
|
+
canonical_for = excluded.canonical_for,
|
|
470
|
+
refs = excluded.refs,
|
|
471
|
+
translation_of = excluded.translation_of,
|
|
472
|
+
version_of = excluded.version_of
|
|
265
473
|
`);
|
|
266
474
|
|
|
267
475
|
const deleteFts = this.#db.prepare(`DELETE FROM documents_fts WHERE id = ?`);
|
|
@@ -276,8 +484,17 @@ export class SqliteSearch {
|
|
|
276
484
|
VALUES (?, ?, ?, ?, ?, ?)
|
|
277
485
|
`);
|
|
278
486
|
|
|
487
|
+
// Canonical ownership is maintained first-claimant-wins. A doc may register
|
|
488
|
+
// several IDs in one call, but a second doc claiming an already-owned ID is
|
|
489
|
+
// ignored at the SQL layer (PRIMARY KEY conflict). Lint surfaces the dup.
|
|
490
|
+
const deleteOwnership = this.#db.prepare(`DELETE FROM canonical_ownership WHERE doc_id = ?`);
|
|
491
|
+
const insertOwnership = this.#db.prepare(`
|
|
492
|
+
INSERT OR IGNORE INTO canonical_ownership (canonical_id, doc_id, path, brain_id)
|
|
493
|
+
VALUES (?, ?, ?, ?)
|
|
494
|
+
`);
|
|
495
|
+
|
|
279
496
|
const run = this.#db.transaction(() => {
|
|
280
|
-
upsertDoc.run(id, path, content, frontmatter, brainId, indexedAt, contentHash);
|
|
497
|
+
upsertDoc.run(id, path, content, frontmatter, brainId, indexedAt, contentHash, canonicalForJson, refsJson, translationOf, versionOf);
|
|
281
498
|
deleteFts.run(id);
|
|
282
499
|
insertFts.run(id, path, content, brainId);
|
|
283
500
|
deleteLinks.run(id);
|
|
@@ -285,6 +502,10 @@ export class SqliteSearch {
|
|
|
285
502
|
for (const link of wikilinks) {
|
|
286
503
|
insertLink.run(id, brainId, link.path, link.brain, link.rel || null, link.raw);
|
|
287
504
|
}
|
|
505
|
+
deleteOwnership.run(id);
|
|
506
|
+
for (const canonicalId of canonicalFor) {
|
|
507
|
+
insertOwnership.run(canonicalId, id, path, brainId);
|
|
508
|
+
}
|
|
288
509
|
});
|
|
289
510
|
|
|
290
511
|
run();
|
|
@@ -295,6 +516,7 @@ export class SqliteSearch {
|
|
|
295
516
|
this.#db.prepare(`DELETE FROM documents WHERE id = ?`).run(id);
|
|
296
517
|
this.#db.prepare(`DELETE FROM documents_fts WHERE id = ?`).run(id);
|
|
297
518
|
this.#db.prepare(`DELETE FROM links WHERE source_id = ?`).run(id);
|
|
519
|
+
this.#db.prepare(`DELETE FROM canonical_ownership WHERE doc_id = ?`).run(id);
|
|
298
520
|
});
|
|
299
521
|
run();
|
|
300
522
|
}
|
|
@@ -304,6 +526,7 @@ export class SqliteSearch {
|
|
|
304
526
|
this.#db.exec(`DELETE FROM documents`);
|
|
305
527
|
this.#db.exec(`DELETE FROM documents_fts`);
|
|
306
528
|
this.#db.exec(`DELETE FROM links`);
|
|
529
|
+
this.#db.exec(`DELETE FROM canonical_ownership`);
|
|
307
530
|
for (const doc of docs) {
|
|
308
531
|
this.index(doc);
|
|
309
532
|
}
|
|
@@ -311,6 +534,91 @@ export class SqliteSearch {
|
|
|
311
534
|
run();
|
|
312
535
|
}
|
|
313
536
|
|
|
537
|
+
/**
|
|
538
|
+
* Fetch a document by id with canonical_for and refs parsed from JSON.
|
|
539
|
+
* Returns null when absent.
|
|
540
|
+
*/
|
|
541
|
+
getDocument(id) {
|
|
542
|
+
const row = this.#db
|
|
543
|
+
.prepare(`SELECT id, path, content, frontmatter, brain_id, indexed_at, content_hash, canonical_for, refs FROM documents WHERE id = ?`)
|
|
544
|
+
.get(id);
|
|
545
|
+
return this.#hydrateDocumentRow(row);
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
/**
|
|
549
|
+
* Fetch a document by its stored path. Convenience for callers (like the
|
|
550
|
+
* viewer) that only know the path — wiki pages, search results, etc.
|
|
551
|
+
*/
|
|
552
|
+
getDocumentByPath(path) {
|
|
553
|
+
const row = this.#db
|
|
554
|
+
.prepare(`SELECT id, path, content, frontmatter, brain_id, indexed_at, content_hash, canonical_for, refs FROM documents WHERE path = ? LIMIT 1`)
|
|
555
|
+
.get(path);
|
|
556
|
+
return this.#hydrateDocumentRow(row);
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
#hydrateDocumentRow(row) {
|
|
560
|
+
if (!row) return null;
|
|
561
|
+
return {
|
|
562
|
+
id: row.id,
|
|
563
|
+
path: row.path,
|
|
564
|
+
content: row.content,
|
|
565
|
+
frontmatter: row.frontmatter,
|
|
566
|
+
brain_id: row.brain_id,
|
|
567
|
+
indexed_at: row.indexed_at,
|
|
568
|
+
content_hash: row.content_hash,
|
|
569
|
+
canonical_for: row.canonical_for ? JSON.parse(row.canonical_for) : [],
|
|
570
|
+
references: row.refs ? JSON.parse(row.refs) : [],
|
|
571
|
+
};
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
/**
|
|
575
|
+
* List documents without a search query, most-recent-first. Supports
|
|
576
|
+
* source_type filtering so browse-mode in the viewer can mirror what
|
|
577
|
+
* search returns (minus the snippet and score).
|
|
578
|
+
*
|
|
579
|
+
* Params:
|
|
580
|
+
* source_types — array of "wiki" | "chunk" | "memory". Null/empty = all.
|
|
581
|
+
* limit, offset — standard paging.
|
|
582
|
+
*/
|
|
583
|
+
listDocuments({ source_types = null, limit = 50, offset = 0 } = {}) {
|
|
584
|
+
const clauses = [];
|
|
585
|
+
const typeClauses = buildSourceTypeClauses(source_types);
|
|
586
|
+
if (typeClauses.where) clauses.push(typeClauses.where);
|
|
587
|
+
const whereSql = clauses.length ? `WHERE ${clauses.join(" AND ")}` : "";
|
|
588
|
+
const rows = this.#db.prepare(`
|
|
589
|
+
SELECT id, path, content, brain_id, indexed_at, content_hash, canonical_for, refs
|
|
590
|
+
FROM documents
|
|
591
|
+
${whereSql}
|
|
592
|
+
ORDER BY indexed_at DESC
|
|
593
|
+
LIMIT ? OFFSET ?
|
|
594
|
+
`).all(...typeClauses.params, limit, offset);
|
|
595
|
+
const totalRow = this.#db.prepare(`
|
|
596
|
+
SELECT COUNT(*) AS cnt FROM documents ${whereSql}
|
|
597
|
+
`).get(...typeClauses.params);
|
|
598
|
+
return {
|
|
599
|
+
results: rows.map((row) => ({
|
|
600
|
+
id: row.id,
|
|
601
|
+
path: row.path,
|
|
602
|
+
brain_id: row.brain_id,
|
|
603
|
+
indexed_at: row.indexed_at,
|
|
604
|
+
content_hash: row.content_hash,
|
|
605
|
+
canonical_for: row.canonical_for ? JSON.parse(row.canonical_for) : [],
|
|
606
|
+
source_type: deriveSourceType(row.path),
|
|
607
|
+
body_excerpt: extractBodyExcerpt(row.content ?? ""),
|
|
608
|
+
})),
|
|
609
|
+
total: totalRow ? totalRow.cnt : 0,
|
|
610
|
+
showing: rows.length,
|
|
611
|
+
};
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
/** Return the path of the document canonical for a given ID, or null. */
|
|
615
|
+
canonicalOwner(canonicalId) {
|
|
616
|
+
const row = this.#db
|
|
617
|
+
.prepare(`SELECT doc_id, path, brain_id FROM canonical_ownership WHERE canonical_id = ?`)
|
|
618
|
+
.get(canonicalId);
|
|
619
|
+
return row ? { doc_id: row.doc_id, path: row.path, brain_id: row.brain_id } : null;
|
|
620
|
+
}
|
|
621
|
+
|
|
314
622
|
search({ query, limit = 10, offset = 0, since = null, session_id = null }) {
|
|
315
623
|
const escaped = escapeFtsQuery(query);
|
|
316
624
|
if (!escaped) return { results: [], total_matches: 0, showing: 0 };
|
|
@@ -328,6 +636,10 @@ export class SqliteSearch {
|
|
|
328
636
|
d.id,
|
|
329
637
|
d.path,
|
|
330
638
|
d.brain_id,
|
|
639
|
+
d.content_hash,
|
|
640
|
+
d.canonical_for,
|
|
641
|
+
d.translation_of,
|
|
642
|
+
d.version_of,
|
|
331
643
|
snippet(documents_fts, 2, '<b>', '</b>', '…', 32) AS snippet,
|
|
332
644
|
SUBSTR(d.content, 1, 1000) AS raw_content,
|
|
333
645
|
COALESCE(link_count.cnt, 0) AS backlink_count,
|
|
@@ -368,13 +680,24 @@ export class SqliteSearch {
|
|
|
368
680
|
}
|
|
369
681
|
rawRows.sort((a, b) => a.boosted_score - b.boosted_score);
|
|
370
682
|
|
|
371
|
-
|
|
683
|
+
// Collapse duplicates so identical content (same content_hash) and
|
|
684
|
+
// rival claimants of the same canonical_for ID don't inflate the hit
|
|
685
|
+
// list. Survivor of each collapse group is the best-scoring row;
|
|
686
|
+
// absorbed rows are surfaced on `also_found_in`.
|
|
687
|
+
const collapsed = collapseDuplicates(rawRows);
|
|
688
|
+
|
|
689
|
+
const rows = collapsed.slice(offset, offset + limit).map((row) => {
|
|
372
690
|
const body_excerpt = extractBodyExcerpt(row.raw_content ?? "");
|
|
373
691
|
const source_type = deriveSourceType(row.path);
|
|
692
|
+
const canonical_for = row.canonical_for ? JSON.parse(row.canonical_for) : [];
|
|
693
|
+
const also_found_in = row.also_found_in ?? [];
|
|
374
694
|
delete row.raw_content;
|
|
375
695
|
delete row.composite_score;
|
|
376
696
|
delete row.boosted_score;
|
|
377
|
-
|
|
697
|
+
delete row.canonical_for;
|
|
698
|
+
delete row.content_hash;
|
|
699
|
+
delete row.also_found_in;
|
|
700
|
+
return { ...row, source_type, body_excerpt, canonical_for, also_found_in };
|
|
378
701
|
});
|
|
379
702
|
|
|
380
703
|
const countRow = this.#db
|
|
@@ -409,10 +732,14 @@ export class SqliteSearch {
|
|
|
409
732
|
logAll();
|
|
410
733
|
}
|
|
411
734
|
|
|
735
|
+
// Count how many raw rows were absorbed into others (sum of also_found_in).
|
|
736
|
+
const collapsed_count = rows.reduce((n, r) => n + (r.also_found_in?.length ?? 0), 0);
|
|
737
|
+
|
|
412
738
|
return {
|
|
413
739
|
results: rows,
|
|
414
740
|
total_matches,
|
|
415
741
|
showing: rows.length,
|
|
742
|
+
collapsed: collapsed_count,
|
|
416
743
|
};
|
|
417
744
|
}
|
|
418
745
|
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLAUDE.md / AGENTS.md contributor-wiki pointer stamping.
|
|
3
|
+
*
|
|
4
|
+
* Given the current contents of a markdown agent-config file and the wiki
|
|
5
|
+
* root, return a new string with a canonical `## Contributor wiki` section
|
|
6
|
+
* that carries the machine-readable `Contributor wiki: <path>` line.
|
|
7
|
+
*
|
|
8
|
+
* Idempotent: stamping a file that already carries the correct pointer
|
|
9
|
+
* returns the original string unchanged.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
// Matches a single `Contributor wiki: <path>` line and captures the path.
// Only horizontal whitespace is allowed around the tokens: `\s` also matches
// newlines, which would let the match swallow neighbouring blank lines and
// make a line rewrite collapse the surrounding paragraph breaks.
const POINTER_LINE_RE = /^[ \t]*Contributor wiki:[ \t]*(\S+)[ \t]*$/m;
const SECTION_HEADING = "## Contributor wiki";
|
|
14
|
+
|
|
15
|
+
/**
 * Make sure `content` carries a `Contributor wiki: <path>` pointer for the
 * given wiki root.
 *
 * Behavior:
 *   - Pointer present and already equal (after normalization) → no-op.
 *   - Pointer present with a different path → only that line is rewritten;
 *     the line breaks around it are left intact.
 *   - No pointer → a new section is inserted after the first H1, or at the
 *     top when there is none.
 *
 * @param {string} content - Current file contents.
 * @param {string} wikiRoot - Wiki root to point at.
 * @returns {{ content: string, changed: boolean }} Resulting text and
 *   whether it differs from the input.
 */
export function stampWikiPointer(content, wikiRoot) {
  const normalized = normalizeWikiRoot(wikiRoot);
  const existing = content.match(POINTER_LINE_RE);

  // Case 1: Pointer already points at the right place. No-op.
  if (existing && normalizeWikiRoot(existing[1]) === normalized) {
    return { content, changed: false };
  }

  // Case 2: Pointer exists but path is stale. Rewrite just the line.
  if (existing) {
    const lineBreaksOnly = (ws) => ws.replace(/[^\r\n]/g, "");
    // A replacer function is used for two reasons:
    //   - the matched text may include whitespace around the pointer line,
    //     possibly spanning adjacent blank lines; those line breaks must
    //     survive the rewrite or the section's paragraphs get glued together;
    //   - a plain replacement string would interpret `$&`, `$1`, ... if they
    //     ever appeared in the path.
    const updated = content.replace(POINTER_LINE_RE, (matched) => {
      const leadLength = matched.length - matched.trimStart().length;
      const bodyEnd = matched.trimEnd().length;
      const lead = lineBreaksOnly(matched.slice(0, leadLength));
      const trail = lineBreaksOnly(matched.slice(bodyEnd));
      return `${lead}Contributor wiki: ${normalized}${trail}`;
    });
    return { content: updated, changed: true };
  }

  // Case 3: No pointer at all. Insert a section — after the first H1, or at
  // the top if there is none.
  const section = buildSection(normalized);
  const updated = insertAfterFirstH1(content, section);
  return { content: updated, changed: true };
}
|
|
49
|
+
|
|
50
|
+
/**
 * Render the contributor-wiki section: heading, blank line, pointer line,
 * blank line, then a two-line explanation. No trailing newline is added.
 *
 * @param {string} wikiRoot - Path written after `Contributor wiki:`.
 * @returns {string} The section text.
 */
export function buildSection(wikiRoot) {
  const pointerLine = `Contributor wiki: ${wikiRoot}`;
  const explanation = [
    "Invariants, contracts, and extension recipes live there. This pointer is",
    "the machine-readable anchor agents grep for.",
  ];
  return `${SECTION_HEADING}\n\n${pointerLine}\n\n${explanation.join("\n")}`;
}
|
|
63
|
+
|
|
64
|
+
// --- internals ---
|
|
65
|
+
|
|
66
|
+
/**
 * Canonicalize a wiki-root path for comparison and stamping.
 *
 * Trims surrounding whitespace, converts backslashes to forward slashes and
 * drops trailing slashes. A leading `./` is kept. Null, undefined or blank
 * input falls back to "./wiki".
 *
 * @param {string|null|undefined} p - Raw wiki root.
 * @returns {string} Normalized wiki root.
 */
function normalizeWikiRoot(p) {
  const trimmed = (p ?? "").trim();
  if (trimmed.length === 0) return "./wiki";

  // Separators are unified first so that a trailing backslash (`docs\wiki\`)
  // is stripped as well instead of surviving as a trailing `/`.
  return trimmed.replace(/\\/g, "/").replace(/\/+$/, "");
}
|
|
76
|
+
|
|
77
|
+
/**
 * Insert `section` after the first H1 block of a markdown document, or at
 * the very top when the document has no H1.
 *
 * The "H1 block" is the `# ...` line plus any lines directly below it up to
 * the first blank line. Lines inside fenced code blocks are ignored when
 * looking for the H1, so a `# comment` in a shell snippet is not mistaken
 * for a heading. The section is always separated from neighbouring text by
 * a blank line and the result always ends with a newline.
 *
 * @param {string} content - Existing document text.
 * @param {string} section - Section text to insert (no trailing newline).
 * @returns {string} Document text with the section inserted.
 */
function insertAfterFirstH1(content, section) {
  const lines = content.split("\n");

  let insertLine = 0; // 0 means "no H1 found — prepend"
  let headingBlockHitsEof = false;
  let openFence = null; // fence character (` or ~) while inside a code block

  for (let i = 0; i < lines.length; i++) {
    const fence = lines[i].match(/^ {0,3}(`{3,}|~{3,})/);
    if (fence) {
      const marker = fence[1][0];
      if (openFence === null) openFence = marker;
      else if (openFence === marker) openFence = null;
      continue;
    }
    if (openFence !== null) continue;

    if (/^# [^#]/.test(lines[i])) {
      // Skip until we hit a blank line after the heading.
      let j = i + 1;
      while (j < lines.length && lines[j].trim() !== "") j++;
      insertLine = j + 1;
      headingBlockHitsEof = j >= lines.length;
      break;
    }
  }

  let result;
  if (insertLine === 0) {
    result = section + "\n" + (content.length > 0 ? "\n" + content : "");
  } else {
    const before = lines.slice(0, insertLine).join("\n");
    const after = lines.slice(insertLine).join("\n");
    // `before` normally ends on the blank line that closed the heading
    // block, so one newline completes the gap. When the heading block ran to
    // the end of the file there is no such blank line and two are needed.
    const gap = headingBlockHitsEof ? "\n\n" : "\n";
    result = before + gap + section + "\n" + (after.length > 0 ? "\n" + after : "");
  }

  return result.endsWith("\n") ? result : result + "\n";
}
|