@isaacriehm/cairn-core 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/align-undo/log.d.ts +2 -2
- package/dist/align-undo/log.js +80 -17
- package/dist/align-undo/log.js.map +1 -1
- package/dist/align-undo/undo.d.ts +10 -11
- package/dist/align-undo/undo.js +233 -23
- package/dist/align-undo/undo.js.map +1 -1
- package/dist/attention/bulk-accept.js +65 -37
- package/dist/attention/bulk-accept.js.map +1 -1
- package/dist/attention/serve/api.js +10 -18
- package/dist/attention/serve/api.js.map +1 -1
- package/dist/attention/serve/index.js +3 -3
- package/dist/attention/serve/index.js.map +1 -1
- package/dist/drain/drain.js +4 -4
- package/dist/drain/drain.js.map +1 -1
- package/dist/fix-align/index.d.ts +2 -0
- package/dist/fix-align/index.js +1 -0
- package/dist/fix-align/index.js.map +1 -1
- package/dist/fix-align/sentinel.d.ts +59 -0
- package/dist/fix-align/sentinel.js +149 -0
- package/dist/fix-align/sentinel.js.map +1 -0
- package/dist/fs.d.ts +5 -0
- package/dist/fs.js +11 -0
- package/dist/fs.js.map +1 -0
- package/dist/gc/apply.js +4 -4
- package/dist/gc/apply.js.map +1 -1
- package/dist/ground/anchor-map.js +3 -4
- package/dist/ground/anchor-map.js.map +1 -1
- package/dist/ground/file-candidates-map.d.ts +23 -0
- package/dist/ground/file-candidates-map.js +76 -0
- package/dist/ground/file-candidates-map.js.map +1 -0
- package/dist/ground/frontmatter.d.ts +12 -0
- package/dist/ground/frontmatter.js +28 -0
- package/dist/ground/frontmatter.js.map +1 -1
- package/dist/ground/index.d.ts +6 -4
- package/dist/ground/index.js +6 -4
- package/dist/ground/index.js.map +1 -1
- package/dist/ground/paths.d.ts +2 -0
- package/dist/ground/paths.js +6 -0
- package/dist/ground/paths.js.map +1 -1
- package/dist/ground/rejected.d.ts +42 -0
- package/dist/ground/rejected.js +100 -0
- package/dist/ground/rejected.js.map +1 -0
- package/dist/ground/schemas.d.ts +88 -8
- package/dist/ground/schemas.js +73 -19
- package/dist/ground/schemas.js.map +1 -1
- package/dist/ground/scope-index.js +4 -4
- package/dist/ground/scope-index.js.map +1 -1
- package/dist/ground/sot-bindings.js +3 -4
- package/dist/ground/sot-bindings.js.map +1 -1
- package/dist/ground/sot-cache.js +3 -4
- package/dist/ground/sot-cache.js.map +1 -1
- package/dist/ground/topic-index.d.ts +7 -0
- package/dist/ground/topic-index.js +26 -4
- package/dist/ground/topic-index.js.map +1 -1
- package/dist/hooks/post-tool-use/index.d.ts +1 -1
- package/dist/hooks/post-tool-use/index.js +1 -1
- package/dist/hooks/post-tool-use/index.js.map +1 -1
- package/dist/hooks/post-tool-use/ledger-cache.d.ts +13 -0
- package/dist/hooks/post-tool-use/ledger-cache.js +48 -0
- package/dist/hooks/post-tool-use/ledger-cache.js.map +1 -1
- package/dist/hooks/post-tool-use/legend-builder.d.ts +10 -1
- package/dist/hooks/post-tool-use/legend-builder.js +27 -2
- package/dist/hooks/post-tool-use/legend-builder.js.map +1 -1
- package/dist/hooks/post-tool-use/read-enricher.js +8 -2
- package/dist/hooks/post-tool-use/read-enricher.js.map +1 -1
- package/dist/hooks/post-tool-use/sot-align.js +11 -16
- package/dist/hooks/post-tool-use/sot-align.js.map +1 -1
- package/dist/hooks/pre-commit/sot-align-precommit.js +3 -3
- package/dist/hooks/pre-commit/sot-align-precommit.js.map +1 -1
- package/dist/init/index.d.ts +2 -2
- package/dist/init/index.js +1 -1
- package/dist/init/index.js.map +1 -1
- package/dist/init/ingest-docs.d.ts +82 -22
- package/dist/init/ingest-docs.js +632 -108
- package/dist/init/ingest-docs.js.map +1 -1
- package/dist/init/init.d.ts +10 -1
- package/dist/init/init.js +113 -251
- package/dist/init/init.js.map +1 -1
- package/dist/init/mapper-parallel.js +8 -0
- package/dist/init/mapper-parallel.js.map +1 -1
- package/dist/init/mapper.js +6 -6
- package/dist/init/mapper.js.map +1 -1
- package/dist/init/phases/6-docs-ingest.d.ts +9 -4
- package/dist/init/phases/6-docs-ingest.js +13 -10
- package/dist/init/phases/6-docs-ingest.js.map +1 -1
- package/dist/init/phases/parallel-678.js +10 -4
- package/dist/init/phases/parallel-678.js.map +1 -1
- package/dist/init/sot-emit.d.ts +22 -0
- package/dist/init/sot-emit.js +51 -9
- package/dist/init/sot-emit.js.map +1 -1
- package/dist/init/source-comments/ingest.js +107 -7
- package/dist/init/source-comments/ingest.js.map +1 -1
- package/dist/init/topic-index/index.d.ts +14 -0
- package/dist/init/topic-index/index.js +83 -4
- package/dist/init/topic-index/index.js.map +1 -1
- package/dist/init/topic-index/judge.js +14 -1
- package/dist/init/topic-index/judge.js.map +1 -1
- package/dist/init/topic-index/resolve.d.ts +19 -0
- package/dist/init/topic-index/resolve.js +100 -14
- package/dist/init/topic-index/resolve.js.map +1 -1
- package/dist/init/topic-index/walk.d.ts +32 -0
- package/dist/init/topic-index/walk.js +70 -4
- package/dist/init/topic-index/walk.js.map +1 -1
- package/dist/mcp/history/summarizer.js +5 -0
- package/dist/mcp/history/summarizer.js.map +1 -1
- package/dist/mcp/schemas.d.ts +48 -0
- package/dist/mcp/schemas.js +43 -0
- package/dist/mcp/schemas.js.map +1 -1
- package/dist/mcp/tools/index.d.ts +1 -1
- package/dist/mcp/tools/index.js +8 -0
- package/dist/mcp/tools/index.js.map +1 -1
- package/dist/mcp/tools/init-phases.js.map +1 -1
- package/dist/mcp/tools/propose-decision.d.ts +34 -0
- package/dist/mcp/tools/propose-decision.js +200 -0
- package/dist/mcp/tools/propose-decision.js.map +1 -0
- package/dist/mcp/tools/record-decision.js +5 -2
- package/dist/mcp/tools/record-decision.js.map +1 -1
- package/dist/mcp/tools/reject-candidate.d.ts +24 -0
- package/dist/mcp/tools/reject-candidate.js +71 -0
- package/dist/mcp/tools/reject-candidate.js.map +1 -0
- package/dist/mcp/tools/resolve-attention.js +7 -54
- package/dist/mcp/tools/resolve-attention.js.map +1 -1
- package/dist/mcp/tools/search-candidates.d.ts +20 -0
- package/dist/mcp/tools/search-candidates.js +93 -0
- package/dist/mcp/tools/search-candidates.js.map +1 -0
- package/package.json +1 -1
- package/templates/attention-ui/app.js +40 -3
package/dist/init/ingest-docs.js
CHANGED
|
@@ -1,28 +1,78 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Phase 6 — docs ingestion (
|
|
2
|
+
* Phase 6 — staged docs ingestion (PHASE_6_REDESIGN §4.1).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* draft inbox, no LLM paraphrase — the doc paragraph itself IS the
|
|
8
|
-
* canonical body, recorded with `sot_kind: path` so the lens renders
|
|
9
|
-
* the live source on every read.
|
|
4
|
+
* Replaces the v0.6 bulk-classifier path. Cuts wall-clock time from ~15 min →
|
|
5
|
+
* ~75 s on gcb-platform-scale repos AND collapses the noisy ledger
|
|
6
|
+
* (7000 DECs) to a curated draft inbox (30-80 drafts).
|
|
10
7
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
8
|
+
* Pipeline:
|
|
9
|
+
*
|
|
10
|
+
* Stage 3 (deterministic, 0 Haiku) — marker scan
|
|
11
|
+
* Topic-index entries with `marker_kind` in {"decision","rule"} go
|
|
12
|
+
* straight to emit. The walker stamped them at parse time when it
|
|
13
|
+
* saw frontmatter `cairn.kind` or `<!-- cairn:decision -->` /
|
|
14
|
+
* `<!-- cairn:rule -->` within 3 lines of the heading.
|
|
15
|
+
*
|
|
16
|
+
* Stage 1 — file-purpose binary filter (batch=30, concurrency=5)
|
|
17
|
+
* Per file: filepath + frontmatter + first 800 chars + every
|
|
18
|
+
* H1/H2/H3 line (capped at 100). Locked rigid prompt: a file is
|
|
19
|
+
* authoritative ONLY if it's a canonical rulebook, formal ADR,
|
|
20
|
+
* or list of binding domain invariants. Plans / scratchpads /
|
|
21
|
+
* UAT logs / API docs are NOT authoritative even if they
|
|
22
|
+
* contain proposed or historical decisions.
|
|
23
|
+
*
|
|
24
|
+
* Stage 2 — section-level batch classifier (batch=30, concurrency=5)
|
|
25
|
+
* Same shape as the v0.6 classifier, but scoped to sections
|
|
26
|
+
* belonging to Stage-1-authoritative files AND not already
|
|
27
|
+
* handled by a marker. This is where Haiku still adds signal —
|
|
28
|
+
* the file passed the rigid filter; now decide WHICH sections
|
|
29
|
+
* of it are decisions vs context.
|
|
30
|
+
*
|
|
31
|
+
* Stage 4 — emit
|
|
32
|
+
* Stage 2 + Stage 3 outputs → `.cairn/ground/decisions/_inbox/<id>.draft.md`.
|
|
33
|
+
* `status: draft`, `capture_source: init-docs-ingest`,
|
|
34
|
+
* `decided_by: cairn-init`. Body is verbatim via
|
|
35
|
+
* `readSotBody` — no Haiku paraphrasing. Operator triages via
|
|
36
|
+
* the existing `cairn-attention` skill.
|
|
37
|
+
*
|
|
38
|
+
* Skipped entries (everything else) stay in the topic-index as
|
|
39
|
+
* unpromoted candidates. The PR 2 `cairn_search_candidates` /
|
|
40
|
+
* `cairn_propose_decision` MCP tools surface them to AI agents as
|
|
41
|
+
* the project lives.
|
|
16
42
|
*/
|
|
17
|
-
import { existsSync, readdirSync, statSync, } from "node:fs";
|
|
43
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync, } from "node:fs";
|
|
18
44
|
import { join, relative } from "node:path";
|
|
45
|
+
import { stringify as stringifyYaml } from "yaml";
|
|
19
46
|
import { runClaude } from "../claude/index.js";
|
|
20
|
-
import { readAnchorMap, readTopicIndex,
|
|
47
|
+
import { bodyContentHash, decisionsDir, deriveDecId, readAnchorMap, readRejectedYaml, readTopicIndex, setTopic, writeFileCandidatesMap, writeTopicIndex, } from "../ground/index.js";
|
|
21
48
|
import { logger } from "../logger.js";
|
|
22
|
-
import {
|
|
49
|
+
import { firstLineFallback, readSotBody } from "./sot-emit.js";
|
|
23
50
|
const log = logger("init.ingest-docs");
|
|
24
|
-
|
|
25
|
-
|
|
51
|
+
/* -------------------------------------------------------------------------- */
|
|
52
|
+
/* Tunables — locked in §3 of PHASE_6_REDESIGN */
|
|
53
|
+
/* -------------------------------------------------------------------------- */
|
|
54
|
+
/** N files per Stage-1 Haiku call. */
|
|
55
|
+
const FILE_FILTER_BATCH_SIZE = 30;
|
|
56
|
+
/** Concurrent Stage-1 batches. */
|
|
57
|
+
const FILE_FILTER_CONCURRENCY = 5;
|
|
58
|
+
/** Stage 1 per-file context — first chars of body, frontmatter stripped. */
|
|
59
|
+
const FILE_FILTER_INTRO_CHARS = 800;
|
|
60
|
+
/** Stage 1 max ToC lines (H1/H2/H3 only). */
|
|
61
|
+
const FILE_FILTER_TOC_MAX_LINES = 100;
|
|
62
|
+
/** Stage 1 wall budget per Haiku call. */
|
|
63
|
+
const FILE_FILTER_TIMEOUT_MS = 60_000;
|
|
64
|
+
/** N sections per Stage-2 Haiku call. */
|
|
65
|
+
const SECTION_BATCH_SIZE = 30;
|
|
66
|
+
/** Concurrent Stage-2 batches. */
|
|
67
|
+
const SECTION_CONCURRENCY = 5;
|
|
68
|
+
/** Stage 2 per-section body cap (chars) before truncation marker. */
|
|
69
|
+
const SECTION_BODY_CAP = 2_000;
|
|
70
|
+
/** Stage 2 wall budget per Haiku call. */
|
|
71
|
+
const SECTION_TIMEOUT_MS = 120_000;
|
|
72
|
+
/** Capture source stamped on every Stage 2/3 emit. */
|
|
73
|
+
const CAPTURE_SOURCE = "init-docs-ingest";
|
|
74
|
+
/** Decided-by stamp on every Stage 2/3 emit. */
|
|
75
|
+
const DECIDED_BY = "cairn-init";
|
|
26
76
|
/** Subdirs we never descend into when discovering candidate doc files. */
|
|
27
77
|
const SKIP_DIRS = new Set([
|
|
28
78
|
".cairn",
|
|
@@ -73,7 +123,11 @@ function walkDocsDir(dir, repoRoot, out) {
|
|
|
73
123
|
catch {
|
|
74
124
|
continue;
|
|
75
125
|
}
|
|
76
|
-
out.push({
|
|
126
|
+
out.push({
|
|
127
|
+
path: relative(repoRoot, abs),
|
|
128
|
+
size: st.size,
|
|
129
|
+
group: dirGroup(relative(repoRoot, abs)),
|
|
130
|
+
});
|
|
77
131
|
}
|
|
78
132
|
}
|
|
79
133
|
function dirGroup(rel) {
|
|
@@ -83,140 +137,610 @@ function dirGroup(rel) {
|
|
|
83
137
|
return `${parts[0]}/`;
|
|
84
138
|
}
|
|
85
139
|
/* -------------------------------------------------------------------------- */
|
|
86
|
-
/*
|
|
140
|
+
/* Stage 1 — file-purpose binary filter */
|
|
141
|
+
/* */
|
|
142
|
+
/* Locked rigid prompt — DO NOT paraphrase. A file is authoritative ONLY */
|
|
143
|
+
/* if it's a canonical rulebook, a formal ADR, or a list of active binding */
|
|
144
|
+
/* domain invariants. Plans / scratchpads / UAT logs / API docs are NOT */
|
|
145
|
+
/* authoritative even if they contain proposed or historical decisions. */
|
|
87
146
|
/* -------------------------------------------------------------------------- */
|
|
88
|
-
const
|
|
147
|
+
const FILE_FILTER_SCHEMA = {
|
|
89
148
|
type: "object",
|
|
90
149
|
additionalProperties: false,
|
|
150
|
+
required: ["files"],
|
|
91
151
|
properties: {
|
|
92
|
-
|
|
93
|
-
type: "
|
|
94
|
-
|
|
152
|
+
files: {
|
|
153
|
+
type: "array",
|
|
154
|
+
items: {
|
|
155
|
+
type: "object",
|
|
156
|
+
additionalProperties: false,
|
|
157
|
+
required: ["path", "is_authoritative", "reason"],
|
|
158
|
+
properties: {
|
|
159
|
+
path: { type: "string" },
|
|
160
|
+
is_authoritative: { type: "boolean" },
|
|
161
|
+
reason: { type: "string" },
|
|
162
|
+
},
|
|
163
|
+
},
|
|
95
164
|
},
|
|
96
|
-
proposedTitle: { type: "string" },
|
|
97
165
|
},
|
|
98
|
-
required: ["kind", "proposedTitle"],
|
|
99
166
|
};
|
|
100
|
-
const
|
|
167
|
+
const FILE_FILTER_SYSTEM = `You are a rigid filter for an architecture ledger. A file is authoritative ONLY if it is a canonical rulebook, a formal Architecture Decision Record (ADR), or a list of active, binding domain invariants.
|
|
101
168
|
|
|
102
|
-
|
|
169
|
+
If a file is a project plan, research scratchpad, UAT log, status update, or API documentation, it is NOT authoritative, even if it contains proposed or historical decisions.
|
|
170
|
+
|
|
171
|
+
Evaluate the provided filepath, frontmatter, intro, and Table of Contents. Return JSON:
|
|
172
|
+
{ "files": [ { "path": "<filepath>", "is_authoritative": <bool>, "reason": "10 words max" }, ... ] }
|
|
173
|
+
|
|
174
|
+
EXACTLY one entry per input filepath. Do NOT omit. Do NOT invent paths.`;
|
|
175
|
+
/**
 * Build the per-file context that the Stage-1 file-purpose filter sends to
 * the model.
 *
 * For every repo-relative path the file is read as UTF-8, its frontmatter is
 * separated with `splitFrontmatter`, and the entry records the first
 * `FILE_FILTER_INTRO_CHARS` characters of the remaining body together with
 * the heading list produced by `extractToc`.
 *
 * Files that cannot be read (missing, a directory, permission denied, …) are
 * skipped, so the result may be shorter than `files`.
 *
 * @param {string} repoRoot Absolute path of the repository root.
 * @param {Iterable<string>} files Repo-relative file paths.
 * @returns {Array<{path: string, frontmatter: (string|null), introChars: string, toc: string}>}
 */
function buildFileFilterInputs(repoRoot, files) {
    const out = [];
    for (const rel of files) {
        let raw;
        try {
            // Read directly instead of probing with existsSync first: the
            // probe is racy and the read already reports a missing file.
            raw = readFileSync(join(repoRoot, rel), "utf8");
        }
        catch (err) {
            // Best-effort: an unreadable file is skipped, but leave a trace
            // so a shrinking input set can be diagnosed.
            log.debug({ path: rel, err: err instanceof Error ? err.message : String(err) }, "phase 6 stage 1 could not read file; skipping");
            continue;
        }
        const { frontmatter, body } = splitFrontmatter(raw);
        out.push({
            path: rel,
            frontmatter,
            introChars: body.slice(0, FILE_FILTER_INTRO_CHARS),
            toc: extractToc(body),
        });
    }
    return out;
}
|
|
195
|
+
/**
 * Split a leading `---`-fenced frontmatter block off a document.
 *
 * Recognised form: the document starts (optionally after a UTF-8 BOM) with a
 * `---` line, followed by zero or more frontmatter lines, followed by a line
 * consisting only of `---`. Both LF and CRLF line endings are accepted, and
 * trailing spaces/tabs on the fence lines are ignored.
 *
 * @param {string} raw Full file contents.
 * @returns {{frontmatter: (string|null), body: string}} `frontmatter` is the
 *   text between the fences (without the fences; `""` for an empty block) or
 *   `null` when the document has no frontmatter, in which case `body` is
 *   `raw` unchanged. Otherwise `body` is everything after the closing fence
 *   line.
 */
function splitFrontmatter(raw) {
    // Alternative 1 handles an empty block (closing fence immediately after
    // the opening one); alternative 2 lazily captures up to the first line
    // that is exactly `---`.
    const m = raw.match(/^\uFEFF?---[ \t]*\r?\n(?:---|([\s\S]*?)\r?\n---)[ \t]*(?:\r?\n|$)/);
    if (m === null)
        return { frontmatter: null, body: raw };
    // The capture group is undefined for an empty frontmatter block.
    const fm = m[1] ?? "";
    return { frontmatter: fm, body: raw.slice(m[0].length) };
}
|
|
202
|
+
/**
 * Collect the H1/H2/H3 ATX heading lines of a markdown body, in document
 * order, as a newline-joined string.
 *
 * Lines inside fenced code blocks (``` or ~~~) are ignored so that shell or
 * Python comments such as `# install deps` are not reported as headings.
 *
 * @param {string} body Markdown text (frontmatter already removed).
 * @param {number} [maxLines=FILE_FILTER_TOC_MAX_LINES] Maximum number of
 *   heading lines to return; collection stops once the cap is reached.
 * @returns {string} Trimmed heading lines joined with "\n"; "" when none.
 */
function extractToc(body, maxLines = FILE_FILTER_TOC_MAX_LINES) {
    if (maxLines <= 0)
        return "";
    const fenceRe = /^ {0,3}(`{3,}|~{3,})/;
    const toc = [];
    // Marker of the currently open code fence, or null when outside one.
    let openFence = null;
    for (const line of body.split("\n")) {
        const fence = fenceRe.exec(line)?.[1];
        if (openFence !== null) {
            // A fence closes only on a line made of the same fence character,
            // at least as long as the opener, with nothing else on it.
            if (fence !== undefined &&
                fence[0] === openFence[0] &&
                fence.length >= openFence.length &&
                line.trim() === fence) {
                openFence = null;
            }
            continue;
        }
        if (fence !== undefined) {
            openFence = fence;
            continue;
        }
        if (/^#{1,3}\s+/.test(line)) {
            toc.push(line.trim());
            if (toc.length >= maxLines)
                break;
        }
    }
    return toc.join("\n");
}
|
|
214
|
+
/**
 * Run one Stage-1 model call over a batch of file contexts and return the
 * per-path verdicts.
 *
 * Each input is rendered as a `=== path:` block carrying its frontmatter,
 * table of contents and intro, and sent with `FILE_FILTER_SYSTEM` /
 * `FILE_FILTER_SCHEMA`. Response entries that are malformed (non-object,
 * non-string `path`, non-boolean `is_authoritative`) are dropped. Entries
 * whose `path` was not part of this batch are dropped too: the prompt
 * forbids invented paths, and keeping them would inflate the
 * authoritative-file count derived from the returned map.
 *
 * @param {Array<{path: string, frontmatter: (string|null), introChars: string, toc: string}>} inputs
 * @returns {Promise<Map<string, {is_authoritative: boolean, reason: string}>>}
 *   Verdicts keyed by input path; paths the model omitted are absent.
 * @throws {Error} When the parsed response is not an object or has no
 *   `files` array; errors from `runClaude` propagate unchanged.
 */
async function classifyFileBatch(inputs) {
    if (inputs.length === 0)
        return new Map();
    const blocks = inputs
        .map((it) => {
        const fmBlock = it.frontmatter !== null
            ? `frontmatter:\n${it.frontmatter}\n`
            : `frontmatter: (none)\n`;
        const tocBlock = it.toc.length > 0 ? `toc:\n${it.toc}\n` : `toc: (none)\n`;
        const intro = it.introChars.length > 0
            ? `intro:\n${it.introChars}`
            : `intro: (empty)`;
        return `=== path: ${it.path}\n${fmBlock}${tocBlock}${intro}`;
    })
        .join("\n\n");
    const prompt = `Classify each file. Return one entry per path.\n\n${blocks}`;
    const result = await runClaude({
        tier: "haiku",
        system: FILE_FILTER_SYSTEM,
        prompt,
        jsonSchema: FILE_FILTER_SCHEMA,
        timeoutMs: FILE_FILTER_TIMEOUT_MS,
        isolateAmbientContext: true,
    });
    const parsed = result.parsed;
    if (typeof parsed !== "object" || parsed === null) {
        throw new Error("haiku file-filter returned non-object");
    }
    const arr = parsed["files"];
    if (!Array.isArray(arr)) {
        throw new Error("haiku file-filter missing `files` array");
    }
    // Only paths we actually asked about may receive a verdict.
    const knownPaths = new Set(inputs.map((it) => it.path));
    const out = new Map();
    for (const raw of arr) {
        if (typeof raw !== "object" || raw === null)
            continue;
        const path = raw["path"];
        const flag = raw["is_authoritative"];
        const reason = raw["reason"];
        if (typeof path !== "string")
            continue;
        if (typeof flag !== "boolean")
            continue;
        if (!knownPaths.has(path)) {
            log.debug({ path }, "phase 6 stage 1 verdict for path outside the batch ignored");
            continue;
        }
        out.set(path, {
            is_authoritative: flag,
            reason: typeof reason === "string" ? reason : "",
        });
    }
    return out;
}
|
|
265
|
+
/* -------------------------------------------------------------------------- */
|
|
266
|
+
/* Stage 2 — section batch classifier (kind + proposedTitle) */
|
|
267
|
+
/* -------------------------------------------------------------------------- */
|
|
268
|
+
const SECTION_SCHEMA = {
|
|
269
|
+
type: "object",
|
|
270
|
+
additionalProperties: false,
|
|
271
|
+
required: ["classifications"],
|
|
272
|
+
properties: {
|
|
273
|
+
classifications: {
|
|
274
|
+
type: "array",
|
|
275
|
+
items: {
|
|
276
|
+
type: "object",
|
|
277
|
+
additionalProperties: false,
|
|
278
|
+
required: ["slug", "kind", "proposedTitle"],
|
|
279
|
+
properties: {
|
|
280
|
+
slug: { type: "string" },
|
|
281
|
+
kind: {
|
|
282
|
+
type: "string",
|
|
283
|
+
enum: ["decision", "domain-rule", "voice-guidelines", "api-docs", "other"],
|
|
284
|
+
},
|
|
285
|
+
proposedTitle: { type: "string" },
|
|
286
|
+
},
|
|
287
|
+
},
|
|
288
|
+
},
|
|
289
|
+
},
|
|
290
|
+
};
|
|
291
|
+
const SECTION_SYSTEM = `You classify N sections from authoritative project documentation for Cairn's Single-Source-of-Truth ledger.
|
|
292
|
+
|
|
293
|
+
These sections come from files already filtered as canonical rulebooks, ADRs, or binding invariant lists. Decide which sections are themselves binding decisions / rules vs supporting context.
|
|
294
|
+
|
|
295
|
+
Return JSON: { "classifications": [ { "slug": "...", "kind": "...", "proposedTitle": "..." }, ... ] }
|
|
296
|
+
|
|
297
|
+
EXACTLY one classification per input section, keyed by its slug. Do NOT omit. Do NOT invent slugs. If unsure, kind="other".
|
|
103
298
|
|
|
104
299
|
\`kind\` choices:
|
|
105
|
-
- "decision"
|
|
106
|
-
- "domain-rule"
|
|
107
|
-
- "voice-guidelines"
|
|
108
|
-
- "api-docs"
|
|
300
|
+
- "decision" binding decision or architectural choice
|
|
301
|
+
- "domain-rule" domain rule or constraint developers must obey
|
|
302
|
+
- "voice-guidelines" brand voice / tone guidance
|
|
303
|
+
- "api-docs" API surface / schema documentation (descriptive)
|
|
109
304
|
- "other" nothing actionable for the cairn state layer
|
|
110
305
|
|
|
111
|
-
\`proposedTitle\` 5-10 words, imperative voice
|
|
306
|
+
\`proposedTitle\` 5-10 words, imperative voice. Empty string for "other".
|
|
112
307
|
|
|
113
|
-
Be conservative — false-positive decisions pollute the ground state worse
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
const
|
|
308
|
+
Be conservative — false-positive decisions pollute the ground state worse than missed capture. Default to "other" when uncertain.`;
|
|
309
|
+
async function classifySectionBatch(items) {
|
|
310
|
+
if (items.length === 0)
|
|
311
|
+
return new Map();
|
|
312
|
+
const sections = items
|
|
313
|
+
.map((it, i) => {
|
|
314
|
+
const capped = it.body.length > SECTION_BODY_CAP
|
|
315
|
+
? `${it.body.slice(0, SECTION_BODY_CAP)}\n…[truncated]`
|
|
316
|
+
: it.body;
|
|
317
|
+
return `[${i + 1}] slug=${it.slug} source=${it.sot_source}\n${capped}`;
|
|
318
|
+
})
|
|
319
|
+
.join("\n\n---\n\n");
|
|
320
|
+
const prompt = `Classify each section. Return one entry per slug.\n\n${sections}`;
|
|
118
321
|
const result = await runClaude({
|
|
119
322
|
tier: "haiku",
|
|
120
|
-
system:
|
|
323
|
+
system: SECTION_SYSTEM,
|
|
121
324
|
prompt,
|
|
122
|
-
jsonSchema:
|
|
123
|
-
timeoutMs:
|
|
325
|
+
jsonSchema: SECTION_SCHEMA,
|
|
326
|
+
timeoutMs: SECTION_TIMEOUT_MS,
|
|
124
327
|
isolateAmbientContext: true,
|
|
125
328
|
});
|
|
126
329
|
const parsed = result.parsed;
|
|
127
330
|
if (typeof parsed !== "object" || parsed === null) {
|
|
128
|
-
throw new Error("haiku returned non-object
|
|
331
|
+
throw new Error("haiku section batch returned non-object");
|
|
129
332
|
}
|
|
130
|
-
const
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
kind !== "domain-rule" &&
|
|
134
|
-
kind !== "voice-guidelines" &&
|
|
135
|
-
kind !== "api-docs" &&
|
|
136
|
-
kind !== "other") {
|
|
137
|
-
throw new Error(`haiku returned unexpected kind: ${String(kind)}`);
|
|
333
|
+
const arr = parsed["classifications"];
|
|
334
|
+
if (!Array.isArray(arr)) {
|
|
335
|
+
throw new Error("haiku section batch missing `classifications`");
|
|
138
336
|
}
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
337
|
+
const out = new Map();
|
|
338
|
+
for (const raw of arr) {
|
|
339
|
+
if (typeof raw !== "object" || raw === null)
|
|
340
|
+
continue;
|
|
341
|
+
const e = raw;
|
|
342
|
+
const slug = e["slug"];
|
|
343
|
+
const kind = e["kind"];
|
|
344
|
+
if (typeof slug !== "string")
|
|
345
|
+
continue;
|
|
346
|
+
if (kind !== "decision" &&
|
|
347
|
+
kind !== "domain-rule" &&
|
|
348
|
+
kind !== "voice-guidelines" &&
|
|
349
|
+
kind !== "api-docs" &&
|
|
350
|
+
kind !== "other") {
|
|
351
|
+
continue;
|
|
352
|
+
}
|
|
353
|
+
out.set(slug, {
|
|
354
|
+
kind,
|
|
355
|
+
proposedTitle: typeof e["proposedTitle"] === "string" ? e["proposedTitle"] : "",
|
|
356
|
+
});
|
|
357
|
+
}
|
|
358
|
+
return out;
|
|
143
359
|
}
|
|
144
|
-
/* -------------------------------------------------------------------------- */
|
|
145
|
-
/* Orchestrator */
|
|
146
|
-
/* -------------------------------------------------------------------------- */
|
|
147
360
|
export async function runDocsIngestion(args) {
|
|
148
361
|
const topicIndex = readTopicIndex(args.repoRoot);
|
|
149
362
|
const anchorMap = readAnchorMap(args.repoRoot);
|
|
150
|
-
const
|
|
151
|
-
|
|
363
|
+
const rejected = readRejectedYaml(args.repoRoot);
|
|
364
|
+
const allCandidates = Object.values(topicIndex.topics).filter((entry) => isDocSoT(entry) && entry.dec_id === undefined && !rejected.has(entry.slug));
|
|
365
|
+
if (allCandidates.length === 0) {
|
|
152
366
|
log.info("phase 6 found no eligible docs entries in topic-index");
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
367
|
+
writeFileCandidatesMap(args.repoRoot, topicIndex);
|
|
368
|
+
return zeroResult(allCandidates.length, topicIndex);
|
|
369
|
+
}
|
|
370
|
+
// Read each candidate body once. Stage 3 needs the body for title
|
|
371
|
+
// derivation; Stages 1/2 don't, but reading up front keeps the
|
|
372
|
+
// pipeline single-pass over entries. Bodies that fail to read are
|
|
373
|
+
// dropped — anchor-map drift is the only realistic cause and the
|
|
374
|
+
// entry stays as a candidate for the next phase 5b refresh.
|
|
375
|
+
const ctxBySlug = new Map();
|
|
376
|
+
for (const entry of allCandidates) {
|
|
377
|
+
const body = readSotBody(args.repoRoot, entry, anchorMap);
|
|
378
|
+
if (body === null)
|
|
379
|
+
continue;
|
|
380
|
+
ctxBySlug.set(entry.slug, { entry, body });
|
|
381
|
+
}
|
|
382
|
+
// ── Stage 3 — marker scan (deterministic, 0 Haiku) ──
|
|
383
|
+
const markerCandidates = [];
|
|
384
|
+
const nonMarkerCandidates = [];
|
|
385
|
+
for (const ctx of ctxBySlug.values()) {
|
|
386
|
+
if (ctx.entry.marker_kind !== undefined)
|
|
387
|
+
markerCandidates.push(ctx);
|
|
388
|
+
else
|
|
389
|
+
nonMarkerCandidates.push(ctx);
|
|
390
|
+
}
|
|
391
|
+
// ── Mock path — bypass Stages 1+2; run mockClassify on every
|
|
392
|
+
// non-marker candidate. Smokes only.
|
|
393
|
+
let sectionEmits = [];
|
|
394
|
+
let authoritativeFileCount = 0;
|
|
395
|
+
let filesEvaluated = 0;
|
|
396
|
+
if (args.mockClassify !== undefined) {
|
|
397
|
+
for (const ctx of nonMarkerCandidates) {
|
|
162
398
|
let cls;
|
|
163
399
|
try {
|
|
164
|
-
cls = args.mockClassify
|
|
165
|
-
? args.mockClassify(entry, body)
|
|
166
|
-
: await classifyEntry(entry, body);
|
|
400
|
+
cls = args.mockClassify(ctx.entry, ctx.body);
|
|
167
401
|
}
|
|
168
402
|
catch (err) {
|
|
169
|
-
log.warn({ slug: entry.slug, err: err instanceof Error ? err.message : String(err) }, "
|
|
170
|
-
|
|
171
|
-
}
|
|
172
|
-
processed += 1;
|
|
173
|
-
if (args.onEntryProgress !== undefined) {
|
|
174
|
-
args.onEntryProgress({
|
|
175
|
-
slug: entry.slug,
|
|
176
|
-
emitted: cls.kind === "decision" || cls.kind === "domain-rule",
|
|
177
|
-
total: candidateEntries.length,
|
|
178
|
-
});
|
|
403
|
+
log.warn({ slug: ctx.entry.slug, err: err instanceof Error ? err.message : String(err) }, "mockClassify failed; skipping");
|
|
404
|
+
continue;
|
|
179
405
|
}
|
|
180
406
|
if (cls.kind === "decision" || cls.kind === "domain-rule") {
|
|
181
|
-
|
|
407
|
+
sectionEmits.push({ ctx, cls });
|
|
182
408
|
}
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
409
|
+
}
|
|
410
|
+
if (args.onChunkProgress !== undefined) {
|
|
411
|
+
args.onChunkProgress({
|
|
412
|
+
chunksDone: 1,
|
|
413
|
+
totalChunks: 1,
|
|
414
|
+
entriesDone: nonMarkerCandidates.length,
|
|
415
|
+
totalEntries: nonMarkerCandidates.length,
|
|
416
|
+
stage: "section-classify",
|
|
417
|
+
});
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
else {
|
|
421
|
+
// ── Stage 1 — file-purpose binary filter ──
|
|
422
|
+
const distinctFiles = [
|
|
423
|
+
...new Set(nonMarkerCandidates.map((c) => c.entry.sot_source)),
|
|
424
|
+
].sort();
|
|
425
|
+
filesEvaluated = distinctFiles.length;
|
|
426
|
+
const stage1Args = {
|
|
427
|
+
repoRoot: args.repoRoot,
|
|
428
|
+
files: distinctFiles,
|
|
429
|
+
};
|
|
430
|
+
if (args.onChunkProgress !== undefined) {
|
|
431
|
+
stage1Args.onChunkProgress = args.onChunkProgress;
|
|
432
|
+
}
|
|
433
|
+
const fileVerdicts = await runStage1FileFilter(stage1Args);
|
|
434
|
+
const authoritativeFiles = new Set();
|
|
435
|
+
for (const [path, v] of fileVerdicts.entries()) {
|
|
436
|
+
if (v.is_authoritative)
|
|
437
|
+
authoritativeFiles.add(path);
|
|
438
|
+
}
|
|
439
|
+
authoritativeFileCount = authoritativeFiles.size;
|
|
440
|
+
// ── Stage 2 — section batch classifier (scoped) ──
|
|
441
|
+
const stage2Inputs = nonMarkerCandidates.filter((c) => authoritativeFiles.has(c.entry.sot_source));
|
|
442
|
+
const stage2Args = {
|
|
443
|
+
candidates: stage2Inputs,
|
|
444
|
+
};
|
|
445
|
+
if (args.onChunkProgress !== undefined) {
|
|
446
|
+
stage2Args.onChunkProgress = args.onChunkProgress;
|
|
447
|
+
}
|
|
448
|
+
sectionEmits = await runStage2SectionClassifier(stage2Args);
|
|
449
|
+
}
|
|
450
|
+
// ── Stage 4 — emit drafts to `_inbox/` ──
|
|
451
|
+
const existingDecIds = args.existingDecIds ?? scanExistingDecIds(args.repoRoot);
|
|
452
|
+
const finalEmits = [
|
|
453
|
+
...markerCandidates.map((ctx) => {
|
|
454
|
+
const kind = ctx.entry.marker_kind === "rule" ? "domain-rule" : "decision";
|
|
455
|
+
return { ctx, cls: { kind, proposedTitle: deriveMarkerTitle(ctx) } };
|
|
456
|
+
}),
|
|
457
|
+
...sectionEmits,
|
|
458
|
+
];
|
|
459
|
+
let updatedTopicIndex = topicIndex;
|
|
460
|
+
const decsWritten = [];
|
|
461
|
+
const skipped = [];
|
|
462
|
+
for (const { ctx, cls } of finalEmits) {
|
|
463
|
+
const sot_path = entryToSotPath(ctx.entry);
|
|
464
|
+
const titleSeed = cls.proposedTitle.length > 0
|
|
465
|
+
? cls.proposedTitle
|
|
466
|
+
: firstLineFallback(ctx.body);
|
|
467
|
+
const id = allocateUniqueDecId({ sot_path, title: titleSeed, capture_source: CAPTURE_SOURCE }, existingDecIds);
|
|
468
|
+
const draftPath = writeDraftToInbox({
|
|
469
|
+
repoRoot: args.repoRoot,
|
|
470
|
+
id,
|
|
471
|
+
title: titleSeed,
|
|
472
|
+
body: ctx.body,
|
|
473
|
+
sot_path,
|
|
474
|
+
source_file: ctx.entry.sot_source,
|
|
475
|
+
});
|
|
476
|
+
decsWritten.push({
|
|
477
|
+
id,
|
|
478
|
+
path: relativeInboxPath(id),
|
|
479
|
+
sourceFile: ctx.entry.sot_source,
|
|
480
|
+
slug: ctx.entry.slug,
|
|
481
|
+
});
|
|
482
|
+
updatedTopicIndex = setTopic(updatedTopicIndex, ctx.entry.slug, {
|
|
483
|
+
...ctx.entry,
|
|
484
|
+
dec_id: id,
|
|
485
|
+
});
|
|
486
|
+
log.debug({ id, slug: ctx.entry.slug, draftPath }, "phase 6 emitted draft");
|
|
487
|
+
}
|
|
488
|
+
// Refresh topic-index + file-candidates-map so the read-enrich hook
|
|
489
|
+
// sees the post-emit candidate counts. Anchor-map / sot-bindings /
|
|
490
|
+
// sot-cache stay untouched — drafts in `_inbox/` aren't canonical
|
|
491
|
+
// until the operator (or `cairn attention`) accepts them.
|
|
492
|
+
writeTopicIndex(args.repoRoot, updatedTopicIndex);
|
|
493
|
+
writeFileCandidatesMap(args.repoRoot, updatedTopicIndex);
|
|
494
|
+
const unpromotedCandidates = countUnpromoted(updatedTopicIndex);
|
|
197
495
|
log.info({
|
|
198
|
-
scanned:
|
|
496
|
+
scanned: allCandidates.length,
|
|
199
497
|
emitted: decsWritten.length,
|
|
200
|
-
|
|
201
|
-
|
|
498
|
+
markerEmits: markerCandidates.length,
|
|
499
|
+
sectionEmits: sectionEmits.length,
|
|
500
|
+
authoritativeFiles: authoritativeFileCount,
|
|
501
|
+
filesEvaluated,
|
|
502
|
+
unpromotedCandidates,
|
|
202
503
|
}, "phase 6 complete");
|
|
203
504
|
return {
|
|
204
505
|
decsWritten,
|
|
205
|
-
skipped
|
|
206
|
-
scannedEntries:
|
|
506
|
+
skipped,
|
|
507
|
+
scannedEntries: allCandidates.length,
|
|
508
|
+
markerEmits: markerCandidates.length,
|
|
509
|
+
sectionEmits: sectionEmits.length,
|
|
510
|
+
authoritativeFiles: authoritativeFileCount,
|
|
511
|
+
filesEvaluated,
|
|
512
|
+
unpromotedCandidates,
|
|
207
513
|
};
|
|
208
514
|
}
|
|
209
|
-
|
|
210
|
-
|
|
515
|
+
/* -------------------------------------------------------------------------- */
|
|
516
|
+
/* Stage runners */
|
|
517
|
+
/* -------------------------------------------------------------------------- */
|
|
518
|
+
/**
 * Stage 1 runner: classify files as authoritative or not, in batches of
 * `FILE_FILTER_BATCH_SIZE`, with at most `FILE_FILTER_CONCURRENCY` batches
 * in flight.
 *
 * A batch whose classification throws is logged and contributes no verdicts.
 * `args.onChunkProgress`, when provided, is invoked after every batch,
 * whether it succeeded or failed.
 *
 * @param {{repoRoot: string, files: string[], onChunkProgress?: Function}} args
 * @returns {Promise<Map<string, {is_authoritative: boolean, reason: string}>>}
 *   Verdicts keyed by path, merged across all batches.
 */
export async function runStage1FileFilter(args) {
    const verdicts = new Map();
    if (args.files.length === 0) {
        return verdicts;
    }
    const inputs = buildFileFilterInputs(args.repoRoot, args.files);
    const batchCount = Math.ceil(inputs.length / FILE_FILTER_BATCH_SIZE);
    const batches = Array.from({ length: batchCount }, (_, n) => inputs.slice(n * FILE_FILTER_BATCH_SIZE, (n + 1) * FILE_FILTER_BATCH_SIZE));
    // Shared cursor: each worker claims the next unclaimed batch index.
    let cursor = 0;
    let finishedBatches = 0;
    let finishedEntries = 0;
    const drainQueue = async () => {
        while (cursor < batches.length) {
            const chunkIdx = cursor;
            cursor += 1;
            const batch = batches[chunkIdx];
            try {
                const batchVerdicts = await classifyFileBatch(batch);
                batchVerdicts.forEach((verdict, path) => {
                    verdicts.set(path, verdict);
                });
            }
            catch (err) {
                const message = err instanceof Error ? err.message : String(err);
                log.warn({ chunkIdx, size: batch.length, err: message }, "phase 6 stage 1 file-filter failed; chunk treated as non-authoritative");
            }
            finishedBatches += 1;
            finishedEntries += batch.length;
            if (args.onChunkProgress !== undefined) {
                args.onChunkProgress({
                    chunksDone: finishedBatches,
                    totalChunks: batches.length,
                    entriesDone: finishedEntries,
                    totalEntries: inputs.length,
                    stage: "file-filter",
                });
            }
        }
    };
    const workerCount = Math.min(FILE_FILTER_CONCURRENCY, Math.max(1, batches.length));
    const workers = [];
    for (let w = 0; w < workerCount; w += 1) {
        workers.push(drainQueue());
    }
    await Promise.all(workers);
    return verdicts;
}
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
561
|
+
/**
 * Stage 2 runner: classify candidate sections and keep the emit-worthy ones.
 *
 * Candidates are reduced to `{ slug, body, sot_source }` items, split into
 * batches of SECTION_BATCH_SIZE, and classified by at most
 * SECTION_CONCURRENCY workers sharing one cursor. A batch whose
 * classification throws is logged and skipped.
 *
 * Only verdicts whose `kind` is "decision" or "domain-rule" are returned.
 * The result follows the order of `args.candidates` (first occurrence of
 * each slug) rather than the order in which concurrent batches happened to
 * finish, so repeated runs over the same input yield the same sequence.
 *
 * @param args - `candidates` (each with `entry.slug`, `entry.sot_source`,
 *   `body`) and an optional `onChunkProgress` callback invoked after every
 *   batch settles.
 * @returns Array of `{ ctx, cls }` pairs: the candidate context and its
 *   classification.
 */
async function runStage2SectionClassifier(args) {
    const out = [];
    if (args.candidates.length === 0) {
        return out;
    }
    const items = args.candidates.map((c) => ({
        slug: c.entry.slug,
        body: c.body,
        sot_source: c.entry.sot_source,
    }));
    // Keyed by slug; if a slug repeats, the last candidate's context wins
    // while the key keeps the position of its first occurrence.
    const ctxBySlug = new Map(args.candidates.map((c) => [c.entry.slug, c]));
    const chunks = [];
    for (let i = 0; i < items.length; i += SECTION_BATCH_SIZE) {
        chunks.push(items.slice(i, i + SECTION_BATCH_SIZE));
    }
    let nextIdx = 0;
    let chunksDone = 0;
    let entriesDone = 0;
    const verdicts = new Map();
    const worker = async () => {
        for (;;) {
            const idx = nextIdx;
            nextIdx += 1;
            if (idx >= chunks.length) {
                return;
            }
            const chunk = chunks[idx];
            try {
                const map = await classifySectionBatch(chunk);
                for (const [slug, cls] of map.entries()) {
                    verdicts.set(slug, cls);
                }
            }
            catch (err) {
                log.warn({ chunkIdx: idx, size: chunk.length, err: err instanceof Error ? err.message : String(err) }, "phase 6 stage 2 batch failed; chunk skipped");
            }
            // Progress counts settled chunks, including failed ones.
            chunksDone += 1;
            entriesDone += chunk.length;
            if (args.onChunkProgress !== undefined) {
                args.onChunkProgress({
                    chunksDone,
                    totalChunks: chunks.length,
                    entriesDone,
                    totalEntries: items.length,
                    stage: "section-classify",
                });
            }
        }
    };
    await Promise.all(Array.from({ length: Math.min(SECTION_CONCURRENCY, Math.max(1, chunks.length)) }, () => worker()));
    // Walk the candidates' own order instead of `verdicts` insertion order:
    // the latter depends on which concurrent batch resolved first.
    for (const [slug, ctx] of ctxBySlug) {
        const cls = verdicts.get(slug);
        if (cls === undefined) {
            continue;
        }
        if (cls.kind !== "decision" && cls.kind !== "domain-rule") {
            continue;
        }
        out.push({ ctx, cls });
    }
    return out;
}
|
|
618
|
+
/**
 * Write a draft decision file into the `_inbox` folder under the decisions
 * directory, creating the folder if it does not exist.
 *
 * The file is YAML frontmatter between `---` fences, a blank line, the body
 * with trailing whitespace removed, and a final newline.
 *
 * @param args - `repoRoot`, `id`, `title`, `sot_path`, `body`, `source_file`.
 * @returns Absolute path of the written `<id>.draft.md` file.
 */
function writeDraftToInbox(args) {
    const inboxDir = join(decisionsDir(args.repoRoot), "_inbox");
    mkdirSync(inboxDir, { recursive: true });
    const draftPath = join(inboxDir, `${args.id}.draft.md`);
    // One timestamp shared by every date field of the draft.
    const stamp = new Date().toISOString();
    const frontmatter = stringifyYaml({
        id: args.id,
        title: args.title,
        type: "adr",
        status: "draft",
        audience: "dual",
        generated: stamp,
        "verified-at": stamp,
        decided_at: stamp,
        decided_by: DECIDED_BY,
        sot_kind: "path",
        sot_path: args.sot_path,
        sot_content_hash: bodyContentHash(args.body),
        capture_source: CAPTURE_SOURCE,
        source_file: args.source_file,
    }).trimEnd();
    const text = ["---", frontmatter, "---", "", args.body.trimEnd(), ""].join("\n");
    writeFileSync(draftPath, text, "utf8");
    return draftPath;
}
|
|
649
|
+
/**
 * Build the inbox path string for a draft decision id.
 *
 * @param id - Decision id used as the file stem.
 * @returns `.cairn/ground/decisions/_inbox/<id>.draft.md`
 */
function relativeInboxPath(id) {
    const fileName = `${id}.draft.md`;
    return `.cairn/ground/decisions/_inbox/${fileName}`;
}
|
|
652
|
+
/* -------------------------------------------------------------------------- */
|
|
653
|
+
/* Helpers */
|
|
654
|
+
/* -------------------------------------------------------------------------- */
|
|
218
655
|
/**
 * Report whether the entry's source-of-truth candidate is of kind "doc".
 *
 * The source-of-truth candidate is the first one whose `file` equals
 * `entry.sot_source`; if there is none the answer is false.
 *
 * @param entry - Object with `candidates` and `sot_source`.
 * @returns true only when that candidate exists and its `kind` is "doc".
 */
function isDocSoT(entry) {
    for (const candidate of entry.candidates) {
        if (candidate.file === entry.sot_source) {
            return candidate.kind === "doc";
        }
    }
    return false;
}
|
|
659
|
+
/**
 * Produce the source-of-truth path for an entry.
 *
 * When the candidate matching `entry.sot_source` carries a non-empty
 * `anchor`, the result is `<sot_source>#<anchor>`; in every other case it is
 * `entry.sot_source` unchanged.
 *
 * @param entry - Object with `candidates` and `sot_source`.
 * @returns The path, optionally suffixed with `#anchor`.
 */
function entryToSotPath(entry) {
    const source = entry.sot_source;
    const anchor = entry.candidates.find((candidate) => candidate.file === source)?.anchor;
    const hasAnchor = anchor !== undefined && anchor.length > 0;
    return hasAnchor ? `${source}#${anchor}` : source;
}
|
|
668
|
+
// firstLineFallback now lives in sot-emit.ts (single source of truth).
|
|
669
|
+
// Imported above as `firstLineFallback`.
|
|
670
|
+
/**
 * Derive a title for a marker candidate.
 *
 * If the source-of-truth candidate has a non-empty anchor, the title is that
 * anchor with runs of `-`/`_` replaced by a space, trimmed, and cut to 120
 * characters. Whenever that yields nothing (or there is no anchor) the title
 * comes from `firstLineFallback(ctx.body)`.
 *
 * @param ctx - Object with `entry` (`candidates`, `sot_source`) and `body`.
 * @returns The derived title string.
 */
function deriveMarkerTitle(ctx) {
    const { entry, body } = ctx;
    const anchor = entry.candidates.find((candidate) => candidate.file === entry.sot_source)?.anchor;
    if (anchor === undefined || !(anchor.length > 0)) {
        return firstLineFallback(body);
    }
    const humanized = anchor.replace(/[-_]+/g, " ").trim().slice(0, 120);
    // An anchor made only of separators collapses to "", so fall back.
    return humanized || firstLineFallback(body);
}
|
|
679
|
+
/**
 * Pick a DEC id for `input` that is not yet in `existingIds`, and record it
 * there.
 *
 * The id from `deriveDecId(input)` is used when free. On a clash, the title
 * is re-derived with a ` #<n>` tag for n = 2..999 and the first free result
 * is taken.
 *
 * @param input - Derivation input; its `title` is the field that gets tagged.
 * @param existingIds - Set of ids already taken; mutated with the chosen id.
 * @returns The allocated id.
 */
function allocateUniqueDecId(input, existingIds) {
    const claim = (candidate) => {
        existingIds.add(candidate);
        return candidate;
    };
    const baseId = deriveDecId(input);
    if (!existingIds.has(baseId)) {
        return claim(baseId);
    }
    let suffix = 2;
    while (suffix < 1_000) {
        const tagged = deriveDecId({ ...input, title: `${input.title} #${suffix}` });
        if (!existingIds.has(tagged)) {
            return claim(tagged);
        }
        suffix += 1;
    }
    // Every tagged variant was taken. Hand back the un-suffixed id instead of
    // inventing a random one, so re-runs stay reproducible; the clash is left
    // for the later write to surface.
    // NOTE(review): confirm that write actually fails on an existing file.
    return claim(baseId);
}
|
|
704
|
+
/**
 * Collect the DEC ids already present on disk.
 *
 * Looks at regular files directly inside the decisions directory and its
 * `_inbox` subfolder, and takes the leading `DEC-<7+ lowercase hex>` part of
 * each file name. A folder that cannot be listed is skipped.
 *
 * @param repoRoot - Repository root handed to `decisionsDir`.
 * @returns Set of ids found.
 */
function scanExistingDecIds(repoRoot) {
    const found = new Set();
    const root = decisionsDir(repoRoot);
    // Best-effort listing: an unreadable or missing folder counts as empty.
    const listDir = (dir) => {
        try {
            return readdirSync(dir, { withFileTypes: true, encoding: "utf8" });
        }
        catch {
            return [];
        }
    };
    for (const dir of [root, join(root, "_inbox")]) {
        for (const dirent of listDir(dir)) {
            if (!dirent.isFile()) {
                continue;
            }
            const idMatch = /^(DEC-[0-9a-f]{7,})/.exec(dirent.name);
            if (idMatch !== null) {
                found.add(idMatch[1]);
            }
        }
    }
    return found;
}
|
|
726
|
+
/**
 * Count the topics that have no `dec_id` assigned.
 *
 * @param topicIndex - Object whose `topics` property maps keys to entries.
 * @returns Number of entries whose `dec_id` is strictly `undefined`.
 */
function countUnpromoted(topicIndex) {
    return Object.values(topicIndex.topics)
        .filter((topic) => topic.dec_id === undefined)
        .length;
}
|
|
734
|
+
/**
 * Build the result object for a run that emitted nothing.
 *
 * @param scanned - Value reported as `scannedEntries`.
 * @param topicIndex - Topic index used to compute `unpromotedCandidates`.
 * @returns Result with empty lists, zeroed counters, and the unpromoted count.
 */
function zeroResult(scanned, topicIndex) {
    const unpromotedCandidates = countUnpromoted(topicIndex);
    return {
        decsWritten: [],
        skipped: [],
        scannedEntries: scanned,
        markerEmits: 0,
        sectionEmits: 0,
        authoritativeFiles: 0,
        filesEvaluated: 0,
        unpromotedCandidates,
    };
}
|
|
222
746
|
//# sourceMappingURL=ingest-docs.js.map
|