npm - @isaacriehm/cairn-core - Versions diffs - 0.6.0 → 0.7.0 - Mend

@isaacriehm/cairn-core 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81) hide show

package/dist/.tsbuildinfo +1 -1
package/dist/attention/bulk-accept.js +56 -15
package/dist/attention/bulk-accept.js.map +1 -1
package/dist/attention/serve/api.js +7 -1
package/dist/attention/serve/api.js.map +1 -1
package/dist/ground/file-candidates-map.d.ts +23 -0
package/dist/ground/file-candidates-map.js +76 -0
package/dist/ground/file-candidates-map.js.map +1 -0
package/dist/ground/index.d.ts +4 -2
package/dist/ground/index.js +4 -2
package/dist/ground/index.js.map +1 -1
package/dist/ground/paths.d.ts +2 -0
package/dist/ground/paths.js +6 -0
package/dist/ground/paths.js.map +1 -1
package/dist/ground/rejected.d.ts +42 -0
package/dist/ground/rejected.js +100 -0
package/dist/ground/rejected.js.map +1 -0
package/dist/ground/schemas.d.ts +80 -0
package/dist/ground/schemas.js +54 -0
package/dist/ground/schemas.js.map +1 -1
package/dist/hooks/post-tool-use/index.d.ts +1 -1
package/dist/hooks/post-tool-use/index.js +1 -1
package/dist/hooks/post-tool-use/index.js.map +1 -1
package/dist/hooks/post-tool-use/ledger-cache.d.ts +13 -0
package/dist/hooks/post-tool-use/ledger-cache.js +48 -0
package/dist/hooks/post-tool-use/ledger-cache.js.map +1 -1
package/dist/hooks/post-tool-use/legend-builder.d.ts +10 -1
package/dist/hooks/post-tool-use/legend-builder.js +27 -2
package/dist/hooks/post-tool-use/legend-builder.js.map +1 -1
package/dist/hooks/post-tool-use/read-enricher.js +8 -2
package/dist/hooks/post-tool-use/read-enricher.js.map +1 -1
package/dist/init/index.d.ts +2 -2
package/dist/init/index.js +1 -1
package/dist/init/index.js.map +1 -1
package/dist/init/ingest-docs.d.ts +82 -22
package/dist/init/ingest-docs.js +632 -108
package/dist/init/ingest-docs.js.map +1 -1
package/dist/init/init.d.ts +10 -1
package/dist/init/init.js +113 -251
package/dist/init/init.js.map +1 -1
package/dist/init/mapper-parallel.js +8 -0
package/dist/init/mapper-parallel.js.map +1 -1
package/dist/init/phases/6-docs-ingest.d.ts +9 -4
package/dist/init/phases/6-docs-ingest.js +13 -10
package/dist/init/phases/6-docs-ingest.js.map +1 -1
package/dist/init/phases/parallel-678.js +10 -4
package/dist/init/phases/parallel-678.js.map +1 -1
package/dist/init/sot-emit.d.ts +22 -0
package/dist/init/sot-emit.js +50 -4
package/dist/init/sot-emit.js.map +1 -1
package/dist/init/source-comments/ingest.js +107 -7
package/dist/init/source-comments/ingest.js.map +1 -1
package/dist/init/topic-index/index.d.ts +14 -0
package/dist/init/topic-index/index.js +83 -4
package/dist/init/topic-index/index.js.map +1 -1
package/dist/init/topic-index/judge.js +14 -1
package/dist/init/topic-index/judge.js.map +1 -1
package/dist/init/topic-index/resolve.d.ts +19 -0
package/dist/init/topic-index/resolve.js +100 -14
package/dist/init/topic-index/resolve.js.map +1 -1
package/dist/init/topic-index/walk.d.ts +32 -0
package/dist/init/topic-index/walk.js +70 -4
package/dist/init/topic-index/walk.js.map +1 -1
package/dist/mcp/history/summarizer.js +5 -0
package/dist/mcp/history/summarizer.js.map +1 -1
package/dist/mcp/schemas.d.ts +48 -0
package/dist/mcp/schemas.js +43 -0
package/dist/mcp/schemas.js.map +1 -1
package/dist/mcp/tools/index.js +8 -0
package/dist/mcp/tools/index.js.map +1 -1
package/dist/mcp/tools/propose-decision.d.ts +34 -0
package/dist/mcp/tools/propose-decision.js +200 -0
package/dist/mcp/tools/propose-decision.js.map +1 -0
package/dist/mcp/tools/reject-candidate.d.ts +24 -0
package/dist/mcp/tools/reject-candidate.js +71 -0
package/dist/mcp/tools/reject-candidate.js.map +1 -0
package/dist/mcp/tools/search-candidates.d.ts +20 -0
package/dist/mcp/tools/search-candidates.js +93 -0
package/dist/mcp/tools/search-candidates.js.map +1 -0
package/package.json +1 -1
package/templates/attention-ui/app.js +40 -3

package/dist/init/ingest-docs.js CHANGED Viewed

@@ -1,28 +1,78 @@
 /**
- * Phase 6 — docs ingestion (v0.5.0 SoT model).
+ * Phase 6 — staged docs ingestion (PHASE_6_REDESIGN §4.1).
  *
- * Reads the topic-index built by phase 5b, filters to entries whose SoT
- * source lives under `docs/*`, and emits verbatim DEC files under
- * `.cairn/ground/decisions/`. Auto-promoted to `status: accepted`. No
- * draft inbox, no LLM paraphrase — the doc paragraph itself IS the
- * canonical body, recorded with `sot_kind: path` so the lens renders
- * the live source on every read.
+ * Replaces the v0.6 bulk-classifier path. Cuts wall from ~15 min →
+ * ~75 s on gcb-platform-scale repos AND collapses the noisy ledger
+ * (7000 DECs) to a curated draft inbox (30-80 drafts).
  *
- * Per-entry Haiku call decides `kind` only (decision / domain-rule /
- * voice-guidelines / api-docs / other). The first two emit a DEC; the
- * rest are skipped at this layer (voice + canonical-topic flows are
- * handled by other tooling now — they were file-level concerns under
- * the v0.4.x model and have no clean paragraph-level analogue).
+ * Pipeline:
+ *
+ *   Stage 3 (deterministic, 0 Haiku) — marker scan
+ *     Topic-index entries with `marker_kind` in {"decision","rule"} go
+ *     straight to emit. The walker stamped them at parse time when it
+ *     saw frontmatter `cairn.kind` or `<!-- cairn:decision -->` /
+ *     `<!-- cairn:rule -->` within 3 lines of the heading.
+ *
+ *   Stage 1 — file-purpose binary filter (batch=30, concurrency=5)
+ *     Per file: filepath + frontmatter + first 800 chars + every
+ *     H1/H2/H3 line (capped at 100). Locked rigid prompt: a file is
+ *     authoritative ONLY if it's a canonical rulebook, formal ADR,
+ *     or list of binding domain invariants. Plans / scratchpads /
+ *     UAT logs / API docs are NOT authoritative even if they
+ *     contain proposed or historical decisions.
+ *
+ *   Stage 2 — section-level batch classifier (batch=30, concurrency=5)
+ *     Same shape as the v0.6 classifier, but scoped to sections
+ *     belonging to Stage-1-authoritative files AND not already
+ *     handled by a marker. This is where Haiku still adds signal —
+ *     the file passed the rigid filter; now decide WHICH sections
+ *     of it are decisions vs context.
+ *
+ *   Stage 4 — emit
+ *     Stage 2 + Stage 3 outputs → `.cairn/ground/decisions/_inbox/<id>.draft.md`.
+ *     `status: draft`, `capture_source: init-docs-ingest`,
+ *     `decided_by: cairn-init`. Body is verbatim via
+ *     `readSotBody` — no Haiku paraphrasing. Operator triages via
+ *     the existing `cairn-attention` skill.
+ *
+ * Skipped entries (everything else) stay in the topic-index as
+ * unpromoted candidates. The PR 2 `cairn_search_candidates` /
+ * `cairn_propose_decision` MCP tools surface them to AI agents as
+ * the project lives.
  */
-import { existsSync, readdirSync, statSync, } from "node:fs";
+import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync, } from "node:fs";
 import { join, relative } from "node:path";
+import { stringify as stringifyYaml } from "yaml";
 import { runClaude } from "../claude/index.js";
-import { readAnchorMap, readTopicIndex, writeSotBindings, writeSotCache, writeTopicIndex, } from "../ground/index.js";
+import { bodyContentHash, decisionsDir, deriveDecId, readAnchorMap, readRejectedYaml, readTopicIndex, setTopic, writeFileCandidatesMap, writeTopicIndex, } from "../ground/index.js";
 import { logger } from "../logger.js";
-import { emitFromTopicIndex } from "./sot-emit.js";
+import { firstLineFallback, readSotBody } from "./sot-emit.js";
 const log = logger("init.ingest-docs");
-const PER_DOC_TIMEOUT_MS = 60_000;
-const DOC_BODY_CAP = 8_000;
+/* -------------------------------------------------------------------------- */
+/* Tunables — locked in §3 of PHASE_6_REDESIGN                                */
+/* -------------------------------------------------------------------------- */
+/** N files per Stage-1 Haiku call. */
+const FILE_FILTER_BATCH_SIZE = 30;
+/** Concurrent Stage-1 batches. */
+const FILE_FILTER_CONCURRENCY = 5;
+/** Stage 1 per-file context — first chars of body, frontmatter stripped. */
+const FILE_FILTER_INTRO_CHARS = 800;
+/** Stage 1 max ToC lines (H1/H2/H3 only). */
+const FILE_FILTER_TOC_MAX_LINES = 100;
+/** Stage 1 wall budget per Haiku call. */
+const FILE_FILTER_TIMEOUT_MS = 60_000;
+/** N sections per Stage-2 Haiku call. */
+const SECTION_BATCH_SIZE = 30;
+/** Concurrent Stage-2 batches. */
+const SECTION_CONCURRENCY = 5;
+/** Stage 2 per-section body cap (chars) before truncation marker. */
+const SECTION_BODY_CAP = 2_000;
+/** Stage 2 wall budget per Haiku call. */
+const SECTION_TIMEOUT_MS = 120_000;
+/** Capture source stamped on every Stage 2/3 emit. */
+const CAPTURE_SOURCE = "init-docs-ingest";
+/** Decided-by stamp on every Stage 2/3 emit. */
+const DECIDED_BY = "cairn-init";
 /** Subdirs we never descend into when discovering candidate doc files. */
 const SKIP_DIRS = new Set([
     ".cairn",
@@ -73,7 +123,11 @@ function walkDocsDir(dir, repoRoot, out) {
         catch {
             continue;
         }
-        out.push({ path: relative(repoRoot, abs), size: st.size, group: dirGroup(relative(repoRoot, abs)) });
+        out.push({
+            path: relative(repoRoot, abs),
+            size: st.size,
+            group: dirGroup(relative(repoRoot, abs)),
+        });
     }
 }
 function dirGroup(rel) {
@@ -83,140 +137,610 @@ function dirGroup(rel) {
     return `${parts[0]}/`;
 }
 /* -------------------------------------------------------------------------- */
-/* Haiku classifier — kind only, no rewriting                                 */
+/* Stage 1 — file-purpose binary filter                                       */
+/*                                                                            */
+/* Locked rigid prompt — DO NOT paraphrase. A file is authoritative ONLY      */
+/* if it's a canonical rulebook, a formal ADR, or a list of active binding   */
+/* domain invariants. Plans / scratchpads / UAT logs / API docs are NOT     */
+/* authoritative even if they contain proposed or historical decisions.      */
 /* -------------------------------------------------------------------------- */
-const CLASSIFY_SCHEMA = {
+const FILE_FILTER_SCHEMA = {
     type: "object",
     additionalProperties: false,
+    required: ["files"],
     properties: {
-        kind: {
-            type: "string",
-            enum: ["decision", "domain-rule", "voice-guidelines", "api-docs", "other"],
+        files: {
+            type: "array",
+            items: {
+                type: "object",
+                additionalProperties: false,
+                required: ["path", "is_authoritative", "reason"],
+                properties: {
+                    path: { type: "string" },
+                    is_authoritative: { type: "boolean" },
+                    reason: { type: "string" },
+                },
+            },
         },
-        proposedTitle: { type: "string" },
     },
-    required: ["kind", "proposedTitle"],
 };
-const CLASSIFY_SYSTEM = `You classify project documentation paragraphs for Cairn's Single-Source-of-Truth ledger.
+const FILE_FILTER_SYSTEM = `You are a rigid filter for an architecture ledger. A file is authoritative ONLY if it is a canonical rulebook, a formal Architecture Decision Record (ADR), or a list of active, binding domain invariants.
-Return JSON matching the supplied schema.
+If a file is a project plan, research scratchpad, UAT log, status update, or API documentation, it is NOT authoritative, even if it contains proposed or historical decisions.
+Evaluate the provided filepath, frontmatter, intro, and Table of Contents. Return JSON:
+{ "files": [ { "path": "<filepath>", "is_authoritative": <bool>, "reason": "10 words max" }, ... ] }
+EXACTLY one entry per input filepath. Do NOT omit. Do NOT invent paths.`;
+function buildFileFilterInputs(repoRoot, files) {
+    const out = [];
+    for (const rel of files) {
+        const abs = join(repoRoot, rel);
+        if (!existsSync(abs))
+            continue;
+        let raw;
+        try {
+            raw = readFileSync(abs, "utf8");
+        }
+        catch {
+            continue;
+        }
+        const { frontmatter, body } = splitFrontmatter(raw);
+        const introChars = body.slice(0, FILE_FILTER_INTRO_CHARS);
+        const toc = extractToc(body);
+        out.push({ path: rel, frontmatter, introChars, toc });
+    }
+    return out;
+}
+function splitFrontmatter(raw) {
+    const m = raw.match(/^---\n([\s\S]*?)\n---\n?/);
+    if (m === null)
+        return { frontmatter: null, body: raw };
+    const fm = m[1] ?? "";
+    return { frontmatter: fm, body: raw.slice(m[0].length) };
+}
+function extractToc(body) {
+    const lines = body.split("\n");
+    const toc = [];
+    for (const line of lines) {
+        if (/^#{1,3}\s+/.test(line)) {
+            toc.push(line.trim());
+            if (toc.length >= FILE_FILTER_TOC_MAX_LINES)
+                break;
+        }
+    }
+    return toc.join("\n");
+}
+async function classifyFileBatch(inputs) {
+    if (inputs.length === 0)
+        return new Map();
+    const blocks = inputs
+        .map((it) => {
+        const fmBlock = it.frontmatter !== null
+            ? `frontmatter:\n${it.frontmatter}\n`
+            : `frontmatter: (none)\n`;
+        const tocBlock = it.toc.length > 0 ? `toc:\n${it.toc}\n` : `toc: (none)\n`;
+        const intro = it.introChars.length > 0
+            ? `intro:\n${it.introChars}`
+            : `intro: (empty)`;
+        return `=== path: ${it.path}\n${fmBlock}${tocBlock}${intro}`;
+    })
+        .join("\n\n");
+    const prompt = `Classify each file. Return one entry per path.\n\n${blocks}`;
+    const result = await runClaude({
+        tier: "haiku",
+        system: FILE_FILTER_SYSTEM,
+        prompt,
+        jsonSchema: FILE_FILTER_SCHEMA,
+        timeoutMs: FILE_FILTER_TIMEOUT_MS,
+        isolateAmbientContext: true,
+    });
+    const parsed = result.parsed;
+    if (typeof parsed !== "object" || parsed === null) {
+        throw new Error("haiku file-filter returned non-object");
+    }
+    const arr = parsed["files"];
+    if (!Array.isArray(arr)) {
+        throw new Error("haiku file-filter missing `files` array");
+    }
+    const out = new Map();
+    for (const raw of arr) {
+        if (typeof raw !== "object" || raw === null)
+            continue;
+        const e = raw;
+        const path = e["path"];
+        const flag = e["is_authoritative"];
+        const reason = e["reason"];
+        if (typeof path !== "string")
+            continue;
+        if (typeof flag !== "boolean")
+            continue;
+        out.set(path, {
+            is_authoritative: flag,
+            reason: typeof reason === "string" ? reason : "",
+        });
+    }
+    return out;
+}
+/* -------------------------------------------------------------------------- */
+/* Stage 2 — section batch classifier (kind + proposedTitle)                  */
+/* -------------------------------------------------------------------------- */
+const SECTION_SCHEMA = {
+    type: "object",
+    additionalProperties: false,
+    required: ["classifications"],
+    properties: {
+        classifications: {
+            type: "array",
+            items: {
+                type: "object",
+                additionalProperties: false,
+                required: ["slug", "kind", "proposedTitle"],
+                properties: {
+                    slug: { type: "string" },
+                    kind: {
+                        type: "string",
+                        enum: ["decision", "domain-rule", "voice-guidelines", "api-docs", "other"],
+                    },
+                    proposedTitle: { type: "string" },
+                },
+            },
+        },
+    },
+};
+const SECTION_SYSTEM = `You classify N sections from authoritative project documentation for Cairn's Single-Source-of-Truth ledger.
+These sections come from files already filtered as canonical rulebooks, ADRs, or binding invariant lists. Decide which sections are themselves binding decisions / rules vs supporting context.
+Return JSON: { "classifications": [ { "slug": "...", "kind": "...", "proposedTitle": "..." }, ... ] }
+EXACTLY one classification per input section, keyed by its slug. Do NOT omit. Do NOT invent slugs. If unsure, kind="other".
 \`kind\` choices:
-  - "decision"          paragraph describes a binding decision or architectural choice
-  - "domain-rule"       paragraph describes a domain rule or constraint developers must obey
-  - "voice-guidelines"  paragraph is brand voice / tone guidance
-  - "api-docs"          paragraph documents an API surface or schema (descriptive, not binding)
+  - "decision"          binding decision or architectural choice
+  - "domain-rule"       domain rule or constraint developers must obey
+  - "voice-guidelines"  brand voice / tone guidance
+  - "api-docs"          API surface / schema documentation (descriptive)
   - "other"             nothing actionable for the cairn state layer
-\`proposedTitle\` 5-10 words, imperative voice, empty for "other".
+\`proposedTitle\` 5-10 words, imperative voice. Empty string for "other".
-Be conservative — false-positive decisions pollute the ground state worse
-than missed capture. Default to "other" when uncertain.`;
-async function classifyEntry(entry, body) {
-    const capped = body.length > DOC_BODY_CAP ? `${body.slice(0, DOC_BODY_CAP)}\n…[truncated]` : body;
-    const prompt = `Source: ${entry.sot_source}\nSlug: ${entry.slug}\n\n---\n${capped}`;
+Be conservative — false-positive decisions pollute the ground state worse than missed capture. Default to "other" when uncertain.`;
+async function classifySectionBatch(items) {
+    if (items.length === 0)
+        return new Map();
+    const sections = items
+        .map((it, i) => {
+        const capped = it.body.length > SECTION_BODY_CAP
+            ? `${it.body.slice(0, SECTION_BODY_CAP)}\n…[truncated]`
+            : it.body;
+        return `[${i + 1}] slug=${it.slug} source=${it.sot_source}\n${capped}`;
+    })
+        .join("\n\n---\n\n");
+    const prompt = `Classify each section. Return one entry per slug.\n\n${sections}`;
     const result = await runClaude({
         tier: "haiku",
-        system: CLASSIFY_SYSTEM,
+        system: SECTION_SYSTEM,
         prompt,
-        jsonSchema: CLASSIFY_SCHEMA,
-        timeoutMs: PER_DOC_TIMEOUT_MS,
+        jsonSchema: SECTION_SCHEMA,
+        timeoutMs: SECTION_TIMEOUT_MS,
         isolateAmbientContext: true,
     });
     const parsed = result.parsed;
     if (typeof parsed !== "object" || parsed === null) {
-        throw new Error("haiku returned non-object classification");
+        throw new Error("haiku section batch returned non-object");
     }
-    const r = parsed;
-    const kind = r["kind"];
-    if (kind !== "decision" &&
-        kind !== "domain-rule" &&
-        kind !== "voice-guidelines" &&
-        kind !== "api-docs" &&
-        kind !== "other") {
-        throw new Error(`haiku returned unexpected kind: ${String(kind)}`);
+    const arr = parsed["classifications"];
+    if (!Array.isArray(arr)) {
+        throw new Error("haiku section batch missing `classifications`");
     }
-    return {
-        kind,
-        proposedTitle: typeof r["proposedTitle"] === "string" ? r["proposedTitle"] : "",
-    };
+    const out = new Map();
+    for (const raw of arr) {
+        if (typeof raw !== "object" || raw === null)
+            continue;
+        const e = raw;
+        const slug = e["slug"];
+        const kind = e["kind"];
+        if (typeof slug !== "string")
+            continue;
+        if (kind !== "decision" &&
+            kind !== "domain-rule" &&
+            kind !== "voice-guidelines" &&
+            kind !== "api-docs" &&
+            kind !== "other") {
+            continue;
+        }
+        out.set(slug, {
+            kind,
+            proposedTitle: typeof e["proposedTitle"] === "string" ? e["proposedTitle"] : "",
+        });
+    }
+    return out;
 }
-/* -------------------------------------------------------------------------- */
-/* Orchestrator                                                               */
-/* -------------------------------------------------------------------------- */
 export async function runDocsIngestion(args) {
     const topicIndex = readTopicIndex(args.repoRoot);
     const anchorMap = readAnchorMap(args.repoRoot);
-    const candidateEntries = Object.values(topicIndex.topics).filter((entry) => isDocSoT(entry) && entry.dec_id === undefined);
-    if (candidateEntries.length === 0) {
+    const rejected = readRejectedYaml(args.repoRoot);
+    const allCandidates = Object.values(topicIndex.topics).filter((entry) => isDocSoT(entry) && entry.dec_id === undefined && !rejected.has(entry.slug));
+    if (allCandidates.length === 0) {
         log.info("phase 6 found no eligible docs entries in topic-index");
-        return { decsWritten: [], skipped: [], scannedEntries: 0 };
-    }
-    let processed = 0;
-    const result = await emitFromTopicIndex({
-        repoRoot: args.repoRoot,
-        topicIndex,
-        anchorMap,
-        filter: (entry) => isDocSoT(entry) && entry.dec_id === undefined,
-        classifier: async ({ body, entry }) => {
+        writeFileCandidatesMap(args.repoRoot, topicIndex);
+        return zeroResult(allCandidates.length, topicIndex);
+    }
+    // Read each candidate body once. Stage 3 needs the body for title
+    // derivation; Stages 1/2 don't, but reading up front keeps the
+    // pipeline single-pass over entries. Bodies that fail to read are
+    // dropped — anchor-map drift is the only realistic cause and the
+    // entry stays as a candidate for the next phase 5b refresh.
+    const ctxBySlug = new Map();
+    for (const entry of allCandidates) {
+        const body = readSotBody(args.repoRoot, entry, anchorMap);
+        if (body === null)
+            continue;
+        ctxBySlug.set(entry.slug, { entry, body });
+    }
+    // ── Stage 3 — marker scan (deterministic, 0 Haiku) ──
+    const markerCandidates = [];
+    const nonMarkerCandidates = [];
+    for (const ctx of ctxBySlug.values()) {
+        if (ctx.entry.marker_kind !== undefined)
+            markerCandidates.push(ctx);
+        else
+            nonMarkerCandidates.push(ctx);
+    }
+    // ── Mock path — bypass Stages 1+2; run mockClassify on every
+    //    non-marker candidate. Smokes only.
+    let sectionEmits = [];
+    let authoritativeFileCount = 0;
+    let filesEvaluated = 0;
+    if (args.mockClassify !== undefined) {
+        for (const ctx of nonMarkerCandidates) {
             let cls;
             try {
-                cls = args.mockClassify !== undefined
-                    ? args.mockClassify(entry, body)
-                    : await classifyEntry(entry, body);
+                cls = args.mockClassify(ctx.entry, ctx.body);
             }
             catch (err) {
-                log.warn({ slug: entry.slug, err: err instanceof Error ? err.message : String(err) }, "classifier failed; skipping");
-                return { kind: "skip", title: "" };
-            }
-            processed += 1;
-            if (args.onEntryProgress !== undefined) {
-                args.onEntryProgress({
-                    slug: entry.slug,
-                    emitted: cls.kind === "decision" || cls.kind === "domain-rule",
-                    total: candidateEntries.length,
-                });
+                log.warn({ slug: ctx.entry.slug, err: err instanceof Error ? err.message : String(err) }, "mockClassify failed; skipping");
+                continue;
             }
             if (cls.kind === "decision" || cls.kind === "domain-rule") {
-                return { kind: "decision", title: cls.proposedTitle };
+                sectionEmits.push({ ctx, cls });
             }
-            return { kind: "skip", title: cls.proposedTitle };
-        },
-        sot_kind: "path",
-        capture_source: "init-docs-ingest",
-    });
-    writeSotBindings(args.repoRoot, result.bindings);
-    writeSotCache(args.repoRoot, result.cache);
-    writeTopicIndex(args.repoRoot, result.topicIndex);
-    const decsWritten = result.emitted.map((rec) => ({
-        id: rec.id,
-        path: relativeDecPath(rec.id),
-        sourceFile: rec.source_file,
-        slug: rec.slug,
-    }));
+        }
+        if (args.onChunkProgress !== undefined) {
+            args.onChunkProgress({
+                chunksDone: 1,
+                totalChunks: 1,
+                entriesDone: nonMarkerCandidates.length,
+                totalEntries: nonMarkerCandidates.length,
+                stage: "section-classify",
+            });
+        }
+    }
+    else {
+        // ── Stage 1 — file-purpose binary filter ──
+        const distinctFiles = [
+            ...new Set(nonMarkerCandidates.map((c) => c.entry.sot_source)),
+        ].sort();
+        filesEvaluated = distinctFiles.length;
+        const stage1Args = {
+            repoRoot: args.repoRoot,
+            files: distinctFiles,
+        };
+        if (args.onChunkProgress !== undefined) {
+            stage1Args.onChunkProgress = args.onChunkProgress;
+        }
+        const fileVerdicts = await runStage1FileFilter(stage1Args);
+        const authoritativeFiles = new Set();
+        for (const [path, v] of fileVerdicts.entries()) {
+            if (v.is_authoritative)
+                authoritativeFiles.add(path);
+        }
+        authoritativeFileCount = authoritativeFiles.size;
+        // ── Stage 2 — section batch classifier (scoped) ──
+        const stage2Inputs = nonMarkerCandidates.filter((c) => authoritativeFiles.has(c.entry.sot_source));
+        const stage2Args = {
+            candidates: stage2Inputs,
+        };
+        if (args.onChunkProgress !== undefined) {
+            stage2Args.onChunkProgress = args.onChunkProgress;
+        }
+        sectionEmits = await runStage2SectionClassifier(stage2Args);
+    }
+    // ── Stage 4 — emit drafts to `_inbox/` ──
+    const existingDecIds = args.existingDecIds ?? scanExistingDecIds(args.repoRoot);
+    const finalEmits = [
+        ...markerCandidates.map((ctx) => {
+            const kind = ctx.entry.marker_kind === "rule" ? "domain-rule" : "decision";
+            return { ctx, cls: { kind, proposedTitle: deriveMarkerTitle(ctx) } };
+        }),
+        ...sectionEmits,
+    ];
+    let updatedTopicIndex = topicIndex;
+    const decsWritten = [];
+    const skipped = [];
+    for (const { ctx, cls } of finalEmits) {
+        const sot_path = entryToSotPath(ctx.entry);
+        const titleSeed = cls.proposedTitle.length > 0
+            ? cls.proposedTitle
+            : firstLineFallback(ctx.body);
+        const id = allocateUniqueDecId({ sot_path, title: titleSeed, capture_source: CAPTURE_SOURCE }, existingDecIds);
+        const draftPath = writeDraftToInbox({
+            repoRoot: args.repoRoot,
+            id,
+            title: titleSeed,
+            body: ctx.body,
+            sot_path,
+            source_file: ctx.entry.sot_source,
+        });
+        decsWritten.push({
+            id,
+            path: relativeInboxPath(id),
+            sourceFile: ctx.entry.sot_source,
+            slug: ctx.entry.slug,
+        });
+        updatedTopicIndex = setTopic(updatedTopicIndex, ctx.entry.slug, {
+            ...ctx.entry,
+            dec_id: id,
+        });
+        log.debug({ id, slug: ctx.entry.slug, draftPath }, "phase 6 emitted draft");
+    }
+    // Refresh topic-index + file-candidates-map so the read-enrich hook
+    // sees the post-emit candidate counts. Anchor-map / sot-bindings /
+    // sot-cache stay untouched — drafts in `_inbox/` aren't canonical
+    // until the operator (or `cairn attention`) accepts them.
+    writeTopicIndex(args.repoRoot, updatedTopicIndex);
+    writeFileCandidatesMap(args.repoRoot, updatedTopicIndex);
+    const unpromotedCandidates = countUnpromoted(updatedTopicIndex);
     log.info({
-        scanned: candidateEntries.length,
+        scanned: allCandidates.length,
         emitted: decsWritten.length,
-        skipped: result.skipped.length,
-        processed,
+        markerEmits: markerCandidates.length,
+        sectionEmits: sectionEmits.length,
+        authoritativeFiles: authoritativeFileCount,
+        filesEvaluated,
+        unpromotedCandidates,
     }, "phase 6 complete");
     return {
         decsWritten,
-        skipped: result.skipped,
-        scannedEntries: candidateEntries.length,
+        skipped,
+        scannedEntries: allCandidates.length,
+        markerEmits: markerCandidates.length,
+        sectionEmits: sectionEmits.length,
+        authoritativeFiles: authoritativeFileCount,
+        filesEvaluated,
+        unpromotedCandidates,
     };
 }
-function relativeDecPath(id) {
-    return `.cairn/ground/decisions/${id}.md`;
+/* -------------------------------------------------------------------------- */
+/* Stage runners                                                              */
+/* -------------------------------------------------------------------------- */
+export async function runStage1FileFilter(args) {
+    const verdicts = new Map();
+    if (args.files.length === 0)
+        return verdicts;
+    const inputs = buildFileFilterInputs(args.repoRoot, args.files);
+    const chunks = [];
+    for (let i = 0; i < inputs.length; i += FILE_FILTER_BATCH_SIZE) {
+        chunks.push(inputs.slice(i, i + FILE_FILTER_BATCH_SIZE));
+    }
+    let nextIdx = 0;
+    let chunksDone = 0;
+    let entriesDone = 0;
+    const worker = async () => {
+        for (;;) {
+            const idx = nextIdx;
+            nextIdx += 1;
+            if (idx >= chunks.length)
+                return;
+            const chunk = chunks[idx];
+            try {
+                const map = await classifyFileBatch(chunk);
+                for (const [path, v] of map.entries())
+                    verdicts.set(path, v);
+            }
+            catch (err) {
+                log.warn({ chunkIdx: idx, size: chunk.length, err: err instanceof Error ? err.message : String(err) }, "phase 6 stage 1 file-filter failed; chunk treated as non-authoritative");
+            }
+            chunksDone += 1;
+            entriesDone += chunk.length;
+            if (args.onChunkProgress !== undefined) {
+                args.onChunkProgress({
+                    chunksDone,
+                    totalChunks: chunks.length,
+                    entriesDone,
+                    totalEntries: inputs.length,
+                    stage: "file-filter",
+                });
+            }
+        }
+    };
+    await Promise.all(Array.from({ length: Math.min(FILE_FILTER_CONCURRENCY, Math.max(1, chunks.length)) }, () => worker()));
+    return verdicts;
 }
-/**
- * Phase 6 owns every topic-index entry whose SoT candidate was tagged
- * `kind="doc"` by the phase 5b walker. Path-prefix matching would lock
- * us to `docs/` and miss `documentation/`, `official_docs/`, etc.; the
- * walker's per-candidate kind is already the right discriminant.
- */
+/**
+ * Stage 2 of the pipeline: classify each candidate section in batches and
+ * keep only those whose verdict kind is "decision" or "domain-rule".
+ *
+ * @param args - `candidates`: array of contexts, each exposing
+ *   `entry.slug`, `entry.sot_source` and `body`; `onChunkProgress`:
+ *   optional callback invoked after every processed chunk.
+ * @returns Array of `{ ctx, cls }` pairs, one per retained verdict.
+ *   Chunks whose classification throws are logged and contribute nothing.
+ */
+async function runStage2SectionClassifier(args) {
+    const out = [];
+    if (args.candidates.length === 0)
+        return out;
+    const items = args.candidates.map((c) => ({
+        slug: c.entry.slug,
+        body: c.body,
+        sot_source: c.entry.sot_source,
+    }));
+    // Slug -> candidate lookup used to re-attach context to each verdict.
+    // If two candidates share a slug, the later one wins in this map.
+    const ctxBySlug = new Map(args.candidates.map((c) => [c.entry.slug, c]));
+    // Split the work into batches of at most SECTION_BATCH_SIZE items.
+    const chunks = [];
+    for (let i = 0; i < items.length; i += SECTION_BATCH_SIZE) {
+        chunks.push(items.slice(i, i + SECTION_BATCH_SIZE));
+    }
+    // State shared by all workers below.
+    let nextIdx = 0;
+    let chunksDone = 0;
+    let entriesDone = 0;
+    const verdicts = new Map();
+    const worker = async () => {
+        for (;;) {
+            // Claim the next chunk index. The read and the increment have no
+            // `await` between them, so two workers never claim the same chunk.
+            const idx = nextIdx;
+            nextIdx += 1;
+            if (idx >= chunks.length)
+                return;
+            const chunk = chunks[idx];
+            try {
+                const map = await classifySectionBatch(chunk);
+                for (const [slug, cls] of map.entries())
+                    verdicts.set(slug, cls);
+            }
+            catch (err) {
+                // Best-effort: a failed batch is logged and skipped, not rethrown.
+                log.warn({ chunkIdx: idx, size: chunk.length, err: err instanceof Error ? err.message : String(err) }, "phase 6 stage 2 batch failed; chunk skipped");
+            }
+            // Progress counts every processed chunk, including failed ones.
+            chunksDone += 1;
+            entriesDone += chunk.length;
+            if (args.onChunkProgress !== undefined) {
+                args.onChunkProgress({
+                    chunksDone,
+                    totalChunks: chunks.length,
+                    entriesDone,
+                    totalEntries: items.length,
+                    stage: "section-classify",
+                });
+            }
+        }
+    };
+    // Run at most SECTION_CONCURRENCY workers, and never fewer than one.
+    await Promise.all(Array.from({ length: Math.min(SECTION_CONCURRENCY, Math.max(1, chunks.length)) }, () => worker()));
+    // Keep only the two promotable kinds, and only for slugs we know about.
+    for (const [slug, cls] of verdicts.entries()) {
+        if (cls.kind !== "decision" && cls.kind !== "domain-rule")
+            continue;
+        const ctx = ctxBySlug.get(slug);
+        if (ctx === undefined)
+            continue;
+        out.push({ ctx, cls });
+    }
+    return out;
+}
+/**
+ * Write one draft decision record as `<id>.draft.md` into the `_inbox`
+ * folder under `decisionsDir(repoRoot)`, creating the folder if needed.
+ * The file is YAML frontmatter between `---` fences, a blank line, then
+ * the body with trailing whitespace trimmed and a final newline.
+ *
+ * @param args - `repoRoot`, `id`, `title`, `sot_path`, `source_file`, `body`.
+ * @returns The path of the draft file that was written.
+ */
+function writeDraftToInbox(args) {
+    const targetDir = join(decisionsDir(args.repoRoot), "_inbox");
+    mkdirSync(targetDir, { recursive: true });
+    const draftPath = join(targetDir, `${args.id}.draft.md`);
+    // A single timestamp feeds every date field so they always agree.
+    const stamp = new Date().toISOString();
+    const frontmatter = stringifyYaml({
+        id: args.id,
+        title: args.title,
+        type: "adr",
+        status: "draft",
+        audience: "dual",
+        generated: stamp,
+        "verified-at": stamp,
+        decided_at: stamp,
+        decided_by: DECIDED_BY,
+        sot_kind: "path",
+        sot_path: args.sot_path,
+        sot_content_hash: bodyContentHash(args.body),
+        capture_source: CAPTURE_SOURCE,
+        source_file: args.source_file,
+    }).trimEnd();
+    const content = `---\n${frontmatter}\n---\n\n${args.body.trimEnd()}\n`;
+    writeFileSync(draftPath, content, "utf8");
+    return draftPath;
+}
+/**
+ * Build the repo-relative path of the inbox draft file for a decision id.
+ *
+ * @param id - Decision identifier used as the file stem.
+ * @returns `.cairn/ground/decisions/_inbox/<id>.draft.md`.
+ */
+function relativeInboxPath(id) {
+    const fileName = `${id}.draft.md`;
+    return `.cairn/ground/decisions/_inbox/${fileName}`;
+}
+/* -------------------------------------------------------------------------- */
+/* Helpers                                                                    */
+/* -------------------------------------------------------------------------- */
 /**
  * Tell whether the entry's source-of-truth candidate is a document.
  *
  * @param entry - Object with `sot_source` and a `candidates` array whose
  *   items carry `file` and `kind`.
  * @returns `true` only when a candidate whose `file` equals `sot_source`
  *   exists and its `kind` is "doc".
  */
 function isDocSoT(entry) {
     const match = entry.candidates.find(({ file }) => file === entry.sot_source);
     return match?.kind === "doc";
 }
+/**
+ * Render the entry's source-of-truth location as `file#anchor`, or just
+ * `file` when no matching candidate exists or its anchor is absent/empty.
+ *
+ * @param entry - Object with `sot_source` and a `candidates` array whose
+ *   items carry `file` and optionally `anchor`.
+ * @returns The source path, with `#anchor` appended when one is available.
+ */
+function entryToSotPath(entry) {
+    const source = entry.sot_source;
+    const anchor = entry.candidates.find((cand) => cand.file === source)?.anchor;
+    const hasAnchor = anchor !== undefined && anchor.length > 0;
+    return hasAnchor ? `${source}#${anchor}` : source;
+}
+// firstLineFallback now lives in sot-emit.ts (single source of truth).
+// Imported above as `firstLineFallback`.
+/**
+ * Pick a title for a marker-derived record. The anchor of the candidate
+ * matching `sot_source` is preferred: runs of `-`/`_` become single
+ * spaces, the result is trimmed and capped at 120 characters. When there
+ * is no usable anchor, or it reduces to an empty string, the title comes
+ * from `firstLineFallback(ctx.body)` instead.
+ *
+ * @param ctx - Object with `entry` (`sot_source`, `candidates`) and `body`.
+ * @returns The derived title string.
+ */
+function deriveMarkerTitle(ctx) {
+    const { entry } = ctx;
+    const anchor = entry.candidates.find((cand) => cand.file === entry.sot_source)?.anchor;
+    const usable = anchor !== undefined && anchor.length > 0;
+    const fromAnchor = usable ? anchor.replace(/[-_]+/g, " ").trim().slice(0, 120) : "";
+    return fromAnchor || firstLineFallback(ctx.body);
+}
+/**
+ * Reserve a DEC id that is not yet in `existingIds` and record it there.
+ *
+ * The id is derived from `input`, so it is stable across runs. If it is
+ * already taken, the id is re-derived with the title suffixed ` #2`,
+ * ` #3`, ... up to ` #999` until a free one is found.
+ *
+ * @param input - Derivation input passed to `deriveDecId`; must carry `title`.
+ * @param existingIds - Set of ids already in use; mutated with the result.
+ * @returns The reserved id.
+ */
+function allocateUniqueDecId(input, existingIds) {
+    // Reserve `candidate` when free; report whether the reservation succeeded.
+    const claim = (candidate) => {
+        if (existingIds.has(candidate)) {
+            return false;
+        }
+        existingIds.add(candidate);
+        return true;
+    };
+    const baseId = deriveDecId(input);
+    if (claim(baseId)) {
+        return baseId;
+    }
+    let suffix = 2;
+    while (suffix < 1_000) {
+        const tagged = deriveDecId({ ...input, title: `${input.title} #${suffix}` });
+        if (claim(tagged)) {
+            return tagged;
+        }
+        suffix += 1;
+    }
+    // Every suffix is taken, which should be vanishingly rare. Hand back the
+    // deterministic id so the later file write fails loudly; a random
+    // suffix would make re-runs produce different ids.
+    existingIds.add(baseId);
+    return baseId;
+}
+/**
+ * Collect the DEC ids already present on disk, looking at regular files
+ * directly inside the decisions directory and inside its `_inbox` folder.
+ * An id is the leading `DEC-` plus seven or more lowercase hex digits of
+ * a file name. Folders that cannot be listed are skipped.
+ *
+ * @param repoRoot - Repository root handed to `decisionsDir`.
+ * @returns Set of the ids found.
+ */
+function scanExistingDecIds(repoRoot) {
+    const root = decisionsDir(repoRoot);
+    const found = new Set();
+    const idPrefix = /^(DEC-[0-9a-f]{7,})/;
+    for (const folder of [root, join(root, "_inbox")]) {
+        let dirents;
+        try {
+            dirents = readdirSync(folder, { withFileTypes: true, encoding: "utf8" });
+        }
+        catch {
+            // Unreadable or missing folder: nothing to collect from it.
+            continue;
+        }
+        for (const dirent of dirents) {
+            const hit = dirent.isFile() ? idPrefix.exec(dirent.name) : null;
+            if (hit !== null) {
+                found.add(hit[1]);
+            }
+        }
+    }
+    return found;
+}
+/**
+ * Count the topics that have no `dec_id` yet.
+ *
+ * @param topicIndex - Object whose `topics` property maps keys to topic
+ *   entries.
+ * @returns Number of entries whose `dec_id` is `undefined`.
+ */
+function countUnpromoted(topicIndex) {
+    return Object.values(topicIndex.topics).filter((topic) => topic.dec_id === undefined).length;
+}
+/**
+ * Build the result object for a run that produced nothing: empty lists
+ * and zeroed counters, plus the scanned-entry count and the number of
+ * topics still lacking a `dec_id`.
+ *
+ * @param scanned - Value reported as `scannedEntries`.
+ * @param topicIndex - Topic index passed to `countUnpromoted`.
+ * @returns The empty result record.
+ */
+function zeroResult(scanned, topicIndex) {
+    const zeroedCounters = {
+        markerEmits: 0,
+        sectionEmits: 0,
+        authoritativeFiles: 0,
+        filesEvaluated: 0,
+    };
+    return {
+        decsWritten: [],
+        skipped: [],
+        scannedEntries: scanned,
+        ...zeroedCounters,
+        unpromotedCandidates: countUnpromoted(topicIndex),
+    };
+}
 //# sourceMappingURL=ingest-docs.js.map