npm - @meetless/mla - Versions diffs - 0.1.5 → 0.1.6 - Mend

@meetless/mla 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/dist/build-info.json +3 -3
package/dist/cli.js +31 -5
package/dist/commands/activate.js +39 -18
package/dist/commands/agent-memory.js +333 -0
package/dist/commands/enrich.js +211 -2
package/dist/commands/internal-auto-index.js +64 -1
package/dist/commands/internal-pretool-observe.js +86 -1
package/dist/commands/internal-redact-capture.js +130 -0
package/dist/commands/pilot.js +385 -0
package/dist/lib/agent-memory-capture/binding.js +115 -0
package/dist/lib/agent-memory-capture/classify.js +68 -0
package/dist/lib/agent-memory-capture/collector.js +69 -0
package/dist/lib/agent-memory-capture/containment.js +74 -0
package/dist/lib/agent-memory-capture/ledger.js +43 -0
package/dist/lib/agent-memory-capture/live-collector.js +148 -0
package/dist/lib/agent-memory-capture/live-ledger.js +45 -0
package/dist/lib/agent-memory-capture/live-pipeline.js +344 -0
package/dist/lib/agent-memory-capture/lock.js +98 -0
package/dist/lib/agent-memory-capture/paths.js +47 -0
package/dist/lib/agent-memory-capture/pipeline.js +222 -0
package/dist/lib/agent-memory-capture/report.js +131 -0
package/dist/lib/agent-memory-capture/types.js +14 -0
package/dist/lib/agent-memory-capture/upsert-client.js +104 -0
package/dist/lib/analytics/enforcement-classify.js +65 -0
package/dist/lib/analytics/enforcement-incident.js +83 -0
package/dist/lib/analytics/envelope.js +55 -1
package/dist/lib/analytics/pilot.js +313 -0
package/dist/lib/enrichment/ingest.js +98 -13
package/dist/lib/enrichment/materialize-rules.js +81 -0
package/dist/lib/enrichment/plan.js +72 -15
package/dist/lib/enrichment/protocol.js +85 -5
package/dist/lib/enrichment/scout-brief.js +35 -6
package/dist/lib/redactor.js +104 -1
package/dist/lib/scanner/agent-memory.js +55 -4
package/dist/lib/scanner/managed-rules.js +0 -0
package/dist/lib/scanner/scan.js +52 -1
package/dist/lib/scanner/score.js +41 -3
package/dist/lib/scanner/scout-mission.js +9 -7
package/dist/lib/upgrade-apply.js +30 -0
package/dist/lib/wire.js +2 -0
package/package.json +1 -1

package/dist/lib/enrichment/ingest.js CHANGED Viewed

@@ -20,6 +20,39 @@ const node_path_1 = require("node:path");
 const protocol_1 = require("./protocol");
 const plan_1 = require("./plan");
 const SCOUT_SLOTS = ["documentation", "history"];
+/**
+ * Fair-share the run's remaining candidate budget across the scouts that produced
+ * candidates THIS invocation. The scouts run independently and cannot coordinate to
+ * jointly honor one shared total, so a naive running-total cap applied in array order
+ * lets the first scout (documentation) swallow the whole budget and starves every
+ * later scout (history) to zero. We instead deal the budget round-robin in slot order,
+ * each round bounded by what a scout actually sent, so an under-producing scout cedes
+ * its surplus to the others and no scout is wiped out purely by ordering. Leftover
+ * slots that cannot divide evenly fall to the earliest scouts in slot order, which is
+ * deterministic. `remainingCap` already excludes budget consumed by scouts that
+ * completed in a prior ingest (resume).
+ */
+function allocateScoutBudgets(demands, remainingCap) {
+    const budget = new Map();
+    for (const s of SCOUT_SLOTS)
+        budget.set(s, 0);
+    let remaining = Math.max(0, remainingCap);
+    let progressed = true;
+    while (remaining > 0 && progressed) {
+        progressed = false;
+        for (const s of SCOUT_SLOTS) {
+            if (remaining === 0)
+                break;
+            const want = demands.get(s) ?? 0;
+            if ((budget.get(s) ?? 0) < want) {
+                budget.set(s, (budget.get(s) ?? 0) + 1);
+                remaining--;
+                progressed = true;
+            }
+        }
+    }
+    return budget;
+}
 function safeRealpath(p) {
     try {
         return (0, node_fs_1.realpathSync)(p);
@@ -118,6 +151,17 @@ function renderCandidateDocument(candidate) {
     lines.push("");
     lines.push(`Kind: ${candidate.kind}. Source: ${candidate.sourceScout} scout (onboarding enrichment, advisory; pending human review).`);
     lines.push("");
+    // Rationale carries a provenance label so the persisted artifact never presents an
+    // agent's paraphrase as the user's own words (memo Phase 1). Rendered only when present;
+    // a missing rationale is simply omitted (missing beats fabricated).
+    if (candidate.rationale && candidate.rationale.trim().length > 0) {
+        const heading = candidate.rationaleSource === "USER_EXPLICIT"
+            ? "## Rationale (user-stated)"
+            : "## Rationale (agent summary; not the user's words)";
+        lines.push(heading);
+        lines.push(candidate.rationale.trim());
+        lines.push("");
+    }
     lines.push("## Evidence");
     for (const ev of candidate.evidence) {
         if (ev.type === "file") {
@@ -130,17 +174,26 @@ function renderCandidateDocument(candidate) {
     return lines.join("\n") + "\n";
 }
 // --- per-scout state persistence (§6) --------------------------------------------
-function statePath(home, workspaceId) {
-    return (0, node_path_1.join)(home, "workspaces", workspaceId, "onboarding-state.json");
+// Per-run resume state lives BESIDE the run record it belongs to, keyed by runId, so two
+// repos sharing one workspace never collide on a single onboarding-state.json (§6). The
+// previous path, keyed only by workspace, made the first repo's completion permanently skip
+// every later repo's scouts. Named `<runId>.state.json` so it sorts next to `<runId>.json`
+// and prune can drop the pair together.
+function statePath(home, workspaceId, runId) {
+    return (0, node_path_1.join)(home, "workspaces", workspaceId, "onboarding-runs", `${runId}.state.json`);
 }
-function loadState(home, workspaceId) {
-    const path = statePath(home, workspaceId);
+function loadState(home, workspaceId, runId) {
+    const path = statePath(home, workspaceId, runId);
     if (!(0, node_fs_1.existsSync)(path))
         return null;
     try {
         const parsed = JSON.parse((0, node_fs_1.readFileSync)(path, "utf8"));
         if (parsed?.schemaVersion !== 1)
             return null;
+        // A state file is only valid for the run it names: ignore one whose stored runId
+        // drifted from its path (corruption / hand-edit), rather than resuming the wrong run.
+        if (parsed.runId !== runId)
+            return null;
         return parsed;
     }
     catch {
@@ -148,9 +201,9 @@ function loadState(home, workspaceId) {
     }
 }
 function writeState(home, state) {
-    const dir = (0, node_path_1.join)(home, "workspaces", state.workspaceId);
+    const dir = (0, node_path_1.join)(home, "workspaces", state.workspaceId, "onboarding-runs");
     (0, node_fs_1.mkdirSync)(dir, { recursive: true });
-    (0, node_fs_1.writeFileSync)(statePath(home, state.workspaceId), JSON.stringify(state, null, 2), "utf8");
+    (0, node_fs_1.writeFileSync)(statePath(home, state.workspaceId, state.runId), JSON.stringify(state, null, 2), "utf8");
 }
 function emptyScoutState() {
     return { status: "not_started" };
@@ -176,15 +229,40 @@ async function ingestRun(input) {
     }
     const probe = input.probe ?? defaultProbe(env.repositoryRoot, input.gitRunner);
     // Resume: a scout already "complete" is never re-processed (its candidates are
-    // immutable; §6). Carry prior state forward.
-    const prior = loadState(env.home, env.workspaceId);
+    // immutable; §6). Carry prior state forward. Keyed by runId, so a different repo's run
+    // in the same workspace starts from a clean slate instead of inheriting "complete".
+    const prior = loadState(env.home, env.workspaceId, runId);
     const scoutState = {
         documentation: prior?.scouts.documentation ?? emptyScoutState(),
         history: prior?.scouts.history ?? emptyScoutState(),
     };
     const outcomes = [];
-    let totalAccepted = 0;
     const cap = run.limits.maxCandidatesTotal;
+    // Budget consumed by scouts that completed in a PRIOR ingest (resume): they are
+    // skipped in the loop below but still count against the run's total.
+    const committedPrior = SCOUT_SLOTS.reduce((n, s) => {
+        const st = scoutState[s];
+        return n + (st.status === "complete" ? (st.candidateCount ?? 0) : 0);
+    }, 0);
+    const remainingCap = Math.max(0, cap - committedPrior);
+    // Demand per scout = candidates it sent THIS invocation, counted only for scouts
+    // that (a) sent a well-formed COMPLETE envelope and (b) are not already complete from
+    // a prior ingest of this run. Everything else demands nothing; the loop's own branches
+    // report malformed / not-complete / already-complete. First occurrence of a scout wins.
+    const demands = new Map();
+    for (const rawResult of results) {
+        const shape = (0, protocol_1.validateScoutResultShape)(rawResult);
+        if (!shape.ok)
+            continue;
+        const r = shape.result;
+        if (r.status !== "complete")
+            continue;
+        if (scoutState[r.scout].status === "complete")
+            continue;
+        if (!demands.has(r.scout))
+            demands.set(r.scout, r.candidates.length);
+    }
+    const budget = allocateScoutBudgets(demands, remainingCap);
     for (const rawResult of results) {
         const shape = (0, protocol_1.validateScoutResultShape)(rawResult);
         if (!shape.ok) {
@@ -229,12 +307,18 @@ async function ingestRun(input) {
             });
             continue;
         }
-        // Complete + valid envelope: validate each candidate independently.
+        // Complete + valid envelope: validate each candidate independently, bounded by
+        // this scout's fair share of the run budget (computed above across all scouts).
         const accepted = [];
         const errors = [];
+        const scoutBudget = budget.get(scout) ?? 0;
         result.candidates.forEach((raw, i) => {
-            if (totalAccepted + accepted.length >= cap) {
-                errors.push({ index: i, code: "candidate_cap_exceeded", message: `run candidate cap (${cap}) reached` });
+            if (accepted.length >= scoutBudget) {
+                errors.push({
+                    index: i,
+                    code: "candidate_cap_exceeded",
+                    message: `run candidate cap (${cap}) reached; this scout's fair share was ${scoutBudget}`,
+                });
                 return;
             }
             const shapeRes = (0, protocol_1.validateCandidateShape)(raw, i);
@@ -268,7 +352,6 @@ async function ingestRun(input) {
                 errors.push({ index: -1, code: "persistence_failed", message: e instanceof Error ? e.message : String(e) });
             }
         }
-        totalAccepted += accepted.length;
         scoutState[scout] =
             status === "complete" ? { status: "complete", candidateCount: accepted.length } : { status, error: "kb-add persistence failed" };
         outcomes.push({
@@ -283,6 +366,8 @@ async function ingestRun(input) {
     const allComplete = SCOUT_SLOTS.every((s) => scoutState[s].status === "complete");
     const state = {
         workspaceId: env.workspaceId,
+        runId,
+        repositoryRoot: env.repositoryRoot,
         schemaVersion: 1,
         status: allComplete ? "complete" : "partial",
         updatedAt: now,

package/dist/lib/enrichment/materialize-rules.js ADDED Viewed

@@ -0,0 +1,81 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.MATERIALIZE_SHARE_MESSAGE = void 0;
+exports.isDurableRuleKind = isDurableRuleKind;
+exports.candidateToManagedRule = candidateToManagedRule;
+exports.materializeRules = materializeRules;
+// src/lib/enrichment/materialize-rules.ts
+//
+// The bridge from an accepted onboarding candidate to the mla-managed rule file
+// (.meetless/rules.md). Memo Phase 1, line 535: "accepted DURABLE rules materialize into the
+// managed file; accepted decisions enter governed knowledge and do NOT silently become rules."
+// This is the one place that enforces that split (INV-AUTH-1 / INV-AUTH-2):
+//   - constraint / convention / boundary  -> a durable RULE, materialized into the file.
+//   - decision                            -> governed knowledge only; NEVER touches the file.
+//   - deprecation                         -> a staleness signal, not an injected rule; skipped.
+// The function is pure (text in, text out) so the required invariant test "accepting a decision
+// does not modify the managed rules file" is a byte-equality assertion, and re-materializing the
+// same accepted rule is idempotent (managed-rules upsert dedupes by content-derived id).
+//
+// Layering: enrichment depends on the scanner's managed-rules engine, never the reverse. The
+// managed file stays dependency-light (it knows only ./types); this module owns the mapping.
+const managed_rules_1 = require("../scanner/managed-rules");
+// The CLI prints this verbatim after a materialize so the operator knows the rule is live in
+// their own session immediately and that sharing it is an explicit, un-automated git step (the
+// memo forbids mla from auto-committing or auto-pushing).
+exports.MATERIALIZE_SHARE_MESSAGE = "Effective locally. Commit and push to share with teammates.";
+// The candidate kinds that are DURABLE repository rules (normative, always-on policy). A decision
+// is a point-in-time choice that enters governed knowledge but is not an always-on rule
+// (INV-AUTH-2); a deprecation is a staleness signal. Neither materializes into the rule file.
+const DURABLE_RULE_KINDS = new Set(["constraint", "convention", "boundary"]);
+function isDurableRuleKind(kind) {
+    return DURABLE_RULE_KINDS.has(kind);
+}
+// Pull the rule's provenance out of its evidence so the materialized rule keeps a citation back to
+// the source the scout grounded it in. File evidence contributes its path; commit evidence
+// contributes `commit:<sha>`. Deduped + sorted downstream by makeManagedRule.
+function candidateSources(candidate) {
+    return candidate.evidence.map((ev) => (ev.type === "file" ? ev.path : `commit:${ev.commit}`));
+}
+// Map one durable-rule candidate to a ManagedRule. Strength defaults to the conservative
+// SHOULD_FOLLOW: an onboarding candidate carries no explicit MUST signal, and only an explicit
+// human escalation should earn must-follow injection (memo: "only an explicit MUST"). Scope is
+// repository-wide (candidates carry no glob today); a future scoped-rule signal slots in here.
+function candidateToManagedRule(candidate) {
+    return (0, managed_rules_1.makeManagedRule)({
+        statement: candidate.statement,
+        strength: "SHOULD_FOLLOW",
+        sources: candidateSources(candidate),
+    });
+}
+// Materialize the accepted candidates into the managed file content. `existingText` is the current
+// file (pass "" when it does not exist yet). The result is a full re-render, so ordering is
+// deterministic and the write is idempotent regardless of how many times the same rule is accepted.
+function materializeRules(existingText, accepted) {
+    let rules = (0, managed_rules_1.parseManagedRules)(existingText);
+    const materialized = [];
+    const skipped = [];
+    for (const c of accepted) {
+        if (!c.statement || c.statement.trim().length === 0) {
+            skipped.push({ statement: c.statement ?? "", kind: c.kind, reason: "empty_statement" });
+            continue;
+        }
+        if (!isDurableRuleKind(c.kind)) {
+            // A decision or deprecation: governed knowledge / staleness, never an always-on rule.
+            skipped.push({ statement: c.statement, kind: c.kind, reason: "not_a_durable_rule" });
+            continue;
+        }
+        const rule = candidateToManagedRule(c);
+        rules = (0, managed_rules_1.upsertManagedRule)(rules, rule);
+        materialized.push(rule);
+    }
+    // Re-render from the (possibly unchanged) rule set. When nothing durable was accepted, parsing
+    // then re-rendering the original could differ from the raw input only by formatting; to make the
+    // "decision does not modify the file" guarantee exact, short-circuit to the original bytes when
+    // no rule was materialized.
+    if (materialized.length === 0) {
+        return { text: existingText, materialized, skipped, changed: false };
+    }
+    const text = (0, managed_rules_1.renderManagedRules)(rules);
+    return { text, materialized, skipped, changed: text !== existingText };
+}

package/dist/lib/enrichment/plan.js CHANGED Viewed

@@ -34,10 +34,23 @@ const META_END_MARK = "@@MLA-ENRICH-ENDMETA@@";
 function defaultGitRunner(repoRoot) {
     return (args) => (0, node_child_process_1.execFileSync)("git", args, { cwd: repoRoot, encoding: "utf8", maxBuffer: 64 * 1024 * 1024 });
 }
-const TIER_RANK = { T1: 0, T2: 1, T3: 99, T4: 2 };
+// Within-target ordering band (lower is read first). T1 instruction files first; then,
+// among T2, curated decision/instruction-adjacent docs (known doc names, ADR/RFC/spec
+// dirs) ahead of arbitrary prose, so a tight target budget surfaces a repo's ADRs and
+// package READMEs instead of spending slots on generic marketing .md that merely sorts
+// early; T4 legacy notes last. Path breaks ties so the plan stays deterministic. T3 is
+// grounding-only and never a target.
+function targetBand(path, tier) {
+    if (tier === "T1")
+        return 0;
+    if (tier === "T4")
+        return 3;
+    return (0, score_1.isCuratedDoc)(path) ? 1 : 2; // T2: curated docs above generic prose
+}
 // Rank the doc targets the documentation scout should read: T1 instruction files first,
-// then T2 decision docs, then T4 legacy notes; within a tier, deterministic by path.
-// T3 (grounding-only) and unclassified files are excluded. Capped to the limit.
+// then curated T2 decision docs, then generic prose, then T4 legacy notes; within a
+// band, deterministic by path. T3 (grounding-only) and unclassified files are excluded.
+// Capped to the limit.
 function buildDocumentationTargets(repoRoot, limit, gitRunner = defaultGitRunner(repoRoot)) {
     let tracked;
     try {
@@ -56,7 +69,7 @@ function buildDocumentationTargets(repoRoot, limit, gitRunner = defaultGitRunner
             continue;
         scored.push({ path, tier });
     }
-    scored.sort((a, b) => TIER_RANK[a.tier] - TIER_RANK[b.tier] || a.path.localeCompare(b.path));
+    scored.sort((a, b) => targetBand(a.path, a.tier) - targetBand(b.path, b.tier) || a.path.localeCompare(b.path));
     return scored.slice(0, Math.max(0, limit)).map((s, i) => ({ path: s.path, tier: s.tier, rank: i + 1 }));
 }
 // Prepare a bounded slice of recent git history: the commit allowlist (full SHAs) plus
@@ -67,12 +80,13 @@ function buildDocumentationTargets(repoRoot, limit, gitRunner = defaultGitRunner
 // HEAD); it is a future toggle.
 function prepareGitEvidence(repoRoot, opts) {
     const gitRunner = opts.gitRunner ?? defaultGitRunner(repoRoot);
+    const scanCap = Math.max(0, opts.maxScanCommits);
     let raw;
     try {
         raw = gitRunner([
             "log",
             `-n`,
-            String(Math.max(0, opts.maxCommits)),
+            String(scanCap),
             "--no-merges",
             "--date=iso-strict",
             "--name-status",
@@ -82,24 +96,33 @@ function prepareGitEvidence(repoRoot, opts) {
     catch {
         return { evidence: [], truncated: false }; // empty history / not a repo: no evidence
     }
+    // Scan a WIDE window (maxScanCommits) but inline only maxSelectedCommits (verdict item 7).
+    // The byte budget SKIPS rather than HALTS: a single fat commit (huge body / many files)
+    // no longer starves the rest, so the recency-ordered fill reaches deeper into the pool
+    // and the scout sees more distinct decisions within the same byte budget. Selection stays
+    // deterministic (recency order) and taste-free; substance ranking is a future toggle, not
+    // built here. The first commit is always kept even if it alone exceeds the byte budget, so
+    // a repo whose newest commit is oversized still yields evidence.
     const parsed = parseGitLog(raw);
     const evidence = [];
     let bytes = 0;
     let truncated = false;
     for (const commit of parsed) {
-        if (evidence.length >= opts.maxCommits) {
+        if (evidence.length >= opts.maxSelectedCommits) {
             truncated = true;
             break;
         }
         const size = Buffer.byteLength(JSON.stringify(commit), "utf8");
         if (bytes + size > opts.maxBytes && evidence.length > 0) {
             truncated = true;
-            break; // keep at least one commit even if it alone exceeds the byte budget
+            continue; // skip this oversized commit, keep filling from smaller later ones
         }
         bytes += size;
         evidence.push(commit);
     }
-    if (parsed.length > evidence.length)
+    // Truncated if anything in the scanned pool was dropped, OR the scan itself hit its
+    // ceiling (there may be older commits the scan never reached).
+    if (parsed.length > evidence.length || parsed.length >= scanCap)
         truncated = true;
     return { evidence, truncated };
 }
@@ -206,23 +229,56 @@ function loadRunRecord(home, workspaceId, runId) {
         return null;
     }
 }
-// Keep only the current active run record; drop older ones (§5b: no run-history
-// retention). runId collisions are impossible (random), so "older" == "any other".
-function pruneOldRuns(home, workspaceId, currentRunId) {
+function safeRealpath(p) {
+    try {
+        return (0, node_fs_1.realpathSync)(p);
+    }
+    catch {
+        return p;
+    }
+}
+// Keep only the current active run record FOR THIS REPO; drop this repo's older ones
+// (§5b: no run-history retention). A workspace can bind more than one repo (the Meetless
+// monorepo and intel share one), so "older" must mean "same repo, different runId", never
+// "any other run": deleting another repo's in-flight run would strand its resume/ingest.
+// We compare repositoryRoot by realpath (symlink/`..` safe). Records we cannot read are
+// left alone (harmless; ingest loads strictly by runId). The paired `<runId>.state.json`
+// is dropped with its record so stale resume state never lingers.
+function pruneOldRuns(home, workspaceId, currentRunId, currentRepoRoot) {
     const dir = runsDir(home, workspaceId);
     if (!(0, node_fs_1.existsSync)(dir))
         return 0;
+    const currentRepoReal = safeRealpath(currentRepoRoot);
     let removed = 0;
     for (const name of (0, node_fs_1.readdirSync)(dir)) {
-        if (!name.endsWith(".json") || name === `${currentRunId}.json`)
+        // Only run-record files (`<runId>.json`); skip state sidecars and the current record.
+        if (!name.endsWith(".json") || name.endsWith(".state.json") || name === `${currentRunId}.json`)
+            continue;
+        const recordPath = (0, node_path_1.join)(dir, name);
+        let sameRepo = false;
+        try {
+            const rec = JSON.parse((0, node_fs_1.readFileSync)(recordPath, "utf8"));
+            sameRepo = safeRealpath(rec.repositoryRoot) === currentRepoReal;
+        }
+        catch {
+            continue; // unreadable / corrupt: leave it, do not risk deleting another repo's run
+        }
+        if (!sameRepo)
             continue;
         try {
-            (0, node_fs_1.unlinkSync)((0, node_path_1.join)(dir, name));
+            (0, node_fs_1.unlinkSync)(recordPath);
             removed++;
         }
         catch {
             // best-effort cleanup; a leftover record is harmless (ingest loads by runId)
         }
+        // Drop the paired resume-state sidecar, if any, so it cannot outlive its record.
+        try {
+            (0, node_fs_1.unlinkSync)((0, node_path_1.join)(dir, `${name.slice(0, -".json".length)}.state.json`));
+        }
+        catch {
+            // no sidecar (run never ingested) or already gone: nothing to do
+        }
     }
     return removed;
 }
@@ -234,7 +290,8 @@ function createPlan(input) {
     const gitRunner = input.gitRunner ?? defaultGitRunner(input.repositoryRoot);
     const documentationTargets = buildDocumentationTargets(input.repositoryRoot, limits.maxDocumentTargets, gitRunner);
     const { evidence: historyEvidence, truncated: historyTruncated } = prepareGitEvidence(input.repositoryRoot, {
-        maxCommits: limits.maxHistoryCommits,
+        maxScanCommits: limits.maxHistoryScanCommits,
+        maxSelectedCommits: limits.maxHistorySelectedCommits,
         maxBytes: limits.maxPreparedInputBytes,
         gitRunner,
     });
@@ -248,6 +305,6 @@ function createPlan(input) {
         historyEvidence,
     });
     const recordPath = writeRunRecord(input.home, run);
-    const pruned = pruneOldRuns(input.home, input.workspaceId, input.runId);
+    const pruned = pruneOldRuns(input.home, input.workspaceId, input.runId, input.repositoryRoot);
     return { run, recordPath, pruned, historyTruncated };
 }

package/dist/lib/enrichment/protocol.js CHANGED Viewed

@@ -7,7 +7,8 @@
 // real file length, fs/network) live in ingest.ts; clock + id injection lives in
 // plan.ts. See notes/20260626-mla-agent-onboarding-enrichment-plan.md (§5, §5b, §6, §6b, §8).
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.SCOUT_STATUSES = exports.SCOUT_NAMES = exports.ENRICHMENT_KINDS = exports.MIN_COMMIT_SHA_LENGTH = exports.MAX_EVIDENCE_PER_CANDIDATE = exports.MIN_STATEMENT_LENGTH = exports.MAX_STATEMENT_LENGTH = exports.DEFAULT_BUDGET_MS = exports.MAX_CANDIDATES_TOTAL = exports.MAX_PREPARED_INPUT_BYTES = exports.MAX_HISTORY_COMMITS = exports.MAX_DOCUMENT_TARGETS = exports.PROTOCOL_VERSION = void 0;
+exports.SCOUT_STATUSES = exports.SCOUT_NAMES = exports.ENRICHMENT_KINDS = exports.RATIONALE_SOURCES = exports.MAX_RATIONALE_LENGTH = exports.MIN_COMMIT_SHA_LENGTH = exports.MAX_EVIDENCE_PER_CANDIDATE = exports.MIN_STATEMENT_LENGTH = exports.MAX_STATEMENT_LENGTH = exports.REVIEW_BATCH_DEFAULT = exports.DEFAULT_BUDGET_MS = exports.MAX_CANDIDATES_TOTAL = exports.MAX_PREPARED_INPUT_BYTES = exports.MAX_HISTORY_SELECTED_COMMITS = exports.MAX_HISTORY_SCAN_COMMITS = exports.MAX_DOCUMENT_TARGETS = exports.PROTOCOL_VERSION = void 0;
+exports.selectReviewBatch = selectReviewBatch;
 exports.normalizeStatement = normalizeStatement;
 exports.candidateAnchors = candidateAnchors;
 exports.candidateId = candidateId;
@@ -25,16 +26,47 @@ const crypto_1 = require("crypto");
 exports.PROTOCOL_VERSION = 1;
 // Input bounds (§8). Explicit MVP constants; only the time budget is configurable.
 exports.MAX_DOCUMENT_TARGETS = 20;
-exports.MAX_HISTORY_COMMITS = 40;
+// History bounds are SPLIT into scan vs selected (verdict item 7): the scan window is the
+// pool `git log` walks (bounded, never the whole repo), and the selected count is what we
+// actually inline as the commit allowlist. Keeping them distinct lets the byte-budget fill
+// reach DEEPER than the inline cap (a single fat commit no longer starves the rest) without
+// ever loading an unbounded log. scan >= selected holds for the defaults below (300 >= 40).
+exports.MAX_HISTORY_SCAN_COMMITS = 300;
+exports.MAX_HISTORY_SELECTED_COMMITS = 40;
 exports.MAX_PREPARED_INPUT_BYTES = 200_000;
-exports.MAX_CANDIDATES_TOTAL = 20; // ceiling, not a target; zero is valid
+exports.MAX_CANDIDATES_TOTAL = 20; // EXTRACTION ceiling, not a target; zero is valid
 exports.DEFAULT_BUDGET_MS = 240_000;
+// The REVIEW batch is the human-facing presentation size, kept deliberately SEPARATE
+// from the extraction cap above (notes/20260624-mla-new-user-value-and-brownfield-proof.md,
+// Phase 2). A run may surface up to MAX_CANDIDATES_TOTAL (20) candidates, but dumping 20
+// PENDING items on a reviewer in one screen is how review queues rot. The reviewer sees
+// REVIEW_BATCH_DEFAULT at a time and the remainder sits behind "show more". This bounds
+// the reviewer's cognitive load WITHOUT lowering extraction recall, so it must never be
+// folded into EnrichmentLimits (that would re-conflate the two the plan tells us to split).
+exports.REVIEW_BATCH_DEFAULT = 6; // within the plan's 5-8 window
+// Pure: split a count of pending-review items into the first batch and the remainder.
+// A non-positive batchSize disables batching (show everything). Only `total` is sanitized
+// (floored, non-finite -> 0), so "remaining" is never negative; batchSize is used as given.
+function selectReviewBatch(total, batchSize = exports.REVIEW_BATCH_DEFAULT) {
+    const t = Math.max(0, Math.floor(Number.isFinite(total) ? total : 0));
+    if (batchSize <= 0 || t <= batchSize) {
+        return { shown: t, remaining: 0, total: t, hasMore: false };
+    }
+    return { shown: batchSize, remaining: t - batchSize, total: t, hasMore: true };
+}
 // Defensive bounds NOT pinned by the plan (§5 says only "max statement length" and
 // "allowed kind"); these are conservative defaults, tune freely.
 exports.MAX_STATEMENT_LENGTH = 500;
 exports.MIN_STATEMENT_LENGTH = 1; // non-empty after normalization; no semantic floor (the human governs durability)
 exports.MAX_EVIDENCE_PER_CANDIDATE = 12;
 exports.MIN_COMMIT_SHA_LENGTH = 7; // git's conventional abbreviation floor
+exports.MAX_RATIONALE_LENGTH = 1000; // rationale is a short "why", not an essay
+// Provenance of a candidate's rationale (memo Phase 1). The scouts are AGENTS, so any "why"
+// THEY compose is an AGENT_SUMMARY; USER_EXPLICIT is reserved for the human's own words
+// (e.g. a verbatim quote from an instruction file the user wrote). The two must never be
+// conflated: presenting an agent paraphrase as user-provided is the exact failure this
+// field exists to prevent, and a missing rationale always beats a fabricated one.
+exports.RATIONALE_SOURCES = ["USER_EXPLICIT", "AGENT_SUMMARY"];
 exports.ENRICHMENT_KINDS = [
     "constraint",
     "decision",
@@ -133,7 +165,8 @@ function computePlanDigest(run) {
 function defaultLimits(budgetMs = exports.DEFAULT_BUDGET_MS) {
     return {
         maxDocumentTargets: exports.MAX_DOCUMENT_TARGETS,
-        maxHistoryCommits: exports.MAX_HISTORY_COMMITS,
+        maxHistoryScanCommits: exports.MAX_HISTORY_SCAN_COMMITS,
+        maxHistorySelectedCommits: exports.MAX_HISTORY_SELECTED_COMMITS,
         maxPreparedInputBytes: exports.MAX_PREPARED_INPUT_BYTES,
         maxCandidatesTotal: exports.MAX_CANDIDATES_TOTAL,
         budgetMs,
@@ -158,7 +191,7 @@ function resolveAllowedCommit(allowlist, cited) {
     return prefixed.length === 1 ? prefixed[0] : null;
 }
 // --- Pure shape validators -------------------------------------------------------
-const CANDIDATE_FIELDS = new Set(["kind", "statement", "evidence", "sourceScout"]);
+const CANDIDATE_FIELDS = new Set(["kind", "statement", "evidence", "sourceScout", "rationale", "rationaleSource"]);
 const FILE_EVIDENCE_FIELDS = new Set(["type", "path", "startLine", "endLine"]);
 const COMMIT_EVIDENCE_FIELDS = new Set(["type", "commit", "path"]);
 function isPlainObject(v) {
@@ -226,6 +259,10 @@ function validateCandidateShape(raw, index) {
     if (sourceScout === "history" && !validEvidence.some((e) => e.type === "commit")) {
         err("missing_commit_anchor", "history candidate requires at least one commit anchor", "evidence");
     }
+    // Rationale provenance (memo Phase 1): rationale and rationaleSource are paired. A
+    // non-empty rationale must declare a valid source; a null/absent rationale must NOT carry
+    // an orphan source claiming provenance for nothing. Missing rationale is always allowed.
+    const rationale = validateRationale(raw, err);
     if (errors.length > 0)
         return { ok: false, errors };
     return {
@@ -235,9 +272,52 @@ function validateCandidateShape(raw, index) {
             statement: statement,
             evidence: validEvidence,
             sourceScout: sourceScout,
+            rationale: rationale.rationale,
+            rationaleSource: rationale.rationaleSource,
         },
     };
 }
+// Checks the paired `rationale` / `rationaleSource` fields of a raw candidate and
+// reports every problem through `err(code, message, field)`.
+//
+// Returns the canonical pair. When the rationale is absent (undefined or null) or is
+// not a string, both members are null. Otherwise the rationale is returned trimmed
+// (null if it trims to nothing) and the source is returned only when it is one of
+// exports.RATIONALE_SOURCES. An absent rationale is never an error on its own; a source
+// supplied without a rationale, a blank or over-long rationale, and a missing or
+// unrecognized source alongside a rationale are all reported.
+function validateRationale(raw, err) {
+    const { rationale: candidateRationale, rationaleSource: candidateSource } = raw;
+    const sourceGiven = candidateSource != null;
+    if (candidateRationale == null) {
+        // Nothing to explain, so a declared source would vouch for text that is not there.
+        if (sourceGiven) {
+            err("orphan_rationale_source", "rationaleSource set without a rationale", "rationaleSource");
+        }
+        return { rationale: null, rationaleSource: null };
+    }
+    if (typeof candidateRationale !== "string") {
+        err("bad_rationale", "rationale must be a string or null", "rationale");
+        return { rationale: null, rationaleSource: null };
+    }
+    // Length checks apply to the trimmed text, so padding neither rescues a blank
+    // rationale nor pushes a short one over the limit.
+    const why = candidateRationale.trim();
+    const isBlank = why.length < 1;
+    if (isBlank) {
+        err("empty_rationale", "rationale is empty; omit it or send null instead", "rationale");
+    }
+    else if (why.length > exports.MAX_RATIONALE_LENGTH) {
+        err("rationale_too_long", `rationale exceeds ${exports.MAX_RATIONALE_LENGTH} chars`, "rationale");
+    }
+    // The source is judged independently of the text so one pass reports both problems.
+    const sourceListed = sourceGiven && exports.RATIONALE_SOURCES.includes(candidateSource);
+    if (!sourceGiven) {
+        err("missing_rationale_source", `rationale requires rationaleSource (one of: ${exports.RATIONALE_SOURCES.join(", ")})`, "rationaleSource");
+    }
+    else if (!(typeof candidateSource === "string" && sourceListed)) {
+        err("bad_rationale_source", `rationaleSource must be one of: ${exports.RATIONALE_SOURCES.join(", ")}`, "rationaleSource");
+    }
+    return {
+        rationale: isBlank ? null : why,
+        rationaleSource: sourceListed ? candidateSource : null,
+    };
+}
 function validateEvidenceShape(raw, candidateIndex, evidenceIndex, err) {
     const field = `evidence[${evidenceIndex}]`;
     if (!isPlainObject(raw)) {

package/dist/lib/enrichment/scout-brief.js CHANGED Viewed

@@ -21,6 +21,7 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.SCOUT_AGENT_NAME = exports.SCOUT_TOOL_ALLOWLIST = void 0;
 exports.buildScoutPrompt = buildScoutPrompt;
+const protocol_1 = require("./protocol");
 const scout_mission_1 = require("../scanner/scout-mission");
 // The capability each scout role is granted. Read-only for documentation; no tools
 // for history (the plan precomputes and inlines its evidence). Deliberately narrow:
@@ -97,6 +98,13 @@ function toolLine(role) {
     }
     return `Your only tools are: ${tools.join(", ")}. Do not attempt any other tool.`;
 }
+// The number of candidates a single scout is told to aim for: the run-wide candidate
+// budget (run.limits.maxCandidatesTotal) divided evenly across protocol_1.SCOUT_NAMES
+// and rounded down. Ingest deals that budget round-robin across scouts, so the even
+// split is what a fully-producing scout can expect to keep; stating it up front
+// discourages output that ingest would only drop. The result is floored at 1 so a
+// scout is never instructed to surface nothing.
+function perScoutTarget(run) {
+    const runBudget = run.limits.maxCandidatesTotal;
+    const scoutCount = protocol_1.SCOUT_NAMES.length;
+    const evenShare = Math.floor(runBudget / scoutCount);
+    return Math.max(1, evenShare);
+}
 function renderOutputContract(run, role) {
     const evidenceExample = role === "documentation"
         ? '{ "type": "file", "path": "<one of the documents above>", "startLine": 10, "endLine": 24 }'
@@ -113,20 +121,34 @@ function renderOutputContract(run, role) {
         '  "candidates": [',
         "    {",
         '      "kind": "<one of the kinds listed above>",',
-        '      "statement": "<one specific claim, 500 characters or fewer>",',
+        `      "statement": "<one specific claim, ${protocol_1.MAX_STATEMENT_LENGTH} characters or fewer>",`,
         `      "evidence": [ ${evidenceExample} ],`,
-        `      "sourceScout": "${role}"`,
+        `      "sourceScout": "${role}",`,
+        '      "rationale": "<optional: WHY this governs, in YOUR words, or omit it entirely>",',
+        '      "rationaleSource": "AGENT_SUMMARY"   // omit alongside rationale; for a scout it is always AGENT_SUMMARY',
         "    }",
         "  ]",
         "}",
         "",
         anchorRule,
-        `Surface at most ${run.limits.maxCandidatesTotal} candidates total across all scouts; ` +
-            "choose the highest-value ones rather than padding.",
+        "The `rationale` and `rationaleSource` fields are OPTIONAL. Include a rationale only when " +
+            "the evidence makes the WHY non-obvious, and keep it to one short sentence. You are an " +
+            'agent, so your rationale is always `"AGENT_SUMMARY"`: it is recorded as your paraphrase, ' +
+            "never as the user's own words. Do NOT invent a rationale to look thorough: omitting both " +
+            "fields is always better than a fabricated reason.",
+        `Keep each statement to ${protocol_1.MAX_STATEMENT_LENGTH} characters or fewer: a longer statement is ` +
+            "rejected outright at ingest, not truncated, so state the claim concisely and let the " +
+            "evidence anchor carry the detail.",
+        `Aim for the highest-value ${perScoutTarget(run)} candidates or fewer. The run keeps at most ` +
+            `${run.limits.maxCandidatesTotal} candidates total and shares that budget fairly across the ` +
+            `${protocol_1.SCOUT_NAMES.length} scouts, so candidates past your share are dropped at ingest. Pick the ` +
+            "highest-value ones rather than padding.",
         'Zero candidates with status "complete" is a valid, successful result: only record a',
         "candidate you can anchor to the evidence above.",
-        "Also note any contradictions you see in a short prose summary after the JSON; a",
-        "contradiction is a flag for the human, not a candidate of its own.",
+        "If two sources contradict each other on a governing point, that IS a governance",
+        "signal: surface it as a `decision` or `deprecation` candidate that names which",
+        "source supersedes which, anchored to both. Do not append free prose; the JSON",
+        "object above is the entire output.",
     ];
 }
 /**
@@ -162,6 +184,13 @@ function buildScoutPrompt(run, role) {
             "Read ONLY these documents, in rank order. The plan already selected and ranked",
             "them; do not search for, glob, or open any other file.",
             "",
+            `The paths below are relative to the repository root: ${run.repositoryRoot}`,
+            "Your working directory may NOT be that root, so read each document by its",
+            "absolute path (join the root and the relative path). In every candidate's",
+            "evidence, write the path exactly as listed below (relative), not the absolute",
+            "one: ingest anchors evidence against the repository root and rejects absolute",
+            "paths.",
+            "",
             ...renderDocumentationTargets(run.documentationTargets),
         ]
         : [