@meetless/mla 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/build-info.json +3 -3
  2. package/dist/cli.js +31 -5
  3. package/dist/commands/activate.js +39 -18
  4. package/dist/commands/agent-memory.js +333 -0
  5. package/dist/commands/enrich.js +211 -2
  6. package/dist/commands/internal-auto-index.js +64 -1
  7. package/dist/commands/internal-pretool-observe.js +86 -1
  8. package/dist/commands/internal-redact-capture.js +130 -0
  9. package/dist/commands/pilot.js +385 -0
  10. package/dist/lib/agent-memory-capture/binding.js +115 -0
  11. package/dist/lib/agent-memory-capture/classify.js +68 -0
  12. package/dist/lib/agent-memory-capture/collector.js +69 -0
  13. package/dist/lib/agent-memory-capture/containment.js +74 -0
  14. package/dist/lib/agent-memory-capture/ledger.js +43 -0
  15. package/dist/lib/agent-memory-capture/live-collector.js +148 -0
  16. package/dist/lib/agent-memory-capture/live-ledger.js +45 -0
  17. package/dist/lib/agent-memory-capture/live-pipeline.js +344 -0
  18. package/dist/lib/agent-memory-capture/lock.js +98 -0
  19. package/dist/lib/agent-memory-capture/paths.js +47 -0
  20. package/dist/lib/agent-memory-capture/pipeline.js +222 -0
  21. package/dist/lib/agent-memory-capture/report.js +131 -0
  22. package/dist/lib/agent-memory-capture/types.js +14 -0
  23. package/dist/lib/agent-memory-capture/upsert-client.js +104 -0
  24. package/dist/lib/analytics/enforcement-classify.js +65 -0
  25. package/dist/lib/analytics/enforcement-incident.js +83 -0
  26. package/dist/lib/analytics/envelope.js +55 -1
  27. package/dist/lib/analytics/pilot.js +313 -0
  28. package/dist/lib/enrichment/ingest.js +98 -13
  29. package/dist/lib/enrichment/materialize-rules.js +81 -0
  30. package/dist/lib/enrichment/plan.js +72 -15
  31. package/dist/lib/enrichment/protocol.js +85 -5
  32. package/dist/lib/enrichment/scout-brief.js +35 -6
  33. package/dist/lib/redactor.js +104 -1
  34. package/dist/lib/scanner/agent-memory.js +55 -4
  35. package/dist/lib/scanner/managed-rules.js +0 -0
  36. package/dist/lib/scanner/scan.js +52 -1
  37. package/dist/lib/scanner/score.js +41 -3
  38. package/dist/lib/scanner/scout-mission.js +9 -7
  39. package/dist/lib/upgrade-apply.js +30 -0
  40. package/dist/lib/wire.js +2 -0
  41. package/package.json +1 -1
@@ -20,6 +20,39 @@ const node_path_1 = require("node:path");
20
20
  const protocol_1 = require("./protocol");
21
21
  const plan_1 = require("./plan");
22
22
  const SCOUT_SLOTS = ["documentation", "history"];
/**
 * Fair-share the run's remaining candidate budget across the scouts that produced
 * candidates THIS invocation. The scouts run independently and cannot coordinate to
 * jointly honor one shared total, so a naive running-total cap applied in array order
 * lets the first scout (documentation) swallow the whole budget and starves every
 * later scout (history) to zero. We instead deal the budget round-robin in slot order,
 * each round bounded by what a scout actually sent, so an under-producing scout cedes
 * its surplus to the others and no scout is wiped out purely by ordering. Leftover
 * slots that cannot divide evenly fall to the earliest scouts in slot order, which is
 * deterministic. `remainingCap` already excludes budget consumed by scouts that
 * completed in a prior ingest (resume).
 *
 * @param {Map<string, number>} demands Candidates each scout sent this invocation,
 *   keyed by scout slot; a slot with no entry demands nothing.
 * @param {number} remainingCap Budget still available to hand out. Negative or NaN
 *   values allocate nothing; a fractional value is floored (budget is whole candidates).
 * @returns {Map<string, number>} Budget per slot; every slot in SCOUT_SLOTS has an entry.
 */
function allocateScoutBudgets(demands, remainingCap) {
    const budget = new Map();
    for (const s of SCOUT_SLOTS)
        budget.set(s, 0);
    // Floor first: the loop decrements by whole units and stops on `remaining === 0`,
    // so a fractional cap (e.g. 2.5) would otherwise step past zero and hand out one
    // unit more than the cap allows. NaN stays NaN and fails `remaining > 0` below.
    let remaining = Math.max(0, Math.floor(remainingCap));
    let progressed = true;
    // Deal one unit per scout per round; stop when the budget is spent or when a full
    // round granted nothing (every scout's demand is already satisfied).
    while (remaining > 0 && progressed) {
        progressed = false;
        for (const s of SCOUT_SLOTS) {
            if (remaining === 0)
                break;
            const want = demands.get(s) ?? 0;
            const have = budget.get(s) ?? 0;
            if (have < want) {
                budget.set(s, have + 1);
                remaining--;
                progressed = true;
            }
        }
    }
    return budget;
}
23
56
  function safeRealpath(p) {
24
57
  try {
25
58
  return (0, node_fs_1.realpathSync)(p);
@@ -118,6 +151,17 @@ function renderCandidateDocument(candidate) {
118
151
  lines.push("");
119
152
  lines.push(`Kind: ${candidate.kind}. Source: ${candidate.sourceScout} scout (onboarding enrichment, advisory; pending human review).`);
120
153
  lines.push("");
154
+ // Rationale carries a provenance label so the persisted artifact never presents an
155
+ // agent's paraphrase as the user's own words (memo Phase 1). Rendered only when present;
156
+ // a missing rationale is simply omitted (missing beats fabricated).
157
+ if (candidate.rationale && candidate.rationale.trim().length > 0) {
158
+ const heading = candidate.rationaleSource === "USER_EXPLICIT"
159
+ ? "## Rationale (user-stated)"
160
+ : "## Rationale (agent summary; not the user's words)";
161
+ lines.push(heading);
162
+ lines.push(candidate.rationale.trim());
163
+ lines.push("");
164
+ }
121
165
  lines.push("## Evidence");
122
166
  for (const ev of candidate.evidence) {
123
167
  if (ev.type === "file") {
@@ -130,17 +174,26 @@ function renderCandidateDocument(candidate) {
130
174
  return lines.join("\n") + "\n";
131
175
  }
132
176
  // --- per-scout state persistence (§6) --------------------------------------------
133
- function statePath(home, workspaceId) {
134
- return (0, node_path_1.join)(home, "workspaces", workspaceId, "onboarding-state.json");
177
+ // Per-run resume state lives BESIDE the run record it belongs to, keyed by runId, so two
178
+ // repos sharing one workspace never collide on a single onboarding-state.json (§6). A
179
+ // stale path keyed only by workspace made the first repo's completion permanently skip
180
+ // every later repo's scouts. Named `<runId>.state.json` so it sorts next to `<runId>.json`
181
+ // and prune can drop the pair together.
182
+ function statePath(home, workspaceId, runId) {
183
+ return (0, node_path_1.join)(home, "workspaces", workspaceId, "onboarding-runs", `${runId}.state.json`);
135
184
  }
136
- function loadState(home, workspaceId) {
137
- const path = statePath(home, workspaceId);
185
+ function loadState(home, workspaceId, runId) {
186
+ const path = statePath(home, workspaceId, runId);
138
187
  if (!(0, node_fs_1.existsSync)(path))
139
188
  return null;
140
189
  try {
141
190
  const parsed = JSON.parse((0, node_fs_1.readFileSync)(path, "utf8"));
142
191
  if (parsed?.schemaVersion !== 1)
143
192
  return null;
193
+ // A state file is only valid for the run it names: ignore one whose stored runId
194
+ // drifted from its path (corruption / hand-edit), rather than resuming the wrong run.
195
+ if (parsed.runId !== runId)
196
+ return null;
144
197
  return parsed;
145
198
  }
146
199
  catch {
@@ -148,9 +201,9 @@ function loadState(home, workspaceId) {
148
201
  }
149
202
  }
150
203
  function writeState(home, state) {
151
- const dir = (0, node_path_1.join)(home, "workspaces", state.workspaceId);
204
+ const dir = (0, node_path_1.join)(home, "workspaces", state.workspaceId, "onboarding-runs");
152
205
  (0, node_fs_1.mkdirSync)(dir, { recursive: true });
153
- (0, node_fs_1.writeFileSync)(statePath(home, state.workspaceId), JSON.stringify(state, null, 2), "utf8");
206
+ (0, node_fs_1.writeFileSync)(statePath(home, state.workspaceId, state.runId), JSON.stringify(state, null, 2), "utf8");
154
207
  }
155
208
  function emptyScoutState() {
156
209
  return { status: "not_started" };
@@ -176,15 +229,40 @@ async function ingestRun(input) {
176
229
  }
177
230
  const probe = input.probe ?? defaultProbe(env.repositoryRoot, input.gitRunner);
178
231
  // Resume: a scout already "complete" is never re-processed (its candidates are
179
- // immutable; §6). Carry prior state forward.
180
- const prior = loadState(env.home, env.workspaceId);
232
+ // immutable; §6). Carry prior state forward. Keyed by runId, so a different repo's run
233
+ // in the same workspace starts from a clean slate instead of inheriting "complete".
234
+ const prior = loadState(env.home, env.workspaceId, runId);
181
235
  const scoutState = {
182
236
  documentation: prior?.scouts.documentation ?? emptyScoutState(),
183
237
  history: prior?.scouts.history ?? emptyScoutState(),
184
238
  };
185
239
  const outcomes = [];
186
- let totalAccepted = 0;
187
240
  const cap = run.limits.maxCandidatesTotal;
241
+ // Budget consumed by scouts that completed in a PRIOR ingest (resume): they are
242
+ // skipped in the loop below but still count against the run's total.
243
+ const committedPrior = SCOUT_SLOTS.reduce((n, s) => {
244
+ const st = scoutState[s];
245
+ return n + (st.status === "complete" ? (st.candidateCount ?? 0) : 0);
246
+ }, 0);
247
+ const remainingCap = Math.max(0, cap - committedPrior);
248
+ // Demand per scout = candidates it sent THIS invocation, counted only for scouts
249
+ // that (a) sent a well-formed COMPLETE envelope and (b) are not already complete
250
+ // from a prior run. Everything else demands nothing; the loop's own branches report
251
+ // malformed / not-complete / already-complete. First occurrence of a scout wins.
252
+ const demands = new Map();
253
+ for (const rawResult of results) {
254
+ const shape = (0, protocol_1.validateScoutResultShape)(rawResult);
255
+ if (!shape.ok)
256
+ continue;
257
+ const r = shape.result;
258
+ if (r.status !== "complete")
259
+ continue;
260
+ if (scoutState[r.scout].status === "complete")
261
+ continue;
262
+ if (!demands.has(r.scout))
263
+ demands.set(r.scout, r.candidates.length);
264
+ }
265
+ const budget = allocateScoutBudgets(demands, remainingCap);
188
266
  for (const rawResult of results) {
189
267
  const shape = (0, protocol_1.validateScoutResultShape)(rawResult);
190
268
  if (!shape.ok) {
@@ -229,12 +307,18 @@ async function ingestRun(input) {
229
307
  });
230
308
  continue;
231
309
  }
232
- // Complete + valid envelope: validate each candidate independently.
310
+ // Complete + valid envelope: validate each candidate independently, bounded by
311
+ // this scout's fair share of the run budget (computed above across all scouts).
233
312
  const accepted = [];
234
313
  const errors = [];
314
+ const scoutBudget = budget.get(scout) ?? 0;
235
315
  result.candidates.forEach((raw, i) => {
236
- if (totalAccepted + accepted.length >= cap) {
237
- errors.push({ index: i, code: "candidate_cap_exceeded", message: `run candidate cap (${cap}) reached` });
316
+ if (accepted.length >= scoutBudget) {
317
+ errors.push({
318
+ index: i,
319
+ code: "candidate_cap_exceeded",
320
+ message: `run candidate cap (${cap}) reached; this scout's fair share was ${scoutBudget}`,
321
+ });
238
322
  return;
239
323
  }
240
324
  const shapeRes = (0, protocol_1.validateCandidateShape)(raw, i);
@@ -268,7 +352,6 @@ async function ingestRun(input) {
268
352
  errors.push({ index: -1, code: "persistence_failed", message: e instanceof Error ? e.message : String(e) });
269
353
  }
270
354
  }
271
- totalAccepted += accepted.length;
272
355
  scoutState[scout] =
273
356
  status === "complete" ? { status: "complete", candidateCount: accepted.length } : { status, error: "kb-add persistence failed" };
274
357
  outcomes.push({
@@ -283,6 +366,8 @@ async function ingestRun(input) {
283
366
  const allComplete = SCOUT_SLOTS.every((s) => scoutState[s].status === "complete");
284
367
  const state = {
285
368
  workspaceId: env.workspaceId,
369
+ runId,
370
+ repositoryRoot: env.repositoryRoot,
286
371
  schemaVersion: 1,
287
372
  status: allComplete ? "complete" : "partial",
288
373
  updatedAt: now,
@@ -0,0 +1,81 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MATERIALIZE_SHARE_MESSAGE = void 0;
4
+ exports.isDurableRuleKind = isDurableRuleKind;
5
+ exports.candidateToManagedRule = candidateToManagedRule;
6
+ exports.materializeRules = materializeRules;
7
+ // src/lib/enrichment/materialize-rules.ts
8
+ //
9
+ // The bridge from an accepted onboarding candidate to the mla-managed rule file
10
+ // (.meetless/rules.md). Memo Phase 1, line 535: "accepted DURABLE rules materialize into the
11
+ // managed file; accepted decisions enter governed knowledge and do NOT silently become rules."
12
+ // This is the one place that enforces that split (INV-AUTH-1 / INV-AUTH-2):
13
+ // - constraint / convention / boundary -> a durable RULE, materialized into the file.
14
+ // - decision -> governed knowledge only; NEVER touches the file.
15
+ // - deprecation -> a staleness signal, not an injected rule; skipped.
16
+ // The function is pure (text in, text out) so the required invariant test "accepting a decision
17
+ // does not modify the managed rules file" is a byte-equality assertion, and re-materializing the
18
+ // same accepted rule is idempotent (managed-rules upsert dedupes by content-derived id).
19
+ //
20
+ // Layering: enrichment depends on the scanner's managed-rules engine, never the reverse. The
21
+ // managed file stays dependency-light (it knows only ./types); this module owns the mapping.
22
+ const managed_rules_1 = require("../scanner/managed-rules");
23
+ // The CLI prints this verbatim after a materialize so the operator knows the rule is live in
24
+ // their own session immediately and that sharing it is an explicit, un-automated git step (the
25
+ // memo forbids mla from auto-committing or auto-pushing).
26
+ exports.MATERIALIZE_SHARE_MESSAGE = "Effective locally. Commit and push to share with teammates.";
// The candidate kinds that are DURABLE repository rules (normative, always-on policy). A decision
// is a point-in-time choice that enters governed knowledge but is not an always-on rule
// (INV-AUTH-2); a deprecation is a staleness signal. Neither materializes into the rule file.
const DURABLE_RULE_KINDS = new Set(["constraint", "convention", "boundary"]);
/**
 * Report whether a candidate kind is one of the durable rule kinds.
 *
 * @param {string} kind Candidate kind to test.
 * @returns {boolean} true only for "constraint", "convention" or "boundary".
 */
function isDurableRuleKind(kind) {
    return DURABLE_RULE_KINDS.has(kind);
}
// Pull the rule's provenance out of its evidence so the materialized rule keeps a citation back to
// the source the scout grounded it in. File evidence contributes its path; commit evidence
// contributes `commit:<sha>`. Deduped + sorted downstream by makeManagedRule.
/**
 * @param {{ evidence: Array<{ type: string, path?: string, commit?: string }> }} candidate
 * @returns {string[]} One source string per evidence entry, in evidence order. Any entry
 *   whose type is not "file" is rendered as `commit:<commit>`.
 */
function candidateSources(candidate) {
    return candidate.evidence.map((ev) => (ev.type === "file" ? ev.path : `commit:${ev.commit}`));
}
// Map one durable-rule candidate to a ManagedRule. Strength defaults to the conservative
// SHOULD_FOLLOW: an onboarding candidate carries no explicit MUST signal, and only an explicit
// human escalation should earn must-follow injection (memo: "only an explicit MUST"). Scope is
// repository-wide (candidates carry no glob today); a future scoped-rule signal slots in here.
/**
 * @param {{ statement: string, evidence: Array<object> }} candidate Accepted candidate.
 * @returns {*} Whatever makeManagedRule builds from the candidate's statement, the fixed
 *   "SHOULD_FOLLOW" strength, and the sources derived from its evidence.
 */
function candidateToManagedRule(candidate) {
    return (0, managed_rules_1.makeManagedRule)({
        statement: candidate.statement,
        strength: "SHOULD_FOLLOW",
        sources: candidateSources(candidate),
    });
}
// Materialize the accepted candidates into the managed file content. `existingText` is the current
// file (pass "" when it does not exist yet). The result is a full re-render, so ordering is
// deterministic and the write is idempotent regardless of how many times the same rule is accepted.
/**
 * @param {string} existingText Current managed-file content ("" when the file is absent).
 * @param {Array<{ statement: string, kind: string, evidence: Array<object> }>} accepted
 *   Accepted candidates, processed in order.
 * @returns {{ text: string, materialized: Array<*>, skipped: Array<{ statement: string, kind: string, reason: string }>, changed: boolean }}
 *   `text` is the new file content, `materialized` the rules that were upserted, `skipped`
 *   the candidates left out with a reason ("empty_statement" or "not_a_durable_rule"), and
 *   `changed` whether `text` differs from `existingText`.
 */
function materializeRules(existingText, accepted) {
    let rules = (0, managed_rules_1.parseManagedRules)(existingText);
    const materialized = [];
    const skipped = [];
    for (const c of accepted) {
        // A missing or whitespace-only statement cannot become a rule.
        if (!c.statement || c.statement.trim().length === 0) {
            skipped.push({ statement: c.statement ?? "", kind: c.kind, reason: "empty_statement" });
            continue;
        }
        if (!isDurableRuleKind(c.kind)) {
            // A decision or deprecation: governed knowledge / staleness, never an always-on rule.
            skipped.push({ statement: c.statement, kind: c.kind, reason: "not_a_durable_rule" });
            continue;
        }
        const rule = candidateToManagedRule(c);
        rules = (0, managed_rules_1.upsertManagedRule)(rules, rule);
        materialized.push(rule);
    }
    // Re-render from the (possibly unchanged) rule set. When nothing durable was accepted, parsing
    // then re-rendering the original could differ from the raw input only by formatting; to make the
    // "decision does not modify the file" guarantee exact, short-circuit to the original bytes when
    // no rule was materialized.
    if (materialized.length === 0) {
        return { text: existingText, materialized, skipped, changed: false };
    }
    const text = (0, managed_rules_1.renderManagedRules)(rules);
    return { text, materialized, skipped, changed: text !== existingText };
}
@@ -34,10 +34,23 @@ const META_END_MARK = "@@MLA-ENRICH-ENDMETA@@";
34
34
  function defaultGitRunner(repoRoot) {
35
35
  return (args) => (0, node_child_process_1.execFileSync)("git", args, { cwd: repoRoot, encoding: "utf8", maxBuffer: 64 * 1024 * 1024 });
36
36
  }
37
- const TIER_RANK = { T1: 0, T2: 1, T3: 99, T4: 2 };
// Within-target ordering band (lower is read first). T1 instruction files first; then,
// among T2, curated decision/instruction-adjacent docs (known doc names, ADR/RFC/spec
// dirs) ahead of arbitrary prose, so a tight target budget surfaces a repo's ADRs and
// package READMEs instead of spending slots on generic marketing .md that merely sorts
// early; T4 legacy notes last. Path breaks ties so the plan stays deterministic. T3 is
// grounding-only and never a target.
/**
 * @param {string} path Repository-relative path of the document.
 * @param {string} tier Document tier ("T1", "T2" or "T4" for targets).
 * @returns {number} 0 for T1, 3 for T4; for any other tier, 1 when isCuratedDoc(path)
 *   is truthy and 2 otherwise.
 */
function targetBand(path, tier) {
    if (tier === "T1")
        return 0;
    if (tier === "T4")
        return 3;
    return (0, score_1.isCuratedDoc)(path) ? 1 : 2; // T2: curated docs above generic prose
}
38
50
  // Rank the doc targets the documentation scout should read: T1 instruction files first,
39
- // then T2 decision docs, then T4 legacy notes; within a tier, deterministic by path.
40
- // T3 (grounding-only) and unclassified files are excluded. Capped to the limit.
51
+ // then curated T2 decision docs, then generic prose, then T4 legacy notes; within a
52
+ // band, deterministic by path. T3 (grounding-only) and unclassified files are excluded.
53
+ // Capped to the limit.
41
54
  function buildDocumentationTargets(repoRoot, limit, gitRunner = defaultGitRunner(repoRoot)) {
42
55
  let tracked;
43
56
  try {
@@ -56,7 +69,7 @@ function buildDocumentationTargets(repoRoot, limit, gitRunner = defaultGitRunner
56
69
  continue;
57
70
  scored.push({ path, tier });
58
71
  }
59
- scored.sort((a, b) => TIER_RANK[a.tier] - TIER_RANK[b.tier] || a.path.localeCompare(b.path));
72
+ scored.sort((a, b) => targetBand(a.path, a.tier) - targetBand(b.path, b.tier) || a.path.localeCompare(b.path));
60
73
  return scored.slice(0, Math.max(0, limit)).map((s, i) => ({ path: s.path, tier: s.tier, rank: i + 1 }));
61
74
  }
62
75
  // Prepare a bounded slice of recent git history: the commit allowlist (full SHAs) plus
@@ -67,12 +80,13 @@ function buildDocumentationTargets(repoRoot, limit, gitRunner = defaultGitRunner
67
80
  // HEAD); it is a future toggle.
68
81
  function prepareGitEvidence(repoRoot, opts) {
69
82
  const gitRunner = opts.gitRunner ?? defaultGitRunner(repoRoot);
83
+ const scanCap = Math.max(0, opts.maxScanCommits);
70
84
  let raw;
71
85
  try {
72
86
  raw = gitRunner([
73
87
  "log",
74
88
  `-n`,
75
- String(Math.max(0, opts.maxCommits)),
89
+ String(scanCap),
76
90
  "--no-merges",
77
91
  "--date=iso-strict",
78
92
  "--name-status",
@@ -82,24 +96,33 @@ function prepareGitEvidence(repoRoot, opts) {
82
96
  catch {
83
97
  return { evidence: [], truncated: false }; // empty history / not a repo: no evidence
84
98
  }
99
+ // Scan a WIDE window (maxScanCommits) but inline only maxSelectedCommits (verdict item 7).
100
+ // The byte budget SKIPS rather than HALTS: a single fat commit (huge body / many files)
101
+ // no longer starves the rest, so the recency-ordered fill reaches deeper into the pool
102
+ // and the scout sees more distinct decisions within the same byte budget. Selection stays
103
+ // deterministic (recency order) and taste-free; substance ranking is a future toggle, not
104
+ // built here. The first commit is always kept even if it alone exceeds the byte budget, so
105
+ // a repo whose newest commit is oversized still yields evidence.
85
106
  const parsed = parseGitLog(raw);
86
107
  const evidence = [];
87
108
  let bytes = 0;
88
109
  let truncated = false;
89
110
  for (const commit of parsed) {
90
- if (evidence.length >= opts.maxCommits) {
111
+ if (evidence.length >= opts.maxSelectedCommits) {
91
112
  truncated = true;
92
113
  break;
93
114
  }
94
115
  const size = Buffer.byteLength(JSON.stringify(commit), "utf8");
95
116
  if (bytes + size > opts.maxBytes && evidence.length > 0) {
96
117
  truncated = true;
97
- break; // keep at least one commit even if it alone exceeds the byte budget
118
+ continue; // skip this oversized commit, keep filling from smaller later ones
98
119
  }
99
120
  bytes += size;
100
121
  evidence.push(commit);
101
122
  }
102
- if (parsed.length > evidence.length)
123
+ // Truncated if anything in the scanned pool was dropped, OR the scan itself hit its
124
+ // ceiling (there may be older commits the scan never reached).
125
+ if (parsed.length > evidence.length || parsed.length >= scanCap)
103
126
  truncated = true;
104
127
  return { evidence, truncated };
105
128
  }
@@ -206,23 +229,56 @@ function loadRunRecord(home, workspaceId, runId) {
206
229
  return null;
207
230
  }
208
231
  }
209
- // Keep only the current active run record; drop older ones (§5b: no run-history
210
- // retention). runId collisions are impossible (random), so "older" == "any other".
211
- function pruneOldRuns(home, workspaceId, currentRunId) {
/**
 * Resolve a path with realpathSync, falling back to the input unchanged when resolution
 * throws, so callers can always compare the result.
 *
 * @param {string} p Path to resolve.
 * @returns {string} The resolved path, or `p` itself when realpathSync throws.
 */
function safeRealpath(p) {
    try {
        return (0, node_fs_1.realpathSync)(p);
    }
    catch {
        // Deliberate best-effort: an unresolvable path is returned as given.
        return p;
    }
}
240
+ // Keep only the current active run record FOR THIS REPO; drop this repo's older ones
241
+ // (§5b: no run-history retention). A workspace can bind more than one repo (the Meetless
242
+ // monorepo and intel share one), so "older" must mean "same repo, different runId", never
243
+ // "any other run": deleting another repo's in-flight run would strand its resume/ingest.
244
+ // We compare repositoryRoot by realpath (symlink/`..` safe). Records we cannot read are
245
+ // left alone (harmless; ingest loads strictly by runId). The paired `<runId>.state.json`
246
+ // is dropped with its record so stale resume state never lingers.
247
+ function pruneOldRuns(home, workspaceId, currentRunId, currentRepoRoot) {
212
248
  const dir = runsDir(home, workspaceId);
213
249
  if (!(0, node_fs_1.existsSync)(dir))
214
250
  return 0;
251
+ const currentRepoReal = safeRealpath(currentRepoRoot);
215
252
  let removed = 0;
216
253
  for (const name of (0, node_fs_1.readdirSync)(dir)) {
217
- if (!name.endsWith(".json") || name === `${currentRunId}.json`)
254
+ // Only run-record files (`<runId>.json`); skip state sidecars and the current record.
255
+ if (!name.endsWith(".json") || name.endsWith(".state.json") || name === `${currentRunId}.json`)
256
+ continue;
257
+ const recordPath = (0, node_path_1.join)(dir, name);
258
+ let sameRepo = false;
259
+ try {
260
+ const rec = JSON.parse((0, node_fs_1.readFileSync)(recordPath, "utf8"));
261
+ sameRepo = safeRealpath(rec.repositoryRoot) === currentRepoReal;
262
+ }
263
+ catch {
264
+ continue; // unreadable / corrupt: leave it, do not risk deleting another repo's run
265
+ }
266
+ if (!sameRepo)
218
267
  continue;
219
268
  try {
220
- (0, node_fs_1.unlinkSync)((0, node_path_1.join)(dir, name));
269
+ (0, node_fs_1.unlinkSync)(recordPath);
221
270
  removed++;
222
271
  }
223
272
  catch {
224
273
  // best-effort cleanup; a leftover record is harmless (ingest loads by runId)
225
274
  }
275
+ // Drop the paired resume-state sidecar, if any, so it cannot outlive its record.
276
+ try {
277
+ (0, node_fs_1.unlinkSync)((0, node_path_1.join)(dir, `${name.slice(0, -".json".length)}.state.json`));
278
+ }
279
+ catch {
280
+ // no sidecar (run never ingested) or already gone: nothing to do
281
+ }
226
282
  }
227
283
  return removed;
228
284
  }
@@ -234,7 +290,8 @@ function createPlan(input) {
234
290
  const gitRunner = input.gitRunner ?? defaultGitRunner(input.repositoryRoot);
235
291
  const documentationTargets = buildDocumentationTargets(input.repositoryRoot, limits.maxDocumentTargets, gitRunner);
236
292
  const { evidence: historyEvidence, truncated: historyTruncated } = prepareGitEvidence(input.repositoryRoot, {
237
- maxCommits: limits.maxHistoryCommits,
293
+ maxScanCommits: limits.maxHistoryScanCommits,
294
+ maxSelectedCommits: limits.maxHistorySelectedCommits,
238
295
  maxBytes: limits.maxPreparedInputBytes,
239
296
  gitRunner,
240
297
  });
@@ -248,6 +305,6 @@ function createPlan(input) {
248
305
  historyEvidence,
249
306
  });
250
307
  const recordPath = writeRunRecord(input.home, run);
251
- const pruned = pruneOldRuns(input.home, input.workspaceId, input.runId);
308
+ const pruned = pruneOldRuns(input.home, input.workspaceId, input.runId, input.repositoryRoot);
252
309
  return { run, recordPath, pruned, historyTruncated };
253
310
  }
@@ -7,7 +7,8 @@
7
7
  // real file length, fs/network) live in ingest.ts; clock + id injection lives in
8
8
  // plan.ts. See notes/20260626-mla-agent-onboarding-enrichment-plan.md (§5, §5b, §6, §6b, §8).
9
9
  Object.defineProperty(exports, "__esModule", { value: true });
10
- exports.SCOUT_STATUSES = exports.SCOUT_NAMES = exports.ENRICHMENT_KINDS = exports.MIN_COMMIT_SHA_LENGTH = exports.MAX_EVIDENCE_PER_CANDIDATE = exports.MIN_STATEMENT_LENGTH = exports.MAX_STATEMENT_LENGTH = exports.DEFAULT_BUDGET_MS = exports.MAX_CANDIDATES_TOTAL = exports.MAX_PREPARED_INPUT_BYTES = exports.MAX_HISTORY_COMMITS = exports.MAX_DOCUMENT_TARGETS = exports.PROTOCOL_VERSION = void 0;
10
+ exports.SCOUT_STATUSES = exports.SCOUT_NAMES = exports.ENRICHMENT_KINDS = exports.RATIONALE_SOURCES = exports.MAX_RATIONALE_LENGTH = exports.MIN_COMMIT_SHA_LENGTH = exports.MAX_EVIDENCE_PER_CANDIDATE = exports.MIN_STATEMENT_LENGTH = exports.MAX_STATEMENT_LENGTH = exports.REVIEW_BATCH_DEFAULT = exports.DEFAULT_BUDGET_MS = exports.MAX_CANDIDATES_TOTAL = exports.MAX_PREPARED_INPUT_BYTES = exports.MAX_HISTORY_SELECTED_COMMITS = exports.MAX_HISTORY_SCAN_COMMITS = exports.MAX_DOCUMENT_TARGETS = exports.PROTOCOL_VERSION = void 0;
11
+ exports.selectReviewBatch = selectReviewBatch;
11
12
  exports.normalizeStatement = normalizeStatement;
12
13
  exports.candidateAnchors = candidateAnchors;
13
14
  exports.candidateId = candidateId;
@@ -25,16 +26,47 @@ const crypto_1 = require("crypto");
25
26
  exports.PROTOCOL_VERSION = 1;
26
27
  // Input bounds (§8). Explicit MVP constants; only the time budget is configurable.
27
28
  exports.MAX_DOCUMENT_TARGETS = 20;
28
- exports.MAX_HISTORY_COMMITS = 40;
29
+ // History bounds are SPLIT into scan vs selected (verdict item 7): the scan window is the
30
+ // pool `git log` walks (bounded, never the whole repo), and the selected count is what we
31
+ // actually inline as the commit allowlist. Keeping them distinct lets the byte-budget fill
32
+ // reach DEEPER than the inline cap (a single fat commit no longer starves the rest) without
33
+ // ever loading an unbounded log. scan >= selected by construction.
34
+ exports.MAX_HISTORY_SCAN_COMMITS = 300;
35
+ exports.MAX_HISTORY_SELECTED_COMMITS = 40;
29
36
  exports.MAX_PREPARED_INPUT_BYTES = 200_000;
30
- exports.MAX_CANDIDATES_TOTAL = 20; // ceiling, not a target; zero is valid
37
+ exports.MAX_CANDIDATES_TOTAL = 20; // EXTRACTION ceiling, not a target; zero is valid
31
38
  exports.DEFAULT_BUDGET_MS = 240_000;
39
+ // The REVIEW batch is the human-facing presentation size, kept deliberately SEPARATE
40
+ // from the extraction cap above (notes/20260624-mla-new-user-value-and-brownfield-proof.md,
41
+ // Phase 2). A run may surface up to MAX_CANDIDATES_TOTAL (20) candidates, but dumping 20
42
+ // PENDING items on a reviewer in one screen is how review queues rot. The reviewer sees
43
+ // REVIEW_BATCH_DEFAULT at a time and the remainder sits behind "show more". This bounds
44
+ // the reviewer's cognitive load WITHOUT lowering extraction recall, so it must never be
45
+ // folded into EnrichmentLimits (that would re-conflate the two the plan tells us to split).
46
+ exports.REVIEW_BATCH_DEFAULT = 6; // within the plan's 5-8 window
// Pure: split a count of pending-review items into the first batch and the remainder.
// A non-positive batchSize disables batching (show everything). Both inputs are
// sanitized: total is floored and clamped at zero, and batchSize is floored with
// non-finite values (NaN, Infinity) treated as "no batching", so garbage input can never
// produce a negative, fractional or NaN count.
/**
 * @param {number} total Number of pending-review items.
 * @param {number} [batchSize] Items to show at once; defaults to REVIEW_BATCH_DEFAULT.
 * @returns {{ shown: number, remaining: number, total: number, hasMore: boolean }}
 */
function selectReviewBatch(total, batchSize = exports.REVIEW_BATCH_DEFAULT) {
    const t = Math.max(0, Math.floor(Number.isFinite(total) ? total : 0));
    // Without this, NaN slips past both comparisons below and a fractional size leaks
    // straight into `shown` / `remaining`.
    const size = Number.isFinite(batchSize) ? Math.floor(batchSize) : 0;
    if (size <= 0 || t <= size) {
        return { shown: t, remaining: 0, total: t, hasMore: false };
    }
    return { shown: size, remaining: t - size, total: t, hasMore: true };
}
32
57
  // Defensive bounds NOT pinned by the plan (§5 says only "max statement length" and
33
58
  // "allowed kind"); these are conservative defaults, tune freely.
34
59
  exports.MAX_STATEMENT_LENGTH = 500;
35
60
  exports.MIN_STATEMENT_LENGTH = 1; // non-empty after normalization; no semantic floor (the human governs durability)
36
61
  exports.MAX_EVIDENCE_PER_CANDIDATE = 12;
37
62
  exports.MIN_COMMIT_SHA_LENGTH = 7; // git's conventional abbreviation floor
63
+ exports.MAX_RATIONALE_LENGTH = 1000; // rationale is a short "why", not an essay
64
+ // Provenance of a candidate's rationale (memo Phase 1). The scouts are AGENTS, so any "why"
65
+ // THEY compose is an AGENT_SUMMARY; USER_EXPLICIT is reserved for the human's own words
66
+ // (e.g. a verbatim quote from an instruction file the user wrote). The two must never be
67
+ // conflated: presenting an agent paraphrase as user-provided is the exact failure this
68
+ // field exists to prevent, and a missing rationale always beats a fabricated one.
69
+ exports.RATIONALE_SOURCES = ["USER_EXPLICIT", "AGENT_SUMMARY"];
38
70
  exports.ENRICHMENT_KINDS = [
39
71
  "constraint",
40
72
  "decision",
@@ -133,7 +165,8 @@ function computePlanDigest(run) {
133
165
  function defaultLimits(budgetMs = exports.DEFAULT_BUDGET_MS) {
134
166
  return {
135
167
  maxDocumentTargets: exports.MAX_DOCUMENT_TARGETS,
136
- maxHistoryCommits: exports.MAX_HISTORY_COMMITS,
168
+ maxHistoryScanCommits: exports.MAX_HISTORY_SCAN_COMMITS,
169
+ maxHistorySelectedCommits: exports.MAX_HISTORY_SELECTED_COMMITS,
137
170
  maxPreparedInputBytes: exports.MAX_PREPARED_INPUT_BYTES,
138
171
  maxCandidatesTotal: exports.MAX_CANDIDATES_TOTAL,
139
172
  budgetMs,
@@ -158,7 +191,7 @@ function resolveAllowedCommit(allowlist, cited) {
158
191
  return prefixed.length === 1 ? prefixed[0] : null;
159
192
  }
160
193
  // --- Pure shape validators -------------------------------------------------------
161
- const CANDIDATE_FIELDS = new Set(["kind", "statement", "evidence", "sourceScout"]);
194
+ const CANDIDATE_FIELDS = new Set(["kind", "statement", "evidence", "sourceScout", "rationale", "rationaleSource"]);
162
195
  const FILE_EVIDENCE_FIELDS = new Set(["type", "path", "startLine", "endLine"]);
163
196
  const COMMIT_EVIDENCE_FIELDS = new Set(["type", "commit", "path"]);
164
197
  function isPlainObject(v) {
@@ -226,6 +259,10 @@ function validateCandidateShape(raw, index) {
226
259
  if (sourceScout === "history" && !validEvidence.some((e) => e.type === "commit")) {
227
260
  err("missing_commit_anchor", "history candidate requires at least one commit anchor", "evidence");
228
261
  }
262
+ // Rationale provenance (memo Phase 1): rationale and rationaleSource are paired. A
263
+ // non-empty rationale must declare a valid source; a null/absent rationale must NOT carry
264
+ // an orphan source claiming provenance for nothing. Missing rationale is always allowed.
265
+ const rationale = validateRationale(raw, err);
229
266
  if (errors.length > 0)
230
267
  return { ok: false, errors };
231
268
  return {
@@ -235,9 +272,52 @@ function validateCandidateShape(raw, index) {
235
272
  statement: statement,
236
273
  evidence: validEvidence,
237
274
  sourceScout: sourceScout,
275
+ rationale: rationale.rationale,
276
+ rationaleSource: rationale.rationaleSource,
238
277
  },
239
278
  };
240
279
  }
280
// Validate the rationale/rationaleSource pair on a raw candidate, reporting every problem
// through `err(code, message, field)`.
//
// Returns the canonicalized pair, which is always internally consistent:
//   - a valid rationale (non-empty after trimming, within MAX_RATIONALE_LENGTH) paired with a
//     valid declared source yields { rationale: <trimmed>, rationaleSource: <source> };
//   - every other case, including an absent/null rationale AND any input that produced an
//     error, yields { rationale: null, rationaleSource: null }.
// Returning the null pair on error keeps the two fields from drifting apart (a source with
// no rationale, or a rationale with no source) even if a caller forgets to check for errors
// before reading the result. Whitespace-only rationale is rejected (omit the field or send
// null instead of an empty "why").
function validateRationale(raw, err) {
    const rawRationale = raw.rationale;
    const rawSource = raw.rationaleSource;
    const hasRationale = rawRationale !== undefined && rawRationale !== null;
    const hasSource = rawSource !== undefined && rawSource !== null;
    // Fresh object per call so a caller mutating the result cannot affect later calls.
    const absentPair = () => ({ rationale: null, rationaleSource: null });
    // Track locally whether this pair was rejected; `err` only reports, it returns nothing.
    let rejected = false;
    const reject = (code, message, field) => {
        rejected = true;
        err(code, message, field);
    };
    if (!hasRationale) {
        // No rationale: a source would be an orphan claiming provenance for nothing.
        if (hasSource) {
            reject("orphan_rationale_source", "rationaleSource set without a rationale", "rationaleSource");
        }
        return absentPair();
    }
    if (typeof rawRationale !== "string") {
        reject("bad_rationale", "rationale must be a string or null", "rationale");
        return absentPair();
    }
    const trimmed = rawRationale.trim();
    if (trimmed.length < 1) {
        reject("empty_rationale", "rationale is empty; omit it or send null instead", "rationale");
    }
    else if (trimmed.length > exports.MAX_RATIONALE_LENGTH) {
        reject("rationale_too_long", `rationale exceeds ${exports.MAX_RATIONALE_LENGTH} chars`, "rationale");
    }
    // The source is checked even when the rationale itself was rejected, so a single pass
    // reports every problem with the pair.
    if (!hasSource) {
        reject("missing_rationale_source", `rationale requires rationaleSource (one of: ${exports.RATIONALE_SOURCES.join(", ")})`, "rationaleSource");
    }
    else if (typeof rawSource !== "string" || !exports.RATIONALE_SOURCES.includes(rawSource)) {
        reject("bad_rationale_source", `rationaleSource must be one of: ${exports.RATIONALE_SOURCES.join(", ")}`, "rationaleSource");
    }
    // Only a fully valid pair is surfaced; any rejection collapses to the null pair.
    return rejected ? absentPair() : { rationale: trimmed, rationaleSource: rawSource };
}
241
321
  function validateEvidenceShape(raw, candidateIndex, evidenceIndex, err) {
242
322
  const field = `evidence[${evidenceIndex}]`;
243
323
  if (!isPlainObject(raw)) {
@@ -21,6 +21,7 @@
21
21
  Object.defineProperty(exports, "__esModule", { value: true });
22
22
  exports.SCOUT_AGENT_NAME = exports.SCOUT_TOOL_ALLOWLIST = void 0;
23
23
  exports.buildScoutPrompt = buildScoutPrompt;
24
+ const protocol_1 = require("./protocol");
24
25
  const scout_mission_1 = require("../scanner/scout-mission");
25
26
  // The capability each scout role is granted. Read-only for documentation; no tools
26
27
  // for history (the plan precomputes and inlines its evidence). Deliberately narrow:
@@ -97,6 +98,13 @@ function toolLine(role) {
97
98
  }
98
99
  return `Your only tools are: ${tools.join(", ")}. Do not attempt any other tool.`;
99
100
  }
101
// A single scout's even share of the run-wide candidate budget. Ingest deals that budget
// round-robin across scouts, so an even split is what a fully-producing scout can expect
// to keep; quoting the figure in the prompt discourages a scout from producing candidates
// that ingest would only drop. The share is floored to a whole number and never falls
// below 1, so a scout is never told to surface nothing.
function perScoutTarget(run) {
    const totalBudget = run.limits.maxCandidatesTotal;
    const scoutCount = protocol_1.SCOUT_NAMES.length;
    const evenShare = Math.floor(totalBudget / scoutCount);
    return Math.max(1, evenShare);
}
100
108
  function renderOutputContract(run, role) {
101
109
  const evidenceExample = role === "documentation"
102
110
  ? '{ "type": "file", "path": "<one of the documents above>", "startLine": 10, "endLine": 24 }'
@@ -113,20 +121,34 @@ function renderOutputContract(run, role) {
113
121
  ' "candidates": [',
114
122
  " {",
115
123
  ' "kind": "<one of the kinds listed above>",',
116
- ' "statement": "<one specific claim, 500 characters or fewer>",',
124
+ ` "statement": "<one specific claim, ${protocol_1.MAX_STATEMENT_LENGTH} characters or fewer>",`,
117
125
  ` "evidence": [ ${evidenceExample} ],`,
118
- ` "sourceScout": "${role}"`,
126
+ ` "sourceScout": "${role}",`,
127
+ ' "rationale": "<optional: WHY this governs, in YOUR words, or omit it entirely>",',
128
+ ' "rationaleSource": "AGENT_SUMMARY" // omit alongside rationale; for a scout it is always AGENT_SUMMARY',
119
129
  " }",
120
130
  " ]",
121
131
  "}",
122
132
  "",
123
133
  anchorRule,
124
- `Surface at most ${run.limits.maxCandidatesTotal} candidates total across all scouts; ` +
125
- "choose the highest-value ones rather than padding.",
134
+ "The `rationale` and `rationaleSource` fields are OPTIONAL. Include a rationale only when " +
135
+ "the evidence makes the WHY non-obvious, and keep it to one short sentence. You are an " +
136
+ 'agent, so your rationale is always `"AGENT_SUMMARY"`: it is recorded as your paraphrase, ' +
137
+ "never as the user's own words. Do NOT invent a rationale to look thorough: omitting both " +
138
+ "fields is always better than a fabricated reason.",
139
+ `Keep each statement to ${protocol_1.MAX_STATEMENT_LENGTH} characters or fewer: a longer statement is ` +
140
+ "rejected outright at ingest, not truncated, so state the claim concisely and let the " +
141
+ "evidence anchor carry the detail.",
142
+ `Aim for the highest-value ${perScoutTarget(run)} candidates or fewer. The run keeps at most ` +
143
+ `${run.limits.maxCandidatesTotal} candidates total and shares that budget fairly across the ` +
144
+ `${protocol_1.SCOUT_NAMES.length} scouts, so candidates past your share are dropped at ingest. Pick the ` +
145
+ "highest-value ones rather than padding.",
126
146
  'Zero candidates with status "complete" is a valid, successful result: only record a',
127
147
  "candidate you can anchor to the evidence above.",
128
- "Also note any contradictions you see in a short prose summary after the JSON; a",
129
- "contradiction is a flag for the human, not a candidate of its own.",
148
+ "If two sources contradict each other on a governing point, that IS a governance",
149
+ "signal: surface it as a `decision` or `deprecation` candidate that names which",
150
+ "source supersedes which, anchored to both. Do not append free prose; the JSON",
151
+ "object above is the entire output.",
130
152
  ];
131
153
  }
132
154
  /**
@@ -162,6 +184,13 @@ function buildScoutPrompt(run, role) {
162
184
  "Read ONLY these documents, in rank order. The plan already selected and ranked",
163
185
  "them; do not search for, glob, or open any other file.",
164
186
  "",
187
+ `The paths below are relative to the repository root: ${run.repositoryRoot}`,
188
+ "Your working directory may NOT be that root, so read each document by its",
189
+ "absolute path (join the root and the relative path). In every candidate's",
190
+ "evidence, write the path exactly as listed below (relative), not the absolute",
191
+ "one: ingest anchors evidence against the repository root and rejects absolute",
192
+ "paths.",
193
+ "",
165
194
  ...renderDocumentationTargets(run.documentationTargets),
166
195
  ]
167
196
  : [