npm - @zhixuan92/multi-model-agent-core - Versions diffs - 4.0.6 → 4.1.0 - Mend

@zhixuan92/multi-model-agent-core 4.0.6 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

package/dist/tools/debug/tool-config.js CHANGED Viewed

@@ -4,7 +4,7 @@ import { debugBriefSlot } from '../../intake/brief-compiler-slots/debug.js';
 import { debugHeadlineTemplate } from '../../reporting/headline-templates/debug.js';
 import { DEFAULT_TASK_TIMEOUT_MS } from '../../config/schema.js';
 import { SEVERITY_LADDER } from '../../review/templates/finding-criteria.js';
-import { EVIDENCE_RULE_DEBUG, SCOPE_RULE_DEBUG, ANNOTATOR_AWARENESS_DEBUG, } from './implementer-criteria.js';
+import { DEBUG_PURPOSE_ORIENTATION, EVIDENCE_RULE_DEBUG, SCOPE_RULE_DEBUG, ANNOTATOR_AWARENESS_DEBUG, DEBUG_FAILURE_MODES, THOROUGHNESS_REMINDER_DEBUG, } from './implementer-criteria.js';
 export function registerDebug(registry) {
     registry.register({
         routeName: 'debug',
@@ -19,13 +19,22 @@ export function registerDebug(registry) {
     });
 }
 const FINDING_FORMAT_INSTRUCTIONS = [
+    // Orientation goes FIRST — the worker needs to know why this debug
+    // exists (fix specification, not a hint; symptom-vs-cause matters)
+    // before reading the format spec / taxonomy / evidence rules.
+    // Without it, workers point at the failing line and call it the cause.
+    DEBUG_PURPOSE_ORIENTATION,
+    '',
     'Use hypothesis-driven debugging. Use this EXACT per-finding format — both the structured reviewer and the deterministic fallback extract from this same format:',
     '',
     '## Finding 1: <one-line title>',
     '- Severity: critical | high | medium | low',
-    '- Hypothesis: the candidate cause',
-    '- Evidence: trace, log, or code path with file:line',
+    '- Reproduction: command/input/state to trigger the failure',
+    '- Symptom: file:line where the failure surfaces',
+    '- Trace: each step file:line + observed value, ending at the cause',
+    '- Cause: file:line that, if changed, would prevent the failure',
     '- Fix: proposed change (PROPOSE only — do NOT apply the fix)',
+    '- Falsifier: how the maintainer verifies the fix works',
     '',
     '## Finding 2: <one-line title>',
     '- Severity: ...',
@@ -33,13 +42,22 @@ const FINDING_FORMAT_INSTRUCTIONS = [
     '',
     'Rules:',
     '- Each finding heading MUST start with "## Finding N: " (h2, "Finding ", number, colon, title) — number sequentially from 1.',
-    '- Severity / Hypothesis / Evidence / Fix bullets are on their own lines with the labels exactly as shown.',
+    '- Reproduction / Symptom / Trace / Cause / Fix / Falsifier bullets are on their own lines with the labels exactly as shown.',
     '- This is a read-only diagnostic — do NOT edit any file. Propose fixes; the caller applies them.',
     '- Limit yourself to 3-5 most-likely hypotheses. Do not enumerate implausible ones to pad the list.',
     '',
     // Tool sweep #12: shared rubric so worker self-aligns with the annotator.
     SEVERITY_LADDER,
     '',
+    // Debug failure-mode taxonomy. Without this block, workers stop at
+    // the first plausible explanation (often the symptom) instead of
+    // tracing upstream to the actual cause.
+    DEBUG_FAILURE_MODES,
+    '',
+    // Counter-balances the SEVERITY_LADDER's anti-inflation hint and
+    // includes the symptom→cause walk with worked example.
+    THOROUGHNESS_REMINDER_DEBUG,
+    '',
     EVIDENCE_RULE_DEBUG,
     '',
     SCOPE_RULE_DEBUG,

package/dist/tools/debug/tool-config.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/debug/tool-config.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AAGvE,OAAO,EAAE,cAAc,EAAuB,MAAM,4CAA4C,CAAC;AACjG,OAAO,EAAE,qBAAqB,EAAE,MAAM,6CAA6C,CAAC;AACpF,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,4CAA4C,CAAC;AAC7E,OAAO,EACL,mBAAmB,EACnB,gBAAgB,EAChB,yBAAyB,~~GAC1B~~,MAAM,2BAA2B,CAAC;AAEnC,MAAM,UAAU,aAAa,CAAC,QAA6B;IACzD,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,OAAO;QAClB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,WAAW;QACnB,YAAY,EAAE,WAAW;QACzB,gBAAgB,EAAE,SAAS;QAC3B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,2BAA2B,GAAG;IAClC,iKAAiK;IACjK,EAAE;IACF,gCAAgC;IAChC,4CAA4C;IAC5C,~~mCAAmC~~;~~IACnC~~,~~qDAAqD~~;~~IACrD~~,8DAA8D;IAC9D,EAAE;IACF,gCAAgC;IAChC,iBAAiB;IACjB,OAAO;IACP,EAAE;IACF,QAAQ;IACR,8HAA8H;IAC9H,~~2GAA2G~~;~~IAC3G~~,kGAAkG;IAClG,oGAAoG;IACpG,EAAE;IACF,0EAA0E;IAC1E,eAAe;IACf,EAAE;IACF,mBAAmB;IACnB,EAAE;IACF,gBAAgB;IAChB,EAAE;IACF,yBAAyB;CAC1B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,SAAS,oBAAoB,CAAC,SAAoB;IAChD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACpD,OAAO,kCAAkC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;AACrF,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAA+C;IACpE,IAAI,EAAE,OAAO;IACb,QAAQ,EAAE,WAAW;IACrB,SAAS,EAAE,SAAS;IACpB,SAAS,EAAE,cAAc;IACzB,aAAa,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;QAC5B,MAAM,KAAK,GAAa,CAAC,0BAA0B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACpE,IAAI,KAAK,CAAC,OAAO;YAAE,KAAK,CAAC,IAAI,CAAC,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC3D,IAAI,KAAK,CAAC,UAAU;YAAE,KAAK,CAAC,IAAI,CAAC,uBAAuB,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC;QAC5E,MAAM,WAAW,GAAG,oBAAoB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAI,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACzC,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAElC,OAAO;YACL,MAAM;YACN,S
AAS,EAAE,SAAS;YACpB,YAAY,EAAE,cAAc;YAC5B,kBAAkB,EAAE,KAAK;YACzB,IAAI,EAAE,+KAA+K;YACrL,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;YAC3C,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;YACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;YACjD,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;YAC/D,GAAG,EAAE,GAAG,CAAC,cAAc,EAAE,GAAG,IAAI,GAAG,CAAC,GAAG;YACvC,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,UAAU,EAAE,KAAK;YACjB,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;YACtF,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,SAAS;SACtC,CAAC;IACJ,CAAC;IACD,YAAY,EAAE,EAAE,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE,GAAG,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC,CAAC,CAAC,EAAE;IACzG,gBAAgB,EAAE,qBAAqB;IACvC,eAAe,EAAE;QACf,SAAS,EAAE,oBAAoB;KAChC;CACF,CAAC"}
1	+ {"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/debug/tool-config.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AAGvE,OAAO,EAAE,cAAc,EAAuB,MAAM,4CAA4C,CAAC;AACjG,OAAO,EAAE,qBAAqB,EAAE,MAAM,6CAA6C,CAAC;AACpF,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,4CAA4C,CAAC;AAC7E,OAAO,EACL,yBAAyB,EACzB,mBAAmB,EACnB,gBAAgB,EAChB,yBAAyB,EACzB,mBAAmB,EACnB,2BAA2B,GAC5B,MAAM,2BAA2B,CAAC;AAEnC,MAAM,UAAU,aAAa,CAAC,QAA6B;IACzD,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,OAAO;QAClB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,WAAW;QACnB,YAAY,EAAE,WAAW;QACzB,gBAAgB,EAAE,SAAS;QAC3B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,2BAA2B,GAAG;IAClC,mEAAmE;IACnE,mEAAmE;IACnE,8DAA8D;IAC9D,uEAAuE;IACvE,yBAAyB;IACzB,EAAE;IACF,iKAAiK;IACjK,EAAE;IACF,gCAAgC;IAChC,4CAA4C;IAC5C,4DAA4D;IAC5D,iDAAiD;IACjD,oEAAoE;IACpE,gEAAgE;IAChE,8DAA8D;IAC9D,wDAAwD;IACxD,EAAE;IACF,gCAAgC;IAChC,iBAAiB;IACjB,OAAO;IACP,EAAE;IACF,QAAQ;IACR,8HAA8H;IAC9H,6HAA6H;IAC7H,kGAAkG;IAClG,oGAAoG;IACpG,EAAE;IACF,0EAA0E;IAC1E,eAAe;IACf,EAAE;IACF,mEAAmE;IACnE,iEAAiE;IACjE,wCAAwC;IACxC,mBAAmB;IACnB,EAAE;IACF,iEAAiE;IACjE,uDAAuD;IACvD,2BAA2B;IAC3B,EAAE;IACF,mBAAmB;IACnB,EAAE;IACF,gBAAgB;IAChB,EAAE;IACF,yBAAyB;CAC1B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,SAAS,oBAAoB,CAAC,SAAoB;IAChD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACpD,OAAO,kCAAkC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;AACrF,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAA+C;IACpE,IAAI,EAAE,OAAO;IACb,QAAQ,EAAE,WAAW;IACrB,SAAS,EAAE,SAAS;IACpB,SAAS,EAAE,cAAc;IACzB,aAAa,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;QAC5B,MAAM,KAAK,GAAa,CAAC,0BAA0B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACpE,IAAI,KAAK,CAAC,OAAO;YAAE,KAAK,CAAC,IAAI,CAAC,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC3D,IAAI,KAAK,CAAC,UAAU;YAAE,KAAK,CAAC,IAAI,CAAC,uBAAuB,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC;QAC5
E,MAAM,WAAW,GAAG,oBAAoB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAI,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACzC,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAElC,OAAO;YACL,MAAM;YACN,SAAS,EAAE,SAAS;YACpB,YAAY,EAAE,cAAc;YAC5B,kBAAkB,EAAE,KAAK;YACzB,IAAI,EAAE,+KAA+K;YACrL,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;YAC3C,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;YACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;YACjD,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;YAC/D,GAAG,EAAE,GAAG,CAAC,cAAc,EAAE,GAAG,IAAI,GAAG,CAAC,GAAG;YACvC,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,UAAU,EAAE,KAAK;YACjB,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;YACtF,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,SAAS;SACtC,CAAC;IACJ,CAAC;IACD,YAAY,EAAE,EAAE,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE,GAAG,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC,CAAC,CAAC,EAAE;IACzG,gBAAgB,EAAE,qBAAqB;IACvC,eAAe,EAAE;QACf,SAAS,EAAE,oBAAoB;KAChC;CACF,CAAC"}

package/dist/tools/delegate/implementer-criteria.d.ts ADDED Viewed

@@ -0,0 +1,62 @@
+/**
+ * Delegate-specific implementer criteria.
+ *
+ * DELEGATE'S PURPOSE — read this before adding categories.
+ * mma-delegate is the generic dispatcher for ad-hoc implementation
+ * tasks. The caller hands you a `prompt` (and optionally a `done`
+ * acceptance criterion, `filePaths`, `verifyCommand`); your output is
+ * a diff a REVIEWER will read alongside the brief. The success
+ * criterion is:
+ *
+ *   "Could a reviewer who reads only the brief and your diff approve
+ *    the merge without flagging gaps the worker should have caught
+ *    or extras the brief did not authorize?"
+ *
+ * That criterion is what makes a write load-bearing. The reviewer is
+ * NOT a rubber stamp — they will ask "did you finish that?" if the
+ * fix is partial, and "why did you also touch X?" if the diff has
+ * scope creep. Your job is to produce the SMALLEST COMPLETE CHANGE
+ * that satisfies the brief — minimal AND complete simultaneously.
+ *
+ * Delegate is artifact-producing — you write files. Cross-agent
+ * spec + quality + diff review applies. The spec the spec-reviewer
+ * checks against is the BRIEF (prompt + done), not your interpretation
+ * of it. The quality-reviewer checks safety / correctness / style.
+ */
+/**
+ * The orientation block. Goes at the TOP of every delegate prompt.
+ *
+ * Without an explicit orientation, workers default to "implement
+ * something good" — which produces over-implementation (SCOPE CREEP)
+ * and under-implementation (SILENT PARTIAL FIX). With this orientation,
+ * the worker calibrates against the reviewer's standard: minimal +
+ * complete, the brief is the contract.
+ */
+export declare const DELEGATE_PURPOSE_ORIENTATION: string;
+/**
+ * The scope rule for delegate.
+ *
+ * Replaces the prior one-liner with a concrete contract about what
+ * is in scope, what is off-limits, and what to do at the boundary.
+ */
+export declare const DELEGATE_SCOPE_RULE: string;
+/**
+ * The failure-mode taxonomy for delegate.
+ *
+ * Workers calibrated on "implement something good" tend to over-deliver
+ * (scope creep) or under-deliver (silent partial fix). The 9 categories
+ * below are the specific patterns reviewers raise as merge-blockers.
+ */
+export declare const DELEGATE_FAILURE_MODES: string;
+/**
+ * Completeness reminder.
+ *
+ * The shared SEVERITY_LADDER does not apply to write tools. The
+ * counter-balance for delegate is the opposite of the one used for
+ * read-only tools: the typical failures are OVER-IMPLEMENTATION (scope
+ * creep) and UNDER-IMPLEMENTATION (silent partial fix), often both in
+ * the same task. This block tells the worker that the load-bearing
+ * constraint is "minimal AND complete simultaneously".
+ */
+export declare const COMPLETENESS_REMINDER_DELEGATE: string;
+//# sourceMappingURL=implementer-criteria.d.ts.map

package/dist/tools/delegate/implementer-criteria.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/delegate/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH;;;;;;;;GAQG;AACH,eAAO,MAAM,4BAA4B,QAc7B,CAAC;AAEb;;;;;GAKG;AACH,eAAO,MAAM,mBAAmB,QAMpB,CAAC;AAEb;;;;;;GAMG;AACH,eAAO,MAAM,sBAAsB,QAiBvB,CAAC;AAEb;;;;;;;;;GASG;AACH,eAAO,MAAM,8BAA8B,QAe/B,CAAC"}

package/dist/tools/delegate/implementer-criteria.js ADDED Viewed

@@ -0,0 +1,114 @@
+/**
+ * Delegate-specific implementer criteria.
+ *
+ * DELEGATE'S PURPOSE — read this before adding categories.
+ * mma-delegate is the generic dispatcher for ad-hoc implementation
+ * tasks. The caller hands you a `prompt` (and optionally a `done`
+ * acceptance criterion, `filePaths`, `verifyCommand`); your output is
+ * a diff a REVIEWER will read alongside the brief. The success
+ * criterion is:
+ *
+ *   "Could a reviewer who reads only the brief and your diff approve
+ *    the merge without flagging gaps the worker should have caught
+ *    or extras the brief did not authorize?"
+ *
+ * That criterion is what makes a write load-bearing. The reviewer is
+ * NOT a rubber stamp — they will ask "did you finish that?" if the
+ * fix is partial, and "why did you also touch X?" if the diff has
+ * scope creep. Your job is to produce the SMALLEST COMPLETE CHANGE
+ * that satisfies the brief — minimal AND complete simultaneously.
+ *
+ * Delegate is artifact-producing — you write files. Cross-agent
+ * spec + quality + diff review applies. The spec the spec-reviewer
+ * checks against is the BRIEF (prompt + done), not your interpretation
+ * of it. The quality-reviewer checks safety / correctness / style.
+ */
+/**
+ * The orientation block. Goes at the TOP of every delegate prompt.
+ *
+ * Without an explicit orientation, workers default to "implement
+ * something good" — which produces over-implementation (SCOPE CREEP)
+ * and under-implementation (SILENT PARTIAL FIX). With this orientation,
+ * the worker calibrates against the reviewer's standard: minimal +
+ * complete, the brief is the contract.
+ */
+export const DELEGATE_PURPOSE_ORIENTATION = [
+    // Each array entry is one line of the final prompt text; the empty-string
+    // entries become blank separator lines once the array is joined below.
+    // Section 1: what a delegation produces and the bar the diff must clear.
+    'Why this delegation exists:',
+    'mma-delegate produces a diff a reviewer will read alongside the brief. Success = the diff is the SMALLEST COMPLETE CHANGE that satisfies the brief — minimal AND complete simultaneously. A reviewer should not need to ask "did you finish that?" or "why did you also touch X?".',
+    '',
+    // Section 2: concrete rules — exact scope, `filePaths`, `done`,
+    // `verifyCommand`, local conventions, caller updates, and no test-masking.
+    'For your output to clear that bar:',
+    '- Implement EXACTLY what the brief asks for. Not less (SILENT PARTIAL FIX). Not more (SCOPE CREEP).',
+    '- If the brief lists `filePaths`, those are the authorized targets. Existing files in the list = pre-verified to read; non-existent paths in the list = explicit output targets you must create. Files NOT in the list are off-limits to write (touch only when the brief\'s task genuinely requires it, and call out the deviation in your summary).',
+    '- If the brief includes a `done` acceptance criterion, the reviewer will check your diff against that criterion. Match it precisely.',
+    '- If the brief includes a `verifyCommand`, run it after your changes. A green verify is part of "complete"; a red verify is part of "incomplete".',
+    '- Match the surrounding code\'s conventions (naming, import style, error handling, formatting). Inventing patterns instead of matching is convention drift — the reviewer will flag it.',
+    '- If you change a public symbol (exported function signature, exported type, public method), update the callers in the named files. Leaving callers stale is an INCOMPLETE REFACTOR.',
+    '- Do NOT modify tests or fixtures or specs to make a wrong implementation pass. If a test fails, fix the implementation, not the test (unless the brief explicitly says the test is wrong).',
+    '',
+    // Section 3: the closing self-check question the worker asks before finishing.
+    'The completion test: would a reviewer who reads ONLY the brief and your diff approve the merge — or would they raise a concern (gap, scope creep, drift, broken caller, undocumented assumption) you should have caught?',
+    // Joined with newlines so the constant is exported as one multi-line string.
+].join('\n');
+/**
+ * The scope rule for delegate.
+ *
+ * Replaces the prior one-liner with a concrete contract about what
+ * is in scope, what is off-limits, and what to do at the boundary.
+ */
+export const DELEGATE_SCOPE_RULE = [
+    // Header line followed by four bullets, one per scope dimension.
+    'Scope:',
+    // Bullet 1: the brief (`prompt` plus optional `done`) is the contract.
+    '- Strictly what the brief\'s `prompt` (and `done` if present) requests. The brief is the contract.',
+    // Bullet 2: what the worker may read.
+    '- Reading: the named `filePaths` plus what the task obviously implies (caller files when the diff changes a public symbol; sibling test files when the brief changes behavior; types files when the diff changes a typed interface).',
+    // Bullet 3: what the worker may write, and the call-out required for exceptions.
+    '- Writing: existing files in `filePaths` (modify) and non-existent paths in `filePaths` (create). Files outside `filePaths` are off-limits to write unless the brief\'s task genuinely requires it (e.g. updating a caller because the task changed a signature — call this out in your summary).',
+    // Bullet 4: categories of work that are out of scope.
+    '- Out of scope: refactors not in the brief, tangential cleanup ("while I\'m here…"), modifying tests/fixtures/specs to mask a wrong implementation, opportunistic style fixes, dependency upgrades the brief did not request.',
+    // Joined with newlines so the rule is exported as one multi-line string.
+].join('\n');
+/**
+ * The failure-mode taxonomy for delegate.
+ *
+ * Workers calibrated on "implement something good" tend to over-deliver
+ * (scope creep) or under-deliver (silent partial fix). The 9 categories
+ * below are the specific patterns reviewers raise as merge-blockers.
+ */
+export const DELEGATE_FAILURE_MODES = [
+    // Lead-in line, then a blank separator before the numbered taxonomy.
+    'Patterns to consciously check for. Apply on EVERY delegated task:',
+    '',
+    // Nine numbered failure patterns. Each entry is "N. NAME — description",
+    // and the NAME tokens are the same ones used by the other delegate prompt
+    // blocks in this file (e.g. SCOPE CREEP, SILENT PARTIAL FIX).
+    '1. SCOPE CREEP — touched files / added features beyond the brief. The reviewer reads the diff and asks "why did you also change Y?" If you cannot answer with "the brief required it", remove the change.',
+    '2. SILENT PARTIAL FIX — declared done, work demonstrably incomplete. Naming a step in your summary as "done" when the diff does not contain it is the worst delegate failure mode. Either implement it or report explicitly that you did not.',
+    '3. WRONG FILE TARGET — wrote to a path not in `filePaths` (when the caller specified `filePaths`). Existing files outside `filePaths` are off-limits to write. New files outside `filePaths` are scope creep.',
+    '4. PHANTOM TEST PASS — claimed "tests pass" without actually running them, OR ran a non-affected suite (e.g. unit tests pass but the change is in a path covered by integration tests). If the brief includes `verifyCommand`, run that exact command and quote the output.',
+    '5. CROSS-CUTTING DAMAGE — your fix introduced an unrelated regression in the same edit (e.g. fixing a parser bug but breaking the formatter). Re-read the diff before declaring done; check that nothing OTHER than the brief\'s target changed semantically.',
+    '6. CONVENTION DRIFT — invented a naming / import / error-handling / formatting pattern instead of matching the surrounding code. The reviewer will flag this as "matches no neighboring file" — it slows merge.',
+    '7. INCOMPLETE REFACTOR — changed a public symbol (exported function signature, exported type, public method) and did not update its callers. Stale callers either crash at runtime or compile but behave wrong. Update callers in the named files; report in your summary if callers exist outside `filePaths`.',
+    '8. SPEC OVERREACH — modified tests, fixtures, or interface contracts to make a wrong implementation pass, instead of fixing the implementation. If a test is failing, the FIRST hypothesis is that the implementation is wrong, not the test.',
+    '9. UNDOCUMENTED ASSUMPTION — diff relies on the caller doing X (env var set, init function called, dependency installed) without saying so in the brief\'s authoring contract. Either remove the assumption, or document it in your summary so the reviewer can decide if it is acceptable.',
+    '',
+    // Reporting guidance: how to surface issues in the summary, since the
+    // text itself states SEVERITY_LADDER is for read-only tools.
+    'Severity calibration for delegate (in your summary, not via SEVERITY_LADDER which is for read-only tools):',
+    '- Issues you notice but do NOT fix: report in summary so the reviewer can decide.',
+    '- Issues you encounter that block the brief: report and stop. Do not pick a workaround unilaterally.',
+    '- Issues clearly implied by the brief but not literally stated (e.g. "fix bug" implies "regression test added"): implement and name them as "implicit per the brief" in summary.',
+    // Joined with newlines so the taxonomy is exported as one multi-line string.
+].join('\n');
+/**
+ * Completeness reminder.
+ *
+ * The shared SEVERITY_LADDER does not apply to write tools. The
+ * counter-balance for delegate is the opposite of the one used for
+ * read-only tools: the typical failures are OVER-IMPLEMENTATION (scope
+ * creep) and UNDER-IMPLEMENTATION (silent partial fix), often both in
+ * the same task. This block tells the worker that the load-bearing
+ * constraint is "minimal AND complete simultaneously".
+ */
+export const COMPLETENESS_REMINDER_DELEGATE = [
+    // Part 1: states the "smallest complete change" bar and a four-step
+    // pre-completion checklist (the numbered steps are indented sub-items).
+    'Completeness reminder:',
+    '- "Smallest complete change" is the bar. Smallest = no extras. Complete = no gaps.',
+    '- Most workers on first pass either bloat (extra refactor / extra cleanup / extra abstraction) or skim (declared done with the regression test missing). Both are merge-blockers; aim for the intersection.',
+    '- Before declaring done, walk the brief literally:',
+    '    1. List every requirement in the prompt (and `done` if present).',
+    '    2. For each, ask: "is this in my diff?" If no, you are not done.',
+    '    3. Walk the diff in reverse: for each changed file/line, ask: "is this required by a brief item?" If no, remove it.',
+    '    4. If `verifyCommand` is set, run it. Quote the relevant output line in your summary.',
+    '',
+    // Part 2: the two-directional brief-vs-diff walk (brief item -> hunk and
+    // hunk -> brief item), followed by a single worked pagination example.
+    'Brief-vs-diff walk (REQUIRED on every task):',
+    '- For each item in the brief\'s `prompt` and `done`, locate the diff hunk that satisfies it. If you cannot, the item is unsatisfied.',
+    '- For each diff hunk, name the brief item it satisfies. If you cannot, the hunk is scope creep.',
+    '- Worked example. Brief: "fix the off-by-one in `paginate(page, total)` — `total < pageSize` should still produce one page; add a regression test in `tests/pagination.test.ts`." Naive worker rewrites `paginate` as a clean three-liner with new docstrings, skips the test → SILENT PARTIAL FIX (no test) + SCOPE CREEP (rewrote a function that needed a one-line fix). Correct worker: changes one boundary condition in `paginate` (one line of diff in the implementation file), adds one test in `tests/pagination.test.ts` covering the `total < pageSize` case, runs `verifyCommand` if set, quotes the test name and "1 passed" in the summary, stops. Two diff hunks total, both directly tied to the brief.',
+    '- Most workers miss findings of this shape on first pass because the rewrite "feels cleaner". The brief-vs-diff walk forces the question "what did the brief ACTUALLY ask for?".',
+    // Joined with newlines so the reminder is exported as one multi-line string.
+].join('\n');
+//# sourceMappingURL=implementer-criteria.js.map

package/dist/tools/delegate/implementer-criteria.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/delegate/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAAG;IAC1C,6BAA6B;IAC7B,oRAAoR;IACpR,EAAE;IACF,oCAAoC;IACpC,qGAAqG;IACrG,uVAAuV;IACvV,sIAAsI;IACtI,mJAAmJ;IACnJ,yLAAyL;IACzL,sLAAsL;IACtL,6LAA6L;IAC7L,EAAE;IACF,0NAA0N;CAC3N,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;GAKG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,QAAQ;IACR,oGAAoG;IACpG,sOAAsO;IACtO,mSAAmS;IACnS,+NAA+N;CAChO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG;IACpC,mEAAmE;IACnE,EAAE;IACF,2MAA2M;IAC3M,+OAA+O;IAC/O,+MAA+M;IAC/M,6QAA6Q;IAC7Q,+PAA+P;IAC/P,iNAAiN;IACjN,iTAAiT;IACjT,+OAA+O;IAC/O,6RAA6R;IAC7R,EAAE;IACF,4GAA4G;IAC5G,mFAAmF;IACnF,sGAAsG;IACtG,kLAAkL;CACnL,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,8BAA8B,GAAG;IAC5C,wBAAwB;IACxB,oFAAoF;IACpF,6MAA6M;IAC7M,oDAAoD;IACpD,sEAAsE;IACtE,sEAAsE;IACtE,yHAAyH;IACzH,2FAA2F;IAC3F,EAAE;IACF,8CAA8C;IAC9C,sIAAsI;IACtI,iGAAiG;IACjG,0rBAA0rB;IAC1rB,kLAAkL;CACnL,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC"}

package/dist/tools/execute-plan/implementer-criteria.d.ts ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * Execute-plan-specific implementer criteria.
+ *
+ * EXECUTE-PLAN'S PURPOSE — read this before adding categories.
+ * mma-execute-plan implements one task from a plan that was written by a
+ * higher-capability model. Your output is a diff the PLAN AUTHOR will
+ * read. They wrote the plan precisely; your job is execution, not
+ * improvement. The success criterion is:
+ *
+ *   "Could the plan author read your diff and say 'yes, that's exactly
+ *    what I wrote' — not 'close, but you took liberties' or 'wrong, you
+ *    missed step 3'?"
+ *
+ * That criterion is what makes a write load-bearing. The fidelity bar
+ * is sharper than mma-delegate's: even a "better" implementation that
+ * deviates from the plan is wrong here. If you think the plan is wrong:
+ * REPORT IT and stop. Do NOT silently improve.
+ *
+ * Plan execution is artifact-producing — you write files. Cross-agent
+ * spec + quality review still applies. But the spec the spec-reviewer
+ * checks against is the PLAN, not your interpretation of it.
+ */
+/**
+ * The orientation block. Goes at the TOP of every execute-plan prompt.
+ *
+ * Without an explicit fidelity statement, workers default to "implement
+ * the goal" — which produces "improvements" that diverge from the plan
+ * (CODE SUBSTITUTION, ACCEPTANCE-CRITERIA OVERRUN). With this
+ * orientation, the worker treats the plan as authoritative and reports
+ * defects rather than silently working around them.
+ */
+export declare const EXECUTE_PLAN_PURPOSE_ORIENTATION: string;
+/**
+ * The scope rule for execute-plan.
+ *
+ * Prompt text that limits the worker to the single task the descriptor
+ * names and to the files that task authorizes; other plan tasks,
+ * unplanned refactors and opportunistic cleanup are out of scope.
+ */
+export declare const EXECUTE_PLAN_SCOPE_RULE: string;
+/**
+ * The failure-mode taxonomy for execute-plan.
+ *
+ * Workers calibrated on "implement the goal" tend to make "small
+ * improvements" to plans they think are imperfect. The 9 categories
+ * below are the specific ways execution diverges from intent.
+ */
+export declare const EXECUTE_PLAN_FAILURE_MODES: string;
+/**
+ * Plan-fidelity reminder.
+ *
+ * The shared SEVERITY_LADDER does not apply to write tools. The
+ * counter-balance for execute-plan is opposite to read-only tools:
+ * the typical failure is OVER-IMPLEMENTATION (improving the plan), not
+ * under-finding. This block tells the worker the load-bearing
+ * constraint is fidelity, not "good code".
+ */
+export declare const PLAN_FIDELITY_REMINDER: string;
+//# sourceMappingURL=implementer-criteria.d.ts.map

package/dist/tools/execute-plan/implementer-criteria.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/execute-plan/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH;;;;;;;;GAQG;AACH,eAAO,MAAM,gCAAgC,QAiBjC,CAAC;AAEb,eAAO,MAAM,uBAAuB,QAMxB,CAAC;AAEb;;;;;;GAMG;AACH,eAAO,MAAM,0BAA0B,QAiB3B,CAAC;AAEb;;;;;;;;GAQG;AACH,eAAO,MAAM,sBAAsB,QAYvB,CAAC"}

package/dist/tools/execute-plan/implementer-criteria.js ADDED Viewed

@@ -0,0 +1,104 @@
+/**
+ * Execute-plan-specific implementer criteria.
+ *
+ * EXECUTE-PLAN'S PURPOSE — read this before adding categories.
+ * mma-execute-plan implements one task from a plan that was written by a
+ * higher-capability model. Your output is a diff the PLAN AUTHOR will
+ * read. They wrote the plan precisely; your job is execution, not
+ * improvement. The success criterion is:
+ *
+ *   "Could the plan author read your diff and say 'yes, that's exactly
+ *    what I wrote' — not 'close, but you took liberties' or 'wrong, you
+ *    missed step 3'?"
+ *
+ * That criterion is what makes a write load-bearing. The fidelity bar
+ * is sharper than mma-delegate's: even a "better" implementation that
+ * deviates from the plan is wrong here. If you think the plan is wrong:
+ * REPORT IT and stop. Do NOT silently improve.
+ *
+ * Plan execution is artifact-producing — you write files. Cross-agent
+ * spec + quality review still applies. But the spec the spec-reviewer
+ * checks against is the PLAN, not your interpretation of it.
+ */
+/**
+ * The orientation block. Goes at the TOP of every execute-plan prompt.
+ *
+ * Without an explicit fidelity statement, workers default to "implement
+ * the goal" — which produces "improvements" that diverge from the plan
+ * (CODE SUBSTITUTION, ACCEPTANCE-CRITERIA OVERRUN). With this
+ * orientation, the worker treats the plan as authoritative and reports
+ * defects rather than silently working around them.
+ */
+export const EXECUTE_PLAN_PURPOSE_ORIENTATION = [
+    // Each array entry is one line of the final prompt text; the empty-string
+    // entries become blank separator lines once the array is joined below.
+    // Section 1: what plan execution is and who reads the resulting diff.
+    'Why this execution exists:',
+    'mma-execute-plan executes ONE task from a plan written by a higher-capability model. Your output is a diff the PLAN AUTHOR will read. They wrote the plan precisely. Your job is execution, not improvement.',
+    '',
+    // Section 2: the self-check question, phrased from the plan author's view.
+    'The completion test: would the plan author, reading your diff, say "yes, that\'s exactly what I wrote" — or would they say "close, but you took liberties" / "wrong, you missed step 3"?',
+    '',
+    // Section 3: fidelity rules — verbatim code blocks, no redesign, no added
+    // or skipped steps, no scope widening, report plan defects and stop.
+    'Fidelity rules — these override your usual instincts:',
+    '- Follow the plan EXACTLY as written. If the plan provides code blocks, use them VERBATIM (same names, same signatures, same comments, same imports).',
+    '- Do NOT redesign. Do NOT substitute your own approach. Do NOT improve names you find unidiomatic.',
+    '- Do NOT add steps the plan does not list. Do NOT skip steps the plan does list.',
+    '- Do NOT widen scope ("while I\'m here…"). Touch only what this task heading authorizes; another task probably owns the rest.',
+    '- If the plan looks wrong (typo, contradiction, undefined symbol, missing dependency): REPORT IT in your summary and stop. Do NOT silently work around it. Do NOT silently fix it.',
+    '- The plan was written by a higher-capability model than you. Your judgment about "what would be cleaner" is not load-bearing here; the plan is.',
+    '',
+    // Section 4: tells the worker what the spec and quality reviewers compare against.
+    'Reviewer awareness for plan execution:',
+    '- The spec-reviewer compares your diff against the PLAN section, not against general "good code" heuristics. A diff that improves on the plan will fail spec review.',
+    '- The quality-reviewer checks safety/correctness without overriding the plan. If the plan is genuinely unsafe, that surfaces as a quality concern that the caller resolves — not as your unilateral fix.',
+    // Joined with newlines so the constant is exported as one multi-line string.
+].join('\n');
+/**
+ * The scope rule for execute-plan.
+ *
+ * A "Scope:" header followed by four bullets: implement only the task
+ * the descriptor names; touch only the files that task authorizes;
+ * treat other plan tasks, unplanned refactors, cleanup and renames of
+ * plan-provided code as out of scope; and report borderline
+ * cross-cutting work in the summary so the caller decides.
+ */
+export const EXECUTE_PLAN_SCOPE_RULE = [
+    'Scope:',
+    '- Strictly the task the descriptor names. Other tasks in the plan have other workers; do not implement them on the side.',
+    '- Touch only the files the named task authorizes (explicit file paths in the plan section, or files clearly implied by the named task).',
+    '- Out of scope: other plan tasks; refactors not in the plan; "while I\'m here" cleanup; renaming code blocks the plan provided verbatim.',
+    '- Genuinely necessary cross-cutting work (e.g. updating a caller because the plan changed a signature): allowed when the plan implies it. When in doubt, REPORT it as part of your summary and let the caller decide.',
+    // Joined with newlines so the rule is exported as one multi-line string.
+].join('\n');
+/**
+ * The failure-mode taxonomy for execute-plan.
+ *
+ * Workers calibrated on "implement the goal" tend to make "small
+ * improvements" to plans they think are imperfect. The 9 categories
+ * below are the specific ways execution diverges from intent.
+ *
+ * The numbered list is followed by a short "severity calibration"
+ * section that tells the worker what to always report, when to report
+ * and stop, and how to label implicit sub-criteria in the summary.
+ * All lines are joined with '\n' into a single exported string.
+ */
+export const EXECUTE_PLAN_FAILURE_MODES = [
+    'Patterns to consciously check for. Apply on EVERY plan execution:',
+    '',
+    '1. PLAN REWRITE — you decided the plan was suboptimal and "improved" it. This is the worst execute-plan failure mode. The plan author treats the plan as the contract; your improvements are a contract violation.',
+    '2. STEP SKIP — the plan section lists multiple steps; you implemented some and silently omitted others. Every step listed in the plan is a required deliverable unless the plan explicitly marks it optional.',
+    '3. STEP REORDER — you executed plan steps in a different order than the plan specifies. Order may be load-bearing (later steps may depend on earlier ones); preserve it.',
+    '4. CODE SUBSTITUTION — the plan provided a code block (function body, import line, type definition) and you wrote DIFFERENT code that "does the same thing". The plan\'s code is verbatim; copy it. Renaming, reformatting, or replacing with idiomatic equivalents is substitution.',
+    '5. ACCEPTANCE-CRITERIA OVERRUN — the plan listed criteria A and B; you also delivered C ("seemed natural"). Adding extras the plan did not list is scope creep — even if C is technically good code.',
+    '6. ACCEPTANCE-CRITERIA UNDERRUN — the plan implies sub-criteria (e.g. "add the function" implies "add the export to the index file"; "fix the bug" implies "add a regression test"). Missing implicit sub-criteria is the most common silent-partial-fix in plan execution.',
+    '7. WRONG-TASK MATCH — you matched a different plan section than the descriptor names (e.g. matched "Step 4: foo" when descriptor said "Step 4: bar"). The descriptor must match the plan heading verbatim; if no unique match exists, report that and stop.',
+    '8. CROSS-TASK CONTAMINATION — you touched files the named task does not authorize, on the assumption that another task in the plan will eventually need them. Other tasks have other workers; touching their files creates merge conflicts and ownership ambiguity.',
+    '9. PROBLEM-NOT-FLAGGED — you noticed a defect in the plan (typo, contradiction, undefined symbol, broken example) and silently worked around it. The defect must be reported in your summary so the caller can correct the plan; silent workarounds make the next plan execution harder.',
+    '',
+    'Severity calibration for plan execution (in your summary, not via SEVERITY_LADDER which is for read-only tools):',
+    '- Plan defects you notice: ALWAYS report. The caller may have a fix or may want to update the plan first.',
+    '- Sub-criteria you cannot satisfy without deviating from the plan: report and stop. Do not pick a workaround unilaterally.',
+    '- Sub-criteria that are clearly implied but not literally stated: implement them, name them in your summary as "implicit per the task heading".',
+].join('\n');
+/**
+ * Plan-fidelity reminder.
+ *
+ * The shared SEVERITY_LADDER does not apply to write tools. The
+ * counter-balance for execute-plan is opposite to read-only tools:
+ * the typical failure is OVER-IMPLEMENTATION (improving the plan), not
+ * under-finding. This block tells the worker the load-bearing
+ * constraint is fidelity, not "good code".
+ *
+ * The second half is a "code-block faithfulness walk": a verbatim check
+ * the worker applies to each plan-provided code block, with a worked
+ * example (`parse` renamed to `parseTokens`) that refers to two of the
+ * failure modes named in EXECUTE_PLAN_FAILURE_MODES (CODE SUBSTITUTION
+ * and ACCEPTANCE-CRITERIA OVERRUN). All lines are joined with '\n' into
+ * a single exported string.
+ */
+export const PLAN_FIDELITY_REMINDER = [
+    'Plan-fidelity reminder:',
+    '- Your judgment about "what would be cleaner" is NOT load-bearing here. The plan is.',
+    '- Every deviation from the plan needs a reason and a report. Silent deviations are the most common defect.',
+    '- "Smallest faithful change" — touch the minimum the task authorizes, in the order the plan specifies, with the code the plan provides verbatim where provided.',
+    '- If the plan is wrong: report and stop. Do NOT silently fix the plan.',
+    '',
+    'Code-block faithfulness walk (REQUIRED on every task that includes plan-provided code):',
+    '- For each code block in the matched plan section, ask: did I copy this verbatim? Same names, same signatures, same comments, same imports?',
+    '- If no — what did I change? Why? Is the change required by the task or am I improving?',
+    '- Worked example. A plan section says: "Step 2: create `src/parser.ts` with content (verbatim): `export function parse(input: string): Token[] { ... }`". Naive worker writes `src/parser.ts` exporting `parseTokens` (renamed for clarity) with JSDoc added. Result: CODE SUBSTITUTION + ACCEPTANCE-CRITERIA OVERRUN. The downstream code that imports `parse` now breaks; the plan author reads the diff and says "I wrote `parse`, why is this `parseTokens`?". Correct worker creates `src/parser.ts` with exactly the named export `parse`, no JSDoc additions, no rename. If JSDoc would be valuable, mention it in the summary as a follow-up rather than adding it here.',
+    '- Most workers miss findings of this shape on first pass because the renamed/reformatted version "feels right" and they trust their instincts. The faithfulness walk forces the verbatim check.',
+].join('\n');
+//# sourceMappingURL=implementer-criteria.js.map

package/dist/tools/execute-plan/implementer-criteria.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/execute-plan/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,gCAAgC,GAAG;IAC9C,4BAA4B;IAC5B,8MAA8M;IAC9M,EAAE;IACF,0LAA0L;IAC1L,EAAE;IACF,uDAAuD;IACvD,uJAAuJ;IACvJ,oGAAoG;IACpG,kFAAkF;IAClF,+HAA+H;IAC/H,oLAAoL;IACpL,kJAAkJ;IAClJ,EAAE;IACF,wCAAwC;IACxC,sKAAsK;IACtK,0MAA0M;CAC3M,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC,QAAQ;IACR,0HAA0H;IAC1H,yIAAyI;IACzI,0IAA0I;IAC1I,uNAAuN;CACxN,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAAG;IACxC,mEAAmE;IACnE,EAAE;IACF,oNAAoN;IACpN,+MAA+M;IAC/M,0KAA0K;IAC1K,sRAAsR;IACtR,sMAAsM;IACtM,6QAA6Q;IAC7Q,6PAA6P;IAC7P,qQAAqQ;IACrQ,0RAA0R;IAC1R,EAAE;IACF,kHAAkH;IAClH,2GAA2G;IAC3G,4HAA4H;IAC5H,iJAAiJ;CAClJ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG;IACpC,yBAAyB;IACzB,sFAAsF;IACtF,4GAA4G;IAC5G,iKAAiK;IACjK,wEAAwE;IACxE,EAAE;IACF,yFAAyF;IACzF,6IAA6I;IAC7I,yFAAyF;IACzF,kpBAAkpB;IAClpB,iMAAiM;CAClM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC"}

package/dist/tools/execute-plan/tool-config.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/execute-plan/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAMlF,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AACvE,OAAO,EAA4B,KAAK,oBAAoB,EAAE,MAAM,mDAAmD,CAAC;~~AAMxH~~,eAAO,MAAM,sBAAsB;;;;;;;;;;;;kBAOxB,CAAC;AAEZ,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,sBAAsB,CAAC,CAAC;AAE1E,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYvE;~~AA0CD~~,eAAO,MAAM,UAAU,EAAE,UAAU,CAAC,oBAAoB,EAAE,oBAAoB,CA4B7E,CAAC"}
1	+ {"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/execute-plan/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAMlF,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AACvE,OAAO,EAA4B,KAAK,oBAAoB,EAAE,MAAM,mDAAmD,CAAC;AAYxH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;kBAOxB,CAAC;AAEZ,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,sBAAsB,CAAC,CAAC;AAE1E,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYvE;AA6DD,eAAO,MAAM,UAAU,EAAE,UAAU,CAAC,oBAAoB,EAAE,oBAAoB,CA4B7E,CAAC"}

package/dist/tools/execute-plan/tool-config.js CHANGED Viewed

@@ -5,6 +5,7 @@ import { executePlanHeadlineTemplate } from '../../reporting/headline-templates/
 import { executePlanReportSchema } from '../../reporting/report-parser-slots/execute-plan-report.js';
 import { DEFAULT_TASK_TIMEOUT_MS } from '../../config/schema.js';
 import { REVIEWER_AWARENESS_AP } from '../../review/templates/finding-criteria.js';
+import { EXECUTE_PLAN_PURPOSE_ORIENTATION, EXECUTE_PLAN_SCOPE_RULE, EXECUTE_PLAN_FAILURE_MODES, PLAN_FIDELITY_REMINDER, } from './implementer-criteria.js';
 export const executePlanInputSchema = z.object({
     filePaths: z.array(z.string()).length(1, { message: "execute_plan requires exactly one plan filePath" }),
     taskDescriptors: z.array(z.string()).min(1),
@@ -29,9 +30,22 @@ export function registerExecutePlan(registry) {
 /**
  * Build a compact worker prompt for one plan task. Extracted from the legacy
  * executor — just the section matched by the slot, not the full plan file.
+ *
+ * The prompt is structured top-down: orientation (why this exists) →
+ * task descriptor → matched plan section → file paths → scope rule →
+ * failure-mode taxonomy → plan-fidelity reminder → reviewer awareness.
+ * The fidelity rules were RESTORED in 4.1.0 (the older `compileExecutePlan`
+ * had them; the slot-style refactor that became the canonical path dropped
+ * them). Without the orientation + fidelity blocks, workers default to
+ * "implement the goal" and treat the plan as a starting suggestion rather than the contract.
  */
 function buildExecutePlanPrompt(filePaths, task, taskSection) {
     const parts = [
+        // Orientation goes FIRST — fidelity-first framing before the
+        // task descriptor, so the worker reads the section through the
+        // execution lens instead of the "improve it" lens.
+        EXECUTE_PLAN_PURPOSE_ORIENTATION,
+        '',
         `Execute this task from the plan: "${task}"`,
         '',
     ];
@@ -39,10 +53,10 @@ function buildExecutePlanPrompt(filePaths, task, taskSection) {
         parts.push('Relevant plan section:', '', '---', taskSection.trim(), '---', '');
     }
     else {
-        parts.push('No unique plan section matched that task heading. The full plan file is at:', ...filePaths.map((p) => `  - ${p}`), 'Read the plan file(s) yourself to find the task.', '');
+        parts.push('No unique plan section matched that task heading. The full plan file is at:', ...filePaths.map((p) => `  - ${p}`), 'Read the plan file(s) yourself to find the task. If still no unique match, report that and stop — do not implement anything.', '');
     }
-    parts.push('Plan files for reference (read on demand if you need adjacent context):', ...filePaths.map((p) => `  - ${p}`), '');
-    parts.push('Implement the task fully. Follow any acceptance criteria, file paths, and', 'constraints in the plan section above. If you cannot find or understand', 'the task, report that explicitly and do not implement anything.', '',
+    parts.push('Plan files for reference (read on demand if you need adjacent context — but do not enlarge scope into other tasks):', ...filePaths.map((p) => `  - ${p}`), '');
+    parts.push('Implement the task fully. Follow any acceptance criteria, file paths, and', 'constraints in the plan section above. If you cannot find or understand', 'the task, report that explicitly and do not implement anything.', '', EXECUTE_PLAN_SCOPE_RULE, '', EXECUTE_PLAN_FAILURE_MODES, '', PLAN_FIDELITY_REMINDER, '',
     // Tool sweep #12: share spec + quality reviewer rubric so the
     // worker self-aligns on what each reviewer will judge against.
     REVIEWER_AWARENESS_AP);

package/dist/tools/execute-plan/tool-config.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/execute-plan/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EACL,YAAY,EACZ,iBAAiB,EACjB,YAAY,GACb,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAAE,wBAAwB,EAA6B,MAAM,mDAAmD,CAAC;AACxH,OAAO,EAAE,2BAA2B,EAAE,MAAM,oDAAoD,CAAC;AACjG,OAAO,EAAE,uBAAuB,EAAE,MAAM,4DAA4D,CAAC;AACrG,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,4CAA4C,CAAC;~~AAEnF~~,MAAM,CAAC,MAAM,sBAAsB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC7C,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,OAAO,EAAE,iDAAiD,EAAE,CAAC;IACxG,eAAe,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3C,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC1B,mBAAmB,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;IAC3G,eAAe,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;IAC/C,aAAa,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;CAC9C,CAAC,CAAC,MAAM,EAAE,CAAC;AAIZ,MAAM,UAAU,mBAAmB,CAAC,QAA6B;IAC/D,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,cAAc;QACzB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,sBAAsB;QAC9B,YAAY,EAAE,oBAAoB;QAClC,gBAAgB,EAAE,UAAU;QAC5B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;AAED~~;;;GAGG~~;AACH,SAAS,sBAAsB,CAC7B,SAAmB,EACnB,IAAY,EACZ,WAA+B;IAE/B,MAAM,KAAK,GAAa;QACtB,qCAAqC,IAAI,GAAG;QAC5C,EAAE;KACH,CAAC;IACF,IAAI,WAAW,EAAE,CAAC;QAChB,KAAK,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,EAAE,KAAK,EAAE,WAAW,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;IACjF,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CACR,6EAA6E,EAC7E,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,EACnC,~~kDAAkD~~,~~EAClD~~,EAAE,CACH,CAAC;IACJ,CAAC;IACD,KAAK,CAAC,IAAI,CACR,~~yEAAyE~~,~~EACzE~~,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,EACnC,EAAE,CACH,CAAC;IACF,KAAK,CAA
C,IAAI,CACR,2EAA2E,EAC3E,yEAAyE,EACzE,iEAAiE,EACjE,EAAE;IACF,8DAA8D;IAC9D,+DAA+D;IAC/D,qBAAqB,CACtB,CAAC;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAA2D;IAChF,IAAI,EAAE,cAAc;IACpB,QAAQ,EAAE,oBAAoB;IAC9B,SAAS,EAAE,UAAU;IACrB,SAAS,EAAE,wBAAwB;IACnC,aAAa,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;QAC9B,MAAM,EAAE,sBAAsB,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,WAAW,CAAC;QACxF,SAAS,EAAE,UAAU;QACrB,YAAY,EAAE,KAAK,CAAC,YAAY;QAChC,IAAI,EAAE,gOAAgO;QACtO,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;QAC3C,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;QACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;QACjD,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;QAC/D,GAAG,EAAE,KAAK,CAAC,GAAG;QACd,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,eAAe,EAAE,KAAK,CAAC,eAAe;QACtC,UAAU,EAAE,IAAI;QAChB,aAAa,EAAE,KAAK,CAAC,aAAa;QAClC,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACjE,CAAC;IACF,YAAY,EAAE,uBAAuB;IACrC,gBAAgB,EAAE,2BAA2B;IAC7C,eAAe,EAAE;QACf,IAAI,EAAE,YAAY;QAClB,SAAS,EAAE,iBAAiB;QAC5B,IAAI,EAAE,YAAY;KACnB;CACF,CAAC"}
1	+ {"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/execute-plan/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EACL,YAAY,EACZ,iBAAiB,EACjB,YAAY,GACb,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAAE,wBAAwB,EAA6B,MAAM,mDAAmD,CAAC;AACxH,OAAO,EAAE,2BAA2B,EAAE,MAAM,oDAAoD,CAAC;AACjG,OAAO,EAAE,uBAAuB,EAAE,MAAM,4DAA4D,CAAC;AACrG,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EACL,gCAAgC,EAChC,uBAAuB,EACvB,0BAA0B,EAC1B,sBAAsB,GACvB,MAAM,2BAA2B,CAAC;AAEnC,MAAM,CAAC,MAAM,sBAAsB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC7C,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,OAAO,EAAE,iDAAiD,EAAE,CAAC;IACxG,eAAe,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3C,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC1B,mBAAmB,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;IAC3G,eAAe,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;IAC/C,aAAa,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;CAC9C,CAAC,CAAC,MAAM,EAAE,CAAC;AAIZ,MAAM,UAAU,mBAAmB,CAAC,QAA6B;IAC/D,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,cAAc;QACzB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,eAAe;QACzB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,sBAAsB;QAC9B,YAAY,EAAE,oBAAoB;QAClC,gBAAgB,EAAE,UAAU;QAC5B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAS,sBAAsB,CAC7B,SAAmB,EACnB,IAAY,EACZ,WAA+B;IAE/B,MAAM,KAAK,GAAa;QACtB,6DAA6D;QAC7D,+DAA+D;QAC/D,mDAAmD;QACnD,gCAAgC;QAChC,EAAE;QACF,qCAAqC,IAAI,GAAG;QAC5C,EAAE;KACH,CAAC;IACF,IAAI,WAAW,EAAE,CAAC;QAChB,KAAK,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,EAAE,KAAK,EAAE,WAAW,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;IACjF,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CACR,6EAA6E,EAC7E,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,EACnC,8HAA8H,EAC9H,EAAE,CACH,CAAC;IACJ,CAAC;IACD,KAAK,C
AAC,IAAI,CACR,qHAAqH,EACrH,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,EACnC,EAAE,CACH,CAAC;IACF,KAAK,CAAC,IAAI,CACR,2EAA2E,EAC3E,yEAAyE,EACzE,iEAAiE,EACjE,EAAE,EACF,uBAAuB,EACvB,EAAE,EACF,0BAA0B,EAC1B,EAAE,EACF,sBAAsB,EACtB,EAAE;IACF,8DAA8D;IAC9D,+DAA+D;IAC/D,qBAAqB,CACtB,CAAC;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAA2D;IAChF,IAAI,EAAE,cAAc;IACpB,QAAQ,EAAE,oBAAoB;IAC9B,SAAS,EAAE,UAAU;IACrB,SAAS,EAAE,wBAAwB;IACnC,aAAa,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;QAC9B,MAAM,EAAE,sBAAsB,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,WAAW,CAAC;QACxF,SAAS,EAAE,UAAU;QACrB,YAAY,EAAE,KAAK,CAAC,YAAY;QAChC,IAAI,EAAE,gOAAgO;QACtO,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;QAC3C,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;QACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;QACjD,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;QAC/D,GAAG,EAAE,KAAK,CAAC,GAAG;QACd,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,eAAe,EAAE,KAAK,CAAC,eAAe;QACtC,UAAU,EAAE,IAAI;QAChB,aAAa,EAAE,KAAK,CAAC,aAAa;QAClC,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACjE,CAAC;IACF,YAAY,EAAE,uBAAuB;IACrC,gBAAgB,EAAE,2BAA2B;IAC7C,eAAe,EAAE;QACf,IAAI,EAAE,YAAY;QAClB,SAAS,EAAE,iBAAiB;QAC5B,IAAI,EAAE,YAAY;KACnB;CACF,CAAC"}

package/dist/tools/investigate/implementer-criteria.d.ts CHANGED Viewed

@@ -1,17 +1,60 @@
 /**
  * Investigate-specific implementer criteria.
  *
+ * INVESTIGATE'S PURPOSE — read this before adding categories.
+ * mma-investigate answers a question about the codebase. The caller is
+ * about to ACT on your answer — write code, edit a file, choose between
+ * approaches. The success criterion is:
+ *
+ *   "If the caller acts on this answer literally — opens the cited
+ *    files, follows the cited chain, takes the synthesis at face value
+ *    — will they end up with correct code?"
+ *
+ * That criterion is what makes a finding load-bearing. A wrong file
+ * path, a stale quote, a hand-waved synthesis, an overstated confidence
+ * — all become bugs the caller writes. The investigate-equivalent of
+ * "fix is unimplementable" is "the answer points at a file that does
+ * not contain what you said it contained."
+ *
  * Investigate answers a question about the codebase. Findings can be
  * code-level citations, project-level synthesis, or NEGATIVE results
- * ("searched X, not found"). The shared evidence rule that demands a
- * code quote for every finding wrongly suppresses negative results,
- * which are often the most useful answer to "is X still used?" or
- * "where does Y live?".
+ * ("searched X, not found"). Negative findings are legitimate answers
+ * to "is X still used?" or "where does Y live?" and must not be
+ * suppressed.
  *
  * Note: investigate does NOT use SEVERITY_LADDER — its findings are
- * citations and synthesis, not severity-rated issues.
+ * citations and synthesis, not severity-rated issues. Confidence is the
+ * calibration dial, not severity.
+ */
+/**
+ * The orientation block. Goes at the TOP of every investigate prompt.
+ *
+ * Without an explicit purpose statement, workers default to "give a
+ * plausible-sounding answer" — which produces hallucinated citations
+ * and overstated confidence. With this orientation, every claim is
+ * ground-truthed against the file system.
  */
+export declare const INVESTIGATE_PURPOSE_ORIENTATION: string;
 export declare const EVIDENCE_RULE_INVESTIGATE: string;
 export declare const SCOPE_RULE_INVESTIGATE: string;
+/**
+ * The failure-mode taxonomy for investigations.
+ *
+ * Without this block, workers tend to give plausible-sounding answers
+ * with shaky citations. The 8 categories below are the specific ways
+ * an investigation answer becomes a bug when the caller acts on it.
+ */
+export declare const INVESTIGATE_FAILURE_MODES: string;
+/**
+ * Confidence-discipline reminder.
+ *
+ * The shared SEVERITY_LADDER does not apply to investigate (findings
+ * are citations, not severity-rated). Instead, confidence is the
+ * calibration dial. The common failure mode is over-confidence —
+ * stating "high confidence" because the worker sounds certain,
+ * not because the evidence is strong. This block tells the worker
+ * confidence reflects evidence strength only.
+ */
+export declare const CONFIDENCE_REMINDER_INVESTIGATE: string;
 export declare const ANNOTATOR_AWARENESS_INVESTIGATE: string;
 //# sourceMappingURL=implementer-criteria.d.ts.map

package/dist/tools/investigate/implementer-criteria.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/investigate/implementer-criteria.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;;GAYG~~;AAEH,eAAO,MAAM,yBAAyB,~~QAK1B~~,CAAC;AAEb,eAAO,MAAM,sBAAsB,~~QAIvB~~,CAAC;AAEb,eAAO,MAAM,+BAA+B,~~QAOhC~~,CAAC"}
1	+ {"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/investigate/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH;;;;;;;GAOG;AACH,eAAO,MAAM,+BAA+B,QAahC,CAAC;AAEb,eAAO,MAAM,yBAAyB,QAO1B,CAAC;AAEb,eAAO,MAAM,sBAAsB,QAKvB,CAAC;AAEb;;;;;;GAMG;AACH,eAAO,MAAM,yBAAyB,QAiB1B,CAAC;AAEb;;;;;;;;;GASG;AACH,eAAO,MAAM,+BAA+B,QAYhC,CAAC;AAEb,eAAO,MAAM,+BAA+B,QAShC,CAAC"}