npm - @zhixuan92/multi-model-agent-core - Versions diffs - 4.0.5 → 4.1.0 - Mend

@zhixuan92/multi-model-agent-core 4.0.5 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

package/dist/tools/debug/implementer-criteria.js CHANGED Viewed

@@ -1,29 +1,121 @@
 /**
  * Debug-specific implementer criteria.
  *
- * Debug is hypothesis-driven root-cause investigation. The shared
- * "do not speculate about caller behavior" rule directly contradicts
- * debug's job — debugging IS speculation, narrowed by evidence.
- * Cross-file tracing is required, not forbidden.
+ * DEBUG'S PURPOSE — read this before adding categories.
+ * mma-debug is hypothesis-driven root-cause investigation. The output is
+ * a fix specification, not a hint. The success criterion is:
+ *
+ *   "Could a maintainer who reads ONLY your debug report apply the fix,
+ *    reproduce the original failure, verify the fix, and re-merge —
+ *    without redoing the investigation?"
+ *
+ * That criterion is what makes a finding load-bearing. A correctly-
+ * identified line that is just a SYMPTOM (the real cause is upstream)
+ * is the debug-equivalent of an unimplementable fix — it sends the
+ * maintainer down the wrong path. A hypothesis with no falsifier is a
+ * guess dressed up as a finding.
+ *
+ * Debug is hypothesis-driven; cross-file tracing is required, not
+ * forbidden. Findings are evidence chains, not point observations.
+ */
+/**
+ * The orientation block. Goes at the TOP of every debug prompt.
+ *
+ * Without an explicit purpose statement, workers default to "find a
+ * suspicious line" — which often points at the symptom, not the cause.
+ * With this orientation, they trace from the failure point upstream
+ * until they hit something that, if changed, would prevent the failure.
  */
+export const DEBUG_PURPOSE_ORIENTATION = [
+    'Why this debug investigation exists:',
+    'mma-debug produces a fix specification a maintainer can apply WITHOUT redoing the investigation. Your output replaces the maintainer\'s own root-cause work — not augments it.',
+    '',
+    'For your output to clear that bar, every finding must answer:',
+    '- Reproduction: how does the maintainer trigger the failure (command, input, state)?',
+    '- Symptom: where does the failure surface (file:line of the error, the failing assertion, the wrong output)?',
+    '- Cause: where is the actual defect (file:line that, if changed, would prevent the failure)?',
+    '- Trace: the evidence chain that links symptom to cause — each step a file:line citation or an observed value.',
+    '- Fix: the specific change to make at the cause (PROPOSE only — read-only contract; the caller applies).',
+    '- Falsifier: how the maintainer can verify the fix works (the assertion that should now pass, the wrong output that should now be right).',
+    '',
+    'A finding missing the trace from symptom to cause is a guess. A finding that names a symptom location as the cause is misdirection. Both are worse than no finding because they send the maintainer down the wrong path.',
+    '',
+    'The completion test: would a maintainer who reads only your report and the source code reproduce the failure, find the cited cause, apply the proposed fix, and confirm the falsifier — all without doing the investigation a second time?',
+].join('\n');
 export const EVIDENCE_RULE_DEBUG = [
     'Evidence grounding (REQUIRED for every finding):',
-    '- Each finding is a hypothesis with a supporting evidence chain.',
-    '- Evidence: the reproducer + the code path traced from failure point to suspected cause + observed output.',
-    '- Hypothesis-level findings with PARTIAL evidence are valid — that\'s how root-causing works. Show your reasoning chain.',
-    '- Severity reflects evidence strength: confirmed root cause = `high`; plausible candidate = `medium`; ruled out = `low` (or note in summary, not as a Finding).',
+    '- Each finding is a hypothesis with a supporting evidence chain. Cite `file:line` at every step of the chain.',
+    '- The chain has at least three points: SYMPTOM (where the failure surfaces) → INTERMEDIATE STATE (the wrong value, the unexpected branch, the missing call) → CAUSE (the file:line that, if changed, would prevent the failure).',
+    '- Evidence forms accepted: reproducer commands, captured logs / stack traces, observed values, and code-path traces with file:line per step.',
+    '- Hypothesis-level findings with PARTIAL evidence are valid — that is how root-causing works. Show the reasoning chain. State which step is firm and which is conjecture.',
+    '- A hypothesis with NO falsifier (no way to check if the proposed cause is right) is a guess, not a finding. Always state how the maintainer can verify the fix.',
+    '- Severity reflects evidence strength AND impact: confirmed root cause that ships a wrong fix = `critical`; confirmed root cause = `high`; plausible candidate with most of the chain = `medium`; partial trace / multiple plausible explanations = `low` (or note in summary).',
 ].join('\n');
 export const SCOPE_RULE_DEBUG = [
     'Scope:',
     '- Follow the failure path wherever it leads. Cross-file tracing is required.',
+    '- Reproduction discovery IS in scope: if the caller did not provide reproduction steps, infer them from test files, error messages, or recent commits and state your inferred reproduction explicitly.',
+    '- Pre-existing-vs-new separation: if multiple bugs are entangled in the same failure, separate them. Identify which is the one the caller asked about; note the others under "Other defects observed (out of scope for this investigation)".',
     '- Out of scope: applying fixes (debug is read-only — propose, do not apply); rewriting code; auditing unrelated subsystems; broadening into general code review.',
 ].join('\n');
+/**
+ * The failure-mode taxonomy for debug investigations.
+ *
+ * Without this block, workers default to "find a suspicious line" —
+ * which catches surface symptoms but misses the chain upstream that
+ * actually caused the failure. The 9 categories below are the patterns
+ * a careful debugger would consciously check for.
+ */
+export const DEBUG_FAILURE_MODES = [
+    'Patterns to consciously check for. Apply on EVERY debug investigation:',
+    '',
+    '1. SYMPTOM-NOT-CAUSE — the suspicious line you first hit is where the failure SURFACES, not where it is CAUSED. Trace upstream until you find a state that, if changed, prevents the failure. The cited cause must be upstream of the cited symptom in the call/data flow.',
+    '2. SCAPEGOAT FILE — the failing test or the throwing line is in a file that is just the messenger. The actual defect lives in a file the failure passes through. Read the call stack / data path and place the finding at the source, not the sink.',
+    '3. INCOMPLETE TRACE — the chain from symptom to cause has unfilled gaps ("...somewhere in the middleware..."). Each step in the trace must be a file:line citation or an observed value. Gaps mean the maintainer redoes the investigation.',
+    '4. UNTESTED HYPOTHESIS — the proposed fix has no falsifier. Always state HOW the maintainer can verify the fix worked: which assertion now passes, which output is now right, which command no longer errors.',
+    '5. PARALLEL CAUSES — more than one independent root cause is plausible. If the evidence is consistent with two unrelated mechanisms, name both as separate findings. Do NOT collapse them.',
+    '6. PRE-EXISTING-VS-NEW ENTANGLEMENT — multiple bugs are entangled in one failure. Separate them. Identify which one the caller asked about; note the others under a separate section.',
+    '7. WRONG FIX SCOPE — the proposed fix is broader than the cause requires (a refactor when a one-line fix would do) or narrower than the cause requires (a band-aid that masks the bug). Match fix scope to the cause depth.',
+    '8. MISSING REPRODUCTION — there is no command, input, or state the maintainer can use to trigger the failure. Without reproduction the fix cannot be verified. If the caller did not provide one, infer it and state it explicitly.',
+    '9. CONFIDENCE OVERSTATEMENT — the chain has gaps but the finding is filed at `high` severity. Calibrate severity to evidence strength: gaps in the chain = lower severity OR explicit "this is the most likely candidate; verify by X" caveat.',
+    '',
+    'Severity calibration for debug:',
+    '- critical: confirmed root cause where applying the proposed fix incorrectly would ship a regression (e.g. fix at the wrong layer that hides the bug).',
+    '- high: confirmed root cause with full chain symptom → cause → reproduction → falsifier. The maintainer can apply directly.',
+    '- medium: plausible cause with most of the chain, one or two gaps the maintainer can fill. Mark gaps explicitly.',
+    '- low: partial trace, multiple candidates, or hypothesis without sufficient evidence. Use sparingly — most findings should be medium or high.',
+].join('\n');
+/**
+ * Counter-balance to the SEVERITY_LADDER's anti-inflation hint.
+ *
+ * The shared severity ladder warns against inflation. For debug, the
+ * common failure is the OPPOSITE — workers stop at the first plausible
+ * explanation (over-confidence on a shallow trace) rather than tracing
+ * to the actual cause. This block tells the worker that the typical debug
+ * failure is a shallow root-cause analysis, not a noisy hypothesis list.
+ */
+export const THOROUGHNESS_REMINDER_DEBUG = [
+    'Thoroughness expectation for debug investigations:',
+    '- For non-trivial failures (test failure, runtime error, unexpected behavior), stopping at the first plausible explanation is the typical debug failure mode. Always check for SYMPTOM-NOT-CAUSE before filing a finding: ask "if I changed this line, would the failure still happen via a different path?"',
+    '- The SEVERITY_LADDER warns against inflation. That warning is calibrated for code reviews — for debug, the common failure is OVER-CONFIDENCE on a shallow trace (calling a symptom location the cause). Apply the failure-mode taxonomy first; THEN calibrate severity.',
+    '- Do not invent hypotheses to hit a quota. But if you have only one finding and the failure is non-trivial, double-check categories 1, 2, 3, and 5 (symptom-not-cause, scapegoat file, incomplete trace, parallel causes) — these are the ones investigators most often miss on first pass.',
+    '- Limit yourself to 3-5 most-likely hypotheses. Do NOT enumerate implausible ones to pad the list.',
+    '',
+    'Symptom → cause walk (REQUIRED on every investigation):',
+    '- Start at the SYMPTOM (where the failure surfaces — the error message, the failing assertion, the wrong output).',
+    '- Walk UPSTREAM in the call/data flow. At each step, check whether the state at that point is consistent with the failure or already wrong. The point where the state first becomes wrong is the cause.',
+    '- For each step in the walk, cite a file:line. If the walk crosses a function boundary, cite both sides (caller line + callee line).',
+    '- Worked example. A test fails with `TypeError: cannot read property "id" of undefined` at `tests/users.test.ts:42` (assertion on the response). The walk: assertion sees `response.user === undefined`; the route handler at `src/handlers/getUser.ts:18` returns `{ user: rows[0] }` from a DB call; the DB call at `src/db/users.ts:34` returns `[]` for the test fixture id; the fixture loader at `tests/fixtures/users.ts:12` writes to a different table than the handler reads. → CAUSE is `tests/fixtures/users.ts:12` (wrong table). The TypeError at `tests/users.test.ts:42` is the SYMPTOM. A finding that named `getUser.ts:18` as the cause would have shipped a fix that adds null-checking — masking the bug instead of fixing it.',
+    '- Most investigators miss findings of this shape on first pass because the failing line is loud and the upstream cause is quiet. The symptom → cause walk forces the trace.',
+].join('\n');
 export const ANNOTATOR_AWARENESS_DEBUG = [
     'After your output, an annotator validates each finding against this debug rubric:',
-    '- Is each finding a hypothesis or evidence chain (not an unrelated observation)?',
-    '- Does the reasoning chain logically connect the cited evidence to the hypothesis?',
-    '- Did you propose fixes without applying them (read-only contract)?',
-    '- Is severity calibrated to evidence strength?',
-    'Self-check before emitting. Findings that fail any check are downgraded or dropped.',
+    '- Is each finding a hypothesis with a complete trace from symptom to cause (not a point observation at the symptom)?',
+    '- Does the cited cause come UPSTREAM of the cited symptom in the call/data flow?',
+    '- Is there a reproduction step the maintainer can use to trigger the failure?',
+    '- Is there a falsifier the maintainer can use to verify the fix?',
+    '- Did you propose fixes WITHOUT applying them (read-only contract)?',
+    '- Is severity calibrated to evidence strength (gaps in chain = lower severity, not the same severity with hand-waving)?',
+    'Self-check before emitting. Findings that fail any check are downgraded or dropped — but partial-evidence hypotheses with explicit "the gap is here, verify by X" notes are FULLY VALID, do NOT downgrade them as "speculation". Debug is speculation narrowed by evidence; hand-waving is the failure mode, not careful gap-marking.',
 ].join('\n');
 //# sourceMappingURL=implementer-criteria.js.map

package/dist/tools/debug/implementer-criteria.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/debug/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;~~AAEH~~,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,kDAAkD;IAClD,~~kEAAkE~~;~~IAClE~~,~~4GAA4G~~;~~IAC5G~~,~~0HAA0H~~;~~IAC1H~~,~~iKAAiK~~;~~CAClK~~,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC9B,QAAQ;IACR,8EAA8E;IAC9E,kKAAkK;CACnK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,mFAAmF;IACnF,kFAAkF;IAClF,~~oFAAoF~~;~~IACpF~~,qEAAqE;IACrE,~~gDAAgD~~;~~IAChD~~,~~qFAAqF~~;~~CACtF~~,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC"}
1	+ {"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/debug/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,sCAAsC;IACtC,gLAAgL;IAChL,EAAE;IACF,+DAA+D;IAC/D,sFAAsF;IACtF,8GAA8G;IAC9G,8FAA8F;IAC9F,gHAAgH;IAChH,0GAA0G;IAC1G,2IAA2I;IAC3I,EAAE;IACF,0NAA0N;IAC1N,EAAE;IACF,4OAA4O;CAC7O,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,kDAAkD;IAClD,+GAA+G;IAC/G,kOAAkO;IAClO,8IAA8I;IAC9I,2KAA2K;IAC3K,kKAAkK;IAClK,iRAAiR;CAClR,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC9B,QAAQ;IACR,8EAA8E;IAC9E,wMAAwM;IACxM,8OAA8O;IAC9O,kKAAkK;CACnK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,wEAAwE;IACxE,EAAE;IACF,4QAA4Q;IAC5Q,qPAAqP;IACrP,6OAA6O;IAC7O,+MAA+M;IAC/M,4LAA4L;IAC5L,uLAAuL;IACvL,6NAA6N;IAC7N,qOAAqO;IACrO,gPAAgP;IAChP,EAAE;IACF,iCAAiC;IACjC,wJAAwJ;IACxJ,6HAA6H;IAC7H,kHAAkH;IAClH,+IAA+I;CAChJ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAAG;IACzC,oDAAoD;IACpD,8SAA8S;IAC9S,0QAA0Q;IAC1Q,6RAA6R;IAC7R,oGAAoG;IACpG,EAAE;IACF,yDAAyD;IACzD,mHAAmH;IACnH,yMAAyM;IACzM,sIAAsI;IACtI,qtBAAqtB;IACrtB,6KAA6K;CAC9K,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,mFAAmF;IACnF,sHAAsH;IACtH,kFAAkF;IAClF,+EAA+E;IAC/E,kEAAkE;IAClE,qEAAqE;IACrE,yHAAyH;IACzH,uUAAuU;CACxU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC"}

package/dist/tools/debug/tool-config.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/debug/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAElF,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AAEvE,OAAO,EAAkB,KAAK,cAAc,EAAE,MAAM,4CAA4C,CAAC;~~AAUjG~~,wBAAgB,aAAa,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYjE;~~AAoCD~~,eAAO,MAAM,UAAU,EAAE,UAAU,CAAC,KAAK,EAAE,cAAc,EAAE,OAAO,CAoCjE,CAAC"}
1	+ {"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/debug/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAElF,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AAEvE,OAAO,EAAkB,KAAK,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAajG,wBAAgB,aAAa,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYjE;AAsDD,eAAO,MAAM,UAAU,EAAE,UAAU,CAAC,KAAK,EAAE,cAAc,EAAE,OAAO,CAoCjE,CAAC"}

package/dist/tools/debug/tool-config.js CHANGED Viewed

@@ -4,7 +4,7 @@ import { debugBriefSlot } from '../../intake/brief-compiler-slots/debug.js';
 import { debugHeadlineTemplate } from '../../reporting/headline-templates/debug.js';
 import { DEFAULT_TASK_TIMEOUT_MS } from '../../config/schema.js';
 import { SEVERITY_LADDER } from '../../review/templates/finding-criteria.js';
-import { EVIDENCE_RULE_DEBUG, SCOPE_RULE_DEBUG, ANNOTATOR_AWARENESS_DEBUG, } from './implementer-criteria.js';
+import { DEBUG_PURPOSE_ORIENTATION, EVIDENCE_RULE_DEBUG, SCOPE_RULE_DEBUG, ANNOTATOR_AWARENESS_DEBUG, DEBUG_FAILURE_MODES, THOROUGHNESS_REMINDER_DEBUG, } from './implementer-criteria.js';
 export function registerDebug(registry) {
     registry.register({
         routeName: 'debug',
@@ -19,13 +19,22 @@ export function registerDebug(registry) {
     });
 }
 const FINDING_FORMAT_INSTRUCTIONS = [
+    // Orientation goes FIRST — the worker needs to know why this debug
+    // exists (fix specification, not a hint; symptom-vs-cause matters)
+    // before reading the format spec / taxonomy / evidence rules.
+    // Without it, workers point at the failing line and call it the cause.
+    DEBUG_PURPOSE_ORIENTATION,
+    '',
     'Use hypothesis-driven debugging. Use this EXACT per-finding format — both the structured reviewer and the deterministic fallback extract from this same format:',
     '',
     '## Finding 1: <one-line title>',
     '- Severity: critical | high | medium | low',
-    '- Hypothesis: the candidate cause',
-    '- Evidence: trace, log, or code path with file:line',
+    '- Reproduction: command/input/state to trigger the failure',
+    '- Symptom: file:line where the failure surfaces',
+    '- Trace: each step file:line + observed value, ending at the cause',
+    '- Cause: file:line that, if changed, would prevent the failure',
     '- Fix: proposed change (PROPOSE only — do NOT apply the fix)',
+    '- Falsifier: how the maintainer verifies the fix works',
     '',
     '## Finding 2: <one-line title>',
     '- Severity: ...',
@@ -33,13 +42,22 @@ const FINDING_FORMAT_INSTRUCTIONS = [
     '',
     'Rules:',
     '- Each finding heading MUST start with "## Finding N: " (h2, "Finding ", number, colon, title) — number sequentially from 1.',
-    '- Severity / Hypothesis / Evidence / Fix bullets are on their own lines with the labels exactly as shown.',
+    '- Reproduction / Symptom / Trace / Cause / Fix / Falsifier bullets are on their own lines with the labels exactly as shown.',
     '- This is a read-only diagnostic — do NOT edit any file. Propose fixes; the caller applies them.',
     '- Limit yourself to 3-5 most-likely hypotheses. Do not enumerate implausible ones to pad the list.',
     '',
     // Tool sweep #12: shared rubric so worker self-aligns with the annotator.
     SEVERITY_LADDER,
     '',
+    // Debug failure-mode taxonomy. Without this block, workers stop at
+    // the first plausible explanation (often the symptom) instead of
+    // tracing upstream to the actual cause.
+    DEBUG_FAILURE_MODES,
+    '',
+    // Counter-balances the SEVERITY_LADDER's anti-inflation hint and
+    // includes the symptom→cause walk with worked example.
+    THOROUGHNESS_REMINDER_DEBUG,
+    '',
     EVIDENCE_RULE_DEBUG,
     '',
     SCOPE_RULE_DEBUG,

package/dist/tools/debug/tool-config.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/debug/tool-config.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AAGvE,OAAO,EAAE,cAAc,EAAuB,MAAM,4CAA4C,CAAC;AACjG,OAAO,EAAE,qBAAqB,EAAE,MAAM,6CAA6C,CAAC;AACpF,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,4CAA4C,CAAC;AAC7E,OAAO,EACL,mBAAmB,EACnB,gBAAgB,EAChB,yBAAyB,~~GAC1B~~,MAAM,2BAA2B,CAAC;AAEnC,MAAM,UAAU,aAAa,CAAC,QAA6B;IACzD,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,OAAO;QAClB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,WAAW;QACnB,YAAY,EAAE,WAAW;QACzB,gBAAgB,EAAE,SAAS;QAC3B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,2BAA2B,GAAG;IAClC,iKAAiK;IACjK,EAAE;IACF,gCAAgC;IAChC,4CAA4C;IAC5C,~~mCAAmC~~;~~IACnC~~,~~qDAAqD~~;~~IACrD~~,8DAA8D;IAC9D,EAAE;IACF,gCAAgC;IAChC,iBAAiB;IACjB,OAAO;IACP,EAAE;IACF,QAAQ;IACR,8HAA8H;IAC9H,~~2GAA2G~~;~~IAC3G~~,kGAAkG;IAClG,oGAAoG;IACpG,EAAE;IACF,0EAA0E;IAC1E,eAAe;IACf,EAAE;IACF,mBAAmB;IACnB,EAAE;IACF,gBAAgB;IAChB,EAAE;IACF,yBAAyB;CAC1B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,SAAS,oBAAoB,CAAC,SAAoB;IAChD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACpD,OAAO,kCAAkC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;AACrF,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAA+C;IACpE,IAAI,EAAE,OAAO;IACb,QAAQ,EAAE,WAAW;IACrB,SAAS,EAAE,SAAS;IACpB,SAAS,EAAE,cAAc;IACzB,aAAa,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;QAC5B,MAAM,KAAK,GAAa,CAAC,0BAA0B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACpE,IAAI,KAAK,CAAC,OAAO;YAAE,KAAK,CAAC,IAAI,CAAC,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC3D,IAAI,KAAK,CAAC,UAAU;YAAE,KAAK,CAAC,IAAI,CAAC,uBAAuB,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC;QAC5E,MAAM,WAAW,GAAG,oBAAoB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAI,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACzC,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAElC,OAAO;YACL,MAAM;YACN,S
AAS,EAAE,SAAS;YACpB,YAAY,EAAE,cAAc;YAC5B,kBAAkB,EAAE,KAAK;YACzB,IAAI,EAAE,+KAA+K;YACrL,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;YAC3C,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;YACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;YACjD,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;YAC/D,GAAG,EAAE,GAAG,CAAC,cAAc,EAAE,GAAG,IAAI,GAAG,CAAC,GAAG;YACvC,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,UAAU,EAAE,KAAK;YACjB,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;YACtF,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,SAAS;SACtC,CAAC;IACJ,CAAC;IACD,YAAY,EAAE,EAAE,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE,GAAG,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC,CAAC,CAAC,EAAE;IACzG,gBAAgB,EAAE,qBAAqB;IACvC,eAAe,EAAE;QACf,SAAS,EAAE,oBAAoB;KAChC;CACF,CAAC"}
1	+ {"version":3,"file":"tool-config.js","sourceRoot":"","sources":["../../../src/tools/debug/tool-config.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AAGvE,OAAO,EAAE,cAAc,EAAuB,MAAM,4CAA4C,CAAC;AACjG,OAAO,EAAE,qBAAqB,EAAE,MAAM,6CAA6C,CAAC;AACpF,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,4CAA4C,CAAC;AAC7E,OAAO,EACL,yBAAyB,EACzB,mBAAmB,EACnB,gBAAgB,EAChB,yBAAyB,EACzB,mBAAmB,EACnB,2BAA2B,GAC5B,MAAM,2BAA2B,CAAC;AAEnC,MAAM,UAAU,aAAa,CAAC,QAA6B;IACzD,QAAQ,CAAC,QAAQ,CAAC;QAChB,SAAS,EAAE,OAAO;QAClB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,WAAW;QACnB,YAAY,EAAE,WAAW;QACzB,gBAAgB,EAAE,SAAS;QAC3B,oBAAoB,EAAE,KAAK;QAC3B,iBAAiB,EAAE,eAAe;KACnC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,2BAA2B,GAAG;IAClC,mEAAmE;IACnE,mEAAmE;IACnE,8DAA8D;IAC9D,uEAAuE;IACvE,yBAAyB;IACzB,EAAE;IACF,iKAAiK;IACjK,EAAE;IACF,gCAAgC;IAChC,4CAA4C;IAC5C,4DAA4D;IAC5D,iDAAiD;IACjD,oEAAoE;IACpE,gEAAgE;IAChE,8DAA8D;IAC9D,wDAAwD;IACxD,EAAE;IACF,gCAAgC;IAChC,iBAAiB;IACjB,OAAO;IACP,EAAE;IACF,QAAQ;IACR,8HAA8H;IAC9H,6HAA6H;IAC7H,kGAAkG;IAClG,oGAAoG;IACpG,EAAE;IACF,0EAA0E;IAC1E,eAAe;IACf,EAAE;IACF,mEAAmE;IACnE,iEAAiE;IACjE,wCAAwC;IACxC,mBAAmB;IACnB,EAAE;IACF,iEAAiE;IACjE,uDAAuD;IACvD,2BAA2B;IAC3B,EAAE;IACF,mBAAmB;IACnB,EAAE;IACF,gBAAgB;IAChB,EAAE;IACF,yBAAyB;CAC1B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,SAAS,oBAAoB,CAAC,SAAoB;IAChD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACpD,OAAO,kCAAkC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;AACrF,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAA+C;IACpE,IAAI,EAAE,OAAO;IACb,QAAQ,EAAE,WAAW;IACrB,SAAS,EAAE,SAAS;IACpB,SAAS,EAAE,cAAc;IACzB,aAAa,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;QAC5B,MAAM,KAAK,GAAa,CAAC,0BAA0B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACpE,IAAI,KAAK,CAAC,OAAO;YAAE,KAAK,CAAC,IAAI,CAAC,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC3D,IAAI,KAAK,CAAC,UAAU;YAAE,KAAK,CAAC,IAAI,CAAC,uBAAuB,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC;QAC5
E,MAAM,WAAW,GAAG,oBAAoB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAI,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACzC,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAElC,OAAO;YACL,MAAM;YACN,SAAS,EAAE,SAAS;YACpB,YAAY,EAAE,cAAc;YAC5B,kBAAkB,EAAE,KAAK;YACzB,IAAI,EAAE,+KAA+K;YACrL,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,MAAM;YAC3C,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,uBAAuB;YACpE,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE;YACjD,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,IAAI,UAAU;YAC/D,GAAG,EAAE,GAAG,CAAC,cAAc,EAAE,GAAG,IAAI,GAAG,CAAC,GAAG;YACvC,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,UAAU,EAAE,KAAK;YACjB,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;YACtF,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,SAAS;SACtC,CAAC;IACJ,CAAC;IACD,YAAY,EAAE,EAAE,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE,GAAG,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC,CAAC,CAAC,EAAE;IACzG,gBAAgB,EAAE,qBAAqB;IACvC,eAAe,EAAE;QACf,SAAS,EAAE,oBAAoB;KAChC;CACF,CAAC"}

package/dist/tools/delegate/implementer-criteria.d.ts ADDED Viewed

@@ -0,0 +1,62 @@
+/**
+ * Delegate-specific implementer criteria.
+ *
+ * DELEGATE'S PURPOSE — read this before adding categories.
+ * mma-delegate is the generic dispatcher for ad-hoc implementation
+ * tasks. The caller hands you a `prompt` (and optionally a `done`
+ * acceptance criterion, `filePaths`, `verifyCommand`); your output is
+ * a diff a REVIEWER will read alongside the brief. The success
+ * criterion is:
+ *
+ *   "Could a reviewer who reads only the brief and your diff approve
+ *    the merge without flagging gaps the worker should have caught
+ *    or extras the brief did not authorize?"
+ *
+ * That criterion is what makes a write load-bearing. The reviewer is
+ * NOT a rubber stamp — they will ask "did you finish that?" if the
+ * fix is partial, and "why did you also touch X?" if the diff has
+ * scope creep. Your job is to produce the SMALLEST COMPLETE CHANGE
+ * that satisfies the brief — minimal AND complete simultaneously.
+ *
+ * Delegate is artifact-producing — you write files. Cross-agent
+ * spec + quality + diff review applies. The spec the spec-reviewer
+ * checks against is the BRIEF (prompt + done), not your interpretation
+ * of it. The quality-reviewer checks safety / correctness / style.
+ */
+/**
+ * The orientation block. Goes at the TOP of every delegate prompt.
+ *
+ * Without an explicit orientation, workers default to "implement
+ * something good" — which produces over-implementation (SCOPE CREEP)
+ * and under-implementation (SILENT PARTIAL FIX). With this orientation,
+ * the worker calibrates against the reviewer's standard: minimal +
+ * complete, the brief is the contract.
+ */
+export declare const DELEGATE_PURPOSE_ORIENTATION: string;
+/**
+ * The scope rule for delegate.
+ *
+ * Replaces the prior one-liner with a concrete contract about what
+ * is in scope, what is off-limits, and what to do at the boundary.
+ */
+export declare const DELEGATE_SCOPE_RULE: string;
+/**
+ * The failure-mode taxonomy for delegate.
+ *
+ * Workers calibrated on "implement something good" tend to over-deliver
+ * (scope creep) or under-deliver (silent partial fix). The 9 categories
+ * below are the specific patterns reviewers raise as merge-blockers.
+ */
+export declare const DELEGATE_FAILURE_MODES: string;
+/**
+ * Completeness reminder.
+ *
+ * The shared SEVERITY_LADDER does not apply to write tools. The
+ * counter-balance for delegate is the opposite of the one for read-only
+ * tools: the typical failures are OVER-IMPLEMENTATION (scope creep) and
+ * UNDER-IMPLEMENTATION (silent partial fix), often in the same task.
+ * This block tells the worker that the load-bearing constraint is
+ * "minimal AND complete simultaneously".
+ */
+export declare const COMPLETENESS_REMINDER_DELEGATE: string;
+//# sourceMappingURL=implementer-criteria.d.ts.map

package/dist/tools/delegate/implementer-criteria.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/delegate/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH;;;;;;;;GAQG;AACH,eAAO,MAAM,4BAA4B,QAc7B,CAAC;AAEb;;;;;GAKG;AACH,eAAO,MAAM,mBAAmB,QAMpB,CAAC;AAEb;;;;;;GAMG;AACH,eAAO,MAAM,sBAAsB,QAiBvB,CAAC;AAEb;;;;;;;;;GASG;AACH,eAAO,MAAM,8BAA8B,QAe/B,CAAC"}

package/dist/tools/delegate/implementer-criteria.js ADDED Viewed

@@ -0,0 +1,114 @@
+/**
+ * Delegate-specific implementer criteria.
+ *
+ * DELEGATE'S PURPOSE — read this before adding categories.
+ * mma-delegate is the generic dispatcher for ad-hoc implementation
+ * tasks. The caller hands you a `prompt` (and optionally a `done`
+ * acceptance criterion, `filePaths`, `verifyCommand`); your output is
+ * a diff a REVIEWER will read alongside the brief. The success
+ * criterion is:
+ *
+ *   "Could a reviewer who reads only the brief and your diff approve
+ *    the merge without flagging gaps the worker should have caught
+ *    or extras the brief did not authorize?"
+ *
+ * That criterion is what makes a write load-bearing. The reviewer is
+ * NOT a rubber stamp — they will ask "did you finish that?" if the
+ * fix is partial, and "why did you also touch X?" if the diff has
+ * scope creep. Your job is to produce the SMALLEST COMPLETE CHANGE
+ * that satisfies the brief — minimal AND complete simultaneously.
+ *
+ * Delegate is artifact-producing — you write files. Cross-agent
+ * spec + quality + diff review applies. The spec the spec-reviewer
+ * checks against is the BRIEF (prompt + done), not your interpretation
+ * of it. The quality-reviewer checks safety / correctness / style.
+ */
+/**
+ * The orientation block. Goes at the TOP of every delegate prompt.
+ *
+ * Without an explicit orientation, workers default to "implement
+ * something good" — which produces over-implementation (SCOPE CREEP)
+ * and under-implementation (SILENT PARTIAL FIX). With this orientation,
+ * the worker calibrates against the reviewer's standard: minimal +
+ * complete, the brief is the contract.
+ *
+ * Built as an array of lines joined with '\n'; the empty-string entries
+ * become blank lines between the "why", the rules, and the completion
+ * test.
+ */
+export const DELEGATE_PURPOSE_ORIENTATION = [
+    'Why this delegation exists:',
+    'mma-delegate produces a diff a reviewer will read alongside the brief. Success = the diff is the SMALLEST COMPLETE CHANGE that satisfies the brief — minimal AND complete simultaneously. A reviewer should not need to ask "did you finish that?" or "why did you also touch X?".',
+    '',
+    'For your output to clear that bar:',
+    '- Implement EXACTLY what the brief asks for. Not less (SILENT PARTIAL FIX). Not more (SCOPE CREEP).',
+    '- If the brief lists `filePaths`, those are the authorized targets. Existing files in the list = pre-verified to read; non-existent paths in the list = explicit output targets you must create. Files NOT in the list are off-limits to write (touch only when the brief\'s task genuinely requires it, and call out the deviation in your summary).',
+    '- If the brief includes a `done` acceptance criterion, the reviewer will check your diff against that criterion. Match it precisely.',
+    '- If the brief includes a `verifyCommand`, run it after your changes. A green verify is part of "complete"; a red verify is part of "incomplete".',
+    '- Match the surrounding code\'s conventions (naming, import style, error handling, formatting). Inventing patterns instead of matching is convention drift — the reviewer will flag it.',
+    '- If you change a public symbol (exported function signature, exported type, public method), update the callers in the named files. Leaving callers stale is an INCOMPLETE REFACTOR.',
+    '- Do NOT modify tests or fixtures or specs to make a wrong implementation pass. If a test fails, fix the implementation, not the test (unless the brief explicitly says the test is wrong).',
+    '',
+    'The completion test: would a reviewer who reads ONLY the brief and your diff approve the merge — or would they raise a concern (gap, scope creep, drift, broken caller, undocumented assumption) you should have caught?',
+].join('\n');
+/**
+ * The scope rule for delegate.
+ *
+ * Replaces the prior one-liner with a concrete contract about what
+ * is in scope, what is off-limits, and what to do at the boundary.
+ *
+ * The four bullets cover, in order: overall scope, what may be read,
+ * what may be written, and what is explicitly out of scope. The lines
+ * are joined with '\n' into one string.
+ */
+export const DELEGATE_SCOPE_RULE = [
+    'Scope:',
+    '- Strictly what the brief\'s `prompt` (and `done` if present) requests. The brief is the contract.',
+    '- Reading: the named `filePaths` plus what the task obviously implies (caller files when the diff changes a public symbol; sibling test files when the brief changes behavior; types files when the diff changes a typed interface).',
+    '- Writing: existing files in `filePaths` (modify) and non-existent paths in `filePaths` (create). Files outside `filePaths` are off-limits to write unless the brief\'s task genuinely requires it (e.g. updating a caller because the task changed a signature — call this out in your summary).',
+    '- Out of scope: refactors not in the brief, tangential cleanup ("while I\'m here…"), modifying tests/fixtures/specs to mask a wrong implementation, opportunistic style fixes, dependency upgrades the brief did not request.',
+].join('\n');
+/**
+ * The failure-mode taxonomy for delegate.
+ *
+ * Workers calibrated on "implement something good" tend to over-deliver
+ * (scope creep) or under-deliver (silent partial fix). The 9 categories
+ * below are the specific patterns reviewers raise as merge-blockers.
+ * The string ends with a short severity-calibration section describing
+ * what to report in the summary.
+ *
+ * NOTE(review): item 3 calls writes outside `filePaths` off-limits with
+ * no exception, and item 7 says only to report callers that live outside
+ * `filePaths`, whereas DELEGATE_SCOPE_RULE permits such writes when the
+ * task genuinely requires them (with a call-out in the summary).
+ * Confirm which rule is intended and align the wording.
+ */
+export const DELEGATE_FAILURE_MODES = [
+    'Patterns to consciously check for. Apply on EVERY delegated task:',
+    '',
+    '1. SCOPE CREEP — touched files / added features beyond the brief. The reviewer reads the diff and asks "why did you also change Y?" If you cannot answer with "the brief required it", remove the change.',
+    '2. SILENT PARTIAL FIX — declared done, work demonstrably incomplete. Naming a step in your summary as "done" when the diff does not contain it is the worst delegate failure mode. Either implement it or report explicitly that you did not.',
+    '3. WRONG FILE TARGET — wrote to a path not in `filePaths` (when the caller specified `filePaths`). Existing files outside `filePaths` are off-limits to write. New files outside `filePaths` are scope creep.',
+    '4. PHANTOM TEST PASS — claimed "tests pass" without actually running them, OR ran a non-affected suite (e.g. unit tests pass but the change is in a path covered by integration tests). If the brief includes `verifyCommand`, run that exact command and quote the output.',
+    '5. CROSS-CUTTING DAMAGE — your fix introduced an unrelated regression in the same edit (e.g. fixing a parser bug but breaking the formatter). Re-read the diff before declaring done; check that nothing OTHER than the brief\'s target changed semantically.',
+    '6. CONVENTION DRIFT — invented a naming / import / error-handling / formatting pattern instead of matching the surrounding code. The reviewer will flag this as "matches no neighboring file" — it slows merge.',
+    '7. INCOMPLETE REFACTOR — changed a public symbol (exported function signature, exported type, public method) and did not update its callers. Stale callers either crash at runtime or compile but behave wrong. Update callers in the named files; report in your summary if callers exist outside `filePaths`.',
+    '8. SPEC OVERREACH — modified tests, fixtures, or interface contracts to make a wrong implementation pass, instead of fixing the implementation. If a test is failing, the FIRST hypothesis is that the implementation is wrong, not the test.',
+    '9. UNDOCUMENTED ASSUMPTION — diff relies on the caller doing X (env var set, init function called, dependency installed) without saying so in the brief\'s authoring contract. Either remove the assumption, or document it in your summary so the reviewer can decide if it is acceptable.',
+    '',
+    'Severity calibration for delegate (in your summary, not via SEVERITY_LADDER which is for read-only tools):',
+    '- Issues you notice but do NOT fix: report in summary so the reviewer can decide.',
+    '- Issues you encounter that block the brief: report and stop. Do not pick a workaround unilaterally.',
+    '- Issues clearly implied by the brief but not literally stated (e.g. "fix bug" implies "regression test added"): implement and name them as "implicit per the brief" in summary.',
+].join('\n');
+/**
+ * Completeness reminder.
+ *
+ * The shared SEVERITY_LADDER does not apply to write tools. The
+ * counter-balance for delegate differs from read-only tools: the
+ * typical failures are OVER-IMPLEMENTATION (scope creep) and UNDER-
+ * IMPLEMENTATION (silent partial fix), often in the same task. This
+ * block tells the worker the load-bearing constraint is "minimal AND
+ * complete simultaneously".
+ *
+ * NOTE(review): the last bullet says workers "miss findings of this
+ * shape"; a delegate worker produces a diff rather than findings, so
+ * this wording looks carried over from a read-only tool — confirm.
+ */
+export const COMPLETENESS_REMINDER_DELEGATE = [
+    'Completeness reminder:',
+    '- "Smallest complete change" is the bar. Smallest = no extras. Complete = no gaps.',
+    '- Most workers on first pass either bloat (extra refactor / extra cleanup / extra abstraction) or skim (declared done with the regression test missing). Both are merge-blockers; aim for the intersection.',
+    '- Before declaring done, walk the brief literally:',
+    '    1. List every requirement in the prompt (and `done` if present).',
+    '    2. For each, ask: "is this in my diff?" If no, you are not done.',
+    '    3. Walk the diff in reverse: for each changed file/line, ask: "is this required by a brief item?" If no, remove it.',
+    '    4. If `verifyCommand` is set, run it. Quote the relevant output line in your summary.',
+    '',
+    'Brief-vs-diff walk (REQUIRED on every task):',
+    '- For each item in the brief\'s `prompt` and `done`, locate the diff hunk that satisfies it. If you cannot, the item is unsatisfied.',
+    '- For each diff hunk, name the brief item it satisfies. If you cannot, the hunk is scope creep.',
+    '- Worked example. Brief: "fix the off-by-one in `paginate(page, total)` — `total < pageSize` should still produce one page; add a regression test in `tests/pagination.test.ts`." Naive worker rewrites `paginate` as a clean three-liner with new docstrings, skips the test → SILENT PARTIAL FIX (no test) + SCOPE CREEP (rewrote a function that needed a one-line fix). Correct worker: changes one boundary condition in `paginate` (one line of diff in the implementation file), adds one test in `tests/pagination.test.ts` covering the `total < pageSize` case, runs `verifyCommand` if set, quotes the test name and "1 passed" in the summary, stops. Two diff hunks total, both directly tied to the brief.',
+    '- Most workers miss findings of this shape on first pass because the rewrite "feels cleaner". The brief-vs-diff walk forces the question "what did the brief ACTUALLY ask for?".',
+].join('\n');
+//# sourceMappingURL=implementer-criteria.js.map

package/dist/tools/delegate/implementer-criteria.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/delegate/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAAG;IAC1C,6BAA6B;IAC7B,oRAAoR;IACpR,EAAE;IACF,oCAAoC;IACpC,qGAAqG;IACrG,uVAAuV;IACvV,sIAAsI;IACtI,mJAAmJ;IACnJ,yLAAyL;IACzL,sLAAsL;IACtL,6LAA6L;IAC7L,EAAE;IACF,0NAA0N;CAC3N,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;GAKG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,QAAQ;IACR,oGAAoG;IACpG,sOAAsO;IACtO,mSAAmS;IACnS,+NAA+N;CAChO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG;IACpC,mEAAmE;IACnE,EAAE;IACF,2MAA2M;IAC3M,+OAA+O;IAC/O,+MAA+M;IAC/M,6QAA6Q;IAC7Q,+PAA+P;IAC/P,iNAAiN;IACjN,iTAAiT;IACjT,+OAA+O;IAC/O,6RAA6R;IAC7R,EAAE;IACF,4GAA4G;IAC5G,mFAAmF;IACnF,sGAAsG;IACtG,kLAAkL;CACnL,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,8BAA8B,GAAG;IAC5C,wBAAwB;IACxB,oFAAoF;IACpF,6MAA6M;IAC7M,oDAAoD;IACpD,sEAAsE;IACtE,sEAAsE;IACtE,yHAAyH;IACzH,2FAA2F;IAC3F,EAAE;IACF,8CAA8C;IAC9C,sIAAsI;IACtI,iGAAiG;IACjG,0rBAA0rB;IAC1rB,kLAAkL;CACnL,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC"}

package/dist/tools/execute-plan/implementer-criteria.d.ts ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * Execute-plan-specific implementer criteria.
+ *
+ * EXECUTE-PLAN'S PURPOSE — read this before adding categories.
+ * mma-execute-plan implements one task from a plan that was written by a
+ * higher-capability model. Your output is a diff the PLAN AUTHOR will
+ * read. They wrote the plan precisely; your job is execution, not
+ * improvement. The success criterion is:
+ *
+ *   "Could the plan author read your diff and say 'yes, that's exactly
+ *    what I wrote' — not 'close, but you took liberties' or 'wrong, you
+ *    missed step 3'?"
+ *
+ * That criterion is what makes a write load-bearing. The fidelity bar
+ * is sharper than mma-delegate's: even a "better" implementation that
+ * deviates from the plan is wrong here. If you think the plan is wrong:
+ * REPORT IT and stop. Do NOT silently improve.
+ *
+ * Plan execution is artifact-producing — you write files. Cross-agent
+ * spec + quality review still applies. But the spec the spec-reviewer
+ * checks against is the PLAN, not your interpretation of it.
+ */
+/**
+ * The orientation block. Goes at the TOP of every execute-plan prompt.
+ * Exposed as a single string.
+ *
+ * Without an explicit fidelity statement, workers default to "implement
+ * the goal" — which produces "improvements" that diverge from the plan
+ * (CODE SUBSTITUTION, ACCEPTANCE-CRITERIA OVERRUN). With this
+ * orientation, the worker treats the plan as authoritative and reports
+ * defects rather than silently working around them.
+ */
+export declare const EXECUTE_PLAN_PURPOSE_ORIENTATION: string;
+/**
+ * The scope rule for execute-plan, exposed as a single string.
+ */
+export declare const EXECUTE_PLAN_SCOPE_RULE: string;
+/**
+ * The failure-mode taxonomy for execute-plan.
+ *
+ * Workers calibrated on "implement the goal" tend to make "small
+ * improvements" to plans they think are imperfect. The 9 numbered
+ * categories carried in this string are the specific ways execution
+ * diverges from intent.
+ */
+export declare const EXECUTE_PLAN_FAILURE_MODES: string;
+/**
+ * Plan-fidelity reminder, exposed as a single string.
+ *
+ * The shared SEVERITY_LADDER does not apply to write tools. The
+ * counter-balance for execute-plan is opposite to read-only tools:
+ * the typical failure is OVER-IMPLEMENTATION (improving the plan), not
+ * under-finding. This block tells the worker that the load-bearing
+ * constraint is fidelity to the plan, not "good code".
+ */
+export declare const PLAN_FIDELITY_REMINDER: string;
+//# sourceMappingURL=implementer-criteria.d.ts.map

package/dist/tools/execute-plan/implementer-criteria.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"implementer-criteria.d.ts","sourceRoot":"","sources":["../../../src/tools/execute-plan/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH;;;;;;;;GAQG;AACH,eAAO,MAAM,gCAAgC,QAiBjC,CAAC;AAEb,eAAO,MAAM,uBAAuB,QAMxB,CAAC;AAEb;;;;;;GAMG;AACH,eAAO,MAAM,0BAA0B,QAiB3B,CAAC;AAEb;;;;;;;;GAQG;AACH,eAAO,MAAM,sBAAsB,QAYvB,CAAC"}

package/dist/tools/execute-plan/implementer-criteria.js ADDED Viewed

@@ -0,0 +1,104 @@
+/**
+ * Execute-plan-specific implementer criteria.
+ *
+ * EXECUTE-PLAN'S PURPOSE — read this before adding categories.
+ * mma-execute-plan implements one task from a plan that was written by a
+ * higher-capability model. Your output is a diff the PLAN AUTHOR will
+ * read. They wrote the plan precisely; your job is execution, not
+ * improvement. The success criterion is:
+ *
+ *   "Could the plan author read your diff and say 'yes, that's exactly
+ *    what I wrote' — not 'close, but you took liberties' or 'wrong, you
+ *    missed step 3'?"
+ *
+ * That criterion is what makes a write load-bearing. The fidelity bar
+ * is sharper than mma-delegate's: even a "better" implementation that
+ * deviates from the plan is wrong here. If you think the plan is wrong:
+ * REPORT IT and stop. Do NOT silently improve.
+ *
+ * Plan execution is artifact-producing — you write files. Cross-agent
+ * spec + quality review still applies. But the spec the spec-reviewer
+ * checks against is the PLAN, not your interpretation of it.
+ */
+/**
+ * The orientation block. Goes at the TOP of every execute-plan prompt.
+ *
+ * Without an explicit fidelity statement, workers default to "implement
+ * the goal" — which produces "improvements" that diverge from the plan
+ * (CODE SUBSTITUTION, ACCEPTANCE-CRITERIA OVERRUN). With this
+ * orientation, the worker treats the plan as authoritative and reports
+ * defects rather than silently working around them.
+ *
+ * Built as an array of lines joined with '\n'; the empty-string entries
+ * become blank lines between the purpose, the completion test, the
+ * fidelity rules, and the reviewer-awareness notes.
+ */
+export const EXECUTE_PLAN_PURPOSE_ORIENTATION = [
+    'Why this execution exists:',
+    'mma-execute-plan executes ONE task from a plan written by a higher-capability model. Your output is a diff the PLAN AUTHOR will read. They wrote the plan precisely. Your job is execution, not improvement.',
+    '',
+    'The completion test: would the plan author, reading your diff, say "yes, that\'s exactly what I wrote" — or would they say "close, but you took liberties" / "wrong, you missed step 3"?',
+    '',
+    'Fidelity rules — these override your usual instincts:',
+    '- Follow the plan EXACTLY as written. If the plan provides code blocks, use them VERBATIM (same names, same signatures, same comments, same imports).',
+    '- Do NOT redesign. Do NOT substitute your own approach. Do NOT improve names you find unidiomatic.',
+    '- Do NOT add steps the plan does not list. Do NOT skip steps the plan does list.',
+    '- Do NOT widen scope ("while I\'m here…"). Touch only what this task heading authorizes; another task probably owns the rest.',
+    '- If the plan looks wrong (typo, contradiction, undefined symbol, missing dependency): REPORT IT in your summary and stop. Do NOT silently work around it. Do NOT silently fix it.',
+    '- The plan was written by a higher-capability model than you. Your judgment about "what would be cleaner" is not load-bearing here; the plan is.',
+    '',
+    'Reviewer awareness for plan execution:',
+    '- The spec-reviewer compares your diff against the PLAN section, not against general "good code" heuristics. A diff that improves on the plan will fail spec review.',
+    '- The quality-reviewer checks safety/correctness without overriding the plan. If the plan is genuinely unsafe, that surfaces as a quality concern that the caller resolves — not as your unilateral fix.',
+].join('\n');
+/**
+ * The scope rule for execute-plan.
+ *
+ * Limits the worker to the single task the descriptor names and the
+ * files that task authorizes, lists what is out of scope, and says to
+ * report cross-cutting work in the summary when in doubt. The lines
+ * are joined with '\n' into one string.
+ */
+export const EXECUTE_PLAN_SCOPE_RULE = [
+    'Scope:',
+    '- Strictly the task the descriptor names. Other tasks in the plan have other workers; do not implement them on the side.',
+    '- Touch only the files the named task authorizes (explicit file paths in the plan section, or files clearly implied by the named task).',
+    '- Out of scope: other plan tasks; refactors not in the plan; "while I\'m here" cleanup; renaming code blocks the plan provided verbatim.',
+    '- Genuinely necessary cross-cutting work (e.g. updating a caller because the plan changed a signature): allowed when the plan implies it. When in doubt, REPORT it as part of your summary and let the caller decide.',
+].join('\n');
+/**
+ * The failure-mode taxonomy for execute-plan.
+ *
+ * Workers calibrated on "implement the goal" tend to make "small
+ * improvements" to plans they think are imperfect. The 9 categories
+ * below are the specific ways execution diverges from intent.
+ * The string ends with a short severity-calibration section describing
+ * what to report in the summary.
+ */
+export const EXECUTE_PLAN_FAILURE_MODES = [
+    'Patterns to consciously check for. Apply on EVERY plan execution:',
+    '',
+    '1. PLAN REWRITE — you decided the plan was suboptimal and "improved" it. This is the worst execute-plan failure mode. The plan author treats the plan as the contract; your improvements are a contract violation.',
+    '2. STEP SKIP — the plan section lists multiple steps; you implemented some and silently omitted others. Every step listed in the plan is a required deliverable unless the plan explicitly marks it optional.',
+    '3. STEP REORDER — you executed plan steps in a different order than the plan specifies. Order may be load-bearing (later steps may depend on earlier ones); preserve it.',
+    '4. CODE SUBSTITUTION — the plan provided a code block (function body, import line, type definition) and you wrote DIFFERENT code that "does the same thing". The plan\'s code is verbatim; copy it. Renaming, reformatting, or replacing with idiomatic equivalents is substitution.',
+    '5. ACCEPTANCE-CRITERIA OVERRUN — the plan listed criteria A and B; you also delivered C ("seemed natural"). Adding extras the plan did not list is scope creep — even if C is technically good code.',
+    '6. ACCEPTANCE-CRITERIA UNDERRUN — the plan implies sub-criteria (e.g. "add the function" implies "add the export to the index file"; "fix the bug" implies "add a regression test"). Missing implicit sub-criteria is the most common silent-partial-fix in plan execution.',
+    '7. WRONG-TASK MATCH — you matched a different plan section than the descriptor names (e.g. matched "Step 4: foo" when descriptor said "Step 4: bar"). The descriptor must match the plan heading verbatim; if no unique match exists, report that and stop.',
+    '8. CROSS-TASK CONTAMINATION — you touched files the named task does not authorize, on the assumption that another task in the plan will eventually need them. Other tasks have other workers; touching their files creates merge conflicts and ownership ambiguity.',
+    '9. PROBLEM-NOT-FLAGGED — you noticed a defect in the plan (typo, contradiction, undefined symbol, broken example) and silently worked around it. The defect must be reported in your summary so the caller can correct the plan; silent workarounds make the next plan execution harder.',
+    '',
+    'Severity calibration for plan execution (in your summary, not via SEVERITY_LADDER which is for read-only tools):',
+    '- Plan defects you notice: ALWAYS report. The caller may have a fix or may want to update the plan first.',
+    '- Sub-criteria you cannot satisfy without deviating from the plan: report and stop. Do not pick a workaround unilaterally.',
+    '- Sub-criteria that are clearly implied but not literally stated: implement them, name them in your summary as "implicit per the task heading".',
+].join('\n');
+/**
+ * Plan-fidelity reminder.
+ *
+ * The shared SEVERITY_LADDER does not apply to write tools. The
+ * counter-balance for execute-plan is opposite to read-only tools:
+ * the typical failure is OVER-IMPLEMENTATION (improving the plan), not
+ * under-finding. This block tells the worker the load-bearing
+ * constraint is fidelity, not "good code".
+ *
+ * NOTE(review): the last bullet says workers "miss findings of this
+ * shape"; an execute-plan worker produces a diff rather than findings,
+ * so this wording looks carried over from a read-only tool — confirm.
+ */
+export const PLAN_FIDELITY_REMINDER = [
+    'Plan-fidelity reminder:',
+    '- Your judgment about "what would be cleaner" is NOT load-bearing here. The plan is.',
+    '- Every deviation from the plan needs a reason and a report. Silent deviations are the most common defect.',
+    '- "Smallest faithful change" — touch the minimum the task authorizes, in the order the plan specifies, with the code the plan provides verbatim where provided.',
+    '- If the plan is wrong: report and stop. Do NOT silently fix the plan.',
+    '',
+    'Code-block faithfulness walk (REQUIRED on every task that includes plan-provided code):',
+    '- For each code block in the matched plan section, ask: did I copy this verbatim? Same names, same signatures, same comments, same imports?',
+    '- If no — what did I change? Why? Is the change required by the task or am I improving?',
+    '- Worked example. A plan section says: "Step 2: create `src/parser.ts` with content (verbatim): `export function parse(input: string): Token[] { ... }`". Naive worker writes `src/parser.ts` exporting `parseTokens` (renamed for clarity) with JSDoc added. Result: CODE SUBSTITUTION + ACCEPTANCE-CRITERIA OVERRUN. The downstream code that imports `parse` now breaks; the plan author reads the diff and says "I wrote `parse`, why is this `parseTokens`?". Correct worker creates `src/parser.ts` with exactly the named export `parse`, no JSDoc additions, no rename. If JSDoc would be valuable, mention it in the summary as a follow-up rather than adding it here.',
+    '- Most workers miss findings of this shape on first pass because the renamed/reformatted version "feels right" and they trust their instincts. The faithfulness walk forces the verbatim check.',
+].join('\n');
+//# sourceMappingURL=implementer-criteria.js.map

package/dist/tools/execute-plan/implementer-criteria.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"implementer-criteria.js","sourceRoot":"","sources":["../../../src/tools/execute-plan/implementer-criteria.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,gCAAgC,GAAG;IAC9C,4BAA4B;IAC5B,8MAA8M;IAC9M,EAAE;IACF,0LAA0L;IAC1L,EAAE;IACF,uDAAuD;IACvD,uJAAuJ;IACvJ,oGAAoG;IACpG,kFAAkF;IAClF,+HAA+H;IAC/H,oLAAoL;IACpL,kJAAkJ;IAClJ,EAAE;IACF,wCAAwC;IACxC,sKAAsK;IACtK,0MAA0M;CAC3M,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC,QAAQ;IACR,0HAA0H;IAC1H,yIAAyI;IACzI,0IAA0I;IAC1I,uNAAuN;CACxN,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAAG;IACxC,mEAAmE;IACnE,EAAE;IACF,oNAAoN;IACpN,+MAA+M;IAC/M,0KAA0K;IAC1K,sRAAsR;IACtR,sMAAsM;IACtM,6QAA6Q;IAC7Q,6PAA6P;IAC7P,qQAAqQ;IACrQ,0RAA0R;IAC1R,EAAE;IACF,kHAAkH;IAClH,2GAA2G;IAC3G,4HAA4H;IAC5H,iJAAiJ;CAClJ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG;IACpC,yBAAyB;IACzB,sFAAsF;IACtF,4GAA4G;IAC5G,iKAAiK;IACjK,wEAAwE;IACxE,EAAE;IACF,yFAAyF;IACzF,6IAA6I;IAC7I,yFAAyF;IACzF,kpBAAkpB;IAClpB,iMAAiM;CAClM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC"}

package/dist/tools/execute-plan/tool-config.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/execute-plan/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAMlF,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AACvE,OAAO,EAA4B,KAAK,oBAAoB,EAAE,MAAM,mDAAmD,CAAC;~~AAMxH~~,eAAO,MAAM,sBAAsB;;;;;;;;;;;;kBAOxB,CAAC;AAEZ,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,sBAAsB,CAAC,CAAC;AAE1E,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYvE;~~AA0CD~~,eAAO,MAAM,UAAU,EAAE,UAAU,CAAC,oBAAoB,EAAE,oBAAoB,CA4B7E,CAAC"}
1	+ {"version":3,"file":"tool-config.d.ts","sourceRoot":"","sources":["../../../src/tools/execute-plan/tool-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,mBAAmB,EAAE,MAAM,6CAA6C,CAAC;AAMlF,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AACvE,OAAO,EAA4B,KAAK,oBAAoB,EAAE,MAAM,mDAAmD,CAAC;AAYxH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;kBAOxB,CAAC;AAEZ,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,sBAAsB,CAAC,CAAC;AAE1E,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI,CAYvE;AA6DD,eAAO,MAAM,UAAU,EAAE,UAAU,CAAC,oBAAoB,EAAE,oBAAoB,CA4B7E,CAAC"}