npm - @zhixuan92/multi-model-agent - Versions diffs - 5.0.2 → 5.0.3 - Mend

@zhixuan92/multi-model-agent 5.0.2 → 5.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (255) hide show

package/README.md +8 -18
package/dist/cli/index.d.ts +62 -0
package/dist/cli/index.d.ts.map +1 -0
package/dist/cli/index.js +345 -0
package/dist/cli/index.js.map +1 -0
package/dist/cli/info.d.ts +22 -0
package/dist/cli/info.d.ts.map +1 -0
package/dist/cli/info.js +100 -0
package/dist/cli/info.js.map +1 -0
package/dist/cli/logs.d.ts +15 -0
package/dist/cli/logs.d.ts.map +1 -0
package/dist/cli/logs.js +102 -0
package/dist/cli/logs.js.map +1 -0
package/dist/cli/print-token.d.ts +18 -0
package/dist/cli/print-token.d.ts.map +1 -0
package/dist/cli/print-token.js +60 -0
package/dist/cli/print-token.js.map +1 -0
package/dist/cli/serve.d.ts +28 -0
package/dist/cli/serve.d.ts.map +1 -0
package/dist/cli/serve.js +405 -0
package/dist/cli/serve.js.map +1 -0
package/dist/cli/status.d.ts +49 -0
package/dist/cli/status.d.ts.map +1 -0
package/dist/cli/status.js +155 -0
package/dist/cli/status.js.map +1 -0
package/dist/cli/sync-skills.d.ts +58 -0
package/dist/cli/sync-skills.d.ts.map +1 -0
package/dist/cli/sync-skills.js +266 -0
package/dist/cli/sync-skills.js.map +1 -0
package/dist/cli/telemetry.d.ts +10 -0
package/dist/cli/telemetry.d.ts.map +1 -0
package/dist/cli/telemetry.js +161 -0
package/dist/cli/telemetry.js.map +1 -0
package/dist/cli/toggle.d.ts +26 -0
package/dist/cli/toggle.d.ts.map +1 -0
package/dist/cli/toggle.js +185 -0
package/dist/cli/toggle.js.map +1 -0
package/dist/http/async-dispatch.d.ts +44 -0
package/dist/http/async-dispatch.d.ts.map +1 -0
package/dist/http/async-dispatch.js +175 -0
package/dist/http/async-dispatch.js.map +1 -0
package/dist/http/auth.d.ts +20 -0
package/dist/http/auth.d.ts.map +1 -0
package/dist/http/auth.js +56 -0
package/dist/http/auth.js.map +1 -0
package/dist/http/canonicalize-file-paths.d.ts +8 -0
package/dist/http/canonicalize-file-paths.d.ts.map +1 -0
package/dist/http/canonicalize-file-paths.js +43 -0
package/dist/http/canonicalize-file-paths.js.map +1 -0
package/dist/http/cwd-validator.d.ts +11 -0
package/dist/http/cwd-validator.d.ts.map +1 -0
package/dist/http/cwd-validator.js +130 -0
package/dist/http/cwd-validator.js.map +1 -0
package/dist/http/errors.d.ts +4 -0
package/dist/http/errors.d.ts.map +1 -0
package/dist/http/errors.js +9 -0
package/dist/http/errors.js.map +1 -0
package/dist/http/execution-context.d.ts +18 -0
package/dist/http/execution-context.d.ts.map +1 -0
package/dist/http/execution-context.js +61 -0
package/dist/http/execution-context.js.map +1 -0
package/dist/http/handler-deps.d.ts +19 -0
package/dist/http/handler-deps.d.ts.map +1 -0
package/dist/http/handler-deps.js +2 -0
package/dist/http/handler-deps.js.map +1 -0
package/dist/http/handlers/control/batch-slice.d.ts +4 -0
package/dist/http/handlers/control/batch-slice.d.ts.map +1 -0
package/dist/http/handlers/control/batch-slice.js +40 -0
package/dist/http/handlers/control/batch-slice.js.map +1 -0
package/dist/http/handlers/control/batch.d.ts +23 -0
package/dist/http/handlers/control/batch.d.ts.map +1 -0
package/dist/http/handlers/control/batch.js +332 -0
package/dist/http/handlers/control/batch.js.map +1 -0
package/dist/http/handlers/control/context-blocks.d.ts +22 -0
package/dist/http/handlers/control/context-blocks.d.ts.map +1 -0
package/dist/http/handlers/control/context-blocks.js +111 -0
package/dist/http/handlers/control/context-blocks.js.map +1 -0
package/dist/http/handlers/introspection/health.d.ts +20 -0
package/dist/http/handlers/introspection/health.d.ts.map +1 -0
package/dist/http/handlers/introspection/health.js +18 -0
package/dist/http/handlers/introspection/health.js.map +1 -0
package/dist/http/handlers/introspection/status.d.ts +26 -0
package/dist/http/handlers/introspection/status.d.ts.map +1 -0
package/dist/http/handlers/introspection/status.js +136 -0
package/dist/http/handlers/introspection/status.js.map +1 -0
package/dist/http/handlers/tools/audit.d.ts +4 -0
package/dist/http/handlers/tools/audit.d.ts.map +1 -0
package/dist/http/handlers/tools/audit.js +43 -0
package/dist/http/handlers/tools/audit.js.map +1 -0
package/dist/http/handlers/tools/debug.d.ts +4 -0
package/dist/http/handlers/tools/debug.d.ts.map +1 -0
package/dist/http/handlers/tools/debug.js +43 -0
package/dist/http/handlers/tools/debug.js.map +1 -0
package/dist/http/handlers/tools/delegate.d.ts +4 -0
package/dist/http/handlers/tools/delegate.d.ts.map +1 -0
package/dist/http/handlers/tools/delegate.js +43 -0
package/dist/http/handlers/tools/delegate.js.map +1 -0
package/dist/http/handlers/tools/execute-plan.d.ts +4 -0
package/dist/http/handlers/tools/execute-plan.d.ts.map +1 -0
package/dist/http/handlers/tools/execute-plan.js +45 -0
package/dist/http/handlers/tools/execute-plan.js.map +1 -0
package/dist/http/handlers/tools/investigate.d.ts +4 -0
package/dist/http/handlers/tools/investigate.d.ts.map +1 -0
package/dist/http/handlers/tools/investigate.js +64 -0
package/dist/http/handlers/tools/investigate.js.map +1 -0
package/dist/http/handlers/tools/journal-recall.d.ts +4 -0
package/dist/http/handlers/tools/journal-recall.d.ts.map +1 -0
package/dist/http/handlers/tools/journal-recall.js +40 -0
package/dist/http/handlers/tools/journal-recall.js.map +1 -0
package/dist/http/handlers/tools/journal-record.d.ts +8 -0
package/dist/http/handlers/tools/journal-record.d.ts.map +1 -0
package/dist/http/handlers/tools/journal-record.js +40 -0
package/dist/http/handlers/tools/journal-record.js.map +1 -0
package/dist/http/handlers/tools/research.d.ts +4 -0
package/dist/http/handlers/tools/research.d.ts.map +1 -0
package/dist/http/handlers/tools/research.js +64 -0
package/dist/http/handlers/tools/research.js.map +1 -0
package/dist/http/handlers/tools/retry.d.ts +4 -0
package/dist/http/handlers/tools/retry.d.ts.map +1 -0
package/dist/http/handlers/tools/retry.js +73 -0
package/dist/http/handlers/tools/retry.js.map +1 -0
package/dist/http/handlers/tools/review.d.ts +4 -0
package/dist/http/handlers/tools/review.d.ts.map +1 -0
package/dist/http/handlers/tools/review.js +43 -0
package/dist/http/handlers/tools/review.js.map +1 -0
package/dist/http/journal-lock.d.ts +4 -0
package/dist/http/journal-lock.d.ts.map +1 -0
package/dist/http/journal-lock.js +34 -0
package/dist/http/journal-lock.js.map +1 -0
package/dist/http/middleware/body-reader.d.ts +16 -0
package/dist/http/middleware/body-reader.d.ts.map +1 -0
package/dist/http/middleware/body-reader.js +44 -0
package/dist/http/middleware/body-reader.js.map +1 -0
package/dist/http/middleware/caller-identity.d.ts +16 -0
package/dist/http/middleware/caller-identity.d.ts.map +1 -0
package/dist/http/middleware/caller-identity.js +16 -0
package/dist/http/middleware/caller-identity.js.map +1 -0
package/dist/http/middleware/decompress.d.ts +14 -0
package/dist/http/middleware/decompress.d.ts.map +1 -0
package/dist/http/middleware/decompress.js +51 -0
package/dist/http/middleware/decompress.js.map +1 -0
package/dist/http/project-registry.d.ts +54 -0
package/dist/http/project-registry.d.ts.map +1 -0
package/dist/http/project-registry.js +130 -0
package/dist/http/project-registry.js.map +1 -0
package/dist/http/request-observability.d.ts +8 -0
package/dist/http/request-observability.d.ts.map +1 -0
package/dist/http/request-observability.js +20 -0
package/dist/http/request-observability.js.map +1 -0
package/dist/http/request-pipeline.d.ts +16 -0
package/dist/http/request-pipeline.d.ts.map +1 -0
package/dist/http/request-pipeline.js +144 -0
package/dist/http/request-pipeline.js.map +1 -0
package/dist/http/server.d.ts +17 -0
package/dist/http/server.d.ts.map +1 -0
package/dist/http/server.js +300 -0
package/dist/http/server.js.map +1 -0
package/dist/http/types.d.ts +20 -0
package/dist/http/types.d.ts.map +1 -0
package/dist/http/types.js +2 -0
package/dist/http/types.js.map +1 -0
package/dist/skill-install/disabled-state.d.ts +35 -0
package/dist/skill-install/disabled-state.d.ts.map +1 -0
package/dist/skill-install/disabled-state.js +96 -0
package/dist/skill-install/disabled-state.js.map +1 -0
package/dist/skill-install/discover.d.ts +29 -0
package/dist/skill-install/discover.d.ts.map +1 -0
package/dist/skill-install/discover.js +104 -0
package/dist/skill-install/discover.js.map +1 -0
package/dist/skill-install/include-utils.d.ts +27 -0
package/dist/skill-install/include-utils.d.ts.map +1 -0
package/dist/skill-install/include-utils.js +90 -0
package/dist/skill-install/include-utils.js.map +1 -0
package/dist/skill-install/manifest.d.ts +82 -0
package/dist/skill-install/manifest.d.ts.map +1 -0
package/dist/skill-install/manifest.js +215 -0
package/dist/skill-install/manifest.js.map +1 -0
package/dist/skill-install/skill-installer-common.d.ts +26 -0
package/dist/skill-install/skill-installer-common.d.ts.map +1 -0
package/dist/skill-install/skill-installer-common.js +139 -0
package/dist/skill-install/skill-installer-common.js.map +1 -0
package/dist/skill-install/skill-installers/claude-code.d.ts +43 -0
package/dist/skill-install/skill-installers/claude-code.d.ts.map +1 -0
package/dist/skill-install/skill-installers/claude-code.js +65 -0
package/dist/skill-install/skill-installers/claude-code.js.map +1 -0
package/dist/skill-install/skill-installers/codex-cli.d.ts +27 -0
package/dist/skill-install/skill-installers/codex-cli.d.ts.map +1 -0
package/dist/skill-install/skill-installers/codex-cli.js +84 -0
package/dist/skill-install/skill-installers/codex-cli.js.map +1 -0
package/dist/skill-install/skill-installers/cursor.d.ts +72 -0
package/dist/skill-install/skill-installers/cursor.d.ts.map +1 -0
package/dist/skill-install/skill-installers/cursor.js +81 -0
package/dist/skill-install/skill-installers/cursor.js.map +1 -0
package/dist/skill-install/skill-installers/gemini-cli.d.ts +50 -0
package/dist/skill-install/skill-installers/gemini-cli.d.ts.map +1 -0
package/dist/skill-install/skill-installers/gemini-cli.js +72 -0
package/dist/skill-install/skill-installers/gemini-cli.js.map +1 -0
package/dist/skill-install/skill-manifest-sync.d.ts +11 -0
package/dist/skill-install/skill-manifest-sync.d.ts.map +1 -0
package/dist/skill-install/skill-manifest-sync.js +65 -0
package/dist/skill-install/skill-manifest-sync.js.map +1 -0
package/dist/skills/_shared/auth.md +41 -0
package/dist/skills/_shared/error-handling.md +31 -0
package/dist/skills/_shared/polling.md +88 -0
package/dist/skills/_shared/response-shape.md +55 -0
package/dist/skills/_shared/review-policy.md +15 -0
package/dist/skills/mma-audit/SKILL.md +270 -0
package/dist/skills/mma-context-blocks/SKILL.md +148 -0
package/dist/skills/mma-debug/SKILL.md +208 -0
package/dist/skills/mma-delegate/SKILL.md +216 -0
package/dist/skills/mma-execute-plan/SKILL.md +214 -0
package/dist/skills/mma-explore/SKILL.md +190 -0
package/dist/skills/mma-investigate/SKILL.md +258 -0
package/dist/skills/mma-journal-recall/SKILL.md +242 -0
package/dist/skills/mma-journal-record/SKILL.md +202 -0
package/dist/skills/mma-research/SKILL.md +223 -0
package/dist/skills/mma-retry/SKILL.md +221 -0
package/dist/skills/mma-review/SKILL.md +209 -0
package/dist/skills/multi-model-agent/SKILL.md +206 -0
package/dist/telemetry/consent.d.ts +4 -0
package/dist/telemetry/consent.d.ts.map +1 -0
package/dist/telemetry/consent.js +40 -0
package/dist/telemetry/consent.js.map +1 -0
package/dist/telemetry/flusher.d.ts +19 -0
package/dist/telemetry/flusher.d.ts.map +1 -0
package/dist/telemetry/flusher.js +277 -0
package/dist/telemetry/flusher.js.map +1 -0
package/dist/telemetry/generation.d.ts +9 -0
package/dist/telemetry/generation.d.ts.map +1 -0
package/dist/telemetry/generation.js +33 -0
package/dist/telemetry/generation.js.map +1 -0
package/dist/telemetry/identity.d.ts +9 -0
package/dist/telemetry/identity.d.ts.map +1 -0
package/dist/telemetry/identity.js +35 -0
package/dist/telemetry/identity.js.map +1 -0
package/dist/telemetry/install-id.d.ts +13 -0
package/dist/telemetry/install-id.d.ts.map +1 -0
package/dist/telemetry/install-id.js +49 -0
package/dist/telemetry/install-id.js.map +1 -0
package/dist/telemetry/install-meta.d.ts +10 -0
package/dist/telemetry/install-meta.d.ts.map +1 -0
package/dist/telemetry/install-meta.js +15 -0
package/dist/telemetry/install-meta.js.map +1 -0
package/dist/telemetry/queue.d.ts +35 -0
package/dist/telemetry/queue.d.ts.map +1 -0
package/dist/telemetry/queue.js +287 -0
package/dist/telemetry/queue.js.map +1 -0
package/dist/telemetry/recorder.d.ts +39 -0
package/dist/telemetry/recorder.d.ts.map +1 -0
package/dist/telemetry/recorder.js +173 -0
package/dist/telemetry/recorder.js.map +1 -0
package/package.json +43 -24
package/scripts/postinstall.js +36 -0
package/bin/mmagent.mjs +0 -47
package/postinstall.mjs +0 -8

package/dist/skills/mma-debug/SKILL.md ADDED Viewed

@@ -0,0 +1,208 @@
+---
+name: mma-debug
+description: >-
+  Use when a test fails, a build breaks, or behavior is unexpected AND narrowing
+  the root cause requires reading files, reproducing the failure, or tracing
+  across multiple modules — the worker investigates so the main agent stays on
+  the hypothesis
+when_to_use: >-
+  A failure has surfaced (test/build/runtime) AND you need investigation work —
+  read files, reproduce, trace — OR a methodology skill
+  (superpowers:systematic-debugging) points at the investigation step. Delegate
+  the read/reproduce/trace; the main agent stays on the hypothesis and the fix.
+version: 5.0.3
+---
+# mma-debug
+## Overview
+Submit a problem, context, and hypothesis to a worker for focused debugging. Unlike `mma-audit` and `mma-review`, all `filePaths` are investigated TOGETHER in a single task (not parallelized per file) — debugging needs cross-file reasoning.
+**Core principle:** The hypothesis is judgment (your job). Reading files and reproducing the failure is labor (the worker's job). Pass the hypothesis as input; receive structured findings.
+## When to Use
+**Use when:**
+- A test fails / build breaks / runtime behavior is unexpected
+- The root cause likely spans 2+ files
+- You have a hypothesis to test (or want the worker to suggest one)
+- A methodology skill (`superpowers:systematic-debugging`) routed here
+**Don't use when:**
+- The error message points at one file you can read in 30 seconds → just `Read`
+- You don't know what's broken yet → use `mma-investigate` first to map the area
+- You already know the fix → skip debug, dispatch `mma-delegate` with the fix
+## Endpoint
+`POST /debug?cwd=<abs-path>`
+@include _shared/auth.md
+## Request body
+```json
+{
+  "problem": "POST /login returns 500 when password contains special characters",
+  "context": "Regression introduced in commit abc123; only affects production config",
+  "hypothesis": "The bcrypt binding fails on non-ASCII input in the Docker image",
+  "subtype": "default",
+  "filePaths": [
+    "/project/src/auth/login.ts",
+    "/project/src/auth/password.ts"
+  ],
+  "contextBlockIds": []
+}
+```
+| Field | Type | Required | Notes |
+|---|---|---|---|
+| `problem` | string | yes | What is broken (one sentence; concrete symptom) |
+| `context` | string | no | Background — what changed recently, what works, what doesn't |
+| `hypothesis` | string | no | Your initial theory; worker tests it first, then explores |
+| `subtype` | `'default'` | no (defaults to `'default'`) | Reserved for future criteria sets; only `default` is wired today. |
+| `filePaths` | string[] | no | All files investigated together (cross-file reasoning) |
+| `contextBlockIds` | string[] | no | IDs from `mma-context-blocks` (e.g. error logs, traces) |
+> Worker tier for `mma-debug` is hardcoded to `complex` and is not caller-configurable. Sending `agentType` is rejected with HTTP 400.
+## Full example
+```bash
+BATCH=$(curl -f --show-error -s -X POST \
+  -H "X-MMA-Client: $MMA_CLIENT" \
+  -H "X-MMA-Main-Model: $MMA_MAIN_MODEL" \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{"problem":"Tests fail on CI only","hypothesis":"Missing env var","filePaths":["/project/src/config.ts"]}' \
+  "http://localhost:$PORT/debug?cwd=/project")
+BATCH_ID=$(echo "$BATCH" | jq -r '.batchId')
+```
+@include _shared/polling.md
+@include _shared/response-shape.md
+## Reading the findings
+The main agent reads `completed` + `message` + `findings` — the findings are the answer. For
+read-only routes, `filesChanged` is always `[]` and `commitSha` is always `null`.
+```json
+{
+  "completed": true,
+  "message": "Investigation complete; 1 finding.",
+  "findings": [
+    { "id": "F1", "severity": "high", "category": "root-cause",
+      "claim": "bcrypt binding fails on non-ASCII input in the Docker image.",
+      "evidence": "Worker reproduced the failure with `pass='café'`; strace shows EINVAL on encode call.",
+      "suggestion": "Normalize input to NFC form before calling bcrypt.",
+      "source": "implementer" }
+  ],
+  "filesChanged": [],
+  "commitSha": null,
+  "summary": "...",
+  "telemetry": { ... }
+}
+```
+### Finding shape
+Every finding has this shape:
+| Field | Type | Notes |
+|---|---|---|
+| `id` | string | Worker-assigned, e.g. `F1`, `F2`. Stable across chain. |
+| `severity` | `'critical' \| 'high' \| 'medium' \| 'low'` | 4-tier. |
+| `category` | string | Topical bucket, e.g. `root-cause`, `reproduction`. |
+| `claim` | string | One-sentence summary. |
+| `evidence` | string ≥20 chars | Verbatim from source when grounded. |
+| `suggestion?` | string | Optional fix recommendation. |
+| `source` | `'implementer' \| 'reviewer'` | Who produced the finding. |
+`annotatorConfidence` and `evidenceGrounded` are retired — they were v4 fields with no producers.
+### Recommended rendering by the main agent
+1. Show ALL findings — never silently drop. Severity and grounding are soft
+   signals, not gates.
+2. Default sort: severity (critical → low), then `id` ascending.
+3. `severity` is the authoritative value — use it directly.
+4. Mark findings with `evidence` shorter than 30 chars as "low-evidence"
+   (lighter color or `(low evidence)` annotation). User decides what to do.
+5. Severity-tier counts feed the dashboard.
+## Best practices
+This skill is one step in the larger flow described in `multi-model-agent` → "Best practices". Recipes that involve `mma-debug`:
+- **Recipe B — Debug-fix-review.** `mma-debug` → `mma-delegate` (apply fix) → `mma-review` with the acceptance criteria in the brief. Strict order. Register the failing test output / reproduction log as a context block before the debug call; reuse it on the review call.
+Anti-pattern alert: **`inline-labor-leakage`** (AP2). If you're about to read 3+ files in main context to "understand the bug," that's the labor we delegate — call `mma-debug` with the hypothesis instead.
+## Common pitfalls
+❌ **Vague `problem`**
+> "The login is broken"
+Worker has no symptom to chase. **Fix:** specific reproducer — `"POST /login with body {user:'a@b.c', pass:'café'} returns 500 with 'invalid character' in stderr"`.
+❌ **No `hypothesis`**
+The worker explores blindly and often investigates the wrong area first. **Fix:** even a weak hypothesis ("might be encoding-related") narrows the search space.
+❌ **Splitting one bug across multiple `mma-debug` calls**
+Debug intentionally bundles `filePaths` for cross-file reasoning. Splitting defeats this. **Fix:** one call with all suspect files; if you really have N independent failures, use `mma-delegate` with N tasks.
+❌ **Treating `mma-debug` as the fix step**
+Debug investigates and proposes; it doesn't necessarily write the fix. **Fix:** if the worker identifies a fix, dispatch `mma-delegate` to implement it (or write it inline if you understand it).
+❌ **Skipping when an error message looks self-explanatory**
+Often the obvious cause isn't the real one. **Fix:** a 30-second debug pass costs less than a wrong fix that breaks something else.
+## Terminal context block
+Every completed **read-route** task (audit / review / debug / investigate / research) auto-registers a reusable terminal context block containing its report (headline + findings). The block id is returned on each per-task result as **`contextBlockId`**. Write routes (delegate / execute-plan / retry) return `contextBlockId: null` — their record is the commit, not a block. This block is immutable, lives for the session duration, and counts against the project's `maxEntries` quota (default 500).
+Use it for delta follow-ups — feed prior results' block ids into a later call's `contextBlockIds`, filtering out nulls:
+    contextBlockIds: priorResults.map(r => r.contextBlockId).filter((id) => id !== null)
+**Use cases:**
+- Pass debug findings to a downstream `mma-delegate` fix step
+- Feed the root-cause analysis into a follow-up `mma-review` with acceptance criteria in the brief
+- Carry debug context forward through the debug → fix → review chain
+The block is registered server-side at task completion; no caller action is needed to create it. Delete it explicitly via `DELETE /context-blocks/:id` when no longer needed, or let it expire on session teardown.
+## Outcome semantics
+Every task result carries outcome fields that describe the debugging investigation's conclusion status:
+| Field | Type | Meaning |
+|---|---|---|
+| `findingsOutcome` | `'found' \| 'clean' \| 'not_applicable'` | Answers the question: did the investigation identify a root cause? |
+| `findingsOutcomeReason` | `string \| null` | When `findingsOutcome` is set, this explains why (e.g. "Root cause identified with high confidence: bcrypt binding fails on non-ASCII input" or "No evidence supports the hypothesis; root cause remains unknown"). |
+| `outcomeInferred` | `boolean` | `true` if the system inferred the outcome from findings count; `false` if the investigator explicitly stated it. |
+| `outcomeMalformed` | `boolean` | `true` if the outcome line was malformed and had to be repaired; `false` otherwise. |
+### Enum values
+- **`found`** — the investigation identified one or more root-cause hypotheses (findings) with supporting evidence. This indicates the problem has a diagnosed cause.
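+- **`clean`** — the investigation completed but found zero root causes. This value is part of the shared outcome enum but is not a legal outcome for `mma-debug` (see "Per-route legal outcomes" below); a failure that remains unexplained despite thorough investigation is reported as `not_applicable` instead.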
+- **`not_applicable`** — the investigation could not proceed (e.g., inability to reproduce the failure, missing context, or out of scope). This is the "unable to diagnose" state.
+### Empty findings ≠ failure
+A crucial semantic: **empty findings does NOT mean `completed: false` or a failed debug session.** An investigation that proceeds thoroughly and produces zero root-cause candidates is a valid `completed: true` outcome; it means "I looked hard and found nothing." For debug, this often surfaces a `not_applicable` outcome instead (root cause is elsewhere), but zero findings is still a success.
+### Per-route legal outcomes
+The legal outcomes for this route are: `['found', 'not_applicable']`
+- **`found`** — one or more root-cause hypotheses were identified across the investigation criteria.
+- **`not_applicable`** — the failure could not be diagnosed (reproduction failed, wrong area, or scope issue).
+The outcome `clean` (zero findings + success) is not legal for `mma-debug` because a debug session always either identifies a root cause or cannot proceed.
+@include _shared/error-handling.md

package/dist/skills/mma-delegate/SKILL.md ADDED Viewed

@@ -0,0 +1,216 @@
+---
+name: mma-delegate
+description: >-
+  Use when you have one or more ad-hoc implementation or research tasks WITHOUT
+  a plan file on disk and you want them to run on cheap workers in parallel
+  instead of consuming main-context tokens
+when_to_use: >-
+  You have ad-hoc implementation or research tasks (no plan file on disk) AND
+  mmagent is running. Prefer this over inline Agent dispatches or
+  superpowers:dispatching-parallel-agents — workers are cheaper, parallel-safe,
+  and keep main context free. If a plan file exists → use mma-execute-plan. If
+  the task is audit / review / verify / debug / investigate → use the matching
+  specialized skill.
+version: 5.0.3
+---
+# mma-delegate
+## Overview
+Dispatch one or more ad-hoc tasks to workers concurrently. Each task is an independent instruction with optional file scope, acceptance criteria, and context blocks.
+**Core principle:** Workers run on cheap providers; the main agent consumes only the structured per-task report. Parallelize freely as long as tasks don't write the same files.
+## When to Use
+**Use when:**
+- 2+ unrelated implementation tasks (parallel speedup)
+- A research task you'd otherwise spend tokens reading and grepping
+- A focused refactor that fits in one prompt
+- The task does NOT match audit / review / verify / debug / investigate (those have specialized skills)
+**Don't use when:**
+- A plan file exists on disk → `mma-execute-plan` (descriptors auto-match plan headings)
+- Two tasks write the same file → dispatch sequentially, not in one batch (workers race)
+- The work needs to read across many files for synthesis only → `mma-investigate` is cheaper (read-only)
+## Endpoint
+`POST /delegate?cwd=<abs-path>`
+@include _shared/auth.md
+## Request body
+```json
+{
+  "tasks": [
+    {
+      "prompt": "Add input validation to the login handler",
+      "agentType": "standard",
+      "filePaths": ["/project/src/auth/login.ts"],
+      "done": "All inputs validated; unit tests pass",
+      "contextBlockIds": ["cb_abc123"]
+    }
+  ]
+}
+```
+| Field | Type | Required | Notes |
+|---|---|---|---|
+| `tasks` | array | yes | At least one task |
+| `tasks[].prompt` | string | yes | The task instruction |
+| `tasks[].agentType` | `"standard"` / `"complex"` | no | Worker tier. Default `"standard"`. Pick `"complex"` when the task is ambiguous, security-sensitive, touches many files, or a prior standard run came back with `filesWritten: 0` / hit `incompleteReason: "turn_cap"`. |
+| `tasks[].filePaths` | string[] | no | Files the worker focuses on |
+| `tasks[].done` | string | no | Acceptance criteria |
+| `tasks[].contextBlockIds` | string[] | no | IDs from `mma-context-blocks` |
+| `tasks[].reviewPolicy` | `"full"` / `"quality_only"` / `"diff_only"` / `"none"` | no | See review-policy snippet below. Default `"full"` |
+@include _shared/review-policy.md
+## Full example
+```bash
+BATCH=$(curl -f --show-error -s -X POST \
+  -H "X-MMA-Client: $MMA_CLIENT" \
+  -H "X-MMA-Main-Model: $MMA_MAIN_MODEL" \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{"tasks":[{"prompt":"Refactor utils.ts to remove dead code","filePaths":["/project/src/utils.ts"]}]}' \
+  "http://localhost:$PORT/delegate?cwd=/project")
+BATCH_ID=$(echo "$BATCH" | jq -r '.batchId')
+```
+@include _shared/polling.md
+## Response shapes
+### POST /delegate?cwd=<abs> — dispatch response (202)
+```json
+{ "batchId": "<uuid>", "statusUrl": "/batch/<uuid>" }
+```
+Use `batchId` to poll. `statusUrl` is a convenience pointer.
+### GET /batch/:id — polling response
+The HTTP status is the state discriminator:
+| Status | Meaning |
+|---|---|
+| `202 text/plain` | Still pending — body is the running headline string |
+| `200 application/json` | Terminal — body is the batch envelope below |
+| `404` / `401` / `5xx` | Error — see Error response below; stop polling |
+### GET /batch/:id?taskIndex=N — single task slice
+Same envelope. `results` contains exactly the task at index `N`. Returns `404 unknown_task_index` if `N` is out of range.
+### Reading the task result
+Each task result is the per-task wire object (`ComposePayload`):
+```json
+{
+  "completed": true,
+  "message": "Task completed; tests passed; one file changed.",
+  "findings": [
+    {
+      "id": "F1",
+      "severity": "high",
+      "category": "correctness",
+      "claim": "The function does not handle empty input",
+      "evidence": "function foo() { ... } // no null check",
+      "suggestion": "Add an explicit null guard at the top",
+      "source": "reviewer"
+    }
+  ],
+  "summary": "Refactored utils.ts — removed 3 dead branches, added JSDoc",
+  "filesChanged": ["/project/src/utils.ts"],
+  "commitSha": "abc123def",
+  "blockId": null,
+  "telemetry": {
+    "totalDurationMs": 12400,
+    "totalCostUSD": 0.08,
+    "workerSelfAssessment": "done",
+    "reviewVerdict": "approved",
+    "commitOutcome": "committed",
+    "stopReason": "normal",
+    "haltedStage": null,
+    "stages": [
+      { "name": "prepare",        "outcome": "advance", "durationMs": 2,    "costUSD": 0 },
+      { "name": "register-block", "outcome": "skip",    "comment": "register-block does not apply to route=delegate", "durationMs": 0, "costUSD": 0 },
+      { "name": "implement",      "outcome": "advance", "durationMs": 8900, "costUSD": 0.05 },
+      { "name": "review",         "outcome": "advance", "durationMs": 2100, "costUSD": 0.02 },
+      { "name": "rework",         "outcome": "skip",    "comment": "rework skipped because review approved", "durationMs": 0, "costUSD": 0 },
+      { "name": "commit",         "outcome": "advance", "durationMs": 340,  "costUSD": 0 },
+      { "name": "annotate",       "outcome": "advance", "durationMs": 890,  "costUSD": 0.01 },
+      { "name": "compose",        "outcome": "advance", "durationMs": 68,   "costUSD": 0 },
+      { "name": "terminal",       "outcome": "advance", "durationMs": 100,  "costUSD": 0 }
+    ]
+  }
+}
+```
+**Top-level fields to read for the main-agent verdict:**
+| Field | When `true` / populated |
+|---|---|
+| `completed: true` | Task succeeded. `message` is the summary; `findings` are post-review issues (if any). |
+| `completed: false` | Task did not complete. `message` names the blocking gate or finding; `findings` carry any discovered issues. |
+| `findings` | Issues surfaced by the worker or reviewer. `severity` = `critical` \| `high` \| `medium` \| `low`. `source` = `implementer` \| `reviewer`. |
+| `filesChanged` | File paths modified (empty for read-only routes). |
+| `commitSha` | Git SHA of the committed diff; `null` for read-only routes or when commit was skipped. |
+`blockId` is not used for the delegate route — it is always `null`, as is `contextBlockId` (write routes register no terminal block). To carry inputs forward, register them explicitly via `mma-context-blocks` and pass `contextBlockIds`.
+**The stages array** (always 9 rows) is the canonical telemetry log. `outcome` is one of:
+- `advance` — stage ran and produced its payload
+- `skip` — stage did not run; `comment` explains why
+- `halt` — stage stopped the chain; `comment` is the failure message
+- `not_run` — stage was not reached because a prior stage halted
+Use `telemetry.haltedStage` to find the first halt; `telemetry.stopReason` to find why.
+### Error response (4xx / 5xx)
+```json
+{
+  "error": "<code>",
+  "message": "<human-readable>",
+  "details": { /* optional structured context, e.g. fieldErrors for 400 */ }
+}
+```
+`details` is optional and present only when the server has structured additional context.
+## Best practices
+This skill is one step in the larger flow described in `multi-model-agent` → "Best practices". Recipes that involve `mma-delegate`:
+- **Recipe A (the fix step).** Between audit rounds, `mma-delegate` applies the fix when the change is more than 1-2 lines. Register the spec/audit findings as a context block; pass via `contextBlockIds`.
+- **Recipe B (the apply-fix step).** After `mma-debug` returns a hypothesis, `mma-delegate` applies the fix. Same context block carries forward to a follow-up `mma-review` if you want acceptance-criteria checking.
+Anti-pattern alert: **`inline-labor-leakage`** (AP2). If you're reading 3+ files or grepping in main context before dispatching, you're paying flagship-model tokens for labor. Pass the file paths to `mma-delegate` and let the worker read.
+## Common pitfalls
+❌ **Two tasks writing the same file in one batch**
+> tasks: [{prompt:"add JWT to login.ts"}, {prompt:"add logging to login.ts"}]
+Workers run concurrently and race on the file. **Fix:** dispatch sequentially, or merge into one prompt.
+❌ **Inlining the same large document in every task's prompt**
+N tasks × 50KB = N transmissions. **Fix:** register the doc once via `mma-context-blocks`, pass the `contextBlockIds` to each task.
+❌ **Reading the worker's diff inline before review**
+The reviewer sees the full diff with the original prompt as context. Reading inline burns main-context tokens for no quality gain.
+## Terminal context block
+Write-route tasks (delegate / execute-plan / retry) do NOT register a terminal context block — their durable record is the commit (`commitSha` + changed files). The per-task result's `contextBlockId` is always `null` for these routes. Read routes (audit / review / debug / investigate / research) return a non-null `contextBlockId`; see those skills for the delta-follow-up recipe.
+@include _shared/error-handling.md

package/dist/skills/mma-execute-plan/SKILL.md ADDED Viewed

@@ -0,0 +1,214 @@
+---
+name: mma-execute-plan
+description: >-
+  Use when a plan or spec file exists on disk (any markdown with numbered task
+  headings — docs/superpowers/plans/*.md, a TODO list, a spec doc) and you need
+  to implement one or more tasks from it on cheap workers in parallel
+when_to_use: >-
+  A plan file exists on disk AND you need to implement one or more tasks from it
+  AND mmagent is running. Prefer this over inline Agent dispatches or
+  superpowers:subagent-driven-development / superpowers:executing-plans —
+  workers are cheaper and don't pollute main context. Task descriptors must
+  match plan headings verbatim.
+version: 5.0.3
+---
+# mma-execute-plan
+## Overview
+Dispatch named tasks from a plan file to workers. Each `taskDescriptors` string must match a heading in the plan verbatim (e.g. `"1. Setup database schema"`). All tasks run in parallel; duplicate descriptors are rejected.
+**Core principle:** The plan IS the prompt. Workers re-read the plan file in-process and find their named task — you don't need to inline the task body.
+## When to Use
+**Use when:**
+- A plan/spec markdown exists with numbered task headings
+- You want to dispatch a subset (or all) of those tasks
+- Tasks are mostly independent (parallel-safe)
+**Don't use when:**
+- No plan file → `mma-delegate` (pass the prompt directly)
+- Tasks form a hard linear sequence (later tasks depend on earlier ones' outputs) → dispatch in order, one batch each
+- The "plan" is in conversation only, not on disk → write it to disk first, or use `mma-delegate`
+## Endpoint
+`POST /execute-plan?cwd=<abs-path>`
+@include _shared/auth.md
+## Request body
+```json
+{
+  "taskDescriptors": [
+    "1. Add input validation to login handler",
+    "2. Write unit tests for the auth module"
+  ],
+  "filePaths": [
+    "/project/docs/plan.md"
+  ],
+  "contextBlockIds": []
+}
+```
+| Field | Type | Required | Notes |
+|---|---|---|---|
+| `taskDescriptors` | string[] | yes | At least one; must be unique; each string matches a plan heading verbatim |
+| `filePaths` | string[] | yes | EXACTLY one entry: the plan markdown file. Source files belong in `contextBlockIds` (registered via `mma-context-blocks`) so workers can grep them on demand without re-inlining into every worker prompt |
+| `contextBlockIds` | string[] | no | IDs from `mma-context-blocks` — the right place for source files referenced by the plan |
+| `perTaskReviewPolicy` | `Record<string, 'full'\|'quality_only'\|'diff_only'\|'none'>` | no | Per-task-index review policy override. Key = task index as string (`"0"`, `"1"`, ...). Default per task: `"full"` |
+| `cwd` | string | no | Override the `?cwd=` query param value at the body level (rare; usually pass via query) |
+@include _shared/review-policy.md
+> **No `agentType` here.** Worker tier is hardcoded to `standard` for every plan task; sending `agentType` (top-level or per-task) is rejected with HTTP 400. For tasks that need `complex` tier, dispatch via `mma-delegate` with the plan task as the prompt and `agentType: "complex"`.
+## Full example
+```bash
+BATCH=$(curl -f --show-error -s -X POST \
+  -H "X-MMA-Client: $MMA_CLIENT" \
+  -H "X-MMA-Main-Model: $MMA_MAIN_MODEL" \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{"taskDescriptors":["3. Migrate database schema"],"filePaths":["/project/docs/plan.md"]}' \
+  "http://localhost:$PORT/execute-plan?cwd=/project")
+BATCH_ID=$(echo "$BATCH" | jq -r '.batchId')
+```
+@include _shared/polling.md
+## Response shapes
+### POST /execute-plan?cwd=<abs> — dispatch response (202)
+```json
+{ "batchId": "<uuid>", "statusUrl": "/batch/<uuid>" }
+```
+Use `batchId` to poll. `statusUrl` is a convenience pointer.
+### GET /batch/:id — polling response
+The HTTP status is the state discriminator:
+| Status | Meaning |
+|---|---|
+| `202 text/plain` | Still pending — body is the running headline string |
+| `200 application/json` | Terminal — body is the batch envelope below |
+| `404` / `401` / `5xx` | Error — see Error response below; stop polling |
+### GET /batch/:id?taskIndex=N — single task slice
+Same envelope. `results` contains exactly the task at index `N`. Returns `404 unknown_task_index` if `N` is out of range.
+### Reading the task result
+Each task result is the per-task wire object (`ComposePayload`):
+```json
+{
+  "completed": true,
+  "message": "Task completed; tests passed; one file changed.",
+  "findings": [
+    {
+      "id": "F1",
+      "severity": "high",
+      "category": "correctness",
+      "claim": "The function does not handle empty input",
+      "evidence": "function foo() { ... } // no null check",
+      "suggestion": "Add an explicit null guard at the top",
+      "source": "reviewer"
+    }
+  ],
+  "summary": "Refactored utils.ts — removed 3 dead branches, added JSDoc",
+  "filesChanged": ["/project/src/utils.ts"],
+  "commitSha": "abc123def",
+  "blockId": null,
+  "telemetry": {
+    "totalDurationMs": 12400,
+    "totalCostUSD": 0.08,
+    "workerSelfAssessment": "done",
+    "reviewVerdict": "approved",
+    "commitOutcome": "committed",
+    "stopReason": "normal",
+    "haltedStage": null,
+    "stages": [
+      { "name": "prepare",        "outcome": "advance", "durationMs": 2,    "costUSD": 0 },
+      { "name": "register-block", "outcome": "skip",    "comment": "register-block does not apply to route=execute-plan", "durationMs": 0, "costUSD": 0 },
+      { "name": "implement",      "outcome": "advance", "durationMs": 8900, "costUSD": 0.05 },
+      { "name": "review",         "outcome": "advance", "durationMs": 2100, "costUSD": 0.02 },
+      { "name": "rework",         "outcome": "skip",    "comment": "rework skipped because review approved", "durationMs": 0, "costUSD": 0 },
+      { "name": "commit",         "outcome": "advance", "durationMs": 340,  "costUSD": 0 },
+      { "name": "annotate",       "outcome": "advance", "durationMs": 890,  "costUSD": 0.01 },
+      { "name": "compose",        "outcome": "advance", "durationMs": 68,   "costUSD": 0 },
+      { "name": "terminal",       "outcome": "advance", "durationMs": 100,  "costUSD": 0 }
+    ]
+  }
+}
+```
+**Top-level fields to read for the main-agent verdict:**
+| Field | When `true` / populated |
+|---|---|
+| `completed: true` | Task succeeded. `message` is the summary; `findings` are post-review issues (if any). |
+| `completed: false` | Task did not complete. `message` names the blocking gate or finding; `findings` carry any discovered issues. |
+| `findings` | Issues surfaced by the worker or reviewer. `severity` = `critical` \| `high` \| `medium` \| `low`. `source` = `implementer` \| `reviewer`. |
+| `filesChanged` | File paths modified (empty for read-only routes). |
+| `commitSha` | Git SHA of the committed diff; `null` for read-only routes or when commit was skipped. |
+| `blockId` | Always `null` (execute-plan is a write route; `contextBlockId` is `null` too — no terminal block). |
+**The stages array** (always 9 rows) is the canonical telemetry log. `outcome` is one of:
+- `advance` — stage ran and produced its payload
+- `skip` — stage did not run; `comment` explains why
+- `halt` — stage stopped the chain; `comment` is the failure message
+- `not_run` — stage was not reached because a prior stage halted
+Use `telemetry.haltedStage` to find the first halt; `telemetry.stopReason` to find why.
+### Error response (4xx / 5xx)
+```json
+{
+  "error": "<code>",
+  "message": "<human-readable>",
+  "details": { /* optional structured context, e.g. fieldErrors for 400 */ }
+}
+```
+`details` is optional and present only when the server has structured additional context.
+## Best practices
+This skill is one step in the larger flow described in `multi-model-agent` → "Best practices". Recipes that involve `mma-execute-plan`:
+- **Recipe C — Investigate-plan-execute.** `mma-investigate` → write the plan → `mma-execute-plan` → `mma-retry` on failed indices. Pass the plan file via `filePaths`, and register any source files the plan references as context blocks before the execute-plan call so they aren't re-inlined into every worker's prompt; retry inherits the same configuration.
+- **Recipe D — Plan-execute-retry (entry point).** `mma-execute-plan` is the producer of the `batchId` that `mma-retry` consumes. When this batch returns mixed `done` / `failed`, the next call is `mma-retry` with failed indices, NOT a re-dispatch.
+Anti-pattern alert: **`full-batch-redispatch`** (AP4). When the batch returns mixed `done` / `failed`, do NOT re-run the whole task list — use `mma-retry` with the failed indices only. Re-running the whole list re-charges every successful task.
+## Common pitfalls
+❌ **Task descriptor doesn't match plan heading verbatim**
+> taskDescriptors: ["Migrate db schema"]    ← plan heading is "3. Migrate database schema"
+Worker rejects with "no matching task" or matches the wrong one. **Fix:** copy the heading from the plan, including the leading number.
+❌ **Forgetting the plan file in `filePaths`**
+> filePaths: ["/project/src/db/schema.sql"]    ← no plan file
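+Worker can't read the task body. **Fix:** `filePaths` must contain exactly one entry, the plan path: `filePaths: ["/project/docs/plan.md"]`. Register source files such as `/project/src/db/schema.sql` via `mma-context-blocks` and pass their IDs in `contextBlockIds`.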
+❌ **Dispatching dependent tasks in one batch**
+Task 5 depends on Task 4's output → workers race; Task 5 might run before Task 4 finishes. **Fix:** dispatch Task 4, wait for terminal, then dispatch Task 5.
+## Terminal context block
+Write-route tasks (delegate / execute-plan / retry) do NOT register a terminal context block — their durable record is the commit (`commitSha` + changed files). The per-task result's `contextBlockId` is always `null` for these routes. Read routes (audit / review / debug / investigate / research) return a non-null `contextBlockId`; see those skills for the delta-follow-up recipe.
+@include _shared/error-handling.md