@zhixuan92/multi-model-agent 5.0.2 → 5.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -18
- package/dist/cli/index.d.ts +62 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +345 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/info.d.ts +22 -0
- package/dist/cli/info.d.ts.map +1 -0
- package/dist/cli/info.js +100 -0
- package/dist/cli/info.js.map +1 -0
- package/dist/cli/logs.d.ts +15 -0
- package/dist/cli/logs.d.ts.map +1 -0
- package/dist/cli/logs.js +102 -0
- package/dist/cli/logs.js.map +1 -0
- package/dist/cli/print-token.d.ts +18 -0
- package/dist/cli/print-token.d.ts.map +1 -0
- package/dist/cli/print-token.js +60 -0
- package/dist/cli/print-token.js.map +1 -0
- package/dist/cli/serve.d.ts +28 -0
- package/dist/cli/serve.d.ts.map +1 -0
- package/dist/cli/serve.js +405 -0
- package/dist/cli/serve.js.map +1 -0
- package/dist/cli/status.d.ts +49 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +155 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/sync-skills.d.ts +58 -0
- package/dist/cli/sync-skills.d.ts.map +1 -0
- package/dist/cli/sync-skills.js +266 -0
- package/dist/cli/sync-skills.js.map +1 -0
- package/dist/cli/telemetry.d.ts +10 -0
- package/dist/cli/telemetry.d.ts.map +1 -0
- package/dist/cli/telemetry.js +161 -0
- package/dist/cli/telemetry.js.map +1 -0
- package/dist/cli/toggle.d.ts +26 -0
- package/dist/cli/toggle.d.ts.map +1 -0
- package/dist/cli/toggle.js +185 -0
- package/dist/cli/toggle.js.map +1 -0
- package/dist/http/async-dispatch.d.ts +44 -0
- package/dist/http/async-dispatch.d.ts.map +1 -0
- package/dist/http/async-dispatch.js +175 -0
- package/dist/http/async-dispatch.js.map +1 -0
- package/dist/http/auth.d.ts +20 -0
- package/dist/http/auth.d.ts.map +1 -0
- package/dist/http/auth.js +56 -0
- package/dist/http/auth.js.map +1 -0
- package/dist/http/canonicalize-file-paths.d.ts +8 -0
- package/dist/http/canonicalize-file-paths.d.ts.map +1 -0
- package/dist/http/canonicalize-file-paths.js +43 -0
- package/dist/http/canonicalize-file-paths.js.map +1 -0
- package/dist/http/cwd-validator.d.ts +11 -0
- package/dist/http/cwd-validator.d.ts.map +1 -0
- package/dist/http/cwd-validator.js +130 -0
- package/dist/http/cwd-validator.js.map +1 -0
- package/dist/http/errors.d.ts +4 -0
- package/dist/http/errors.d.ts.map +1 -0
- package/dist/http/errors.js +9 -0
- package/dist/http/errors.js.map +1 -0
- package/dist/http/execution-context.d.ts +18 -0
- package/dist/http/execution-context.d.ts.map +1 -0
- package/dist/http/execution-context.js +61 -0
- package/dist/http/execution-context.js.map +1 -0
- package/dist/http/handler-deps.d.ts +19 -0
- package/dist/http/handler-deps.d.ts.map +1 -0
- package/dist/http/handler-deps.js +2 -0
- package/dist/http/handler-deps.js.map +1 -0
- package/dist/http/handlers/control/batch-slice.d.ts +4 -0
- package/dist/http/handlers/control/batch-slice.d.ts.map +1 -0
- package/dist/http/handlers/control/batch-slice.js +40 -0
- package/dist/http/handlers/control/batch-slice.js.map +1 -0
- package/dist/http/handlers/control/batch.d.ts +23 -0
- package/dist/http/handlers/control/batch.d.ts.map +1 -0
- package/dist/http/handlers/control/batch.js +332 -0
- package/dist/http/handlers/control/batch.js.map +1 -0
- package/dist/http/handlers/control/context-blocks.d.ts +22 -0
- package/dist/http/handlers/control/context-blocks.d.ts.map +1 -0
- package/dist/http/handlers/control/context-blocks.js +111 -0
- package/dist/http/handlers/control/context-blocks.js.map +1 -0
- package/dist/http/handlers/introspection/health.d.ts +20 -0
- package/dist/http/handlers/introspection/health.d.ts.map +1 -0
- package/dist/http/handlers/introspection/health.js +18 -0
- package/dist/http/handlers/introspection/health.js.map +1 -0
- package/dist/http/handlers/introspection/status.d.ts +26 -0
- package/dist/http/handlers/introspection/status.d.ts.map +1 -0
- package/dist/http/handlers/introspection/status.js +136 -0
- package/dist/http/handlers/introspection/status.js.map +1 -0
- package/dist/http/handlers/tools/audit.d.ts +4 -0
- package/dist/http/handlers/tools/audit.d.ts.map +1 -0
- package/dist/http/handlers/tools/audit.js +43 -0
- package/dist/http/handlers/tools/audit.js.map +1 -0
- package/dist/http/handlers/tools/debug.d.ts +4 -0
- package/dist/http/handlers/tools/debug.d.ts.map +1 -0
- package/dist/http/handlers/tools/debug.js +43 -0
- package/dist/http/handlers/tools/debug.js.map +1 -0
- package/dist/http/handlers/tools/delegate.d.ts +4 -0
- package/dist/http/handlers/tools/delegate.d.ts.map +1 -0
- package/dist/http/handlers/tools/delegate.js +43 -0
- package/dist/http/handlers/tools/delegate.js.map +1 -0
- package/dist/http/handlers/tools/execute-plan.d.ts +4 -0
- package/dist/http/handlers/tools/execute-plan.d.ts.map +1 -0
- package/dist/http/handlers/tools/execute-plan.js +45 -0
- package/dist/http/handlers/tools/execute-plan.js.map +1 -0
- package/dist/http/handlers/tools/investigate.d.ts +4 -0
- package/dist/http/handlers/tools/investigate.d.ts.map +1 -0
- package/dist/http/handlers/tools/investigate.js +64 -0
- package/dist/http/handlers/tools/investigate.js.map +1 -0
- package/dist/http/handlers/tools/journal-recall.d.ts +4 -0
- package/dist/http/handlers/tools/journal-recall.d.ts.map +1 -0
- package/dist/http/handlers/tools/journal-recall.js +40 -0
- package/dist/http/handlers/tools/journal-recall.js.map +1 -0
- package/dist/http/handlers/tools/journal-record.d.ts +8 -0
- package/dist/http/handlers/tools/journal-record.d.ts.map +1 -0
- package/dist/http/handlers/tools/journal-record.js +40 -0
- package/dist/http/handlers/tools/journal-record.js.map +1 -0
- package/dist/http/handlers/tools/research.d.ts +4 -0
- package/dist/http/handlers/tools/research.d.ts.map +1 -0
- package/dist/http/handlers/tools/research.js +64 -0
- package/dist/http/handlers/tools/research.js.map +1 -0
- package/dist/http/handlers/tools/retry.d.ts +4 -0
- package/dist/http/handlers/tools/retry.d.ts.map +1 -0
- package/dist/http/handlers/tools/retry.js +73 -0
- package/dist/http/handlers/tools/retry.js.map +1 -0
- package/dist/http/handlers/tools/review.d.ts +4 -0
- package/dist/http/handlers/tools/review.d.ts.map +1 -0
- package/dist/http/handlers/tools/review.js +43 -0
- package/dist/http/handlers/tools/review.js.map +1 -0
- package/dist/http/journal-lock.d.ts +4 -0
- package/dist/http/journal-lock.d.ts.map +1 -0
- package/dist/http/journal-lock.js +34 -0
- package/dist/http/journal-lock.js.map +1 -0
- package/dist/http/middleware/body-reader.d.ts +16 -0
- package/dist/http/middleware/body-reader.d.ts.map +1 -0
- package/dist/http/middleware/body-reader.js +44 -0
- package/dist/http/middleware/body-reader.js.map +1 -0
- package/dist/http/middleware/caller-identity.d.ts +16 -0
- package/dist/http/middleware/caller-identity.d.ts.map +1 -0
- package/dist/http/middleware/caller-identity.js +16 -0
- package/dist/http/middleware/caller-identity.js.map +1 -0
- package/dist/http/middleware/decompress.d.ts +14 -0
- package/dist/http/middleware/decompress.d.ts.map +1 -0
- package/dist/http/middleware/decompress.js +51 -0
- package/dist/http/middleware/decompress.js.map +1 -0
- package/dist/http/project-registry.d.ts +54 -0
- package/dist/http/project-registry.d.ts.map +1 -0
- package/dist/http/project-registry.js +130 -0
- package/dist/http/project-registry.js.map +1 -0
- package/dist/http/request-observability.d.ts +8 -0
- package/dist/http/request-observability.d.ts.map +1 -0
- package/dist/http/request-observability.js +20 -0
- package/dist/http/request-observability.js.map +1 -0
- package/dist/http/request-pipeline.d.ts +16 -0
- package/dist/http/request-pipeline.d.ts.map +1 -0
- package/dist/http/request-pipeline.js +144 -0
- package/dist/http/request-pipeline.js.map +1 -0
- package/dist/http/server.d.ts +17 -0
- package/dist/http/server.d.ts.map +1 -0
- package/dist/http/server.js +300 -0
- package/dist/http/server.js.map +1 -0
- package/dist/http/types.d.ts +20 -0
- package/dist/http/types.d.ts.map +1 -0
- package/dist/http/types.js +2 -0
- package/dist/http/types.js.map +1 -0
- package/dist/skill-install/disabled-state.d.ts +35 -0
- package/dist/skill-install/disabled-state.d.ts.map +1 -0
- package/dist/skill-install/disabled-state.js +96 -0
- package/dist/skill-install/disabled-state.js.map +1 -0
- package/dist/skill-install/discover.d.ts +29 -0
- package/dist/skill-install/discover.d.ts.map +1 -0
- package/dist/skill-install/discover.js +104 -0
- package/dist/skill-install/discover.js.map +1 -0
- package/dist/skill-install/include-utils.d.ts +27 -0
- package/dist/skill-install/include-utils.d.ts.map +1 -0
- package/dist/skill-install/include-utils.js +90 -0
- package/dist/skill-install/include-utils.js.map +1 -0
- package/dist/skill-install/manifest.d.ts +82 -0
- package/dist/skill-install/manifest.d.ts.map +1 -0
- package/dist/skill-install/manifest.js +215 -0
- package/dist/skill-install/manifest.js.map +1 -0
- package/dist/skill-install/skill-installer-common.d.ts +26 -0
- package/dist/skill-install/skill-installer-common.d.ts.map +1 -0
- package/dist/skill-install/skill-installer-common.js +139 -0
- package/dist/skill-install/skill-installer-common.js.map +1 -0
- package/dist/skill-install/skill-installers/claude-code.d.ts +43 -0
- package/dist/skill-install/skill-installers/claude-code.d.ts.map +1 -0
- package/dist/skill-install/skill-installers/claude-code.js +65 -0
- package/dist/skill-install/skill-installers/claude-code.js.map +1 -0
- package/dist/skill-install/skill-installers/codex-cli.d.ts +27 -0
- package/dist/skill-install/skill-installers/codex-cli.d.ts.map +1 -0
- package/dist/skill-install/skill-installers/codex-cli.js +84 -0
- package/dist/skill-install/skill-installers/codex-cli.js.map +1 -0
- package/dist/skill-install/skill-installers/cursor.d.ts +72 -0
- package/dist/skill-install/skill-installers/cursor.d.ts.map +1 -0
- package/dist/skill-install/skill-installers/cursor.js +81 -0
- package/dist/skill-install/skill-installers/cursor.js.map +1 -0
- package/dist/skill-install/skill-installers/gemini-cli.d.ts +50 -0
- package/dist/skill-install/skill-installers/gemini-cli.d.ts.map +1 -0
- package/dist/skill-install/skill-installers/gemini-cli.js +72 -0
- package/dist/skill-install/skill-installers/gemini-cli.js.map +1 -0
- package/dist/skill-install/skill-manifest-sync.d.ts +11 -0
- package/dist/skill-install/skill-manifest-sync.d.ts.map +1 -0
- package/dist/skill-install/skill-manifest-sync.js +65 -0
- package/dist/skill-install/skill-manifest-sync.js.map +1 -0
- package/dist/skills/_shared/auth.md +41 -0
- package/dist/skills/_shared/error-handling.md +31 -0
- package/dist/skills/_shared/polling.md +88 -0
- package/dist/skills/_shared/response-shape.md +55 -0
- package/dist/skills/_shared/review-policy.md +15 -0
- package/dist/skills/mma-audit/SKILL.md +270 -0
- package/dist/skills/mma-context-blocks/SKILL.md +148 -0
- package/dist/skills/mma-debug/SKILL.md +208 -0
- package/dist/skills/mma-delegate/SKILL.md +216 -0
- package/dist/skills/mma-execute-plan/SKILL.md +214 -0
- package/dist/skills/mma-explore/SKILL.md +190 -0
- package/dist/skills/mma-investigate/SKILL.md +258 -0
- package/dist/skills/mma-journal-recall/SKILL.md +242 -0
- package/dist/skills/mma-journal-record/SKILL.md +202 -0
- package/dist/skills/mma-research/SKILL.md +223 -0
- package/dist/skills/mma-retry/SKILL.md +221 -0
- package/dist/skills/mma-review/SKILL.md +209 -0
- package/dist/skills/multi-model-agent/SKILL.md +206 -0
- package/dist/telemetry/consent.d.ts +4 -0
- package/dist/telemetry/consent.d.ts.map +1 -0
- package/dist/telemetry/consent.js +40 -0
- package/dist/telemetry/consent.js.map +1 -0
- package/dist/telemetry/flusher.d.ts +19 -0
- package/dist/telemetry/flusher.d.ts.map +1 -0
- package/dist/telemetry/flusher.js +277 -0
- package/dist/telemetry/flusher.js.map +1 -0
- package/dist/telemetry/generation.d.ts +9 -0
- package/dist/telemetry/generation.d.ts.map +1 -0
- package/dist/telemetry/generation.js +33 -0
- package/dist/telemetry/generation.js.map +1 -0
- package/dist/telemetry/identity.d.ts +9 -0
- package/dist/telemetry/identity.d.ts.map +1 -0
- package/dist/telemetry/identity.js +35 -0
- package/dist/telemetry/identity.js.map +1 -0
- package/dist/telemetry/install-id.d.ts +13 -0
- package/dist/telemetry/install-id.d.ts.map +1 -0
- package/dist/telemetry/install-id.js +49 -0
- package/dist/telemetry/install-id.js.map +1 -0
- package/dist/telemetry/install-meta.d.ts +10 -0
- package/dist/telemetry/install-meta.d.ts.map +1 -0
- package/dist/telemetry/install-meta.js +15 -0
- package/dist/telemetry/install-meta.js.map +1 -0
- package/dist/telemetry/queue.d.ts +35 -0
- package/dist/telemetry/queue.d.ts.map +1 -0
- package/dist/telemetry/queue.js +287 -0
- package/dist/telemetry/queue.js.map +1 -0
- package/dist/telemetry/recorder.d.ts +39 -0
- package/dist/telemetry/recorder.d.ts.map +1 -0
- package/dist/telemetry/recorder.js +173 -0
- package/dist/telemetry/recorder.js.map +1 -0
- package/package.json +43 -24
- package/scripts/postinstall.js +36 -0
- package/bin/mmagent.mjs +0 -47
- package/postinstall.mjs +0 -8
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: mma-retry
|
|
3
|
+
description: >-
|
|
4
|
+
Use when a previous mma-* batch returned partial results (some tasks failed or
|
|
5
|
+
came back incomplete) and you want to re-run JUST the failed indices without
|
|
6
|
+
re-dispatching the whole batch
|
|
7
|
+
when_to_use: >-
|
|
8
|
+
A previous mma-delegate / mma-execute-plan / mma-audit / mma-review /
|
|
9
|
+
mma-debug / mma-investigate batch returned partial results AND you want to
|
|
10
|
+
re-try the failed indices only. Prefer this over re-dispatching the whole
|
|
11
|
+
batch or inline-retrying — it's idempotent and preserves the original batch's
|
|
12
|
+
diagnostics.
|
|
13
|
+
version: 5.0.3
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
# mma-retry
|
|
17
|
+
|
|
18
|
+
## Overview
|
|
19
|
+
|
|
20
|
+
Re-run selected tasks from a completed or failed batch. Specify the original `batchId` and the zero-based indices of the tasks to re-run. The retry runs those tasks fresh with the same configuration as the original batch and produces a new `batchId`.
|
|
21
|
+
|
|
22
|
+
**Core principle:** A batch is the unit of dispatch, but a TASK is the unit of failure. Retry at the task level so successful tasks aren't re-charged.
|
|
23
|
+
|
|
24
|
+
## When to Use
|
|
25
|
+
|
|
26
|
+
```dot
|
|
27
|
+
digraph when_to_use {
|
|
28
|
+
"Batch returned terminal?" [shape=diamond];
|
|
29
|
+
"Some tasks failed/incomplete?" [shape=diamond];
|
|
30
|
+
"All tasks failed?" [shape=diamond];
|
|
31
|
+
"mma-retry (selected indices)" [shape=box];
|
|
32
|
+
"Re-dispatch the whole batch" [shape=box];
|
|
33
|
+
"Investigate first (mma-debug)" [shape=box];
|
|
34
|
+
|
|
35
|
+
"Batch returned terminal?" -> "Some tasks failed/incomplete?" [label="yes"];
|
|
36
|
+
"Some tasks failed/incomplete?" -> "All tasks failed?" [label="yes"];
|
|
37
|
+
"Some tasks failed/incomplete?" -> "Done — read results" [label="no"];
|
|
38
|
+
"All tasks failed?" -> "Investigate first (mma-debug)" [label="yes"];
|
|
39
|
+
"All tasks failed?" -> "mma-retry (selected indices)" [label="no — partial"];
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
**Use when:**
|
|
44
|
+
- A previous batch's terminal envelope shows mixed `completed: true` / `completed: false`
|
|
45
|
+
- 1–N tasks (but not all) need a re-run with the same config
|
|
46
|
+
- You want to keep the original batch's diagnostics intact for comparison
|
|
47
|
+
|
|
48
|
+
**Don't use when:**
|
|
49
|
+
- All tasks failed → investigate the systemic cause first (`mma-debug`); retrying won't help
|
|
50
|
+
- The original batch is `expired` (TTL elapsed) → re-dispatch fresh
|
|
51
|
+
- You want to change the prompt → re-dispatch with the new prompt; retry preserves the original
|
|
52
|
+
|
|
53
|
+
## Endpoint
|
|
54
|
+
|
|
55
|
+
`POST /retry?cwd=<abs-path>`
|
|
56
|
+
|
|
57
|
+
@include _shared/auth.md
|
|
58
|
+
|
|
59
|
+
## Request body
|
|
60
|
+
|
|
61
|
+
```json
|
|
62
|
+
{
|
|
63
|
+
"batchId": "550e8400-e29b-41d4-a716-446655440000",
|
|
64
|
+
"taskIndices": [1, 3]
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
| Field | Type | Required | Notes |
|
|
69
|
+
|---|---|---|---|
|
|
70
|
+
| `batchId` | string (UUID) | yes | Batch ID from a previous dispatch (not yet expired) |
|
|
71
|
+
| `taskIndices` | number[] | yes | Zero-based indices to re-run; must be non-negative integers |
|
|
72
|
+
|
|
73
|
+
To re-run all tasks: pass `[0, 1, ..., tasks.length - 1]`. (But consider: if all failed, debug instead of retrying.)
|
|
74
|
+
|
|
75
|
+
## Full example
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
# Original batch had 4 tasks; re-run tasks at index 1 and 3
|
|
79
|
+
BATCH=$(curl -f --show-error -s -X POST \
|
|
80
|
+
-H "X-MMA-Client: $MMA_CLIENT" \
|
|
81
|
+
-H "X-MMA-Main-Model: $MMA_MAIN_MODEL" \
|
|
82
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
83
|
+
-H "Content-Type: application/json" \
|
|
84
|
+
-d '{"batchId":"550e8400-e29b-41d4-a716-446655440000","taskIndices":[1,3]}' \
|
|
85
|
+
"http://localhost:$PORT/retry?cwd=/project")
|
|
86
|
+
BATCH_ID=$(echo "$BATCH" | jq -r '.batchId') # NEW batchId — not the original
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
@include _shared/polling.md
|
|
90
|
+
|
|
91
|
+
## Response shapes
|
|
92
|
+
|
|
93
|
+
### POST /retry?cwd=<abs> — dispatch response (202)
|
|
94
|
+
|
|
95
|
+
```json
|
|
96
|
+
{ "batchId": "<uuid>", "statusUrl": "/batch/<uuid>" }
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Use `batchId` to poll. `statusUrl` is a convenience pointer. **This is a new batchId** — polling the original batch still returns the original batch's old terminal state, not the retry's results.
|
|
100
|
+
|
|
101
|
+
### GET /batch/:id — polling response
|
|
102
|
+
|
|
103
|
+
The HTTP status is the state discriminator:
|
|
104
|
+
|
|
105
|
+
| Status | Meaning |
|
|
106
|
+
|---|---|
|
|
107
|
+
| `202 text/plain` | Still pending — body is the running headline string |
|
|
108
|
+
| `200 application/json` | Terminal — body is the batch envelope below |
|
|
109
|
+
| `404` / `401` / `5xx` | Error — see Error response below; stop polling |
|
|
110
|
+
|
|
111
|
+
### GET /batch/:id?taskIndex=N — single task slice
|
|
112
|
+
|
|
113
|
+
Same envelope. `results` contains exactly the task at index `N`. Returns `404 unknown_task_index` if `N` is out of range.
|
|
114
|
+
|
|
115
|
+
### Reading the task result
|
|
116
|
+
|
|
117
|
+
Each task result is the per-task wire object (`ComposePayload`):
|
|
118
|
+
|
|
119
|
+
```json
|
|
120
|
+
{
|
|
121
|
+
"completed": true,
|
|
122
|
+
"message": "Task completed; tests passed; one file changed.",
|
|
123
|
+
"findings": [
|
|
124
|
+
{
|
|
125
|
+
"id": "F1",
|
|
126
|
+
"severity": "high",
|
|
127
|
+
"category": "correctness",
|
|
128
|
+
"claim": "The function does not handle empty input",
|
|
129
|
+
"evidence": "function foo() { ... } // no null check",
|
|
130
|
+
"suggestion": "Add an explicit null guard at the top",
|
|
131
|
+
"source": "reviewer"
|
|
132
|
+
}
|
|
133
|
+
],
|
|
134
|
+
"summary": "Refactored utils.ts — removed 3 dead branches, added JSDoc",
|
|
135
|
+
"filesChanged": ["/project/src/utils.ts"],
|
|
136
|
+
"commitSha": "abc123def",
|
|
137
|
+
"blockId": null,
|
|
138
|
+
"telemetry": {
|
|
139
|
+
"totalDurationMs": 12400,
|
|
140
|
+
"totalCostUSD": 0.08,
|
|
141
|
+
"workerSelfAssessment": "done",
|
|
142
|
+
"reviewVerdict": "approved",
|
|
143
|
+
"commitOutcome": "committed",
|
|
144
|
+
"stopReason": "normal",
|
|
145
|
+
"haltedStage": null,
|
|
146
|
+
"stages": [
|
|
147
|
+
{ "name": "prepare", "outcome": "advance", "durationMs": 2, "costUSD": 0 },
|
|
148
|
+
{ "name": "register-block", "outcome": "skip", "comment": "register-block does not apply to route=delegate", "durationMs": 0, "costUSD": 0 },
|
|
149
|
+
{ "name": "implement", "outcome": "advance", "durationMs": 8900, "costUSD": 0.05 },
|
|
150
|
+
{ "name": "review", "outcome": "advance", "durationMs": 2100, "costUSD": 0.02 },
|
|
151
|
+
{ "name": "rework", "outcome": "skip", "comment": "rework skipped because review approved", "durationMs": 0, "costUSD": 0 },
|
|
152
|
+
{ "name": "commit", "outcome": "advance", "durationMs": 340, "costUSD": 0 },
|
|
153
|
+
{ "name": "annotate", "outcome": "advance", "durationMs": 890, "costUSD": 0.01 },
|
|
154
|
+
{ "name": "compose", "outcome": "advance", "durationMs": 68, "costUSD": 0 },
|
|
155
|
+
{ "name": "terminal", "outcome": "advance", "durationMs": 100, "costUSD": 0 }
|
|
156
|
+
]
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
**Top-level fields to read for the main-agent verdict:**
|
|
162
|
+
|
|
163
|
+
| Field | When `true` / populated |
|
|
164
|
+
|---|---|
|
|
165
|
+
| `completed: true` | Task succeeded. `message` is the summary; `findings` are post-review issues (if any). |
|
|
166
|
+
| `completed: false` | Task did not complete. `message` names the blocking gate or finding; `findings` carry any discovered issues. |
|
|
167
|
+
| `findings` | Issues surfaced by the worker or reviewer. `severity` = `critical` \| `high` \| `medium` \| `low`. `source` = `implementer` \| `reviewer`. |
|
|
168
|
+
| `filesChanged` | File paths modified (empty for read-only routes). |
|
|
169
|
+
| `commitSha` | Git SHA of the committed diff; `null` for read-only routes or when commit was skipped. |
|
|
170
|
+
| `blockId` | `null` for re-run write tasks (`contextBlockId` is `null` too — no terminal block). Re-run read-route tasks register their own terminal context block; see "Terminal context block" below. |
|
|
171
|
+
|
|
172
|
+
**The stages array** (always 9 rows) is the canonical telemetry log. `outcome` is one of:
|
|
173
|
+
- `advance` — stage ran and produced its payload
|
|
174
|
+
- `skip` — stage did not run; `comment` explains why
|
|
175
|
+
- `halt` — stage stopped the chain; `comment` is the failure message
|
|
176
|
+
- `not_run` — stage was not reached because a prior stage halted
|
|
177
|
+
|
|
178
|
+
Use `telemetry.haltedStage` to find the first halt; `telemetry.stopReason` to find why.
|
|
179
|
+
|
|
180
|
+
### Error response (4xx / 5xx)
|
|
181
|
+
|
|
182
|
+
```json
|
|
183
|
+
{
|
|
184
|
+
"error": "<code>",
|
|
185
|
+
"message": "<human-readable>",
|
|
186
|
+
"details": { /* optional structured context, e.g. fieldErrors for 400 */ }
|
|
187
|
+
}
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
`details` is optional and present only when the server has structured additional context.
|
|
191
|
+
|
|
192
|
+
## Best practices
|
|
193
|
+
|
|
194
|
+
This skill is one step in the larger flow described in `multi-model-agent` → "Best practices". Recipes that involve `mma-retry`:
|
|
195
|
+
|
|
196
|
+
- **Recipe C — Investigate-plan-execute (last step).** After `mma-execute-plan` returns mixed results, retry the failed indices to close the loop.
|
|
197
|
+
- **Recipe D — Plan-execute-retry.** Pass the **original `batchId`** as input, specify the failed indices, keep the same configuration. `mma-retry` produces a NEW `batchId` in its response — poll that one for terminal state. Any `contextBlockIds` from the original carry forward.
|
|
198
|
+
|
|
199
|
+
Anti-pattern alert: **`full-batch-redispatch`** (AP4). Re-dispatching the entire batch re-charges every successful task. Always retry by index.
|
|
200
|
+
|
|
201
|
+
## Common pitfalls
|
|
202
|
+
|
|
203
|
+
❌ **Retrying after the batch expired**
|
|
204
|
+
TTL elapsed → original task specs are gone. **Fix:** re-dispatch fresh; the retry endpoint returns 404.
|
|
205
|
+
|
|
206
|
+
❌ **Retrying without addressing the root cause**
|
|
207
|
+
A task that failed for a deterministic reason will fail the same way again. **Fix:** investigate first (`mma-debug`, or read the original task's `message`, `telemetry.haltedStage`, and `telemetry.stopReason`), then retry — or escalate `agentType` to `complex` by re-dispatching.
|
|
208
|
+
|
|
209
|
+
❌ **Confusing the new and original `batchId`**
|
|
210
|
+
Retry produces a NEW batchId; polling the original returns the old terminal state. **Fix:** save the retry's `batchId` and poll that one.
|
|
211
|
+
|
|
212
|
+
❌ **Using retry to change task config**
|
|
213
|
+
Retry preserves the ORIGINAL config (prompt, agentType, filePaths, reviewPolicy). **Fix:** if you want different config, re-dispatch with `mma-delegate` / `mma-execute-plan`.
|
|
214
|
+
|
|
215
|
+
## Terminal context block
|
|
216
|
+
|
|
217
|
+
Write-route tasks (delegate / execute-plan / retry) do NOT register a terminal context block — their durable record is the commit (`commitSha` + changed files). The per-task result's `contextBlockId` is always `null` for these routes. Read routes (audit / review / debug / investigate / research) return a non-null `contextBlockId`; see those skills for the delta-follow-up recipe.
|
|
218
|
+
|
|
219
|
+
Note: a re-run **read-route** task registers its own terminal context block (`contextBlockId`); re-run write tasks register none. Original-batch blocks remain intact and are not overwritten.
|
|
220
|
+
|
|
221
|
+
@include _shared/error-handling.md
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: mma-review
|
|
3
|
+
description: >-
|
|
4
|
+
Use when source code needs a quality / security / correctness pass — pre-merge
|
|
5
|
+
review, post-implementation sanity check, or focused look at a small file set
|
|
6
|
+
— and the review can run in parallel per file
|
|
7
|
+
when_to_use: >-
|
|
8
|
+
User asks for a code review or pre-merge check, OR a methodology skill
|
|
9
|
+
(superpowers:requesting-code-review, /review, /security-review) points at one,
|
|
10
|
+
AND mmagent is running. Delegate so each file is reviewed on its own worker; the
|
|
11
|
+
main agent only decides what to merge. Use this for SOURCE CODE — use mma-audit
|
|
12
|
+
for prose specs / configs.
|
|
13
|
+
version: 5.0.3
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
# mma-review
|
|
17
|
+
|
|
18
|
+
## Overview
|
|
19
|
+
|
|
20
|
+
mma-review is the **pre-merge gate**. Send code files (or a diff) to a worker for structured review against an executability bar: would a maintainer who reads only the verdict and the diff understand which changes are required, why each is required, and where each lives — well enough to apply the fix and re-merge without re-investigating?
|
|
21
|
+
|
|
22
|
+
Each file is reviewed independently in parallel; results are index-aligned with `filePaths`.
|
|
23
|
+
|
|
24
|
+
**Core principle:** The reviewer is a different model from the implementer — different training, different blind spots. Cross-model review catches what self-review misses. The reviewer runs against a 10-category failure-mode taxonomy (test gap, cross-file ripple, missing edge case, race, resource leak, backward-compat break, security regression, performance regression, implicit-contract assumption, pre-existing-bug-vs-new-regression separation) and weighs every change through the security, performance, and correctness lenses regardless of `focus`.
|
|
25
|
+
|
|
26
|
+
## When to Use
|
|
27
|
+
|
|
28
|
+
**Use when:**
|
|
29
|
+
- 1+ source code files just changed (post-implementation review)
|
|
30
|
+
- Pre-merge sanity check on a focused diff
|
|
31
|
+
- Security-sensitive code path (`focus: ["security"]`)
|
|
32
|
+
- A specialized review pass (e.g. `focus: ["performance"]` on hot-path code)
|
|
33
|
+
|
|
34
|
+
**Don't use when:**
|
|
35
|
+
- The thing being reviewed is prose / spec / config → `mma-audit` (better-suited prompt template)
|
|
36
|
+
- You want to know whether a complete branch is mergeable → run `/ultrareview` (multi-model branch review) instead
|
|
37
|
+
- The diff is one-line / one-character → reading inline is faster than dispatch
|
|
38
|
+
|
|
39
|
+
## How to invoke for cross-file ripple detection
|
|
40
|
+
|
|
41
|
+
The cross-file ripple pass (changed-symbol → broken caller) only fires when the worker can identify what changed. Two patterns:
|
|
42
|
+
|
|
43
|
+
- **Diff-as-input (preferred for cross-file ripple)**: pass the diff via the `code` field, plus the named files via `filePaths`. The worker treats the diff as the change-set and greps for callers of changed public symbols.
|
|
44
|
+
- **Files-only (static review)**: pass only `filePaths`. The worker reviews the files in their current state without a change-set, so it has no changed symbols to trace and cross-file ripple detection is effectively inactive. Test gap, missing edge case, race, leak, and security/performance findings still fire.
|
|
45
|
+
|
|
46
|
+
## Endpoint
|
|
47
|
+
|
|
48
|
+
`POST /review?cwd=<abs-path>`
|
|
49
|
+
|
|
50
|
+
@include _shared/auth.md
|
|
51
|
+
|
|
52
|
+
## Request body
|
|
53
|
+
|
|
54
|
+
```json
|
|
55
|
+
{
|
|
56
|
+
"code": "inline code snippet (optional if filePaths given)",
|
|
57
|
+
"focus": ["correctness", "security"],
|
|
58
|
+
"subtype": "default",
|
|
59
|
+
"filePaths": ["/project/src/auth/login.ts"],
|
|
60
|
+
"contextBlockIds": []
|
|
61
|
+
}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
| Field | Type | Required | Notes |
|
|
65
|
+
|---|---|---|---|
|
|
66
|
+
| `code` | string | no | Inline code snippet or diff to review (pass a diff here to enable cross-file ripple detection) |
|
|
67
|
+
| `focus` | string[] | no | Any of `security`, `performance`, `correctness`, `style`. Omit for general review. |
|
|
68
|
+
| `subtype` | `'default'` | no (defaults to `'default'`) | Reserved for future criteria sets; only `default` is wired today. |
|
|
69
|
+
| `filePaths` | string[] | no | Files to review (one worker per file, parallel) |
|
|
70
|
+
| `contextBlockIds` | string[] | no | IDs from `mma-context-blocks` — useful for design docs the reviewer should validate against |
|
|
71
|
+
|
|
72
|
+
Either `code` or `filePaths` (or both) must be provided.
|
|
73
|
+
|
|
74
|
+
> Worker tier for `mma-review` is hardcoded to `complex` and is not caller-configurable. Sending `agentType` is rejected with HTTP 400.
|
|
75
|
+
|
|
76
|
+
## Full example
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
BATCH=$(curl -f --show-error -s -X POST \
|
|
80
|
+
-H "X-MMA-Client: $MMA_CLIENT" \
|
|
81
|
+
-H "X-MMA-Main-Model: $MMA_MAIN_MODEL" \
|
|
82
|
+
-H "Authorization: Bearer $TOKEN" \
|
|
83
|
+
-H "Content-Type: application/json" \
|
|
84
|
+
-d '{"focus":["security","correctness"],"filePaths":["/project/src/auth/login.ts"]}' \
|
|
85
|
+
"http://localhost:$PORT/review?cwd=/project")
|
|
86
|
+
BATCH_ID=$(echo "$BATCH" | jq -r '.batchId')
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
@include _shared/polling.md
|
|
90
|
+
|
|
91
|
+
@include _shared/response-shape.md
|
|
92
|
+
|
|
93
|
+
## Reading the findings
|
|
94
|
+
|
|
95
|
+
The main agent reads `completed` + `message` + `findings` — the findings are the answer. For
|
|
96
|
+
read-only routes, `filesChanged` is always `[]` and `commitSha` is always `null`.
|
|
97
|
+
|
|
98
|
+
```json
|
|
99
|
+
{
|
|
100
|
+
"completed": true,
|
|
101
|
+
"message": "Review complete; 1 finding.",
|
|
102
|
+
"findings": [
|
|
103
|
+
{ "id": "F1", "severity": "critical", "category": "test-gap",
|
|
104
|
+
"claim": "login.ts has no test for null username edge case.",
|
|
105
|
+
"evidence": "Worker read login.ts and grepped for test files — no null-case test found.",
|
|
106
|
+
"suggestion": "Add test case: `login(null) throws ValidationError`.",
|
|
107
|
+
"source": "reviewer" }
|
|
108
|
+
],
|
|
109
|
+
"filesChanged": [],
|
|
110
|
+
"commitSha": null,
|
|
111
|
+
"summary": "...",
|
|
112
|
+
"telemetry": { ... }
|
|
113
|
+
}
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Finding shape
|
|
117
|
+
|
|
118
|
+
Every finding has this shape:
|
|
119
|
+
|
|
120
|
+
| Field | Type | Notes |
|
|
121
|
+
|---|---|---|
|
|
122
|
+
| `id` | string | Worker-assigned, e.g. `F1`, `F2`. Stable across chain. |
|
|
123
|
+
| `severity` | `'critical' \| 'high' \| 'medium' \| 'low'` | 4-tier. |
|
|
124
|
+
| `category` | string | Topical bucket, e.g. `test-gap`, `cross-file-ripple`. |
|
|
125
|
+
| `claim` | string | One-sentence summary. |
|
|
126
|
+
| `evidence` | string ≥20 chars | Verbatim from source when grounded. |
|
|
127
|
+
| `suggestion?` | string | Optional fix recommendation. |
|
|
128
|
+
| `source` | `'implementer' \| 'reviewer'` | Who produced the finding. |
|
|
129
|
+
|
|
130
|
+
`annotatorConfidence` and `evidenceGrounded` are retired — they were v4 fields with no producers.
|
|
131
|
+
|
|
132
|
+
### Recommended rendering by the main agent
|
|
133
|
+
|
|
134
|
+
1. Show ALL findings — never silently drop. Severity and grounding are soft
|
|
135
|
+
signals, not gates.
|
|
136
|
+
2. Default sort: severity (critical → low), then `id` ascending.
|
|
137
|
+
3. `severity` is the authoritative value — use it directly.
|
|
138
|
+
4. Mark findings with `evidence` shorter than 30 chars as "low-evidence"
|
|
139
|
+
(lighter color or `(low evidence)` annotation). User decides what to do.
|
|
140
|
+
5. Severity-tier counts feed the dashboard.
|
|
141
|
+
|
|
142
|
+
## Best practices
|
|
143
|
+
|
|
144
|
+
This skill is one step in the larger flow described in `multi-model-agent` → "Best practices". Recipes that involve `mma-review`:
|
|
145
|
+
|
|
146
|
+
- **Recipe A (analog) — Review-iterate-clean.** `mma-review` → fix → `mma-review` again. Same shape as the audit recipe, applied to source code. Sequential rounds; register the file(s) via `mma-context-blocks` before round 1 and reuse the same ID across rounds.
|
|
147
|
+
|
|
148
|
+
Anti-pattern alert: **`parallel-rounds-same-target`** (AP1). Three parallel reviews of the same source file re-flag the same issues. Run rounds sequentially with a fix between each.
|
|
149
|
+
|
|
150
|
+
## Common pitfalls
|
|
151
|
+
|
|
152
|
+
❌ **Reviewing a plan/spec markdown with `mma-review`**
|
|
153
|
+
The reviewer is tuned for code constructs (types, call sites, test coverage). On prose it produces vague nits. **Fix:** use `mma-audit` for docs/specs, `mma-review` for source.
|
|
154
|
+
|
|
155
|
+
❌ **Omitting `focus` and getting watery findings**
|
|
156
|
+
A general review surfaces low-signal style nits alongside real bugs. **Fix:** specify `focus: ["correctness"]` or `["security"]` to bias the reviewer toward the dimension you care about.
|
|
157
|
+
|
|
158
|
+
❌ **Inlining the spec the reviewer should validate against**
|
|
159
|
+
Pasting the same design doc into every review request wastes tokens — the cost scales with the number of calls. **Fix:** if the reviewer needs to check the diff against a design doc, register the doc once via `mma-context-blocks` and pass its ID in `contextBlockIds`.
|
|
160
|
+
|
|
161
|
+
❌ **Skipping review because "I already read it"**
|
|
162
|
+
Self-review and cross-model review are not the same thing. The whole reason to delegate is the different blind spots. Read the findings; merge what you agree with.
|
|
163
|
+
|
|
164
|
+
## Terminal context block
|
|
165
|
+
|
|
166
|
+
Every completed **read-route** task (audit / review / debug / investigate / research) auto-registers a reusable terminal context block containing its report (headline + findings). The block id is returned on each per-task result as **`contextBlockId`**. Write routes (delegate / execute-plan / retry) return `contextBlockId: null` — their record is the commit, not a block. This block is immutable, lives for the session duration, and counts against the project's `maxEntries` quota (default 500).
|
|
167
|
+
|
|
168
|
+
Use it for delta follow-ups — feed prior results' block ids into a later call's `contextBlockIds`, filtering out nulls:
|
|
169
|
+
|
|
170
|
+
contextBlockIds: priorResults.map(r => r.contextBlockId).filter((id) => id !== null)
|
|
171
|
+
|
|
172
|
+
**Use cases:**
|
|
173
|
+
- Pass round-N review findings to round N+1 via `contextBlockIds`
|
|
174
|
+
- Feed review results into a downstream `mma-delegate` fix step
|
|
175
|
+
- Accumulate findings across iterative review rounds
|
|
176
|
+
|
|
177
|
+
The block is registered server-side at task completion; no caller action is needed to create it. Delete it explicitly via `DELETE /context-blocks/:id` when no longer needed, or let it expire on session teardown.
|
|
178
|
+
|
|
179
|
+
## Outcome semantics
|
|
180
|
+
|
|
181
|
+
Every task result carries outcome fields that describe the code review's conclusion status:
|
|
182
|
+
|
|
183
|
+
| Field | Type | Meaning |
|
|
184
|
+
|---|---|---|
|
|
185
|
+
| `findingsOutcome` | `'found' \| 'clean' \| 'not_applicable'` | Answers the question: did the review uncover issues? |
|
|
186
|
+
| `findingsOutcomeReason` | `string \| null` | When `findingsOutcome` is set, this explains why (e.g. "Test gap: login() has no null-username case" or "Code is clean across all review criteria"). |
|
|
187
|
+
| `outcomeInferred` | `boolean` | `true` if the system inferred the outcome from findings count; `false` if the reviewer explicitly stated it. |
|
|
188
|
+
| `outcomeMalformed` | `boolean` | `true` if the outcome line was malformed and had to be repaired; `false` otherwise. |
|
|
189
|
+
|
|
190
|
+
### Enum values
|
|
191
|
+
|
|
192
|
+
- **`found`** — the review surfaced one or more issues (findings) across one or more review categories (test gap, cross-file ripple, race, leak, security, performance, etc.). This indicates the code needs rework before merge.
|
|
193
|
+
- **`clean`** — the review completed and found zero issues. The code passes the review bar and is safe to merge.
|
|
194
|
+
- **`not_applicable`** — the review could not proceed (e.g., wrong input type, missing preconditions, or system error). This is rare; most reviews resolve to `found` or `clean`.
|
|
195
|
+
|
|
196
|
+
### Empty findings ≠ failure
|
|
197
|
+
|
|
198
|
+
A crucial point: **an empty `findings` array does NOT mean `completed: false` or a failed review.** Finding nothing wrong is a successful review outcome — it means the code passed inspection. A review with zero findings is `completed: true` with `findingsOutcome: 'clean'`.
|
|
199
|
+
|
|
200
|
+
### Per-route legal outcomes
|
|
201
|
+
|
|
202
|
+
The legal outcomes for this route are: `['found', 'clean']`
|
|
203
|
+
|
|
204
|
+
- **`found`** — one or more issues were detected across the review categories.
|
|
205
|
+
- **`clean`** — zero issues were detected; the code is ready to merge.
|
|
206
|
+
|
|
207
|
+
`not_applicable` is not a legal verdict for `mma-review`: a code review that actually runs always produces either `found` or `clean`. It appears only when the review could not proceed at all (an actual precondition failure).
|
|
208
|
+
|
|
209
|
+
@include _shared/error-handling.md
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: multi-model-agent
|
|
3
|
+
description: >-
|
|
4
|
+
Use first whenever you're about to delegate any tool-using work — picks the
|
|
5
|
+
right mma-* skill (audit, review, verify, debug, plan execution, codebase
|
|
6
|
+
investigation, ad-hoc delegation, retry, context-block reuse) instead of
|
|
7
|
+
defaulting to inline Agent dispatches
|
|
8
|
+
when_to_use: >-
|
|
9
|
+
The user asks for work you'd normally delegate — audit, code review, checklist
|
|
10
|
+
verification, debugging, plan execution, codebase Q&A, or ad-hoc parallel
|
|
11
|
+
tasks — AND mmagent is running. Read this once, pick the matching mma-* skill,
|
|
12
|
+
and delegate there. Applies equally whether the user invoked a superpowers
|
|
13
|
+
methodology skill or asked directly.
|
|
14
|
+
version: 5.0.3
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
# multi-model-agent (router)
|
|
18
|
+
|
|
19
|
+
## Overview
|
|
20
|
+
|
|
21
|
+
Local HTTP service that fans out tool-using work to workers on different LLM providers (Claude, OpenAI-compatible, Codex). Workers run on cheap models; the main agent stays on judgment.
|
|
22
|
+
|
|
23
|
+
**Core principle:** Pick the most specific `mma-*` skill that fits the task. Specificity reduces input — specialized skills know their route, schema, and defaults so you write less.
|
|
24
|
+
|
|
25
|
+
## Skill map
|
|
26
|
+
|
|
27
|
+
```dot
|
|
28
|
+
digraph picker {
|
|
29
|
+
"Plan/spec file on disk?" [shape=diamond];
|
|
30
|
+
"Audit a doc?" [shape=diamond];
|
|
31
|
+
"Review code?" [shape=diamond];
|
|
32
|
+
"Verify a checklist?" [shape=diamond];
|
|
33
|
+
"Debug a failure?" [shape=diamond];
|
|
34
|
+
"Codebase question?" [shape=diamond];
|
|
35
|
+
"Convergent or divergent?" [shape=diamond];
|
|
36
|
+
"mma-execute-plan" [shape=box];
|
|
37
|
+
"mma-audit" [shape=box];
|
|
38
|
+
"mma-review" [shape=box];
|
|
39
|
+
"mma-debug" [shape=box];
|
|
40
|
+
"mma-investigate" [shape=box];
|
|
41
|
+
"mma-explore" [shape=box];
|
|
42
|
+
"mma-delegate" [shape=box];
|
|
43
|
+
|
|
44
|
+
"Plan/spec file on disk?" -> "mma-execute-plan" [label="yes"];
|
|
45
|
+
"Plan/spec file on disk?" -> "Audit a doc?" [label="no"];
|
|
46
|
+
"Audit a doc?" -> "mma-audit" [label="yes"];
|
|
47
|
+
"Audit a doc?" -> "Review code?" [label="no"];
|
|
48
|
+
"Review code?" -> "mma-review" [label="yes"];
|
|
49
|
+
"Review code?" -> "Debug a failure?" [label="no"];
|
|
50
|
+
"Debug a failure?" -> "mma-debug" [label="yes"];
|
|
51
|
+
"Debug a failure?" -> "Codebase question?" [label="no"];
|
|
52
|
+
"Codebase question?" -> "Convergent or divergent?" [label="yes"];
|
|
53
|
+
"Codebase question?" -> "mma-delegate" [label="no — ad-hoc"];
|
|
54
|
+
"Convergent or divergent?" -> "mma-investigate" [label="convergent (one answer)"];
|
|
55
|
+
"Convergent or divergent?" -> "mma-explore" [label="divergent (3-5 directions)"];
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
| Skill | Purpose |
|
|
60
|
+
|---|---|
|
|
61
|
+
| `mma-execute-plan` | Implement tasks from a plan or spec file (descriptors match plan headings) |
|
|
62
|
+
| `mma-audit` | Audit a document/spec/config for security, correctness, style, or performance |
|
|
63
|
+
| `mma-review` | Review code for quality, security, performance, correctness. Pass acceptance checklists in the brief if you need verification-style checks. |
|
|
64
|
+
| `mma-debug` | Debug a failure with a structured hypothesis |
|
|
65
|
+
| `mma-investigate` | Codebase Q&A — structured answer with `file:line` citations + confidence |
|
|
66
|
+
| `mma-explore` | Divergent ideation from codebase + web research + prior-learnings recall — use before `superpowers:brainstorming` |
|
|
67
|
+
| `mma-delegate` | Ad-hoc implementation / research with no plan file |
|
|
68
|
+
| `mma-retry` | Re-run specific failed/incomplete tasks from a previous batch by index |
|
|
69
|
+
| `mma-context-blocks` | Register a reused doc once; reference by ID across N tasks |
|
|
70
|
+
|
|
71
|
+
## Best practices
|
|
72
|
+
|
|
73
|
+
### The unifying principle
|
|
74
|
+
|
|
75
|
+
The main session is for judgment, orchestration, and dialogue with the engineer. Everything else — read, grep, audit, review, debug, implement, verify — gets delegated. If you're about to do labor in main context, you've already taken the wrong turn.
|
|
76
|
+
|
|
77
|
+
### Judgment vs labor — what NEVER delegates
|
|
78
|
+
|
|
79
|
+
Labor is work whose answer is findable from the inputs; it goes to workers. The main session keeps work whose answer is **judgment** — there is no "right answer" a worker could discover:
|
|
80
|
+
|
|
81
|
+
- **Brainstorming** — exploring the problem space with the engineer before a spec exists.
|
|
82
|
+
- **Spec writing** — deciding what to build, what success looks like, what's out of scope.
|
|
83
|
+
- **Plan writing** — turning a spec into ordered, testable steps with the right decomposition.
|
|
84
|
+
- **Architecture and design decisions** — choosing the shape of the solution.
|
|
85
|
+
- **Final approval / merge decisions** — what ships.
|
|
86
|
+
- **Dialogue with the engineer** — clarifying intent, negotiating tradeoffs, answering "should we?".
|
|
87
|
+
|
|
88
|
+
The test: *if a worker can produce the answer from the given inputs, delegate; if the answer requires deciding what the inputs should be, it's main-session work.* Recipes A–D all keep these judgment steps in main context (e.g., Recipe C explicitly: `mma-investigate` → **write the plan (main)** → `mma-execute-plan`).
|
|
89
|
+
|
|
90
|
+
### C1 — Delegate by default, inline by exception
|
|
91
|
+
|
|
92
|
+
If a task needs 3+ file reads or any grep, it goes to a worker. Inline `Read` is reserved for files already in context, single-file lookups, or 1-2 file reads with a known target.
|
|
93
|
+
|
|
94
|
+
### C2 — Parallel for independence, sequential for iteration
|
|
95
|
+
|
|
96
|
+
Independent fan-out (5 unrelated audits, 5 unrelated bugs) → parallel batch. Coupled rounds where round N's fix produces round N+1's input (audit → fix → re-audit, debug → fix → verify) → sequential.
|
|
97
|
+
|
|
98
|
+
### C3 — Shared content lives in a context block, not in caller tokens
|
|
99
|
+
|
|
100
|
+
Any artifact (spec, plan, prior-round findings, long error log) that crosses 2+ calls gets registered once via `mma-context-blocks` and referenced by ID.
|
|
101
|
+
|
|
102
|
+
### Recipe A — Audit-iterate-clean
|
|
103
|
+
|
|
104
|
+
`mma-audit` → read findings → fix (inline if 1-2 lines, else `mma-delegate`) → `mma-audit` again. Sequential rounds, NOT parallel re-audits. The fix produces new edges; round 2 catches what round 1 couldn't see. Register the doc as a context block before round 1; reuse the same ID across all rounds. The same shape applies to `mma-review` for source code (review → fix → re-review).
|
|
105
|
+
|
|
106
|
+
### Recipe B — Debug-fix-review
|
|
107
|
+
|
|
108
|
+
`mma-debug` (read/reproduce/trace) → `mma-delegate` (apply the fix the hypothesis implies) → `mma-review` with the acceptance criteria included in the brief. Three skills, strict order. Register the failing test output / reproduction log as a context block before the debug call; reuse it on the review call so the reviewer can compare against the same evidence.
|
|
109
|
+
|
|
110
|
+
### Recipe C — Investigate-plan-execute
|
|
111
|
+
|
|
112
|
+
`mma-investigate` (codebase Q&A) → write the plan (main-context judgment task) → `mma-execute-plan` (workers implement against named plan headings) → `mma-retry` on any failed indices. Register the plan file as a context block before execute-plan; the retry call inherits the same configuration including `contextBlockIds`.
|
|
113
|
+
|
|
114
|
+
### Recipe D — Plan-execute-retry
|
|
115
|
+
|
|
116
|
+
When `mma-execute-plan` returns mixed `done` / `done_with_concerns` / `failed`, the next step is `mma-retry` on the failed indices only — never a full-batch re-dispatch. Pass the **original `batchId`** as input, specify the failed task indices, keep the same configuration. (`mma-retry` produces a NEW `batchId` in its response — poll that one for terminal state, not the original.) Any `contextBlockIds` registered for the original batch carry forward into retry — no need to re-register.
|
|
117
|
+
|
|
118
|
+
### Anti-patterns
|
|
119
|
+
|
|
120
|
+
1. **`parallel-rounds-same-target`** — Caller fans out 3 parallel calls of the same skill on the same target — `mma-audit` on one document, or `mma-review` on the same source file. The reports overlap heavily; later rounds never see the fix from earlier rounds, so they re-flag the same issues. Corrective: sequential rounds with a fix between each (Recipe A).
|
|
121
|
+
|
|
122
|
+
2. **`inline-labor-leakage`** — Caller does 3+ `Read` calls, or any `grep`, in main context "just to understand the situation." Main tokens get burned on labor; the answer the caller actually needs is one paragraph of synthesis. Corrective: `mma-investigate` for codebase Q&A; if the goal is implementation, jump straight to `mma-delegate` with file paths and let the worker read.
|
|
123
|
+
|
|
124
|
+
3. **`re-inlined-shared-content`** — Caller pastes the same spec / plan / error log into 5 task prompts in one batch (or across rounds). Token cost scales linearly with N. Corrective: `mma-context-blocks` register once, pass `contextBlockIds` to every task. C3 fires the moment the same content is referenced a second time.
|
|
125
|
+
|
|
126
|
+
4. **`full-batch-redispatch`** — Caller re-runs `mma-execute-plan` with the entire task list when only 2 of 8 tasks failed. The 6 successful tasks get re-charged. Corrective: `mma-retry` with the failed indices. (The same anti-pattern applies to multi-task `mma-delegate` batches; `mma-retry` is the corrective there too.)
|
|
127
|
+
|
|
128
|
+
## Preflight: auto-start the daemon if it is not running
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
PORT=7337
|
|
132
|
+
if ! curl -sf "http://127.0.0.1:$PORT/health" >/dev/null 2>&1; then
|
|
133
|
+
mmagent serve >/dev/null 2>&1 & disown
|
|
134
|
+
for _ in 1 2 3 4 5 6 7 8 9 10; do
|
|
135
|
+
sleep 0.5
|
|
136
|
+
curl -sf "http://127.0.0.1:$PORT/health" >/dev/null 2>&1 && break
|
|
137
|
+
done
|
|
138
|
+
fi
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Idempotent: if the daemon is already running, the first `curl` succeeds and the block is a no-op. Always background `mmagent serve` (with `& disown`) — never run it in the foreground, where it would block the rest of the script.
|
|
142
|
+
|
|
143
|
+
## Auth token
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
export MMAGENT_AUTH_TOKEN=$(mmagent print-token)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Every request requires `Authorization: Bearer $MMAGENT_AUTH_TOKEN`. The token rotates on every `mmagent serve` restart — re-export after a `pkill`/upgrade.
|
|
150
|
+
|
|
151
|
+
## Worker tier: `agentType`
|
|
152
|
+
|
|
153
|
+
Only `mma-delegate` accepts `agentType: "standard" | "complex"` per task — default `"standard"` (cheaper, faster). Pick `"complex"` when:
|
|
154
|
+
|
|
155
|
+
- The task touches many files or requires multi-step reasoning a standard-tier model cannot hold in context.
|
|
156
|
+
- A prior standard run came back with `filesWritten: 0` or `incompleteReason: "turn_cap"` / `"timeout"`.
|
|
157
|
+
- The task is security-sensitive or ambiguous enough that being wrong is costly.
|
|
158
|
+
|
|
159
|
+
Every other route hardcodes its tier and rejects `agentType` with HTTP 400:
|
|
160
|
+
|
|
161
|
+
| Route | Hardcoded tier |
|
|
162
|
+
|---|---|
|
|
163
|
+
| `mma-execute-plan` | `standard` |
|
|
164
|
+
| `mma-audit` | `complex` |
|
|
165
|
+
| `mma-review` | `complex` |
|
|
166
|
+
| `mma-debug` | `complex` |
|
|
167
|
+
| `mma-investigate` | `complex` |
|
|
168
|
+
| `mma-explore` | `complex` (all three workers — internal, external, synthesizer) |
|
|
169
|
+
|
|
170
|
+
If you need `complex` tier on plan-style work, dispatch via `mma-delegate` with the plan task as the prompt and `agentType: "complex"`.
|
|
171
|
+
|
|
172
|
+
## Context block defaults
|
|
173
|
+
|
|
174
|
+
| Default | Value | Notes |
|
|
175
|
+
|---|---|---|
|
|
176
|
+
| Idle TTL | 24 h | Block eligible for eviction after 24 h with no active batch references |
|
|
177
|
+
| `maxEntries` | 500 | Per-project cap on total context blocks |
|
|
178
|
+
| Body cap | 50 MiB | Maximum `content` size per block |
|
|
179
|
+
|
|
180
|
+
Context blocks are immutable after creation. To update content, register a new block and switch `contextBlockIds` to the new ID.
|
|
181
|
+
|
|
182
|
+
## Terminal context block
|
|
183
|
+
|
|
184
|
+
Every completed **read-route** task (audit / review / debug / investigate / research) auto-registers a reusable terminal context block containing its report (headline + findings). The block id is returned on each per-task result as **`contextBlockId`**. Write routes (delegate / execute-plan / retry) return `contextBlockId: null` — their record is the commit, not a block. This block is immutable, lives for the session duration, and counts against the project's `maxEntries` quota (default 500).
|
|
185
|
+
|
|
186
|
+
Use it for delta follow-ups — feed prior results' block ids into a later call's `contextBlockIds`, filtering out nulls:
|
|
187
|
+
|
|
188
|
+
contextBlockIds: priorResults.map(r => r.contextBlockId).filter((id) => id !== null)
|
|
189
|
+
|
|
190
|
+
## General flow
|
|
191
|
+
|
|
192
|
+
1. Call the matching `mma-*` skill → receive `{ batchId, statusUrl }`.
|
|
193
|
+
2. Poll `GET /batch/:id`: it returns `202 text/plain` while the batch is pending (the body is the running headline) and `200 application/json` once the batch reaches a terminal state.
|
|
194
|
+
3. Read `results` / `error` from the 6-field terminal envelope.
|
|
195
|
+
|
|
196
|
+
## Common pitfalls
|
|
197
|
+
|
|
198
|
+
❌ **Defaulting to inline Agent dispatch when mmagent is up.** mmagent workers cost ~10× less and don't pollute main context. **Why:** every inline tool call burns flagship-model tokens; that's exactly what mmagent exists to avoid.
|
|
199
|
+
|
|
200
|
+
❌ **Picking `mma-delegate` when a more specific skill fits.** Audit / review / verify / debug / investigate workers know their route's defaults and emit structured reports. **Why:** specialized skills require less input and produce richer output.
|
|
201
|
+
|
|
202
|
+
❌ **Starting an investigation that needs to write code.** `mma-investigate` is read-only. **Fix:** dispatch `mma-delegate` with research-then-edit framing, or split: investigate → digest → edit.
|
|
203
|
+
|
|
204
|
+
## Diagnosing slow tasks
|
|
205
|
+
|
|
206
|
+
`mmagent serve --verbose` (or `diagnostics.verbose: true` in config) records `tool_call`, `turn_complete`, and `heartbeat` events. Tail with `mmagent logs --follow --batch=$BATCH_ID`.
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import { type ConsentDecision } from '@zhixuan92/multi-model-agent-core/events/consent-rules';
|
|
2
|
+
export declare function decide(homeDir: string): ConsentDecision;
|
|
3
|
+
export declare function watchConfigForChanges(homeDir: string, onChange: (d: ConsentDecision) => void): () => void;
|
|
4
|
+
//# sourceMappingURL=consent.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"consent.d.ts","sourceRoot":"","sources":["../../src/telemetry/consent.ts"],"names":[],"mappings":"AAEA,OAAO,EAAiB,KAAK,eAAe,EAAE,MAAM,wDAAwD,CAAC;AAE7G,wBAAgB,MAAM,CAAC,OAAO,EAAE,MAAM,GAAG,eAAe,CAsBvD;AAED,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,eAAe,KAAK,IAAI,GAAG,MAAM,IAAI,CASzG"}
|