@zhixuan92/multi-model-agent 4.3.1 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -22
- package/dist/http/handlers/tools/delegate.d.ts.map +1 -1
- package/dist/http/handlers/tools/delegate.js +8 -0
- package/dist/http/handlers/tools/delegate.js.map +1 -1
- package/dist/http/handlers/tools/execute-plan.d.ts.map +1 -1
- package/dist/http/handlers/tools/execute-plan.js +9 -1
- package/dist/http/handlers/tools/execute-plan.js.map +1 -1
- package/dist/http/handlers/tools/research.js +1 -1
- package/dist/http/handlers/tools/research.js.map +1 -1
- package/dist/http/handlers/tools/retry.js.map +1 -1
- package/dist/http/middleware/caller-identity.d.ts +5 -3
- package/dist/http/middleware/caller-identity.d.ts.map +1 -1
- package/dist/http/middleware/caller-identity.js.map +1 -1
- package/dist/http/request-pipeline.d.ts.map +1 -1
- package/dist/http/request-pipeline.js +8 -14
- package/dist/http/request-pipeline.js.map +1 -1
- package/dist/http/server.d.ts.map +1 -1
- package/dist/http/server.js +7 -11
- package/dist/http/server.js.map +1 -1
- package/dist/http/wire/register-all-handlers.d.ts.map +1 -1
- package/dist/http/wire/register-all-handlers.js +0 -2
- package/dist/http/wire/register-all-handlers.js.map +1 -1
- package/dist/skills/_shared/auth.md +4 -1
- package/dist/skills/mma-audit/SKILL.md +67 -60
- package/dist/skills/mma-context-blocks/SKILL.md +5 -3
- package/dist/skills/mma-debug/SKILL.md +7 -4
- package/dist/skills/mma-delegate/SKILL.md +3 -2
- package/dist/skills/mma-execute-plan/SKILL.md +2 -1
- package/dist/skills/mma-explore/SKILL.md +1 -1
- package/dist/skills/mma-investigate/SKILL.md +4 -1
- package/dist/skills/mma-research/SKILL.md +6 -1
- package/dist/skills/mma-retry/SKILL.md +6 -5
- package/dist/skills/mma-review/SKILL.md +4 -1
- package/dist/skills/multi-model-agent/SKILL.md +6 -11
- package/package.json +2 -2
- package/dist/http/handlers/tools/verify.d.ts +0 -4
- package/dist/http/handlers/tools/verify.d.ts.map +0 -1
- package/dist/http/handlers/tools/verify.js +0 -53
- package/dist/http/handlers/tools/verify.js.map +0 -1
- package/dist/skills/mma-verify/SKILL.md +0 -155
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
import * as verify from '@zhixuan92/multi-model-agent-core/tools/verify/schema';
|
|
2
|
-
import { executeTask } from '@zhixuan92/multi-model-agent-core/lifecycle/task-executor';
|
|
3
|
-
import { toolConfig } from '@zhixuan92/multi-model-agent-core/tools/verify/tool-config';
|
|
4
|
-
import { sendError, sendJson } from '../../errors.js';
|
|
5
|
-
import { asyncDispatch } from '../../async-dispatch.js';
|
|
6
|
-
import { emitRequestReceived } from '../../request-observability.js';
|
|
7
|
-
export function buildVerifyHandler(deps) {
|
|
8
|
-
return async (_req, res, _params, ctx) => {
|
|
9
|
-
const parsed = verify.inputSchema.safeParse(ctx.body);
|
|
10
|
-
if (!parsed.success) {
|
|
11
|
-
sendError(res, 400, 'invalid_request', 'Request body validation failed', {
|
|
12
|
-
fieldErrors: parsed.error.flatten(),
|
|
13
|
-
});
|
|
14
|
-
return;
|
|
15
|
-
}
|
|
16
|
-
const input = parsed.data;
|
|
17
|
-
const cwd = ctx.cwd;
|
|
18
|
-
const reserveResult = deps.projectRegistry.reserveProject(cwd);
|
|
19
|
-
if (!reserveResult.ok) {
|
|
20
|
-
sendError(res, 503, reserveResult.error, reserveResult.message);
|
|
21
|
-
return;
|
|
22
|
-
}
|
|
23
|
-
const pc = reserveResult.projectContext;
|
|
24
|
-
pc.lastActivityAt = Date.now();
|
|
25
|
-
deps.projectRegistry.cancelReservation(cwd);
|
|
26
|
-
const blockIds = input.contextBlockIds ?? [];
|
|
27
|
-
const { batchId, statusUrl } = asyncDispatch({
|
|
28
|
-
tool: 'verify',
|
|
29
|
-
projectCwd: cwd,
|
|
30
|
-
blockIds,
|
|
31
|
-
batchRegistry: deps.batchRegistry,
|
|
32
|
-
projectContext: pc,
|
|
33
|
-
deps,
|
|
34
|
-
caller: { client: ctx.callerClient, mainModel: ctx.mainModel },
|
|
35
|
-
executor: async (executionCtx) => {
|
|
36
|
-
const callExecutor = () => executeTask(toolConfig, executionCtx, input);
|
|
37
|
-
if (deps.routeDispatcher) {
|
|
38
|
-
const result = await deps.routeDispatcher.dispatch({
|
|
39
|
-
route: 'verify',
|
|
40
|
-
toolCategory: 'read_only',
|
|
41
|
-
rawRequest: input,
|
|
42
|
-
executor: () => callExecutor(),
|
|
43
|
-
});
|
|
44
|
-
return result.body;
|
|
45
|
-
}
|
|
46
|
-
return callExecutor();
|
|
47
|
-
},
|
|
48
|
-
});
|
|
49
|
-
await emitRequestReceived({ config: deps.config, batchId, route: _req.url ?? '', parsed: input });
|
|
50
|
-
sendJson(res, 202, { batchId, statusUrl });
|
|
51
|
-
};
|
|
52
|
-
}
|
|
53
|
-
//# sourceMappingURL=verify.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"verify.js","sourceRoot":"","sources":["../../../../src/http/handlers/tools/verify.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,MAAM,MAAM,uDAAuD,CAAC;AAChF,OAAO,EAAE,WAAW,EAAE,MAAM,2DAA2D,CAAC;AACxF,OAAO,EAAE,UAAU,EAAE,MAAM,4DAA4D,CAAC;AACxF,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAExD,OAAO,EAAE,mBAAmB,EAAE,MAAM,gCAAgC,CAAC;AAErE,MAAM,UAAU,kBAAkB,CAAC,IAAiB;IAClD,OAAO,KAAK,EAAE,IAAqB,EAAE,GAAmB,EAAE,OAA+B,EAAE,GAAG,EAAE,EAAE;QAChG,MAAM,MAAM,GAAG,MAAM,CAAC,WAAW,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACtD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,SAAS,CAAC,GAAG,EAAE,GAAG,EAAE,iBAAiB,EAAE,gCAAgC,EAAE;gBACvE,WAAW,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE;aACpC,CAAC,CAAC;YACH,OAAO;QACT,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC;QAC1B,MAAM,GAAG,GAAG,GAAG,CAAC,GAAI,CAAC;QAErB,MAAM,aAAa,GAAG,IAAI,CAAC,eAAe,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QAC/D,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,CAAC;YACtB,SAAS,CAAC,GAAG,EAAE,GAAG,EAAE,aAAa,CAAC,KAAK,EAAE,aAAa,CAAC,OAAO,CAAC,CAAC;YAChE,OAAO;QACT,CAAC;QACD,MAAM,EAAE,GAAG,aAAa,CAAC,cAAc,CAAC;QACxC,EAAE,CAAC,cAAc,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC/B,IAAI,CAAC,eAAe,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC;QAE5C,MAAM,QAAQ,GAAG,KAAK,CAAC,eAAe,IAAI,EAAE,CAAC;QAC7C,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,aAAa,CAAC;YAC3C,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,GAAG;YACf,QAAQ;YACR,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,cAAc,EAAE,EAAE;YAClB,IAAI;YACJ,MAAM,EAAE,EAAE,MAAM,EAAE,GAAG,CAAC,YAAY,EAAE,SAAS,EAAE,GAAG,CAAC,SAAS,EAAE;YAC9D,QAAQ,EAAE,KAAK,EAAE,YAAY,EAAE,EAAE;gBAC/B,MAAM,YAAY,GAAG,GAAG,EAAE,CAAC,WAAW,CAAC,UAAU,EAAE,YAAY,EAAE,KAAK,CAAC,CAAC;gBACxE,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;oBACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC;wBACjD,KAAK,EAAE,QAAQ;wBACf,YAAY,EAAE,WAAW;wBACzB,UAAU,EAAE,KAAK;wBACjB,QAAQ,EAAE,GAAG,EAAE,CAAC,YAAY,EAAE;qBAC/B,CAAC,CAAC;oBACH,OAAO,MAAM,CAAC,IAAI,CAAC;gBACrB,CAAC;gBACD,OAAO,YAAY,EAAE,CAAC;YACxB,CAAC;SACF,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,
IAAI,CAAC,GAAG,IAAI,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QAElG,QAAQ,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC,CAAC;IAC7C,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -1,155 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: mma-verify
|
|
3
|
-
description: >-
|
|
4
|
-
Use when work is "complete" and you need to confirm acceptance criteria are
|
|
5
|
-
actually met before claiming so to the user — each checklist item verified
|
|
6
|
-
independently against the work
|
|
7
|
-
when_to_use: >-
|
|
8
|
-
The user (or a methodology skill like
|
|
9
|
-
superpowers:verification-before-completion) needs acceptance-criteria checked
|
|
10
|
-
against implemented work BEFORE claiming success. Delegate so each checklist
|
|
11
|
-
item gets independent evidence-gathering on a worker. Use this BEFORE saying
|
|
12
|
-
"done" — never after.
|
|
13
|
-
version: 4.3.1
|
|
14
|
-
---
|
|
15
|
-
|
|
16
|
-
# mma-verify
|
|
17
|
-
|
|
18
|
-
## Overview
|
|
19
|
-
|
|
20
|
-
Submit work product and a checklist to workers for independent verification. Each checklist item is verified in parallel; results are index-aligned with the input.
|
|
21
|
-
|
|
22
|
-
**Core principle:** Self-verification ("I read the files; they look correct") has no external validation. Workers check independently and return evidence (or absence of it) per item.
|
|
23
|
-
|
|
24
|
-
## When to Use
|
|
25
|
-
|
|
26
|
-
**Use when:**
|
|
27
|
-
- You're about to claim a task is "done" and need evidence per acceptance item
|
|
28
|
-
- A methodology skill (superpowers:verification-before-completion) routed here
|
|
29
|
-
- The user gave a checklist and asked you to confirm each item
|
|
30
|
-
|
|
31
|
-
**Don't use when:**
|
|
32
|
-
- The "checklist" has only one item — checking it inline is faster than dispatching
|
|
33
|
-
- You don't have explicit acceptance criteria — write them first, then dispatch
|
|
34
|
-
- The work hasn't been done yet — verification is a post-condition, not a pre-condition
|
|
35
|
-
|
|
36
|
-
## Endpoint
|
|
37
|
-
|
|
38
|
-
`POST /verify?cwd=<abs-path>`
|
|
39
|
-
|
|
40
|
-
@include _shared/auth.md
|
|
41
|
-
|
|
42
|
-
## Request body
|
|
43
|
-
|
|
44
|
-
```json
|
|
45
|
-
{
|
|
46
|
-
"work": "inline description of the work (optional if filePaths given)",
|
|
47
|
-
"checklist": [
|
|
48
|
-
"All public functions have JSDoc comments",
|
|
49
|
-
"No console.log statements remain",
|
|
50
|
-
"Unit tests cover the happy path and at least one error case"
|
|
51
|
-
],
|
|
52
|
-
"filePaths": ["/project/src/utils.ts"],
|
|
53
|
-
"contextBlockIds": []
|
|
54
|
-
}
|
|
55
|
-
```
|
|
56
|
-
|
|
57
|
-
| Field | Type | Required | Notes |
|
|
58
|
-
|---|---|---|---|
|
|
59
|
-
| `work` | string | no | Inline work-product description (e.g. summary of what changed) |
|
|
60
|
-
| `checklist` | string[] | yes | At least one item — each item verified by its own worker |
|
|
61
|
-
| `filePaths` | string[] | no | Files to verify against (workers can read them) |
|
|
62
|
-
| `contextBlockIds` | string[] | no | IDs from `mma-context-blocks` (e.g. the spec the work was supposed to satisfy) |
|
|
63
|
-
|
|
64
|
-
> Worker tier for `mma-verify` is hardcoded to `complex` and is not caller-configurable. Requests that include `agentType` are rejected with HTTP 400.
|
|
65
|
-
|
|
66
|
-
## Full example
|
|
67
|
-
|
|
68
|
-
```bash
|
|
69
|
-
BATCH=$(curl -f --show-error -s -X POST \
|
|
70
|
-
-H "X-MMA-Client: $MMA_CLIENT" \
|
|
71
|
-
-H "Authorization: Bearer $TOKEN" \
|
|
72
|
-
-H "Content-Type: application/json" \
|
|
73
|
-
-d '{"checklist":["Error handler exists","Tests pass"],"filePaths":["/project/src/handler.ts"]}' \
|
|
74
|
-
"http://localhost:$PORT/verify?cwd=/project")
|
|
75
|
-
BATCH_ID=$(echo "$BATCH" | jq -r '.batchId')
|
|
76
|
-
```
|
|
77
|
-
|
|
78
|
-
@include _shared/polling.md
|
|
79
|
-
|
|
80
|
-
@include _shared/response-shape.md
|
|
81
|
-
|
|
82
|
-
## Reading the findings (3.10.5+)
|
|
83
|
-
|
|
84
|
-
The terminal envelope's `results[N].annotatedFindings` is a list of structured
|
|
85
|
-
findings the reviewer extracted and scored from the implementer's narrative.
|
|
86
|
-
Every finding has the same shape:
|
|
87
|
-
|
|
88
|
-
| Field | Type | Notes |
|
|
89
|
-
|---|---|---|
|
|
90
|
-
| `id` | string | Reviewer-assigned, e.g. `F1`, `F2`. |
|
|
91
|
-
| `severity` | `'critical' \| 'high' \| 'medium' \| 'low'` | 4-tier. |
|
|
92
|
-
| `claim` | string | One-sentence summary. |
|
|
93
|
-
| `evidence` | string ≥20 chars | Quoted from worker output when grounded. |
|
|
94
|
-
| `suggestion?` | string | Optional fix recommendation. |
|
|
95
|
-
| `annotatorConfidence` | `number \| null` | 0–100 from the reviewer; `null` when emitted via deterministic fallback. |
|
|
96
|
-
| `evidenceGrounded` | boolean | True when `evidence` is a verbatim substring of worker output. |
|
|
97
|
-
|
|
98
|
-
### Verdict states (`qualityReviewVerdict`)
|
|
99
|
-
|
|
100
|
-
- `'annotated'` — every finding is structured. May be reviewer-emitted (with
|
|
101
|
-
numeric `annotatorConfidence`) or deterministic-fallback (with
|
|
102
|
-
`annotatorConfidence: null`). The route ALWAYS reaches `'annotated'` unless
|
|
103
|
-
the reviewer call itself fails at the transport level.
|
|
104
|
-
- `'error'` — only when the reviewer call fails at the transport level (network error / 5xx).
|
|
105
|
-
|
|
106
|
-
### Recommended rendering by the main agent
|
|
107
|
-
|
|
108
|
-
1. Show ALL findings — never silently drop. Confidence and grounding are
|
|
109
|
-
soft signals, not gates.
|
|
110
|
-
2. Default sort: severity (critical → low) then `annotatorConfidence` desc
|
|
111
|
-
(nulls last).
|
|
112
|
-
3. `severity` is the reviewer's authoritative final value — use it directly.
|
|
113
|
-
4. Mark findings with `evidenceGrounded: false` or
|
|
114
|
-
`annotatorConfidence < 70` as "lower-trust" (collapsed section, lighter
|
|
115
|
-
color, or `(low confidence)` annotation). User decides what to do.
|
|
116
|
-
5. Severity-tier counts feed the dashboard via V3 `findingsBySeverity`.
|
|
117
|
-
|
|
118
|
-
@include _shared/budget-defaults.md
|
|
119
|
-
|
|
120
|
-
## Best practices
|
|
121
|
-
|
|
122
|
-
This skill is one step in the larger flow described in `multi-model-agent` → "Best practices". Recipes that involve `mma-verify`:
|
|
123
|
-
|
|
124
|
-
- **Recipe B — Debug-fix-verify.** `mma-debug` → `mma-delegate` (fix) → `mma-verify`. Verify checks acceptance criteria against the implemented work. Reuse the context block registered for the debug call.
|
|
125
|
-
|
|
126
|
-
Anti-pattern alert: **`parallel-rounds-same-target`** (AP1, verify analog). Two parallel `mma-verify` calls on the unchanged checklist re-flag the same gaps. Run verify → fix → re-verify sequentially instead.
|
|
127
|
-
|
|
128
|
-
## Common pitfalls
|
|
129
|
-
|
|
130
|
-
❌ **Vague checklist items**
|
|
131
|
-
> "Code is good"
|
|
132
|
-
|
|
133
|
-
The worker can't gather evidence for "good". **Fix:** specific, falsifiable criteria — `"Function parseConfig has at least 3 unit tests covering: missing field, malformed JSON, empty file"`.
|
|
134
|
-
|
|
135
|
-
❌ **Verifying without `filePaths`**
|
|
136
|
-
The worker has nothing to read, so its verdict is speculative. **Fix:** always pass the file(s) the work landed in.
|
|
137
|
-
|
|
138
|
-
❌ **Treating verify as the implementation step**
|
|
139
|
-
Verify CHECKS work; it doesn't DO work. If a checklist item fails, dispatch `mma-delegate` to fix it, then re-verify.
|
|
140
|
-
|
|
141
|
-
❌ **Skipping verify because "tests pass"**
|
|
142
|
-
Tests only verify the test cases that exist. `mma-verify` checks the acceptance criteria — which often include things tests don't cover (docs updated, no debug prints left behind, etc.).
|
|
143
|
-
|
|
144
|
-
## Terminal context block
|
|
145
|
-
|
|
146
|
-
Every completed task automatically registers a terminal markdown context block containing the full task report (headline, checklist item verdicts, and annotated findings). The `blockId` is returned in each task result as `terminalBlockId`. This block is immutable, lives for the session duration, and counts against the project's `maxEntries` quota (default 500).
|
|
147
|
-
|
|
148
|
-
**Use cases:**
|
|
149
|
-
- Pass verification results to a downstream `mma-delegate` fix step
|
|
150
|
-
- Feed verify findings into a re-verify round after fixes are applied
|
|
151
|
-
- Accumulate evidence across iterative verify-fix-verify cycles
|
|
152
|
-
|
|
153
|
-
The block is registered server-side at task completion; no caller action is needed to create it. Delete it explicitly via `DELETE /context-blocks/:id` when no longer needed, or let it expire on session teardown.
|
|
154
|
-
|
|
155
|
-
@include _shared/error-handling.md
|