@zhixuan92/multi-model-agent-mcp 0.3.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -28
- package/dist/cli.d.ts +19 -17
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +233 -81
- package/dist/cli.js.map +1 -1
- package/dist/headline.d.ts +30 -0
- package/dist/headline.d.ts.map +1 -0
- package/dist/headline.js +73 -0
- package/dist/headline.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/routing/render-provider-routing-matrix.d.ts +1 -1
- package/dist/routing/render-provider-routing-matrix.d.ts.map +1 -1
- package/dist/routing/render-provider-routing-matrix.js +37 -65
- package/dist/routing/render-provider-routing-matrix.js.map +1 -1
- package/dist/tools/audit-document.d.ts +19 -0
- package/dist/tools/audit-document.d.ts.map +1 -0
- package/dist/tools/audit-document.js +30 -0
- package/dist/tools/audit-document.js.map +1 -0
- package/dist/tools/debug-task.d.ts +15 -0
- package/dist/tools/debug-task.d.ts.map +1 -0
- package/dist/tools/debug-task.js +33 -0
- package/dist/tools/debug-task.js.map +1 -0
- package/dist/tools/execute-plan-task.d.ts +26 -0
- package/dist/tools/execute-plan-task.d.ts.map +1 -0
- package/dist/tools/execute-plan-task.js +49 -0
- package/dist/tools/execute-plan-task.js.map +1 -0
- package/dist/tools/review-code.d.ts +19 -0
- package/dist/tools/review-code.d.ts.map +1 -0
- package/dist/tools/review-code.js +31 -0
- package/dist/tools/review-code.js.map +1 -0
- package/dist/tools/verify-work.d.ts +14 -0
- package/dist/tools/verify-work.d.ts.map +1 -0
- package/dist/tools/verify-work.js +32 -0
- package/dist/tools/verify-work.js.map +1 -0
- package/package.json +22 -2
package/README.md
CHANGED
|
@@ -2,16 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
**Delegate work from your expensive parent-session model to a fleet of cheaper sub-agents, in parallel, from a single MCP tool call.**
|
|
4
4
|
|
|
5
|
-
This is the MCP stdio server for [`multi-model-agent`](https://github.com/zhixuan312/multi-model-agent). Your MCP client (Claude Code, Claude Desktop, Codex CLI, Cursor, …) spawns it on demand and gets
|
|
5
|
+
This is the MCP stdio server for [`multi-model-agent`](https://github.com/zhixuan312/multi-model-agent). Your MCP client (Claude Code, Claude Desktop, Codex CLI, Cursor, …) spawns it on demand and gets nine tools: `delegate_tasks`, `register_context_block`, `retry_tasks`, `get_batch_slice`, `execute_plan_task`, `audit_document`, `debug_task`, `review_code`, and `verify_work`. Each `delegate_tasks` call runs the supplied tasks in parallel across the agents you configured, auto-routing each to the cheapest one that has the required capabilities and agent type — or pinning to a specific agent when you want control. Every response envelope carries a pre-computed `headline` field so the calling agent can narrate the ROI story in one line without any arithmetic.
|
|
6
6
|
|
|
7
7
|
## Why use it
|
|
8
8
|
|
|
9
|
-
- **Cut cost and context.** Mechanical work (file edits, search, doc lookups) runs on cheap
|
|
9
|
+
- **Cut cost and context.** Mechanical work (file edits, search, doc lookups) runs on cheap agents in a clean worker context. Your parent session's window stays lean and its judgment unblocked.
|
|
10
10
|
- **Run tasks in parallel.** Independent tasks in one call execute concurrently; wall-clock time drops with task count.
|
|
11
|
-
- **Mix
|
|
12
|
-
- **Auto-route and escalate.** Capability filter →
|
|
11
|
+
- **Mix agents in one config.** Claude, Codex, and any OpenAI-compatible endpoint (MiniMax, DeepSeek, Groq, local vLLM, …) live side-by-side.
|
|
12
|
+
- **Auto-route and escalate.** Capability filter → agent type routing; on failure the chain is walked automatically, stopping at the first success.
|
|
13
13
|
- **No bare failures.** Every termination path (incomplete, max_turns, timeout, error) populates `output` from the runner's scratchpad.
|
|
14
14
|
- **Sandboxed by default.** `cwd-only` file tool confinement and shell-disabled by default. Opt out per-task only when needed.
|
|
15
|
+
- **Pre-computed ROI headline.** Every `delegate_tasks` response carries a `headline` field — a one-line summary of tasks, success rate, wall-clock time, serial savings, cost, and ROI. Quote it verbatim; no arithmetic required.
|
|
15
16
|
- **Visible ROI.** Every response surfaces `aggregateCost`, `timings`, and per-task `savedCostUSD` for delegation savings.
|
|
16
17
|
|
|
17
18
|
## How it works
|
|
@@ -26,24 +27,16 @@ Create `~/.multi-model/config.json`:
|
|
|
26
27
|
|
|
27
28
|
```json
|
|
28
29
|
{
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"type": "
|
|
30
|
+
"agents": {
|
|
31
|
+
"standard": {
|
|
32
|
+
"type": "openai-compatible",
|
|
32
33
|
"model": "claude-sonnet-4-6",
|
|
33
|
-
"
|
|
34
|
-
},
|
|
35
|
-
"codex": {
|
|
36
|
-
"type": "codex",
|
|
37
|
-
"model": "gpt-5-codex",
|
|
38
|
-
"costTier": "medium"
|
|
34
|
+
"baseUrl": "https://api.claude.ai/v1"
|
|
39
35
|
},
|
|
40
|
-
"
|
|
36
|
+
"complex": {
|
|
41
37
|
"type": "openai-compatible",
|
|
42
|
-
"model": "
|
|
43
|
-
"baseUrl": "https://api.
|
|
44
|
-
"apiKeyEnv": "MINIMAX_API_KEY",
|
|
45
|
-
"costTier": "free",
|
|
46
|
-
"hostedTools": ["web_search"]
|
|
38
|
+
"model": "claude-opus-4-6",
|
|
39
|
+
"baseUrl": "https://api.claude.ai/v1"
|
|
47
40
|
}
|
|
48
41
|
},
|
|
49
42
|
"defaults": {
|
|
@@ -56,11 +49,10 @@ Create `~/.multi-model/config.json`:
|
|
|
56
49
|
|
|
57
50
|
Config lookup order: `--config <path>` → `MULTI_MODEL_CONFIG` env var → `~/.multi-model/config.json`.
|
|
58
51
|
|
|
59
|
-
|
|
52
|
+
Agent auth:
|
|
60
53
|
|
|
61
|
-
-
|
|
62
|
-
-
|
|
63
|
-
- **`openai-compatible`** uses `apiKeyEnv` (preferred) or inline `apiKey`
|
|
54
|
+
- **OpenAI-compatible** agents use `apiKeyEnv` (preferred) or inline `apiKey`
|
|
55
|
+
- **Claude** agents use `ANTHROPIC_API_KEY` if set, otherwise the local Claude auth flow
|
|
64
56
|
|
|
65
57
|
## Setup
|
|
66
58
|
|
|
@@ -72,7 +64,7 @@ One command — the client will spawn the server on demand. Use `-s user` so the
|
|
|
72
64
|
claude mcp add multi-model-agent -s user -- npx -y @zhixuan92/multi-model-agent-mcp serve
|
|
73
65
|
```
|
|
74
66
|
|
|
75
|
-
If your
|
|
67
|
+
If your agents need environment variables:
|
|
76
68
|
|
|
77
69
|
```bash
|
|
78
70
|
claude mcp add multi-model-agent -s user \
|
|
@@ -99,7 +91,7 @@ ANTHROPIC_API_KEY = "sk-ant-..."
|
|
|
99
91
|
MINIMAX_API_KEY = "..."
|
|
100
92
|
```
|
|
101
93
|
|
|
102
|
-
Only set the env keys for the
|
|
94
|
+
Only set the env keys for the agents you actually configured. If you use `codex login`, the `codex` agent inside `multi-model-agent` reuses that auth automatically — but Claude, MiniMax, and other API-key agents still need to be passed through `[mcp_servers.multi-model-agent.env]` because the spawned MCP process does not inherit your shell environment. Restart `codex` after editing the file.
|
|
103
95
|
|
|
104
96
|
### Claude Desktop
|
|
105
97
|
|
|
@@ -145,7 +137,7 @@ args = ["-y", "@zhixuan92/multi-model-agent-mcp@0.3.0", "serve"]
|
|
|
145
137
|
|
|
146
138
|
## Recommended: delegation rule for Claude Code
|
|
147
139
|
|
|
148
|
-
Claude Code's native `Task` / `Agent` subagents inherit your parent session's expensive model and eat its context window. We ship a drop-in rule file that teaches Claude Code **when** to delegate work through `delegate_tasks` instead — mechanical edits go to free
|
|
140
|
+
Claude Code's native `Task` / `Agent` subagents inherit your parent session's expensive model and eat its context window. We ship a drop-in rule file that teaches Claude Code **when** to delegate work through `delegate_tasks` instead — mechanical edits go to free agents, reasoning-tier work escalates only when needed, and independent tasks run in parallel.
|
|
149
141
|
|
|
150
142
|
Install globally:
|
|
151
143
|
|
|
@@ -190,12 +182,22 @@ Accepts an array of tasks and runs them concurrently. Auto-routes each task by c
|
|
|
190
182
|
}
|
|
191
183
|
```
|
|
192
184
|
|
|
193
|
-
Per-task fields: `prompt`, `tier`, `requiredCapabilities`, `provider?`, `tools?`, `maxTurns?`, `timeoutMs?`, `cwd?`, `effort?`, `sandboxPolicy?`, `contextBlockIds?`, `expectedCoverage?`, `includeProgressTrace?`, `parentModel?`.
|
|
185
|
+
Per-task fields: `prompt`, `agentType?`, `requiredCapabilities?`, `tools?`, `maxTurns?`, `timeoutMs?`, `cwd?`, `effort?`, `sandboxPolicy?`, `contextBlockIds?`, `expectedCoverage?`, `includeProgressTrace?`, `parentModel?`, `skipCompletionHeuristic?`, `maxCostUSD?`, `reviewPolicy?`, `maxReviewRounds?`, `briefQualityPolicy?`.
|
|
194
186
|
|
|
195
|
-
`expectedCoverage` supports `minSections?`, `sectionPattern?`, and `requiredMarkers?`. `includeProgressTrace` opts a task into returning its bounded post-hoc progress trace. `parentModel` lets the server estimate `savedCostUSD` relative to the calling model.
|
|
187
|
+
`expectedCoverage` supports `minSections?`, `sectionPattern?`, and `requiredMarkers?`. `includeProgressTrace` opts a task into returning its bounded post-hoc progress trace. `parentModel` lets the server estimate `savedCostUSD` relative to the calling model. `skipCompletionHeuristic: true` disables the short-output completion heuristic in the runner's supervision layer — use it for tight-format outputs (single-line verdicts, CSV rows, opaque identifiers) that don't follow prose conventions. The `empty` and `thinking_only` degeneracy checks still fire independently. If you also set `expectedCoverage`, the coverage contract is authoritative: the short-output heuristic is skipped automatically whenever the coverage check passes, so you don't need to set both.
|
|
196
188
|
|
|
197
189
|
Capabilities: `file_read`, `file_write`, `grep`, `glob`, `shell`, `web_search`, `web_fetch`.
|
|
198
190
|
|
|
191
|
+
### ROI headline
|
|
192
|
+
|
|
193
|
+
Every `delegate_tasks` response envelope — both `full` mode and `summary` mode — carries a pre-computed `headline` field: a one-line summary of tasks / success rate / wall-clock / serial-savings / actual cost / saved cost / ROI multiplier (when a single baseline is declared). The calling agent is expected to quote it verbatim to the user after every dispatch, with no arithmetic. Example:
|
|
194
|
+
|
|
195
|
+
> *"11 tasks, 5/11 ok (45.5%), wall 5m 54s, saved ~18m 30s vs serial, $1.37 actual / $8.91 saved vs claude-opus-4-6 (7.5x ROI)"*
|
|
196
|
+
|
|
197
|
+
When a batch declares mixed parent models across its tasks, the ROI multiplier is suppressed (because a single ratio across different baselines is not coherent) and the cost clause reads `$X actual / $Y saved vs multiple baselines`. When no `parentModel` is declared, the cost clause collapses to `$X actual`.
|
|
198
|
+
|
|
199
|
+
If the primary response came back in summary mode, or a client-side limit obscured the envelope, call `get_batch_slice({ batchId, slice: 'telemetry' })` — it returns the same `headline` plus a compact envelope of roughly 600 bytes of header and ~200 bytes per task in `results[]`. A typical 10–30-task batch comes back at 2–7 KB, well under the client's tool-result size limit; very large batches (100+ tasks) scale linearly and may approach the limit.
|
|
200
|
+
|
|
199
201
|
## Security
|
|
200
202
|
|
|
201
203
|
### Sandbox enforcement
|
package/dist/cli.d.ts
CHANGED
|
@@ -8,25 +8,11 @@ export declare function computeTimings(wallClockMs: number, results: RunResult[]
|
|
|
8
8
|
export declare function computeBatchProgress(results: RunResult[]): BatchProgress;
|
|
9
9
|
export declare function computeAggregateCost(results: RunResult[]): BatchAggregateCost;
|
|
10
10
|
export declare const SERVER_VERSION: string;
|
|
11
|
-
export declare function buildTaskSchema(
|
|
11
|
+
export declare function buildTaskSchema(availableAgents: [string, ...string[]]): z.ZodObject<{
|
|
12
12
|
prompt: z.ZodString;
|
|
13
|
-
|
|
13
|
+
agentType: z.ZodOptional<z.ZodEnum<{
|
|
14
14
|
[x: string]: string;
|
|
15
15
|
}>>;
|
|
16
|
-
tier: z.ZodEnum<{
|
|
17
|
-
reasoning: "reasoning";
|
|
18
|
-
standard: "standard";
|
|
19
|
-
trivial: "trivial";
|
|
20
|
-
}>;
|
|
21
|
-
requiredCapabilities: z.ZodArray<z.ZodEnum<{
|
|
22
|
-
file_read: "file_read";
|
|
23
|
-
file_write: "file_write";
|
|
24
|
-
grep: "grep";
|
|
25
|
-
glob: "glob";
|
|
26
|
-
shell: "shell";
|
|
27
|
-
web_search: "web_search";
|
|
28
|
-
web_fetch: "web_fetch";
|
|
29
|
-
}>>;
|
|
30
16
|
tools: z.ZodOptional<z.ZodEnum<{
|
|
31
17
|
full: "full";
|
|
32
18
|
none: "none";
|
|
@@ -44,14 +30,28 @@ export declare function buildTaskSchema(availableProviders: [string, ...string[]
|
|
|
44
30
|
none: "none";
|
|
45
31
|
"cwd-only": "cwd-only";
|
|
46
32
|
}>>;
|
|
33
|
+
requiredCapabilities: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
47
34
|
contextBlockIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
48
35
|
expectedCoverage: z.ZodOptional<z.ZodObject<{
|
|
49
36
|
minSections: z.ZodOptional<z.ZodNumber>;
|
|
50
37
|
sectionPattern: z.ZodOptional<z.ZodString>;
|
|
51
38
|
requiredMarkers: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
52
39
|
}, z.core.$strip>>;
|
|
53
|
-
|
|
40
|
+
skipCompletionHeuristic: z.ZodOptional<z.ZodBoolean>;
|
|
54
41
|
parentModel: z.ZodOptional<z.ZodString>;
|
|
42
|
+
maxCostUSD: z.ZodOptional<z.ZodNumber>;
|
|
43
|
+
reviewPolicy: z.ZodOptional<z.ZodEnum<{
|
|
44
|
+
full: "full";
|
|
45
|
+
spec_only: "spec_only";
|
|
46
|
+
off: "off";
|
|
47
|
+
}>>;
|
|
48
|
+
maxReviewRounds: z.ZodOptional<z.ZodNumber>;
|
|
49
|
+
briefQualityPolicy: z.ZodOptional<z.ZodEnum<{
|
|
50
|
+
off: "off";
|
|
51
|
+
normalize: "normalize";
|
|
52
|
+
strict: "strict";
|
|
53
|
+
warn: "warn";
|
|
54
|
+
}>>;
|
|
55
55
|
}, z.core.$strip>;
|
|
56
56
|
export declare function buildMcpServer(config: Parameters<typeof runTasks>[1], options?: {
|
|
57
57
|
/** Character threshold that triggers auto-switch from 'full' to
|
|
@@ -61,6 +61,8 @@ export declare function buildMcpServer(config: Parameters<typeof runTasks>[1], o
|
|
|
61
61
|
* MULTI_MODEL_LARGE_RESPONSE_THRESHOLD_CHARS > config file
|
|
62
62
|
* defaults.largeResponseThresholdChars > this option > default. */
|
|
63
63
|
largeResponseThresholdChars?: number;
|
|
64
|
+
/** Internal test-only hook for injecting a stubbed runTasks implementation. */
|
|
65
|
+
_testRunTasksOverride?: typeof runTasks;
|
|
64
66
|
}): McpServer;
|
|
65
67
|
/**
|
|
66
68
|
* MCP CLI config discovery (owned by MCP, not core):
|
package/dist/cli.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAQA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAEpE,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,OAAO,EAAE,QAAQ,EAAE,MAAM,6CAA6C,CAAC;AAEvE,OAAO,KAAK,EACV,gBAAgB,EAGhB,SAAS,EACT,YAAY,EACZ,aAAa,EACb,kBAAkB,
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAQA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAEpE,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,OAAO,EAAE,QAAQ,EAAE,MAAM,6CAA6C,CAAC;AAEvE,OAAO,KAAK,EACV,gBAAgB,EAGhB,SAAS,EACT,YAAY,EACZ,aAAa,EACb,kBAAkB,EAEnB,MAAM,mCAAmC,CAAC;AAS3C,eAAO,MAAM,WAAW,sBAAsB,CAAC;AAc/C,wBAAgB,cAAc,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,YAAY,CAItF;AAED,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,SAAS,EAAE,GAAG,aAAa,CAgBxE;AAED,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,SAAS,EAAE,GAAG,kBAAkB,CAiB7E;AA4GD,eAAO,MAAM,cAAc,QAAc,CAAC;AAE1C,wBAAgB,eAAe,CAAC,eAAe,EAAE,CAAC,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBA4HrE;AAkCD,wBAAgB,cAAc,CAC5B,MAAM,EAAE,UAAU,CAAC,OAAO,QAAQ,CAAC,CAAC,CAAC,CAAC,EACtC,OAAO,CAAC,EAAE;IACR;;;;;wEAKoE;IACpE,2BAA2B,CAAC,EAAE,MAAM,CAAC;IACrC,+EAA+E;IAC/E,qBAAqB,CAAC,EAAE,OAAO,QAAQ,CAAC;CACzC,aA6cF;AAED;;;;;GAKG;AACH,wBAAsB,cAAc,IAAI,OAAO,CAAC,gBAAgB,CAAC,CA4BhE"}
|