@heretyc/subagent-mcp 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,908 @@
1
+ #!/usr/bin/env node
2
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
+ import { z } from "zod";
5
+ import { spawn, execSync } from "child_process";
6
+ import { unlinkSync, existsSync, realpathSync } from "node:fs";
7
+ import { randomUUID } from "crypto";
8
+ import { isAbsolute, basename } from "node:path";
9
+ import { pathToFileURL } from "url";
10
+ import { buildCommand } from "./effort.js";
11
+ import { resolveExeFor } from "./platform.js";
12
+ import { formatLocalIso, selectUnreported } from "./wait-helpers.js";
13
+ import { computeStatusTransition, buildLivenessFields, } from "./status-helpers.js";
14
+ import { extractFinalTurn } from "./output-helpers.js";
15
+ import { consumeStreamChunk, flushStream, retainLastN, } from "./stream-helpers.js";
16
+ import { loadRoutingTable, buildCandidates, validatePresence, TASK_CATEGORIES, AUTO_HINT, SPLIT_HINT, } from "./routing.js";
17
+ import { createDeadlockWindow } from "./deadlock.js";
18
+ import { createRulesetGate, RULESET_HARD_FAIL_MSG, } from "./ruleset.js";
19
+ import * as orchestrationMarker from "./orchestration/marker.js";
20
+ const agents = new Map(); // registry of launched agents, keyed by agent id (filled by tryLaunchCandidate)
21
+ const MAX_CLAUDE = 5; // cap compared against countProcessing("claude") before each claude launch
22
+ const MAX_CODEX = 5; // same cap, applied to every non-claude (codex) launch
23
+ const deadlockWindow = createDeadlockWindow(); // armed/queried/consumed by the launch_agent handler
24
+ // Advanced-ruleset gate: per-process latch with exactly the deadlock-window
25
+ // scoping. The env-check runs lazily at the FIRST launch_agent call; success
26
+ // latches enabled/disabled for the process lifetime, failure never latches.
27
+ const rulesetGate = createRulesetGate(); // launch_agent awaits ensureReady() and, when active, applyRules()
28
+ // Post-spawn grace window (ms). A child that exits within this window after a
29
+ // successful spawn never launched (codex installed but not logged in, expired
30
+ // auth, instant crash) — the attempt loop silently advances instead of falsely
31
+ // reporting success. ANY exit within the window counts, even code 0, EXCEPT a
32
+ // codex child already finalized by its turn.completed marker (legitimate fast
33
+ // completion). SUBAGENT_SPAWN_GRACE_MS overrides (non-negative int; 0 disables
34
+ // detection = legacy spawn-event-only success) — a test seam; production never
35
+ // sets it.
36
// Strictly parse a SUBAGENT_SPAWN_GRACE_MS override.
//
// Accepts only a plain run of decimal digits (surrounding whitespace is
// tolerated) that also fits the timer range. Anything else — unset, empty,
// signed, fractional, trailing garbage such as "250ms", or a value above
// 2^31-1 (which setTimeout would silently collapse to a 1 ms delay) — yields
// `fallback`.
//
// raw:      the raw environment value (string or undefined).
// fallback: value returned when `raw` is absent or invalid (default 1500).
// Returns a non-negative integer number of milliseconds.
function parseSpawnGraceMs(raw, fallback = 1500) {
    // Largest delay setTimeout honours before it falls back to 1 ms.
    const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;
    if (raw === undefined)
        return fallback;
    const text = String(raw).trim();
    if (!/^\d+$/.test(text))
        return fallback;
    const parsed = Number(text);
    return Number.isSafeInteger(parsed) && parsed <= MAX_TIMER_DELAY_MS ? parsed : fallback;
}
const SPAWN_GRACE_MS = parseSpawnGraceMs(process.env.SUBAGENT_SPAWN_GRACE_MS);
43
+ // TASK_CATEGORIES, AUTO_HINT, SPLIT_HINT, and validatePresence are the pure,
44
+ // side-effect-free presence layer — defined in ./routing.js and imported above
45
+ // so the handler-validation test can exercise them without importing this entry
46
+ // module (which would open the stdio transport).
47
+ // Caveman self-classification gloss for the task_category param (tool-description.md).
48
+ const TASK_CATEGORY_GLOSS = "REQUIRED. Task shape -> routing category (the server picks the best model for it). Pick ONE: math_proof: proof/derivation/formally-checkable result; deductive step-validity under axioms; verified by a proof-checker not tests. security_review: security verdict/threat-assessment/demonstrated exploit; adversarial reasoning over attack surface — vuln, auth/authz, crypto, exploitability. debugging: verified fix/root-cause; ONLY with an observed failure (error, crash, red test, regression, flake); done when the symptom is resolved. quality_review: evaluative verdict on an existing NON-security artifact with NO observed failure; review diff/PR, compare A-vs-B, validate-vs-spec; never self-review. architecture: cross-module design/plan, NO code, NO execution loop; system structure, interface/migration strategy, decompose-into-tasks; >2 files or public API. agentic_execution: reach a target end-state by iterating in a mutating env (act/observe/adapt loop); run/deploy/provision/browse, tool/function-call, iterate-until-tests-pass. data_analysis: empirical finding/model ABOUT a structured dataset; query/SQL/dataframe answer, statistic, fit-model-report-drivers; the finding is the deliverable even if code runs. coding: bounded runnable code artifact, one-pass; implement function/module/feature/script, write tests, single-module refactor; compiles/passes tests. knowledge_synthesis: novel integrated prose over sources; synthesize/summarize/translate/draft/explain-across-files; judged by faithfulness/coherence not exact-match. mechanical: deterministic single-pass transform/leaf op, exact-match checkable; find/grep/list/rename/reformat/convert/extract-to-fixed-schema; minimal reasoning. fallback_default: no category fits with confidence (under-specified/mixed/tied); read-only sentinel — PREFER splitting the work into smaller atomic steps that each map to one category.";
49
// Build a tool result that carries a single text item and is flagged as an
// error (`isError: true`).
function errorResult(text) {
    const textItem = { type: "text", text };
    return {
        content: [textItem],
        isError: true,
    };
}
52
// True when this server process itself runs on Windows.
const isWindows = process.platform === "win32";
// Cached output of `npm prefix -g`; stays null until the first lookup.
let _npmPrefix = null;
// Return the global npm prefix, shelling out to `npm prefix -g` only while no
// (truthy) value has been cached yet.
function getNpmPrefix() {
    if (_npmPrefix) {
        return _npmPrefix;
    }
    const rawOutput = execSync("npm prefix -g", { encoding: "utf-8" });
    _npmPrefix = rawOutput.trim();
    return _npmPrefix;
}
60
// Resolve the CLI executable for `provider` on the current platform by
// delegating to resolveExeFor, handing it the filesystem probe and the lazy
// npm-prefix lookup it needs.
function resolveExe(provider) {
    const lookups = {
        existsSync,
        npmPrefix: getNpmPrefix,
    };
    return resolveExeFor(provider, process.platform, lookups);
}
63
// Best-effort removal of the settings file referenced by
// agentState.ucSettingsPath. When a path is set, the file is deleted if it
// exists (any filesystem error is ignored) and the field is reset to
// undefined; when no path is set, the state is left untouched.
function cleanupUcSettings(agentState) {
    const settingsPath = agentState.ucSettingsPath;
    if (!settingsPath) {
        return;
    }
    try {
        if (existsSync(settingsPath)) {
            unlinkSync(settingsPath);
        }
    }
    catch {
        // Deliberately ignored: cleanup must never fail the caller.
    }
    agentState.ucSettingsPath = undefined;
}
74
+ // Concurrency cap accounting: only `processing` agents count against a
75
+ // provider's cap. `stalled` agents (live but quiet past the heartbeat window) do
76
+ // NOT count, freeing a slot while they idle.
77
// Number of registered agents for `provider` whose status is exactly
// "processing"; agents in any other status are not counted.
function countProcessing(provider) {
    let active = 0;
    agents.forEach((agent) => {
        if (agent.status === "processing" && agent.provider === provider) {
            active += 1;
        }
    });
    return active;
}
85
+ // Synchronously reconcile a single agent's status against the pure transition
86
+ // helper. Folds the live process exitCode into AgentState first so an already-
87
+ // exited process is reported as completed/failed immediately (no monitor lag).
88
// Bring one agent's status up to date as of `now`.
//
// For an agent still marked "processing" or "stalled", a non-null exit code on
// the underlying process is first copied into agent.exitCode. The agent's
// status and exitedAt are then replaced by whatever computeStatusTransition
// returns for the current snapshot.
function reconcileAgent(agent, now) {
    const isLive = agent.status === "processing" || agent.status === "stalled";
    if (isLive && agent.process.exitCode !== null) {
        agent.exitCode = agent.process.exitCode;
    }
    const snapshot = {
        status: agent.status,
        exitCode: agent.exitCode,
        lastActivity: agent.lastActivity,
        now,
        exitedAt: agent.exitedAt,
    };
    const { status, exitedAt } = computeStatusTransition(snapshot);
    agent.status = status;
    agent.exitedAt = exitedAt;
}
103
+ // .unref() so this background reconcile timer never keeps the event loop alive
104
+ // on its own — the process (and any test importing this module) can exit cleanly.
105
// Every 10 seconds, reconcile each registered agent against one shared
// timestamp. The timer is unref'd so it cannot hold the process open.
const reconcileInterval = setInterval(() => {
    const tick = Date.now();
    agents.forEach((agent) => {
        reconcileAgent(agent, tick);
    });
}, 10 * 1000);
reconcileInterval.unref();
112
+ // Heavy operating-model + governance guidance for ORCHESTRATION MODE. Carried in
113
+ // the MCP server `instructions` field so a connecting host reads it ONCE at
114
+ // initialize (per the MCP spec the initialize result has an `instructions`
115
+ // field) rather than re-injecting it on every turn. The bundled per-turn hook
116
+ // injects only a small compact reminder; this is the durable, full explanation.
117
+ const ORCHESTRATION_INSTRUCTIONS = "ORCHESTRATION MODE (orchestration-mode tool). WHAT: per-project toggle for LONG-HORIZON work that would fill the context window if run inline. OPERATING MODEL: when ON, act as a workflow orchestrator, delegate-default — decompose and delegate/offload by default. INLINE BY RIGHT: steps bound to main-session-only capability may stay inline (MCP tools sub-agents can't inherit, interactive/consent tools, tight verify loops); state which and why. MUST DELEGATE/OFFLOAD: pure compute and any payload >50KB or >200 lines go via temp scratch-file path handoff (%TEMP% on Windows, /tmp on POSIX); keep the orchestrator context lean. CONFLICT ORDER: safety-scope > user instruction this turn > delegate-default. A user tool-pin re-partitions work; it does not suspend mode. PERSISTENCE: enabling writes a per-project marker that PERSISTS across restarts/sessions until disabled with explicit user permission (does NOT reset on a new session). CARRYOVER: if mode was already ON at session start (inherited), the bundled hook prepends a CARRYOVER notice ONCE per marker; you MUST tell the user it auto-activated, ask whether to keep it ON, and advise whether it fits this session's request. DISABLE: never disable on your own initiative — only with EXPLICIT user permission. You MAY propose disabling when task fit is wrong (bounded, interactive, or MCP-bound). Either way, first explain WHAT mode is and WHY, then request permission via the provider tool: AskUserQuestion on Claude, request-user-input on Codex. Only explicit approval may call orchestration-mode enabled:false; if declined, continue under inline-by-right, ask once per topic, never re-nag. Per-turn injection fires only in CLI hosts loading the bundled hook; desktop hosts toggle the marker but inject nothing (documented degradation).";
118
// MCP server instance. The first argument is the implementation info
// advertised to connecting hosts; the second carries the orchestration-mode
// guidance in the `instructions` field.
const server = new McpServer({
    name: "subagent-mcp",
    // Must track the release this file ships in; the previous literal
    // ("2.3.9") had drifted behind the published 2.6.0 package.
    version: "2.6.0",
    description: "Spawns the LOCALLY INSTALLED `claude` and `codex` CLI binaries as child processes. Does NOT call the Anthropic or OpenAI HTTP APIs directly (no API keys, no SDK) and there are no plans to — all model access is via the local CLIs.",
}, {
    instructions: ORCHESTRATION_INSTRUCTIONS,
});
125
+ // Best-effort removal of a candidate's temp ultracode settings file after a
126
+ // LAUNCH-TIME failure (the agentState is never registered, so the close handler
127
+ // will not run cleanupUcSettings for it).
128
// Delete the file at `ucSettingsPath` if a path was given and the file exists.
// Purely best-effort: a missing path is a no-op and filesystem errors are
// ignored.
function cleanupUcSettingsPath(ucSettingsPath) {
    if (ucSettingsPath) {
        try {
            if (existsSync(ucSettingsPath)) {
                unlinkSync(ucSettingsPath);
            }
        }
        catch {
            // Deliberately ignored: cleanup must never fail the caller.
        }
    }
}
137
+ // Attempt to spawn + register a single candidate. Resolves to the agent_id on a
138
+ // successful spawn, or a launch-time failure reason string (never throws/rejects).
139
+ //
140
+ // spawn() failures (ENOENT/EACCES) are ASYNC: a missing/broken CLI emits the
141
+ // child 'error' event AFTER spawn() returns, so a try/catch around spawn cannot
142
+ // see it. We therefore: (a) fast-fail when the resolved exe path does not exist;
143
+ // (b) attach an 'error' handler immediately and AWAIT a one-shot 'spawn' vs
144
+ // 'error' race. Only on the 'spawn' win do we register the agent. A persistent
145
+ // 'error' handler stays attached so a LATE spawn error can never crash the
146
+ // process. Any launch-time failure cleans up and is reported so the attempt loop
147
+ // silently advances to the next candidate.
148
// Try to spawn and register ONE candidate.
//
// candidate:   { provider, model, effort } triple to launch.
// prompt:      task prompt; handed to buildCommand and, for claude children,
//              also written to the child's stdin.
// agentCwd:    working directory for the child process.
// routingTier: stored on the agent state as `routingTier`.
// rulesetInfo: optional; when truthy, `rulesetApplied: true` and
//              `rulesetOriginalSelection` are stored on the agent state.
//
// Resolves to { agentId } once the child is registered in `agents`, or to
// { reason } describing a launch-time failure. Failures are returned, not
// thrown, so the caller's attempt loop can advance.
async function tryLaunchCandidate(candidate, prompt, agentCwd, routingTier, rulesetInfo) {
    // Concurrency cap for this provider.
    const running = countProcessing(candidate.provider);
    const max = candidate.provider === "claude" ? MAX_CLAUDE : MAX_CODEX;
    if (running >= max) {
        return {
            reason: `Maximum ${max} concurrent ${candidate.provider} agents already running. Current: ${running}`,
        };
    }
    // Build the command. haiku ignores effort; pass "high" placeholder for the
    // "none" sentinel (buildCommand drops it for haiku anyway).
    const effortForBuild = candidate.effort === "none" ? "high" : candidate.effort;
    let buildResult;
    let cmd;
    try {
        buildResult = buildCommand(candidate.provider, candidate.model, effortForBuild, prompt, agentCwd);
        cmd = resolveExe(candidate.provider);
    }
    catch (e) {
        // resolveExe can throw AFTER buildCommand already returned a
        // ucSettingsPath; release it here so the temp settings file is not
        // leaked. (buildResult is still undefined when buildCommand itself threw.)
        cleanupUcSettingsPath(buildResult?.ucSettingsPath);
        return { reason: e instanceof Error ? e.message : String(e) };
    }
    // Fast-fail absolute paths only. Bare names intentionally rely on PATH; spawn
    // below resolves them and reports ENOENT/EACCES through the same failure path.
    if (isAbsolute(cmd) && !existsSync(cmd)) {
        cleanupUcSettingsPath(buildResult.ucSettingsPath);
        return { reason: `CLI executable not found: ${cmd}` };
    }
    const stdinMode = candidate.provider === "claude" ? "pipe" : "ignore";
    let childProcess;
    try {
        childProcess = spawn(cmd, buildResult.args, {
            cwd: agentCwd,
            env: { ...process.env, SUBAGENT_MCP_SUBAGENT: "1" },
            stdio: [stdinMode, "pipe", "pipe"],
            windowsHide: true,
        });
    }
    catch (error) {
        // Synchronous spawn throw (rare) — clean up and report as a launch failure.
        cleanupUcSettingsPath(buildResult.ucSettingsPath);
        return { reason: error instanceof Error ? error.message : String(error) };
    }
    // Await the one-shot spawn/error race. The 'error' handler is attached BEFORE
    // we await so an async ENOENT cannot escape as an unhandled event.
    try {
        await new Promise((resolve, reject) => {
            const onError = (err) => reject(err);
            childProcess.once("spawn", () => {
                childProcess.removeListener("error", onError);
                resolve();
            });
            childProcess.once("error", onError);
        });
    }
    catch (err) {
        // Launch-time failure (ENOENT/EACCES/etc.) — kill if somehow alive, clean up
        // the settings file, and report so the attempt loop advances.
        try {
            childProcess.kill();
        }
        catch { }
        cleanupUcSettingsPath(buildResult.ucSettingsPath);
        return { reason: err instanceof Error ? err.message : String(err) };
    }
    // Spawn succeeded. Build the agent state. Keep a persistent 'error' handler
    // so a LATE spawn error never crashes the process; fold it into stderr
    // rather than throwing.
    const agentId = randomUUID();
    const now = Date.now();
    const agentState = {
        id: agentId,
        provider: candidate.provider,
        model: candidate.model,
        routingTier,
        ...(rulesetInfo
            ? { rulesetApplied: true, rulesetOriginalSelection: rulesetInfo.originalSelection }
            : {}),
        status: "processing",
        process: childProcess,
        stdout: "",
        stderr: "",
        exitCode: null,
        exitedAt: null,
        // Launch time is the initial heartbeat. Only PARSED VISIBLE provider stream
        // items refresh lastActivity afterwards (see the stdout handler); raw
        // stdout/stderr chunks do NOT, so `stalled` means exactly "no visible
        // provider stream item for the heartbeat window".
        startedAt: now,
        lastActivity: now,
        cwd: agentCwd,
        ucSettingsPath: buildResult.ucSettingsPath,
        waitReported: false,
        visibleStream: [],
        streamBuf: "",
    };
    childProcess.on("error", (err) => {
        // Captured into the stderr tail for debugging. Not a visible provider stream
        // item, so it does NOT refresh the heartbeat.
        agentState.stderr += `\n[process error] ${err instanceof Error ? err.message : String(err)}`;
    });
    if (candidate.provider === "claude" && childProcess.stdin) {
        // EPIPE if the child dies before draining the prompt (the grace-window
        // early-exit class) — fold into stderr, never crash the server.
        childProcess.stdin.on("error", (err) => {
            agentState.stderr += `\n[stdin error] ${err instanceof Error ? err.message : String(err)}`;
        });
        childProcess.stdin.write(prompt);
        childProcess.stdin.end();
    }
    if (childProcess.stdout) {
        // Decode through the stream's own UTF-8 decoder so a multi-byte character
        // split across two chunks is reassembled instead of being turned into
        // replacement characters by a per-chunk toString().
        childProcess.stdout.setEncoding("utf8");
        childProcess.stdout.on("data", (data) => {
            const chunk = String(data);
            const at = Date.now();
            // Buffer partial lines so a provider JSONL event split across chunks is
            // never dropped. Only COMPLETE lines are parsed this call; the trailing
            // fragment is carried in streamBuf until its newline arrives.
            const { items, pending, lines } = consumeStreamChunk(agentState.provider, agentState.streamBuf, chunk);
            agentState.streamBuf = pending;
            // Accumulate all complete lines into stored stdout.
            for (const line of lines) {
                agentState.stdout += line + "\n";
            }
            if (items.length > 0) {
                // Heartbeat refreshes only on parsed visible provider stream items,
                // not on raw stdout bytes.
                agentState.lastActivity = at;
                agentState.visibleStream = retainLastN(agentState.visibleStream, items.map((it) => ({ ...it, at })), 3);
            }
            // Codex emits JSONL; turn.completed signals task done — kill process. Scan
            // COMPLETE lines only so a marker split across chunks is matched once
            // fully assembled (never on a partial fragment).
            if (agentState.provider === "codex" &&
                lines.some((l) => l.includes('"type":"turn.completed"'))) {
                agentState.turnCompleted = true;
                agentState.status = "finished";
                agentState.exitCode = 0;
                if (agentState.exitedAt === null)
                    agentState.exitedAt = at;
                childProcess.kill();
            }
        });
    }
    // Capture stderr into the tail for debugging. stderr is NOT a parsed visible
    // provider stream, so it does NOT refresh the heartbeat (parsed-visible only).
    if (childProcess.stderr) {
        // Same chunk-boundary-safe UTF-8 decoding as stdout.
        childProcess.stderr.setEncoding("utf8");
        childProcess.stderr.on("data", (data) => {
            agentState.stderr += String(data);
        });
    }
    childProcess.on("close", (code) => {
        // Flush any buffered trailing stdout line (final event may arrive without a
        // terminating newline) so its visible item is not lost.
        if (agentState.streamBuf) {
            const at = Date.now();
            const { items, lines } = flushStream(agentState.provider, agentState.streamBuf);
            agentState.streamBuf = "";
            for (const line of lines) {
                agentState.stdout += line + "\n";
            }
            // A turn.completed marker may arrive only in this final flush (no
            // trailing newline) — the grace window's success exception needs it.
            if (agentState.provider === "codex" &&
                lines.some((l) => l.includes('"type":"turn.completed"'))) {
                agentState.turnCompleted = true;
            }
            if (items.length > 0) {
                agentState.lastActivity = at;
                agentState.visibleStream = retainLastN(agentState.visibleStream, items.map((it) => ({ ...it, at })), 3);
            }
        }
        // Always clean up ultracode settings file on close
        cleanupUcSettings(agentState);
        // Always record actual close time (unless already finalized)
        if (agentState.exitedAt === null)
            agentState.exitedAt = Date.now();
        if (agentState.status === "stopped") {
            // Record real exit code but preserve "stopped" status
            if (agentState.exitCode === null)
                agentState.exitCode = code !== null ? code : -1;
            return;
        }
        if (agentState.status === "finished") {
            // Already finalized by turn.completed; exitedAt already stamped
            return;
        }
        // Normal exit: set exit code and derive status
        agentState.exitCode = code !== null ? code : -1;
        agentState.status = code === 0 ? "finished" : "errored";
    });
    // Resolves after the close handler above has fully run (attach order):
    // streams flushed and any final turn.completed marker scanned. Pre-created
    // because 'close' can fire in the same frame as 'exit', before the grace
    // race's await continuation could attach a listener.
    const closedAfterFlush = new Promise((resolve) => {
        childProcess.once("close", () => resolve());
    });
    // Post-spawn grace window: a 'spawn' win alone is NOT success — a binary that
    // spawns then dies immediately (codex installed but not logged in) must
    // advance the attempt loop, not falsely conclude it. AgentState is fully
    // wired and the claude prompt already written above, so a surviving child
    // loses no stream output during the wait. Exception: a codex child already
    // finalized by its turn.completed marker (dedicated turnCompleted flag — the
    // SOLE in-window success exception; visibility-and-failover.md) completed
    // the task legitimately fast — that is a success, never a launch failure.
    // The close handler above cleans up a condemned child (uc settings, stream
    // flush); the agent is simply never registered.
    if (SPAWN_GRACE_MS > 0) {
        const earlyExit = await new Promise((resolve) => {
            const timer = setTimeout(() => {
                childProcess.removeListener("exit", onExit);
                resolve(null);
            }, SPAWN_GRACE_MS);
            const onExit = (code, signal) => {
                clearTimeout(timer);
                resolve({ code, signal });
            };
            childProcess.once("exit", onExit);
        });
        if (earlyExit) {
            // 'exit' can be delivered before the final stdout chunk, so wait for
            // 'close' (streams drained, flush scanned) before deciding — a
            // turn.completed fast completion must never be misread as a launch
            // failure and the task silently re-executed on the next candidate.
            await closedAfterFlush;
            if (!agentState.turnCompleted) {
                const tail = agentState.stderr.trim().split("\n").slice(-1)[0] ?? "";
                return {
                    reason: `process exited (code ${earlyExit.code ?? earlyExit.signal}) within ${SPAWN_GRACE_MS}ms of spawn${tail ? `: ${tail}` : ""}`,
                };
            }
        }
    }
    agents.set(agentId, agentState);
    return { agentId };
}
383
+ // Order-sensitive (provider, model, effort) list equality. Detects whether the
384
+ // advanced ruleset actually ALTERED the routing decision — visibility fields
385
+ // are persisted/exposed only then (passthrough looks identical to disabled).
386
// Positional equality of two candidate lists: true only when both have the
// same length and every pair at the same index agrees on provider, model and
// effort.
function sameTriples(a, b) {
    if (a.length !== b.length) {
        return false;
    }
    const hasMismatch = a.some((left, index) => {
        const right = b[index];
        return (left.provider !== right.provider ||
            left.model !== right.model ||
            left.effort !== right.effort);
    });
    return !hasMismatch;
}
391
+ // Tool 1: launch_agent
392
// Tool: launch_agent.
//
// Handler flow: validate parameter presence -> reject the fallback_default
// sentinel for non-explicit launches -> validate the working directory -> arm
// the deadlock window when requested -> build the candidate list -> optionally
// run the advanced ruleset over it -> try candidates in order and return the
// first one that launches, or an error listing every failed attempt.
server.tool("launch_agent", "Spawn a sub-agent. AUTO MODE (preferred): pass only `prompt` + `task_category`; the server picks the best provider/model/effort for that category from its routing table, launches the top candidate, and silently falls back to the next-best on launch failure. `provider`/`model`/`effort` are OPTIONAL overrides — omit for auto-selection; if you pass `model` you must also pass `provider`, and if you pass `effort` you must pass both `provider` and `model`. Unsure which task_category fits? Don't submit one amorphous task — SPLIT into atomic steps that each map to a single category, one agent per step. ultracode effort is Opus-4.8+ only (induced via a temp `--settings {\"ultracode\":true}` file; the CLI rejects `--effort ultracode`). Each sub-agent is a separate claude/codex CLI child that does NOT inherit this session's MCP servers; children run with env SUBAGENT_MCP_SUBAGENT=1 so the orchestration hooks skip them (they are not orchestrators and don't re-trigger carryover). Launch returns status `processing` (alive); a later `stalled` is alive-but-quiet (thinking or awaiting a temp-file handoff), NOT dead — wait or re-poll, don't kill (see poll_agent). DEADLOCK RULE: you MUST ALWAYS set `deadlock=true` when more than 2 launch attempts have already been made for the SAME atomic task (i.e. the 3rd attempt onward), and NEVER otherwise.", {
    task_category: z.enum(TASK_CATEGORIES).describe(TASK_CATEGORY_GLOSS),
    prompt: z.string().min(1),
    provider: z.enum(["claude", "codex"]).optional(),
    model: z.enum(["haiku", "sonnet", "opus", "opus-4-8", "gpt-5.5"]).optional(),
    effort: z.enum(["low", "medium", "high", "xhigh", "max", "ultracode"]).optional(),
    cwd: z.string().optional(),
    deadlock: z.boolean().optional().describe("MANDATE: ALWAYS set deadlock=true when, and ONLY when, more than 2 launch attempts have already been made for the SAME atomic task — the 3rd attempt onward. NEVER set it on a 1st or 2nd attempt, NEVER for a different task, NEVER speculatively. Auto mode only: cannot be combined with provider/model/effort. Passing false is identical to omitting it."),
}, async (params) => {
    const { task_category, provider, model, effort, prompt, deadlock } = params;
    const agentCwd = params.cwd || process.cwd();
    // 1-5. Param-presence validation (zod already constrains task_category, but
    //      hard-validate so the spec error text — valid list + hints, and the
    //      effort-before-model ordering — is what the caller sees). Pure,
    //      exported, and unit-tested (test/handler-validation.test.mjs).
    const presenceError = validatePresence({ task_category, provider, model, effort, deadlock });
    if (presenceError) {
        return errorResult(presenceError);
    }
    // 6. Build the candidate list per mode.
    const overrides = { provider, model, effort };
    const isExplicit = !!(provider && model && effort);
    if (!isExplicit && task_category === "fallback_default") {
        return errorResult(`Error: fallback_default is a split hint sentinel, not a launchable routing-table category.\n${SPLIT_HINT}\n${AUTO_HINT}`);
    }
    // spawn() reports a missing working directory as ENOENT, which is
    // indistinguishable from a missing executable and would fail EVERY candidate
    // with a misleading reason. Reject it once, up front, before any state
    // (deadlock window) is touched.
    if (!existsSync(agentCwd)) {
        return errorResult(`Error: cwd does not exist: ${agentCwd}. Pass an existing directory, or omit cwd to use the server's working directory.`);
    }
    // Arm window after all validation (including fallback_default rejection) passes.
    if (deadlock === true) {
        deadlockWindow.arm();
    }
    const pureAuto = !provider && !model && !effort;
    const branch = (pureAuto && deadlockWindow.active()) ? "performance" : "cost_efficiency";
    const routingTier = isExplicit ? "manual" : branch;
    // explicit mode never reads the table; all other modes do.
    const table = isExplicit ? null : loadRoutingTable();
    if (!isExplicit && table === null) {
        return errorResult(`Error: routing table not populated for ${task_category} (routing-table file missing or unreadable). Either run the model-profiler to populate it, or pass provider+model+effort explicitly for a fully-specified launch.\n${AUTO_HINT}`);
    }
    const result = buildCandidates(table, task_category, overrides, branch);
    const mode = result.mode;
    if (!isExplicit && result.noCandidates) {
        let scope = "";
        if (mode === "provider")
            scope = ` matching provider ${provider}`;
        else if (mode === "provider_model")
            scope = ` matching model ${model}`;
        return errorResult(`Error: routing table not populated for ${task_category} (no${scope} pairings available). Either run the model-profiler to populate it, or pass provider+model+effort explicitly.\n${AUTO_HINT}`);
    }
    // Advanced-ruleset hook (docs/spec/advanced-ruleset/). Env-check gate runs
    // at the first launch_agent of this process (success latches for the
    // process lifetime; failure NEVER latches — re-run next call so an admin
    // fix recovers without a restart). When enabled, routing mode runs ONCE per
    // launch_agent — in ALL selection modes, explicit included — and is never
    // re-run per failover attempt: the attempt loop consumes the returned list
    // verbatim. Deadlock/branch state is never exposed to the script. The
    // hard-fail message deliberately carries no hints (admin must intervene).
    const gateResult = await rulesetGate.ensureReady();
    if (!gateResult.ok) {
        return errorResult(RULESET_HARD_FAIL_MSG);
    }
    let candidates = result.candidates;
    let rulesetApplied = false;
    let rulesetOriginalSelection;
    if (gateResult.active) {
        const payload = {
            candidates: candidates.map((c, i) => ({
                provider: c.provider,
                model: c.model,
                effort: c.effort,
                // Dense positional rank 1..N over the already-filtered list (raw
                // table ranks gap after launchability filtering; explicit has none).
                rank: i + 1,
            })),
            context: {
                task_category,
                cwd: agentCwd,
                selection_mode: mode,
                provider: provider ?? null,
                model: model ?? null,
                effort: effort ?? null,
            },
        };
        const applied = await rulesetGate.applyRules(payload);
        if (!applied.ok) {
            return errorResult(RULESET_HARD_FAIL_MSG);
        }
        if (applied.candidates.length === 0) {
            // Empty list = deliberate policy veto (the limit case of the allowed
            // filter operation), NOT a malfunction — clean error, never the
            // hard-fail message, never latched.
            return errorResult(`Error: advanced ruleset returned zero candidates for task_category ${task_category}; launch vetoed by ruleset.\n${AUTO_HINT}`);
        }
        rulesetApplied = !sameTriples(candidates, applied.candidates);
        if (rulesetApplied) {
            rulesetOriginalSelection = { ...candidates[0] };
        }
        candidates = applied.candidates;
    }
    // 7. Attempt loop: best→worst. Register on first successful spawn; silently
    //    advance on launch-time failure. Sub-agent task outcome is NEVER a trigger.
    const skipped = [];
    for (const candidate of candidates) {
        const outcome = await tryLaunchCandidate(candidate, prompt, agentCwd, routingTier, rulesetApplied && rulesetOriginalSelection !== undefined
            ? { applied: true, originalSelection: rulesetOriginalSelection }
            : undefined);
        if ("agentId" in outcome) {
            if (branch === "performance") {
                deadlockWindow.consume();
            }
            return {
                content: [
                    {
                        type: "text",
                        text: JSON.stringify({
                            agent_id: outcome.agentId,
                            status: "processing",
                            provider: candidate.provider,
                            model: candidate.model,
                            effort: candidate.effort,
                            task_category,
                            ...(rulesetApplied
                                ? {
                                    ruleset_applied: true,
                                    ruleset_original_selection: rulesetOriginalSelection,
                                }
                                : {}),
                        }),
                    },
                ],
            };
        }
        skipped.push({
            model: candidate.model,
            effort: candidate.effort,
            provider: candidate.provider,
            reason: outcome.reason,
        });
    }
    // 8. All candidates failed. A ruleset-modified explicit launch may have
    //    attempted N≠1 candidates — only the numbered ALL_FAILED shape can
    //    report that, so the explicit shape is reserved for unmodified launches.
    //    The length guard keeps an empty attempt list from dereferencing
    //    skipped[0]; that case falls through to the numbered shape instead.
    if (isExplicit && !rulesetApplied && skipped.length > 0) {
        const f = skipped[0];
        return errorResult(`Error: explicit launch ${f.model}@${f.effort} (${f.provider}) failed: ${f.reason}.\n${AUTO_HINT}`);
    }
    const lines = skipped
        .map((s, i) => ` ${i + 1}. ${s.model}@${s.effort} (${s.provider}): ${s.reason}`)
        .join("\n");
    return errorResult(`Error: all ${skipped.length} candidate launches failed for task_category ${task_category}:\n${lines}\n${SPLIT_HINT}\n${AUTO_HINT}`);
});
541
+ // Tool 2: poll_agent
542
// Tool: poll_agent.
//
// Looks the agent up by id, reconciles its status against the live process,
// and returns one JSON text item with identity, status, output tails (last
// 2000 chars of stdout, last 1000 of stderr), liveness fields, optional
// routing/ruleset fields, the retained visible stream items and — only when
// `verbose` is set — the extracted final turn.
server.tool("poll_agent", "Get an agent's current status and output. Status `processing` = ALIVE with visible provider activity in the last 10 minutes; `stalled` = ALIVE but no parsed visible provider stream item for 10 minutes (thinking, or awaiting a temp-file handoff) — NOT dead, so prefer `wait`/re-poll over killing. Always returns `alive` and `idle_seconds`, plus `recent_stream` (the last 3 visible provider stream items, each timestamped) and a `hint` while stalled. Pass `verbose: true` to also return `final_output`, the agent's final assistant turn extracted from its captured stdout.", {
    agent_id: z.string(),
    verbose: z.boolean().optional().default(false),
}, async (params) => {
    const agent = agents.get(params.agent_id);
    if (!agent) {
        return errorResult(`Error: Agent ${params.agent_id} not found`);
    }
    // Reconcile first so a process that has already exited is reported with its
    // terminal status right away instead of waiting for the background timer.
    const now = Date.now();
    reconcileAgent(agent, now);
    // slice(-N) returns the whole string when it is shorter than N.
    const stdoutTail = agent.stdout.slice(-2000);
    const stderrTail = agent.stderr.slice(-1000);
    const liveness = buildLivenessFields(agent.status, agent.exitCode, agent.lastActivity, now);
    const routingFields = agent.routingTier !== undefined
        ? { routing_tier: agent.routingTier }
        : {};
    const rulesetFields = agent.rulesetApplied
        ? {
            ruleset_applied: true,
            ruleset_original_selection: agent.rulesetOriginalSelection,
        }
        : {};
    const recentStream = agent.visibleStream.map((item) => ({
        type: item.type,
        text: item.text,
        at: item.at !== undefined ? formatLocalIso(item.at) : null,
    }));
    const verboseFields = params.verbose
        ? { final_output: extractFinalTurn(agent.provider, agent.stdout) }
        : {};
    // Key order below is the serialized field order; keep it stable.
    const report = {
        id: agent.id,
        provider: agent.provider,
        model: agent.model,
        status: agent.status,
        exit_code: agent.exitCode,
        stdout_tail: stdoutTail,
        stderr_tail: stderrTail,
        started_at: agent.startedAt,
        last_activity: agent.lastActivity,
        cwd: agent.cwd,
        ...liveness,
        ...routingFields,
        ...rulesetFields,
        recent_stream: recentStream,
        ...verboseFields,
    };
    return {
        content: [{ type: "text", text: JSON.stringify(report) }],
    };
});
605
// Tool 3: kill_agent
// Force-kills a live agent's process tree. Responds with:
//   - an error result when the agent id is unknown,
//   - a non-error "not live" JSON message when the agent is already terminal,
//   - `{ status: "stopped" }` once the kill has been issued,
//   - an error result (with the agent's status restored) when issuing the
//     kill throws.
server.tool("kill_agent", "Terminate a live agent (status `processing` or `stalled`) by immediately force-killing its managed process tree. No-op for already-terminal agents.", {
    agent_id: z.string(),
}, async (params) => {
    const agent = agents.get(params.agent_id);
    if (!agent) {
        return {
            content: [
                {
                    type: "text",
                    text: `Error: Agent ${params.agent_id} not found`,
                },
            ],
            isError: true,
        };
    }
    // Kill applies to ALL live states (processing OR stalled). A terminal agent
    // (finished/errored/stopped) is a no-op.
    const isLive = agent.status === "processing" || agent.status === "stalled";
    if (!isLive) {
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        agent_id: agent.id,
                        status: agent.status,
                        message: `Agent is not live (status: ${agent.status})`,
                    }),
                },
            ],
        };
    }
    // Remembered so a failed kill does not leave a still-live agent labelled
    // as terminal.
    const previousStatus = agent.status;
    try {
        // Immediately force-kill the managed process tree — no graceful SIGTERM
        // grace period. The status is flipped BEFORE the signal is sent so it is
        // already "stopped" by the time the process exit is observed.
        agent.status = "stopped";
        const pid = agent.process.pid;
        if (isWindows && pid) {
            // taskkill /t /f tears down the whole tree on Windows.
            const killer = spawn("taskkill", ["/pid", String(pid), "/t", "/f"], {
                windowsHide: true,
            });
            // A ChildProcess with no 'error' listener turns a spawn failure
            // (e.g. taskkill missing from PATH) into an uncaught exception that
            // would take the whole server down. Fall back to killing the direct
            // child instead.
            killer.on("error", () => {
                try {
                    agent.process.kill("SIGKILL");
                }
                catch {
                    // Best effort only: the child is already gone or cannot be
                    // signalled, and there is no caller left to report to.
                }
            });
        }
        else if (pid) {
            // POSIX: SIGKILL the process directly.
            process.kill(pid, "SIGKILL");
        }
        else {
            agent.process.kill("SIGKILL");
        }
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        agent_id: agent.id,
                        status: "stopped",
                        message: "Process tree force-killed",
                    }),
                },
            ],
        };
    }
    catch (error) {
        // The kill was never delivered: undo the optimistic status change so the
        // agent is not reported as stopped while this call reports a failure.
        agent.status = previousStatus;
        const errorMsg = error instanceof Error ? error.message : String(error);
        return {
            content: [
                {
                    type: "text",
                    text: `Error killing agent: ${errorMsg}`,
                },
            ],
            isError: true,
        };
    }
});
680
// Tool 4: send_message
// Writes one newline-terminated message to a live agent's stdin and bumps the
// agent's `lastActivity`. Returns an error result when the agent is unknown,
// not live, has no usable stdin, or when the write throws.
server.tool("send_message", "Send a message to a running agent's stdin", {
    agent_id: z.string(),
    message: z.string().min(1),
}, async (params) => {
    const agent = agents.get(params.agent_id);
    if (!agent) {
        return {
            content: [
                {
                    type: "text",
                    text: `Error: Agent ${params.agent_id} not found`,
                },
            ],
            isError: true,
        };
    }
    const isLive = agent.status === "processing" || agent.status === "stalled";
    if (!isLive) {
        return {
            content: [
                {
                    type: "text",
                    text: `Error: Agent is not live (status: ${agent.status})`,
                },
            ],
            isError: true,
        };
    }
    // A stdin that exists but has been ended or destroyed must be rejected up
    // front: writing to it does not throw synchronously (the failure surfaces
    // later as a stream 'error' event), so without this check the tool would
    // report "sent" for a message that was never delivered.
    const stdin = agent.process.stdin;
    if (!stdin || !stdin.writable) {
        return {
            content: [
                {
                    type: "text",
                    text: `Error: Agent stdin is not available`,
                },
            ],
            isError: true,
        };
    }
    try {
        stdin.write(params.message + "\n");
        // Sending input counts as activity for the agent.
        agent.lastActivity = Date.now();
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        agent_id: agent.id,
                        status: "sent",
                        message: "Message written to agent stdin",
                    }),
                },
            ],
        };
    }
    catch (error) {
        const errorMsg = error instanceof Error ? error.message : String(error);
        return {
            content: [
                {
                    type: "text",
                    text: `Error sending message: ${errorMsg}`,
                },
            ],
            isError: true,
        };
    }
});
749
// Tool 5: list_agents
// Emits one compact summary row per tracked agent as `{ agents: [...] }`.
server.tool("list_agents", "List all agents with token-efficient core metrics (status, `alive`, `idle_seconds`). `stalled` is ALIVE-but-quiet, NOT dead (full status semantics on poll_agent). Use `poll_agent` for per-agent stream items, hints, and final output.", {}, async () => {
    const checkedAt = Date.now();
    const rows = [];
    for (const agent of agents.values()) {
        // Bring the recorded status in line with the real process state before
        // reading it, so an exited process shows up as terminal in this listing.
        reconcileAgent(agent, checkedAt);
        const row = {
            id: agent.id,
            provider: agent.provider,
            model: agent.model,
            status: agent.status,
            started_at: agent.startedAt,
            last_activity: agent.lastActivity,
            // Only the last path segment of the working directory is exposed.
            cwd_basename: basename(agent.cwd),
        };
        // Trailing `false` = no stalled hint here; that longer text is left to
        // poll_agent so this listing stays small.
        Object.assign(row, buildLivenessFields(agent.status, agent.exitCode, agent.lastActivity, checkedAt, false));
        rows.push(row);
    }
    const body = JSON.stringify({ agents: rows });
    return {
        content: [{ type: "text", text: body }],
    };
});
778
// Tool 6: wait
// Blocks until at least one agent has reached a terminal state that no earlier
// `wait` call has reported, then returns those agents under `finished`.
// Other outcomes:
//   - nothing is live and nothing is unreported -> `{ finished: [], message }`
//     (checked up front AND on every poll tick, so a call whose agents were
//     claimed by a concurrent `wait` returns promptly instead of sleeping out
//     the whole timeout);
//   - the timeout elapses -> `{ timed_out: true, running: [...] }`.
server.tool("wait", "Blocks until one or more sub-agents reach a terminal state (finished/errored/stopped), returning each one's exit code + local-time exit timestamp; or returns the live-job list after a 15-minute timeout. A `stalled` agent is still ALIVE and does NOT end the wait — only a terminal exit does. Pass `verbose: true` to add each finished agent's `final_output` (its final assistant turn, extracted from captured stdout).", {
    verbose: z.boolean().optional().default(false),
}, async (params) => {
    const { verbose } = params;
    const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
    const TIMEOUT_MS = 15 * 60 * 1000;
    const POLL_INTERVAL_MS = 250;
    const TERMINAL_SET = new Set(["finished", "errored", "stopped"]);
    const deadline = Date.now() + TIMEOUT_MS;
    // Wrap a payload as a pretty-printed JSON text result.
    const textResult = (payload) => ({
        content: [{ type: "text", text: JSON.stringify(payload, null, 2) }],
    });
    const buildFinishedEntry = (a) => ({
        id: a.id,
        provider: a.provider,
        model: a.model,
        status: a.status,
        exit_code: a.exitCode,
        exited_at: formatLocalIso(a.exitedAt),
        elapsed_ms: a.exitedAt - a.startedAt,
        ...(verbose
            ? { final_output: extractFinalTurn(a.provider, a.stdout) }
            : {}),
    });
    const buildRunningEntry = (a, now) => ({
        id: a.id,
        provider: a.provider,
        model: a.model,
        status: a.status,
        started_at_local: formatLocalIso(a.startedAt),
        last_activity_local: formatLocalIso(a.lastActivity),
        elapsed_ms: now - a.startedAt,
    });
    // Claim every terminal-but-unreported agent and build the `finished`
    // result, or return null when there is none. Marking and building happen
    // in one synchronous step, so two overlapping `wait` calls can never both
    // report the same agent.
    const claimFinished = () => {
        const unreported = selectUnreported(Array.from(agents.values()));
        if (unreported.length === 0) {
            return null;
        }
        for (const a of unreported) {
            a.waitReported = true;
        }
        return textResult({ finished: unreported.map(buildFinishedEntry) });
    };
    // True while some agent can still produce a result: it is live
    // (`processing`/`stalled` — stalled never ends a wait), or it is already
    // marked terminal but its exit has not been recorded yet.
    const hasPending = () => Array.from(agents.values()).some((a) => a.status === "processing" ||
        a.status === "stalled" ||
        (TERMINAL_SET.has(a.status) && a.exitedAt === null));
    const nothingToWaitFor = () => textResult({
        finished: [],
        message: "No agents are running or waiting to finish.",
    });
    // Step 1: agents that already finished but were never reported.
    const alreadyFinished = claimFinished();
    if (alreadyFinished) {
        return alreadyFinished;
    }
    // Step 2: nothing alive and nothing unreported.
    if (!hasPending()) {
        return nothingToWaitFor();
    }
    // Step 3: block-poll until a terminal agent appears, nothing is left to
    // wait for, or the deadline passes.
    while (Date.now() < deadline) {
        await sleep(POLL_INTERVAL_MS);
        const finished = claimFinished();
        if (finished) {
            return finished;
        }
        if (!hasPending()) {
            return nothingToWaitFor();
        }
    }
    // Step 4: timeout — return still-running jobs.
    const now = Date.now();
    const stillRunning = Array.from(agents.values()).filter((a) => a.status === "processing" || a.status === "stalled");
    return textResult({
        timed_out: true,
        elapsed_minutes: TIMEOUT_MS / 60000,
        running: stillRunning.map((a) => buildRunningEntry(a, now)),
        hint: "15 minutes elapsed with no agent finishing. Call wait again to block for another 15 minutes or until the next agent finishes.",
    });
});
860
// Tool 7: orchestration-mode
// Sets, clears or simply reads the orchestration marker for the server's
// current working directory, then reports the resulting state and the
// marker's path.
server.tool("orchestration-mode", "Toggle or query per-project ORCHESTRATION MODE. `enabled`: true = ON, false = OFF, omit = query current state. The FULL operating model + governance is carried in this server's MCP `instructions` (read once at initialize) — this is the operational summary only; do not act on the mode without that detail. WHAT: a per-project toggle for LONG-HORIZON work that would fill the context window if run to completion inline; when ON, act as an orchestrator with delegate-default, but steps bound to main-session-only capability stay INLINE BY RIGHT (state which + why). PERSISTENCE: a per-project marker keyed by cwd; absence of the marker = OFF = no injection; once ON it persists across restarts/sessions until a permitted disable (it does NOT reset on a new session). CARRYOVER: if ON was inherited from a PRIOR session (provenance = carried-over, not user-enabled this session), the bundled hook prepends a ONE-TIME notice (once per marker, never per turn) — you MUST then notify the user it auto-activated and confirm whether to keep it ON. DISABLE: never on your own initiative; you MAY PROPOSE turning it OFF on task-fit mismatch, but only EXPLICIT user permission (AskUserQuestion on Claude, request-user-input on Codex) may set enabled:false. Per-turn injection fires only in CLI hosts that load the bundled hook; desktop hosts toggle the marker but inject nothing (documented degradation).", {
    enabled: z.boolean().optional(),
}, async (params) => {
    const cwd = process.cwd();
    switch (params.enabled) {
        case true:
            orchestrationMarker.enable(cwd);
            break;
        case false:
            orchestrationMarker.disable(cwd);
            break;
        default:
            // `enabled` omitted: pure query, the marker is left untouched.
            break;
    }
    // State is read back AFTER any mutation so the reply reflects the result.
    const active = orchestrationMarker.isActive(cwd);
    const markerFile = orchestrationMarker.markerPath(cwd);
    const report = JSON.stringify({
        orchestration_mode: active,
        marker_path: markerFile,
    });
    return {
        content: [{ type: "text", text: report }],
    };
});
884
// Entry-point guard. The stdio transport is attached only when this file is
// the script Node was asked to run (the bin), never when it is merely imported
// — an import that opened the transport would leave the importer (e.g. a test
// pulling in an exported helper) hanging on open stdio. The check compares this
// module's URL with the file URL of the resolved real path of argv[1].
const isMain = process.argv[1] !== undefined &&
    pathToFileURL(realpathSync(process.argv[1])).href === import.meta.url;
if (isMain) {
    // Optional maintenance subcommands; each one ends the process itself.
    switch (process.argv[2]) {
        case "setup": {
            const { runSetup } = await import("./setup.js");
            await runSetup();
            process.exit(0);
            break;
        }
        case "doctor": {
            const { runDoctor } = await import("./doctor.js");
            // runDoctor's resolved value is used as the process exit code.
            process.exit(await runDoctor());
            break;
        }
        default:
            break;
    }
    // Orchestration mode is deliberately NOT reset here: the marker is left
    // exactly as found, so a project that was switched ON stays ON across
    // restarts and a project that never enabled it (no marker) stays OFF.
    // Turning it off remains the job of the orchestration-mode tool
    // (enabled:false), and confirming a carried-over marker with the user is
    // left to the bundled hook.
    const transport = new StdioServerTransport();
    await server.connect(transport);
}