@heretyc/subagent-mcp 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/NOTICE +5 -0
- package/README.md +124 -0
- package/directives/carryover-claude.md +17 -0
- package/directives/carryover-codex.md +17 -0
- package/directives/off-turn-reminder.md +1 -0
- package/directives/orchestration-claude.md +21 -0
- package/directives/orchestration-codex.md +22 -0
- package/dist/advanced-ruleset.py +67 -0
- package/dist/deadlock.js +8 -0
- package/dist/doctor.js +32 -0
- package/dist/effort.js +78 -0
- package/dist/hooks/orchestration-claude.js +88 -0
- package/dist/hooks/orchestration-codex.js +152 -0
- package/dist/index.js +908 -0
- package/dist/orchestration/hook-core.js +208 -0
- package/dist/orchestration/marker.js +139 -0
- package/dist/output-helpers.js +128 -0
- package/dist/platform.js +59 -0
- package/dist/routing-table.json +3821 -0
- package/dist/routing.js +260 -0
- package/dist/ruleset-scaffold.js +2 -0
- package/dist/ruleset.js +319 -0
- package/dist/setup.js +507 -0
- package/dist/status-helpers.js +56 -0
- package/dist/stream-helpers.js +182 -0
- package/dist/wait-helpers.js +21 -0
- package/package.json +51 -0
- package/scripts/postinstall.mjs +102 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,908 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
import { spawn, execSync } from "child_process";
|
|
6
|
+
import { unlinkSync, existsSync, realpathSync } from "node:fs";
|
|
7
|
+
import { randomUUID } from "crypto";
|
|
8
|
+
import { isAbsolute, basename } from "node:path";
|
|
9
|
+
import { pathToFileURL } from "url";
|
|
10
|
+
import { buildCommand } from "./effort.js";
|
|
11
|
+
import { resolveExeFor } from "./platform.js";
|
|
12
|
+
import { formatLocalIso, selectUnreported } from "./wait-helpers.js";
|
|
13
|
+
import { computeStatusTransition, buildLivenessFields, } from "./status-helpers.js";
|
|
14
|
+
import { extractFinalTurn } from "./output-helpers.js";
|
|
15
|
+
import { consumeStreamChunk, flushStream, retainLastN, } from "./stream-helpers.js";
|
|
16
|
+
import { loadRoutingTable, buildCandidates, validatePresence, TASK_CATEGORIES, AUTO_HINT, SPLIT_HINT, } from "./routing.js";
|
|
17
|
+
import { createDeadlockWindow } from "./deadlock.js";
|
|
18
|
+
import { createRulesetGate, RULESET_HARD_FAIL_MSG, } from "./ruleset.js";
|
|
19
|
+
import * as orchestrationMarker from "./orchestration/marker.js";
|
|
20
|
+
// Registry of launched sub-agents: agent id (UUID) -> AgentState. Entries are added by tryLaunchCandidate once a launch is accepted.
const agents = new Map();
|
|
21
|
+
// Per-provider concurrency cap: maximum number of claude agents in `processing` status before tryLaunchCandidate refuses a launch.
const MAX_CLAUDE = 5;
|
|
22
|
+
// Per-provider concurrency cap: maximum number of codex agents in `processing` status before tryLaunchCandidate refuses a launch.
const MAX_CODEX = 5;
|
|
23
|
+
// Per-process deadlock window: launch_agent arms it when called with deadlock=true, and while it is active pure-auto launches route on the "performance" branch instead of "cost_efficiency".
const deadlockWindow = createDeadlockWindow();
|
|
24
|
+
// Advanced-ruleset gate: per-process latch with exactly the deadlock-window
|
|
25
|
+
// scoping. The env-check runs lazily at the FIRST launch_agent call; success
|
|
26
|
+
// latches enabled/disabled for the process lifetime, failure never latches.
|
|
27
|
+
const rulesetGate = createRulesetGate();
|
|
28
|
+
// Post-spawn grace window, in milliseconds. A child that exits inside this
// window after a successful spawn is treated as never having launched (codex
// installed but not logged in, expired auth, instant crash), so the attempt
// loop silently advances instead of falsely reporting success. ANY exit inside
// the window counts, even code 0, EXCEPT a codex child already finalized by its
// turn.completed marker (a legitimate fast completion).
//
// SUBAGENT_SPAWN_GRACE_MS overrides the 1500 ms default with a non-negative
// integer; 0 disables detection (legacy spawn-event-only success). It is a test
// seam; production never sets it. Unset, empty, negative, or unparseable values
// fall back to the default. Parsing uses parseInt with radix 10, so a value
// with a numeric prefix (e.g. "250ms") is read as that prefix.
const SPAWN_GRACE_MS = (() => {
    const DEFAULT_GRACE_MS = 1500;
    const override = process.env.SUBAGENT_SPAWN_GRACE_MS;
    if (override === undefined || override === "") {
        return DEFAULT_GRACE_MS;
    }
    const value = Number.parseInt(override, 10);
    // NaN fails the integer check, so garbage input lands on the default too.
    if (!Number.isInteger(value) || value < 0) {
        return DEFAULT_GRACE_MS;
    }
    return value;
})();
|
|
43
|
+
// TASK_CATEGORIES, AUTO_HINT, SPLIT_HINT, and validatePresence are the pure,
|
|
44
|
+
// side-effect-free presence layer — defined in ./routing.js and imported above
|
|
45
|
+
// so the handler-validation test can exercise them without importing this entry
|
|
46
|
+
// module (which would open the stdio transport).
|
|
47
|
+
// Caveman self-classification gloss for the task_category param (tool-description.md).
|
|
48
|
+
const TASK_CATEGORY_GLOSS = "REQUIRED. Task shape -> routing category (the server picks the best model for it). Pick ONE: math_proof: proof/derivation/formally-checkable result; deductive step-validity under axioms; verified by a proof-checker not tests. security_review: security verdict/threat-assessment/demonstrated exploit; adversarial reasoning over attack surface — vuln, auth/authz, crypto, exploitability. debugging: verified fix/root-cause; ONLY with an observed failure (error, crash, red test, regression, flake); done when the symptom is resolved. quality_review: evaluative verdict on an existing NON-security artifact with NO observed failure; review diff/PR, compare A-vs-B, validate-vs-spec; never self-review. architecture: cross-module design/plan, NO code, NO execution loop; system structure, interface/migration strategy, decompose-into-tasks; >2 files or public API. agentic_execution: reach a target end-state by iterating in a mutating env (act/observe/adapt loop); run/deploy/provision/browse, tool/function-call, iterate-until-tests-pass. data_analysis: empirical finding/model ABOUT a structured dataset; query/SQL/dataframe answer, statistic, fit-model-report-drivers; the finding is the deliverable even if code runs. coding: bounded runnable code artifact, one-pass; implement function/module/feature/script, write tests, single-module refactor; compiles/passes tests. knowledge_synthesis: novel integrated prose over sources; synthesize/summarize/translate/draft/explain-across-files; judged by faithfulness/coherence not exact-match. mechanical: deterministic single-pass transform/leaf op, exact-match checkable; find/grep/list/rename/reformat/convert/extract-to-fixed-schema; minimal reasoning. fallback_default: no category fits with confidence (under-specified/mixed/tied); read-only sentinel — PREFER splitting the work into smaller atomic steps that each map to one category.";
|
|
49
|
+
/**
 * Wrap a message as a tool error result.
 *
 * @param {string} text - Human-readable error text returned to the caller.
 * @returns {{content: Array<{type: string, text: string}>, isError: boolean}}
 *   A result object with a single text content part and `isError` set to true.
 */
function errorResult(text) {
    const textPart = { type: "text", text };
    return { content: [textPart], isError: true };
}
|
|
52
|
+
const isWindows = process.platform === "win32";
|
|
53
|
+
// Memoized output of `npm prefix -g`; stays null until the first lookup.
let _npmPrefix = null;
/**
 * Return the global npm prefix, running `npm prefix -g` at most once per
 * process as long as the command yields a non-empty result.
 *
 * @returns {string} Trimmed stdout of `npm prefix -g`.
 * @throws Whatever execSync throws when the command cannot be run or exits
 *   non-zero; nothing is cached in that case.
 */
function getNpmPrefix() {
    if (_npmPrefix) {
        return _npmPrefix;
    }
    const stdout = execSync("npm prefix -g", { encoding: "utf-8" });
    _npmPrefix = stdout.trim();
    return _npmPrefix;
}
|
|
60
|
+
/**
 * Resolve the CLI executable for a provider on the current platform.
 *
 * Thin adapter over resolveExeFor that supplies the real process.platform,
 * the real existsSync, and the lazily evaluated global npm prefix.
 *
 * @param {string} provider - Provider name, e.g. "claude" or "codex".
 * @returns The value produced by resolveExeFor (used by callers as the command
 *   passed to spawn).
 */
function resolveExe(provider) {
    const deps = { existsSync, npmPrefix: getNpmPrefix };
    return resolveExeFor(provider, process.platform, deps);
}
|
|
63
|
+
/**
 * Best-effort removal of a registered agent's temp ultracode settings file.
 *
 * Does nothing when the agent has no settings path. Otherwise the file is
 * deleted if it exists, filesystem errors are deliberately ignored, and the
 * path is cleared on the state so a later call is a no-op.
 *
 * @param {{ucSettingsPath?: string}} agentState - Agent state; mutated in place.
 * @returns {void}
 */
function cleanupUcSettings(agentState) {
    const settingsPath = agentState.ucSettingsPath;
    if (!settingsPath) {
        return;
    }
    try {
        if (existsSync(settingsPath)) {
            unlinkSync(settingsPath);
        }
    }
    catch {
        // Best-effort: a failed delete must never break agent bookkeeping.
    }
    agentState.ucSettingsPath = undefined;
}
|
|
74
|
+
/**
 * Count the agents that occupy a concurrency slot for a provider.
 *
 * Only agents whose status is exactly `processing` count against the cap.
 * `stalled` agents (live but quiet past the heartbeat window) do NOT count,
 * which frees a slot while they idle.
 *
 * @param {string} provider - Provider name to count for.
 * @returns {number} Number of registered agents of that provider in `processing`.
 */
function countProcessing(provider) {
    const occupiesSlot = (agent) => agent.provider === provider && agent.status === "processing";
    return [...agents.values()].filter(occupiesSlot).length;
}
|
|
85
|
+
/**
 * Synchronously bring one agent's status up to date.
 *
 * The live child's exit code is folded into the agent state first, so a
 * process that has already exited is handed to the pure transition helper with
 * its real exit code instead of waiting for the background timer. The helper's
 * result then overwrites `status` and `exitedAt`.
 *
 * @param {object} agent - AgentState; mutated in place.
 * @param {number} now - Current time in epoch milliseconds.
 * @returns {void}
 */
function reconcileAgent(agent, now) {
    const isLive = ["processing", "stalled"].includes(agent.status);
    if (isLive && agent.process.exitCode !== null) {
        agent.exitCode = agent.process.exitCode;
    }
    const { status, exitedAt } = computeStatusTransition({
        status: agent.status,
        exitCode: agent.exitCode,
        lastActivity: agent.lastActivity,
        now,
        exitedAt: agent.exitedAt,
    });
    agent.status = status;
    agent.exitedAt = exitedAt;
}
|
|
103
|
+
// Background reconcile pass: every 10 seconds each registered agent is
// reconciled against a single shared timestamp. The timer is unref()'d so it
// never keeps the event loop alive on its own; the process (and any test
// importing this module) can exit cleanly.
const reconcileInterval = setInterval(() => {
    const tick = Date.now();
    agents.forEach((agent) => reconcileAgent(agent, tick));
}, 10000);
reconcileInterval.unref();
|
|
112
|
+
// Heavy operating-model + governance guidance for ORCHESTRATION MODE. Carried in
|
|
113
|
+
// the MCP server `instructions` field so a connecting host reads it ONCE at
|
|
114
|
+
// initialize (per the MCP spec the initialize result has an `instructions`
|
|
115
|
+
// field) rather than re-injecting it on every turn. The bundled per-turn hook
|
|
116
|
+
// injects only a small compact reminder; this is the durable, full explanation.
|
|
117
|
+
const ORCHESTRATION_INSTRUCTIONS = "ORCHESTRATION MODE (orchestration-mode tool). WHAT: per-project toggle for LONG-HORIZON work that would fill the context window if run inline. OPERATING MODEL: when ON, act as a workflow orchestrator, delegate-default — decompose and delegate/offload by default. INLINE BY RIGHT: steps bound to main-session-only capability may stay inline (MCP tools sub-agents can't inherit, interactive/consent tools, tight verify loops); state which and why. MUST DELEGATE/OFFLOAD: pure compute and any payload >50KB or >200 lines go via temp scratch-file path handoff (%TEMP% on Windows, /tmp on POSIX); keep the orchestrator context lean. CONFLICT ORDER: safety-scope > user instruction this turn > delegate-default. A user tool-pin re-partitions work; it does not suspend mode. PERSISTENCE: enabling writes a per-project marker that PERSISTS across restarts/sessions until disabled with explicit user permission (does NOT reset on a new session). CARRYOVER: if mode was already ON at session start (inherited), the bundled hook prepends a CARRYOVER notice ONCE per marker; you MUST tell the user it auto-activated, ask whether to keep it ON, and advise whether it fits this session's request. DISABLE: never disable on your own initiative — only with EXPLICIT user permission. You MAY propose disabling when task fit is wrong (bounded, interactive, or MCP-bound). Either way, first explain WHAT mode is and WHY, then request permission via the provider tool: AskUserQuestion on Claude, request-user-input on Codex. Only explicit approval may call orchestration-mode enabled:false; if declined, continue under inline-by-right, ask once per topic, never re-nag. Per-turn injection fires only in CLI hosts loading the bundled hook; desktop hosts toggle the marker but inject nothing (documented degradation).";
|
|
118
|
+
// MCP server instance. The first argument is the implementation info the host
// sees at initialize; the second carries the server `instructions` text.
const server = new McpServer({
    name: "subagent-mcp",
    // Must track the published package version. This build ships as 2.6.0; the
    // previous literal ("2.3.9") was stale and misreported the server version.
    version: "2.6.0",
    description: "Spawns the LOCALLY INSTALLED `claude` and `codex` CLI binaries as child processes. Does NOT call the Anthropic or OpenAI HTTP APIs directly (no API keys, no SDK) and there are no plans to — all model access is via the local CLIs.",
}, {
    instructions: ORCHESTRATION_INSTRUCTIONS,
});
|
|
125
|
+
/**
 * Best-effort removal of a candidate's temp ultracode settings file after a
 * LAUNCH-TIME failure. In that case no agent state is registered, so the
 * close-handler cleanup never runs for the file and it must be removed here.
 *
 * @param {string|undefined} ucSettingsPath - Path to delete; falsy values are ignored.
 * @returns {void}
 */
function cleanupUcSettingsPath(ucSettingsPath) {
    if (ucSettingsPath) {
        try {
            const present = existsSync(ucSettingsPath);
            if (present) {
                unlinkSync(ucSettingsPath);
            }
        }
        catch {
            // Best-effort: ignore filesystem errors during cleanup.
        }
    }
}
|
|
137
|
+
/**
 * Attempt to spawn and register a single routing candidate.
 *
 * Never throws or rejects: every launch-time failure is reported as
 * `{ reason }` so the caller's attempt loop can silently advance to the next
 * candidate. On success the agent is stored in `agents` and `{ agentId }` is
 * returned.
 *
 * spawn() failures (ENOENT/EACCES) are ASYNC: a missing/broken CLI emits the
 * child 'error' event AFTER spawn() returns, so a try/catch around spawn cannot
 * see it. We therefore: (a) fast-fail when the resolved absolute exe path does
 * not exist; (b) attach an 'error' handler immediately and AWAIT a one-shot
 * 'spawn' vs 'error' race. A persistent 'error' handler stays attached
 * afterwards so a LATE error can never crash the process.
 *
 * @param {{provider: string, model: string, effort: string}} candidate - Triple to launch.
 * @param {string} prompt - Task prompt; written to stdin for claude children.
 * @param {string} agentCwd - Working directory for the child process.
 * @param {string} routingTier - Routing tier label stored on the agent state.
 * @param {{originalSelection: object}|undefined} rulesetInfo - When provided,
 *   the agent state records that the advanced ruleset altered the selection.
 * @returns {Promise<{agentId: string}|{reason: string}>}
 */
async function tryLaunchCandidate(candidate, prompt, agentCwd, routingTier, rulesetInfo) {
    // Concurrency cap for this provider.
    const running = countProcessing(candidate.provider);
    const max = candidate.provider === "claude" ? MAX_CLAUDE : MAX_CODEX;
    if (running >= max) {
        return {
            reason: `Maximum ${max} concurrent ${candidate.provider} agents already running. Current: ${running}`,
        };
    }
    // Build the command. haiku ignores effort; pass "high" placeholder for the
    // "none" sentinel (buildCommand drops it for haiku anyway).
    const effortForBuild = candidate.effort === "none" ? "high" : candidate.effort;
    let buildResult;
    let cmd;
    try {
        buildResult = buildCommand(candidate.provider, candidate.model, effortForBuild, prompt, agentCwd);
        cmd = resolveExe(candidate.provider);
    }
    catch (e) {
        // If buildCommand succeeded but resolveExe threw, the build result may
        // already reference a temp settings file; remove it like every other
        // launch-failure path does. buildResult is undefined when buildCommand
        // itself threw, hence the optional chaining.
        cleanupUcSettingsPath(buildResult?.ucSettingsPath);
        return { reason: e instanceof Error ? e.message : String(e) };
    }
    // Fast-fail absolute paths only. Bare names intentionally rely on PATH; spawn
    // below resolves them and reports ENOENT/EACCES through the same failure path.
    if (isAbsolute(cmd) && !existsSync(cmd)) {
        cleanupUcSettingsPath(buildResult.ucSettingsPath);
        return { reason: `CLI executable not found: ${cmd}` };
    }
    const stdinMode = candidate.provider === "claude" ? "pipe" : "ignore";
    let childProcess;
    try {
        childProcess = spawn(cmd, buildResult.args, {
            cwd: agentCwd,
            env: { ...process.env, SUBAGENT_MCP_SUBAGENT: "1" },
            stdio: [stdinMode, "pipe", "pipe"],
            windowsHide: true,
        });
    }
    catch (error) {
        // Synchronous spawn throw (rare) — clean up and report as a launch failure.
        cleanupUcSettingsPath(buildResult.ucSettingsPath);
        return { reason: error instanceof Error ? error.message : String(error) };
    }
    // Await the one-shot spawn/error race. The 'error' handler is attached BEFORE
    // we await so an async ENOENT cannot escape as an unhandled event.
    try {
        await new Promise((resolve, reject) => {
            const onError = (err) => reject(err);
            childProcess.once("spawn", () => {
                childProcess.removeListener("error", onError);
                resolve();
            });
            childProcess.once("error", onError);
        });
    }
    catch (err) {
        // Launch-time failure (ENOENT/EACCES/etc.) — kill if somehow alive, clean up
        // the settings file, and report so the attempt loop advances.
        try {
            childProcess.kill();
        }
        catch { }
        cleanupUcSettingsPath(buildResult.ucSettingsPath);
        return { reason: err instanceof Error ? err.message : String(err) };
    }
    // Spawn succeeded. Build the agent state. Keep a persistent 'error' handler
    // so a LATE spawn error never crashes the process; fold it into stderr
    // rather than throwing.
    const agentId = randomUUID();
    const now = Date.now();
    const agentState = {
        id: agentId,
        provider: candidate.provider,
        model: candidate.model,
        routingTier,
        ...(rulesetInfo
            ? { rulesetApplied: true, rulesetOriginalSelection: rulesetInfo.originalSelection }
            : {}),
        status: "processing",
        process: childProcess,
        stdout: "",
        stderr: "",
        exitCode: null,
        exitedAt: null,
        // Launch time is the initial heartbeat. Only PARSED VISIBLE provider stream
        // items refresh lastActivity afterwards (see the stdout handler); raw
        // stdout/stderr chunks do NOT, so `stalled` means exactly "no visible
        // provider stream item for the heartbeat window".
        startedAt: now,
        lastActivity: now,
        cwd: agentCwd,
        ucSettingsPath: buildResult.ucSettingsPath,
        waitReported: false,
        visibleStream: [],
        streamBuf: "",
    };
    childProcess.on("error", (err) => {
        // Captured into the stderr tail for debugging. Not a visible provider stream
        // item, so it does NOT refresh the heartbeat.
        agentState.stderr += `\n[process error] ${err instanceof Error ? err.message : String(err)}`;
    });
    if (candidate.provider === "claude" && childProcess.stdin) {
        // EPIPE if the child dies before draining the prompt (the grace-window
        // early-exit class) — fold into stderr, never crash the server.
        childProcess.stdin.on("error", (err) => {
            agentState.stderr += `\n[stdin error] ${err instanceof Error ? err.message : String(err)}`;
        });
        childProcess.stdin.write(prompt);
        childProcess.stdin.end();
    }
    if (childProcess.stdout) {
        // Decode as UTF-8 at the stream level. Per-chunk Buffer.toString() turns a
        // multi-byte character split across two chunks into replacement
        // characters; setEncoding keeps the partial bytes until the rest arrives.
        childProcess.stdout.setEncoding("utf8");
        childProcess.stdout.on("data", (data) => {
            // `data` is already a string here; toString() is a harmless no-op.
            const chunk = data.toString();
            const at = Date.now();
            // Buffer partial lines so a provider JSONL event split across chunks is
            // never dropped. Only COMPLETE lines are parsed this call; the trailing
            // fragment is carried in streamBuf until its newline arrives.
            const { items, pending, lines } = consumeStreamChunk(agentState.provider, agentState.streamBuf, chunk);
            agentState.streamBuf = pending;
            // Accumulate all complete lines into stored stdout.
            for (const line of lines) {
                agentState.stdout += line + "\n";
            }
            if (items.length > 0) {
                // Heartbeat refreshes only on parsed visible provider stream items,
                // not on raw stdout bytes.
                agentState.lastActivity = at;
                agentState.visibleStream = retainLastN(agentState.visibleStream, items.map((it) => ({ ...it, at })), 3);
            }
            // Codex emits JSONL; turn.completed signals task done — kill process. Scan
            // COMPLETE lines only so a marker split across chunks is matched once
            // fully assembled (never on a partial fragment).
            if (agentState.provider === "codex" &&
                lines.some((l) => l.includes('"type":"turn.completed"'))) {
                agentState.turnCompleted = true;
                agentState.status = "finished";
                agentState.exitCode = 0;
                if (agentState.exitedAt === null)
                    agentState.exitedAt = at;
                childProcess.kill();
            }
        });
    }
    // Capture stderr into the tail for debugging. stderr is NOT a parsed visible
    // provider stream, so it does NOT refresh the heartbeat (parsed-visible only).
    if (childProcess.stderr) {
        // Same UTF-8 boundary protection as stdout.
        childProcess.stderr.setEncoding("utf8");
        childProcess.stderr.on("data", (data) => {
            agentState.stderr += data.toString();
        });
    }
    childProcess.on("close", (code) => {
        // Flush any buffered trailing stdout line (final event may arrive without a
        // terminating newline) so its visible item is not lost.
        if (agentState.streamBuf) {
            const at = Date.now();
            const { items, lines } = flushStream(agentState.provider, agentState.streamBuf);
            agentState.streamBuf = "";
            for (const line of lines) {
                agentState.stdout += line + "\n";
            }
            // A turn.completed marker may arrive only in this final flush (no
            // trailing newline) — the grace window's success exception needs it.
            if (agentState.provider === "codex" &&
                lines.some((l) => l.includes('"type":"turn.completed"'))) {
                agentState.turnCompleted = true;
            }
            if (items.length > 0) {
                agentState.lastActivity = at;
                agentState.visibleStream = retainLastN(agentState.visibleStream, items.map((it) => ({ ...it, at })), 3);
            }
        }
        // Always clean up ultracode settings file on close
        cleanupUcSettings(agentState);
        // Always record actual close time (unless already finalized)
        if (agentState.exitedAt === null)
            agentState.exitedAt = Date.now();
        if (agentState.status === "stopped") {
            // Record real exit code but preserve "stopped" status
            if (agentState.exitCode === null)
                agentState.exitCode = code !== null ? code : -1;
            return;
        }
        if (agentState.status === "finished") {
            // Already finalized by turn.completed; exitedAt already stamped
            return;
        }
        // Normal exit: set exit code and derive status
        agentState.exitCode = code !== null ? code : -1;
        agentState.status = code === 0 ? "finished" : "errored";
    });
    // Resolves after the close handler above has fully run (attach order):
    // streams flushed and any final turn.completed marker scanned. Pre-created
    // because 'close' can fire in the same frame as 'exit', before the grace
    // race's await continuation could attach a listener.
    const closedAfterFlush = new Promise((resolve) => {
        childProcess.once("close", () => resolve());
    });
    // Post-spawn grace window: a 'spawn' win alone is NOT success — a binary that
    // spawns then dies immediately (codex installed but not logged in) must
    // advance the attempt loop, not falsely conclude it. AgentState is fully
    // wired and the claude prompt already written above, so a surviving child
    // loses no stream output during the wait. Exception: a codex child already
    // finalized by its turn.completed marker (dedicated turnCompleted flag — the
    // SOLE in-window success exception) completed the task legitimately fast —
    // that is a success, never a launch failure. The close handler above cleans
    // up a condemned child (uc settings, stream flush); the agent is simply
    // never registered.
    if (SPAWN_GRACE_MS > 0) {
        const earlyExit = await new Promise((resolve) => {
            const timer = setTimeout(() => {
                childProcess.removeListener("exit", onExit);
                resolve(null);
            }, SPAWN_GRACE_MS);
            const onExit = (code, signal) => {
                clearTimeout(timer);
                resolve({ code, signal });
            };
            childProcess.once("exit", onExit);
        });
        if (earlyExit) {
            // 'exit' can be delivered before the final stdout chunk, so wait for
            // 'close' (streams drained, flush scanned) before deciding — a
            // turn.completed fast completion must never be misread as a launch
            // failure and the task silently re-executed on the next candidate.
            await closedAfterFlush;
            if (!agentState.turnCompleted) {
                const tail = agentState.stderr.trim().split("\n").slice(-1)[0] ?? "";
                return {
                    reason: `process exited (code ${earlyExit.code ?? earlyExit.signal}) within ${SPAWN_GRACE_MS}ms of spawn${tail ? `: ${tail}` : ""}`,
                };
            }
        }
    }
    agents.set(agentId, agentState);
    return { agentId };
}
|
|
383
|
+
/**
 * Order-sensitive equality of two (provider, model, effort) candidate lists.
 *
 * Used to detect whether the advanced ruleset actually ALTERED the routing
 * decision; visibility fields are persisted/exposed only then (a passthrough
 * looks identical to the ruleset being disabled).
 *
 * @param {Array<{provider: string, model: string, effort: string}>} a
 * @param {Array<{provider: string, model: string, effort: string}>} b
 * @returns {boolean} True when both lists have the same length and every
 *   position matches on provider, model and effort.
 */
function sameTriples(a, b) {
    if (a.length !== b.length) {
        return false;
    }
    const differsAt = (entry, index) => {
        const other = b[index];
        return entry.provider !== other.provider
            || entry.model !== other.model
            || entry.effort !== other.effort;
    };
    return !a.some(differsAt);
}
|
|
391
|
+
// Tool 1: launch_agent
|
|
392
|
+
server.tool("launch_agent", "Spawn a sub-agent. AUTO MODE (preferred): pass only `prompt` + `task_category`; the server picks the best provider/model/effort for that category from its routing table, launches the top candidate, and silently falls back to the next-best on launch failure. `provider`/`model`/`effort` are OPTIONAL overrides — omit for auto-selection; if you pass `model` you must also pass `provider`, and if you pass `effort` you must pass both `provider` and `model`. Unsure which task_category fits? Don't submit one amorphous task — SPLIT into atomic steps that each map to a single category, one agent per step. ultracode effort is Opus-4.8+ only (induced via a temp `--settings {\"ultracode\":true}` file; the CLI rejects `--effort ultracode`). Each sub-agent is a separate claude/codex CLI child that does NOT inherit this session's MCP servers; children run with env SUBAGENT_MCP_SUBAGENT=1 so the orchestration hooks skip them (they are not orchestrators and don't re-trigger carryover). Launch returns status `processing` (alive); a later `stalled` is alive-but-quiet (thinking or awaiting a temp-file handoff), NOT dead — wait or re-poll, don't kill (see poll_agent). DEADLOCK RULE: you MUST ALWAYS set `deadlock=true` when more than 2 launch attempts have already been made for the SAME atomic task (i.e. the 3rd attempt onward), and NEVER otherwise.", {
|
|
393
|
+
task_category: z.enum(TASK_CATEGORIES).describe(TASK_CATEGORY_GLOSS),
|
|
394
|
+
prompt: z.string().min(1),
|
|
395
|
+
provider: z.enum(["claude", "codex"]).optional(),
|
|
396
|
+
model: z.enum(["haiku", "sonnet", "opus", "opus-4-8", "gpt-5.5"]).optional(),
|
|
397
|
+
effort: z.enum(["low", "medium", "high", "xhigh", "max", "ultracode"]).optional(),
|
|
398
|
+
cwd: z.string().optional(),
|
|
399
|
+
deadlock: z.boolean().optional().describe("MANDATE: ALWAYS set deadlock=true when, and ONLY when, more than 2 launch attempts have already been made for the SAME atomic task — the 3rd attempt onward. NEVER set it on a 1st or 2nd attempt, NEVER for a different task, NEVER speculatively. Auto mode only: cannot be combined with provider/model/effort. Passing false is identical to omitting it."),
|
|
400
|
+
}, async (params) => {
|
|
401
|
+
const { task_category, provider, model, effort, prompt, deadlock } = params;
|
|
402
|
+
const agentCwd = params.cwd || process.cwd();
|
|
403
|
+
// 1-5. Param-presence validation (zod already constrains task_category, but
|
|
404
|
+
// hard-validate so the spec error text — valid list + hints, and the
|
|
405
|
+
// effort-before-model ordering — is what the caller sees). Pure,
|
|
406
|
+
// exported, and unit-tested (test/handler-validation.test.mjs).
|
|
407
|
+
const presenceError = validatePresence({ task_category, provider, model, effort, deadlock });
|
|
408
|
+
if (presenceError) {
|
|
409
|
+
return errorResult(presenceError);
|
|
410
|
+
}
|
|
411
|
+
// 6. Build the candidate list per mode.
|
|
412
|
+
const overrides = { provider, model, effort };
|
|
413
|
+
const isExplicit = !!(provider && model && effort);
|
|
414
|
+
if (!isExplicit && task_category === "fallback_default") {
|
|
415
|
+
return errorResult(`Error: fallback_default is a split hint sentinel, not a launchable routing-table category.\n${SPLIT_HINT}\n${AUTO_HINT}`);
|
|
416
|
+
}
|
|
417
|
+
// Arm window after all validation (including fallback_default rejection) passes.
|
|
418
|
+
if (deadlock === true) {
|
|
419
|
+
deadlockWindow.arm();
|
|
420
|
+
}
|
|
421
|
+
const pureAuto = !provider && !model && !effort;
|
|
422
|
+
const branch = (pureAuto && deadlockWindow.active()) ? "performance" : "cost_efficiency";
|
|
423
|
+
const routingTier = isExplicit ? "manual" : branch;
|
|
424
|
+
// explicit mode never reads the table; all other modes do.
|
|
425
|
+
const table = isExplicit ? null : loadRoutingTable();
|
|
426
|
+
if (!isExplicit && table === null) {
|
|
427
|
+
return errorResult(`Error: routing table not populated for ${task_category} (routing-table file missing or unreadable). Either run the model-profiler to populate it, or pass provider+model+effort explicitly for a fully-specified launch.\n${AUTO_HINT}`);
|
|
428
|
+
}
|
|
429
|
+
const result = buildCandidates(table, task_category, overrides, branch);
|
|
430
|
+
const mode = result.mode;
|
|
431
|
+
if (!isExplicit && result.noCandidates) {
|
|
432
|
+
let scope = "";
|
|
433
|
+
if (mode === "provider")
|
|
434
|
+
scope = ` matching provider ${provider}`;
|
|
435
|
+
else if (mode === "provider_model")
|
|
436
|
+
scope = ` matching model ${model}`;
|
|
437
|
+
return errorResult(`Error: routing table not populated for ${task_category} (no${scope} pairings available). Either run the model-profiler to populate it, or pass provider+model+effort explicitly.\n${AUTO_HINT}`);
|
|
438
|
+
}
|
|
439
|
+
// Advanced-ruleset hook (docs/spec/advanced-ruleset/). Env-check gate runs
|
|
440
|
+
// at the first launch_agent of this process (success latches for the
|
|
441
|
+
// process lifetime; failure NEVER latches — re-run next call so an admin
|
|
442
|
+
// fix recovers without a restart). When enabled, routing mode runs ONCE per
|
|
443
|
+
// launch_agent — in ALL selection modes, explicit included — and is never
|
|
444
|
+
// re-run per failover attempt: the attempt loop consumes the returned list
|
|
445
|
+
// verbatim. Deadlock/branch state is never exposed to the script. The
|
|
446
|
+
// hard-fail message deliberately carries no hints (admin must intervene).
|
|
447
|
+
const gateResult = await rulesetGate.ensureReady();
|
|
448
|
+
if (!gateResult.ok) {
|
|
449
|
+
return errorResult(RULESET_HARD_FAIL_MSG);
|
|
450
|
+
}
|
|
451
|
+
let candidates = result.candidates;
|
|
452
|
+
let rulesetApplied = false;
|
|
453
|
+
let rulesetOriginalSelection;
|
|
454
|
+
if (gateResult.active) {
|
|
455
|
+
const payload = {
|
|
456
|
+
candidates: candidates.map((c, i) => ({
|
|
457
|
+
provider: c.provider,
|
|
458
|
+
model: c.model,
|
|
459
|
+
effort: c.effort,
|
|
460
|
+
// Dense positional rank 1..N over the already-filtered list (raw
|
|
461
|
+
// table ranks gap after launchability filtering; explicit has none).
|
|
462
|
+
rank: i + 1,
|
|
463
|
+
})),
|
|
464
|
+
context: {
|
|
465
|
+
task_category,
|
|
466
|
+
cwd: agentCwd,
|
|
467
|
+
selection_mode: mode,
|
|
468
|
+
provider: provider ?? null,
|
|
469
|
+
model: model ?? null,
|
|
470
|
+
effort: effort ?? null,
|
|
471
|
+
},
|
|
472
|
+
};
|
|
473
|
+
const applied = await rulesetGate.applyRules(payload);
|
|
474
|
+
if (!applied.ok) {
|
|
475
|
+
return errorResult(RULESET_HARD_FAIL_MSG);
|
|
476
|
+
}
|
|
477
|
+
if (applied.candidates.length === 0) {
|
|
478
|
+
// Empty list = deliberate policy veto (the limit case of the allowed
|
|
479
|
+
// filter operation), NOT a malfunction — clean error, never the
|
|
480
|
+
// hard-fail message, never latched.
|
|
481
|
+
return errorResult(`Error: advanced ruleset returned zero candidates for task_category ${task_category}; launch vetoed by ruleset.\n${AUTO_HINT}`);
|
|
482
|
+
}
|
|
483
|
+
rulesetApplied = !sameTriples(candidates, applied.candidates);
|
|
484
|
+
if (rulesetApplied) {
|
|
485
|
+
rulesetOriginalSelection = { ...candidates[0] };
|
|
486
|
+
}
|
|
487
|
+
candidates = applied.candidates;
|
|
488
|
+
}
|
|
489
|
+
// 6. Attempt loop: best→worst. Register on first successful spawn; silently
|
|
490
|
+
// advance on launch-time failure. Sub-agent task outcome is NEVER a trigger.
|
|
491
|
+
const skipped = [];
|
|
492
|
+
for (const candidate of candidates) {
|
|
493
|
+
const outcome = await tryLaunchCandidate(candidate, prompt, agentCwd, routingTier, rulesetApplied && rulesetOriginalSelection !== undefined
|
|
494
|
+
? { applied: true, originalSelection: rulesetOriginalSelection }
|
|
495
|
+
: undefined);
|
|
496
|
+
if ("agentId" in outcome) {
|
|
497
|
+
if (branch === "performance") {
|
|
498
|
+
deadlockWindow.consume();
|
|
499
|
+
}
|
|
500
|
+
return {
|
|
501
|
+
content: [
|
|
502
|
+
{
|
|
503
|
+
type: "text",
|
|
504
|
+
text: JSON.stringify({
|
|
505
|
+
agent_id: outcome.agentId,
|
|
506
|
+
status: "processing",
|
|
507
|
+
provider: candidate.provider,
|
|
508
|
+
model: candidate.model,
|
|
509
|
+
effort: candidate.effort,
|
|
510
|
+
task_category,
|
|
511
|
+
...(rulesetApplied
|
|
512
|
+
? {
|
|
513
|
+
ruleset_applied: true,
|
|
514
|
+
ruleset_original_selection: rulesetOriginalSelection,
|
|
515
|
+
}
|
|
516
|
+
: {}),
|
|
517
|
+
}),
|
|
518
|
+
},
|
|
519
|
+
],
|
|
520
|
+
};
|
|
521
|
+
}
|
|
522
|
+
skipped.push({
|
|
523
|
+
model: candidate.model,
|
|
524
|
+
effort: candidate.effort,
|
|
525
|
+
provider: candidate.provider,
|
|
526
|
+
reason: outcome.reason,
|
|
527
|
+
});
|
|
528
|
+
}
|
|
529
|
+
// 7. All candidates failed. A ruleset-modified explicit launch may have
|
|
530
|
+
// attempted N≠1 candidates — only the numbered ALL_FAILED shape can
|
|
531
|
+
// report that, so the explicit shape is reserved for unmodified launches.
|
|
532
|
+
if (isExplicit && !rulesetApplied) {
|
|
533
|
+
const f = skipped[0];
|
|
534
|
+
return errorResult(`Error: explicit launch ${f.model}@${f.effort} (${f.provider}) failed: ${f.reason}.\n${AUTO_HINT}`);
|
|
535
|
+
}
|
|
536
|
+
const lines = skipped
|
|
537
|
+
.map((s, i) => ` ${i + 1}. ${s.model}@${s.effort} (${s.provider}): ${s.reason}`)
|
|
538
|
+
.join("\n");
|
|
539
|
+
return errorResult(`Error: all ${skipped.length} candidate launches failed for task_category ${task_category}:\n${lines}\n${SPLIT_HINT}\n${AUTO_HINT}`);
|
|
540
|
+
});
|
|
541
|
+
// Tool 2: poll_agent
// Returns one JSON text payload describing a single agent: identity, status,
// output tails, liveness fields, recent visible stream items and (optionally)
// the extracted final assistant turn.
server.tool("poll_agent", "Get an agent's current status and output. Status `processing` = ALIVE with visible provider activity in the last 10 minutes; `stalled` = ALIVE but no parsed visible provider stream item for 10 minutes (thinking, or awaiting a temp-file handoff) — NOT dead, so prefer `wait`/re-poll over killing. Always returns `alive` and `idle_seconds`, plus `recent_stream` (the last 3 visible provider stream items, each timestamped) and a `hint` while stalled. Pass `verbose: true` to also return `final_output`, the agent's final assistant turn extracted from its captured stdout.", {
    agent_id: z.string(),
    verbose: z.boolean().optional().default(false),
}, async ({ agent_id, verbose }) => {
    const agent = agents.get(agent_id);
    if (!agent) {
        const notFound = { type: "text", text: `Error: Agent ${agent_id} not found` };
        return { content: [notFound], isError: true };
    }
    // Reconcile before reading any field so a process that has already exited
    // is reported as finished/errored right away rather than on the next
    // health-monitor pass.
    const now = Date.now();
    reconcileAgent(agent, now);
    // Keep only the trailing `limit` characters of a captured stream.
    const tailOf = (text, limit) => (text.length > limit ? text.slice(-limit) : text);
    const report = {
        id: agent.id,
        provider: agent.provider,
        model: agent.model,
        status: agent.status,
        exit_code: agent.exitCode,
        stdout_tail: tailOf(agent.stdout, 2000),
        stderr_tail: tailOf(agent.stderr, 1000),
        started_at: agent.startedAt,
        last_activity: agent.lastActivity,
        cwd: agent.cwd,
    };
    // Liveness fields are merged after the core fields, in this exact position.
    Object.assign(report, buildLivenessFields(agent.status, agent.exitCode, agent.lastActivity, now));
    if (agent.routingTier !== undefined) {
        report.routing_tier = agent.routingTier;
    }
    if (agent.rulesetApplied) {
        report.ruleset_applied = true;
        report.ruleset_original_selection = agent.rulesetOriginalSelection;
    }
    report.recent_stream = agent.visibleStream.map((item) => {
        const stamp = item.at !== undefined ? formatLocalIso(item.at) : null;
        return { type: item.type, text: item.text, at: stamp };
    });
    if (verbose) {
        report.final_output = extractFinalTurn(agent.provider, agent.stdout);
    }
    return { content: [{ type: "text", text: JSON.stringify(report) }] };
});
|
|
605
|
+
// Tool 3: kill_agent
// Force-kills a live agent's process. Responses:
//   - unknown agent_id          -> isError text "Error: Agent <id> not found"
//   - agent already terminal    -> JSON { agent_id, status, message } (no-op)
//   - kill issued               -> JSON { agent_id, status: "stopped", message }
//   - kill could not be issued  -> isError text "Error killing agent: <reason>"
server.tool("kill_agent", "Terminate a live agent (status `processing` or `stalled`) by immediately force-killing its managed process tree. No-op for already-terminal agents.", {
    agent_id: z.string(),
}, async (params) => {
    const agent = agents.get(params.agent_id);
    if (!agent) {
        return {
            content: [
                {
                    type: "text",
                    text: `Error: Agent ${params.agent_id} not found`,
                },
            ],
            isError: true,
        };
    }
    // Kill applies to ALL live states (processing OR stalled). A terminal agent
    // (finished/errored/stopped) is a no-op.
    const isLive = agent.status === "processing" || agent.status === "stalled";
    if (!isLive) {
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        agent_id: agent.id,
                        status: agent.status,
                        message: `Agent is not live (status: ${agent.status})`,
                    }),
                },
            ],
        };
    }
    // Remembered so a failed kill attempt does not leave a still-running agent
    // mislabelled as "stopped".
    const previousStatus = agent.status;
    try {
        // Immediately force-kill — no graceful SIGTERM grace period. The status
        // is flipped BEFORE the kill is issued, as in the original ordering.
        agent.status = "stopped";
        const { pid } = agent.process;
        if (isWindows && pid) {
            // taskkill /t /f tears down the whole tree. Wait until the helper has
            // actually been spawned: a spawn failure is reported through the
            // child's 'error' event, which would otherwise be unhandled (crashing
            // the server) while this tool had already claimed success. The
            // 'error' listener stays attached, so later errors from the helper
            // are absorbed as well.
            await new Promise((resolve, reject) => {
                const killer = spawn("taskkill", ["/pid", String(pid), "/t", "/f"], {
                    windowsHide: true,
                });
                killer.once("error", reject);
                killer.once("spawn", resolve);
            });
        }
        else if (pid) {
            // POSIX: SIGKILL the process by pid.
            process.kill(pid, "SIGKILL");
        }
        else {
            // No pid recorded: fall back to the ChildProcess handle.
            agent.process.kill("SIGKILL");
        }
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        agent_id: agent.id,
                        status: "stopped",
                        message: "Process tree force-killed",
                    }),
                },
            ],
        };
    }
    catch (error) {
        // The kill was not issued. Undo the optimistic status change unless
        // something else has already moved the agent to another state.
        if (agent.status === "stopped") {
            agent.status = previousStatus;
        }
        const errorMsg = error instanceof Error ? error.message : String(error);
        return {
            content: [
                {
                    type: "text",
                    text: `Error killing agent: ${errorMsg}`,
                },
            ],
            isError: true,
        };
    }
});
|
|
680
|
+
// Tool 4: send_message
// Writes one newline-terminated message to a live agent's stdin. Responses:
//   - unknown agent_id            -> isError "Error: Agent <id> not found"
//   - agent not processing/stalled-> isError "Error: Agent is not live (status: <s>)"
//   - stdin missing or closed     -> isError "Error: Agent stdin is not available"
//   - write accepted              -> JSON { agent_id, status: "sent", message }
//   - write threw synchronously   -> isError "Error sending message: <reason>"
server.tool("send_message", "Send a message to a running agent's stdin", {
    agent_id: z.string(),
    message: z.string().min(1),
}, async (params) => {
    // Shared shape for every error response of this tool.
    const fail = (text) => ({
        content: [{ type: "text", text }],
        isError: true,
    });
    const agent = agents.get(params.agent_id);
    if (!agent) {
        return fail(`Error: Agent ${params.agent_id} not found`);
    }
    const isLive = agent.status === "processing" || agent.status === "stalled";
    if (!isLive) {
        return fail(`Error: Agent is not live (status: ${agent.status})`);
    }
    // A stdin stream that exists but has been ended, destroyed or has errored
    // is not usable: writing to it fails asynchronously via an 'error' event
    // instead of throwing, so the try/catch below would never see it and the
    // tool would report "sent" for a message that was never delivered.
    const stdin = agent.process.stdin;
    if (!stdin || !stdin.writable) {
        return fail(`Error: Agent stdin is not available`);
    }
    try {
        stdin.write(params.message + "\n");
        // A delivered message counts as activity for liveness tracking.
        agent.lastActivity = Date.now();
        return {
            content: [
                {
                    type: "text",
                    text: JSON.stringify({
                        agent_id: agent.id,
                        status: "sent",
                        message: "Message written to agent stdin",
                    }),
                },
            ],
        };
    }
    catch (error) {
        const errorMsg = error instanceof Error ? error.message : String(error);
        return fail(`Error sending message: ${errorMsg}`);
    }
});
|
|
749
|
+
// Tool 5: list_agents
// Emits a compact JSON summary ({ agents: [...] }) with one entry per known agent.
server.tool("list_agents", "List all agents with token-efficient core metrics (status, `alive`, `idle_seconds`). `stalled` is ALIVE-but-quiet, NOT dead (full status semantics on poll_agent). Use `poll_agent` for per-agent stream items, hints, and final output.", {}, async () => {
    const now = Date.now();
    // Snapshot the registry first, then summarise each agent in turn.
    const snapshot = Array.from(agents.values());
    const agentList = [];
    for (const agent of snapshot) {
        // Reconcile first so a process that has already exited shows up as
        // finished/errored immediately instead of after the health monitor runs.
        reconcileAgent(agent, now);
        const summary = {
            id: agent.id,
            provider: agent.provider,
            model: agent.model,
            status: agent.status,
            started_at: agent.startedAt,
            last_activity: agent.lastActivity,
            cwd_basename: basename(agent.cwd),
        };
        // Final argument `false` = no stalled hint; that verbose text is served
        // by poll_agent only, keeping this listing token-efficient.
        Object.assign(summary, buildLivenessFields(agent.status, agent.exitCode, agent.lastActivity, now, false));
        agentList.push(summary);
    }
    const body = JSON.stringify({ agents: agentList });
    return { content: [{ type: "text", text: body }] };
});
|
|
778
|
+
// Tool 6: wait
// Resolves with the agents that have reached a terminal state and were not yet
// reported by a previous wait call, polling every 250 ms for up to 15 minutes.
// On timeout it resolves with the list of agents that are still live.
server.tool("wait", "Blocks until one or more sub-agents reach a terminal state (finished/errored/stopped), returning each one's exit code + local-time exit timestamp; or returns the live-job list after a 15-minute timeout. A `stalled` agent is still ALIVE and does NOT end the wait — only a terminal exit does. Pass `verbose: true` to add each finished agent's `final_output` (its final assistant turn, extracted from captured stdout).", {
    verbose: z.boolean().optional().default(false),
}, async ({ verbose }) => {
    const TIMEOUT_MS = 15 * 60 * 1000;
    const POLL_INTERVAL_MS = 250;
    const deadline = Date.now() + TIMEOUT_MS;
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    // Every response of this tool is a single pretty-printed JSON text item.
    const reply = (payload) => ({
        content: [{ type: "text", text: JSON.stringify(payload, null, 2) }],
    });
    const isLive = (a) => a.status === "processing" || a.status === "stalled";
    // Entry for an agent that has exited.
    const describeFinished = (a) => {
        const entry = {
            id: a.id,
            provider: a.provider,
            model: a.model,
            status: a.status,
            exit_code: a.exitCode,
            exited_at: formatLocalIso(a.exitedAt),
            elapsed_ms: a.exitedAt - a.startedAt,
        };
        if (verbose) {
            entry.final_output = extractFinalTurn(a.provider, a.stdout);
        }
        return entry;
    };
    // Entry for an agent that is still live when the wait times out.
    const describeRunning = (a, now) => ({
        id: a.id,
        provider: a.provider,
        model: a.model,
        status: a.status,
        started_at_local: formatLocalIso(a.startedAt),
        last_activity_local: formatLocalIso(a.lastActivity),
        elapsed_ms: now - a.startedAt,
    });
    // Picks up the currently unreported agents; when there are any, flags them
    // as reported (synchronously, before the response is built, so two
    // overlapping waits never return the same agent) and returns the response.
    // Returns null when there is nothing to report yet.
    const claimFinished = () => {
        const ready = selectUnreported(Array.from(agents.values()));
        if (ready.length === 0) {
            return null;
        }
        for (const a of ready) {
            a.waitReported = true;
        }
        return reply({ finished: ready.map((a) => describeFinished(a)) });
    };
    // Step 1: anything already terminal and unreported is returned immediately.
    const immediate = claimFinished();
    if (immediate !== null) {
        return immediate;
    }
    // Step 2: bail out when nothing can ever finish. An agent counts as pending
    // while it is live (`stalled` included) or while it carries a terminal
    // status but has no recorded exit time yet (stopped-but-not-yet-closed).
    const terminalStatuses = new Set(["finished", "errored", "stopped"]);
    const hasPending = Array.from(agents.values()).some((a) => isLive(a) || (terminalStatuses.has(a.status) && a.exitedAt === null));
    if (!hasPending) {
        return reply({
            finished: [],
            message: "No agents are running or waiting to finish.",
        });
    }
    // Step 3: poll until some agent becomes reportable or the deadline passes.
    while (Date.now() < deadline) {
        await pause(POLL_INTERVAL_MS);
        const finished = claimFinished();
        if (finished !== null) {
            return finished;
        }
    }
    // Step 4: timed out — report whichever agents are still live.
    const now = Date.now();
    const stillRunning = Array.from(agents.values()).filter(isLive);
    return reply({
        timed_out: true,
        elapsed_minutes: 15,
        running: stillRunning.map((a) => describeRunning(a, now)),
        hint: "15 minutes elapsed with no agent finishing. Call wait again to block for another 15 minutes or until the next agent finishes.",
    });
});
|
|
860
|
+
// Tool 7: orchestration-mode
// Sets (enabled: true/false) or merely queries (enabled omitted) the
// orchestration marker for the server's current working directory, then
// reports the resulting state and the marker's path.
server.tool("orchestration-mode", "Toggle or query per-project ORCHESTRATION MODE. `enabled`: true = ON, false = OFF, omit = query current state. The FULL operating model + governance is carried in this server's MCP `instructions` (read once at initialize) — this is the operational summary only; do not act on the mode without that detail. WHAT: a per-project toggle for LONG-HORIZON work that would fill the context window if run to completion inline; when ON, act as an orchestrator with delegate-default, but steps bound to main-session-only capability stay INLINE BY RIGHT (state which + why). PERSISTENCE: a per-project marker keyed by cwd; absence of the marker = OFF = no injection; once ON it persists across restarts/sessions until a permitted disable (it does NOT reset on a new session). CARRYOVER: if ON was inherited from a PRIOR session (provenance = carried-over, not user-enabled this session), the bundled hook prepends a ONE-TIME notice (once per marker, never per turn) — you MUST then notify the user it auto-activated and confirm whether to keep it ON. DISABLE: never on your own initiative; you MAY PROPOSE turning it OFF on task-fit mismatch, but only EXPLICIT user permission (AskUserQuestion on Claude, request-user-input on Codex) may set enabled:false. Per-turn injection fires only in CLI hosts that load the bundled hook; desktop hosts toggle the marker but inject nothing (documented degradation).", {
    enabled: z.boolean().optional(),
}, async ({ enabled }) => {
    const cwd = process.cwd();
    // Only an explicit boolean mutates the marker; `undefined` is a pure query.
    switch (enabled) {
        case true:
            orchestrationMarker.enable(cwd);
            break;
        case false:
            orchestrationMarker.disable(cwd);
            break;
        default:
            break;
    }
    const state = {
        orchestration_mode: orchestrationMarker.isActive(cwd),
        marker_path: orchestrationMarker.markerPath(cwd),
    };
    return { content: [{ type: "text", text: JSON.stringify(state) }] };
});
|
|
884
|
+
// Entry-point guard. The stdio transport must be connected only when this file
// is the script being executed (the bin), never when it is merely imported —
// e.g. by test/handler-validation.test.mjs pulling in the exported
// validatePresence — because connecting on import would leave the importer
// blocked on an open stdio transport. process.argv[1] is the invoked script;
// its resolved real path, as a file URL, is compared with this module's URL.
const isMain = process.argv[1] !== undefined &&
    import.meta.url === pathToFileURL(realpathSync(process.argv[1])).href;
if (isMain) {
    const [, , subcommand] = process.argv;
    // `setup` subcommand: run the installer flow, then exit successfully.
    if (subcommand === "setup") {
        const setupModule = await import("./setup.js");
        await setupModule.runSetup();
        process.exit(0);
    }
    // `doctor` subcommand: the value runDoctor resolves to is the exit code.
    if (subcommand === "doctor") {
        const doctorModule = await import("./doctor.js");
        process.exit(await doctorModule.runDoctor());
    }
    // ORCHESTRATION MODE PERSISTS across restarts/sessions: startup deliberately
    // does NOT clear the marker. "Default OFF" means the marker is ABSENT — a
    // project that was never enabled stays OFF, and one that was explicitly
    // enabled stays ON until it is disabled with explicit user permission. On a
    // new session the bundled hook detects the carried-over marker and prompts
    // the user to confirm. (orchestrationMarker.disable remains in use by the
    // orchestration-mode tool's enabled:false path.)
    const transport = new StdioServerTransport();
    await server.connect(transport);
}
|