mcp-agents 0.10.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +13 -0
  2. package/package.json +3 -3
  3. package/server.js +498 -51
package/README.md CHANGED
@@ -101,6 +101,19 @@ Startup flags (`--model`, `--model_reasoning_effort`) set the model and effort f
101
101
 
102
102
  is forwarded to Codex as `{ "prompt": "Review this diff" }`. Change the model or effort at server startup instead.
103
103
 
104
+ **Idle watchdog.** The codex pass-through is transparent, so a Codex session that
105
+ stalls after doing work (e.g. its final model turn hangs, or it waits on an
106
+ elicitation the client never answers) would otherwise hang the caller's
107
+ `tools/call` forever. `--codex_idle_timeout <seconds>` (default `600`, `0`
108
+ disables) bounds this: if Codex emits nothing while a request is in flight for
109
+ that long, the wrapper returns a JSON-RPC error (`-32001`) for the open
110
+ request(s), kills the Codex process group, and exits — turning an unbounded stall
111
+ into a surfaced error. The timer resets on any Codex output or inbound client
112
+ activity and is suspended while the client backpressures stdout, so healthy long
113
+ or interactive runs are not killed. The wrapper also exits (instead of lingering)
114
+ if Codex dies or fails to start, so a dead Codex can never leave the caller
115
+ hanging.
116
+
104
117
  ## Integration with Claude Code
105
118
 
106
119
  Add entries to your project's `.mcp.json` (requires `npm i -g mcp-agents`):
package/package.json CHANGED
@@ -1,10 +1,10 @@
1
1
  {
2
2
  "name": "mcp-agents",
3
- "version": "0.10.1",
3
+ "version": "0.11.0",
4
4
  "description": "MCP server that wraps AI CLI tools (Claude Code, Gemini CLI, Codex CLI) for use by any MCP client",
5
5
  "type": "module",
6
6
  "bin": {
7
- "mcp-agents": "./server.js"
7
+ "mcp-agents": "server.js"
8
8
  },
9
9
  "files": [
10
10
  "server.js",
@@ -23,7 +23,7 @@
23
23
  ],
24
24
  "repository": {
25
25
  "type": "git",
26
- "url": "https://github.com/thomaswitt/mcp-agents"
26
+ "url": "git+https://github.com/thomaswitt/mcp-agents.git"
27
27
  },
28
28
  "author": "Thomas Witt",
29
29
  "dependencies": {
package/server.js CHANGED
@@ -31,6 +31,11 @@ const DEFAULT_CODEX_MODEL = "gpt-5.5";
31
31
  const DEFAULT_CODEX_MODEL_REASONING_EFFORT = "xhigh";
32
32
  const DEFAULT_CODEX_SANDBOX_MODE = "workspace-write";
33
33
  const DEFAULT_CODEX_APPROVAL_POLICY = "never";
34
+ // Idle watchdog for the codex pass-through: if a request is in flight and codex
35
+ // emits nothing on stdout/stderr for this long, the wrapper synthesizes a
36
+ // JSON-RPC error for the open request(s) and tears codex down — converting an
37
+ // unbounded post-completion stall into a surfaced error. 0 disables it.
38
+ const DEFAULT_CODEX_IDLE_TIMEOUT_MS = 600_000;
34
39
  const DEFAULT_CLAUDE_MODEL = "claude-opus-4-8";
35
40
  const DEFAULT_CLAUDE_EFFORT = "xhigh";
36
41
  // tools/call argument keys stripped from the codex pass-through so callers
@@ -61,7 +66,7 @@ const CODEX_STRIPPED_CONFIG_KEYS = [
61
66
  ];
62
67
  const MAX_BUFFER_BYTES = 10 * 1024 * 1024;
63
68
  const CLAUDE_EMPTY_OUTPUT_MAX_ATTEMPTS = 2;
64
- const SIGNAL_CODES = { SIGHUP: 1, SIGINT: 2, SIGTERM: 15 };
69
+ const SIGNAL_CODES = { SIGHUP: 1, SIGINT: 2, SIGKILL: 9, SIGTERM: 15 };
65
70
  const SHUTDOWN_TIMEOUT_MS = 3_000;
66
71
  let fatalShutdown;
67
72
 
@@ -187,6 +192,8 @@ Options:
187
192
  danger-full-access [default: ${DEFAULT_CODEX_SANDBOX_MODE}]
188
193
  --approval_policy <policy> Codex approval policy: untrusted, on-failure,
189
194
  on-request, never [default: ${DEFAULT_CODEX_APPROVAL_POLICY}]
195
+ --codex_idle_timeout <secs> Codex pass-through idle watchdog; 0 disables
196
+ [default: ${DEFAULT_CODEX_IDLE_TIMEOUT_MS / 1000}]
190
197
  --timeout <seconds> Default timeout per call [default: 300]
191
198
  --help, -h Show this help message
192
199
  --version, -v Show version number`);
@@ -195,8 +202,8 @@ Options:
195
202
  /**
196
203
  * Parse CLI flags from process.argv.
197
204
  * Handles --help, --version, --provider, --model, --model_reasoning_effort,
198
- * --sandbox_mode, --approval_policy, and unknown flags.
199
- * @returns {{ provider: string, model?: string, modelReasoningEffort?: string, sandboxMode?: string, approvalPolicy?: string, defaultTimeoutMs?: number }}
205
+ * --sandbox_mode, --approval_policy, --codex_idle_timeout, and unknown flags.
206
+ * @returns {{ provider: string, model?: string, modelReasoningEffort?: string, sandboxMode?: string, approvalPolicy?: string, codexIdleTimeoutMs?: number, defaultTimeoutMs?: number }}
200
207
  */
201
208
  function parseArgs() {
202
209
  const args = process.argv.slice(2);
@@ -205,6 +212,7 @@ function parseArgs() {
205
212
  let modelReasoningEffort;
206
213
  let sandboxMode;
207
214
  let approvalPolicy;
215
+ let codexIdleTimeoutMs;
208
216
  let defaultTimeoutMs;
209
217
 
210
218
  for (let i = 0; i < args.length; i++) {
@@ -256,6 +264,21 @@ function parseArgs() {
256
264
  }
257
265
  approvalPolicy = args[++i];
258
266
  break;
267
+ case "--codex_idle_timeout": {
268
+ if (i + 1 >= args.length) {
269
+ process.stderr.write("error: --codex_idle_timeout requires a value\n");
270
+ process.exit(1);
271
+ }
272
+ const secs = Number(args[++i]);
273
+ if (!Number.isFinite(secs) || secs < 0) {
274
+ process.stderr.write(
275
+ "error: --codex_idle_timeout must be a non-negative number\n",
276
+ );
277
+ process.exit(1);
278
+ }
279
+ codexIdleTimeoutMs = Math.round(secs * 1000);
280
+ break;
281
+ }
259
282
  case "--timeout": {
260
283
  if (i + 1 >= args.length) {
261
284
  process.stderr.write("error: --timeout requires a value\n");
@@ -281,6 +304,7 @@ function parseArgs() {
281
304
  modelReasoningEffort,
282
305
  sandboxMode,
283
306
  approvalPolicy,
307
+ codexIdleTimeoutMs,
284
308
  defaultTimeoutMs,
285
309
  };
286
310
  }
@@ -459,26 +483,33 @@ function createIsolatedCodexHome({
459
483
  approvalPolicy,
460
484
  }) {
461
485
  const codexHome = mkdtempSync(join(tmpdir(), "mcp-agents-codex-"));
462
- const sourceAuthPath = join(resolveCodexHome(), "auth.json");
463
- const targetAuthPath = join(codexHome, "auth.json");
464
- const configPath = join(codexHome, "config.toml");
486
+ // If auth copy or config write throws after the dir exists, remove the
487
+ // partially-prepared dir before rethrowing so it is never leaked.
488
+ try {
489
+ const sourceAuthPath = join(resolveCodexHome(), "auth.json");
490
+ const targetAuthPath = join(codexHome, "auth.json");
491
+ const configPath = join(codexHome, "config.toml");
465
492
 
466
- if (existsSync(sourceAuthPath)) {
467
- copyFileSync(sourceAuthPath, targetAuthPath);
468
- }
493
+ if (existsSync(sourceAuthPath)) {
494
+ copyFileSync(sourceAuthPath, targetAuthPath);
495
+ }
469
496
 
470
- writeFileSync(
471
- configPath,
472
- buildCodexBridgeConfig({
473
- model,
474
- modelReasoningEffort,
475
- sandboxMode,
476
- approvalPolicy,
477
- }),
478
- "utf8",
479
- );
497
+ writeFileSync(
498
+ configPath,
499
+ buildCodexBridgeConfig({
500
+ model,
501
+ modelReasoningEffort,
502
+ sandboxMode,
503
+ approvalPolicy,
504
+ }),
505
+ "utf8",
506
+ );
480
507
 
481
- return codexHome;
508
+ return codexHome;
509
+ } catch (err) {
510
+ try { rmSync(codexHome, { recursive: true, force: true }); } catch {}
511
+ throw err;
512
+ }
482
513
  }
483
514
 
484
515
  /**
@@ -545,22 +576,27 @@ function filterCodexToolCall(line) {
545
576
  }
546
577
 
547
578
  /**
548
- * Spawn codex mcp-server as a pass-through. stdout/stderr flow straight back to
549
- * the client, but the client's stdin is intercepted line-by-line so per-call
550
- * model/config overrides are stripped before reaching codex.
551
- * @param {{ model?: string, modelReasoningEffort?: string, sandboxMode?: string, approvalPolicy?: string }} opts
579
+ * Spawn codex mcp-server as a pass-through. codex stdout is forwarded back to
580
+ * the client byte-for-byte, but the client's stdin is intercepted line-by-line
581
+ * so per-call model/config overrides are stripped before reaching codex. An
582
+ * idle watchdog converts an unbounded codex stall (no stdout/stderr while a
583
+ * request is in flight) into a synthesized JSON-RPC error so the caller never
584
+ * hangs forever.
585
+ * @param {{ model?: string, modelReasoningEffort?: string, sandboxMode?: string, approvalPolicy?: string, idleTimeoutMs?: number }} opts
552
586
  */
553
587
  function runCodexPassthrough({
554
588
  model,
555
589
  modelReasoningEffort,
556
590
  sandboxMode,
557
591
  approvalPolicy,
592
+ idleTimeoutMs,
558
593
  }) {
559
594
  const resolvedModel = model || DEFAULT_CODEX_MODEL;
560
595
  const resolvedModelReasoningEffort =
561
596
  modelReasoningEffort || DEFAULT_CODEX_MODEL_REASONING_EFFORT;
562
597
  const resolvedSandboxMode = sandboxMode || DEFAULT_CODEX_SANDBOX_MODE;
563
598
  const resolvedApprovalPolicy = approvalPolicy || DEFAULT_CODEX_APPROVAL_POLICY;
599
+ const resolvedIdleTimeoutMs = idleTimeoutMs ?? DEFAULT_CODEX_IDLE_TIMEOUT_MS;
564
600
  let isolatedCodexHome;
565
601
 
566
602
  try {
@@ -595,70 +631,479 @@ function runCodexPassthrough({
595
631
  `[mcp-agents] passthrough: codex ${args.join(" ")} ` +
596
632
  `(model=${resolvedModel}, reasoning_effort=${resolvedModelReasoningEffort}, ` +
597
633
  `sandbox_mode=${resolvedSandboxMode}, approval_policy=${resolvedApprovalPolicy}, ` +
598
- `isolated_home=true)`,
634
+ `idle_timeout_ms=${resolvedIdleTimeoutMs}, isolated_home=true)`,
599
635
  );
600
636
 
601
637
  const child = spawn("codex", args, {
602
638
  env: { ...process.env, CODEX_HOME: isolatedCodexHome },
603
- // stdin is piped (not inherited) so we can strip per-call overrides;
604
- // stdout stays inherited so codex responses reach the client untouched.
605
- stdio: ["pipe", "inherit", "pipe"],
639
+ // stdin is piped so we can strip per-call overrides; stdout is piped (not
640
+ // inherited) so the wrapper can both forward responses byte-for-byte AND
641
+ // observe them for the idle watchdog. detached:true puts codex in its own
642
+ // process group so a stall is torn down group-wide (mirrors runCli).
643
+ detached: true,
644
+ stdio: ["pipe", "pipe", "pipe"],
606
645
  });
607
646
 
647
+ const NEWLINE = 0x0a;
648
+ // Clean the isolated home on any exit path, not just the ones we route through
649
+ // hardExit() (e.g. a global uncaughtException handler calling process.exit).
650
+ process.once("exit", () => cleanupIsolatedCodexHome());
651
+
652
+ // Install signal teardown IMMEDIATELY after spawn (before the heavier wiring
653
+ // below) so a signal in the startup window can never orphan the detached
654
+ // group. `finalize` is a forward reference — safe because the handler body
655
+ // only runs when a signal fires, which is after this synchronous setup
656
+ // completes and `finalize` is defined.
657
+ for (const sig of ["SIGTERM", "SIGINT", "SIGHUP"]) {
658
+ process.once(sig, () => {
659
+ finalize({
660
+ reason: `signal ${sig}`,
661
+ emit: false,
662
+ exitCode: 128 + SIGNAL_CODES[sig],
663
+ });
664
+ });
665
+ }
666
+
667
+ // ── In-flight request tracking ──────────────────────────────────────────
668
+ // Client requests (id + method) awaiting a codex response. Keyed by a
669
+ // type-preserving key so JSON-RPC `1` (number) and `"1"` (string) never
670
+ // collide. `canceled` marks ids the client gave up on (notifications/
671
+ // cancelled): we never synthesize a response for them, but they still count
672
+ // toward teardown so a canceled-but-wedged codex is not left running.
673
+ const inFlight = new Map();
674
+ const idKey = (id) => `${typeof id}:${id}`;
675
+ const addInFlight = (id) => {
676
+ if (id == null) return;
677
+ const key = idKey(id);
678
+ if (!inFlight.has(key)) inFlight.set(key, { id, canceled: false });
679
+ };
680
+ const clearInFlight = (id) => {
681
+ if (id != null) inFlight.delete(idKey(id));
682
+ };
683
+ const cancelInFlight = (id) => {
684
+ const entry = id == null ? undefined : inFlight.get(idKey(id));
685
+ if (entry) entry.canceled = true;
686
+ };
687
+ const hasEmittableInFlight = () => {
688
+ for (const entry of inFlight.values()) if (!entry.canceled) return true;
689
+ return false;
690
+ };
691
+
692
+ // ── Liveness / lifecycle state ──────────────────────────────────────────
693
+ let finalizing = false;
694
+ let exited = false;
695
+ let stdoutPaused = false; // process.stdout backpressured (downstream, not idle)
696
+ let idleTimer;
697
+ let lastForwardedByteWasNewline = true; // nothing forwarded yet
698
+ let stdoutObsBuf = Buffer.alloc(0); // observation copy of codex stdout
699
+ let skippingFrame = false; // mid-skip of an oversized stdout frame (resync at \n)
700
+ let droppedFrameResponseId; // partial oversized frame's classified id (cleared at its newline)
701
+ let observationDropLogged = false; // log the first observation-cap drop only
702
+
703
+ const killGroup = (signal) => {
704
+ try {
705
+ if (child.pid) process.kill(-child.pid, signal);
706
+ else child.kill(signal);
707
+ } catch {
708
+ try { child.kill(signal); } catch {}
709
+ }
710
+ };
711
+
712
+ const clearIdle = () => {
713
+ if (idleTimer) {
714
+ clearTimeout(idleTimer);
715
+ idleTimer = undefined;
716
+ }
717
+ };
718
+ const armIdle = () => {
719
+ clearIdle();
720
+ // No watchdog when disabled, while finalizing, or while downstream is
721
+ // backpressured (blocked downstream != idle upstream).
722
+ if (!(resolvedIdleTimeoutMs > 0) || finalizing || stdoutPaused) return;
723
+ idleTimer = setTimeout(onIdle, resolvedIdleTimeoutMs);
724
+ };
725
+ const resetIdle = armIdle;
726
+
727
+ // Parse one complete codex->client stdout frame (observation only — the raw
728
+ // bytes are forwarded separately). Clears an id once its result/error lands.
729
+ const observeOutgoingLine = (line) => {
730
+ const trimmed = line.trim();
731
+ if (!trimmed) return;
732
+ let msg;
733
+ try { msg = JSON.parse(trimmed); } catch { return; }
734
+ if (
735
+ msg && typeof msg === "object" && "id" in msg &&
736
+ ("result" in msg || "error" in msg)
737
+ ) {
738
+ clearInFlight(msg.id);
739
+ }
740
+ };
741
+
742
+ // Classify a (possibly oversized) frame from a bounded prefix: return the
743
+ // request id iff it is clearly a RESPONSE — a top-level "result"/"error" with
744
+ // the "id" appearing before it and no top-level "method" preceding it.
745
+ // Assumes codex's (serde_json) serialization order: a response is
746
+ // {jsonrpc,id,result|error} (id/result within the first handful of bytes), and
747
+ // a notification/request emits its top-level "method" before "params". Under
748
+ // that contract a nested "result"/"id" inside a non-response's params cannot be
749
+ // misread as a response. Only ever consulted for frames too large to buffer.
750
+ const FRAME_HEADER_SCAN = 8192;
751
+ const peekResponseId = (prefix) => {
752
+ const s = prefix
753
+ .subarray(0, Math.min(prefix.length, FRAME_HEADER_SCAN))
754
+ .toString("utf8");
755
+ const resultAt = s.search(/"(?:result|error)"\s*:/);
756
+ if (resultAt === -1) return undefined; // no result/error -> not a response
757
+ const methodAt = s.search(/"method"\s*:/);
758
+ if (methodAt !== -1 && methodAt < resultAt) return undefined; // request/notif
759
+ // Capture the full id TOKEN (number or quoted string) and JSON-decode it so
760
+ // the value matches what noteInbound stored via JSON.parse — otherwise an
761
+ // escaped string id (e.g. "a\\b") would not equal the tracked key.
762
+ const idMatch = s
763
+ .slice(0, resultAt)
764
+ .match(/"id"\s*:\s*(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|"(?:[^"\\]|\\.)*")/);
765
+ if (!idMatch) return undefined;
766
+ try { return JSON.parse(idMatch[1]); } catch { return undefined; }
767
+ };
768
+
769
+ const logObservationDropOnce = () => {
770
+ if (!observationDropLogged) {
771
+ logErr(
772
+ "[mcp-agents] codex passthrough: stdout frame exceeded observation cap; " +
773
+ "classifying it via a bounded header scan (forwarding unaffected)",
774
+ );
775
+ observationDropLogged = true;
776
+ }
777
+ };
778
+
779
+ // Resolve a dropped frame's effect on id-tracking. The frame's raw bytes were
780
+ // already forwarded to the client. If a bounded header scan proves it is the
781
+ // RESPONSE for an in-flight id, clear exactly that id — so we neither
782
+ // double-respond with a synthetic error nor falsely idle-kill a healthy
783
+ // session once codex goes quiet. If it is NOT a response (notification /
784
+ // server->client request) or cannot be classified, leave the in-flight ids
785
+ // tracked so a genuine post-frame stall is still caught. ONLY call this once
786
+ // the frame is COMPLETE (its terminating newline has been seen): clearing on a
787
+ // still-partial frame would prematurely untrack an id whose response codex may
788
+ // never finish writing, re-introducing a hang.
789
+ const resolveDroppedFrame = (prefix) => {
790
+ const id = peekResponseId(prefix);
791
+ if (id !== undefined) clearInFlight(id);
792
+ };
793
+
794
+ // Accumulate codex stdout into the observation buffer and parse each complete
795
+ // frame to clear in-flight ids. Soft-bounded by MAX_BUFFER_BYTES so a
796
+ // pathologically large single frame cannot exhaust memory — the bound is
797
+ // approximate (a frame may transiently allocate up to one stream chunk beyond
798
+ // the cap before being dropped). The RAW bytes are always forwarded untouched
799
+ // by the caller regardless. Dropped frames are classified via peekResponseId().
800
+ const observeOutgoing = (chunk) => {
801
+ let data = chunk;
802
+ if (skippingFrame) {
803
+ const nl = data.indexOf(NEWLINE);
804
+ if (nl === -1) return; // still inside the oversized frame
805
+ // The oversized frame just COMPLETED. Apply the deferred clear now: if its
806
+ // header looked like a response, the response genuinely finished, so clear
807
+ // that id. (If codex had stalled mid-frame, this newline never arrives and
808
+ // the id stays tracked so the watchdog still catches the stall.)
809
+ skippingFrame = false;
810
+ if (droppedFrameResponseId !== undefined) {
811
+ clearInFlight(droppedFrameResponseId);
812
+ droppedFrameResponseId = undefined;
813
+ }
814
+ data = data.subarray(nl + 1); // resume parsing after the frame boundary
815
+ }
816
+ stdoutObsBuf = stdoutObsBuf.length ? Buffer.concat([stdoutObsBuf, data]) : data;
817
+ let nl;
818
+ while ((nl = stdoutObsBuf.indexOf(NEWLINE)) !== -1) {
819
+ if (nl > MAX_BUFFER_BYTES) {
820
+ // A COMPLETE frame larger than the cap: it fully arrived, so classify it
821
+ // from a bounded header prefix and clear its id now (no huge alloc).
822
+ logObservationDropOnce();
823
+ resolveDroppedFrame(stdoutObsBuf.subarray(0, nl));
824
+ stdoutObsBuf = stdoutObsBuf.subarray(nl + 1);
825
+ continue;
826
+ }
827
+ const line = stdoutObsBuf.subarray(0, nl).toString("utf8");
828
+ stdoutObsBuf = stdoutObsBuf.subarray(nl + 1);
829
+ observeOutgoingLine(line);
830
+ }
831
+ if (stdoutObsBuf.length > MAX_BUFFER_BYTES) {
832
+ // A PARTIAL frame already past the cap with no newline yet: classify the
833
+ // prefix but DEFER clearing to the frame's newline (above) — clearing now
834
+ // would untrack an id whose response codex might never finish, hanging it.
835
+ logObservationDropOnce();
836
+ droppedFrameResponseId = peekResponseId(stdoutObsBuf);
837
+ stdoutObsBuf = Buffer.alloc(0);
838
+ skippingFrame = true;
839
+ }
840
+ };
841
+
842
+ const hardExit = (code) => {
843
+ if (exited) return;
844
+ exited = true;
845
+ clearIdle();
846
+ cleanupIsolatedCodexHome();
847
+ process.exit(code);
848
+ };
849
+ const flushThenExit = (code) => {
850
+ if (exited) return;
851
+ if (process.stdout.writableLength === 0) {
852
+ hardExit(code);
853
+ return;
854
+ }
855
+ // Ref'd safety timer guarantees exit if 'drain' never fires (client gone).
856
+ const safety = setTimeout(() => hardExit(code), 2_000);
857
+ process.stdout.once("drain", () => {
858
+ clearTimeout(safety);
859
+ hardExit(code);
860
+ });
861
+ };
862
+
863
+ // Single, idempotent teardown. `emit` controls whether open (non-canceled)
864
+ // requests get a synthetic JSON-RPC error before exit. The detached group is
865
+ // killed on EVERY teardown path so codex and any descendants are never
866
+ // orphaned.
867
+ const finalize = ({ reason, emit, exitCode }) => {
868
+ if (finalizing) return;
869
+ finalizing = true;
870
+ clearIdle();
871
+ logErr(`[mcp-agents] codex passthrough finalize: ${reason}`);
872
+
873
+ // Stop forwarding further codex stdout so a late real response cannot race
874
+ // the synthetic error onto the wire after we've taken over the stream.
875
+ try { child.stdout?.pause(); } catch {}
876
+
877
+ // Kill the whole detached group so codex AND any descendants it spawned are
878
+ // reaped on EVERY teardown path — never orphaned. On abort paths (idle /
879
+ // signal / EPIPE / fatal) codex is still alive, so there is no PID-reuse
880
+ // risk; on a natural close/spawn-error this runs synchronously right after
881
+ // the child was reaped (a negligible reuse window) to clean up anything
882
+ // codex left behind in its group. A SIGKILL on an already-empty group is a
883
+ // harmless ESRCH (swallowed by killGroup).
884
+ killGroup("SIGKILL");
885
+
886
+ if (emit && hasEmittableInFlight()) {
887
+ // Framing recovery: if codex left a dangling partial line on the wire, try
888
+ // to parse it (it may itself be the real response) and terminate it with a
889
+ // newline so the synthetic frame cannot glue onto a half-written line.
890
+ if (stdoutObsBuf.length > 0) {
891
+ observeOutgoingLine(stdoutObsBuf.toString("utf8"));
892
+ stdoutObsBuf = Buffer.alloc(0);
893
+ try { process.stdout.write("\n"); } catch {}
894
+ lastForwardedByteWasNewline = true;
895
+ } else if (!lastForwardedByteWasNewline) {
896
+ try { process.stdout.write("\n"); } catch {}
897
+ lastForwardedByteWasNewline = true;
898
+ }
899
+
900
+ for (const entry of inFlight.values()) {
901
+ if (entry.canceled) continue;
902
+ const frame = {
903
+ jsonrpc: "2.0",
904
+ id: entry.id,
905
+ error: {
906
+ code: -32001,
907
+ message:
908
+ `mcp-agents: codex pass-through aborted before responding ` +
909
+ `(${reason}); the request was still open. Any applied edits may ` +
910
+ `exist — verify the tree.`,
911
+ },
912
+ };
913
+ try { process.stdout.write(`${JSON.stringify(frame)}\n`); } catch {}
914
+ }
915
+ }
916
+
917
+ flushThenExit(exitCode);
918
+ };
919
+
920
+ // Route the global uncaughtException/unhandledRejection handlers through the
921
+ // same teardown so codex's DETACHED group is always killed — otherwise those
922
+ // handlers call process.exit() directly and orphan codex (the 'exit' handler
923
+ // only deletes CODEX_HOME, it cannot reap a detached group).
924
+ fatalShutdown = (reason, code) =>
925
+ finalize({ reason: `fatal: ${reason}`, emit: true, exitCode: code ?? 1 });
926
+
927
+ function onIdle() {
928
+ idleTimer = undefined;
929
+ if (finalizing) return;
930
+ if (hasEmittableInFlight()) {
931
+ finalize({
932
+ reason: `idle timeout (${Math.round(resolvedIdleTimeoutMs / 1000)}s)`,
933
+ emit: true,
934
+ exitCode: 1,
935
+ });
936
+ return;
937
+ }
938
+ // Only canceled requests left -> tear down quietly. Nothing open at all ->
939
+ // healthy idle between calls, just re-arm.
940
+ if (inFlight.size > 0) {
941
+ finalize({
942
+ reason: "idle timeout (canceled-only)",
943
+ emit: false,
944
+ exitCode: 1,
945
+ });
946
+ } else {
947
+ armIdle();
948
+ }
949
+ }
950
+
608
951
  child.stderr.on("data", (chunk) => {
952
+ resetIdle();
609
953
  logErr(`[codex] ${chunk.toString().trimEnd()}`);
610
954
  });
611
955
 
956
+ // Forward codex stdout to the client byte-for-byte (raw Buffer) and keep a
957
+ // parallel observation buffer (split on the newline BYTE) to clear in-flight
958
+ // ids as their responses land. Raw chunks are forwarded; reconstructed lines
959
+ // are never written back.
960
+ child.stdout.on("data", (chunk) => {
961
+ if (finalizing) return; // stream ownership has been taken over
962
+ resetIdle();
963
+
964
+ // Forward the raw bytes FIRST so a bug in observation can never affect the
965
+ // byte-for-byte passthrough (observation is best-effort id-tracking only).
966
+ if (chunk.length > 0) {
967
+ lastForwardedByteWasNewline = chunk[chunk.length - 1] === NEWLINE;
968
+ }
969
+ const ok = process.stdout.write(chunk);
970
+ try {
971
+ observeOutgoing(chunk); // bounded parse-for-ids; never alters forwarded bytes
972
+ } catch (err) {
973
+ const msg = err instanceof Error ? err.message : String(err);
974
+ logErr(`[mcp-agents] codex passthrough: stdout observation error (ignored): ${msg}`);
975
+ }
976
+ if (!ok) {
977
+ // Downstream full: pause codex and suspend the idle watchdog until the
978
+ // client drains, so a slow reader is never mistaken for a stalled codex.
979
+ // Trade-off: a client that never drains keeps the request open with no
980
+ // watchdog — but a synthetic error could not be delivered to it anyway.
981
+ stdoutPaused = true;
982
+ clearIdle();
983
+ child.stdout.pause();
984
+ }
985
+ });
986
+
987
+ process.stdout.on("drain", () => {
988
+ if (!stdoutPaused) return;
989
+ stdoutPaused = false;
990
+ if (finalizing) return;
991
+ child.stdout.resume();
992
+ resetIdle();
993
+ });
994
+
995
+ process.stdout.on("error", (err) => {
996
+ // Client went away mid-write: nothing left to answer, tear codex down.
997
+ if (err && err.code === "EPIPE") {
998
+ finalize({ reason: "stdout EPIPE", emit: false, exitCode: 0 });
999
+ }
1000
+ });
1001
+
612
1002
  // Pump client stdin -> codex stdin, splitting on the newline BYTE (0x0a) that
613
1003
  // delimits MCP stdio JSON-RPC frames. Buffering raw bytes (not per-chunk
614
1004
  // strings) avoids corrupting a multibyte UTF-8 sequence that straddles two
615
1005
  // read chunks, which would otherwise break the byte-for-byte passthrough.
616
1006
  child.stdin.on("error", () => {}); // ignore EPIPE if codex exits early
617
- const NEWLINE = 0x0a;
1007
+
1008
+ // Read-only inbound tracking: record client requests (id + method) as
1009
+ // in-flight and honor cancellations. Never mutates what is forwarded —
1010
+ // filterCodexToolCall remains the sole authority on the forwarded bytes.
1011
+ const noteInbound = (line) => {
1012
+ const trimmed = line.trim();
1013
+ if (!trimmed) return;
1014
+ let msg;
1015
+ try { msg = JSON.parse(trimmed); } catch { return; }
1016
+ if (!msg || typeof msg !== "object") return;
1017
+ // (Watchdog liveness is reset at the byte level in the stdin 'data' handler,
1018
+ // so even an elicitation response — bare id, no method — keeps a healthy
1019
+ // interactive flow alive.)
1020
+ if (msg.method === "notifications/cancelled") {
1021
+ cancelInFlight(msg.params?.requestId);
1022
+ return;
1023
+ }
1024
+ // A client message awaits a response iff it carries BOTH an id and a method.
1025
+ // A bare id with no method is a *response* to a codex elicitation — skip it
1026
+ // for in-flight tracking.
1027
+ if (msg.id != null && typeof msg.method === "string") {
1028
+ addInFlight(msg.id);
1029
+ }
1030
+ };
1031
+
618
1032
  let stdinBuf = Buffer.alloc(0);
619
1033
  process.stdin.on("data", (chunk) => {
1034
+ // ANY inbound bytes mean the client side of the exchange is alive — even a
1035
+ // large/slow elicitation response arriving across chunks without a newline.
1036
+ // Reset the watchdog here at the BYTE level (not per parsed line): a truly
1037
+ // stalled exchange (codex silent AND client sending nothing) still produces
1038
+ // no inbound, so the genuine stall is still caught.
1039
+ resetIdle();
620
1040
  stdinBuf = stdinBuf.length ? Buffer.concat([stdinBuf, chunk]) : chunk;
621
1041
  let nl;
622
1042
  while ((nl = stdinBuf.indexOf(NEWLINE)) !== -1) {
623
1043
  const line = stdinBuf.subarray(0, nl).toString("utf8");
624
1044
  stdinBuf = stdinBuf.subarray(nl + 1);
1045
+ noteInbound(line);
625
1046
  child.stdin.write(`${filterCodexToolCall(line)}\n`);
626
1047
  }
627
1048
  });
628
1049
  process.stdin.on("error", () => {});
629
1050
  process.stdin.on("end", () => {
630
1051
  if (stdinBuf.length > 0) {
631
- child.stdin.write(filterCodexToolCall(stdinBuf.toString("utf8")));
1052
+ const line = stdinBuf.toString("utf8");
1053
+ noteInbound(line);
1054
+ child.stdin.write(filterCodexToolCall(line));
632
1055
  }
633
1056
  child.stdin.end();
634
1057
  });
635
1058
 
636
- for (const sig of ["SIGTERM", "SIGINT", "SIGHUP"]) {
637
- process.once(sig, () => {
638
- child.kill(sig);
639
- setTimeout(() => {
640
- child.kill("SIGKILL");
641
- cleanupIsolatedCodexHome();
642
- process.exit(128 + SIGNAL_CODES[sig]);
643
- }, 5000).unref();
644
- });
645
- }
646
-
647
1059
  child.on("error", (err) => {
648
- cleanupIsolatedCodexHome();
649
1060
  logErr(`[mcp-agents] failed to start codex: ${err.message}`);
650
- process.exitCode = 1;
1061
+ // codex failed to start. The fix that matters is that we EXIT (instead of
1062
+ // leaving a childless wrapper alive on the client's open stdin, which used
1063
+ // to hang). `emit` synthesizes an error only if a request was already
1064
+ // tracked; spawn 'error' usually fires before any stdin is read, so the
1065
+ // client typically just sees the server exit — the conventional
1066
+ // "server failed to start".
1067
+ finalize({
1068
+ reason: `codex spawn error: ${err.message}`,
1069
+ emit: true,
1070
+ exitCode: 1,
1071
+ });
651
1072
  });
652
1073
 
653
- child.on("exit", (code, signal) => {
654
- cleanupIsolatedCodexHome();
655
- if (signal) {
656
- logErr(`[mcp-agents] codex killed by ${signal}`);
657
- process.exitCode = 128 + (SIGNAL_CODES[signal] ?? 0);
658
- } else {
659
- if (code !== 0) logErr(`[mcp-agents] codex exited with code ${code}`);
660
- process.exitCode = code ?? 1;
1074
+ // codex death is handled via BOTH 'exit' and 'close':
1075
+ // - 'exit' fires when the codex PROCESS terminates. A descendant that
1076
+ // inherited codex's stdio can hold those pipes open, delaying or even
1077
+ // preventing 'close' (and would be orphaned), so we kill the group here to
1078
+ // reap it, which also lets 'close' fire. A ref'd fallback guarantees
1079
+ // teardown even if a descendant escaped the group (setsid) so 'close'
1080
+ // never arrives.
1081
+ // - 'close' fires once all stdio is drained, so codex's final response has
1082
+ // been delivered and its id cleared — only THEN do we decide whether to
1083
+ // synthesize, which avoids double-responding.
1084
+ let childExitInfo = null;
1085
+ const onChildGone = () => {
1086
+ const code = childExitInfo?.code;
1087
+ const signal = childExitInfo?.signal;
1088
+ if (signal) logErr(`[mcp-agents] codex killed by ${signal}`);
1089
+ else if (code != null && code !== 0) {
1090
+ logErr(`[mcp-agents] codex exited with code ${code}`);
661
1091
  }
1092
+ finalize({
1093
+ reason: signal ? `codex killed by ${signal}` : `codex exited (code ${code})`,
1094
+ emit: true,
1095
+ exitCode: signal ? 128 + (SIGNAL_CODES[signal] ?? 0) : (code ?? 1),
1096
+ });
1097
+ };
1098
+
1099
+ child.on("exit", (code, signal) => {
1100
+ childExitInfo = { code, signal };
1101
+ killGroup("SIGKILL");
1102
+ setTimeout(onChildGone, 2_000);
1103
+ });
1104
+ child.on("close", (code, signal) => {
1105
+ if (!childExitInfo) childExitInfo = { code, signal };
1106
+ onChildGone();
662
1107
  });
663
1108
  }
664
1109
 
@@ -673,6 +1118,7 @@ async function main() {
673
1118
  modelReasoningEffort,
674
1119
  sandboxMode,
675
1120
  approvalPolicy,
1121
+ codexIdleTimeoutMs,
676
1122
  defaultTimeoutMs,
677
1123
  } = parseArgs();
678
1124
  const backend = CLI_BACKENDS[providerName];
@@ -690,6 +1136,7 @@ async function main() {
690
1136
  modelReasoningEffort,
691
1137
  sandboxMode,
692
1138
  approvalPolicy,
1139
+ idleTimeoutMs: codexIdleTimeoutMs,
693
1140
  });
694
1141
  return;
695
1142
  }