npm - @oh-my-pi/pi-coding-agent - Versions diffs - 15.9.5 → 15.10.0 - Mend

@oh-my-pi/pi-coding-agent 15.9.5 → 15.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192) hide show

package/CHANGELOG.md +98 -1
package/dist/types/cli/args.d.ts +1 -1
package/dist/types/cli/gallery-cli.d.ts +43 -0
package/dist/types/cli/gallery-fixtures/agentic.d.ts +2 -0
package/dist/types/cli/gallery-fixtures/codeintel.d.ts +3 -0
package/dist/types/cli/gallery-fixtures/edit.d.ts +3 -0
package/dist/types/cli/gallery-fixtures/fs.d.ts +2 -0
package/dist/types/cli/gallery-fixtures/index.d.ts +4 -0
package/dist/types/cli/gallery-fixtures/interaction.d.ts +3 -0
package/dist/types/cli/gallery-fixtures/memory.d.ts +2 -0
package/dist/types/cli/gallery-fixtures/misc.d.ts +3 -0
package/dist/types/cli/gallery-fixtures/search.d.ts +3 -0
package/dist/types/cli/gallery-fixtures/shell.d.ts +3 -0
package/dist/types/cli/gallery-fixtures/types.d.ts +44 -0
package/dist/types/cli/gallery-fixtures/web.d.ts +2 -0
package/dist/types/cli/gallery-screenshot.d.ts +35 -0
package/dist/types/commands/gallery.d.ts +47 -0
package/dist/types/config/keybindings.d.ts +10 -2
package/dist/types/config/model-id-affixes.d.ts +2 -0
package/dist/types/config/model-registry.d.ts +8 -1
package/dist/types/config/settings-schema.d.ts +43 -7
package/dist/types/edit/file-snapshot-store.d.ts +1 -1
package/dist/types/eval/backend.d.ts +6 -6
package/dist/types/eval/bridge-timeout.d.ts +27 -0
package/dist/types/eval/idle-timeout.d.ts +16 -14
package/dist/types/eval/js/executor.d.ts +3 -3
package/dist/types/eval/py/executor.d.ts +2 -2
package/dist/types/eval/py/spawn-options.d.ts +58 -0
package/dist/types/extensibility/plugins/marketplace-auto-update.d.ts +8 -0
package/dist/types/lsp/types.d.ts +10 -0
package/dist/types/main.d.ts +3 -2
package/dist/types/memory-backend/index.d.ts +2 -1
package/dist/types/memory-backend/resolve.d.ts +1 -1
package/dist/types/memory-backend/types.d.ts +1 -1
package/dist/types/modes/components/assistant-message.d.ts +5 -0
package/dist/types/modes/components/copy-selector.d.ts +22 -0
package/dist/types/modes/components/custom-editor.d.ts +2 -1
package/dist/types/modes/components/model-selector.d.ts +1 -0
package/dist/types/modes/components/tool-execution.d.ts +18 -0
package/dist/types/modes/controllers/command-controller.d.ts +0 -1
package/dist/types/modes/controllers/selector-controller.d.ts +2 -1
package/dist/types/modes/index.d.ts +5 -4
package/dist/types/modes/interactive-mode.d.ts +2 -2
package/dist/types/modes/setup-version.d.ts +11 -0
package/dist/types/modes/setup-wizard/index.d.ts +2 -1
package/dist/types/modes/setup-wizard/scenes/web-search.d.ts +2 -1
package/dist/types/modes/types.d.ts +2 -2
package/dist/types/modes/utils/copy-targets.d.ts +53 -0
package/dist/types/sdk.d.ts +1 -1
package/dist/types/task/executor.d.ts +7 -0
package/dist/types/telemetry-export.d.ts +1 -1
package/dist/types/tools/eval-render.d.ts +1 -0
package/dist/types/tools/fetch.d.ts +15 -7
package/dist/types/tools/render-utils.d.ts +33 -0
package/dist/types/tools/renderers.d.ts +16 -2
package/dist/types/tools/search.d.ts +1 -1
package/dist/types/tools/write.d.ts +2 -0
package/dist/types/tui/code-cell.d.ts +6 -0
package/dist/types/tui/output-block.d.ts +11 -0
package/dist/types/web/scrapers/github.d.ts +22 -0
package/dist/types/web/search/providers/perplexity.d.ts +8 -1
package/dist/types/web/search/types.d.ts +1 -1
package/package.json +9 -9
package/scripts/dev-launch +42 -0
package/scripts/dev-launch-preload.ts +19 -0
package/src/autoresearch/dashboard.ts +11 -21
package/src/cli/args.ts +2 -2
package/src/cli/claude-trace-cli.ts +13 -1
package/src/cli/gallery-cli.ts +223 -0
package/src/cli/gallery-fixtures/agentic.ts +292 -0
package/src/cli/gallery-fixtures/codeintel.ts +188 -0
package/src/cli/gallery-fixtures/edit.ts +194 -0
package/src/cli/gallery-fixtures/fs.ts +153 -0
package/src/cli/gallery-fixtures/index.ts +40 -0
package/src/cli/gallery-fixtures/interaction.ts +49 -0
package/src/cli/gallery-fixtures/memory.ts +81 -0
package/src/cli/gallery-fixtures/misc.ts +221 -0
package/src/cli/gallery-fixtures/search.ts +213 -0
package/src/cli/gallery-fixtures/shell.ts +167 -0
package/src/cli/gallery-fixtures/types.ts +41 -0
package/src/cli/gallery-fixtures/web.ts +158 -0
package/src/cli/gallery-screenshot.ts +279 -0
package/src/cli-commands.ts +1 -0
package/src/commands/gallery.ts +52 -0
package/src/commands/launch.ts +1 -1
package/src/config/keybindings.ts +68 -2
package/src/config/model-equivalence.ts +35 -12
package/src/config/model-id-affixes.ts +39 -22
package/src/config/model-registry.ts +16 -16
package/src/config/settings-schema.ts +29 -6
package/src/config/settings.ts +11 -0
package/src/dap/client.ts +14 -16
package/src/debug/raw-sse.ts +18 -4
package/src/edit/file-snapshot-store.ts +1 -1
package/src/edit/index.ts +1 -1
package/src/edit/renderer.ts +43 -55
package/src/edit/streaming.ts +1 -1
package/src/eval/__tests__/agent-bridge.test.ts +102 -58
package/src/eval/__tests__/bridge-timeout.test.ts +64 -0
package/src/eval/__tests__/idle-timeout.test.ts +26 -12
package/src/eval/__tests__/kernel-spawn.test.ts +103 -0
package/src/eval/__tests__/llm-bridge.test.ts +10 -10
package/src/eval/agent-bridge.ts +38 -12
package/src/eval/backend.ts +6 -6
package/src/eval/bridge-timeout.ts +44 -0
package/src/eval/idle-timeout.ts +33 -15
package/src/eval/js/executor.ts +10 -10
package/src/eval/llm-bridge.ts +4 -5
package/src/eval/py/executor.ts +6 -6
package/src/eval/py/kernel.ts +11 -1
package/src/eval/py/spawn-options.ts +126 -0
package/src/export/ttsr.ts +9 -0
package/src/extensibility/extensions/runner.ts +3 -0
package/src/extensibility/plugins/doctor.ts +0 -1
package/src/extensibility/plugins/marketplace-auto-update.ts +49 -0
package/src/goals/tools/goal-tool.ts +2 -2
package/src/internal-urls/docs-index.generated.ts +7 -6
package/src/lsp/client.ts +179 -52
package/src/lsp/index.ts +38 -4
package/src/lsp/render.ts +3 -3
package/src/lsp/types.ts +10 -0
package/src/main.ts +47 -52
package/src/memory-backend/index.ts +13 -1
package/src/memory-backend/resolve.ts +3 -5
package/src/memory-backend/types.ts +1 -1
package/src/modes/components/agent-dashboard.ts +13 -4
package/src/modes/components/assistant-message.ts +22 -1
package/src/modes/components/copy-selector.ts +249 -0
package/src/modes/components/custom-editor.ts +10 -1
package/src/modes/components/extensions/extension-list.ts +17 -8
package/src/modes/components/history-search.ts +19 -11
package/src/modes/components/model-selector.ts +125 -29
package/src/modes/components/oauth-selector.ts +28 -12
package/src/modes/components/session-observer-overlay.ts +13 -15
package/src/modes/components/session-selector.ts +24 -13
package/src/modes/components/status-line.ts +3 -5
package/src/modes/components/tool-execution.ts +83 -24
package/src/modes/components/tree-selector.ts +19 -7
package/src/modes/components/user-message-selector.ts +25 -14
package/src/modes/controllers/command-controller.ts +13 -118
package/src/modes/controllers/event-controller.ts +26 -10
package/src/modes/controllers/input-controller.ts +11 -3
package/src/modes/controllers/selector-controller.ts +40 -3
package/src/modes/index.ts +5 -4
package/src/modes/interactive-mode.ts +21 -7
package/src/modes/setup-version.ts +11 -0
package/src/modes/setup-wizard/index.ts +3 -2
package/src/modes/setup-wizard/scenes/web-search.ts +3 -2
package/src/modes/theme/theme.ts +46 -10
package/src/modes/types.ts +2 -2
package/src/modes/utils/context-usage.ts +10 -6
package/src/modes/utils/copy-targets.ts +254 -0
package/src/modes/utils/hotkeys-markdown.ts +1 -0
package/src/prompts/tools/ast-edit.md +1 -1
package/src/prompts/tools/ast-grep.md +1 -1
package/src/prompts/tools/read.md +1 -1
package/src/prompts/tools/search.md +1 -1
package/src/sdk.ts +21 -23
package/src/session/agent-session.ts +13 -9
package/src/slash-commands/builtin-registry.ts +4 -12
package/src/slash-commands/helpers/usage-report.ts +2 -0
package/src/task/executor.ts +20 -2
package/src/task/render.ts +37 -11
package/src/telemetry-export.ts +25 -7
package/src/tools/bash.ts +18 -8
package/src/tools/browser/render.ts +5 -4
package/src/tools/debug.ts +3 -3
package/src/tools/eval-backends.ts +6 -17
package/src/tools/eval-render.ts +28 -10
package/src/tools/eval.ts +19 -23
package/src/tools/fetch.ts +99 -89
package/src/tools/read.ts +7 -7
package/src/tools/render-utils.ts +63 -3
package/src/tools/renderers.ts +16 -1
package/src/tools/report-tool-issue.ts +1 -1
package/src/tools/search.ts +173 -81
package/src/tools/ssh.ts +21 -8
package/src/tools/todo.ts +20 -7
package/src/tools/write.ts +39 -9
package/src/tui/code-cell.ts +19 -4
package/src/tui/output-block.ts +14 -0
package/src/web/scrapers/github.ts +255 -3
package/src/web/scrapers/youtube.ts +3 -2
package/src/web/search/providers/perplexity.ts +199 -51
package/src/web/search/render.ts +42 -57
package/src/web/search/types.ts +5 -1
package/dist/types/eval/heartbeat.d.ts +0 -45
package/src/eval/__tests__/heartbeat.test.ts +0 -84
package/src/eval/__tests__/shared-executors.test.ts +0 -609
package/src/eval/heartbeat.ts +0 -74
/package/dist/types/eval/__tests__/{heartbeat.test.d.ts → bridge-timeout.test.d.ts} +0 -0
/package/dist/types/eval/__tests__/{shared-executors.test.d.ts → kernel-spawn.test.d.ts} +0 -0

package/src/eval/__tests__/agent-bridge.test.ts CHANGED Viewed

@@ -10,7 +10,7 @@ import { AgentOutputManager } from "../../task/output-manager";
 import type { AgentDefinition, AgentProgress, SingleResult } from "../../task/types";
 import type { ToolSession } from "../../tools";
 import { EVAL_AGENT_MAX_DEPTH, runEvalAgent } from "../agent-bridge";
-import { EVAL_HEARTBEAT_OP, setBridgeHeartbeatIntervalMs } from "../heartbeat";
+import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
 import { IdleTimeout } from "../idle-timeout";
 import { disposeAllVmContexts } from "../js/context-manager";
 import { executeJs } from "../js/executor";
@@ -231,12 +231,62 @@ describe("runEvalAgent", () => {
 		});
 		await expect(runEvalAgent({ prompt: "fail" }, { session: makeSession() })).rejects.toThrow("boom");
 	});
+	// Regression: a runtime-limit abort returns exitCode=1, stderr="", error=undefined,
+	// aborted=true, abortReason="Subagent runtime limit exceeded (...)". The previous
+	// failure-message coalesce stopped at the empty `stderr` (since `??` only skips
+	// nullish values) and shipped an empty error through the bridge — Python then
+	// surfaced the generic `bridge call '__agent__' failed`. See #2006.
+	it("surfaces abortReason for aborts that leave stderr empty", async () => {
+		mockAgents();
+		const runSpy = vi.spyOn(taskExecutor, "runSubprocess");
+		runSpy.mockImplementationOnce(async options =>
+			singleResult(options, {
+				exitCode: 1,
+				output: "",
+				stderr: "",
+				error: undefined,
+				aborted: true,
+				abortReason: "Subagent runtime limit exceeded (task.maxRuntimeMs=900000)",
+			}),
+		);
+		runSpy.mockImplementationOnce(async options =>
+			singleResult(options, {
+				exitCode: 1,
+				output: "",
+				stderr: "   ",
+				error: "   ",
+				aborted: true,
+				abortReason: "Cancelled by caller",
+			}),
+		);
+		runSpy.mockImplementationOnce(async options =>
+			singleResult(options, {
+				exitCode: 1,
+				output: "",
+				stderr: "",
+				error: undefined,
+			}),
+		);
+		await expect(runEvalAgent({ prompt: "slow" }, { session: makeSession() })).rejects.toThrow(
+			"Subagent runtime limit exceeded (task.maxRuntimeMs=900000)",
+		);
+		// Whitespace-only stderr/error must not mask abortReason either.
+		await expect(runEvalAgent({ prompt: "cancelled" }, { session: makeSession() })).rejects.toThrow(
+			"Cancelled by caller",
+		);
+		// Last resort: still produce a non-empty message even when nothing useful is set,
+		// so Python never falls back to `bridge call '__agent__' failed`.
+		await expect(runEvalAgent({ prompt: "blank" }, { session: makeSession() })).rejects.toThrow(
+			"agent() subagent 'task' failed.",
+		);
+	});
 });
 describe("agent() through eval runtimes", () => {
 	afterEach(() => {
 		vi.restoreAllMocks();
-		setBridgeHeartbeatIntervalMs();
 	});
 	afterAll(async () => {
@@ -327,18 +377,6 @@ describe("agent() through eval runtimes", () => {
 			singleResult(options, { output: "hello from python" }),
 		);
-		const probe = await executePython('print("probe")', {
-			cwd: tempDir.path(),
-			sessionId: `${sessionId}:probe`,
-			sessionFile,
-			kernelMode: "per-call",
-		});
-		if (probe.exitCode === undefined && probe.cancelled) {
-			expect(probe.output).toBe("");
-			return;
-		}
-		expect(probe.exitCode).toBe(0);
 		const result = await executePython('print(agent("hi"))', {
 			cwd: tempDir.path(),
 			sessionId,
@@ -346,6 +384,10 @@ describe("agent() through eval runtimes", () => {
 			kernelMode: "per-call",
 			toolSession: session,
 		});
+		if (result.exitCode === undefined && result.cancelled) {
+			expect(result.output).toBe("");
+			return; // kernel unavailable in this environment
+		}
 		expect(result.exitCode).toBe(0);
 		expect(result.output.trim()).toBe("hello from python");
@@ -374,22 +416,14 @@ describe("agent() through eval runtimes", () => {
 			}
 		});
-		const probe = await executePython('print("probe")', {
-			cwd: tempDir.path(),
-			sessionId: `${sessionId}:probe`,
-			sessionFile,
-			kernelMode: "per-call",
-		});
-		if (probe.exitCode === undefined && probe.cancelled) {
-			expect(probe.output).toBe("");
-			return;
-		}
-		expect(probe.exitCode).toBe(0);
 		const result = await executePython(
 			'import json\nprint(json.dumps(parallel([lambda n=n: agent(n) for n in ["a", "b", "c", "d"]])))',
 			{ cwd: tempDir.path(), sessionId, sessionFile, kernelMode: "per-call", toolSession: session },
 		);
+		if (result.exitCode === undefined && result.cancelled) {
+			expect(result.output).toBe("");
+			return; // kernel unavailable in this environment
+		}
 		expect(result.exitCode).toBe(0);
 		expect(JSON.parse(result.output.trim())).toEqual(["a", "b", "c", "d"]);
@@ -413,7 +447,14 @@ describe("agent() through eval runtimes", () => {
 		// The host must respond the instant the cell aborts so the kernel can
 		// unwind via KeyboardInterrupt instead of being hard-killed (which used to
 		// surface "[kernel] Python kernel shutdown" and lose all session state).
+		let inFlight = 0;
+		let markSaturated: (() => void) | undefined;
+		const saturated = new Promise<void>(resolve => {
+			markSaturated = resolve;
+		});
 		vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
+			// task.maxConcurrency=6 → six bridge calls block at once; signal then.
+			if (++inFlight >= 6) markSaturated?.();
 			await Bun.sleep(9000); // deliberately ignores options.signal
 			return singleResult(options, { output: options.assignment ?? "" });
 		});
@@ -433,8 +474,9 @@ describe("agent() through eval runtimes", () => {
 		expect(seed.exitCode).toBe(0);
 		const ac = new AbortController();
-		// Abort ~1s in, after the worker threads are blocked in their bridge calls.
-		setTimeout(() => ac.abort(new Error("external interrupt")), 1000);
+		// Abort the instant all six worker threads are confirmed blocked in their
+		// bridge calls (condition-driven) instead of waiting a fixed wall second.
+		void saturated.then(() => ac.abort(new Error("external interrupt")));
 		const start = Date.now();
 		const result = await executePython(
@@ -560,52 +602,52 @@ describe("agent() through eval runtimes", () => {
 		expect(displayAgentEvents.length).toBe(2);
 	});
-	it("keeps the idle watchdog armed while a quiet agent() runs past the budget", async () => {
-		using tempDir = TempDir.createSync("@omp-eval-agent-heartbeat-");
-		const { session } = makeEvalSession(tempDir, "js-agent-heartbeat");
+	it("pauses the idle watchdog while a quiet agent() runs past the budget", async () => {
+		using tempDir = TempDir.createSync("@omp-eval-agent-timeout-pause-");
+		const { session } = makeEvalSession(tempDir, "js-agent-timeout-pause");
 		mockAgents();
-		// Heartbeat cadence well under the idle budget so a working-but-silent
-		// subagent re-arms the watchdog several times before it could expire.
-		setBridgeHeartbeatIntervalMs(15);
-		// runSubprocess runs far past the budget and emits NO progress of its own
-		// — the only thing standing between the subagent and a spurious idle abort
-		// is the heartbeat keepalive the bridge pumps while it awaits.
+		// runSubprocess runs far past the eval timeout budget and emits NO progress
+		// of its own. The bridge pause must make that delegated time invisible to
+		// the watchdog.
 		vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
-			await Bun.sleep(200);
+			await Bun.sleep(40);
 			return singleResult(options, { output: "done" });
 		});
-		// Mirror the eval tool's wiring: an IdleTimeout drives cancellation and
-		// ONLY a bridge heartbeat re-arms it.
-		using idle = new IdleTimeout(60);
+		const ops: string[] = [];
+		using idle = new IdleTimeout(20);
 		const result = await runEvalAgent(
 			{ prompt: "investigate" },
 			{
 				session,
 				signal: idle.signal,
 				emitStatus: event => {
-					if (event.op === EVAL_HEARTBEAT_OP) idle.bump();
+					ops.push(event.op);
+					if (event.op === EVAL_TIMEOUT_PAUSE_OP) idle.pause();
+					if (event.op === EVAL_TIMEOUT_RESUME_OP) idle.resume();
 				},
 			},
 		);
-		expect(idle.signal.aborted).toBe(false);
 		expect(result.text).toBe("done");
+		expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
+		expect(idle.signal.aborted).toBe(false);
+		await Bun.sleep(60);
+		expect(idle.signal.aborted).toBe(true);
 	});
-	it("does not let agent() progress snapshots re-arm the watchdog without a heartbeat", async () => {
-		using tempDir = TempDir.createSync("@omp-eval-agent-progress-no-rearm-");
-		const { session } = makeEvalSession(tempDir, "js-agent-progress-no-rearm");
+	it("keeps timeout paused despite agent() progress snapshots", async () => {
+		using tempDir = TempDir.createSync("@omp-eval-agent-progress-timeout-pause-");
+		const { session } = makeEvalSession(tempDir, "js-agent-progress-timeout-pause");
 		mockAgents();
-		// Heartbeat slower than the budget: only the immediate beat at call start
-		// fires, so after the budget elapses nothing re-arms the watchdog.
-		setBridgeHeartbeatIntervalMs(10_000);
 		// Stream frequent progress snapshots (op:"agent") for well past the budget.
-		// Progress is rendered but MUST NOT count as activity — only heartbeats do.
+		// They render as status, but timeout accounting is controlled only by the
+		// bridge pause/resume events.
 		vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
-			for (let i = 0; i < 40; i++) {
+			for (let i = 0; i < 20; i++) {
 				options.onProgress?.({
 					index: options.index,
 					id: options.id,
@@ -622,28 +664,30 @@ describe("agent() through eval runtimes", () => {
 					cost: 0,
 					durationMs: i * 10,
 				});
-				await Bun.sleep(10);
+				await Bun.sleep(5);
 			}
 			return singleResult(options, { output: "done" });
 		});
 		const ops: string[] = [];
-		using idle = new IdleTimeout(80);
-		await runEvalAgent(
+		using idle = new IdleTimeout(40);
+		const result = await runEvalAgent(
 			{ prompt: "investigate" },
 			{
 				session,
 				signal: idle.signal,
 				emitStatus: event => {
 					ops.push(event.op);
-					if (event.op === EVAL_HEARTBEAT_OP) idle.bump();
+					if (event.op === EVAL_TIMEOUT_PAUSE_OP) idle.pause();
+					if (event.op === EVAL_TIMEOUT_RESUME_OP) idle.resume();
 				},
 			},
 		);
-		// Progress streamed, but the watchdog still fired: agent snapshots never
-		// re-armed it, and the lone start heartbeat lapsed before the call ended.
+		expect(result.text).toBe("done");
+		expect(ops[0]).toBe(EVAL_TIMEOUT_PAUSE_OP);
 		expect(ops).toContain("agent");
-		expect(idle.signal.aborted).toBe(true);
+		expect(ops.at(-1)).toBe(EVAL_TIMEOUT_RESUME_OP);
+		expect(idle.signal.aborted).toBe(false);
 	});
 });

package/src/eval/__tests__/bridge-timeout.test.ts ADDED Viewed

@@ -0,0 +1,64 @@
+import { describe, expect, it } from "bun:test";
+import {
+	EVAL_TIMEOUT_PAUSE_OP,
+	EVAL_TIMEOUT_RESUME_OP,
+	isEvalTimeoutControlEvent,
+	withBridgeTimeoutPause,
+} from "../bridge-timeout";
+import type { JsStatusEvent } from "../js/shared/types";
+describe("withBridgeTimeoutPause", () => {
+	it("emits one pause before the operation and one resume after it settles", async () => {
+		const events: JsStatusEvent[] = [];
+		const value = await withBridgeTimeoutPause(
+			event => events.push(event),
+			async () => {
+				await Bun.sleep(80);
+				return "done";
+			},
+		);
+		expect(value).toBe("done");
+		expect(events.map(event => event.op)).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
+		const settledCount = events.length;
+		await Bun.sleep(40);
+		expect(events.length).toBe(settledCount);
+	});
+	it("resumes timeout accounting even when the operation throws", async () => {
+		const events: JsStatusEvent[] = [];
+		await expect(
+			withBridgeTimeoutPause(
+				event => events.push(event),
+				async () => {
+					await Bun.sleep(20);
+					throw new Error("boom");
+				},
+			),
+		).rejects.toThrow("boom");
+		expect(events.map(event => event.op)).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
+	});
+	it("runs the operation without emitting when no status sink is wired", async () => {
+		let ran = 0;
+		const value = await withBridgeTimeoutPause(undefined, async () => {
+			ran++;
+			await Bun.sleep(20);
+			return 42;
+		});
+		expect(value).toBe(42);
+		expect(ran).toBe(1);
+	});
+	it("identifies timeout-control events as non-renderable status", () => {
+		expect(isEvalTimeoutControlEvent({ op: EVAL_TIMEOUT_PAUSE_OP })).toBe(true);
+		expect(isEvalTimeoutControlEvent({ op: EVAL_TIMEOUT_RESUME_OP })).toBe(true);
+		expect(isEvalTimeoutControlEvent({ op: "agent", id: "subagent-1" })).toBe(false);
+	});
+});

package/src/eval/__tests__/idle-timeout.test.ts CHANGED Viewed

@@ -32,21 +32,34 @@ describe("IdleTimeout", () => {
 		expect((idle.signal.reason as DOMException).name).toBe("TimeoutError");
 	});
-	it("re-arms on every bump and only fires after activity stops", async () => {
-		using idle = new IdleTimeout(150);
-		// Bump well past a single window; each bump must push the deadline forward
-		// so the watchdog never trips while activity continues.
-		for (let i = 0; i < 6; i++) {
-			await Bun.sleep(40);
-			idle.bump();
-		}
+	it("ignores elapsed time while paused and resumes with a fresh window", async () => {
+		using idle = new IdleTimeout(80);
+		idle.pause();
+		await Bun.sleep(160);
 		expect(idle.signal.aborted).toBe(false);
-		// Activity stopped — the watchdog should now fire within roughly one window.
-		const fired = await abortedWithin(idle.signal, 800);
+		idle.resume();
+		const firedEarly = await abortedWithin(idle.signal, 30);
+		expect(firedEarly).toBe(false);
+		const fired = await abortedWithin(idle.signal, 500);
 		expect(fired).toBe(true);
 	});
+	it("reference-counts overlapping pauses", async () => {
+		using idle = new IdleTimeout(60);
+		idle.pause();
+		idle.pause();
+		await Bun.sleep(120);
+		expect(idle.signal.aborted).toBe(false);
+		idle.resume();
+		await Bun.sleep(90);
+		expect(idle.signal.aborted).toBe(false);
+		idle.resume();
+		const fired = await abortedWithin(idle.signal, 500);
+		expect(fired).toBe(true);
+	});
 	it("never fires after dispose()", async () => {
 		const idle = new IdleTimeout(30);
 		idle.dispose();
@@ -55,12 +68,13 @@ describe("IdleTimeout", () => {
 		expect(idle.signal.aborted).toBe(false);
 	});
-	it("ignores bump() after the watchdog has already fired", async () => {
+	it("ignores pause/resume after the watchdog has already fired", async () => {
 		using idle = new IdleTimeout(30);
 		await abortedWithin(idle.signal, 500);
 		expect(idle.signal.aborted).toBe(true);
 		// Late activity must not un-abort or rearm a settled watchdog.
-		idle.bump();
+		idle.pause();
+		idle.resume();
 		expect(idle.signal.aborted).toBe(true);
 	});
 });

package/src/eval/__tests__/kernel-spawn.test.ts ADDED Viewed

@@ -0,0 +1,103 @@
+import { afterEach, describe, expect, it } from "bun:test";
+import {
+	__resetWindowsConsoleProbeCache,
+	consoleAttachedViaTTY,
+	hostHasInheritableConsole,
+	shouldHideKernelWindow,
+} from "../py/spawn-options";
+/**
+ * `shouldHideKernelWindow` decides whether the long-lived Python kernel
+ * subprocess is spawned with `windowsHide: true`. On Windows, Bun maps that
+ * option to `CREATE_NO_WINDOW`, which detaches the child from any inherited
+ * console — breaking both (a) `LoadLibraryExW` for NumPy/pandas native
+ * extensions and (b) SIGINT delivery via `GenerateConsoleCtrlEvent`. See
+ * issue #1960. The tests below pin the three layered concerns the PR review
+ * surfaced:
+ *
+ * 1. `shouldHideKernelWindow` — pure predicate over a single boolean.
+ * 2. `consoleAttachedViaTTY` — the TTY-OR fallback used when the Win32 FFI
+ *    probe is unavailable; covers the partial-redirection cases.
+ * 3. `hostHasInheritableConsole` — the integration boundary. Off-Windows it
+ *    short-circuits to the TTY fallback; on Windows it is expected to
+ *    consult `kernel32!GetConsoleWindow()` first, which is the authoritative
+ *    signal even for the all-stdio-redirected case.
+ */
+describe("shouldHideKernelWindow", () => {
+	it("inherits the host console on Windows when one is attached", () => {
+		// Reporter's repro: omp launched in Windows Terminal, host has a
+		// console, kernel must inherit so `import pandas` doesn't deadlock in
+		// `_multiarray_umath` and SIGINT can recover the cell.
+		expect(shouldHideKernelWindow({ platform: "win32", hostHasInheritableConsole: true })).toBe(false);
+	});
+	it("hides on Windows only when the host has no console at all (true service / daemon)", () => {
+		// CREATE_NO_WINDOW here suppresses the console window Windows would
+		// otherwise auto-allocate for the console-app Python kernel.
+		expect(shouldHideKernelWindow({ platform: "win32", hostHasInheritableConsole: false })).toBe(true);
+	});
+	it("never sets windowsHide off-Windows (the option is a Win32-only flag)", () => {
+		// On POSIX `windowsHide` is a no-op; the predicate must return false
+		// everywhere off-Windows so the spawn site matches pre-fix behavior.
+		expect(shouldHideKernelWindow({ platform: "linux", hostHasInheritableConsole: true })).toBe(false);
+		expect(shouldHideKernelWindow({ platform: "linux", hostHasInheritableConsole: false })).toBe(false);
+		expect(shouldHideKernelWindow({ platform: "darwin", hostHasInheritableConsole: true })).toBe(false);
+		expect(shouldHideKernelWindow({ platform: "darwin", hostHasInheritableConsole: false })).toBe(false);
+	});
+});
+describe("consoleAttachedViaTTY (FFI fallback heuristic)", () => {
+	// The OR of three TTY signals correctly classifies the realistic shell
+	// redirection scenarios that motivated widening the check beyond stdout
+	// in the first review pass (PR #1961). The all-three-redirected case
+	// (false here) is the gap that the Win32 FFI probe in
+	// `hostHasInheritableConsole` is meant to close — this fallback is best-
+	// effort.
+	it("treats a fully interactive launch as console-attached", () => {
+		expect(consoleAttachedViaTTY({ stdinIsTTY: true, stdoutIsTTY: true, stderrIsTTY: true })).toBe(true);
+	});
+	it("treats `omp -p '...' > out.txt` (stdout-only redirect) as console-attached", () => {
+		// The reviewer's first-pass repro: stdout off the terminal, stdin
+		// and stderr still attached. OR keeps the console.
+		expect(consoleAttachedViaTTY({ stdinIsTTY: true, stdoutIsTTY: false, stderrIsTTY: true })).toBe(true);
+	});
+	it("treats stdin-only redirects (`< in.txt`) as console-attached", () => {
+		expect(consoleAttachedViaTTY({ stdinIsTTY: false, stdoutIsTTY: true, stderrIsTTY: true })).toBe(true);
+	});
+	it("treats stderr-only redirects (`2> err.log`) as console-attached", () => {
+		expect(consoleAttachedViaTTY({ stdinIsTTY: true, stdoutIsTTY: true, stderrIsTTY: false })).toBe(true);
+	});
+	it("returns false only when none of stdin/stdout/stderr is a TTY", () => {
+		// This is the gap: a real Windows Terminal session with all three
+		// streams redirected (`omp ... < in > out 2> err`) lands here.
+		// `hostHasInheritableConsole` uses the Win32 FFI probe to recover
+		// the right answer in that scenario; this helper is the fallback.
+		expect(consoleAttachedViaTTY({ stdinIsTTY: false, stdoutIsTTY: false, stderrIsTTY: false })).toBe(false);
+	});
+});
+describe("hostHasInheritableConsole", () => {
+	afterEach(() => {
+		__resetWindowsConsoleProbeCache();
+	});
+	if (process.platform !== "win32") {
+		it("matches the TTY-OR fallback off-Windows", () => {
+			// Off-Windows, `windowsHide` is a no-op anyway, but we still
+			// expose `hostHasInheritableConsole` symmetrically. Confirm it
+			// degrades to the same OR the call site would compute by hand.
+			const tty = consoleAttachedViaTTY({
+				stdinIsTTY: !!process.stdin.isTTY,
+				stdoutIsTTY: !!process.stdout.isTTY,
+				stderrIsTTY: !!process.stderr.isTTY,
+			});
+			expect(hostHasInheritableConsole()).toBe(tty);
+		});
+	}
+});

package/src/eval/__tests__/llm-bridge.test.ts CHANGED Viewed

@@ -8,7 +8,7 @@ import type { ModelRegistry } from "../../config/model-registry";
 import { Settings } from "../../config/settings";
 import type { ToolSession } from "../../tools";
 import { ToolError } from "../../tools/tool-errors";
-import { EVAL_HEARTBEAT_OP, setBridgeHeartbeatIntervalMs } from "../heartbeat";
+import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
 import { IdleTimeout } from "../idle-timeout";
 import { disposeAllVmContexts } from "../js/context-manager";
 import { executeJs } from "../js/executor";
@@ -99,7 +99,6 @@ function assistant(opts: {
 describe("runEvalLlm", () => {
 	afterEach(() => {
 		vi.restoreAllMocks();
-		setBridgeHeartbeatIntervalMs();
 	});
 	it("resolves each tier to its expected model", async () => {
@@ -217,31 +216,32 @@ describe("runEvalLlm", () => {
 		);
 	});
-	it("keeps the idle watchdog armed while a slow llm() request is in flight", async () => {
-		// A oneshot completion emits no status until it returns; a slow request
-		// must not look like a stalled cell. The bridge pumps a heartbeat while it
-		// awaits, re-arming the watchdog through emitStatus.
-		setBridgeHeartbeatIntervalMs(15);
+	it("pauses the idle watchdog while a slow llm() request is in flight", async () => {
+		// A oneshot completion emits no status until it returns; delegated model
+		// time must be invisible to the eval timeout budget.
 		vi.spyOn(ai, "completeSimple").mockImplementation(async () => {
 			await Bun.sleep(200);
 			return assistant({ text: "the answer" });
 		});
+		const ops: string[] = [];
 		using idle = new IdleTimeout(60);
 		const result = await runEvalLlm(
 			{ prompt: "q", model: "smol" },
 			{
 				session: makeSession(),
 				signal: idle.signal,
-				// Mirror the eval tool: only a bridge heartbeat re-arms the watchdog.
 				emitStatus: event => {
-					if (event.op === EVAL_HEARTBEAT_OP) idle.bump();
+					ops.push(event.op);
+					if (event.op === EVAL_TIMEOUT_PAUSE_OP) idle.pause();
+					if (event.op === EVAL_TIMEOUT_RESUME_OP) idle.resume();
 				},
 			},
 		);
-		expect(idle.signal.aborted).toBe(false);
 		expect(result.text).toBe("the answer");
+		expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "llm"]);
+		expect(idle.signal.aborted).toBe(false);
 	});
 });

package/src/eval/agent-bridge.ts CHANGED Viewed

@@ -13,10 +13,10 @@ import subagentUserPromptTemplate from "../prompts/system/subagent-user-prompt.m
 import * as taskDiscovery from "../task/discovery";
 import * as taskExecutor from "../task/executor";
 import { AgentOutputManager } from "../task/output-manager";
-import type { AgentDefinition, AgentProgress } from "../task/types";
+import type { AgentDefinition, AgentProgress, SingleResult } from "../task/types";
 import type { ToolSession } from "../tools";
 import { ToolError } from "../tools/tool-errors";
-import { withBridgeHeartbeat } from "./heartbeat";
+import { withBridgeTimeoutPause } from "./bridge-timeout";
 import type { JsStatusEvent } from "./js/shared/types";
 // Import review tools for side effects (registers subagent tool handlers).
 import "../tools/review";
@@ -173,6 +173,26 @@ function emitProgressStatus(emitStatus: ((event: JsStatusEvent) => void) | undef
 	});
 }
+/**
+ * Coalesce a subagent failure into a non-empty, human-meaningful error message.
+ *
+ * When the executor aborts a subagent (runtime limit, parent cancellation, …)
+ * the actionable explanation lives on `abortReason`, while `error`/`stderr`
+ * are routinely empty strings. Plain `??` coalescing stops at the empty string
+ * and ships an empty error through the bridge — Python then surfaces only the
+ * generic `bridge call '__agent__' failed`. See #2006.
+ *
+ * Every candidate is passed through `trimToUndefined` before coalescing
+ * (presumably it maps blank strings to `undefined` — confirm against its
+ * definition), so `??` can skip unusable values instead of stopping on them.
+ *
+ * @param agentName - Subagent name, interpolated into the generic fallback message.
+ * @param result - Subagent result; only `aborted`, `abortReason`, `error`, and
+ *   `stderr` are read here.
+ * @returns The first usable message in priority order: the abort reason when the
+ *   run was aborted, then `error`, then `stderr`, then the abort reason of a
+ *   result not flagged as aborted, and finally a generic fallback naming the agent.
+ */
+function buildSubagentFailureMessage(agentName: string, result: SingleResult): string {
+	const abortReason = trimToUndefined(result.abortReason);
+	if (result.aborted && abortReason) return abortReason; // aborted run: the abort reason outranks error/stderr
+	return (
+		trimToUndefined(result.error) ??
+		trimToUndefined(result.stderr) ??
+		abortReason ?? // a defined value here implies `result.aborted` was falsy (see the early return)
+		`agent() subagent '${agentName}' failed.`
+	);
+}
 /**
  * Run a single subagent on behalf of an eval cell's `agent()` call.
  */
@@ -225,17 +245,15 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
 		getSessionId: options.session.getSessionId ?? (() => null),
 	};
 	const parentArtifactManager = options.session.getArtifactManager?.() ?? undefined;
-	const parentEvalSessionId = options.session.getEvalSessionId?.() ?? undefined;
 	const mcpManager = options.session.mcpManager ?? MCPManager.instance();
 	const { sessionFile, artifactsDir, contextFile } = await getArtifacts(options.session);
 	const outputManager = getOutputManager(options.session);
 	const id = await outputManager.allocate(outputIdBase(parsed.label, agentName));
 	const assignment = parsed.prompt.trim();
 	const context = trimToUndefined(parsed.context);
-	// Pump a heartbeat while the subagent runs so the eval idle watchdog stays
-	// armed across quiet stretches (time-to-first-token, long nested tools)
-	// where `onProgress` would otherwise emit no status to re-arm it.
-	const result = await withBridgeHeartbeat(options.emitStatus, () =>
+	// Suspend eval timeout accounting while the subagent owns control. The
+	// timeout clock restarts once the bridge returns to the cell runtime.
+	const result = await withBridgeTimeoutPause(options.emitStatus, () =>
 		taskExecutor.runSubprocess({
 			cwd: options.session.cwd,
 			agent: effectiveAgent,
@@ -261,6 +279,12 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
 			authStorage: options.session.authStorage,
 			modelRegistry: options.session.modelRegistry,
 			settings: options.session.settings,
+			// Eval `agent()` subagents are never wall-clock capped: the parent
+			// cell's idle watchdog is suspended for the whole bridge call
+			// (withBridgeTimeoutPause), so a long-running phase/recovery workflow
+			// must not be killed by `task.maxRuntimeMs`. Force the limit off
+			// regardless of the inherited session setting.
+			maxRuntimeMs: 0,
 			mcpManager,
 			contextFiles,
 			skills: availableSkills,
@@ -272,14 +296,16 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
 			parentHindsightSessionState: options.session.getHindsightSessionState?.(),
 			parentMnemopiSessionState: options.session.getMnemopiSessionState?.(),
 			parentTelemetry: options.session.getTelemetry?.(),
-			parentEvalSessionId,
+			// Deliberately omit parentEvalSessionId: the parent's Python kernel is
+			// blocked on this bridge call, so sharing the eval session would deadlock
+			// (subagent queues behind the parent's in-flight execution, parent waits
+			// for subagent → circular). Each bridge-spawned subagent gets its own
+			// eval session with an independent kernel.
 		}),
 	);
-	if (result.exitCode !== 0 || result.error) {
-		const failureMessage =
-			result.error ?? result.stderr ?? result.abortReason ?? `agent() subagent '${agentName}' failed.`;
-		throw new ToolError(failureMessage);
+	if (result.exitCode !== 0 || result.error || result.aborted) {
+		throw new ToolError(buildSubagentFailureMessage(agentName, result));
 	}
 	options.session.recordEvalSubagentUsage?.(result.usage?.output ?? 0);

package/src/eval/backend.ts CHANGED Viewed

@@ -10,12 +10,12 @@ export interface ExecutorBackendExecOptions {
 	signal?: AbortSignal;
 	session: ToolSession;
 	/**
-	 * Inactivity budget in milliseconds (the cell's `timeout`). Cancellation is
-	 * driven entirely by `signal`, which the eval tool arms as an idle watchdog
-	 * that fires a `TimeoutError` reason after this much time with no progress
-	 * (status) events. Backends use this value only for timeout-annotation text
-	 * and as cold-start headroom; they MUST NOT derive a competing wall-clock
-	 * timer from it.
+	 * Runtime-work budget in milliseconds (the cell's `timeout`). Cancellation is
+	 * driven entirely by `signal`, which the eval tool arms as a watchdog that
+	 * pauses on bridge timeout-control status events and fires a `TimeoutError`
+	 * reason only while the Python/JS runtime owns control. Backends use this
+	 * value only for timeout-annotation text and as cold-start headroom; they MUST
+	 * NOT derive a competing wall-clock timer from it.
 	 */
 	idleTimeoutMs: number;
 	reset: boolean;