npm - @oh-my-pi/pi-coding-agent - Versions diffs - 15.13.1 → 15.13.3 - Mend

@oh-my-pi/pi-coding-agent 15.13.1 → 15.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

package/CHANGELOG.md +47 -0
package/dist/cli.js +1057 -289
package/dist/types/config/model-registry.d.ts +1 -0
package/dist/types/config/models-config-schema.d.ts +3 -0
package/dist/types/config/models-config.d.ts +3 -0
package/dist/types/config/settings-schema.d.ts +97 -0
package/dist/types/edit/hashline/block-resolver.d.ts +1 -1
package/dist/types/edit/index.d.ts +2 -0
package/dist/types/eval/js/context-manager.d.ts +15 -0
package/dist/types/modes/components/welcome.d.ts +1 -0
package/dist/types/modes/controllers/input-controller.d.ts +4 -4
package/dist/types/modes/interactive-mode.d.ts +1 -0
package/dist/types/modes/rpc/rpc-types.d.ts +2 -1
package/dist/types/modes/types.d.ts +6 -0
package/dist/types/sdk.d.ts +3 -0
package/dist/types/session/session-dump-format.d.ts +2 -1
package/dist/types/session/unexpected-stop-classifier.d.ts +13 -0
package/dist/types/stt/asr-client.d.ts +1 -1
package/dist/types/system-prompt.d.ts +11 -0
package/dist/types/tiny/title-client.d.ts +1 -1
package/dist/types/tools/ask.d.ts +2 -0
package/dist/types/tools/ast-edit.d.ts +2 -0
package/dist/types/tools/ast-grep.d.ts +2 -0
package/dist/types/tools/browser.d.ts +2 -0
package/dist/types/tools/debug.d.ts +2 -0
package/dist/types/tools/eval.d.ts +2 -0
package/dist/types/tools/find.d.ts +2 -0
package/dist/types/tools/inspect-image.d.ts +2 -1
package/dist/types/tools/irc.d.ts +2 -0
package/dist/types/tools/job.d.ts +1 -0
package/dist/types/tools/ssh.d.ts +2 -0
package/dist/types/tools/todo.d.ts +2 -0
package/dist/types/tts/tts-client.d.ts +1 -1
package/dist/types/tui/tree-list.d.ts +1 -0
package/dist/types/utils/thinking-display.d.ts +1 -17
package/package.json +12 -12
package/src/cli.ts +25 -12
package/src/config/model-registry.ts +16 -2
package/src/config/models-config-schema.ts +2 -0
package/src/config/models-config.ts +1 -0
package/src/config/settings-schema.ts +78 -0
package/src/edit/hashline/block-resolver.ts +1 -1
package/src/edit/hashline/execute.ts +1 -6
package/src/edit/index.ts +48 -0
package/src/eval/__tests__/agent-bridge.test.ts +106 -46
package/src/eval/__tests__/js-context-manager.test.ts +53 -3
package/src/eval/js/context-manager.ts +132 -29
package/src/eval/js/worker-core.ts +1 -1
package/src/eval/js/worker-entry.ts +7 -0
package/src/export/html/template.js +18 -22
package/src/internal-urls/docs-index.generated.ts +12 -3
package/src/main.ts +15 -5
package/src/modes/acp/acp-agent.ts +2 -2
package/src/modes/acp/acp-event-mapper.ts +2 -2
package/src/modes/components/agent-hub.ts +31 -7
package/src/modes/components/assistant-message.ts +24 -15
package/src/modes/components/snapcompact-shape-preview-doc.md +2 -2
package/src/modes/components/snapcompact-shape-preview.ts +2 -2
package/src/modes/components/tree-selector.ts +3 -2
package/src/modes/components/welcome.ts +14 -4
package/src/modes/controllers/event-controller.ts +3 -3
package/src/modes/controllers/input-controller.ts +28 -39
package/src/modes/controllers/streaming-reveal.ts +4 -4
package/src/modes/interactive-mode.ts +2 -0
package/src/modes/rpc/rpc-mode.ts +1 -0
package/src/modes/rpc/rpc-types.ts +2 -2
package/src/modes/types.ts +6 -0
package/src/modes/utils/ui-helpers.ts +3 -3
package/src/prompts/agents/oracle.md +0 -1
package/src/prompts/agents/reviewer.md +0 -1
package/src/prompts/system/system-prompt.md +17 -21
package/src/prompts/system/unexpected-stop-classifier.md +17 -0
package/src/prompts/system/unexpected-stop-retry.md +4 -0
package/src/prompts/tools/ask.md +0 -8
package/src/prompts/tools/ast-edit.md +0 -15
package/src/prompts/tools/ast-grep.md +0 -13
package/src/prompts/tools/browser.md +0 -21
package/src/prompts/tools/debug.md +0 -13
package/src/prompts/tools/eval.md +0 -9
package/src/prompts/tools/find.md +0 -13
package/src/prompts/tools/inspect-image.md +0 -9
package/src/prompts/tools/irc.md +0 -15
package/src/prompts/tools/patch.md +0 -13
package/src/prompts/tools/ssh.md +0 -9
package/src/prompts/tools/todo.md +1 -19
package/src/sdk.ts +19 -0
package/src/session/agent-session.ts +289 -29
package/src/session/session-dump-format.ts +17 -49
package/src/session/unexpected-stop-classifier.ts +129 -0
package/src/stt/asr-client.ts +1 -1
package/src/system-prompt.ts +31 -0
package/src/tiny/title-client.ts +1 -1
package/src/tools/ask.ts +41 -0
package/src/tools/ast-edit.ts +46 -0
package/src/tools/ast-grep.ts +24 -0
package/src/tools/browser/tab-supervisor.ts +1 -1
package/src/tools/browser/tab-worker-entry.ts +12 -4
package/src/tools/browser.ts +52 -0
package/src/tools/debug.ts +17 -0
package/src/tools/eval.ts +20 -1
package/src/tools/find.ts +24 -0
package/src/tools/inspect-image.ts +27 -1
package/src/tools/irc.ts +41 -0
package/src/tools/job.ts +1 -0
package/src/tools/ssh.ts +16 -0
package/src/tools/todo.ts +82 -3
package/src/tts/tts-client.ts +1 -1
package/src/tui/tree-list.ts +68 -19
package/src/utils/thinking-display.ts +8 -34

package/src/eval/__tests__/agent-bridge.test.ts CHANGED Viewed

@@ -121,6 +121,34 @@ function makeEvalSession(
 	return { session, sessionFile, sessionId: `${prefix}:${crypto.randomUUID()}` };
 }
+/**
+ * Spy `runSubprocess` so a `parallel()` fan-out overlaps deterministically: every
+ * bridge call parks until the pool saturates at `limit` concurrent calls in flight,
+ * then all proceed. Proves the pool reaches its ceiling without a wall-clock sleep —
+ * the pool itself caps how many run at once, so an unbounded pool would drive
+ * `maxInFlight` past `limit` and fail the bound.
+ *
+ * Caveat: the barrier opens only once `limit` calls are in flight at the same time.
+ * If fewer than `limit` calls ever overlap, `saturated` never resolves and every
+ * parked call stays pending, so the awaiting test stalls instead of failing an
+ * assertion.
+ *
+ * @param limit Number of simultaneously in-flight `runSubprocess` calls that opens the barrier.
+ * @returns An accessor for the highest in-flight count observed so far.
+ */
+function spyConcurrencyBarrier(limit: number): { maxInFlight: () => number } {
+	let inFlight = 0;
+	let max = 0;
+	let saturate: (() => void) | undefined;
+	const saturated = new Promise<void>(resolve => {
+		saturate = resolve;
+	});
+	vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
+		inFlight++;
+		max = Math.max(max, inFlight);
+		if (inFlight >= limit) saturate?.();
+		try {
+			await saturated;
+			return singleResult(options, { output: options.assignment ?? "" });
+		} finally {
+			inFlight--;
+		}
+	});
+	return { maxInFlight: () => max };
+}
 describe("runEvalAgent", () => {
 	afterEach(() => {
 		vi.restoreAllMocks();
@@ -298,8 +326,17 @@ describe("runEvalAgent", () => {
 });
 describe("agent() through eval runtimes", () => {
+	// One shared JS worker backs every agent() JavaScript test below. Spawning a
+	// worker (thread + module-graph import) is fixed infrastructure cost, not
+	// behavior under test; reusing it keeps the suite fast. Each run still threads
+	// its own ToolSession (settings/mock are read live through the bridge per call)
+	// and top-level `const`/`let` are demoted to `var`, so reuse never leaks state
+	// these tests observe. Torn down in afterAll via disposeAllVmContexts().
+	const sharedJsSessionId = "agent-bridge-shared-js";
 	afterEach(() => {
 		vi.restoreAllMocks();
+		vi.useRealTimers();
 	});
 	afterAll(async () => {
@@ -309,7 +346,7 @@ describe("agent() through eval runtimes", () => {
 	it("exposes agent() in JavaScript and parses structured output", async () => {
 		using tempDir = TempDir.createSync("@omp-eval-agent-js-");
-		const { session, sessionFile, sessionId } = makeEvalSession(tempDir, "js-agent");
+		const { session, sessionFile } = makeEvalSession(tempDir, "js-agent");
 		mockAgents();
 		vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options =>
 			singleResult(options, {
@@ -319,7 +356,7 @@ describe("agent() through eval runtimes", () => {
 		const result = await executeJs(
 			'const text = await agent("hi"); const data = await agent("json", { schema: { type: "object" } }); return JSON.stringify([text, data]);',
-			{ cwd: tempDir.path(), sessionId, session, sessionFile },
+			{ cwd: tempDir.path(), sessionId: sharedJsSessionId, session, sessionFile },
 		);
 		expect(result.exitCode).toBe(0);
@@ -334,35 +371,24 @@ describe("agent() through eval runtimes", () => {
 			"task.enableLsp": true,
 			"task.maxConcurrency": 2,
 		});
-		const { session, sessionFile, sessionId } = makeEvalSession(tempDir, "js-agent-parallel", settings);
+		const { session, sessionFile } = makeEvalSession(tempDir, "js-agent-parallel", settings);
 		mockAgents();
-		let inFlight = 0;
-		let maxInFlight = 0;
-		vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
-			inFlight++;
-			maxInFlight = Math.max(maxInFlight, inFlight);
-			try {
-				await Bun.sleep(options.assignment === "a" ? 30 : 10);
-				return singleResult(options, { output: options.assignment ?? "" });
-			} finally {
-				inFlight--;
-			}
-		});
+		const barrier = spyConcurrencyBarrier(2);
 		const result = await executeJs(
 			'const values = await parallel(["a", "b", "c", "d"].map(name => () => agent(name))); return JSON.stringify(values);',
-			{ cwd: tempDir.path(), sessionId, session, sessionFile },
+			{ cwd: tempDir.path(), sessionId: sharedJsSessionId, session, sessionFile },
 		);
 		expect(result.exitCode).toBe(0);
 		expect(JSON.parse(result.output.trim())).toEqual(["a", "b", "c", "d"]);
-		expect(maxInFlight).toBeGreaterThan(1);
-		expect(maxInFlight).toBeLessThanOrEqual(2);
+		expect(barrier.maxInFlight()).toBeGreaterThan(1);
+		expect(barrier.maxInFlight()).toBeLessThanOrEqual(2);
 	});
 	it("propagates JavaScript parallel() rejections", async () => {
 		using tempDir = TempDir.createSync("@omp-eval-agent-js-reject-");
-		const { session, sessionFile, sessionId } = makeEvalSession(tempDir, "js-agent-reject");
+		const { session, sessionFile } = makeEvalSession(tempDir, "js-agent-reject");
 		mockAgents();
 		vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
 			if (options.assignment === "bad") {
@@ -373,7 +399,7 @@ describe("agent() through eval runtimes", () => {
 		const result = await executeJs('await parallel([() => agent("ok"), () => agent("bad")]);', {
 			cwd: tempDir.path(),
-			sessionId,
+			sessionId: sharedJsSessionId,
 			session,
 			sessionFile,
 		});
@@ -416,18 +442,7 @@ describe("agent() through eval runtimes", () => {
 		});
 		const { session, sessionFile, sessionId } = makeEvalSession(tempDir, "py-agent-parallel", settings);
 		mockAgents();
-		let inFlight = 0;
-		let maxInFlight = 0;
-		vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
-			inFlight++;
-			maxInFlight = Math.max(maxInFlight, inFlight);
-			try {
-				await Bun.sleep(options.assignment === "a" ? 30 : 10);
-				return singleResult(options, { output: options.assignment ?? "" });
-			} finally {
-				inFlight--;
-			}
-		});
+		const barrier = spyConcurrencyBarrier(2);
 		const result = await executePython(
 			'import json\nprint(json.dumps(parallel([lambda n=n: agent(n) for n in ["a", "b", "c", "d"]])))',
@@ -440,8 +455,8 @@ describe("agent() through eval runtimes", () => {
 		expect(result.exitCode).toBe(0);
 		expect(JSON.parse(result.output.trim())).toEqual(["a", "b", "c", "d"]);
-		expect(maxInFlight).toBeGreaterThan(1);
-		expect(maxInFlight).toBeLessThanOrEqual(2);
+		expect(barrier.maxInFlight()).toBeGreaterThan(1);
+		expect(barrier.maxInFlight()).toBeLessThanOrEqual(2);
 	});
 	it("interrupting a Python parallel() fan-out settles the kernel cleanly and preserves session state", async () => {
@@ -526,7 +541,7 @@ describe("agent() through eval runtimes", () => {
 	it("streams enriched agent progress through onStatus before the cell finishes", async () => {
 		using tempDir = TempDir.createSync("@omp-eval-agent-progress-");
-		const { session, sessionFile, sessionId } = makeEvalSession(tempDir, "js-agent-progress");
+		const { session, sessionFile } = makeEvalSession(tempDir, "js-agent-progress");
 		mockAgents();
 		const makeProgress = (options: ExecutorOptions, overrides: Partial<AgentProgress>): AgentProgress => ({
@@ -580,7 +595,7 @@ describe("agent() through eval runtimes", () => {
 		const events: Array<{ op: string; [key: string]: unknown }> = [];
 		const result = await executeJs('await agent("investigate", { label: "Scout" });', {
 			cwd: tempDir.path(),
-			sessionId,
+			sessionId: sharedJsSessionId,
 			session,
 			sessionFile,
 			onStatus: event => events.push(event),
@@ -622,16 +637,28 @@ describe("agent() through eval runtimes", () => {
 		mockAgents();
 		// runSubprocess runs far past the eval timeout budget and emits NO progress
-		// of its own. The bridge pause must make that delegated time invisible to
-		// the watchdog.
+		// of its own; the bridge pause must make that delegated time invisible to
+		// the watchdog. Fake timers replace the real wait: the subprocess parks on
+		// `released` so the test can advance the clock past the budget while the
+		// bridge call is provably in flight, then release it deterministically.
+		let release: (() => void) | undefined;
+		const released = new Promise<void>(resolve => {
+			release = resolve;
+		});
+		let markInFlight: (() => void) | undefined;
+		const inFlight = new Promise<void>(resolve => {
+			markInFlight = resolve;
+		});
 		vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
-			await Bun.sleep(40);
+			markInFlight?.();
+			await released;
 			return singleResult(options, { output: "done" });
 		});
 		const ops: string[] = [];
+		vi.useFakeTimers();
 		using idle = new IdleTimeout(20);
-		const result = await runEvalAgent(
+		const resultPromise = runEvalAgent(
 			{ prompt: "investigate" },
 			{
 				session,
@@ -644,11 +671,22 @@ describe("agent() through eval runtimes", () => {
 			},
 		);
+		// The bridge paused the watchdog; the subprocess is now blocked in flight.
+		await inFlight;
+		// Burn far more than the 20ms budget while paused: the watchdog must stay disarmed.
+		vi.advanceTimersByTime(1_000);
+		expect(idle.signal.aborted).toBe(false);
+		release?.();
+		const result = await resultPromise;
 		expect(result.text).toBe("done");
 		expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
 		expect(idle.signal.aborted).toBe(false);
-		await Bun.sleep(60);
+		// RESUME re-armed a fresh window; once the runtime stays idle past it the
+		// watchdog finally fires.
+		vi.advanceTimersByTime(idle.idleMs + 5);
 		expect(idle.signal.aborted).toBe(true);
 	});
@@ -657,9 +695,20 @@ describe("agent() through eval runtimes", () => {
 		const { session } = makeEvalSession(tempDir, "js-agent-progress-timeout-pause");
 		mockAgents();
-		// Stream frequent progress snapshots (op:"agent") for well past the budget.
+		// Stream frequent progress snapshots (op:"agent") well past the budget.
 		// They render as status, but timeout accounting is controlled only by the
-		// bridge pause/resume events.
+		// bridge pause/resume events — so even a flood of snapshots must not re-arm
+		// the watchdog. Fake timers make "past the budget" deterministic: the
+		// subprocess emits its snapshots, parks on `released`, and the test advances
+		// the clock far past the window before releasing it.
+		let release: (() => void) | undefined;
+		const released = new Promise<void>(resolve => {
+			release = resolve;
+		});
+		let markInFlight: (() => void) | undefined;
+		const inFlight = new Promise<void>(resolve => {
+			markInFlight = resolve;
+		});
 		vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
 			for (let i = 0; i < 20; i++) {
 				options.onProgress?.({
@@ -679,15 +728,16 @@ describe("agent() through eval runtimes", () => {
 					cost: 0,
 					durationMs: i * 10,
 				});
-				await Bun.sleep(40);
 			}
+			markInFlight?.();
+			await released;
 			return singleResult(options, { output: "done" });
 		});
 		const ops: string[] = [];
-		// Timing invariant (keep, do not re-tighten): total mock work (20*40ms = 800ms) > idle window (250ms) > scheduling jitter (~tens of ms).
+		vi.useFakeTimers();
 		using idle = new IdleTimeout(250);
-		const result = await runEvalAgent(
+		const resultPromise = runEvalAgent(
 			{ prompt: "investigate" },
 			{
 				session,
@@ -700,6 +750,16 @@ describe("agent() through eval runtimes", () => {
 			},
 		);
+		// All snapshots have streamed and the subprocess is blocked in flight.
+		await inFlight;
+		// Far exceed the 250ms budget while paused: the snapshots already delivered
+		// must not have re-armed the watchdog.
+		vi.advanceTimersByTime(10_000);
+		expect(idle.signal.aborted).toBe(false);
+		release?.();
+		const result = await resultPromise;
 		expect(result.text).toBe("done");
 		expect(ops[0]).toBe(EVAL_TIMEOUT_PAUSE_OP);
 		expect(ops).toContain("agent");

package/src/eval/__tests__/js-context-manager.test.ts CHANGED Viewed

@@ -1,8 +1,8 @@
-import { afterEach, describe, expect, it } from "bun:test";
+import { afterEach, beforeEach, describe, expect, it } from "bun:test";
 import { TempDir } from "@oh-my-pi/pi-utils";
 import { Settings } from "../../config/settings";
 import type { ToolSession } from "../../tools";
-import { disposeAllVmContexts } from "../js/context-manager";
+import { disposeAllVmContexts, setWorkerCloseTimeoutMsForTests } from "../js/context-manager";
 import { executeJs } from "../js/executor";
 const originalWorker = globalThis.Worker;
@@ -15,6 +15,7 @@ interface FakeWorkerStats {
 interface FakeWorkerBehavior {
 	exitOnClose: boolean;
 	settleRuns: boolean;
+	errorOnStart?: boolean;
 }
 function makeSession(cwd: string): ToolSession {
@@ -70,6 +71,7 @@ async function waitForRealWorkerExitAfterClose(cwd: string): Promise<void> {
 	worker.addEventListener("close", () => workerClosed.resolve());
 	try {
+		worker.postMessage({ type: "init", snapshot });
 		await withTimeout(ready.promise, 1_000, "worker ready");
 		worker.postMessage({
 			type: "run",
@@ -91,6 +93,7 @@ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior)
 	class FakeWorker {
 		#messageListeners = new Set<(event: MessageEvent) => void>();
 		#closeListeners = new Set<(event: Event) => void>();
+		#errorListeners = new Set<(event: Event) => void>();
 		#readyQueued = false;
 		#exited = false;
@@ -115,11 +118,18 @@ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior)
 				this.#closeListeners.add(listener as (event: Event) => void);
 				return;
 			}
+			if (type === "error") {
+				this.#errorListeners.add(listener as (event: Event) => void);
+				return;
+			}
 			if (type !== "message") return;
 			this.#messageListeners.add(listener as (event: MessageEvent) => void);
 			if (!this.#readyQueued) {
 				this.#readyQueued = true;
-				queueMicrotask(() => this.#emitMessage({ type: "ready" }));
+				queueMicrotask(() => {
+					if (behavior.errorOnStart) this.#emitError();
+					else this.#emitMessage({ type: "ready" });
+				});
 			}
 		}
@@ -128,6 +138,10 @@ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior)
 				this.#closeListeners.delete(listener as (event: Event) => void);
 				return;
 			}
+			if (type === "error") {
+				this.#errorListeners.delete(listener as (event: Event) => void);
+				return;
+			}
 			if (type !== "message") return;
 			this.#messageListeners.delete(listener as (event: MessageEvent) => void);
 		}
@@ -148,6 +162,14 @@ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior)
 			const event = new Event("close");
 			for (const listener of this.#closeListeners) listener(event);
 		}
+		#emitError(): void {
+			const event = new ErrorEvent("error", {
+				message: "fake worker failed to start",
+				error: new Error("fake worker failed to start"),
+			});
+			for (const listener of this.#errorListeners) listener(event);
+		}
 	}
 	Object.defineProperty(globalThis, "Worker", {
@@ -158,8 +180,18 @@ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior)
 }
 describe("JavaScript eval worker lifecycle", () => {
+	let restoreCloseTimeoutMs = 0;
+	beforeEach(() => {
+		// Shrink the graceful-close grace period so the "close acked but the worker
+		// never exits -> force terminate" contract is proven without a real 1s wait.
+		restoreCloseTimeoutMs = setWorkerCloseTimeoutMsForTests(1);
+	});
 	afterEach(async () => {
+		// Dispose while the shrunk timeout is still active so a hung worker's afterEach
+		// close also force-terminates instantly, then restore the production default.
 		await disposeAllVmContexts();
+		setWorkerCloseTimeoutMsForTests(restoreCloseTimeoutMs);
 		Object.defineProperty(globalThis, "Worker", {
 			configurable: true,
 			writable: true,
@@ -238,4 +270,22 @@ describe("JavaScript eval worker lifecycle", () => {
 		expect(stats.closeRequests).toBe(0);
 		expect(stats.terminateCalls).toBe(1);
 	});
+	it("falls back to the inline worker when the spawned worker errors during startup", async () => {
+		using tempDir = TempDir.createSync("@omp-js-worker-error-");
+		const stats: FakeWorkerStats = { closeRequests: 0, terminateCalls: 0 };
+		installFakeWorker(stats, { exitOnClose: true, settleRuns: true, errorOnStart: true });
+		const session = makeSession(tempDir.path());
+		const sessionId = `js-worker-error:${crypto.randomUUID()}`;
+		// The spawned worker emits an `error` event instead of `ready`. Without fail-fast
+		// error handling the handshake would stall until WORKER_INIT_TIMEOUT_MS (15s); with
+		// it, the handshake rejects at once and the inline worker runs the cell.
+		const result = await executeJs("return String(6 * 7);", { cwd: tempDir.path(), sessionId, session });
+		expect(result.exitCode).toBe(0);
+		expect(result.output.trim()).toBe("42");
+		// The errored primary worker is torn down before the inline retry takes over.
+		expect(stats.terminateCalls).toBe(1);
+	});
 });

package/src/eval/js/context-manager.ts CHANGED Viewed

@@ -27,6 +27,7 @@ interface WorkerHandle {
 	mode: "worker" | "inline";
 	send(msg: WorkerInbound): void;
 	onMessage(handler: (msg: WorkerOutbound) => void): () => void;
+	onError(handler: (error: Error) => void): () => void;
 	close(): Promise<boolean>;
 	terminate(): Promise<void>;
 }
@@ -59,6 +60,22 @@ const resettingSessions = new Map<string, Promise<void>>();
 // SIGILL/SIGSEGV. Callers that pass a larger per-cell budget still dominate.
 const WORKER_INIT_TIMEOUT_MS = 15_000;
 const WORKER_CLOSE_TIMEOUT_MS = 1_000;
+// Active graceful-close grace period before a worker that ack'd `close` but never
+// emitted its `close` event is force-terminated. Defaults to the production floor;
+// tests override it (and restore it) to exercise the close-timeout -> terminate
+// path without a real wall-clock wait.
+let workerCloseTimeoutMs: number = WORKER_CLOSE_TIMEOUT_MS;
+/**
+ * Test-only seam: override the graceful-close grace period (ms). Returns the
+ * previous value so callers can restore it. Production always uses
+ * {@link WORKER_CLOSE_TIMEOUT_MS}; never call this outside tests.
+ *
+ * The override is module-level state that `close()` reads when it arms its
+ * timer, so it applies to closes started after this call.
+ *
+ * @param ms New grace period in milliseconds.
+ * @returns The grace period that was active before this call.
+ */
+export function setWorkerCloseTimeoutMsForTests(ms: number): number {
+	const previous = workerCloseTimeoutMs;
+	workerCloseTimeoutMs = ms;
+	return previous;
+}
 export async function executeInVmContext(options: {
 	sessionKey: string;
@@ -124,6 +141,27 @@ export async function disposeAllVmContexts(): Promise<void> {
 	await Promise.all(all.map(session => killSession(session, new ToolError("JS context disposed"), { force: false })));
 }
+/**
+ * Smoke probe: spawn the JS eval worker through the worker-host entry and prove
+ * it answers the `init` handshake on a real worker thread (not the inline
+ * fallback). Catches the silent worker-load and init-message-drop regressions
+ * that otherwise strand every cell on the init timeout in a distribution build —
+ * the failure mode that motivated `installWorkerInbox`. Wired into
+ * `omp --smoke-test` so binary / source / tarball installs all exercise it.
+ *
+ * The probe worker is always terminated in `finally`; a failure of `terminate()`
+ * itself is ignored so it cannot mask the probe result.
+ *
+ * @throws If the `init` handshake rejects (including the
+ * {@link WORKER_INIT_TIMEOUT_MS} timeout), or if the handle that answered is not
+ * a real worker (`mode !== "worker"`).
+ */
+export async function smokeTestJsEvalWorker(): Promise<void> {
+	const worker = spawnJsWorker();
+	const session: JsSession = { sessionKey: "smoke", worker, state: "alive", pending: new Map() };
+	try {
+		await initWorker(session, { cwd: process.cwd(), sessionId: "smoke" }, WORKER_INIT_TIMEOUT_MS);
+		if (worker.mode !== "worker") {
+			throw new Error("JS eval worker smoke fell back to the inline worker (real worker failed to start)");
+		}
+	} finally {
+		await worker.terminate().catch(() => undefined);
+	}
+}
 async function runOnce(
 	session: JsSession,
 	options: {
@@ -186,41 +224,45 @@ async function acquireSession(sessionKey: string, snapshot: SessionSnapshot, tim
 	if (starting) return await starting;
 	const startup = (async (): Promise<JsSession> => {
-		const worker = await spawnJsWorker();
+		// Bun drops messages posted before a listener exists, so listeners must be
+		// attached before the worker can reply. `initWorker` attaches them and only
+		// then posts `init`; WorkerCore answers `ready` in response to `init` (it no
+		// longer emits `ready` from its constructor), so `ready` cannot race the attach.
+		const worker = spawnJsWorker();
 		const session: JsSession = {
 			sessionKey,
 			worker,
 			state: "alive",
 			pending: new Map(),
 		};
-		const { promise: readyPromise, resolve: resolveReady, reject: rejectReady } = Promise.withResolvers<void>();
-		let resolved = false;
-		const unsubscribe = worker.onMessage(msg => {
-			if (!resolved && msg.type === "ready") {
-				resolved = true;
-				resolveReady();
-				return;
-			}
-			if (!resolved && msg.type === "init-failed") {
-				resolved = true;
-				rejectReady(errorFromPayload(msg.error));
-				return;
-			}
-			handleSessionMessage(session, msg);
-		});
+		// Init headroom is the fixed infrastructure floor; the caller's per-cell timeout
+		// dominates when larger so users can grant more by raising `timeout` on a cell.
+		const readyTimeoutMs = Math.max(WORKER_INIT_TIMEOUT_MS, timeoutMs ?? 0);
 		try {
-			// Init headroom is the fixed infrastructure floor; the caller's per-cell timeout
-			// dominates when larger so users can grant more by raising `timeout` on a cell.
-			const readyTimeoutMs = Math.max(WORKER_INIT_TIMEOUT_MS, timeoutMs ?? 0);
-			await raceWithTimeout(readyPromise, readyTimeoutMs, "Timed out initializing JS eval worker");
-			worker.send({ type: "init", snapshot });
-			sessions.set(sessionKey, session);
-			return session;
+			await initWorker(session, snapshot, readyTimeoutMs);
 		} catch (error) {
-			unsubscribe();
+			// Worker-thread crash/load failures surface asynchronously via the worker
+			// `error` event — after `spawnJsWorker`'s synchronous try/catch already
+			// returned — so the only signal is the rejected handshake. Retry on the
+			// inline worker so a broken module graph fails fast instead of stalling
+			// every cell on the init timeout and then dying with exitCode 1.
 			await worker.terminate().catch(() => undefined);
-			throw error;
+			if (worker.mode === "inline") throw error;
+			logger.warn("JS eval worker init failed; retrying with inline worker (no sync-loop guard)", {
+				error: error instanceof Error ? error.message : String(error),
+			});
+			const inline = spawnInlineWorker();
+			session.worker = inline;
+			session.state = "alive";
+			try {
+				await initWorker(session, snapshot, readyTimeoutMs);
+			} catch (inlineError) {
+				await inline.terminate().catch(() => undefined);
+				throw inlineError;
+			}
 		}
+		sessions.set(sessionKey, session);
+		return session;
 	})();
 	startingSessions.set(sessionKey, startup);
 	try {
@@ -230,6 +272,49 @@ async function acquireSession(sessionKey: string, snapshot: SessionSnapshot, tim
 	}
 }
+/**
+ * Run the `init` -> `ready` handshake for `session.worker`.
+ *
+ * Subscribes to worker messages and errors first, then sends `init` with the
+ * snapshot and waits for the outcome, bounded by `timeoutMs`:
+ * - `ready` resolves the handshake;
+ * - `init-failed` rejects with the error decoded from the message payload;
+ * - a worker error before the handshake settles rejects with that error.
+ * Every other message, and any message after the handshake settled, is forwarded
+ * to `handleSessionMessage`.
+ *
+ * On success both listeners stay attached: later messages keep flowing to the
+ * session, and a later worker error tears the session down via `killSessionFor`
+ * with `force: true`. On failure both listeners are removed before the error is
+ * rethrown.
+ *
+ * @param session Session whose current `worker` is initialized.
+ * @param snapshot Session snapshot sent with the `init` message.
+ * @param timeoutMs Upper bound for the handshake, in milliseconds.
+ * @throws The handshake error: timeout, `init-failed` payload error, or worker error.
+ */
+async function initWorker(session: JsSession, snapshot: SessionSnapshot, timeoutMs: number): Promise<void> {
+	const worker = session.worker;
+	const { promise: readyPromise, resolve: resolveReady, reject: rejectReady } = Promise.withResolvers<void>();
+	let resolved = false;
+	const unsubscribeMessage = worker.onMessage(msg => {
+		if (!resolved && msg.type === "ready") {
+			resolved = true;
+			resolveReady();
+			return;
+		}
+		if (!resolved && msg.type === "init-failed") {
+			resolved = true;
+			rejectReady(errorFromPayload(msg.error));
+			return;
+		}
+		handleSessionMessage(session, msg);
+	});
+	const unsubscribeError = worker.onError(error => {
+		if (!resolved) {
+			resolved = true;
+			rejectReady(error);
+			return;
+		}
+		// Worker died after a successful handshake: tear the session down so the
+		// in-flight run (and the next acquire) fail fast instead of hanging on a
+		// worker that will never reply.
+		void killSessionFor(session, error, { force: true });
+	});
+	try {
+		// Attach listeners and send init before awaiting ready. The worker now
+		// emits ready only in response to init, so this ordering is race-free.
+		worker.send({ type: "init", snapshot });
+		await raceWithTimeout(readyPromise, timeoutMs, "Timed out initializing JS eval worker");
+	} catch (error) {
+		// Handshake failed (timeout, init-failed, or worker error): drop both listeners
+		// so the abandoned worker can't keep routing messages into a session the caller
+		// is about to discard or retry on the inline fallback.
+		unsubscribeMessage();
+		unsubscribeError();
+		throw error;
+	}
+}
 function handleSessionMessage(session: JsSession, msg: WorkerOutbound): void {
 	switch (msg.type) {
 		case "text": {
@@ -379,11 +464,11 @@ async function raceWithTimeout<T>(promise: Promise<T>, timeoutMs: number, reason
 	}
 }
-async function spawnJsWorker(): Promise<WorkerHandle> {
+function spawnJsWorker(): WorkerHandle {
 	try {
 		const hostEntry = workerHostEntry();
 		const worker = hostEntry
-			? new Worker(hostEntry, { type: "module", argv: ["__omp_js_eval_worker"] })
+			? new Worker(hostEntry, { type: "module", argv: ["__omp_worker_js_eval"] })
 			: new Worker(new URL("./worker-entry.ts", import.meta.url).href, { type: "module" });
 		return wrapBunWorker(worker);
 	} catch (err) {
@@ -405,6 +490,17 @@ function wrapBunWorker(worker: Worker): WorkerHandle {
 			worker.addEventListener("message", wrap);
 			return () => worker.removeEventListener("message", wrap);
 		},
+		onError(handler) {
+			const onError = (event: ErrorEvent): void => handler(errorFromWorkerEvent(event));
+			const onMessageError = (event: MessageEvent): void =>
+				handler(new ToolError(`JS eval worker message error: ${String(event.data)}`));
+			worker.addEventListener("error", onError);
+			worker.addEventListener("messageerror", onMessageError);
+			return () => {
+				worker.removeEventListener("error", onError);
+				worker.removeEventListener("messageerror", onMessageError);
+			};
+		},
 		async close() {
 			const { promise: closed, resolve } = Promise.withResolvers<boolean>();
 			let settled = false;
@@ -433,7 +529,7 @@ function wrapBunWorker(worker: Worker): WorkerHandle {
 				finishIfClosed();
 			});
 			worker.addEventListener("close", onClose);
-			timeout = setTimeout(() => finish(false), WORKER_CLOSE_TIMEOUT_MS);
+			timeout = setTimeout(() => finish(false), workerCloseTimeoutMs);
 			worker.postMessage({ type: "close" } satisfies WorkerInbound);
 			return await closed;
 		},
@@ -443,6 +539,12 @@ function wrapBunWorker(worker: Worker): WorkerHandle {
 	};
 }
+/**
+ * Convert a worker `error` event into an `Error`: reuse `event.error` when it is
+ * already an `Error`, otherwise wrap a non-empty `event.message`, otherwise fall
+ * back to a generic "unknown" error.
+ */
+function errorFromWorkerEvent(event: ErrorEvent): Error {
+	if (event.error instanceof Error) return event.error;
+	if (event.message) return new Error(event.message);
+	return new Error("Unknown JS eval worker error");
+}
 /**
  * Inline fallback for environments where Bun cannot spawn the worker entry
  * (e.g. some test runners). Preserves behavior but cannot interrupt synchronous
@@ -473,6 +575,7 @@ function spawnInlineWorker(): WorkerHandle {
 			hostListeners.add(handler);
 			return () => hostListeners.delete(handler);
 		},
+		onError: () => () => {},
 		async close() {
 			const { promise: closed, resolve } = Promise.withResolvers<boolean>();
 			let settled = false;
@@ -491,7 +594,7 @@ function spawnInlineWorker(): WorkerHandle {
 				if (msg.type === "closed") finish(true);
 			});
 			this.send({ type: "close" });
-			timeout = setTimeout(() => finish(false), WORKER_CLOSE_TIMEOUT_MS);
+			timeout = setTimeout(() => finish(false), workerCloseTimeoutMs);
 			return await closed;
 		},
 		async terminate() {

package/src/eval/js/worker-core.ts CHANGED Viewed

@@ -43,13 +43,13 @@ export class WorkerCore {
 	constructor(transport: Transport) {
 		this.#transport = transport;
 		this.#unsubscribe = transport.onMessage(msg => this.#handle(msg));
-		transport.send({ type: "ready" });
 	}
 	#handle(msg: WorkerInbound): void {
 		switch (msg.type) {
 			case "init":
 				this.#ensureRuntime(msg.snapshot);
+				this.#transport.send({ type: "ready" });
 				return;
 			case "run":
 				void this.#runOne(msg.runId, msg.code, msg.filename, msg.snapshot);