@oh-my-pi/pi-coding-agent 15.13.1 → 15.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/CHANGELOG.md +47 -0
  2. package/dist/cli.js +1057 -289
  3. package/dist/types/config/model-registry.d.ts +1 -0
  4. package/dist/types/config/models-config-schema.d.ts +3 -0
  5. package/dist/types/config/models-config.d.ts +3 -0
  6. package/dist/types/config/settings-schema.d.ts +97 -0
  7. package/dist/types/edit/hashline/block-resolver.d.ts +1 -1
  8. package/dist/types/edit/index.d.ts +2 -0
  9. package/dist/types/eval/js/context-manager.d.ts +15 -0
  10. package/dist/types/modes/components/welcome.d.ts +1 -0
  11. package/dist/types/modes/controllers/input-controller.d.ts +4 -4
  12. package/dist/types/modes/interactive-mode.d.ts +1 -0
  13. package/dist/types/modes/rpc/rpc-types.d.ts +2 -1
  14. package/dist/types/modes/types.d.ts +6 -0
  15. package/dist/types/sdk.d.ts +3 -0
  16. package/dist/types/session/session-dump-format.d.ts +2 -1
  17. package/dist/types/session/unexpected-stop-classifier.d.ts +13 -0
  18. package/dist/types/stt/asr-client.d.ts +1 -1
  19. package/dist/types/system-prompt.d.ts +11 -0
  20. package/dist/types/tiny/title-client.d.ts +1 -1
  21. package/dist/types/tools/ask.d.ts +2 -0
  22. package/dist/types/tools/ast-edit.d.ts +2 -0
  23. package/dist/types/tools/ast-grep.d.ts +2 -0
  24. package/dist/types/tools/browser.d.ts +2 -0
  25. package/dist/types/tools/debug.d.ts +2 -0
  26. package/dist/types/tools/eval.d.ts +2 -0
  27. package/dist/types/tools/find.d.ts +2 -0
  28. package/dist/types/tools/inspect-image.d.ts +2 -1
  29. package/dist/types/tools/irc.d.ts +2 -0
  30. package/dist/types/tools/job.d.ts +1 -0
  31. package/dist/types/tools/ssh.d.ts +2 -0
  32. package/dist/types/tools/todo.d.ts +2 -0
  33. package/dist/types/tts/tts-client.d.ts +1 -1
  34. package/dist/types/tui/tree-list.d.ts +1 -0
  35. package/dist/types/utils/thinking-display.d.ts +1 -17
  36. package/package.json +12 -12
  37. package/src/cli.ts +25 -12
  38. package/src/config/model-registry.ts +16 -2
  39. package/src/config/models-config-schema.ts +2 -0
  40. package/src/config/models-config.ts +1 -0
  41. package/src/config/settings-schema.ts +78 -0
  42. package/src/edit/hashline/block-resolver.ts +1 -1
  43. package/src/edit/hashline/execute.ts +1 -6
  44. package/src/edit/index.ts +48 -0
  45. package/src/eval/__tests__/agent-bridge.test.ts +106 -46
  46. package/src/eval/__tests__/js-context-manager.test.ts +53 -3
  47. package/src/eval/js/context-manager.ts +132 -29
  48. package/src/eval/js/worker-core.ts +1 -1
  49. package/src/eval/js/worker-entry.ts +7 -0
  50. package/src/export/html/template.js +18 -22
  51. package/src/internal-urls/docs-index.generated.ts +12 -3
  52. package/src/main.ts +15 -5
  53. package/src/modes/acp/acp-agent.ts +2 -2
  54. package/src/modes/acp/acp-event-mapper.ts +2 -2
  55. package/src/modes/components/agent-hub.ts +31 -7
  56. package/src/modes/components/assistant-message.ts +24 -15
  57. package/src/modes/components/snapcompact-shape-preview-doc.md +2 -2
  58. package/src/modes/components/snapcompact-shape-preview.ts +2 -2
  59. package/src/modes/components/tree-selector.ts +3 -2
  60. package/src/modes/components/welcome.ts +14 -4
  61. package/src/modes/controllers/event-controller.ts +3 -3
  62. package/src/modes/controllers/input-controller.ts +28 -39
  63. package/src/modes/controllers/streaming-reveal.ts +4 -4
  64. package/src/modes/interactive-mode.ts +2 -0
  65. package/src/modes/rpc/rpc-mode.ts +1 -0
  66. package/src/modes/rpc/rpc-types.ts +2 -2
  67. package/src/modes/types.ts +6 -0
  68. package/src/modes/utils/ui-helpers.ts +3 -3
  69. package/src/prompts/agents/oracle.md +0 -1
  70. package/src/prompts/agents/reviewer.md +0 -1
  71. package/src/prompts/system/system-prompt.md +17 -21
  72. package/src/prompts/system/unexpected-stop-classifier.md +17 -0
  73. package/src/prompts/system/unexpected-stop-retry.md +4 -0
  74. package/src/prompts/tools/ask.md +0 -8
  75. package/src/prompts/tools/ast-edit.md +0 -15
  76. package/src/prompts/tools/ast-grep.md +0 -13
  77. package/src/prompts/tools/browser.md +0 -21
  78. package/src/prompts/tools/debug.md +0 -13
  79. package/src/prompts/tools/eval.md +0 -9
  80. package/src/prompts/tools/find.md +0 -13
  81. package/src/prompts/tools/inspect-image.md +0 -9
  82. package/src/prompts/tools/irc.md +0 -15
  83. package/src/prompts/tools/patch.md +0 -13
  84. package/src/prompts/tools/ssh.md +0 -9
  85. package/src/prompts/tools/todo.md +1 -19
  86. package/src/sdk.ts +19 -0
  87. package/src/session/agent-session.ts +289 -29
  88. package/src/session/session-dump-format.ts +17 -49
  89. package/src/session/unexpected-stop-classifier.ts +129 -0
  90. package/src/stt/asr-client.ts +1 -1
  91. package/src/system-prompt.ts +31 -0
  92. package/src/tiny/title-client.ts +1 -1
  93. package/src/tools/ask.ts +41 -0
  94. package/src/tools/ast-edit.ts +46 -0
  95. package/src/tools/ast-grep.ts +24 -0
  96. package/src/tools/browser/tab-supervisor.ts +1 -1
  97. package/src/tools/browser/tab-worker-entry.ts +12 -4
  98. package/src/tools/browser.ts +52 -0
  99. package/src/tools/debug.ts +17 -0
  100. package/src/tools/eval.ts +20 -1
  101. package/src/tools/find.ts +24 -0
  102. package/src/tools/inspect-image.ts +27 -1
  103. package/src/tools/irc.ts +41 -0
  104. package/src/tools/job.ts +1 -0
  105. package/src/tools/ssh.ts +16 -0
  106. package/src/tools/todo.ts +82 -3
  107. package/src/tts/tts-client.ts +1 -1
  108. package/src/tui/tree-list.ts +68 -19
  109. package/src/utils/thinking-display.ts +8 -34
@@ -121,6 +121,34 @@ function makeEvalSession(
121
121
  return { session, sessionFile, sessionId: `${prefix}:${crypto.randomUUID()}` };
122
122
  }
123
123
 
124
+ /**
125
+ * Spy `runSubprocess` so a `parallel()` fan-out overlaps deterministically: every
126
+ * bridge call parks until the pool saturates at `limit` concurrent calls in flight,
127
+ * then all proceed. Proves the pool reaches its ceiling without a wall-clock sleep —
128
+ * the pool itself caps how many run at once, so an unbounded pool would drive
129
+ * `maxInFlight` past `limit` and fail the bound.
130
+ */
131
+ function spyConcurrencyBarrier(limit: number): { maxInFlight: () => number } {
132
+ let inFlight = 0;
133
+ let max = 0;
134
+ let saturate: (() => void) | undefined;
135
+ const saturated = new Promise<void>(resolve => {
136
+ saturate = resolve;
137
+ });
138
+ vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
139
+ inFlight++;
140
+ max = Math.max(max, inFlight);
141
+ if (inFlight >= limit) saturate?.();
142
+ try {
143
+ await saturated;
144
+ return singleResult(options, { output: options.assignment ?? "" });
145
+ } finally {
146
+ inFlight--;
147
+ }
148
+ });
149
+ return { maxInFlight: () => max };
150
+ }
151
+
124
152
  describe("runEvalAgent", () => {
125
153
  afterEach(() => {
126
154
  vi.restoreAllMocks();
@@ -298,8 +326,17 @@ describe("runEvalAgent", () => {
298
326
  });
299
327
 
300
328
  describe("agent() through eval runtimes", () => {
329
+ // One shared JS worker backs every agent() JavaScript test below. Spawning a
330
+ // worker (thread + module-graph import) is fixed infrastructure cost, not
331
+ // behavior under test; reusing it keeps the suite fast. Each run still threads
332
+ // its own ToolSession (settings/mock are read live through the bridge per call)
333
+ // and top-level `const`/`let` are demoted to `var`, so reuse never leaks state
334
+ // these tests observe. Torn down in afterAll via disposeAllVmContexts().
335
+ const sharedJsSessionId = "agent-bridge-shared-js";
336
+
301
337
  afterEach(() => {
302
338
  vi.restoreAllMocks();
339
+ vi.useRealTimers();
303
340
  });
304
341
 
305
342
  afterAll(async () => {
@@ -309,7 +346,7 @@ describe("agent() through eval runtimes", () => {
309
346
 
310
347
  it("exposes agent() in JavaScript and parses structured output", async () => {
311
348
  using tempDir = TempDir.createSync("@omp-eval-agent-js-");
312
- const { session, sessionFile, sessionId } = makeEvalSession(tempDir, "js-agent");
349
+ const { session, sessionFile } = makeEvalSession(tempDir, "js-agent");
313
350
  mockAgents();
314
351
  vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options =>
315
352
  singleResult(options, {
@@ -319,7 +356,7 @@ describe("agent() through eval runtimes", () => {
319
356
 
320
357
  const result = await executeJs(
321
358
  'const text = await agent("hi"); const data = await agent("json", { schema: { type: "object" } }); return JSON.stringify([text, data]);',
322
- { cwd: tempDir.path(), sessionId, session, sessionFile },
359
+ { cwd: tempDir.path(), sessionId: sharedJsSessionId, session, sessionFile },
323
360
  );
324
361
 
325
362
  expect(result.exitCode).toBe(0);
@@ -334,35 +371,24 @@ describe("agent() through eval runtimes", () => {
334
371
  "task.enableLsp": true,
335
372
  "task.maxConcurrency": 2,
336
373
  });
337
- const { session, sessionFile, sessionId } = makeEvalSession(tempDir, "js-agent-parallel", settings);
374
+ const { session, sessionFile } = makeEvalSession(tempDir, "js-agent-parallel", settings);
338
375
  mockAgents();
339
- let inFlight = 0;
340
- let maxInFlight = 0;
341
- vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
342
- inFlight++;
343
- maxInFlight = Math.max(maxInFlight, inFlight);
344
- try {
345
- await Bun.sleep(options.assignment === "a" ? 30 : 10);
346
- return singleResult(options, { output: options.assignment ?? "" });
347
- } finally {
348
- inFlight--;
349
- }
350
- });
376
+ const barrier = spyConcurrencyBarrier(2);
351
377
 
352
378
  const result = await executeJs(
353
379
  'const values = await parallel(["a", "b", "c", "d"].map(name => () => agent(name))); return JSON.stringify(values);',
354
- { cwd: tempDir.path(), sessionId, session, sessionFile },
380
+ { cwd: tempDir.path(), sessionId: sharedJsSessionId, session, sessionFile },
355
381
  );
356
382
 
357
383
  expect(result.exitCode).toBe(0);
358
384
  expect(JSON.parse(result.output.trim())).toEqual(["a", "b", "c", "d"]);
359
- expect(maxInFlight).toBeGreaterThan(1);
360
- expect(maxInFlight).toBeLessThanOrEqual(2);
385
+ expect(barrier.maxInFlight()).toBeGreaterThan(1);
386
+ expect(barrier.maxInFlight()).toBeLessThanOrEqual(2);
361
387
  });
362
388
 
363
389
  it("propagates JavaScript parallel() rejections", async () => {
364
390
  using tempDir = TempDir.createSync("@omp-eval-agent-js-reject-");
365
- const { session, sessionFile, sessionId } = makeEvalSession(tempDir, "js-agent-reject");
391
+ const { session, sessionFile } = makeEvalSession(tempDir, "js-agent-reject");
366
392
  mockAgents();
367
393
  vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
368
394
  if (options.assignment === "bad") {
@@ -373,7 +399,7 @@ describe("agent() through eval runtimes", () => {
373
399
 
374
400
  const result = await executeJs('await parallel([() => agent("ok"), () => agent("bad")]);', {
375
401
  cwd: tempDir.path(),
376
- sessionId,
402
+ sessionId: sharedJsSessionId,
377
403
  session,
378
404
  sessionFile,
379
405
  });
@@ -416,18 +442,7 @@ describe("agent() through eval runtimes", () => {
416
442
  });
417
443
  const { session, sessionFile, sessionId } = makeEvalSession(tempDir, "py-agent-parallel", settings);
418
444
  mockAgents();
419
- let inFlight = 0;
420
- let maxInFlight = 0;
421
- vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
422
- inFlight++;
423
- maxInFlight = Math.max(maxInFlight, inFlight);
424
- try {
425
- await Bun.sleep(options.assignment === "a" ? 30 : 10);
426
- return singleResult(options, { output: options.assignment ?? "" });
427
- } finally {
428
- inFlight--;
429
- }
430
- });
445
+ const barrier = spyConcurrencyBarrier(2);
431
446
 
432
447
  const result = await executePython(
433
448
  'import json\nprint(json.dumps(parallel([lambda n=n: agent(n) for n in ["a", "b", "c", "d"]])))',
@@ -440,8 +455,8 @@ describe("agent() through eval runtimes", () => {
440
455
 
441
456
  expect(result.exitCode).toBe(0);
442
457
  expect(JSON.parse(result.output.trim())).toEqual(["a", "b", "c", "d"]);
443
- expect(maxInFlight).toBeGreaterThan(1);
444
- expect(maxInFlight).toBeLessThanOrEqual(2);
458
+ expect(barrier.maxInFlight()).toBeGreaterThan(1);
459
+ expect(barrier.maxInFlight()).toBeLessThanOrEqual(2);
445
460
  });
446
461
 
447
462
  it("interrupting a Python parallel() fan-out settles the kernel cleanly and preserves session state", async () => {
@@ -526,7 +541,7 @@ describe("agent() through eval runtimes", () => {
526
541
 
527
542
  it("streams enriched agent progress through onStatus before the cell finishes", async () => {
528
543
  using tempDir = TempDir.createSync("@omp-eval-agent-progress-");
529
- const { session, sessionFile, sessionId } = makeEvalSession(tempDir, "js-agent-progress");
544
+ const { session, sessionFile } = makeEvalSession(tempDir, "js-agent-progress");
530
545
  mockAgents();
531
546
 
532
547
  const makeProgress = (options: ExecutorOptions, overrides: Partial<AgentProgress>): AgentProgress => ({
@@ -580,7 +595,7 @@ describe("agent() through eval runtimes", () => {
580
595
  const events: Array<{ op: string; [key: string]: unknown }> = [];
581
596
  const result = await executeJs('await agent("investigate", { label: "Scout" });', {
582
597
  cwd: tempDir.path(),
583
- sessionId,
598
+ sessionId: sharedJsSessionId,
584
599
  session,
585
600
  sessionFile,
586
601
  onStatus: event => events.push(event),
@@ -622,16 +637,28 @@ describe("agent() through eval runtimes", () => {
622
637
  mockAgents();
623
638
 
624
639
  // runSubprocess runs far past the eval timeout budget and emits NO progress
625
- // of its own. The bridge pause must make that delegated time invisible to
626
- // the watchdog.
640
+ // of its own; the bridge pause must make that delegated time invisible to
641
+ // the watchdog. Fake timers replace the real wait: the subprocess parks on
642
+ // `released` so the test can advance the clock past the budget while the
643
+ // bridge call is provably in flight, then release it deterministically.
644
+ let release: (() => void) | undefined;
645
+ const released = new Promise<void>(resolve => {
646
+ release = resolve;
647
+ });
648
+ let markInFlight: (() => void) | undefined;
649
+ const inFlight = new Promise<void>(resolve => {
650
+ markInFlight = resolve;
651
+ });
627
652
  vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
628
- await Bun.sleep(40);
653
+ markInFlight?.();
654
+ await released;
629
655
  return singleResult(options, { output: "done" });
630
656
  });
631
657
 
632
658
  const ops: string[] = [];
659
+ vi.useFakeTimers();
633
660
  using idle = new IdleTimeout(20);
634
- const result = await runEvalAgent(
661
+ const resultPromise = runEvalAgent(
635
662
  { prompt: "investigate" },
636
663
  {
637
664
  session,
@@ -644,11 +671,22 @@ describe("agent() through eval runtimes", () => {
644
671
  },
645
672
  );
646
673
 
674
+ // The bridge paused the watchdog; the subprocess is now blocked in flight.
675
+ await inFlight;
676
+ // Burn far more than the 20ms budget while paused: the watchdog stays armed-off.
677
+ vi.advanceTimersByTime(1_000);
678
+ expect(idle.signal.aborted).toBe(false);
679
+
680
+ release?.();
681
+ const result = await resultPromise;
682
+
647
683
  expect(result.text).toBe("done");
648
684
  expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
649
685
  expect(idle.signal.aborted).toBe(false);
650
686
 
651
- await Bun.sleep(60);
687
+ // RESUME re-armed a fresh window; once the runtime stays idle past it the
688
+ // watchdog finally fires.
689
+ vi.advanceTimersByTime(idle.idleMs + 5);
652
690
  expect(idle.signal.aborted).toBe(true);
653
691
  });
654
692
 
@@ -657,9 +695,20 @@ describe("agent() through eval runtimes", () => {
657
695
  const { session } = makeEvalSession(tempDir, "js-agent-progress-timeout-pause");
658
696
  mockAgents();
659
697
 
660
- // Stream frequent progress snapshots (op:"agent") for well past the budget.
698
+ // Stream frequent progress snapshots (op:"agent") well past the budget.
661
699
  // They render as status, but timeout accounting is controlled only by the
662
- // bridge pause/resume events.
700
+ // bridge pause/resume events — so even a flood of snapshots must not re-arm
701
+ // the watchdog. Fake timers make "past the budget" deterministic: the
702
+ // subprocess emits its snapshots, parks on `released`, and the test advances
703
+ // the clock far past the window before releasing it.
704
+ let release: (() => void) | undefined;
705
+ const released = new Promise<void>(resolve => {
706
+ release = resolve;
707
+ });
708
+ let markInFlight: (() => void) | undefined;
709
+ const inFlight = new Promise<void>(resolve => {
710
+ markInFlight = resolve;
711
+ });
663
712
  vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
664
713
  for (let i = 0; i < 20; i++) {
665
714
  options.onProgress?.({
@@ -679,15 +728,16 @@ describe("agent() through eval runtimes", () => {
679
728
  cost: 0,
680
729
  durationMs: i * 10,
681
730
  });
682
- await Bun.sleep(40);
683
731
  }
732
+ markInFlight?.();
733
+ await released;
684
734
  return singleResult(options, { output: "done" });
685
735
  });
686
736
 
687
737
  const ops: string[] = [];
688
- // Timing invariant (keep, do not re-tighten): total mock work (20*40ms = 800ms) > idle window (250ms) > scheduling jitter (~tens of ms).
738
+ vi.useFakeTimers();
689
739
  using idle = new IdleTimeout(250);
690
- const result = await runEvalAgent(
740
+ const resultPromise = runEvalAgent(
691
741
  { prompt: "investigate" },
692
742
  {
693
743
  session,
@@ -700,6 +750,16 @@ describe("agent() through eval runtimes", () => {
700
750
  },
701
751
  );
702
752
 
753
+ // All snapshots have streamed and the subprocess is blocked in flight.
754
+ await inFlight;
755
+ // Far exceed the 250ms budget while paused: the snapshots already delivered
756
+ // must not have re-armed the watchdog.
757
+ vi.advanceTimersByTime(10_000);
758
+ expect(idle.signal.aborted).toBe(false);
759
+
760
+ release?.();
761
+ const result = await resultPromise;
762
+
703
763
  expect(result.text).toBe("done");
704
764
  expect(ops[0]).toBe(EVAL_TIMEOUT_PAUSE_OP);
705
765
  expect(ops).toContain("agent");
@@ -1,8 +1,8 @@
1
- import { afterEach, describe, expect, it } from "bun:test";
1
+ import { afterEach, beforeEach, describe, expect, it } from "bun:test";
2
2
  import { TempDir } from "@oh-my-pi/pi-utils";
3
3
  import { Settings } from "../../config/settings";
4
4
  import type { ToolSession } from "../../tools";
5
- import { disposeAllVmContexts } from "../js/context-manager";
5
+ import { disposeAllVmContexts, setWorkerCloseTimeoutMsForTests } from "../js/context-manager";
6
6
  import { executeJs } from "../js/executor";
7
7
 
8
8
  const originalWorker = globalThis.Worker;
@@ -15,6 +15,7 @@ interface FakeWorkerStats {
15
15
  interface FakeWorkerBehavior {
16
16
  exitOnClose: boolean;
17
17
  settleRuns: boolean;
18
+ errorOnStart?: boolean;
18
19
  }
19
20
 
20
21
  function makeSession(cwd: string): ToolSession {
@@ -70,6 +71,7 @@ async function waitForRealWorkerExitAfterClose(cwd: string): Promise<void> {
70
71
  worker.addEventListener("close", () => workerClosed.resolve());
71
72
 
72
73
  try {
74
+ worker.postMessage({ type: "init", snapshot });
73
75
  await withTimeout(ready.promise, 1_000, "worker ready");
74
76
  worker.postMessage({
75
77
  type: "run",
@@ -91,6 +93,7 @@ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior)
91
93
  class FakeWorker {
92
94
  #messageListeners = new Set<(event: MessageEvent) => void>();
93
95
  #closeListeners = new Set<(event: Event) => void>();
96
+ #errorListeners = new Set<(event: Event) => void>();
94
97
  #readyQueued = false;
95
98
  #exited = false;
96
99
 
@@ -115,11 +118,18 @@ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior)
115
118
  this.#closeListeners.add(listener as (event: Event) => void);
116
119
  return;
117
120
  }
121
+ if (type === "error") {
122
+ this.#errorListeners.add(listener as (event: Event) => void);
123
+ return;
124
+ }
118
125
  if (type !== "message") return;
119
126
  this.#messageListeners.add(listener as (event: MessageEvent) => void);
120
127
  if (!this.#readyQueued) {
121
128
  this.#readyQueued = true;
122
- queueMicrotask(() => this.#emitMessage({ type: "ready" }));
129
+ queueMicrotask(() => {
130
+ if (behavior.errorOnStart) this.#emitError();
131
+ else this.#emitMessage({ type: "ready" });
132
+ });
123
133
  }
124
134
  }
125
135
 
@@ -128,6 +138,10 @@ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior)
128
138
  this.#closeListeners.delete(listener as (event: Event) => void);
129
139
  return;
130
140
  }
141
+ if (type === "error") {
142
+ this.#errorListeners.delete(listener as (event: Event) => void);
143
+ return;
144
+ }
131
145
  if (type !== "message") return;
132
146
  this.#messageListeners.delete(listener as (event: MessageEvent) => void);
133
147
  }
@@ -148,6 +162,14 @@ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior)
148
162
  const event = new Event("close");
149
163
  for (const listener of this.#closeListeners) listener(event);
150
164
  }
165
+
166
+ #emitError(): void {
167
+ const event = new ErrorEvent("error", {
168
+ message: "fake worker failed to start",
169
+ error: new Error("fake worker failed to start"),
170
+ });
171
+ for (const listener of this.#errorListeners) listener(event);
172
+ }
151
173
  }
152
174
 
153
175
  Object.defineProperty(globalThis, "Worker", {
@@ -158,8 +180,18 @@ function installFakeWorker(stats: FakeWorkerStats, behavior: FakeWorkerBehavior)
158
180
  }
159
181
 
160
182
  describe("JavaScript eval worker lifecycle", () => {
183
+ let restoreCloseTimeoutMs = 0;
184
+ beforeEach(() => {
185
+ // Shrink the graceful-close grace period so the "close acked but the worker
186
+ // never exits -> force terminate" contract is proven without a real 1s wait.
187
+ restoreCloseTimeoutMs = setWorkerCloseTimeoutMsForTests(1);
188
+ });
189
+
161
190
  afterEach(async () => {
191
+ // Dispose while the shrunk timeout is still active so a hung worker's afterEach
192
+ // close also force-terminates instantly, then restore the production default.
162
193
  await disposeAllVmContexts();
194
+ setWorkerCloseTimeoutMsForTests(restoreCloseTimeoutMs);
163
195
  Object.defineProperty(globalThis, "Worker", {
164
196
  configurable: true,
165
197
  writable: true,
@@ -238,4 +270,22 @@ describe("JavaScript eval worker lifecycle", () => {
238
270
  expect(stats.closeRequests).toBe(0);
239
271
  expect(stats.terminateCalls).toBe(1);
240
272
  });
273
+
274
+ it("falls back to the inline worker when the spawned worker errors during startup", async () => {
275
+ using tempDir = TempDir.createSync("@omp-js-worker-error-");
276
+ const stats: FakeWorkerStats = { closeRequests: 0, terminateCalls: 0 };
277
+ installFakeWorker(stats, { exitOnClose: true, settleRuns: true, errorOnStart: true });
278
+
279
+ const session = makeSession(tempDir.path());
280
+ const sessionId = `js-worker-error:${crypto.randomUUID()}`;
281
+
282
+ // The spawned worker emits an `error` event instead of `ready`. Without fail-fast
283
+ // error handling the handshake would stall until WORKER_INIT_TIMEOUT_MS (15s); with
284
+ // it, the handshake rejects at once and the inline worker runs the cell.
285
+ const result = await executeJs("return String(6 * 7);", { cwd: tempDir.path(), sessionId, session });
286
+ expect(result.exitCode).toBe(0);
287
+ expect(result.output.trim()).toBe("42");
288
+ // The errored primary worker is torn down before the inline retry takes over.
289
+ expect(stats.terminateCalls).toBe(1);
290
+ });
241
291
  });
@@ -27,6 +27,7 @@ interface WorkerHandle {
27
27
  mode: "worker" | "inline";
28
28
  send(msg: WorkerInbound): void;
29
29
  onMessage(handler: (msg: WorkerOutbound) => void): () => void;
30
+ onError(handler: (error: Error) => void): () => void;
30
31
  close(): Promise<boolean>;
31
32
  terminate(): Promise<void>;
32
33
  }
@@ -59,6 +60,22 @@ const resettingSessions = new Map<string, Promise<void>>();
59
60
  // SIGILL/SIGSEGV. Callers that pass a larger per-cell budget still dominate.
60
61
  const WORKER_INIT_TIMEOUT_MS = 15_000;
61
62
  const WORKER_CLOSE_TIMEOUT_MS = 1_000;
63
+ // Active graceful-close grace period before a worker that ack'd `close` but never
64
+ // emitted its `close` event is force-terminated. Defaults to the production floor;
65
+ // tests override it (and restore it) to exercise the close-timeout -> terminate
66
+ // path without a real wall-clock wait.
67
+ let workerCloseTimeoutMs: number = WORKER_CLOSE_TIMEOUT_MS;
68
+
69
+ /**
70
+ * Test-only seam: override the graceful-close grace period (ms). Returns the
71
+ * previous value so callers can restore it. Production always uses
72
+ * {@link WORKER_CLOSE_TIMEOUT_MS}; never call this outside tests.
73
+ */
74
+ export function setWorkerCloseTimeoutMsForTests(ms: number): number {
75
+ const previous = workerCloseTimeoutMs;
76
+ workerCloseTimeoutMs = ms;
77
+ return previous;
78
+ }
62
79
 
63
80
  export async function executeInVmContext(options: {
64
81
  sessionKey: string;
@@ -124,6 +141,27 @@ export async function disposeAllVmContexts(): Promise<void> {
124
141
  await Promise.all(all.map(session => killSession(session, new ToolError("JS context disposed"), { force: false })));
125
142
  }
126
143
 
144
+ /**
145
+ * Smoke probe: spawn the JS eval worker through the worker-host entry and prove
146
+ * it answers the `init` handshake on a real worker thread (not the inline
147
+ * fallback). Catches the silent worker-load and init-message-drop regressions
148
+ * that otherwise strand every cell on the init timeout in a distribution build —
149
+ * the failure mode that motivated `installWorkerInbox`. Wired into
150
+ * `omp --smoke-test` so binary / source / tarball installs all exercise it.
151
+ */
152
+ export async function smokeTestJsEvalWorker(): Promise<void> {
153
+ const worker = spawnJsWorker();
154
+ const session: JsSession = { sessionKey: "smoke", worker, state: "alive", pending: new Map() };
155
+ try {
156
+ await initWorker(session, { cwd: process.cwd(), sessionId: "smoke" }, WORKER_INIT_TIMEOUT_MS);
157
+ if (worker.mode !== "worker") {
158
+ throw new Error("JS eval worker smoke fell back to the inline worker (real worker failed to start)");
159
+ }
160
+ } finally {
161
+ await worker.terminate().catch(() => undefined);
162
+ }
163
+ }
164
+
127
165
  async function runOnce(
128
166
  session: JsSession,
129
167
  options: {
@@ -186,41 +224,45 @@ async function acquireSession(sessionKey: string, snapshot: SessionSnapshot, tim
186
224
  if (starting) return await starting;
187
225
 
188
226
  const startup = (async (): Promise<JsSession> => {
189
- const worker = await spawnJsWorker();
227
+ // The message listener must be attached synchronously after `new Worker`:
228
+ // Bun drops messages posted before a listener exists, and WorkerCore emits
229
+ // `ready` from its constructor on load. `spawnJsWorker` + `initWorker` run with
230
+ // no intervening await, so `ready` can never race the attach.
231
+ const worker = spawnJsWorker();
190
232
  const session: JsSession = {
191
233
  sessionKey,
192
234
  worker,
193
235
  state: "alive",
194
236
  pending: new Map(),
195
237
  };
196
- const { promise: readyPromise, resolve: resolveReady, reject: rejectReady } = Promise.withResolvers<void>();
197
- let resolved = false;
198
- const unsubscribe = worker.onMessage(msg => {
199
- if (!resolved && msg.type === "ready") {
200
- resolved = true;
201
- resolveReady();
202
- return;
203
- }
204
- if (!resolved && msg.type === "init-failed") {
205
- resolved = true;
206
- rejectReady(errorFromPayload(msg.error));
207
- return;
208
- }
209
- handleSessionMessage(session, msg);
210
- });
238
+ // Init headroom is the fixed infrastructure floor; the caller's per-cell timeout
239
+ // dominates when larger so users can grant more by raising `timeout` on a cell.
240
+ const readyTimeoutMs = Math.max(WORKER_INIT_TIMEOUT_MS, timeoutMs ?? 0);
211
241
  try {
212
- // Init headroom is the fixed infrastructure floor; the caller's per-cell timeout
213
- // dominates when larger so users can grant more by raising `timeout` on a cell.
214
- const readyTimeoutMs = Math.max(WORKER_INIT_TIMEOUT_MS, timeoutMs ?? 0);
215
- await raceWithTimeout(readyPromise, readyTimeoutMs, "Timed out initializing JS eval worker");
216
- worker.send({ type: "init", snapshot });
217
- sessions.set(sessionKey, session);
218
- return session;
242
+ await initWorker(session, snapshot, readyTimeoutMs);
219
243
  } catch (error) {
220
- unsubscribe();
244
+ // Worker-thread crash/load failures surface asynchronously via the worker
245
+ // `error` event — after `spawnJsWorker`'s synchronous try/catch already
246
+ // returned — so the only signal is the rejected handshake. Retry on the
247
+ // inline worker so a broken module graph fails fast instead of stalling
248
+ // every cell on the init timeout and then dying with exitCode 1.
221
249
  await worker.terminate().catch(() => undefined);
222
- throw error;
250
+ if (worker.mode === "inline") throw error;
251
+ logger.warn("JS eval worker init failed; retrying with inline worker (no sync-loop guard)", {
252
+ error: error instanceof Error ? error.message : String(error),
253
+ });
254
+ const inline = spawnInlineWorker();
255
+ session.worker = inline;
256
+ session.state = "alive";
257
+ try {
258
+ await initWorker(session, snapshot, readyTimeoutMs);
259
+ } catch (inlineError) {
260
+ await inline.terminate().catch(() => undefined);
261
+ throw inlineError;
262
+ }
223
263
  }
264
+ sessions.set(sessionKey, session);
265
+ return session;
224
266
  })();
225
267
  startingSessions.set(sessionKey, startup);
226
268
  try {
@@ -230,6 +272,49 @@ async function acquireSession(sessionKey: string, snapshot: SessionSnapshot, tim
230
272
  }
231
273
  }
232
274
 
275
+ async function initWorker(session: JsSession, snapshot: SessionSnapshot, timeoutMs: number): Promise<void> {
276
+ const worker = session.worker;
277
+ const { promise: readyPromise, resolve: resolveReady, reject: rejectReady } = Promise.withResolvers<void>();
278
+ let resolved = false;
279
+ const unsubscribeMessage = worker.onMessage(msg => {
280
+ if (!resolved && msg.type === "ready") {
281
+ resolved = true;
282
+ resolveReady();
283
+ return;
284
+ }
285
+ if (!resolved && msg.type === "init-failed") {
286
+ resolved = true;
287
+ rejectReady(errorFromPayload(msg.error));
288
+ return;
289
+ }
290
+ handleSessionMessage(session, msg);
291
+ });
292
+ const unsubscribeError = worker.onError(error => {
293
+ if (!resolved) {
294
+ resolved = true;
295
+ rejectReady(error);
296
+ return;
297
+ }
298
+ // Worker died after a successful handshake: tear the session down so the
299
+ // in-flight run (and the next acquire) fail fast instead of hanging on a
300
+ // worker that will never reply.
301
+ void killSessionFor(session, error, { force: true });
302
+ });
303
+ try {
304
+ // Attach listeners and send init before awaiting ready. The worker now
305
+ // emits ready only in response to init, so this ordering is race-free.
306
+ worker.send({ type: "init", snapshot });
307
+ await raceWithTimeout(readyPromise, timeoutMs, "Timed out initializing JS eval worker");
308
+ } catch (error) {
309
+ // Handshake failed (timeout, init-failed, or worker error): drop both listeners
310
+ // so the abandoned worker can't keep routing messages into a session the caller
311
+ // is about to discard or retry on the inline fallback.
312
+ unsubscribeMessage();
313
+ unsubscribeError();
314
+ throw error;
315
+ }
316
+ }
317
+
233
318
  function handleSessionMessage(session: JsSession, msg: WorkerOutbound): void {
234
319
  switch (msg.type) {
235
320
  case "text": {
@@ -379,11 +464,11 @@ async function raceWithTimeout<T>(promise: Promise<T>, timeoutMs: number, reason
379
464
  }
380
465
  }
381
466
 
382
- async function spawnJsWorker(): Promise<WorkerHandle> {
467
+ function spawnJsWorker(): WorkerHandle {
383
468
  try {
384
469
  const hostEntry = workerHostEntry();
385
470
  const worker = hostEntry
386
- ? new Worker(hostEntry, { type: "module", argv: ["__omp_js_eval_worker"] })
471
+ ? new Worker(hostEntry, { type: "module", argv: ["__omp_worker_js_eval"] })
387
472
  : new Worker(new URL("./worker-entry.ts", import.meta.url).href, { type: "module" });
388
473
  return wrapBunWorker(worker);
389
474
  } catch (err) {
@@ -405,6 +490,17 @@ function wrapBunWorker(worker: Worker): WorkerHandle {
405
490
  worker.addEventListener("message", wrap);
406
491
  return () => worker.removeEventListener("message", wrap);
407
492
  },
493
/**
 * Subscribes `handler` to failures reported by the underlying worker.
 *
 * `error` events are normalized through `errorFromWorkerEvent`;
 * `messageerror` events are wrapped in a `ToolError` that embeds the
 * stringified event data.
 *
 * @returns A function that removes both listeners.
 */
onError(handler) {
	function reportWorkerError(event: ErrorEvent): void {
		handler(errorFromWorkerEvent(event));
	}
	function reportMessageError(event: MessageEvent): void {
		handler(new ToolError(`JS eval worker message error: ${String(event.data)}`));
	}
	worker.addEventListener("error", reportWorkerError);
	worker.addEventListener("messageerror", reportMessageError);
	return function unsubscribe(): void {
		worker.removeEventListener("messageerror", reportMessageError);
		worker.removeEventListener("error", reportWorkerError);
	};
},
408
504
  async close() {
409
505
  const { promise: closed, resolve } = Promise.withResolvers<boolean>();
410
506
  let settled = false;
@@ -433,7 +529,7 @@ function wrapBunWorker(worker: Worker): WorkerHandle {
433
529
  finishIfClosed();
434
530
  });
435
531
  worker.addEventListener("close", onClose);
436
- timeout = setTimeout(() => finish(false), WORKER_CLOSE_TIMEOUT_MS);
532
+ timeout = setTimeout(() => finish(false), workerCloseTimeoutMs);
437
533
  worker.postMessage({ type: "close" } satisfies WorkerInbound);
438
534
  return await closed;
439
535
  },
@@ -443,6 +539,12 @@ function wrapBunWorker(worker: Worker): WorkerHandle {
443
539
  };
444
540
  }
445
541
 
542
/**
 * Converts a worker `error` event into an `Error` instance.
 *
 * Resolution order:
 * 1. the event's `error` property, when it already is an `Error`;
 * 2. a new `Error` built from the event's non-empty `message`;
 * 3. a generic "Unknown JS eval worker error" fallback.
 *
 * @param event - The error event received from the worker.
 * @returns An `Error` describing the failure.
 */
function errorFromWorkerEvent(event: ErrorEvent): Error {
	const thrown: unknown = event.error;
	if (thrown instanceof Error) {
		return thrown;
	}
	const text = event.message;
	// `||` is deliberate: an empty message must fall through to the generic text.
	return new Error(text || "Unknown JS eval worker error");
}
547
+
446
548
  /**
447
549
  * Inline fallback for environments where Bun cannot spawn the worker entry
448
550
  * (e.g. some test runners). Preserves behavior but cannot interrupt synchronous
@@ -473,6 +575,7 @@ function spawnInlineWorker(): WorkerHandle {
473
575
  hostListeners.add(handler);
474
576
  return () => hostListeners.delete(handler);
475
577
  },
578
+ onError: () => () => {},
476
579
  async close() {
477
580
  const { promise: closed, resolve } = Promise.withResolvers<boolean>();
478
581
  let settled = false;
@@ -491,7 +594,7 @@ function spawnInlineWorker(): WorkerHandle {
491
594
  if (msg.type === "closed") finish(true);
492
595
  });
493
596
  this.send({ type: "close" });
494
- timeout = setTimeout(() => finish(false), WORKER_CLOSE_TIMEOUT_MS);
597
+ timeout = setTimeout(() => finish(false), workerCloseTimeoutMs);
495
598
  return await closed;
496
599
  },
497
600
  async terminate() {
@@ -43,13 +43,13 @@ export class WorkerCore {
43
43
  constructor(transport: Transport) {
44
44
  this.#transport = transport;
45
45
  this.#unsubscribe = transport.onMessage(msg => this.#handle(msg));
46
- transport.send({ type: "ready" });
47
46
  }
48
47
 
49
48
  #handle(msg: WorkerInbound): void {
50
49
  switch (msg.type) {
51
50
  case "init":
52
51
  this.#ensureRuntime(msg.snapshot);
52
+ this.#transport.send({ type: "ready" });
53
53
  return;
54
54
  case "run":
55
55
  void this.#runOne(msg.runId, msg.code, msg.filename, msg.snapshot);