@oh-my-pi/pi-coding-agent 15.9.5 → 15.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/CHANGELOG.md +98 -1
  2. package/dist/types/cli/args.d.ts +1 -1
  3. package/dist/types/cli/gallery-cli.d.ts +43 -0
  4. package/dist/types/cli/gallery-fixtures/agentic.d.ts +2 -0
  5. package/dist/types/cli/gallery-fixtures/codeintel.d.ts +3 -0
  6. package/dist/types/cli/gallery-fixtures/edit.d.ts +3 -0
  7. package/dist/types/cli/gallery-fixtures/fs.d.ts +2 -0
  8. package/dist/types/cli/gallery-fixtures/index.d.ts +4 -0
  9. package/dist/types/cli/gallery-fixtures/interaction.d.ts +3 -0
  10. package/dist/types/cli/gallery-fixtures/memory.d.ts +2 -0
  11. package/dist/types/cli/gallery-fixtures/misc.d.ts +3 -0
  12. package/dist/types/cli/gallery-fixtures/search.d.ts +3 -0
  13. package/dist/types/cli/gallery-fixtures/shell.d.ts +3 -0
  14. package/dist/types/cli/gallery-fixtures/types.d.ts +44 -0
  15. package/dist/types/cli/gallery-fixtures/web.d.ts +2 -0
  16. package/dist/types/cli/gallery-screenshot.d.ts +35 -0
  17. package/dist/types/commands/gallery.d.ts +47 -0
  18. package/dist/types/config/keybindings.d.ts +10 -2
  19. package/dist/types/config/model-id-affixes.d.ts +2 -0
  20. package/dist/types/config/model-registry.d.ts +8 -1
  21. package/dist/types/config/settings-schema.d.ts +43 -7
  22. package/dist/types/edit/file-snapshot-store.d.ts +1 -1
  23. package/dist/types/eval/backend.d.ts +6 -6
  24. package/dist/types/eval/bridge-timeout.d.ts +27 -0
  25. package/dist/types/eval/idle-timeout.d.ts +16 -14
  26. package/dist/types/eval/js/executor.d.ts +3 -3
  27. package/dist/types/eval/py/executor.d.ts +2 -2
  28. package/dist/types/eval/py/spawn-options.d.ts +58 -0
  29. package/dist/types/extensibility/plugins/marketplace-auto-update.d.ts +8 -0
  30. package/dist/types/lsp/types.d.ts +10 -0
  31. package/dist/types/main.d.ts +3 -2
  32. package/dist/types/memory-backend/index.d.ts +2 -1
  33. package/dist/types/memory-backend/resolve.d.ts +1 -1
  34. package/dist/types/memory-backend/types.d.ts +1 -1
  35. package/dist/types/modes/components/assistant-message.d.ts +5 -0
  36. package/dist/types/modes/components/copy-selector.d.ts +22 -0
  37. package/dist/types/modes/components/custom-editor.d.ts +2 -1
  38. package/dist/types/modes/components/model-selector.d.ts +1 -0
  39. package/dist/types/modes/components/tool-execution.d.ts +18 -0
  40. package/dist/types/modes/controllers/command-controller.d.ts +0 -1
  41. package/dist/types/modes/controllers/selector-controller.d.ts +2 -1
  42. package/dist/types/modes/index.d.ts +5 -4
  43. package/dist/types/modes/interactive-mode.d.ts +2 -2
  44. package/dist/types/modes/setup-version.d.ts +11 -0
  45. package/dist/types/modes/setup-wizard/index.d.ts +2 -1
  46. package/dist/types/modes/setup-wizard/scenes/web-search.d.ts +2 -1
  47. package/dist/types/modes/types.d.ts +2 -2
  48. package/dist/types/modes/utils/copy-targets.d.ts +53 -0
  49. package/dist/types/sdk.d.ts +1 -1
  50. package/dist/types/task/executor.d.ts +7 -0
  51. package/dist/types/telemetry-export.d.ts +1 -1
  52. package/dist/types/tools/eval-render.d.ts +1 -0
  53. package/dist/types/tools/fetch.d.ts +15 -7
  54. package/dist/types/tools/render-utils.d.ts +33 -0
  55. package/dist/types/tools/renderers.d.ts +16 -2
  56. package/dist/types/tools/search.d.ts +1 -1
  57. package/dist/types/tools/write.d.ts +2 -0
  58. package/dist/types/tui/code-cell.d.ts +6 -0
  59. package/dist/types/tui/output-block.d.ts +11 -0
  60. package/dist/types/web/scrapers/github.d.ts +22 -0
  61. package/dist/types/web/search/providers/perplexity.d.ts +8 -1
  62. package/dist/types/web/search/types.d.ts +1 -1
  63. package/package.json +9 -9
  64. package/scripts/dev-launch +42 -0
  65. package/scripts/dev-launch-preload.ts +19 -0
  66. package/src/autoresearch/dashboard.ts +11 -21
  67. package/src/cli/args.ts +2 -2
  68. package/src/cli/claude-trace-cli.ts +13 -1
  69. package/src/cli/gallery-cli.ts +223 -0
  70. package/src/cli/gallery-fixtures/agentic.ts +292 -0
  71. package/src/cli/gallery-fixtures/codeintel.ts +188 -0
  72. package/src/cli/gallery-fixtures/edit.ts +194 -0
  73. package/src/cli/gallery-fixtures/fs.ts +153 -0
  74. package/src/cli/gallery-fixtures/index.ts +40 -0
  75. package/src/cli/gallery-fixtures/interaction.ts +49 -0
  76. package/src/cli/gallery-fixtures/memory.ts +81 -0
  77. package/src/cli/gallery-fixtures/misc.ts +221 -0
  78. package/src/cli/gallery-fixtures/search.ts +213 -0
  79. package/src/cli/gallery-fixtures/shell.ts +167 -0
  80. package/src/cli/gallery-fixtures/types.ts +41 -0
  81. package/src/cli/gallery-fixtures/web.ts +158 -0
  82. package/src/cli/gallery-screenshot.ts +279 -0
  83. package/src/cli-commands.ts +1 -0
  84. package/src/commands/gallery.ts +52 -0
  85. package/src/commands/launch.ts +1 -1
  86. package/src/config/keybindings.ts +68 -2
  87. package/src/config/model-equivalence.ts +35 -12
  88. package/src/config/model-id-affixes.ts +39 -22
  89. package/src/config/model-registry.ts +16 -16
  90. package/src/config/settings-schema.ts +29 -6
  91. package/src/config/settings.ts +11 -0
  92. package/src/dap/client.ts +14 -16
  93. package/src/debug/raw-sse.ts +18 -4
  94. package/src/edit/file-snapshot-store.ts +1 -1
  95. package/src/edit/index.ts +1 -1
  96. package/src/edit/renderer.ts +43 -55
  97. package/src/edit/streaming.ts +1 -1
  98. package/src/eval/__tests__/agent-bridge.test.ts +102 -58
  99. package/src/eval/__tests__/bridge-timeout.test.ts +64 -0
  100. package/src/eval/__tests__/idle-timeout.test.ts +26 -12
  101. package/src/eval/__tests__/kernel-spawn.test.ts +103 -0
  102. package/src/eval/__tests__/llm-bridge.test.ts +10 -10
  103. package/src/eval/agent-bridge.ts +38 -12
  104. package/src/eval/backend.ts +6 -6
  105. package/src/eval/bridge-timeout.ts +44 -0
  106. package/src/eval/idle-timeout.ts +33 -15
  107. package/src/eval/js/executor.ts +10 -10
  108. package/src/eval/llm-bridge.ts +4 -5
  109. package/src/eval/py/executor.ts +6 -6
  110. package/src/eval/py/kernel.ts +11 -1
  111. package/src/eval/py/spawn-options.ts +126 -0
  112. package/src/export/ttsr.ts +9 -0
  113. package/src/extensibility/extensions/runner.ts +3 -0
  114. package/src/extensibility/plugins/doctor.ts +0 -1
  115. package/src/extensibility/plugins/marketplace-auto-update.ts +49 -0
  116. package/src/goals/tools/goal-tool.ts +2 -2
  117. package/src/internal-urls/docs-index.generated.ts +7 -6
  118. package/src/lsp/client.ts +179 -52
  119. package/src/lsp/index.ts +38 -4
  120. package/src/lsp/render.ts +3 -3
  121. package/src/lsp/types.ts +10 -0
  122. package/src/main.ts +47 -52
  123. package/src/memory-backend/index.ts +13 -1
  124. package/src/memory-backend/resolve.ts +3 -5
  125. package/src/memory-backend/types.ts +1 -1
  126. package/src/modes/components/agent-dashboard.ts +13 -4
  127. package/src/modes/components/assistant-message.ts +22 -1
  128. package/src/modes/components/copy-selector.ts +249 -0
  129. package/src/modes/components/custom-editor.ts +10 -1
  130. package/src/modes/components/extensions/extension-list.ts +17 -8
  131. package/src/modes/components/history-search.ts +19 -11
  132. package/src/modes/components/model-selector.ts +125 -29
  133. package/src/modes/components/oauth-selector.ts +28 -12
  134. package/src/modes/components/session-observer-overlay.ts +13 -15
  135. package/src/modes/components/session-selector.ts +24 -13
  136. package/src/modes/components/status-line.ts +3 -5
  137. package/src/modes/components/tool-execution.ts +83 -24
  138. package/src/modes/components/tree-selector.ts +19 -7
  139. package/src/modes/components/user-message-selector.ts +25 -14
  140. package/src/modes/controllers/command-controller.ts +13 -118
  141. package/src/modes/controllers/event-controller.ts +26 -10
  142. package/src/modes/controllers/input-controller.ts +11 -3
  143. package/src/modes/controllers/selector-controller.ts +40 -3
  144. package/src/modes/index.ts +5 -4
  145. package/src/modes/interactive-mode.ts +21 -7
  146. package/src/modes/setup-version.ts +11 -0
  147. package/src/modes/setup-wizard/index.ts +3 -2
  148. package/src/modes/setup-wizard/scenes/web-search.ts +3 -2
  149. package/src/modes/theme/theme.ts +46 -10
  150. package/src/modes/types.ts +2 -2
  151. package/src/modes/utils/context-usage.ts +10 -6
  152. package/src/modes/utils/copy-targets.ts +254 -0
  153. package/src/modes/utils/hotkeys-markdown.ts +1 -0
  154. package/src/prompts/tools/ast-edit.md +1 -1
  155. package/src/prompts/tools/ast-grep.md +1 -1
  156. package/src/prompts/tools/read.md +1 -1
  157. package/src/prompts/tools/search.md +1 -1
  158. package/src/sdk.ts +21 -23
  159. package/src/session/agent-session.ts +13 -9
  160. package/src/slash-commands/builtin-registry.ts +4 -12
  161. package/src/slash-commands/helpers/usage-report.ts +2 -0
  162. package/src/task/executor.ts +20 -2
  163. package/src/task/render.ts +37 -11
  164. package/src/telemetry-export.ts +25 -7
  165. package/src/tools/bash.ts +18 -8
  166. package/src/tools/browser/render.ts +5 -4
  167. package/src/tools/debug.ts +3 -3
  168. package/src/tools/eval-backends.ts +6 -17
  169. package/src/tools/eval-render.ts +28 -10
  170. package/src/tools/eval.ts +19 -23
  171. package/src/tools/fetch.ts +99 -89
  172. package/src/tools/read.ts +7 -7
  173. package/src/tools/render-utils.ts +63 -3
  174. package/src/tools/renderers.ts +16 -1
  175. package/src/tools/report-tool-issue.ts +1 -1
  176. package/src/tools/search.ts +173 -81
  177. package/src/tools/ssh.ts +21 -8
  178. package/src/tools/todo.ts +20 -7
  179. package/src/tools/write.ts +39 -9
  180. package/src/tui/code-cell.ts +19 -4
  181. package/src/tui/output-block.ts +14 -0
  182. package/src/web/scrapers/github.ts +255 -3
  183. package/src/web/scrapers/youtube.ts +3 -2
  184. package/src/web/search/providers/perplexity.ts +199 -51
  185. package/src/web/search/render.ts +42 -57
  186. package/src/web/search/types.ts +5 -1
  187. package/dist/types/eval/heartbeat.d.ts +0 -45
  188. package/src/eval/__tests__/heartbeat.test.ts +0 -84
  189. package/src/eval/__tests__/shared-executors.test.ts +0 -609
  190. package/src/eval/heartbeat.ts +0 -74
  191. /package/dist/types/eval/__tests__/{heartbeat.test.d.ts → bridge-timeout.test.d.ts} +0 -0
  192. /package/dist/types/eval/__tests__/{shared-executors.test.d.ts → kernel-spawn.test.d.ts} +0 -0
@@ -10,7 +10,7 @@ import { AgentOutputManager } from "../../task/output-manager";
10
10
  import type { AgentDefinition, AgentProgress, SingleResult } from "../../task/types";
11
11
  import type { ToolSession } from "../../tools";
12
12
  import { EVAL_AGENT_MAX_DEPTH, runEvalAgent } from "../agent-bridge";
13
- import { EVAL_HEARTBEAT_OP, setBridgeHeartbeatIntervalMs } from "../heartbeat";
13
+ import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
14
14
  import { IdleTimeout } from "../idle-timeout";
15
15
  import { disposeAllVmContexts } from "../js/context-manager";
16
16
  import { executeJs } from "../js/executor";
@@ -231,12 +231,62 @@ describe("runEvalAgent", () => {
231
231
  });
232
232
  await expect(runEvalAgent({ prompt: "fail" }, { session: makeSession() })).rejects.toThrow("boom");
233
233
  });
234
+
235
+ // Regression: a runtime-limit abort returns exitCode=1, stderr="", error=undefined,
236
+ // aborted=true, abortReason="Subagent runtime limit exceeded (...)". The previous
237
+ // failure-message coalesce stopped at the empty `stderr` (since `??` only skips
238
+ // nullish values) and shipped an empty error through the bridge — Python then
239
+ // surfaced the generic `bridge call '__agent__' failed`. See #2006.
240
+ it("surfaces abortReason for aborts that leave stderr empty", async () => {
241
+ mockAgents();
242
+ const runSpy = vi.spyOn(taskExecutor, "runSubprocess");
243
+ runSpy.mockImplementationOnce(async options =>
244
+ singleResult(options, {
245
+ exitCode: 1,
246
+ output: "",
247
+ stderr: "",
248
+ error: undefined,
249
+ aborted: true,
250
+ abortReason: "Subagent runtime limit exceeded (task.maxRuntimeMs=900000)",
251
+ }),
252
+ );
253
+ runSpy.mockImplementationOnce(async options =>
254
+ singleResult(options, {
255
+ exitCode: 1,
256
+ output: "",
257
+ stderr: " ",
258
+ error: " ",
259
+ aborted: true,
260
+ abortReason: "Cancelled by caller",
261
+ }),
262
+ );
263
+ runSpy.mockImplementationOnce(async options =>
264
+ singleResult(options, {
265
+ exitCode: 1,
266
+ output: "",
267
+ stderr: "",
268
+ error: undefined,
269
+ }),
270
+ );
271
+
272
+ await expect(runEvalAgent({ prompt: "slow" }, { session: makeSession() })).rejects.toThrow(
273
+ "Subagent runtime limit exceeded (task.maxRuntimeMs=900000)",
274
+ );
275
+ // Whitespace-only stderr/error must not mask abortReason either.
276
+ await expect(runEvalAgent({ prompt: "cancelled" }, { session: makeSession() })).rejects.toThrow(
277
+ "Cancelled by caller",
278
+ );
279
+ // Last resort: still produce a non-empty message even when nothing useful is set,
280
+ // so Python never falls back to `bridge call '__agent__' failed`.
281
+ await expect(runEvalAgent({ prompt: "blank" }, { session: makeSession() })).rejects.toThrow(
282
+ "agent() subagent 'task' failed.",
283
+ );
284
+ });
234
285
  });
235
286
 
236
287
  describe("agent() through eval runtimes", () => {
237
288
  afterEach(() => {
238
289
  vi.restoreAllMocks();
239
- setBridgeHeartbeatIntervalMs();
240
290
  });
241
291
 
242
292
  afterAll(async () => {
@@ -327,18 +377,6 @@ describe("agent() through eval runtimes", () => {
327
377
  singleResult(options, { output: "hello from python" }),
328
378
  );
329
379
 
330
- const probe = await executePython('print("probe")', {
331
- cwd: tempDir.path(),
332
- sessionId: `${sessionId}:probe`,
333
- sessionFile,
334
- kernelMode: "per-call",
335
- });
336
- if (probe.exitCode === undefined && probe.cancelled) {
337
- expect(probe.output).toBe("");
338
- return;
339
- }
340
- expect(probe.exitCode).toBe(0);
341
-
342
380
  const result = await executePython('print(agent("hi"))', {
343
381
  cwd: tempDir.path(),
344
382
  sessionId,
@@ -346,6 +384,10 @@ describe("agent() through eval runtimes", () => {
346
384
  kernelMode: "per-call",
347
385
  toolSession: session,
348
386
  });
387
+ if (result.exitCode === undefined && result.cancelled) {
388
+ expect(result.output).toBe("");
389
+ return; // kernel unavailable in this environment
390
+ }
349
391
 
350
392
  expect(result.exitCode).toBe(0);
351
393
  expect(result.output.trim()).toBe("hello from python");
@@ -374,22 +416,14 @@ describe("agent() through eval runtimes", () => {
374
416
  }
375
417
  });
376
418
 
377
- const probe = await executePython('print("probe")', {
378
- cwd: tempDir.path(),
379
- sessionId: `${sessionId}:probe`,
380
- sessionFile,
381
- kernelMode: "per-call",
382
- });
383
- if (probe.exitCode === undefined && probe.cancelled) {
384
- expect(probe.output).toBe("");
385
- return;
386
- }
387
- expect(probe.exitCode).toBe(0);
388
-
389
419
  const result = await executePython(
390
420
  'import json\nprint(json.dumps(parallel([lambda n=n: agent(n) for n in ["a", "b", "c", "d"]])))',
391
421
  { cwd: tempDir.path(), sessionId, sessionFile, kernelMode: "per-call", toolSession: session },
392
422
  );
423
+ if (result.exitCode === undefined && result.cancelled) {
424
+ expect(result.output).toBe("");
425
+ return; // kernel unavailable in this environment
426
+ }
393
427
 
394
428
  expect(result.exitCode).toBe(0);
395
429
  expect(JSON.parse(result.output.trim())).toEqual(["a", "b", "c", "d"]);
@@ -413,7 +447,14 @@ describe("agent() through eval runtimes", () => {
413
447
  // The host must respond the instant the cell aborts so the kernel can
414
448
  // unwind via KeyboardInterrupt instead of being hard-killed (which used to
415
449
  // surface "[kernel] Python kernel shutdown" and lose all session state).
450
+ let inFlight = 0;
451
+ let markSaturated: (() => void) | undefined;
452
+ const saturated = new Promise<void>(resolve => {
453
+ markSaturated = resolve;
454
+ });
416
455
  vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
456
+ // task.maxConcurrency=6 → six bridge calls block at once; signal then.
457
+ if (++inFlight >= 6) markSaturated?.();
417
458
  await Bun.sleep(9000); // deliberately ignores options.signal
418
459
  return singleResult(options, { output: options.assignment ?? "" });
419
460
  });
@@ -433,8 +474,9 @@ describe("agent() through eval runtimes", () => {
433
474
  expect(seed.exitCode).toBe(0);
434
475
 
435
476
  const ac = new AbortController();
436
- // Abort ~1s in, after the worker threads are blocked in their bridge calls.
437
- setTimeout(() => ac.abort(new Error("external interrupt")), 1000);
477
+ // Abort the instant all six worker threads are confirmed blocked in their
478
+ // bridge calls (condition-driven) instead of waiting a fixed wall second.
479
+ void saturated.then(() => ac.abort(new Error("external interrupt")));
438
480
 
439
481
  const start = Date.now();
440
482
  const result = await executePython(
@@ -560,52 +602,52 @@ describe("agent() through eval runtimes", () => {
560
602
  expect(displayAgentEvents.length).toBe(2);
561
603
  });
562
604
 
563
- it("keeps the idle watchdog armed while a quiet agent() runs past the budget", async () => {
564
- using tempDir = TempDir.createSync("@omp-eval-agent-heartbeat-");
565
- const { session } = makeEvalSession(tempDir, "js-agent-heartbeat");
605
+ it("pauses the idle watchdog while a quiet agent() runs past the budget", async () => {
606
+ using tempDir = TempDir.createSync("@omp-eval-agent-timeout-pause-");
607
+ const { session } = makeEvalSession(tempDir, "js-agent-timeout-pause");
566
608
  mockAgents();
567
- // Heartbeat cadence well under the idle budget so a working-but-silent
568
- // subagent re-arms the watchdog several times before it could expire.
569
- setBridgeHeartbeatIntervalMs(15);
570
609
 
571
- // runSubprocess runs far past the budget and emits NO progress of its own
572
- // the only thing standing between the subagent and a spurious idle abort
573
- // is the heartbeat keepalive the bridge pumps while it awaits.
610
+ // runSubprocess runs far past the eval timeout budget and emits NO progress
611
+ // of its own. The bridge pause must make that delegated time invisible to
612
+ // the watchdog.
574
613
  vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
575
- await Bun.sleep(200);
614
+ await Bun.sleep(40);
576
615
  return singleResult(options, { output: "done" });
577
616
  });
578
617
 
579
- // Mirror the eval tool's wiring: an IdleTimeout drives cancellation and
580
- // ONLY a bridge heartbeat re-arms it.
581
- using idle = new IdleTimeout(60);
618
+ const ops: string[] = [];
619
+ using idle = new IdleTimeout(20);
582
620
  const result = await runEvalAgent(
583
621
  { prompt: "investigate" },
584
622
  {
585
623
  session,
586
624
  signal: idle.signal,
587
625
  emitStatus: event => {
588
- if (event.op === EVAL_HEARTBEAT_OP) idle.bump();
626
+ ops.push(event.op);
627
+ if (event.op === EVAL_TIMEOUT_PAUSE_OP) idle.pause();
628
+ if (event.op === EVAL_TIMEOUT_RESUME_OP) idle.resume();
589
629
  },
590
630
  },
591
631
  );
592
632
 
593
- expect(idle.signal.aborted).toBe(false);
594
633
  expect(result.text).toBe("done");
634
+ expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
635
+ expect(idle.signal.aborted).toBe(false);
636
+
637
+ await Bun.sleep(60);
638
+ expect(idle.signal.aborted).toBe(true);
595
639
  });
596
640
 
597
- it("does not let agent() progress snapshots re-arm the watchdog without a heartbeat", async () => {
598
- using tempDir = TempDir.createSync("@omp-eval-agent-progress-no-rearm-");
599
- const { session } = makeEvalSession(tempDir, "js-agent-progress-no-rearm");
641
+ it("keeps timeout paused despite agent() progress snapshots", async () => {
642
+ using tempDir = TempDir.createSync("@omp-eval-agent-progress-timeout-pause-");
643
+ const { session } = makeEvalSession(tempDir, "js-agent-progress-timeout-pause");
600
644
  mockAgents();
601
- // Heartbeat slower than the budget: only the immediate beat at call start
602
- // fires, so after the budget elapses nothing re-arms the watchdog.
603
- setBridgeHeartbeatIntervalMs(10_000);
604
645
 
605
646
  // Stream frequent progress snapshots (op:"agent") for well past the budget.
606
- // Progress is rendered but MUST NOT count as activity — only heartbeats do.
647
+ // They render as status, but timeout accounting is controlled only by the
648
+ // bridge pause/resume events.
607
649
  vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
608
- for (let i = 0; i < 40; i++) {
650
+ for (let i = 0; i < 20; i++) {
609
651
  options.onProgress?.({
610
652
  index: options.index,
611
653
  id: options.id,
@@ -622,28 +664,30 @@ describe("agent() through eval runtimes", () => {
622
664
  cost: 0,
623
665
  durationMs: i * 10,
624
666
  });
625
- await Bun.sleep(10);
667
+ await Bun.sleep(5);
626
668
  }
627
669
  return singleResult(options, { output: "done" });
628
670
  });
629
671
 
630
672
  const ops: string[] = [];
631
- using idle = new IdleTimeout(80);
632
- await runEvalAgent(
673
+ using idle = new IdleTimeout(40);
674
+ const result = await runEvalAgent(
633
675
  { prompt: "investigate" },
634
676
  {
635
677
  session,
636
678
  signal: idle.signal,
637
679
  emitStatus: event => {
638
680
  ops.push(event.op);
639
- if (event.op === EVAL_HEARTBEAT_OP) idle.bump();
681
+ if (event.op === EVAL_TIMEOUT_PAUSE_OP) idle.pause();
682
+ if (event.op === EVAL_TIMEOUT_RESUME_OP) idle.resume();
640
683
  },
641
684
  },
642
685
  );
643
686
 
644
- // Progress streamed, but the watchdog still fired: agent snapshots never
645
- // re-armed it, and the lone start heartbeat lapsed before the call ended.
687
+ expect(result.text).toBe("done");
688
+ expect(ops[0]).toBe(EVAL_TIMEOUT_PAUSE_OP);
646
689
  expect(ops).toContain("agent");
647
- expect(idle.signal.aborted).toBe(true);
690
+ expect(ops.at(-1)).toBe(EVAL_TIMEOUT_RESUME_OP);
691
+ expect(idle.signal.aborted).toBe(false);
648
692
  });
649
693
  });
@@ -0,0 +1,64 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import {
3
+ EVAL_TIMEOUT_PAUSE_OP,
4
+ EVAL_TIMEOUT_RESUME_OP,
5
+ isEvalTimeoutControlEvent,
6
+ withBridgeTimeoutPause,
7
+ } from "../bridge-timeout";
8
+ import type { JsStatusEvent } from "../js/shared/types";
9
+
10
+ describe("withBridgeTimeoutPause", () => {
11
+ it("emits one pause before the operation and one resume after it settles", async () => {
12
+ const events: JsStatusEvent[] = [];
13
+
14
+ const value = await withBridgeTimeoutPause(
15
+ event => events.push(event),
16
+ async () => {
17
+ await Bun.sleep(80);
18
+ return "done";
19
+ },
20
+ );
21
+
22
+ expect(value).toBe("done");
23
+ expect(events.map(event => event.op)).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
24
+
25
+ const settledCount = events.length;
26
+ await Bun.sleep(40);
27
+ expect(events.length).toBe(settledCount);
28
+ });
29
+
30
+ it("resumes timeout accounting even when the operation throws", async () => {
31
+ const events: JsStatusEvent[] = [];
32
+
33
+ await expect(
34
+ withBridgeTimeoutPause(
35
+ event => events.push(event),
36
+ async () => {
37
+ await Bun.sleep(20);
38
+ throw new Error("boom");
39
+ },
40
+ ),
41
+ ).rejects.toThrow("boom");
42
+
43
+ expect(events.map(event => event.op)).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
44
+ });
45
+
46
+ it("runs the operation without emitting when no status sink is wired", async () => {
47
+ let ran = 0;
48
+
49
+ const value = await withBridgeTimeoutPause(undefined, async () => {
50
+ ran++;
51
+ await Bun.sleep(20);
52
+ return 42;
53
+ });
54
+
55
+ expect(value).toBe(42);
56
+ expect(ran).toBe(1);
57
+ });
58
+
59
+ it("identifies timeout-control events as non-renderable status", () => {
60
+ expect(isEvalTimeoutControlEvent({ op: EVAL_TIMEOUT_PAUSE_OP })).toBe(true);
61
+ expect(isEvalTimeoutControlEvent({ op: EVAL_TIMEOUT_RESUME_OP })).toBe(true);
62
+ expect(isEvalTimeoutControlEvent({ op: "agent", id: "subagent-1" })).toBe(false);
63
+ });
64
+ });
@@ -32,21 +32,34 @@ describe("IdleTimeout", () => {
32
32
  expect((idle.signal.reason as DOMException).name).toBe("TimeoutError");
33
33
  });
34
34
 
35
- it("re-arms on every bump and only fires after activity stops", async () => {
36
- using idle = new IdleTimeout(150);
37
- // Bump well past a single window; each bump must push the deadline forward
38
- // so the watchdog never trips while activity continues.
39
- for (let i = 0; i < 6; i++) {
40
- await Bun.sleep(40);
41
- idle.bump();
42
- }
35
+ it("ignores elapsed time while paused and resumes with a fresh window", async () => {
36
+ using idle = new IdleTimeout(80);
37
+ idle.pause();
38
+ await Bun.sleep(160);
43
39
  expect(idle.signal.aborted).toBe(false);
44
40
 
45
- // Activity stopped — the watchdog should now fire within roughly one window.
46
- const fired = await abortedWithin(idle.signal, 800);
41
+ idle.resume();
42
+ const firedEarly = await abortedWithin(idle.signal, 30);
43
+ expect(firedEarly).toBe(false);
44
+ const fired = await abortedWithin(idle.signal, 500);
47
45
  expect(fired).toBe(true);
48
46
  });
49
47
 
48
+ it("reference-counts overlapping pauses", async () => {
49
+ using idle = new IdleTimeout(60);
50
+ idle.pause();
51
+ idle.pause();
52
+ await Bun.sleep(120);
53
+ expect(idle.signal.aborted).toBe(false);
54
+
55
+ idle.resume();
56
+ await Bun.sleep(90);
57
+ expect(idle.signal.aborted).toBe(false);
58
+
59
+ idle.resume();
60
+ const fired = await abortedWithin(idle.signal, 500);
61
+ expect(fired).toBe(true);
62
+ });
50
63
  it("never fires after dispose()", async () => {
51
64
  const idle = new IdleTimeout(30);
52
65
  idle.dispose();
@@ -55,12 +68,13 @@ describe("IdleTimeout", () => {
55
68
  expect(idle.signal.aborted).toBe(false);
56
69
  });
57
70
 
58
- it("ignores bump() after the watchdog has already fired", async () => {
71
+ it("ignores pause/resume after the watchdog has already fired", async () => {
59
72
  using idle = new IdleTimeout(30);
60
73
  await abortedWithin(idle.signal, 500);
61
74
  expect(idle.signal.aborted).toBe(true);
62
75
  // Late activity must not un-abort or rearm a settled watchdog.
63
- idle.bump();
76
+ idle.pause();
77
+ idle.resume();
64
78
  expect(idle.signal.aborted).toBe(true);
65
79
  });
66
80
  });
@@ -0,0 +1,103 @@
1
+ import { afterEach, describe, expect, it } from "bun:test";
2
+ import {
3
+ __resetWindowsConsoleProbeCache,
4
+ consoleAttachedViaTTY,
5
+ hostHasInheritableConsole,
6
+ shouldHideKernelWindow,
7
+ } from "../py/spawn-options";
8
+
9
+ /**
10
+ * `shouldHideKernelWindow` decides whether the long-lived Python kernel
11
+ * subprocess is spawned with `windowsHide: true`. On Windows, Bun maps that
12
+ * option to `CREATE_NO_WINDOW`, which detaches the child from any inherited
13
+ * console — breaking both (a) `LoadLibraryExW` for NumPy/pandas native
14
+ * extensions and (b) SIGINT delivery via `GenerateConsoleCtrlEvent`. See
15
+ * issue #1960. The tests below pin the three layered concerns the PR review
16
+ * surfaced:
17
+ *
18
+ * 1. `shouldHideKernelWindow` — pure predicate over a single boolean.
19
+ * 2. `consoleAttachedViaTTY` — the TTY-OR fallback used when the Win32 FFI
20
+ * probe is unavailable; covers the partial-redirection cases.
21
+ * 3. `hostHasInheritableConsole` — the integration boundary. Off-Windows it
22
+ * short-circuits to the TTY fallback; on Windows it is expected to
23
+ * consult `kernel32!GetConsoleWindow()` first, which is the authoritative
24
+ * signal even for the all-stdio-redirected case.
25
+ */
26
+ describe("shouldHideKernelWindow", () => {
27
+ it("inherits the host console on Windows when one is attached", () => {
28
+ // Reporter's repro: omp launched in Windows Terminal, host has a
29
+ // console, kernel must inherit so `import pandas` doesn't deadlock in
30
+ // `_multiarray_umath` and SIGINT can recover the cell.
31
+ expect(shouldHideKernelWindow({ platform: "win32", hostHasInheritableConsole: true })).toBe(false);
32
+ });
33
+
34
+ it("hides on Windows only when the host has no console at all (true service / daemon)", () => {
35
+ // CREATE_NO_WINDOW here suppresses the console window Windows would
36
+ // otherwise auto-allocate for the console-app Python kernel.
37
+ expect(shouldHideKernelWindow({ platform: "win32", hostHasInheritableConsole: false })).toBe(true);
38
+ });
39
+
40
+ it("never sets windowsHide off-Windows (the option is a Win32-only flag)", () => {
41
+ // On POSIX `windowsHide` is a no-op; the predicate must return false
42
+ // everywhere off-Windows so the spawn site matches pre-fix behavior.
43
+ expect(shouldHideKernelWindow({ platform: "linux", hostHasInheritableConsole: true })).toBe(false);
44
+ expect(shouldHideKernelWindow({ platform: "linux", hostHasInheritableConsole: false })).toBe(false);
45
+ expect(shouldHideKernelWindow({ platform: "darwin", hostHasInheritableConsole: true })).toBe(false);
46
+ expect(shouldHideKernelWindow({ platform: "darwin", hostHasInheritableConsole: false })).toBe(false);
47
+ });
48
+ });
49
+
50
+ describe("consoleAttachedViaTTY (FFI fallback heuristic)", () => {
51
+ // The OR of three TTY signals correctly classifies the realistic shell
52
+ // redirection scenarios that motivated widening the check beyond stdout
53
+ // in the first review pass (PR #1961). The all-three-redirected case
54
+ // (false here) is the gap that the Win32 FFI probe in
55
+ // `hostHasInheritableConsole` is meant to close — this fallback is best-
56
+ // effort.
57
+
58
+ it("treats a fully interactive launch as console-attached", () => {
59
+ expect(consoleAttachedViaTTY({ stdinIsTTY: true, stdoutIsTTY: true, stderrIsTTY: true })).toBe(true);
60
+ });
61
+
62
+ it("treats `omp -p '...' > out.txt` (stdout-only redirect) as console-attached", () => {
63
+ // The reviewer's first-pass repro: stdout off the terminal, stdin
64
+ // and stderr still attached. OR keeps the console.
65
+ expect(consoleAttachedViaTTY({ stdinIsTTY: true, stdoutIsTTY: false, stderrIsTTY: true })).toBe(true);
66
+ });
67
+
68
+ it("treats stdin-only redirects (`< in.txt`) as console-attached", () => {
69
+ expect(consoleAttachedViaTTY({ stdinIsTTY: false, stdoutIsTTY: true, stderrIsTTY: true })).toBe(true);
70
+ });
71
+
72
+ it("treats stderr-only redirects (`2> err.log`) as console-attached", () => {
73
+ expect(consoleAttachedViaTTY({ stdinIsTTY: true, stdoutIsTTY: true, stderrIsTTY: false })).toBe(true);
74
+ });
75
+
76
+ it("returns false only when none of stdin/stdout/stderr is a TTY", () => {
77
+ // This is the gap: a real Windows Terminal session with all three
78
+ // streams redirected (`omp ... < in > out 2> err`) lands here.
79
+ // `hostHasInheritableConsole` uses the Win32 FFI probe to recover
80
+ // the right answer in that scenario; this helper is the fallback.
81
+ expect(consoleAttachedViaTTY({ stdinIsTTY: false, stdoutIsTTY: false, stderrIsTTY: false })).toBe(false);
82
+ });
83
+ });
84
+
85
+ describe("hostHasInheritableConsole", () => {
86
+ afterEach(() => {
87
+ __resetWindowsConsoleProbeCache();
88
+ });
89
+
90
+ if (process.platform !== "win32") {
91
+ it("matches the TTY-OR fallback off-Windows", () => {
92
+ // Off-Windows, `windowsHide` is a no-op anyway, but we still
93
+ // expose `hostHasInheritableConsole` symmetrically. Confirm it
94
+ // degrades to the same OR the call site would compute by hand.
95
+ const tty = consoleAttachedViaTTY({
96
+ stdinIsTTY: !!process.stdin.isTTY,
97
+ stdoutIsTTY: !!process.stdout.isTTY,
98
+ stderrIsTTY: !!process.stderr.isTTY,
99
+ });
100
+ expect(hostHasInheritableConsole()).toBe(tty);
101
+ });
102
+ }
103
+ });
@@ -8,7 +8,7 @@ import type { ModelRegistry } from "../../config/model-registry";
8
8
  import { Settings } from "../../config/settings";
9
9
  import type { ToolSession } from "../../tools";
10
10
  import { ToolError } from "../../tools/tool-errors";
11
- import { EVAL_HEARTBEAT_OP, setBridgeHeartbeatIntervalMs } from "../heartbeat";
11
+ import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
12
12
  import { IdleTimeout } from "../idle-timeout";
13
13
  import { disposeAllVmContexts } from "../js/context-manager";
14
14
  import { executeJs } from "../js/executor";
@@ -99,7 +99,6 @@ function assistant(opts: {
99
99
  describe("runEvalLlm", () => {
100
100
  afterEach(() => {
101
101
  vi.restoreAllMocks();
102
- setBridgeHeartbeatIntervalMs();
103
102
  });
104
103
 
105
104
  it("resolves each tier to its expected model", async () => {
@@ -217,31 +216,32 @@ describe("runEvalLlm", () => {
217
216
  );
218
217
  });
219
218
 
220
- it("keeps the idle watchdog armed while a slow llm() request is in flight", async () => {
221
- // A oneshot completion emits no status until it returns; a slow request
222
- // must not look like a stalled cell. The bridge pumps a heartbeat while it
223
- // awaits, re-arming the watchdog through emitStatus.
224
- setBridgeHeartbeatIntervalMs(15);
219
+ it("pauses the idle watchdog while a slow llm() request is in flight", async () => {
220
+ // A oneshot completion emits no status until it returns; delegated model
221
+ // time must be invisible to the eval timeout budget.
225
222
  vi.spyOn(ai, "completeSimple").mockImplementation(async () => {
226
223
  await Bun.sleep(200);
227
224
  return assistant({ text: "the answer" });
228
225
  });
229
226
 
227
+ const ops: string[] = [];
230
228
  using idle = new IdleTimeout(60);
231
229
  const result = await runEvalLlm(
232
230
  { prompt: "q", model: "smol" },
233
231
  {
234
232
  session: makeSession(),
235
233
  signal: idle.signal,
236
- // Mirror the eval tool: only a bridge heartbeat re-arms the watchdog.
237
234
  emitStatus: event => {
238
- if (event.op === EVAL_HEARTBEAT_OP) idle.bump();
235
+ ops.push(event.op);
236
+ if (event.op === EVAL_TIMEOUT_PAUSE_OP) idle.pause();
237
+ if (event.op === EVAL_TIMEOUT_RESUME_OP) idle.resume();
239
238
  },
240
239
  },
241
240
  );
242
241
 
243
- expect(idle.signal.aborted).toBe(false);
244
242
  expect(result.text).toBe("the answer");
243
+ expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "llm"]);
244
+ expect(idle.signal.aborted).toBe(false);
245
245
  });
246
246
  });
247
247
 
@@ -13,10 +13,10 @@ import subagentUserPromptTemplate from "../prompts/system/subagent-user-prompt.m
13
13
  import * as taskDiscovery from "../task/discovery";
14
14
  import * as taskExecutor from "../task/executor";
15
15
  import { AgentOutputManager } from "../task/output-manager";
16
- import type { AgentDefinition, AgentProgress } from "../task/types";
16
+ import type { AgentDefinition, AgentProgress, SingleResult } from "../task/types";
17
17
  import type { ToolSession } from "../tools";
18
18
  import { ToolError } from "../tools/tool-errors";
19
- import { withBridgeHeartbeat } from "./heartbeat";
19
+ import { withBridgeTimeoutPause } from "./bridge-timeout";
20
20
  import type { JsStatusEvent } from "./js/shared/types";
21
21
  // Import review tools for side effects (registers subagent tool handlers).
22
22
  import "../tools/review";
@@ -173,6 +173,26 @@ function emitProgressStatus(emitStatus: ((event: JsStatusEvent) => void) | undef
173
173
  });
174
174
  }
175
175
 
176
+ /**
177
+ * Coalesce a subagent failure into a non-empty, human-meaningful error message.
178
+ *
179
+ * When the executor aborts a subagent (runtime limit, parent cancellation, …)
180
+ * the actionable explanation lives on `abortReason`, while `error`/`stderr`
181
+ * are routinely empty strings. Plain `??` coalescing stops at the empty string
182
+ * and ships an empty error through the bridge — Python then surfaces only the
183
+ * generic `bridge call '__agent__' failed`. See #2006.
184
+ */
185
+ function buildSubagentFailureMessage(agentName: string, result: SingleResult): string {
186
+ const abortReason = trimToUndefined(result.abortReason);
187
+ if (result.aborted && abortReason) return abortReason;
188
+ return (
189
+ trimToUndefined(result.error) ??
190
+ trimToUndefined(result.stderr) ??
191
+ abortReason ??
192
+ `agent() subagent '${agentName}' failed.`
193
+ );
194
+ }
195
+
176
196
  /**
177
197
  * Run a single subagent on behalf of an eval cell's `agent()` call.
178
198
  */
@@ -225,17 +245,15 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
225
245
  getSessionId: options.session.getSessionId ?? (() => null),
226
246
  };
227
247
  const parentArtifactManager = options.session.getArtifactManager?.() ?? undefined;
228
- const parentEvalSessionId = options.session.getEvalSessionId?.() ?? undefined;
229
248
  const mcpManager = options.session.mcpManager ?? MCPManager.instance();
230
249
  const { sessionFile, artifactsDir, contextFile } = await getArtifacts(options.session);
231
250
  const outputManager = getOutputManager(options.session);
232
251
  const id = await outputManager.allocate(outputIdBase(parsed.label, agentName));
233
252
  const assignment = parsed.prompt.trim();
234
253
  const context = trimToUndefined(parsed.context);
235
- // Pump a heartbeat while the subagent runs so the eval idle watchdog stays
236
- // armed across quiet stretches (time-to-first-token, long nested tools)
237
- // where `onProgress` would otherwise emit no status to re-arm it.
238
- const result = await withBridgeHeartbeat(options.emitStatus, () =>
254
+ // Suspend eval timeout accounting while the subagent owns control. The
255
+ // timeout clock restarts once the bridge returns to the cell runtime.
256
+ const result = await withBridgeTimeoutPause(options.emitStatus, () =>
239
257
  taskExecutor.runSubprocess({
240
258
  cwd: options.session.cwd,
241
259
  agent: effectiveAgent,
@@ -261,6 +279,12 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
261
279
  authStorage: options.session.authStorage,
262
280
  modelRegistry: options.session.modelRegistry,
263
281
  settings: options.session.settings,
282
+ // Eval `agent()` subagents are never wall-clock capped: the parent
283
+ // cell's idle watchdog is suspended for the whole bridge call
284
+ // (withBridgeTimeoutPause), so a long-running phase/recovery workflow
285
+ // must not be killed by `task.maxRuntimeMs`. Force the limit off
286
+ // regardless of the inherited session setting.
287
+ maxRuntimeMs: 0,
264
288
  mcpManager,
265
289
  contextFiles,
266
290
  skills: availableSkills,
@@ -272,14 +296,16 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
272
296
  parentHindsightSessionState: options.session.getHindsightSessionState?.(),
273
297
  parentMnemopiSessionState: options.session.getMnemopiSessionState?.(),
274
298
  parentTelemetry: options.session.getTelemetry?.(),
275
- parentEvalSessionId,
299
+ // Deliberately omit parentEvalSessionId: the parent's Python kernel is
300
+ // blocked on this bridge call, so sharing the eval session would deadlock
301
+ // (subagent queues behind the parent's in-flight execution, parent waits
302
+ // for subagent → circular). Each bridge-spawned subagent gets its own
303
+ // eval session with an independent kernel.
276
304
  }),
277
305
  );
278
306
 
279
- if (result.exitCode !== 0 || result.error) {
280
- const failureMessage =
281
- result.error ?? result.stderr ?? result.abortReason ?? `agent() subagent '${agentName}' failed.`;
282
- throw new ToolError(failureMessage);
307
+ if (result.exitCode !== 0 || result.error || result.aborted) {
308
+ throw new ToolError(buildSubagentFailureMessage(agentName, result));
283
309
  }
284
310
 
285
311
  options.session.recordEvalSubagentUsage?.(result.usage?.output ?? 0);
@@ -10,12 +10,12 @@ export interface ExecutorBackendExecOptions {
10
10
  signal?: AbortSignal;
11
11
  session: ToolSession;
12
12
  /**
13
- * Inactivity budget in milliseconds (the cell's `timeout`). Cancellation is
14
- * driven entirely by `signal`, which the eval tool arms as an idle watchdog
15
- * that fires a `TimeoutError` reason after this much time with no progress
16
- * (status) events. Backends use this value only for timeout-annotation text
17
- * and as cold-start headroom; they MUST NOT derive a competing wall-clock
18
- * timer from it.
13
+ * Runtime-work budget in milliseconds (the cell's `timeout`). Cancellation is
14
+ * driven entirely by `signal`, which the eval tool arms as a watchdog that
15
+ * pauses on bridge timeout-control status events and fires a `TimeoutError`
16
+ * reason only while the Python/JS runtime owns control. Backends use this
17
+ * value only for timeout-annotation text and as cold-start headroom; they MUST
18
+ * NOT derive a competing wall-clock timer from it.
19
19
  */
20
20
  idleTimeoutMs: number;
21
21
  reset: boolean;