@oh-my-pi/pi-coding-agent 15.10.4 → 15.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/CHANGELOG.md +52 -0
  2. package/dist/types/capability/rule-buckets.d.ts +1 -1
  3. package/dist/types/capability/rule.d.ts +6 -1
  4. package/dist/types/cli/update-cli.d.ts +11 -1
  5. package/dist/types/config/model-registry.d.ts +18 -1
  6. package/dist/types/discovery/at-imports.d.ts +15 -0
  7. package/dist/types/edit/diff.d.ts +3 -2
  8. package/dist/types/eval/__tests__/helpers-local-roots.test.d.ts +1 -0
  9. package/dist/types/eval/backend.d.ts +7 -0
  10. package/dist/types/eval/js/context-manager.d.ts +1 -0
  11. package/dist/types/eval/js/executor.d.ts +2 -0
  12. package/dist/types/eval/js/index.d.ts +1 -1
  13. package/dist/types/eval/js/shared/helpers.d.ts +6 -0
  14. package/dist/types/eval/js/shared/runtime.d.ts +5 -0
  15. package/dist/types/eval/js/worker-protocol.d.ts +6 -0
  16. package/dist/types/eval/py/executor.d.ts +7 -0
  17. package/dist/types/eval/py/index.d.ts +1 -1
  18. package/dist/types/export/ttsr.d.ts +14 -0
  19. package/dist/types/extensibility/extensions/types.d.ts +8 -1
  20. package/dist/types/extensibility/legacy-pi-ai-shim.d.ts +1 -1
  21. package/dist/types/internal-urls/local-protocol.d.ts +10 -0
  22. package/dist/types/mcp/oauth-flow.d.ts +2 -2
  23. package/dist/types/modes/components/custom-editor.d.ts +3 -0
  24. package/dist/types/modes/components/{status-line.d.ts → status-line/component.d.ts} +2 -32
  25. package/dist/types/modes/components/status-line/index.d.ts +1 -0
  26. package/dist/types/modes/components/status-line/types.d.ts +31 -2
  27. package/dist/types/modes/image-references.d.ts +8 -3
  28. package/dist/types/modes/interactive-mode.d.ts +1 -1
  29. package/dist/types/modes/theme/theme.d.ts +2 -1
  30. package/dist/types/modes/types.d.ts +2 -1
  31. package/dist/types/modes/utils/ui-helpers.d.ts +2 -2
  32. package/dist/types/session/agent-session.d.ts +0 -2
  33. package/dist/types/tools/ask.d.ts +1 -0
  34. package/dist/types/tools/browser/tab-worker.d.ts +15 -0
  35. package/dist/types/tools/index.d.ts +17 -0
  36. package/dist/types/tools/render-utils.d.ts +1 -1
  37. package/dist/types/tools/tool-timeouts.d.ts +1 -1
  38. package/dist/types/utils/block-context.d.ts +35 -0
  39. package/dist/types/utils/image-loading.d.ts +12 -0
  40. package/package.json +29 -9
  41. package/src/capability/rule-buckets.ts +4 -2
  42. package/src/capability/rule.ts +10 -1
  43. package/src/cli/auth-broker-cli.ts +6 -7
  44. package/src/cli/auth-gateway-cli.ts +1 -1
  45. package/src/cli/list-models.ts +5 -0
  46. package/src/cli/update-cli.ts +138 -16
  47. package/src/config/model-registry.ts +81 -2
  48. package/src/debug/index.ts +4 -8
  49. package/src/discovery/at-imports.ts +273 -0
  50. package/src/discovery/builtin-rules/index.ts +4 -0
  51. package/src/discovery/builtin-rules/ts-no-test-timers.md +55 -0
  52. package/src/discovery/builtin-rules/ts-redundant-clear-guard.md +75 -0
  53. package/src/discovery/helpers.ts +2 -1
  54. package/src/edit/diff.ts +114 -4
  55. package/src/edit/hashline/diff.ts +1 -1
  56. package/src/edit/hashline/execute.ts +1 -1
  57. package/src/edit/modes/patch.ts +6 -2
  58. package/src/edit/modes/replace.ts +1 -1
  59. package/src/edit/renderer.ts +12 -2
  60. package/src/eval/__tests__/helpers-local-roots.test.ts +58 -0
  61. package/src/eval/backend.ts +15 -0
  62. package/src/eval/js/context-manager.ts +4 -2
  63. package/src/eval/js/executor.ts +3 -0
  64. package/src/eval/js/index.ts +7 -1
  65. package/src/eval/js/shared/helpers.ts +53 -6
  66. package/src/eval/js/shared/runtime.ts +8 -0
  67. package/src/eval/js/worker-core.ts +1 -0
  68. package/src/eval/js/worker-protocol.ts +6 -0
  69. package/src/eval/py/executor.ts +12 -0
  70. package/src/eval/py/index.ts +7 -1
  71. package/src/eval/py/prelude.py +43 -4
  72. package/src/eval/py/runner.py +1 -0
  73. package/src/exa/render.ts +1 -1
  74. package/src/export/ttsr.ts +122 -1
  75. package/src/extensibility/extensions/types.ts +8 -1
  76. package/src/extensibility/legacy-pi-ai-shim.ts +1 -1
  77. package/src/extensibility/plugins/doctor.ts +1 -1
  78. package/src/extensibility/plugins/legacy-pi-compat.ts +6 -5
  79. package/src/goals/tools/goal-tool.ts +1 -1
  80. package/src/internal-urls/docs-index.generated.ts +6 -5
  81. package/src/internal-urls/local-protocol.ts +13 -0
  82. package/src/lsp/render.ts +8 -6
  83. package/src/mcp/oauth-flow.ts +3 -3
  84. package/src/mcp/render.ts +7 -1
  85. package/src/modes/components/custom-editor.ts +12 -6
  86. package/src/modes/components/login-dialog.ts +1 -1
  87. package/src/modes/components/oauth-selector.ts +4 -4
  88. package/src/modes/components/read-tool-group.ts +10 -3
  89. package/src/modes/components/{status-line.ts → status-line/component.ts} +18 -40
  90. package/src/modes/components/status-line/index.ts +1 -0
  91. package/src/modes/components/status-line/types.ts +23 -8
  92. package/src/modes/components/tool-execution.ts +1 -1
  93. package/src/modes/components/transcript-container.ts +17 -10
  94. package/src/modes/components/user-message.ts +6 -3
  95. package/src/modes/components/welcome.ts +1 -1
  96. package/src/modes/controllers/extension-ui-controller.ts +143 -127
  97. package/src/modes/controllers/input-controller.ts +36 -10
  98. package/src/modes/controllers/mcp-command-controller.ts +28 -12
  99. package/src/modes/controllers/selector-controller.ts +4 -11
  100. package/src/modes/controllers/ssh-command-controller.ts +2 -2
  101. package/src/modes/image-references.ts +13 -7
  102. package/src/modes/interactive-mode.ts +2 -2
  103. package/src/modes/rpc/rpc-mode.ts +1 -1
  104. package/src/modes/setup-wizard/scenes/sign-in.ts +3 -11
  105. package/src/modes/theme/theme.ts +95 -1
  106. package/src/modes/types.ts +2 -1
  107. package/src/modes/utils/ui-helpers.ts +14 -5
  108. package/src/prompts/tools/bash.md +1 -1
  109. package/src/prompts/tools/eval.md +4 -4
  110. package/src/sdk.ts +31 -14
  111. package/src/session/agent-session.ts +213 -155
  112. package/src/session/session-manager.ts +1 -1
  113. package/src/slash-commands/builtin-registry.ts +1 -1
  114. package/src/system-prompt.ts +15 -9
  115. package/src/task/render.ts +20 -8
  116. package/src/tools/ask.ts +14 -5
  117. package/src/tools/bash-interactive.ts +1 -1
  118. package/src/tools/bash.ts +14 -2
  119. package/src/tools/browser/render.ts +5 -2
  120. package/src/tools/browser/tab-worker.ts +211 -91
  121. package/src/tools/debug.ts +5 -2
  122. package/src/tools/eval-render.ts +6 -3
  123. package/src/tools/eval.ts +1 -1
  124. package/src/tools/gh-renderer.ts +29 -15
  125. package/src/tools/index.ts +32 -0
  126. package/src/tools/inspect-image-renderer.ts +12 -5
  127. package/src/tools/job.ts +9 -6
  128. package/src/tools/memory-render.ts +19 -5
  129. package/src/tools/read.ts +165 -18
  130. package/src/tools/render-utils.ts +3 -1
  131. package/src/tools/resolve.ts +1 -1
  132. package/src/tools/review.ts +1 -1
  133. package/src/tools/ssh.ts +4 -1
  134. package/src/tools/todo.ts +8 -1
  135. package/src/tools/tool-timeouts.ts +1 -1
  136. package/src/tools/write.ts +1 -1
  137. package/src/tui/code-cell.ts +1 -1
  138. package/src/utils/block-context.ts +312 -0
  139. package/src/utils/image-loading.ts +31 -1
  140. package/src/web/search/providers/codex.ts +1 -1
  141. package/src/web/search/render.ts +14 -6
package/src/task/render.ts CHANGED
@@ -807,12 +807,15 @@ function renderReviewResult(
807
807
 
808
808
  // Verdict line
809
809
  const verdictColor = summary.overall_correctness === "correct" ? "success" : "error";
810
- const verdictIcon = summary.overall_correctness === "correct" ? theme.status.success : theme.status.error;
810
+ const isCorrect = summary.overall_correctness === "correct";
811
+ const verdictIcon = isCorrect
812
+ ? theme.styledSymbol("tool.task", "accent")
813
+ : theme.fg(verdictColor, theme.status.error);
811
814
  lines.push(
812
- `${continuePrefix} Patch is ${theme.fg(verdictColor, summary.overall_correctness)} ${theme.fg(
813
- verdictColor,
814
- verdictIcon,
815
- )} ${theme.fg("dim", `(${(summary.confidence * 100).toFixed(0)}% confidence)`)}`,
815
+ `${continuePrefix} Patch is ${theme.fg(verdictColor, summary.overall_correctness)} ${verdictIcon} ${theme.fg(
816
+ "dim",
817
+ `(${(summary.confidence * 100).toFixed(0)}% confidence)`,
818
+ )}`,
816
819
  );
817
820
 
818
821
  // Explanation preview (first ~80 chars when collapsed, full when expanded)
@@ -913,7 +916,7 @@ function renderAgentResult(
913
916
  : needsWarning
914
917
  ? theme.status.warning
915
918
  : success
916
- ? theme.status.success
919
+ ? theme.styledSymbol("tool.task", "accent")
917
920
  : theme.status.error;
918
921
  const iconColor = needsWarning ? "warning" : success ? "success" : mergeFailed ? "warning" : "error";
919
922
  const statusText = aborted
@@ -1082,7 +1085,10 @@ export function renderResult(
1082
1085
 
1083
1086
  if (!details) {
1084
1087
  const text = result.content.find(c => c.type === "text")?.text || "";
1085
- const header = renderStatusLine({ icon: "success", title: "Task" }, theme);
1088
+ const header = renderStatusLine(
1089
+ { iconOverride: theme.styledSymbol("tool.task", "accent"), title: "Task" },
1090
+ theme,
1091
+ );
1086
1092
  return framedBlock(theme, width => ({
1087
1093
  header,
1088
1094
  sections: [
@@ -1102,11 +1108,17 @@ export function renderResult(
1102
1108
  const isError = aborted || failed;
1103
1109
  const agentCount = hasResults ? details.results.length : (details.progress?.length ?? 0);
1104
1110
  const icon: ToolUIStatus = options.isPartial ? "running" : isError ? "error" : mergeFailed ? "warning" : "success";
1111
+ // Surface the dispatched agent type (e.g. `Reviewer`) alongside the count so
1112
+ // the header reads `Task 16 agents: Reviewer`. All tasks in one call share a
1113
+ // single `agent` type (top-level param), so one label covers the whole batch.
1114
+ const agentName = args?.agent?.trim();
1115
+ const countLabel = agentCount > 0 ? `${agentCount} ${agentCount === 1 ? "agent" : "agents"}` : undefined;
1116
+ const metaLabel = countLabel ? (agentName ? `${countLabel}: ${agentName}` : countLabel) : agentName;
1105
1117
  const header = renderStatusLine(
1106
1118
  {
1107
1119
  icon,
1108
1120
  title: "Task",
1109
- meta: agentCount > 0 ? [`${agentCount} ${agentCount === 1 ? "agent" : "agents"}`] : undefined,
1121
+ meta: metaLabel ? [metaLabel] : undefined,
1110
1122
  },
1111
1123
  theme,
1112
1124
  );
package/src/tools/ask.ts CHANGED
@@ -96,7 +96,7 @@ const OTHER_OPTION = "Other (type your own)";
96
96
  const RECOMMENDED_SUFFIX = " (Recommended)";
97
97
 
98
98
  function getDoneOptionLabel(): string {
99
- return `${theme.status.success} Done selecting`;
99
+ return `${theme.symbol("tool.ask")} Done selecting`;
100
100
  }
101
101
 
102
102
  /** Add "(Recommended)" suffix to the option at the given index if not already present */
@@ -407,6 +407,12 @@ export class AskTool implements AgentTool<typeof askSchema, AskToolDetails> {
407
407
  readonly description: string;
408
408
  readonly parameters = askSchema;
409
409
  readonly strict = true;
410
+ // Run alone in its tool batch. The interactive selector/editor is a single
411
+ // shared UI surface (`ExtensionUiController.showHookSelector` has no queue and
412
+ // overwrites `ctx.hookSelector` on each call), so two concurrent `ask` calls
413
+ // would clobber each other: the second steals focus and orphans the first,
414
+ // whose promise then hangs until the user aborts the whole turn.
415
+ readonly concurrency = "exclusive";
410
416
  readonly loadMode = "discoverable";
411
417
 
412
418
  constructor(private readonly session: ToolSession) {
@@ -621,9 +627,7 @@ interface AskRenderArgs {
621
627
  /** Render a custom free-text answer as a status line plus indented continuation rows. */
622
628
  function renderCustomInputLines(uiTheme: Theme, customInput: string): string[] {
623
629
  const lines = customInput.split("\n");
624
- const out: string[] = [
625
- ` ${uiTheme.styledSymbol("status.success", "success")} ${uiTheme.fg("toolOutput", lines[0] ?? "")}`,
626
- ];
630
+ const out: string[] = [` ${uiTheme.styledSymbol("tool.ask", "accent")} ${uiTheme.fg("toolOutput", lines[0] ?? "")}`];
627
631
  for (let i = 1; i < lines.length; i++) out.push(` ${uiTheme.fg("toolOutput", lines[i])}`);
628
632
  return out;
629
633
  }
@@ -814,7 +818,12 @@ export const askToolRenderer = {
814
818
  const question = details.question;
815
819
  const hasSelection =
816
820
  details.customInput !== undefined || (details.selectedOptions && details.selectedOptions.length > 0);
817
- const header = renderStatusLine({ icon: hasSelection ? "success" : "warning", title: "Ask" }, uiTheme);
821
+ const header = renderStatusLine(
822
+ hasSelection
823
+ ? { iconOverride: uiTheme.styledSymbol("tool.ask", "accent"), title: "Ask" }
824
+ : { icon: "warning", title: "Ask" },
825
+ uiTheme,
826
+ );
818
827
  const dOptions = details.options;
819
828
  const dSelected = details.selectedOptions;
820
829
  const dMulti = details.multi;
package/src/tools/bash-interactive.ts CHANGED
@@ -246,7 +246,7 @@ class BashInteractiveOverlayComponent implements Component {
246
246
  this.#state === "running"
247
247
  ? formatStatusIcon("running", this.uiTheme)
248
248
  : this.#state === "complete" && this.#exitCode === 0
249
- ? formatStatusIcon("success", this.uiTheme)
249
+ ? this.uiTheme.styledSymbol("tool.bash", "accent")
250
250
  : formatStatusIcon("warning", this.uiTheme);
251
251
  const title = this.uiTheme.fg("accent", "Console");
252
252
  const statusBadge = `${this.uiTheme.fg("dim", this.uiTheme.format.bracketLeft)}${this.#stateText()}${this.uiTheme.fg("dim", this.uiTheme.format.bracketRight)}`;
package/src/tools/bash.ts CHANGED
@@ -1151,11 +1151,23 @@ export function createShellRenderer<TArgs>(config: ShellRendererConfig<TArgs>) {
1151
1151
  const renderArgs = toBashRenderArgs(args, config);
1152
1152
  const cmdLines = args ? formatBashCommandLines(renderArgs, uiTheme) : undefined;
1153
1153
  const isError = result.isError === true;
1154
- const icon = options.isPartial ? "pending" : isError ? "error" : "success";
1154
+ const isPartial = options.isPartial === true;
1155
+ const success = !isPartial && !isError;
1155
1156
  const header =
1156
1157
  config.showHeader === false
1157
1158
  ? undefined
1158
- : renderStatusLine({ icon, title: config.resolveTitle(args, options) }, uiTheme);
1159
+ : renderStatusLine(
1160
+ success
1161
+ ? {
1162
+ iconOverride: uiTheme.styledSymbol("tool.bash", "accent"),
1163
+ title: config.resolveTitle(args, options),
1164
+ }
1165
+ : {
1166
+ icon: isPartial ? "pending" : "error",
1167
+ title: config.resolveTitle(args, options),
1168
+ },
1169
+ uiTheme,
1170
+ );
1159
1171
  const details = result.details;
1160
1172
  const outputBlock = new CachedOutputBlock();
1161
1173
 
package/src/tools/browser/render.ts CHANGED
@@ -146,7 +146,7 @@ function renderOpenOrCloseLine(
146
146
  const action = (details?.action ?? args.action ?? "open") as "open" | "close" | "run";
147
147
  const status = cellStatus(isPartial, isError);
148
148
  const icon =
149
- status === "complete" ? "success" : status === "error" ? "error" : status === "running" ? "running" : "pending";
149
+ status === "complete" ? "done" : status === "error" ? "error" : status === "running" ? "running" : "pending";
150
150
 
151
151
  let title: string;
152
152
  if (action === "close") {
@@ -163,7 +163,10 @@ function renderOpenOrCloseLine(
163
163
  const url = details?.url ?? args.url;
164
164
  if (url) meta.push(shortenPath(url));
165
165
 
166
- const header = renderStatusLine({ icon, title, meta }, theme);
166
+ const header =
167
+ status === "complete"
168
+ ? renderStatusLine({ iconOverride: theme.styledSymbol("tool.browser", "accent"), title, meta }, theme)
169
+ : renderStatusLine({ icon, title, meta }, theme);
167
170
  if (!output) return new Text(header, 0, 0);
168
171
  const outputLines = output.split("\n").map(line => theme.fg("toolOutput", replaceTabs(line)));
169
172
  return new Text([header, ...outputLines].join("\n"), 0, 0);
package/src/tools/browser/tab-worker.ts CHANGED
@@ -8,6 +8,7 @@ import type {
8
8
  Browser,
9
9
  Dialog,
10
10
  ElementHandle,
11
+ ElementScreenshotOptions,
11
12
  HTTPResponse,
12
13
  KeyInput,
13
14
  Page,
@@ -78,6 +79,14 @@ type DialogPolicy = "accept" | "dismiss";
78
79
  type DragTarget = string | { readonly x: number; readonly y: number };
79
80
  type ActionabilityResult = { ok: true; x: number; y: number } | { ok: false; reason: string };
80
81
 
82
+ /**
83
+ * Per-op ceiling for puppeteer-internal helpers that should resolve quickly
84
+ * (`observe`, `screenshot`, `extract`). Kept below the default 30s cell budget so a
85
+ * single stalled helper fails fast with a named error and leaves budget for the rest
86
+ * of the cell. Effective cap is `min(cellBudget, QUICK_OP_TIMEOUT_MS)`.
87
+ */
88
+ const QUICK_OP_TIMEOUT_MS = 20_000;
89
+
81
90
  interface ScreenshotOptions {
82
91
  selector?: string;
83
92
  fullPage?: boolean;
@@ -404,12 +413,36 @@ async function clickQueryHandlerText(
404
413
  );
405
414
  }
406
415
 
416
+ export interface InflightOp {
417
+ label: string;
418
+ startedAt: number;
419
+ }
420
+
407
421
  interface ActiveRun {
408
422
  id: string;
409
423
  ac: AbortController;
410
424
  displays: RunResultOk["displays"];
411
425
  screenshots: ScreenshotResult[];
412
426
  pendingTools: Map<string, { resolve(value: unknown): void; reject(error: Error): void }>;
427
+ /** Helper invocations currently awaiting the page/network, keyed by op id. */
428
+ inflight: Map<number, InflightOp>;
429
+ opCounter: number;
430
+ }
431
+
432
+ /** Human-readable label for a screenshot op, used in op tracking + timeout errors. */
433
+ export function describeScreenshot(opts?: ScreenshotOptions): string {
434
+ if (opts?.selector) return `tab.screenshot({ selector: ${JSON.stringify(opts.selector)} })`;
435
+ if (opts?.fullPage) return "tab.screenshot({ fullPage: true })";
436
+ return "tab.screenshot()";
437
+ }
438
+
439
+ /** Summarize still-running helpers (oldest first) so a cell timeout names what stalled. */
440
+ export function describeInflight(inflight: Map<number, InflightOp>): string {
441
+ const now = Date.now();
442
+ return [...inflight.values()]
443
+ .sort((a, b) => a.startedAt - b.startedAt)
444
+ .map(op => `${op.label} (${((now - op.startedAt) / 1000).toFixed(1)}s)`)
445
+ .join(", ");
413
446
  }
414
447
 
415
448
  export class WorkerCore {
@@ -560,13 +593,21 @@ export class WorkerCore {
560
593
  const signal = AbortSignal.any([timeoutSignal, ac.signal]);
561
594
  const displays: RunResultOk["displays"] = [];
562
595
  const screenshots: ScreenshotResult[] = [];
563
- const active: ActiveRun = { id: msg.id, ac, displays, screenshots, pendingTools: new Map() };
596
+ const active: ActiveRun = {
597
+ id: msg.id,
598
+ ac,
599
+ displays,
600
+ screenshots,
601
+ pendingTools: new Map(),
602
+ inflight: new Map(),
603
+ opCounter: 0,
604
+ };
564
605
  this.#active = active;
565
606
  try {
566
607
  throwIfAborted(signal);
567
608
  const page = this.#requirePage();
568
609
  const browser = this.#requireBrowser();
569
- const tabApi = this.#createTabApi(msg.name, msg.timeoutMs, signal, msg.session, displays, screenshots);
610
+ const tabApi = this.#createTabApi(msg.name, msg.timeoutMs, signal, msg.session, displays, screenshots, active);
570
611
  const runtime = this.#ensureRuntime(msg.session);
571
612
  runtime.setCwd(msg.session.cwd);
572
613
  runtime.setRunScope({
@@ -580,11 +621,16 @@ export class WorkerCore {
580
621
  });
581
622
  const { promise: cancelRejection, reject: rejectCancel } = Promise.withResolvers<never>();
582
623
  const onCancel = (): void => {
583
- rejectCancel(
584
- timeoutSignal.aborted
585
- ? new ToolError(`Browser code execution timed out after ${msg.timeoutMs}ms`)
586
- : new ToolAbortError(),
587
- );
624
+ if (timeoutSignal.aborted) {
625
+ const stalled = describeInflight(active.inflight);
626
+ rejectCancel(
627
+ new ToolError(
628
+ `Browser code execution timed out after ${msg.timeoutMs}ms${stalled ? ` (stalled on ${stalled})` : ""}`,
629
+ ),
630
+ );
631
+ } else {
632
+ rejectCancel(new ToolAbortError());
633
+ }
588
634
  // Cancel in-flight tool calls so user code's awaited proxies reject promptly.
589
635
  for (const pending of active.pendingTools.values()) {
590
636
  pending.reject(new ToolAbortError());
@@ -670,6 +716,39 @@ export class WorkerCore {
670
716
  else pending.reject(replyError(reply.error));
671
717
  }
672
718
 
719
+ /**
720
+ * Wrap a tab helper so it (a) registers in the active run's in-flight map for
721
+ * timeout diagnostics and (b) honors an optional per-op deadline that fails fast
722
+ * with a named error instead of silently consuming the whole cell budget. Pass
723
+ * `Number.POSITIVE_INFINITY` for `perOpTimeoutMs` to bound the op only by the cell
724
+ * budget (used for `evaluate` running user code and for locator helpers that already
725
+ * carry puppeteer's own `.setTimeout(timeoutMs)`).
726
+ */
727
+ async #runOp<T>(
728
+ active: ActiveRun,
729
+ label: string,
730
+ cellSignal: AbortSignal,
731
+ perOpTimeoutMs: number,
732
+ fn: (signal: AbortSignal) => Promise<T>,
733
+ ): Promise<T> {
734
+ const opId = active.opCounter++;
735
+ active.inflight.set(opId, { label, startedAt: Date.now() });
736
+ const capped = Number.isFinite(perOpTimeoutMs) && perOpTimeoutMs > 0;
737
+ const opTimeout = capped ? AbortSignal.timeout(perOpTimeoutMs) : undefined;
738
+ const opSignal = opTimeout ? AbortSignal.any([cellSignal, opTimeout]) : cellSignal;
739
+ try {
740
+ return await fn(opSignal);
741
+ } catch (err) {
742
+ // Per-op deadline fired (not the cell budget, not an explicit abort) → named, actionable error.
743
+ if (opTimeout?.aborted && !cellSignal.aborted) {
744
+ throw new ToolError(`${label} timed out after ${perOpTimeoutMs}ms`);
745
+ }
746
+ throw err;
747
+ } finally {
748
+ active.inflight.delete(opId);
749
+ }
750
+ }
751
+
673
752
  #createTabApi(
674
753
  name: string,
675
754
  timeoutMs: number,
@@ -677,98 +756,125 @@ export class WorkerCore {
677
756
  session: SessionSnapshot,
678
757
  displays: RunResultOk["displays"],
679
758
  screenshots: ScreenshotResult[],
759
+ active: ActiveRun,
680
760
  ): TabApi {
681
761
  const page = this.#requirePage();
762
+ const quickOpMs = Math.min(timeoutMs, QUICK_OP_TIMEOUT_MS);
763
+ const INF = Number.POSITIVE_INFINITY;
764
+ const op = <T>(label: string, perOpMs: number, fn: (sig: AbortSignal) => Promise<T>): Promise<T> =>
765
+ this.#runOp(active, label, signal, perOpMs, fn);
682
766
  return {
683
767
  name,
684
768
  page,
685
769
  signal,
686
770
  url: () => page.url(),
687
- title: () => page.title(),
688
- goto: async (url, opts) => {
689
- this.#clearElementCache();
690
- await untilAborted(signal, () =>
771
+ title: () => op("tab.title()", INF, sig => untilAborted(sig, () => page.title())),
772
+ goto: (url, opts) =>
773
+ op(`tab.goto(${JSON.stringify(url)})`, INF, async sig => {
774
+ this.#clearElementCache();
691
775
  // Default to "load" because dev servers with HMR/WS never reach networkidle.
692
- page.goto(url, { waitUntil: opts?.waitUntil ?? "load", timeout: timeoutMs }),
693
- );
694
- },
695
- observe: opts => this.#collectObservation({ ...opts, signal }),
696
- screenshot: async opts => await this.#captureScreenshot(session, displays, screenshots, signal, opts),
697
- extract: async (format = "markdown") => {
698
- const html = (await untilAborted(signal, () => page.content())) as string;
699
- const result = await extractReadableFromHtml(html, page.url(), format);
700
- if (!result) {
701
- throw new ToolError(`tab.extract(${JSON.stringify(format)}) found no readable content on ${page.url()}`);
702
- }
703
- const content = format === "markdown" ? result.markdown : result.text;
704
- if (!content) {
705
- throw new ToolError(
706
- `tab.extract(${JSON.stringify(format)}) produced empty ${format} content for ${page.url()}`,
707
- );
708
- }
709
- return content;
710
- },
711
- click: async selector => {
712
- const resolved = normalizeSelector(selector);
713
- if (resolved.startsWith("text/")) await clickQueryHandlerText(page, resolved, timeoutMs, signal);
714
- else await untilAborted(signal, () => page.locator(resolved).setTimeout(timeoutMs).click());
715
- },
716
- type: async (selector, text) => {
717
- const handle = (await untilAborted(signal, () =>
718
- page.locator(normalizeSelector(selector)).setTimeout(timeoutMs).waitHandle(),
719
- )) as ElementHandle;
720
- try {
721
- await untilAborted(signal, () => handle.type(text, { delay: 0 }));
722
- } finally {
723
- await handle.dispose();
724
- }
725
- },
726
- fill: async (selector, value) => {
727
- await untilAborted(signal, () =>
728
- page.locator(normalizeSelector(selector)).setTimeout(timeoutMs).fill(value),
729
- );
730
- },
731
- press: async (key, opts) => {
732
- const selector = opts?.selector;
733
- if (selector) await untilAborted(signal, () => page.focus(normalizeSelector(selector)));
734
- await untilAborted(signal, () => page.keyboard.press(key));
735
- },
736
- scroll: async (deltaX, deltaY) => {
737
- await untilAborted(signal, () => page.mouse.wheel({ deltaX, deltaY }));
738
- },
739
- drag: async (from, to) => await this.#drag(from, to, signal),
740
- waitFor: async selector =>
741
- (await untilAborted(signal, () =>
742
- page.locator(normalizeSelector(selector)).setTimeout(timeoutMs).waitHandle(),
743
- )) as ElementHandle,
744
- evaluate: async (fn, ...args) =>
745
- (await untilAborted(signal, () =>
746
- typeof fn === "string" ? page.evaluate(fn) : page.evaluate(fn as (...a: unknown[]) => unknown, ...args),
747
- )) as never,
748
- scrollIntoView: async selector => {
749
- const handle = (await untilAborted(signal, () =>
750
- page.locator(normalizeSelector(selector)).setTimeout(timeoutMs).waitHandle(),
751
- )) as ElementHandle;
752
- try {
753
- await untilAborted(signal, () =>
754
- handle.evaluate(el => {
755
- const target = el as unknown as {
756
- scrollIntoView: (opts: { behavior: string; block: string; inline: string }) => void;
757
- };
758
- target.scrollIntoView({ behavior: "instant", block: "center", inline: "center" });
759
- }),
776
+ await untilAborted(sig, () =>
777
+ page.goto(url, { waitUntil: opts?.waitUntil ?? "load", timeout: timeoutMs }),
760
778
  );
761
- } finally {
762
- await handle.dispose().catch(() => undefined);
763
- }
764
- },
765
- select: async (selector, ...values) => await this.#select(selector, values, timeoutMs, signal),
766
- uploadFile: async (selector, ...filePaths) =>
767
- await this.#uploadFile(selector, filePaths, timeoutMs, signal, session),
768
- waitForUrl: async (pattern, opts) => await this.#waitForUrl(pattern, opts?.timeout ?? timeoutMs, signal),
769
- waitForResponse: async (pattern, opts) =>
770
- await this.#waitForResponse(pattern, opts?.timeout ?? timeoutMs, signal),
771
- id: async id => await this.#resolveCachedHandle(id),
779
+ }),
780
+ observe: opts => op("tab.observe()", quickOpMs, sig => this.#collectObservation({ ...opts, signal: sig })),
781
+ screenshot: opts =>
782
+ op(describeScreenshot(opts), quickOpMs, sig =>
783
+ this.#captureScreenshot(session, displays, screenshots, sig, opts),
784
+ ),
785
+ extract: (format = "markdown") =>
786
+ op(`tab.extract(${JSON.stringify(format)})`, quickOpMs, async sig => {
787
+ const html = (await untilAborted(sig, () => page.content())) as string;
788
+ const result = await extractReadableFromHtml(html, page.url(), format);
789
+ if (!result) {
790
+ throw new ToolError(
791
+ `tab.extract(${JSON.stringify(format)}) found no readable content on ${page.url()}`,
792
+ );
793
+ }
794
+ const content = format === "markdown" ? result.markdown : result.text;
795
+ if (!content) {
796
+ throw new ToolError(
797
+ `tab.extract(${JSON.stringify(format)}) produced empty ${format} content for ${page.url()}`,
798
+ );
799
+ }
800
+ return content;
801
+ }),
802
+ click: selector =>
803
+ op(`tab.click(${JSON.stringify(selector)})`, INF, async sig => {
804
+ const resolved = normalizeSelector(selector);
805
+ if (resolved.startsWith("text/")) await clickQueryHandlerText(page, resolved, timeoutMs, sig);
806
+ else await untilAborted(sig, () => page.locator(resolved).setTimeout(timeoutMs).click());
807
+ }),
808
+ type: (selector, text) =>
809
+ op(`tab.type(${JSON.stringify(selector)})`, INF, async sig => {
810
+ const handle = (await untilAborted(sig, () =>
811
+ page.locator(normalizeSelector(selector)).setTimeout(timeoutMs).waitHandle(),
812
+ )) as ElementHandle;
813
+ try {
814
+ await untilAborted(sig, () => handle.type(text, { delay: 0 }));
815
+ } finally {
816
+ await handle.dispose();
817
+ }
818
+ }),
819
+ fill: (selector, value) =>
820
+ op(`tab.fill(${JSON.stringify(selector)})`, INF, sig =>
821
+ untilAborted(sig, () => page.locator(normalizeSelector(selector)).setTimeout(timeoutMs).fill(value)),
822
+ ),
823
+ press: (key, opts) =>
824
+ op(`tab.press(${JSON.stringify(key)})`, INF, async sig => {
825
+ const selector = opts?.selector;
826
+ if (selector) await untilAborted(sig, () => page.focus(normalizeSelector(selector)));
827
+ await untilAborted(sig, () => page.keyboard.press(key));
828
+ }),
829
+ scroll: (deltaX, deltaY) =>
830
+ op("tab.scroll()", INF, sig => untilAborted(sig, () => page.mouse.wheel({ deltaX, deltaY }))),
831
+ drag: (from, to) => op("tab.drag()", INF, sig => this.#drag(from, to, sig)),
832
+ waitFor: selector =>
833
+ op(
834
+ `tab.waitFor(${JSON.stringify(selector)})`,
835
+ INF,
836
+ async sig =>
837
+ (await untilAborted(sig, () =>
838
+ page.locator(normalizeSelector(selector)).setTimeout(timeoutMs).waitHandle(),
839
+ )) as ElementHandle,
840
+ ),
841
+ evaluate: (fn, ...args) =>
842
+ op("tab.evaluate()", INF, sig =>
843
+ untilAborted(sig, () =>
844
+ typeof fn === "string"
845
+ ? page.evaluate(fn)
846
+ : page.evaluate(fn as (...a: unknown[]) => unknown, ...args),
847
+ ),
848
+ ) as never,
849
+ scrollIntoView: selector =>
850
+ op(`tab.scrollIntoView(${JSON.stringify(selector)})`, INF, async sig => {
851
+ const handle = (await untilAborted(sig, () =>
852
+ page.locator(normalizeSelector(selector)).setTimeout(timeoutMs).waitHandle(),
853
+ )) as ElementHandle;
854
+ try {
855
+ await untilAborted(sig, () =>
856
+ handle.evaluate(el => {
857
+ const target = el as unknown as {
858
+ scrollIntoView: (opts: { behavior: string; block: string; inline: string }) => void;
859
+ };
860
+ target.scrollIntoView({ behavior: "instant", block: "center", inline: "center" });
861
+ }),
862
+ );
863
+ } finally {
864
+ await handle.dispose().catch(() => undefined);
865
+ }
866
+ }),
867
+ select: (selector, ...values) =>
868
+ op(`tab.select(${JSON.stringify(selector)})`, INF, sig => this.#select(selector, values, timeoutMs, sig)),
869
+ uploadFile: (selector, ...filePaths) =>
870
+ op(`tab.uploadFile(${JSON.stringify(selector)})`, INF, sig =>
871
+ this.#uploadFile(selector, filePaths, timeoutMs, sig, session),
872
+ ),
873
+ waitForUrl: (pattern, opts) =>
874
+ op("tab.waitForUrl()", INF, sig => this.#waitForUrl(pattern, opts?.timeout ?? timeoutMs, sig)),
875
+ waitForResponse: (pattern, opts) =>
876
+ op("tab.waitForResponse()", INF, sig => this.#waitForResponse(pattern, opts?.timeout ?? timeoutMs, sig)),
877
+ id: id => this.#resolveCachedHandle(id),
772
878
  };
773
879
  }
774
880
 
@@ -832,7 +938,21 @@ export class WorkerCore {
832
938
  )) as ElementHandle | null;
833
939
  if (!handle) throw new ToolError("Screenshot selector did not resolve to an element");
834
940
  try {
835
- buffer = (await untilAborted(signal, () => handle.screenshot({ type: "png" }))) as Buffer;
941
+ // Bring the element into view with a single instant scroll instead of puppeteer's
942
+ // scrollIntoViewIfNeeded(), whose IntersectionObserver promise can stall indefinitely
943
+ // on continuously-animating pages (WebGL / backdrop-filter "glass" effects). Best-effort.
944
+ await untilAborted(signal, () =>
945
+ handle.evaluate(el => {
946
+ const target = el as unknown as {
947
+ scrollIntoView: (opts: { behavior: string; block: string; inline: string }) => void;
948
+ };
949
+ target.scrollIntoView({ behavior: "instant", block: "center", inline: "center" });
950
+ }),
951
+ ).catch(() => undefined);
952
+ // scrollIntoView:false skips the same IntersectionObserver check inside screenshot();
953
+ // captureBeyondViewport (puppeteer's default) still renders the clipped region.
954
+ const shotOpts: ElementScreenshotOptions = { type: "png", scrollIntoView: false };
955
+ buffer = (await untilAborted(signal, () => handle.screenshot(shotOpts))) as Buffer;
836
956
  } finally {
837
957
  await handle.dispose().catch(() => undefined);
838
958
  }
@@ -594,8 +594,11 @@ export const debugToolRenderer = {
594
594
  return markFramedBlockComponent({
595
595
  render(width: number): string[] {
596
596
  const action = (args?.action ?? result.details?.action ?? "debug").replaceAll("_", " ");
597
- const status = options.isPartial ? "running" : result.isError ? "error" : "success";
598
- const header = `${formatStatusIcon(status, theme, options.spinnerFrame)} Debug ${action}`;
597
+ const success = !options.isPartial && !result.isError;
598
+ const statusIcon = success
599
+ ? theme.styledSymbol("tool.debug", "accent")
600
+ : formatStatusIcon(options.isPartial ? "running" : "error", theme, options.spinnerFrame);
601
+ const header = `${statusIcon} Debug ${action}`;
599
602
  const summaryLines = result.details?.snapshot
600
603
  ? formatSessionSnapshot(result.details.snapshot).map(line => replaceTabs(line))
601
604
  : [];
@@ -172,7 +172,7 @@ function renderAgentProgressEvents(events: EvalStatusEvent[], theme: Theme, spin
172
172
  const status = agentEventStatus(event.status);
173
173
  const iconStatus =
174
174
  status === "completed"
175
- ? "success"
175
+ ? "done"
176
176
  : status === "failed"
177
177
  ? "error"
178
178
  : status === "aborted"
@@ -182,10 +182,13 @@ function renderAgentProgressEvents(events: EvalStatusEvent[], theme: Theme, spin
182
182
  : "running";
183
183
  const iconColor =
184
184
  status === "completed" ? "success" : status === "failed" || status === "aborted" ? "error" : "accent";
185
- const icon = formatStatusIcon(iconStatus, theme, status === "running" ? spinnerFrame : undefined);
185
+ const icon =
186
+ status === "completed"
187
+ ? theme.styledSymbol("tool.eval", "accent")
188
+ : theme.fg(iconColor, formatStatusIcon(iconStatus, theme, status === "running" ? spinnerFrame : undefined));
186
189
 
187
190
  const id = eventString(event.id) ?? "agent";
188
- let line = `${prefix} ${theme.fg(iconColor, icon)} ${theme.fg("accent", theme.bold(id))}`;
191
+ let line = `${prefix} ${icon} ${theme.fg("accent", theme.bold(id))}`;
189
192
 
190
193
  if (status === "failed" || status === "aborted") {
191
194
  line += ` ${formatBadge(status, iconColor, theme)}`;
package/src/tools/eval.ts CHANGED
@@ -30,7 +30,7 @@ const evalCellSchema = z.object({
30
30
  language: z.enum(["py", "js"]).describe('runtime: "py" for the IPython kernel, "js" for the persistent JS VM'),
31
31
  code: z.string().describe("cell body, verbatim. Use top-level await freely."),
32
32
  title: z.string().optional().describe('short label shown in transcript (e.g. "imports", "load config")'),
33
- timeout: z.number().int().min(1).max(600).optional().describe("per-cell timeout in seconds (1-600, default 30)"),
33
+ timeout: z.number().int().min(1).max(3600).optional().describe("per-cell timeout in seconds (1-3600, default 30)"),
34
34
  reset: z
35
35
  .boolean()
36
36
  .optional()
@@ -163,8 +163,8 @@ function getJobStateVisual(
163
163
  ): { iconRaw: string; iconColor: ToolUIColor; textColor: ThemeColor } {
164
164
  if (job.conclusion && SUCCESS_CONCLUSIONS.has(job.conclusion)) {
165
165
  return {
166
- iconRaw: theme.status.success,
167
- iconColor: "success",
166
+ iconRaw: theme.symbol("tool.gh"),
167
+ iconColor: "accent",
168
168
  textColor: "success",
169
169
  };
170
170
  }
@@ -327,14 +327,21 @@ function renderFallbackComponent(
327
327
  const title = formatOpTitle(args.op);
328
328
  const meta = buildOpMeta(args);
329
329
  const isError = result.isError === true;
330
- const status: ToolUIStatus = isError ? "error" : text ? "success" : "warning";
330
+ const success = !isError && Boolean(text);
331
331
  const header = renderStatusLine(
332
- {
333
- icon: status,
334
- title,
335
- titleColor: isError ? "error" : "accent",
336
- meta,
337
- },
332
+ success
333
+ ? {
334
+ iconOverride: theme.styledSymbol("tool.gh", "accent"),
335
+ title,
336
+ titleColor: "accent",
337
+ meta,
338
+ }
339
+ : {
340
+ icon: isError ? "error" : "warning",
341
+ title,
342
+ titleColor: isError ? "error" : "accent",
343
+ meta,
344
+ },
338
345
  theme,
339
346
  );
340
347
 
@@ -438,12 +445,19 @@ export const githubToolRenderer = {
438
445
  if (watch) {
439
446
  const isError = result.isError === true;
440
447
  const header = renderStatusLine(
441
- {
442
- icon: isError ? "error" : "success",
443
- title: "GitHub Run Watch",
444
- titleColor: isError ? "error" : "accent",
445
- meta: [getWatchHeader(watch)],
446
- },
448
+ isError
449
+ ? {
450
+ icon: "error",
451
+ title: "GitHub Run Watch",
452
+ titleColor: "error",
453
+ meta: [getWatchHeader(watch)],
454
+ }
455
+ : {
456
+ iconOverride: uiTheme.styledSymbol("tool.gh", "accent"),
457
+ title: "GitHub Run Watch",
458
+ titleColor: "accent",
459
+ meta: [getWatchHeader(watch)],
460
+ },
447
461
  uiTheme,
448
462
  );
449
463
  return framedBlock(uiTheme, width => {