@oh-my-pi/pi-coding-agent 14.9.3 → 14.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/CHANGELOG.md +96 -0
  2. package/package.json +7 -7
  3. package/src/async/job-manager.ts +66 -9
  4. package/src/capability/rule.ts +20 -0
  5. package/src/cli/setup-cli.ts +14 -161
  6. package/src/cli/stats-cli.ts +56 -2
  7. package/src/cli.ts +0 -1
  8. package/src/config/model-registry.ts +13 -0
  9. package/src/config/model-resolver.ts +8 -2
  10. package/src/config/settings-schema.ts +1 -11
  11. package/src/edit/index.ts +8 -0
  12. package/src/edit/renderer.ts +6 -1
  13. package/src/edit/streaming.ts +53 -2
  14. package/src/eval/eval.lark +30 -10
  15. package/src/eval/js/context-manager.ts +334 -601
  16. package/src/eval/js/shared/helpers.ts +237 -0
  17. package/src/eval/js/shared/indirect-eval.ts +30 -0
  18. package/src/eval/js/{prelude.txt → shared/prelude.txt} +0 -2
  19. package/src/eval/js/shared/rewrite-imports.ts +211 -0
  20. package/src/eval/js/shared/runtime.ts +168 -0
  21. package/src/eval/js/shared/types.ts +18 -0
  22. package/src/eval/js/tool-bridge.ts +2 -4
  23. package/src/eval/js/worker-core.ts +146 -0
  24. package/src/eval/js/worker-entry.ts +24 -0
  25. package/src/eval/js/worker-protocol.ts +41 -0
  26. package/src/eval/parse.ts +218 -49
  27. package/src/eval/py/display.ts +71 -0
  28. package/src/eval/py/executor.ts +97 -96
  29. package/src/eval/py/index.ts +2 -2
  30. package/src/eval/py/kernel.ts +472 -900
  31. package/src/eval/py/prelude.py +106 -87
  32. package/src/eval/py/runner.py +879 -0
  33. package/src/eval/py/runtime.ts +3 -16
  34. package/src/eval/py/tool-bridge.ts +137 -0
  35. package/src/export/html/template.css +12 -0
  36. package/src/export/html/template.generated.ts +1 -1
  37. package/src/export/html/template.js +113 -7
  38. package/src/extensibility/plugins/loader.ts +31 -6
  39. package/src/extensibility/skills.ts +20 -0
  40. package/src/internal-urls/agent-protocol.ts +63 -52
  41. package/src/internal-urls/artifact-protocol.ts +51 -51
  42. package/src/internal-urls/docs-index.generated.ts +35 -3
  43. package/src/internal-urls/index.ts +6 -19
  44. package/src/internal-urls/local-protocol.ts +49 -7
  45. package/src/internal-urls/mcp-protocol.ts +2 -8
  46. package/src/internal-urls/memory-protocol.ts +89 -59
  47. package/src/internal-urls/router.ts +38 -22
  48. package/src/internal-urls/rule-protocol.ts +2 -20
  49. package/src/internal-urls/skill-protocol.ts +4 -27
  50. package/src/main.ts +1 -1
  51. package/src/mcp/manager.ts +17 -0
  52. package/src/modes/components/session-observer-overlay.ts +2 -2
  53. package/src/modes/components/tool-execution.ts +6 -0
  54. package/src/modes/components/tree-selector.ts +4 -0
  55. package/src/modes/controllers/command-controller.ts +0 -23
  56. package/src/modes/controllers/event-controller.ts +23 -2
  57. package/src/modes/controllers/mcp-command-controller.ts +7 -10
  58. package/src/modes/interactive-mode.ts +2 -2
  59. package/src/modes/theme/theme.ts +27 -27
  60. package/src/modes/types.ts +1 -1
  61. package/src/modes/utils/ui-helpers.ts +14 -9
  62. package/src/prompts/commands/orchestrate.md +1 -0
  63. package/src/prompts/system/project-prompt.md +10 -2
  64. package/src/prompts/system/subagent-system-prompt.md +8 -8
  65. package/src/prompts/system/system-prompt.md +13 -7
  66. package/src/prompts/tools/ask.md +0 -1
  67. package/src/prompts/tools/bash.md +0 -10
  68. package/src/prompts/tools/eval.md +15 -30
  69. package/src/prompts/tools/github.md +6 -5
  70. package/src/prompts/tools/hashline.md +1 -0
  71. package/src/prompts/tools/job.md +14 -6
  72. package/src/prompts/tools/task.md +20 -3
  73. package/src/registry/agent-registry.ts +2 -1
  74. package/src/sdk.ts +87 -89
  75. package/src/session/agent-session.ts +58 -21
  76. package/src/session/artifacts.ts +7 -4
  77. package/src/session/history-storage.ts +77 -19
  78. package/src/session/session-manager.ts +30 -1
  79. package/src/ssh/connection-manager.ts +32 -16
  80. package/src/ssh/sshfs-mount.ts +10 -7
  81. package/src/system-prompt.ts +0 -5
  82. package/src/task/executor.ts +14 -2
  83. package/src/task/index.ts +19 -5
  84. package/src/tool-discovery/tool-index.ts +21 -8
  85. package/src/tools/ast-edit.ts +3 -2
  86. package/src/tools/ast-grep.ts +3 -2
  87. package/src/tools/bash.ts +15 -9
  88. package/src/tools/browser/tab-protocol.ts +4 -0
  89. package/src/tools/browser/tab-supervisor.ts +98 -7
  90. package/src/tools/browser/tab-worker.ts +104 -58
  91. package/src/tools/eval.ts +49 -11
  92. package/src/tools/fetch.ts +1 -1
  93. package/src/tools/gh.ts +140 -4
  94. package/src/tools/index.ts +12 -11
  95. package/src/tools/job.ts +48 -12
  96. package/src/tools/read.ts +5 -4
  97. package/src/tools/search.ts +3 -2
  98. package/src/tools/todo-write.ts +1 -1
  99. package/src/web/scrapers/mastodon.ts +1 -1
  100. package/src/web/scrapers/repology.ts +7 -7
  101. package/src/web/search/index.ts +6 -4
  102. package/src/cli/jupyter-cli.ts +0 -106
  103. package/src/commands/jupyter.ts +0 -32
  104. package/src/eval/py/cancellation.ts +0 -28
  105. package/src/eval/py/gateway-coordinator.ts +0 -424
  106. package/src/internal-urls/jobs-protocol.ts +0 -120
  107. package/src/prompts/system/now-prompt.md +0 -7
  108. package/src/eval/js/{prelude.ts → shared/prelude.ts} +0 -0
package/src/tools/bash.ts CHANGED
@@ -4,8 +4,10 @@ import type { Component } from "@oh-my-pi/pi-tui";
4
4
  import { ImageProtocol, TERMINAL, Text } from "@oh-my-pi/pi-tui";
5
5
  import { $env, getProjectDir, isEnoent, prompt } from "@oh-my-pi/pi-utils";
6
6
  import { Type } from "@sinclair/typebox";
7
+ import { AsyncJobManager } from "../async";
7
8
  import { type BashResult, executeBash } from "../exec/bash-executor";
8
9
  import type { RenderResultOptions } from "../extensibility/custom-tools/types";
10
+ import { InternalUrlRouter } from "../internal-urls";
9
11
  import { truncateToVisualLines } from "../modes/components/visual-truncate";
10
12
  import type { Theme } from "../modes/theme/theme";
11
13
  import bashDescription from "../prompts/tools/bash.md" with { type: "text" };
@@ -326,7 +328,9 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
326
328
  }
327
329
  lines.push(`Background job ${jobId} started: ${label}`);
328
330
  lines.push("Result will be delivered automatically when complete.");
329
- lines.push(`Use \`job\` (with \`poll\` or \`cancel\`) or \`read jobs://${jobId}\` if needed.`);
331
+ lines.push(
332
+ `You can use \`job\` to poll until complete, but prefer to continue with another task in the meanwhile if it's not blocking.`,
333
+ );
330
334
  return {
331
335
  content: [{ type: "text", text: lines.join("\n") }],
332
336
  details,
@@ -349,7 +353,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
349
353
  onUpdate?: AgentToolUpdateCallback<BashToolDetails>;
350
354
  startBackgrounded: boolean;
351
355
  }): ManagedBashJobHandle {
352
- const manager = this.session.asyncJobManager;
356
+ const manager = AsyncJobManager.instance();
353
357
  if (!manager) {
354
358
  throw new ToolError("Background job manager unavailable for this session.");
355
359
  }
@@ -399,6 +403,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
399
403
  }
400
404
  },
401
405
  {
406
+ ownerId: this.session.getAgentId?.() ?? undefined,
402
407
  onProgress: async (text, details) => {
403
408
  latestText = text;
404
409
  await options.onUpdate?.({
@@ -501,7 +506,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
501
506
 
502
507
  const internalUrlOptions: InternalUrlExpansionOptions = {
503
508
  skills: this.session.skills ?? [],
504
- internalRouter: this.session.internalRouter,
509
+ internalRouter: InternalUrlRouter.instance(),
505
510
  localOptions: {
506
511
  getArtifactsDir: this.session.getArtifactsDir,
507
512
  getSessionId: this.session.getSessionId,
@@ -549,7 +554,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
549
554
  const timeoutClampNotice = formatTimeoutClampNotice(requestedTimeoutSec, timeoutSec);
550
555
 
551
556
  if (asyncRequested) {
552
- if (!this.session.asyncJobManager) {
557
+ if (!AsyncJobManager.instance()) {
553
558
  throw new ToolError("Async job manager unavailable for this session.");
554
559
  }
555
560
  const job = this.#startManagedBashJob({
@@ -570,7 +575,8 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
570
575
  });
571
576
  }
572
577
 
573
- if (this.#autoBackgroundEnabled && !pty && this.session.asyncJobManager) {
578
+ const autoBgManager = AsyncJobManager.instance();
579
+ if (this.#autoBackgroundEnabled && !pty && autoBgManager) {
574
580
  const autoBackgroundWaitMs = this.#resolveAutoBackgroundWaitMs(timeoutMs);
575
581
  const startBackgrounded = autoBackgroundWaitMs === 0;
576
582
  const job = this.#startManagedBashJob({
@@ -593,16 +599,16 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
593
599
  }
594
600
  const waitResult = await this.#waitForManagedBashJob(job, autoBackgroundWaitMs, signal);
595
601
  if (waitResult.kind === "completed") {
596
- this.session.asyncJobManager.acknowledgeDeliveries([job.jobId]);
602
+ autoBgManager.acknowledgeDeliveries([job.jobId]);
597
603
  return waitResult.result;
598
604
  }
599
605
  if (waitResult.kind === "failed") {
600
- this.session.asyncJobManager.acknowledgeDeliveries([job.jobId]);
606
+ autoBgManager.acknowledgeDeliveries([job.jobId]);
601
607
  throw waitResult.error;
602
608
  }
603
609
  if (waitResult.kind === "aborted") {
604
- this.session.asyncJobManager.cancel(job.jobId);
605
- this.session.asyncJobManager.acknowledgeDeliveries([job.jobId]);
610
+ autoBgManager.cancel(job.jobId);
611
+ autoBgManager.acknowledgeDeliveries([job.jobId]);
606
612
  throw new ToolAbortError(job.getLatestText() || "Command aborted");
607
613
  }
608
614
  job.setBackgrounded(true);
package/src/tools/browser/tab-protocol.ts CHANGED
@@ -59,10 +59,13 @@ export type WorkerInitPayload =
59
59
  dialogs?: "accept" | "dismiss";
60
60
  };
61
61
 
62
+ export type ToolReply = { ok: true; value: unknown } | { ok: false; error: RunErrorPayload };
63
+
62
64
  export type WorkerInbound =
63
65
  | { type: "init"; payload: WorkerInitPayload }
64
66
  | { type: "run"; id: string; name: string; code: string; timeoutMs: number; session: SessionSnapshot }
65
67
  | { type: "abort"; id: string }
68
+ | { type: "tool-reply"; id: string; reply: ToolReply }
66
69
  | { type: "close" };
67
70
 
68
71
  export interface ReadyInfo {
@@ -91,6 +94,7 @@ export type WorkerOutbound =
91
94
  | { type: "init-failed"; error: RunErrorPayload }
92
95
  | { type: "result"; id: string; ok: true; payload: RunResultOk }
93
96
  | { type: "result"; id: string; ok: false; error: RunErrorPayload }
97
+ | { type: "tool-call"; id: string; runId: string; name: string; args: unknown }
94
98
  | { type: "log"; level: "debug" | "warn" | "error"; msg: string; meta?: Record<string, unknown> }
95
99
  | { type: "closed" };
96
100
 
package/src/tools/browser/tab-supervisor.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { getPuppeteerDir, logger, Snowflake } from "@oh-my-pi/pi-utils";
2
2
  import type { Page, Target } from "puppeteer-core";
3
+ import { callSessionTool } from "../../eval/js/tool-bridge";
3
4
  import type { ToolSession } from "../../sdk";
4
5
  import { expandPath } from "../path-utils";
5
6
  import { ToolAbortError, ToolError } from "../tool-errors";
@@ -16,6 +17,17 @@ import type {
16
17
  WorkerInitPayload,
17
18
  WorkerOutbound,
18
19
  } from "./tab-protocol";
20
+ // Imported with `type: "file"` so Bun's bundler statically discovers the
21
+ // worker entry and embeds it inside `bun build --compile` single-file
22
+ // binaries. Without this attribute the bundler cannot reach the entry through
23
+ // a `new URL(..., import.meta.url)` literal stored in a local variable, and
24
+ // the prebuilt binary surfaces `Timed out initializing browser tab worker`
25
+ // (issue #1011) because `/$bunfs/root/tab-worker-entry.ts` is missing.
26
+ // tsgo doesn't recognize Bun's `with { type: "file" }` attribute and treats
27
+ // this as a normal TS source import, raising TS1192/TS5097. Bun's bundler
28
+ // (and runtime) honors the attribute and returns the embedded file URL.
29
+ // @ts-expect-error -- Bun file-URL import (see comment above).
30
+ import tabWorkerEntryUrl from "./tab-worker-entry.ts" with { type: "file" };
19
31
 
20
32
  interface WorkerHandle {
21
33
  send(msg: WorkerInbound, transferList?: Transferable[]): void;
@@ -26,6 +38,14 @@ interface WorkerHandle {
26
38
 
27
39
  export type DialogPolicy = "accept" | "dismiss";
28
40
 
41
+ export interface PendingRun {
42
+ resolve(result: RunResultOk): void;
43
+ reject(error: unknown): void;
44
+ session: ToolSession;
45
+ signal?: AbortSignal;
46
+ toolCalls: Map<string, AbortController>;
47
+ }
48
+
29
49
  export interface TabSession {
30
50
  name: string;
31
51
  browser: BrowserHandle;
@@ -33,7 +53,7 @@ export interface TabSession {
33
53
  worker: WorkerHandle;
34
54
  state: "alive" | "dead";
35
55
  info: ReadyInfo;
36
- pending: Map<string, { resolve: (result: RunResultOk) => void; reject: (error: unknown) => void }>;
56
+ pending: Map<string, PendingRun>;
37
57
  dialogPolicy?: DialogPolicy;
38
58
  kindTag: BrowserKindTag;
39
59
  }
@@ -144,14 +164,14 @@ export async function acquireTab(
144
164
  export async function runInTab(name: string, opts: RunInTabOptions): Promise<RunResultOk> {
145
165
  return await runInTabWithSnapshot(
146
166
  name,
147
- { code: opts.code, timeoutMs: opts.timeoutMs, signal: opts.signal },
167
+ { code: opts.code, timeoutMs: opts.timeoutMs, signal: opts.signal, session: opts.session },
148
168
  { cwd: opts.session.cwd, browserScreenshotDir: expandBrowserScreenshotDir(opts.session) },
149
169
  );
150
170
  }
151
171
 
152
172
  async function runInTabWithSnapshot(
153
173
  name: string,
154
- opts: { code: string; timeoutMs: number; signal?: AbortSignal },
174
+ opts: { code: string; timeoutMs: number; signal?: AbortSignal; session?: ToolSession },
155
175
  snapshot: SessionSnapshot,
156
176
  ): Promise<RunResultOk> {
157
177
  const tab = tabs.get(name);
@@ -159,8 +179,18 @@ async function runInTabWithSnapshot(
159
179
  if (tab.pending.size > 0) throw new ToolError(`Tab ${JSON.stringify(name)} is busy`);
160
180
  const id = Snowflake.next();
161
181
  const { promise, resolve, reject } = Promise.withResolvers<RunResultOk>();
162
- tab.pending.set(id, { resolve, reject });
163
- const abort = (): void => tab.worker.send({ type: "abort", id });
182
+ const pending: PendingRun = {
183
+ resolve,
184
+ reject,
185
+ session: opts.session ?? ({} as ToolSession),
186
+ signal: opts.signal,
187
+ toolCalls: new Map(),
188
+ };
189
+ tab.pending.set(id, pending);
190
+ const abort = (): void => {
191
+ tab.worker.send({ type: "abort", id });
192
+ for (const ctrl of pending.toolCalls.values()) ctrl.abort(opts.signal?.reason);
193
+ };
164
194
  if (opts.signal?.aborted) abort();
165
195
  else opts.signal?.addEventListener("abort", abort, { once: true });
166
196
  try {
@@ -266,9 +296,71 @@ function handleTabMessage(tab: TabSession, msg: WorkerOutbound): void {
266
296
  tab.info = msg.info;
267
297
  return;
268
298
  }
299
+ if (msg.type === "tool-call") {
300
+ void dispatchToolCall(tab, msg);
301
+ return;
302
+ }
269
303
  if (msg.type === "log") logWorkerMessage(msg);
270
304
  }
271
305
 
306
+ async function dispatchToolCall(tab: TabSession, msg: Extract<WorkerOutbound, { type: "tool-call" }>): Promise<void> {
307
+ const pending = tab.pending.get(msg.runId);
308
+ if (!pending?.session.cwd) {
309
+ safeSend(tab, {
310
+ type: "tool-reply",
311
+ id: msg.id,
312
+ reply: {
313
+ ok: false,
314
+ error: { name: "ToolError", message: "No active run for tool call", isToolError: true, isAbort: false },
315
+ },
316
+ });
317
+ return;
318
+ }
319
+ const ctrl = new AbortController();
320
+ pending.toolCalls.set(msg.id, ctrl);
321
+ const onParentAbort = (): void => ctrl.abort(pending.signal?.reason);
322
+ if (pending.signal?.aborted) onParentAbort();
323
+ else pending.signal?.addEventListener("abort", onParentAbort, { once: true });
324
+ try {
325
+ const value = await callSessionTool(msg.name, msg.args, {
326
+ session: pending.session,
327
+ signal: ctrl.signal,
328
+ emitStatus: () => {
329
+ // Status events from tool calls aren't piped back to user code yet; the worker
330
+ // already pushes its own helper status via the display channel.
331
+ },
332
+ });
333
+ safeSend(tab, { type: "tool-reply", id: msg.id, reply: { ok: true, value } });
334
+ } catch (error) {
335
+ safeSend(tab, { type: "tool-reply", id: msg.id, reply: { ok: false, error: toErrorPayload(error) } });
336
+ } finally {
337
+ pending.toolCalls.delete(msg.id);
338
+ pending.signal?.removeEventListener("abort", onParentAbort);
339
+ }
340
+ }
341
+
342
+ function safeSend(tab: TabSession, msg: WorkerInbound): void {
343
+ if (tab.state !== "alive") return;
344
+ try {
345
+ tab.worker.send(msg);
346
+ } catch (err) {
347
+ logger.debug("tab worker send failed", { error: err instanceof Error ? err.message : String(err) });
348
+ }
349
+ }
350
+
351
+ function toErrorPayload(error: unknown): RunErrorPayload {
352
+ if (error instanceof Error) {
353
+ return {
354
+ name: error.name,
355
+ message: error.message,
356
+ stack: error.stack,
357
+ isAbort: error.name === "AbortError" || error.name === "ToolAbortError",
358
+ isToolError: error instanceof ToolError || error.name === "ToolError",
359
+ };
360
+ }
361
+ return { name: "Error", message: String(error), isAbort: false, isToolError: false };
362
+ }
363
+
272
364
  async function forceKillTab(name: string, reason: string): Promise<void> {
273
365
  const tab = tabs.get(name);
274
366
  if (!tab) return;
@@ -364,8 +456,7 @@ async function raceWithTimeout<T>(
364
456
 
365
457
  async function spawnTabWorker(): Promise<WorkerHandle> {
366
458
  try {
367
- const url = new URL("./tab-worker-entry.ts", import.meta.url);
368
- const worker = new Worker(url.href, { type: "module" });
459
+ const worker = new Worker(tabWorkerEntryUrl, { type: "module" });
369
460
  return wrapBunWorker(worker);
370
461
  } catch (err) {
371
462
  logger.warn("Bun Worker spawn failed; using inline tab worker (no sync-loop guard)", {
package/src/tools/browser/tab-worker.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as os from "node:os";
3
3
  import * as path from "node:path";
4
- import * as vm from "node:vm";
4
+
5
5
  import { Snowflake, untilAborted } from "@oh-my-pi/pi-utils";
6
6
  import type { HTMLElement } from "linkedom";
7
7
  import type {
@@ -14,6 +14,8 @@ import type {
14
14
  SerializedAXNode,
15
15
  Target,
16
16
  } from "puppeteer-core";
17
+ import { JsRuntime, type RuntimeHooks } from "../../eval/js/shared/runtime";
18
+ import type { JsDisplayOutput } from "../../eval/js/shared/types";
17
19
  import { resizeImage } from "../../utils/image-resize";
18
20
  import { resolveToCwd } from "../path-utils";
19
21
  import { formatScreenshot } from "../render-utils";
@@ -34,6 +36,7 @@ import type {
34
36
  RunResultOk,
35
37
  ScreenshotResult,
36
38
  SessionSnapshot,
39
+ ToolReply,
37
40
  Transport,
38
41
  WorkerInbound,
39
42
  WorkerInitPayload,
@@ -177,6 +180,27 @@ function errorPayload(error: unknown): RunErrorPayload {
177
180
  return { name: "Error", message: String(error), isToolError: false, isAbort: false };
178
181
  }
179
182
 
183
+ function safeJsonStringify(value: unknown): string {
184
+ try {
185
+ return JSON.stringify(value, null, 2);
186
+ } catch {
187
+ return String(value);
188
+ }
189
+ }
190
+
191
+ function replyError(payload: RunErrorPayload): Error {
192
+ if (payload.isAbort) {
193
+ const err = new ToolAbortError(payload.message || "Tool call aborted");
194
+ if (payload.stack) err.stack = payload.stack;
195
+ return err;
196
+ }
197
+ const Ctor = payload.isToolError ? ToolError : Error;
198
+ const err = new Ctor(payload.message);
199
+ if (payload.name) err.name = payload.name;
200
+ if (payload.stack) err.stack = payload.stack;
201
+ return err;
202
+ }
203
+
180
204
  async function targetIdForTarget(target: Target): Promise<string> {
181
205
  const raw = target as unknown as { _targetId?: unknown };
182
206
  if (typeof raw._targetId === "string") return raw._targetId;
@@ -361,6 +385,14 @@ async function clickQueryHandlerText(
361
385
  );
362
386
  }
363
387
 
388
+ interface ActiveRun {
389
+ id: string;
390
+ ac: AbortController;
391
+ displays: RunResultOk["displays"];
392
+ screenshots: ScreenshotResult[];
393
+ pendingTools: Map<string, { resolve(value: unknown): void; reject(error: Error): void }>;
394
+ }
395
+
364
396
  export class WorkerCore {
365
397
  #transport: Transport;
366
398
  #browser?: Browser;
@@ -368,7 +400,8 @@ export class WorkerCore {
368
400
  #targetId?: string;
369
401
  #elementCache = new Map<number, ElementHandle>();
370
402
  #elementCounter = 0;
371
- #active?: { id: string; ac: AbortController };
403
+ #active: ActiveRun | null = null;
404
+ #runtime: JsRuntime | null = null;
372
405
  #unsub: () => void;
373
406
  #mode?: WorkerInitPayload["mode"];
374
407
  #dialogPolicy?: DialogPolicy;
@@ -401,6 +434,9 @@ export class WorkerCore {
401
434
  case "abort":
402
435
  if (this.#active?.id === msg.id) this.#active.ac.abort(new ToolAbortError());
403
436
  return;
437
+ case "tool-reply":
438
+ this.#deliverToolReply(msg.id, msg.reply);
439
+ return;
404
440
  case "close":
405
441
  await this.#close();
406
442
  return;
@@ -502,37 +538,26 @@ export class WorkerCore {
502
538
  const timeoutSignal = AbortSignal.timeout(msg.timeoutMs);
503
539
  const ac = new AbortController();
504
540
  const signal = AbortSignal.any([timeoutSignal, ac.signal]);
505
- this.#active = { id: msg.id, ac };
506
541
  const displays: RunResultOk["displays"] = [];
507
542
  const screenshots: ScreenshotResult[] = [];
543
+ const active: ActiveRun = { id: msg.id, ac, displays, screenshots, pendingTools: new Map() };
544
+ this.#active = active;
508
545
  try {
509
546
  throwIfAborted(signal);
510
547
  const page = this.#requirePage();
511
548
  const browser = this.#requireBrowser();
512
549
  const tabApi = this.#createTabApi(msg.name, msg.timeoutMs, signal, msg.session, displays, screenshots);
513
- const ctx = vm.createContext({
550
+ const runtime = this.#ensureRuntime(msg.session);
551
+ runtime.setCwd(msg.session.cwd);
552
+ runtime.setRunScope({
514
553
  page,
515
554
  browser,
516
555
  tab: tabApi,
517
- display: (value: unknown): void => this.#display(displays, value),
518
556
  assert: (cond: unknown, text?: string): void => {
519
557
  if (!cond) throw new ToolError(text ?? "Assertion failed");
520
558
  },
521
559
  wait: (ms: number): Promise<void> => Bun.sleep(ms),
522
- console: this.#console(),
523
- setTimeout,
524
- clearTimeout,
525
- setInterval,
526
- clearInterval,
527
- queueMicrotask,
528
- Promise,
529
- URL,
530
- URLSearchParams,
531
- TextEncoder,
532
- TextDecoder,
533
- Buffer,
534
560
  });
535
- const wrapped = `(async () => {\n${msg.code}\n})()`;
536
561
  const { promise: cancelRejection, reject: rejectCancel } = Promise.withResolvers<never>();
537
562
  const onCancel = (): void => {
538
563
  rejectCancel(
@@ -540,15 +565,17 @@ export class WorkerCore {
540
565
  ? new ToolError(`Browser code execution timed out after ${msg.timeoutMs}ms`)
541
566
  : new ToolAbortError(),
542
567
  );
568
+ // Cancel in-flight tool calls so user code's awaited proxies reject promptly.
569
+ for (const pending of active.pendingTools.values()) {
570
+ pending.reject(new ToolAbortError());
571
+ }
572
+ active.pendingTools.clear();
543
573
  };
544
574
  if (signal.aborted) onCancel();
545
575
  else signal.addEventListener("abort", onCancel, { once: true });
546
576
  try {
547
577
  const returnValue = await Promise.race([
548
- vm.runInContext(wrapped, ctx, {
549
- filename: `browser-run-${msg.id}.js`,
550
- lineOffset: -1,
551
- }) as Promise<unknown>,
578
+ runtime.run(msg.code, `browser-run-${msg.id}.js`),
552
579
  cancelRejection,
553
580
  ]);
554
581
  await this.#postReadyInfo();
@@ -564,8 +591,62 @@ export class WorkerCore {
564
591
  } catch (error) {
565
592
  this.#transport.send({ type: "result", id: msg.id, ok: false, error: errorPayload(error) });
566
593
  } finally {
567
- if (this.#active?.id === msg.id) this.#active = undefined;
594
+ if (this.#active?.id === msg.id) this.#active = null;
595
+ }
596
+ }
597
+
598
+ #ensureRuntime(session: SessionSnapshot): JsRuntime {
599
+ if (this.#runtime) return this.#runtime;
600
+ this.#runtime = new JsRuntime({
601
+ initialCwd: session.cwd,
602
+ sessionId: `browser-tab-${this.#targetId ?? "unknown"}`,
603
+ getHooks: () => this.#hooksForActiveRun(),
604
+ });
605
+ return this.#runtime;
606
+ }
607
+
608
+ #hooksForActiveRun(): RuntimeHooks | null {
609
+ const active = this.#active;
610
+ if (!active) return null;
611
+ return {
612
+ // console.* output stays on the supervisor log channel — matches pre-runtime behavior
613
+ // where browser cells didn't surface `console.log` to the model.
614
+ onText: chunk => this.#log("debug", chunk.replace(/\n$/, "")),
615
+ onDisplay: output => this.#pushDisplay(active.displays, output),
616
+ callTool: (name, args) => this.#callTool(active, name, args),
617
+ };
618
+ }
619
+
620
+ #pushDisplay(displays: RunResultOk["displays"], output: JsDisplayOutput): void {
621
+ if (output.type === "image") {
622
+ displays.push({ type: "image", data: output.data, mimeType: output.mimeType });
623
+ return;
624
+ }
625
+ if (output.type === "json") {
626
+ displays.push({ type: "text", text: safeJsonStringify(output.data) });
627
+ return;
568
628
  }
629
+ // status — surface as compact JSON so helper side effects (read/write/tree) appear in
630
+ // the cell result alongside explicit display() output.
631
+ displays.push({ type: "text", text: safeJsonStringify(output.event) });
632
+ }
633
+
634
+ async #callTool(active: ActiveRun, name: string, args: unknown): Promise<unknown> {
635
+ const id = `tab-tc-${active.id}-${crypto.randomUUID()}`;
636
+ const { promise, resolve, reject } = Promise.withResolvers<unknown>();
637
+ active.pendingTools.set(id, { resolve, reject });
638
+ this.#transport.send({ type: "tool-call", id, runId: active.id, name, args });
639
+ return await promise;
640
+ }
641
+
642
+ #deliverToolReply(id: string, reply: ToolReply): void {
643
+ const active = this.#active;
644
+ if (!active) return;
645
+ const pending = active.pendingTools.get(id);
646
+ if (!pending) return;
647
+ active.pendingTools.delete(id);
648
+ if (reply.ok) pending.resolve(reply.value);
649
+ else pending.reject(replyError(reply.error));
569
650
  }
570
651
 
571
652
  #createTabApi(
@@ -933,41 +1014,6 @@ export class WorkerCore {
933
1014
  }
934
1015
  return handle;
935
1016
  }
936
-
937
- #display(displays: RunResultOk["displays"], value: unknown): void {
938
- if (value === undefined || value === null) return;
939
- if (
940
- typeof value === "object" &&
941
- value !== null &&
942
- "type" in (value as Record<string, unknown>) &&
943
- (value as { type?: unknown }).type === "image"
944
- ) {
945
- const img = value as { data?: unknown; mimeType?: unknown };
946
- if (typeof img.data === "string" && typeof img.mimeType === "string") {
947
- displays.push({ type: "image", data: img.data, mimeType: img.mimeType });
948
- return;
949
- }
950
- }
951
- if (typeof value === "string") {
952
- displays.push({ type: "text", text: value });
953
- return;
954
- }
955
- try {
956
- displays.push({ type: "text", text: JSON.stringify(value, null, 2) });
957
- } catch {
958
- displays.push({ type: "text", text: String(value) });
959
- }
960
- }
961
-
962
- #console(): Pick<Console, "log" | "debug" | "warn" | "error"> {
963
- return {
964
- log: (...args: unknown[]) => this.#log("debug", args.map(String).join(" ")),
965
- debug: (...args: unknown[]) => this.#log("debug", args.map(String).join(" ")),
966
- warn: (...args: unknown[]) => this.#log("warn", args.map(String).join(" ")),
967
- error: (...args: unknown[]) => this.#log("error", args.map(String).join(" ")),
968
- };
969
- }
970
-
971
1017
  #clearElementCache(): void {
972
1018
  if (this.#elementCache.size === 0) {
973
1019
  this.#elementCounter = 0;
package/src/tools/eval.ts CHANGED
@@ -8,7 +8,7 @@ import { jsBackend, parseEvalInput, pythonBackend, sniffEvalLanguage } from "../
8
8
  import type { ExecutorBackend } from "../eval/backend";
9
9
  import evalGrammar from "../eval/eval.lark" with { type: "text" };
10
10
  import { ABORT_WARNING, type ParsedEvalCell } from "../eval/parse";
11
- import type { EvalCellResult, EvalLanguage, EvalStatusEvent, EvalToolDetails } from "../eval/types";
11
+ import type { EvalCellResult, EvalDisplayOutput, EvalLanguage, EvalStatusEvent, EvalToolDetails } from "../eval/types";
12
12
  import type { RenderResultOptions } from "../extensibility/custom-tools/types";
13
13
  import { truncateToVisualLines } from "../modes/components/visual-truncate";
14
14
  import { getMarkdownTheme, type Theme } from "../modes/theme/theme";
@@ -26,7 +26,7 @@ export const EVAL_DEFAULT_PREVIEW_LINES = 10;
26
26
 
27
27
  export const evalSchema = Type.Object({
28
28
  input: Type.String({
29
- description: "eval input as a sequence of `*** Begin <LANG>` cell headers followed by code",
29
+ description: 'eval input as a sequence of `*** Cell <lang>:"title"` cell headers followed by code',
30
30
  }),
31
31
  });
32
32
  export type EvalToolParams = Static<typeof evalSchema>;
@@ -47,6 +47,38 @@ function formatJsonScalar(value: unknown): string {
47
47
  return "[object]";
48
48
  }
49
49
 
50
+ /** Cap per `display()` value sent back to the model. */
51
+ const MAX_DISPLAY_TEXT_BYTES = 8000;
52
+
53
+ function formatDisplayJsonForText(value: unknown): string {
54
+ let text: string;
55
+ try {
56
+ text = JSON.stringify(value, null, 2) ?? String(value);
57
+ } catch {
58
+ text = String(value);
59
+ }
60
+ if (text.length > MAX_DISPLAY_TEXT_BYTES) {
61
+ text = `${text.slice(0, MAX_DISPLAY_TEXT_BYTES)}\n… (${text.length - MAX_DISPLAY_TEXT_BYTES} chars truncated)`;
62
+ }
63
+ return text;
64
+ }
65
+
66
+ /**
67
+ * Format display() JSON values into text the model can see. Images are surfaced
68
+ * separately as ImageContent so the model can actually inspect them; this helper
69
+ * intentionally does not touch images.
70
+ */
71
+ function formatDisplayOutputsForText(outputs: EvalDisplayOutput[]): string {
72
+ const chunks: string[] = [];
73
+ let displayIndex = 0;
74
+ for (const output of outputs) {
75
+ if (output.type !== "json") continue;
76
+ displayIndex++;
77
+ chunks.push(`display[${displayIndex}]:\n${formatDisplayJsonForText(output.data)}`);
78
+ }
79
+ return chunks.join("\n\n");
80
+ }
81
+
50
82
  function renderJsonTree(value: unknown, theme: Theme, expanded: boolean, maxDepth = expanded ? 6 : 2): string[] {
51
83
  const maxItems = expanded ? 20 : 5;
52
84
 
@@ -370,13 +402,16 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
370
402
  const durationMs = Date.now() - startTime;
371
403
 
372
404
  const cellStatusEvents: EvalStatusEvent[] = [];
405
+ const cellDisplayOutputs: EvalDisplayOutput[] = [];
373
406
  let cellHasMarkdown = false;
374
407
  for (const output of result.displayOutputs) {
375
408
  if (output.type === "json") {
376
409
  jsonOutputs.push(output.data);
410
+ cellDisplayOutputs.push(output);
377
411
  }
378
412
  if (output.type === "image") {
379
413
  images.push({ type: "image", data: output.data, mimeType: output.mimeType });
414
+ cellDisplayOutputs.push(output);
380
415
  }
381
416
  if (output.type === "status") {
382
417
  statusEvents.push(output.event);
@@ -387,7 +422,10 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
387
422
  }
388
423
  }
389
424
 
390
- const cellOutput = result.output.trim();
425
+ const stdoutTrimmed = result.output.trim();
426
+ const displayText = formatDisplayOutputsForText(cellDisplayOutputs);
427
+ const cellOutput =
428
+ stdoutTrimmed && displayText ? `${stdoutTrimmed}\n\n${displayText}` : stdoutTrimmed || displayText;
391
429
  cellResult.output = cellOutput;
392
430
  cellResult.exitCode = result.exitCode;
393
431
  cellResult.durationMs = durationMs;
@@ -431,14 +469,13 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
431
469
  languages,
432
470
  cells: cellResults,
433
471
  jsonOutputs: jsonOutputs.length > 0 ? jsonOutputs : undefined,
434
- images: images.length > 0 ? images : undefined,
435
472
  statusEvents: statusEvents.length > 0 ? statusEvents : undefined,
436
473
  isError: true,
437
474
  };
438
475
  if (notice) details.notice = notice;
439
476
 
440
477
  return toolResult(details)
441
- .text(outputText)
478
+ .content([{ type: "text", text: outputText }, ...images])
442
479
  .truncationFromSummary(summaryForMeta, { direction: "tail" })
443
480
  .done();
444
481
  }
@@ -461,14 +498,13 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
461
498
  languages,
462
499
  cells: cellResults,
463
500
  jsonOutputs: jsonOutputs.length > 0 ? jsonOutputs : undefined,
464
- images: images.length > 0 ? images : undefined,
465
501
  statusEvents: statusEvents.length > 0 ? statusEvents : undefined,
466
502
  isError: true,
467
503
  };
468
504
  if (notice) details.notice = notice;
469
505
 
470
506
  return toolResult(details)
471
- .text(outputText)
507
+ .content([{ type: "text", text: outputText }, ...images])
472
508
  .truncationFromSummary(summaryForMeta, { direction: "tail" })
473
509
  .done();
474
510
  }
@@ -479,9 +515,12 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
479
515
 
480
516
  const combinedOutput = cellOutputs.join("\n\n");
481
517
  const abortSuffix = parsedInput.aborted ? `\n\n${ABORT_WARNING}` : "";
518
+ const hasImages = images.length > 0;
482
519
  const outputText =
483
- (combinedOutput || (jsonOutputs.length > 0 || images.length > 0 ? "(no text output)" : "(no output)")) +
484
- abortSuffix;
520
+ (combinedOutput ||
521
+ (hasImages
522
+ ? `(displayed ${images.length} image${images.length === 1 ? "" : "s"}; no text output)`
523
+ : "(no output)")) + abortSuffix;
485
524
  const summaryForMeta = await summarizeFinal(combinedOutput, finalizeOutput);
486
525
 
487
526
  const details: EvalToolDetails = {
@@ -489,13 +528,12 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
489
528
  languages,
490
529
  cells: cellResults,
491
530
  jsonOutputs: jsonOutputs.length > 0 ? jsonOutputs : undefined,
492
- images: images.length > 0 ? images : undefined,
493
531
  statusEvents: statusEvents.length > 0 ? statusEvents : undefined,
494
532
  };
495
533
  if (notice) details.notice = notice;
496
534
 
497
535
  return toolResult(details)
498
- .text(outputText)
536
+ .content([{ type: "text", text: outputText }, ...images])
499
537
  .truncationFromSummary(summaryForMeta, { direction: "tail" })
500
538
  .done();
501
539
  } finally {
@@ -1352,7 +1352,7 @@ export function renderReadUrlCall(
1352
1352
  ): Component {
1353
1353
  const url = args.path ?? args.url ?? "";
1354
1354
  const domain = getDomain(url);
1355
- const path = truncate(url.replace(/^https?:\/\/[^/]+/, ""), 50, "\u2026");
1355
+ const path = truncate(url.replace(/^https?:\/\/[^/]+/, ""), 50, "");
1356
1356
  const description = `${domain}${path ? ` ${path}` : ""}`.trim();
1357
1357
  const meta: string[] = [];
1358
1358
  if (args.raw) meta.push("raw");