@oh-my-pi/pi-coding-agent 14.9.5 → 14.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/CHANGELOG.md +52 -0
  2. package/package.json +7 -7
  3. package/src/cli/setup-cli.ts +14 -161
  4. package/src/cli/stats-cli.ts +56 -2
  5. package/src/cli.ts +0 -1
  6. package/src/config/settings-schema.ts +0 -10
  7. package/src/eval/eval.lark +30 -10
  8. package/src/eval/js/context-manager.ts +334 -564
  9. package/src/eval/js/shared/helpers.ts +237 -0
  10. package/src/eval/js/shared/indirect-eval.ts +30 -0
  11. package/src/eval/js/shared/rewrite-imports.ts +211 -0
  12. package/src/eval/js/shared/runtime.ts +168 -0
  13. package/src/eval/js/shared/types.ts +18 -0
  14. package/src/eval/js/tool-bridge.ts +2 -4
  15. package/src/eval/js/worker-core.ts +146 -0
  16. package/src/eval/js/worker-entry.ts +24 -0
  17. package/src/eval/js/worker-protocol.ts +41 -0
  18. package/src/eval/parse.ts +218 -49
  19. package/src/eval/py/display.ts +71 -0
  20. package/src/eval/py/executor.ts +74 -89
  21. package/src/eval/py/index.ts +1 -2
  22. package/src/eval/py/kernel.ts +472 -900
  23. package/src/eval/py/prelude.py +95 -7
  24. package/src/eval/py/runner.py +879 -0
  25. package/src/eval/py/runtime.ts +3 -16
  26. package/src/eval/py/tool-bridge.ts +137 -0
  27. package/src/export/html/template.generated.ts +1 -1
  28. package/src/export/html/template.js +93 -5
  29. package/src/internal-urls/docs-index.generated.ts +3 -3
  30. package/src/modes/controllers/command-controller.ts +0 -23
  31. package/src/prompts/tools/eval.md +14 -27
  32. package/src/session/agent-session.ts +0 -1
  33. package/src/session/history-storage.ts +77 -19
  34. package/src/tools/browser/tab-protocol.ts +4 -0
  35. package/src/tools/browser/tab-supervisor.ts +86 -5
  36. package/src/tools/browser/tab-worker.ts +104 -58
  37. package/src/tools/eval.ts +1 -1
  38. package/src/web/search/index.ts +6 -4
  39. package/src/cli/jupyter-cli.ts +0 -106
  40. package/src/commands/jupyter.ts +0 -32
  41. package/src/eval/py/cancellation.ts +0 -28
  42. package/src/eval/py/gateway-coordinator.ts +0 -424
  43. /package/src/eval/js/{prelude.ts → shared/prelude.ts} +0 -0
  44. /package/src/eval/js/{prelude.txt → shared/prelude.txt} +0 -0
@@ -14,7 +14,6 @@ import { formatDuration, Snowflake, setProjectDir } from "@oh-my-pi/pi-utils";
14
14
  import { $ } from "bun";
15
15
  import { reset as resetCapabilities } from "../../capability";
16
16
  import { clearClaudePluginRootsCache } from "../../discovery/helpers";
17
- import { getGatewayStatus } from "../../eval/py/gateway-coordinator";
18
17
  import { loadCustomShare } from "../../export/custom-share";
19
18
  import type { CompactOptions } from "../../extensibility/extensions/types";
20
19
  import {
@@ -402,28 +401,6 @@ export class CommandController {
402
401
  }
403
402
  }
404
403
 
405
- const gateway = await getGatewayStatus();
406
- info += `\n${theme.bold("Python Gateway")}\n`;
407
- if (gateway.active) {
408
- info += `${theme.fg("dim", "Status:")} ${theme.fg("success", "Active (Global)")}\n`;
409
- info += `${theme.fg("dim", "URL:")} ${gateway.url}\n`;
410
- info += `${theme.fg("dim", "PID:")} ${gateway.pid}\n`;
411
- if (gateway.pythonPath) {
412
- info += `${theme.fg("dim", "Python:")} ${gateway.pythonPath}\n`;
413
- }
414
- if (gateway.venvPath) {
415
- info += `${theme.fg("dim", "Venv:")} ${gateway.venvPath}\n`;
416
- }
417
- if (gateway.uptime !== null) {
418
- const uptimeSec = Math.floor(gateway.uptime / 1000);
419
- const mins = Math.floor(uptimeSec / 60);
420
- const secs = uptimeSec % 60;
421
- info += `${theme.fg("dim", "Uptime:")} ${mins}m ${secs}s\n`;
422
- }
423
- } else {
424
- info += `${theme.fg("dim", "Status:")} ${theme.fg("dim", "Inactive")}\n`;
425
- }
426
-
427
404
  if (this.ctx.lspServers && this.ctx.lspServers.length > 0) {
428
405
  info += `\n${theme.bold("LSP Servers")}\n`;
429
406
  for (const server of this.ctx.lspServers) {
@@ -1,23 +1,18 @@
1
1
  Run code in a persistent kernel using codeblock cells.
2
2
 
3
3
  <instruction>
4
- Each cell is wrapped between `*** Begin <LANG>` and `*** End <LANG>`:
4
+ Each cell starts with a single header line and runs until the next header (or end of input):
5
5
 
6
6
  ```
7
- *** Begin PY
8
- *** Title: optional title
9
- *** Timeout: 10s
10
- *** Reset
7
+ *** Cell py:"optional title" t:10s rst
11
8
  print("hi")
12
- *** End PY
13
9
  ```
14
10
 
15
- - **Language**: {{#if py}}`PY` for Python{{/if}}{{#ifAll py js}}, {{/ifAll}}{{#if js}}`JS` / `TS` for JavaScript{{/if}}. The opening `<LANG>` and closing `<LANG>` **MUST** match.
16
- - **Attributes** (optional, in any order, immediately after `*** Begin`):
17
- - `*** Title: …` — cell title shown in the UI.
18
- - `*** Timeout: <duration>` per-cell timeout. Digits with optional `ms` / `s` / `m` units (e.g. `500ms`, `15s`, `2m`). Default 30s.
19
- - `*** Reset` wipe this cell's own language kernel before running.{{#ifAll py js}} Other languages are untouched.{{/ifAll}}
20
- - Anything between the last attribute and `*** End <LANG>` is the cell's code, verbatim.
11
+ - **Language + title**: `<lang>:"<title>"` — {{#if py}}`py` for Python{{/if}}{{#ifAll py js}}, {{/ifAll}}{{#if js}}`js` for JavaScript{{/if}}. Title may be empty (`py:""`).
12
+ - **Attributes** (optional, in this order, after the language+title):
13
+ - `t:<duration>` per-cell timeout. Digits with optional `ms` / `s` / `m` units (e.g. `500ms`, `15s`, `2m`). Default 30s.
14
+ - `rst` wipe this cell's own language kernel before running.{{#ifAll py js}} Other languages are untouched.{{/ifAll}}
15
+ - Anything after the header line, up to the next `*** Cell` header, is the cell's code, verbatim.
21
16
  - Stack multiple cells back-to-back; blank lines between cells are ignored.
22
17
 
23
18
  **Work incrementally:**
@@ -50,40 +45,32 @@ env(key?=None, value?=None) → str | None | dict
50
45
  No args → full environment as dict. One arg → value of `key`. Two args → set `key=value` and return value.
51
46
  output(*ids, format?="raw", query?=None, offset?=None, limit?=None) → str | dict | list[dict]
52
47
  Read task/agent output by ID. Single id returns text/dict; multiple ids return a list.
48
+ tool.<name>(args) → unknown
49
+ Invoke any session tool by name. `args` is the tool's parameter object.
53
50
  ```
54
-
55
- {{#if js}}**JavaScript only:** `tool.<name>(args)` invokes any session tool directly (e.g. `await tool.read({ path: "src/foo.ts" })`).
56
- {{/if}}</prelude>
51
+ </prelude>
57
52
 
58
53
  <output>
59
54
  Cells render like a Jupyter notebook. `display(value)` renders non-presentable data as an interactive JSON tree. Presentable values (figures, images, dataframes, etc.) use their native representation.
60
55
  </output>
61
56
 
62
57
  <caution>
63
- - In session mode, use `*** Reset` on a cell to wipe its language's kernel before running.{{#ifAll py js}} Reset is per-language: a python cell's `*** Reset` does not touch the JavaScript kernel and vice versa.{{/ifAll}}
58
+ - In session mode, use `rst` on a cell to wipe its language's kernel before running.{{#ifAll py js}} Reset is per-language: a python cell's `rst` does not touch the JavaScript kernel and vice versa.{{/ifAll}}
64
59
  {{#if js}}- **js**: the VM exposes a selective `process` subset, Web APIs, `Buffer`, `fs/promises`, and the `Bun` global.
65
60
  {{/if}}</caution>
66
61
 
67
62
  <example>
68
- {{#if py}}*** Begin PY
69
- *** Title: imports
70
- *** Timeout: 10s
63
+ {{#if py}}*** Cell py:"imports" t:10s
71
64
  import json
72
65
  from pathlib import Path
73
- *** End PY
74
66
 
75
- *** Begin PY
76
- *** Title: load config
67
+ *** Cell py:"load config"
77
68
  data = json.loads(read('package.json'))
78
69
  display(data)
79
- *** End PY
80
70
  {{/if}}{{#ifAll py js}}
81
- {{/ifAll}}{{#if js}}*** Begin JS
82
- *** Title: js summary
83
- *** Reset
71
+ {{/ifAll}}{{#if js}}*** Cell js:"summary" rst
84
72
  const data = JSON.parse(await read('package.json'));
85
73
  display(data);
86
74
  return data.name;
87
- *** End JS
88
75
  {{/if}}
89
76
  </example>
@@ -6561,7 +6561,6 @@ export class AgentSession {
6561
6561
  sessionId,
6562
6562
  kernelOwnerId: this.#evalKernelOwnerId,
6563
6563
  kernelMode: this.settings.get("python.kernelMode"),
6564
- useSharedGateway: this.settings.get("python.sharedGateway"),
6565
6564
  onChunk,
6566
6565
  signal: abortController.signal,
6567
6566
  });
@@ -19,6 +19,12 @@ type HistoryRow = {
19
19
 
20
20
  const SQLITE_NOW_EPOCH = "CAST(strftime('%s','now') AS INTEGER)";
21
21
 
22
+ // Escape LIKE wildcards so user input is treated as literal text.
23
+ // Matches the `ESCAPE '\\'` clause used by substring-search statements.
24
+ function escapeLikePattern(text: string): string {
25
+ return text.replace(/[\\%_]/g, "\\$&");
26
+ }
27
+
22
28
  class AsyncDrain<T> {
23
29
  #queue?: T[];
24
30
  #promise = Promise.resolve();
@@ -63,6 +69,8 @@ export class HistoryStorage {
63
69
  #recentStmt: Statement;
64
70
  #searchStmt: Statement;
65
71
  #lastPromptStmt: Statement;
72
+ // Cache substring-fallback prepared statements keyed by token count.
73
+ #substringStmts = new Map<number, Statement>();
66
74
 
67
75
  // In-memory cache of last prompt to avoid sync DB reads on add
68
76
  #lastPromptCache: string | null = null;
@@ -167,16 +175,53 @@ CREATE TRIGGER IF NOT EXISTS history_ai AFTER INSERT ON history BEGIN
167
175
  const safeLimit = this.#normalizeLimit(limit);
168
176
  if (safeLimit === 0) return [];
169
177
 
170
- const ftsQuery = this.#buildFtsQuery(query);
171
- if (!ftsQuery) return [];
178
+ const tokens = this.#tokenize(query);
179
+ if (tokens.length === 0) return [];
172
180
 
181
+ // 1. FTS5 prefix match (token AND, prefix-wildcard per token).
182
+ // Handles punctuation by tokenizing query the same way unicode61 tokenizer
183
+ // indexed the stored text, so "git-commit" -> "git"* "commit"*.
184
+ const ftsQuery = tokens.map(tok => `"${tok.replace(/"/g, '""')}"*`).join(" ");
185
+ let ftsRows: HistoryRow[] = [];
173
186
  try {
174
- const rows = this.#searchStmt.all(ftsQuery, safeLimit) as HistoryRow[];
175
- return rows.map(row => this.#toEntry(row));
187
+ ftsRows = this.#searchStmt.all(ftsQuery, safeLimit) as HistoryRow[];
176
188
  } catch (error) {
177
- logger.error("HistoryStorage search failed", { error: String(error) });
178
- return [];
189
+ // Malformed FTS expression - fall through to substring path.
190
+ logger.debug("HistoryStorage FTS query failed, using substring only", { error: String(error) });
191
+ }
192
+
193
+ if (ftsRows.length >= safeLimit) {
194
+ return ftsRows.map(row => this.#toEntry(row));
195
+ }
196
+
197
+ // 2. Substring fallback (token-AND LIKE). Catches infix matches FTS5's
198
+ // prefix-only wildcard cannot reach (e.g. "mit" -> "commit"). Bounded
199
+ // by safeLimit, ordered by recency - no full-table load into JS.
200
+ let subRows: HistoryRow[] = [];
201
+ try {
202
+ subRows = this.#searchSubstring(tokens, safeLimit);
203
+ } catch (error) {
204
+ logger.error("HistoryStorage substring search failed", { error: String(error) });
205
+ }
206
+
207
+ if (ftsRows.length === 0) {
208
+ return subRows.map(row => this.#toEntry(row));
209
+ }
210
+
211
+ const seen = new Set<number>();
212
+ const merged: HistoryEntry[] = [];
213
+ for (const row of ftsRows) {
214
+ if (seen.has(row.id)) continue;
215
+ seen.add(row.id);
216
+ merged.push(this.#toEntry(row));
179
217
  }
218
+ for (const row of subRows) {
219
+ if (merged.length >= safeLimit) break;
220
+ if (seen.has(row.id)) continue;
221
+ seen.add(row.id);
222
+ merged.push(this.#toEntry(row));
223
+ }
224
+ return merged;
180
225
  }
181
226
 
182
227
  #ensureDir(dbPath: string): void {
@@ -225,21 +270,34 @@ END;
225
270
  return Math.min(clamped, 1000);
226
271
  }
227
272
 
228
- #buildFtsQuery(query: string): string | null {
229
- const tokens = query
230
- .trim()
231
- .split(/\s+/)
232
- .map(token => token.trim())
233
- .filter(Boolean);
273
+ /**
274
+ * Split on non-alphanumeric runs, mirroring FTS5's `unicode61` tokenizer so
275
+ * query tokens align with how stored prompts were indexed. Lowercases for
276
+ * stable substring matching.
277
+ */
278
+ #tokenize(query: string): string[] {
279
+ return query
280
+ .toLowerCase()
281
+ .split(/[^\p{L}\p{N}]+/u)
282
+ .filter(tok => tok.length > 0);
283
+ }
234
284
 
235
- if (tokens.length === 0) return null;
285
+ #searchSubstring(tokens: string[], limit: number): HistoryRow[] {
286
+ const stmt = this.#getSubstringStmt(tokens.length);
287
+ const params: unknown[] = tokens.map(tok => `%${escapeLikePattern(tok)}%`);
288
+ params.push(limit);
289
+ return stmt.all(...(params as [string, ...unknown[]])) as HistoryRow[];
290
+ }
236
291
 
237
- return tokens
238
- .map(token => {
239
- const escaped = token.replace(/"/g, '""');
240
- return `"${escaped}"*`;
241
- })
242
- .join(" ");
292
+ #getSubstringStmt(tokenCount: number): Statement {
293
+ let stmt = this.#substringStmts.get(tokenCount);
294
+ if (stmt) return stmt;
295
+ const whereClause = Array(tokenCount).fill("prompt LIKE ? ESCAPE '\\' COLLATE NOCASE").join(" AND ");
296
+ stmt = this.#db.prepare(
297
+ `SELECT id, prompt, created_at, cwd FROM history WHERE ${whereClause} ORDER BY created_at DESC, id DESC LIMIT ?`,
298
+ );
299
+ this.#substringStmts.set(tokenCount, stmt);
300
+ return stmt;
243
301
  }
244
302
 
245
303
  #toEntry(row: HistoryRow): HistoryEntry {
@@ -59,10 +59,13 @@ export type WorkerInitPayload =
59
59
  dialogs?: "accept" | "dismiss";
60
60
  };
61
61
 
62
+ export type ToolReply = { ok: true; value: unknown } | { ok: false; error: RunErrorPayload };
63
+
62
64
  export type WorkerInbound =
63
65
  | { type: "init"; payload: WorkerInitPayload }
64
66
  | { type: "run"; id: string; name: string; code: string; timeoutMs: number; session: SessionSnapshot }
65
67
  | { type: "abort"; id: string }
68
+ | { type: "tool-reply"; id: string; reply: ToolReply }
66
69
  | { type: "close" };
67
70
 
68
71
  export interface ReadyInfo {
@@ -91,6 +94,7 @@ export type WorkerOutbound =
91
94
  | { type: "init-failed"; error: RunErrorPayload }
92
95
  | { type: "result"; id: string; ok: true; payload: RunResultOk }
93
96
  | { type: "result"; id: string; ok: false; error: RunErrorPayload }
97
+ | { type: "tool-call"; id: string; runId: string; name: string; args: unknown }
94
98
  | { type: "log"; level: "debug" | "warn" | "error"; msg: string; meta?: Record<string, unknown> }
95
99
  | { type: "closed" };
96
100
 
@@ -1,5 +1,6 @@
1
1
  import { getPuppeteerDir, logger, Snowflake } from "@oh-my-pi/pi-utils";
2
2
  import type { Page, Target } from "puppeteer-core";
3
+ import { callSessionTool } from "../../eval/js/tool-bridge";
3
4
  import type { ToolSession } from "../../sdk";
4
5
  import { expandPath } from "../path-utils";
5
6
  import { ToolAbortError, ToolError } from "../tool-errors";
@@ -37,6 +38,14 @@ interface WorkerHandle {
37
38
 
38
39
  export type DialogPolicy = "accept" | "dismiss";
39
40
 
41
+ export interface PendingRun {
42
+ resolve(result: RunResultOk): void;
43
+ reject(error: unknown): void;
44
+ session: ToolSession;
45
+ signal?: AbortSignal;
46
+ toolCalls: Map<string, AbortController>;
47
+ }
48
+
40
49
  export interface TabSession {
41
50
  name: string;
42
51
  browser: BrowserHandle;
@@ -44,7 +53,7 @@ export interface TabSession {
44
53
  worker: WorkerHandle;
45
54
  state: "alive" | "dead";
46
55
  info: ReadyInfo;
47
- pending: Map<string, { resolve: (result: RunResultOk) => void; reject: (error: unknown) => void }>;
56
+ pending: Map<string, PendingRun>;
48
57
  dialogPolicy?: DialogPolicy;
49
58
  kindTag: BrowserKindTag;
50
59
  }
@@ -155,14 +164,14 @@ export async function acquireTab(
155
164
  export async function runInTab(name: string, opts: RunInTabOptions): Promise<RunResultOk> {
156
165
  return await runInTabWithSnapshot(
157
166
  name,
158
- { code: opts.code, timeoutMs: opts.timeoutMs, signal: opts.signal },
167
+ { code: opts.code, timeoutMs: opts.timeoutMs, signal: opts.signal, session: opts.session },
159
168
  { cwd: opts.session.cwd, browserScreenshotDir: expandBrowserScreenshotDir(opts.session) },
160
169
  );
161
170
  }
162
171
 
163
172
  async function runInTabWithSnapshot(
164
173
  name: string,
165
- opts: { code: string; timeoutMs: number; signal?: AbortSignal },
174
+ opts: { code: string; timeoutMs: number; signal?: AbortSignal; session?: ToolSession },
166
175
  snapshot: SessionSnapshot,
167
176
  ): Promise<RunResultOk> {
168
177
  const tab = tabs.get(name);
@@ -170,8 +179,18 @@ async function runInTabWithSnapshot(
170
179
  if (tab.pending.size > 0) throw new ToolError(`Tab ${JSON.stringify(name)} is busy`);
171
180
  const id = Snowflake.next();
172
181
  const { promise, resolve, reject } = Promise.withResolvers<RunResultOk>();
173
- tab.pending.set(id, { resolve, reject });
174
- const abort = (): void => tab.worker.send({ type: "abort", id });
182
+ const pending: PendingRun = {
183
+ resolve,
184
+ reject,
185
+ session: opts.session ?? ({} as ToolSession),
186
+ signal: opts.signal,
187
+ toolCalls: new Map(),
188
+ };
189
+ tab.pending.set(id, pending);
190
+ const abort = (): void => {
191
+ tab.worker.send({ type: "abort", id });
192
+ for (const ctrl of pending.toolCalls.values()) ctrl.abort(opts.signal?.reason);
193
+ };
175
194
  if (opts.signal?.aborted) abort();
176
195
  else opts.signal?.addEventListener("abort", abort, { once: true });
177
196
  try {
@@ -277,9 +296,71 @@ function handleTabMessage(tab: TabSession, msg: WorkerOutbound): void {
277
296
  tab.info = msg.info;
278
297
  return;
279
298
  }
299
+ if (msg.type === "tool-call") {
300
+ void dispatchToolCall(tab, msg);
301
+ return;
302
+ }
280
303
  if (msg.type === "log") logWorkerMessage(msg);
281
304
  }
282
305
 
306
+ async function dispatchToolCall(tab: TabSession, msg: Extract<WorkerOutbound, { type: "tool-call" }>): Promise<void> {
307
+ const pending = tab.pending.get(msg.runId);
308
+ if (!pending?.session.cwd) {
309
+ safeSend(tab, {
310
+ type: "tool-reply",
311
+ id: msg.id,
312
+ reply: {
313
+ ok: false,
314
+ error: { name: "ToolError", message: "No active run for tool call", isToolError: true, isAbort: false },
315
+ },
316
+ });
317
+ return;
318
+ }
319
+ const ctrl = new AbortController();
320
+ pending.toolCalls.set(msg.id, ctrl);
321
+ const onParentAbort = (): void => ctrl.abort(pending.signal?.reason);
322
+ if (pending.signal?.aborted) onParentAbort();
323
+ else pending.signal?.addEventListener("abort", onParentAbort, { once: true });
324
+ try {
325
+ const value = await callSessionTool(msg.name, msg.args, {
326
+ session: pending.session,
327
+ signal: ctrl.signal,
328
+ emitStatus: () => {
329
+ // Status events from tool calls aren't piped back to user code yet; the worker
330
+ // already pushes its own helper status via the display channel.
331
+ },
332
+ });
333
+ safeSend(tab, { type: "tool-reply", id: msg.id, reply: { ok: true, value } });
334
+ } catch (error) {
335
+ safeSend(tab, { type: "tool-reply", id: msg.id, reply: { ok: false, error: toErrorPayload(error) } });
336
+ } finally {
337
+ pending.toolCalls.delete(msg.id);
338
+ pending.signal?.removeEventListener("abort", onParentAbort);
339
+ }
340
+ }
341
+
342
+ function safeSend(tab: TabSession, msg: WorkerInbound): void {
343
+ if (tab.state !== "alive") return;
344
+ try {
345
+ tab.worker.send(msg);
346
+ } catch (err) {
347
+ logger.debug("tab worker send failed", { error: err instanceof Error ? err.message : String(err) });
348
+ }
349
+ }
350
+
351
+ function toErrorPayload(error: unknown): RunErrorPayload {
352
+ if (error instanceof Error) {
353
+ return {
354
+ name: error.name,
355
+ message: error.message,
356
+ stack: error.stack,
357
+ isAbort: error.name === "AbortError" || error.name === "ToolAbortError",
358
+ isToolError: error instanceof ToolError || error.name === "ToolError",
359
+ };
360
+ }
361
+ return { name: "Error", message: String(error), isAbort: false, isToolError: false };
362
+ }
363
+
283
364
  async function forceKillTab(name: string, reason: string): Promise<void> {
284
365
  const tab = tabs.get(name);
285
366
  if (!tab) return;
@@ -1,7 +1,7 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as os from "node:os";
3
3
  import * as path from "node:path";
4
- import * as vm from "node:vm";
4
+
5
5
  import { Snowflake, untilAborted } from "@oh-my-pi/pi-utils";
6
6
  import type { HTMLElement } from "linkedom";
7
7
  import type {
@@ -14,6 +14,8 @@ import type {
14
14
  SerializedAXNode,
15
15
  Target,
16
16
  } from "puppeteer-core";
17
+ import { JsRuntime, type RuntimeHooks } from "../../eval/js/shared/runtime";
18
+ import type { JsDisplayOutput } from "../../eval/js/shared/types";
17
19
  import { resizeImage } from "../../utils/image-resize";
18
20
  import { resolveToCwd } from "../path-utils";
19
21
  import { formatScreenshot } from "../render-utils";
@@ -34,6 +36,7 @@ import type {
34
36
  RunResultOk,
35
37
  ScreenshotResult,
36
38
  SessionSnapshot,
39
+ ToolReply,
37
40
  Transport,
38
41
  WorkerInbound,
39
42
  WorkerInitPayload,
@@ -177,6 +180,27 @@ function errorPayload(error: unknown): RunErrorPayload {
177
180
  return { name: "Error", message: String(error), isToolError: false, isAbort: false };
178
181
  }
179
182
 
183
+ function safeJsonStringify(value: unknown): string {
184
+ try {
185
+ return JSON.stringify(value, null, 2);
186
+ } catch {
187
+ return String(value);
188
+ }
189
+ }
190
+
191
+ function replyError(payload: RunErrorPayload): Error {
192
+ if (payload.isAbort) {
193
+ const err = new ToolAbortError(payload.message || "Tool call aborted");
194
+ if (payload.stack) err.stack = payload.stack;
195
+ return err;
196
+ }
197
+ const Ctor = payload.isToolError ? ToolError : Error;
198
+ const err = new Ctor(payload.message);
199
+ if (payload.name) err.name = payload.name;
200
+ if (payload.stack) err.stack = payload.stack;
201
+ return err;
202
+ }
203
+
180
204
  async function targetIdForTarget(target: Target): Promise<string> {
181
205
  const raw = target as unknown as { _targetId?: unknown };
182
206
  if (typeof raw._targetId === "string") return raw._targetId;
@@ -361,6 +385,14 @@ async function clickQueryHandlerText(
361
385
  );
362
386
  }
363
387
 
388
+ interface ActiveRun {
389
+ id: string;
390
+ ac: AbortController;
391
+ displays: RunResultOk["displays"];
392
+ screenshots: ScreenshotResult[];
393
+ pendingTools: Map<string, { resolve(value: unknown): void; reject(error: Error): void }>;
394
+ }
395
+
364
396
  export class WorkerCore {
365
397
  #transport: Transport;
366
398
  #browser?: Browser;
@@ -368,7 +400,8 @@ export class WorkerCore {
368
400
  #targetId?: string;
369
401
  #elementCache = new Map<number, ElementHandle>();
370
402
  #elementCounter = 0;
371
- #active?: { id: string; ac: AbortController };
403
+ #active: ActiveRun | null = null;
404
+ #runtime: JsRuntime | null = null;
372
405
  #unsub: () => void;
373
406
  #mode?: WorkerInitPayload["mode"];
374
407
  #dialogPolicy?: DialogPolicy;
@@ -401,6 +434,9 @@ export class WorkerCore {
401
434
  case "abort":
402
435
  if (this.#active?.id === msg.id) this.#active.ac.abort(new ToolAbortError());
403
436
  return;
437
+ case "tool-reply":
438
+ this.#deliverToolReply(msg.id, msg.reply);
439
+ return;
404
440
  case "close":
405
441
  await this.#close();
406
442
  return;
@@ -502,37 +538,26 @@ export class WorkerCore {
502
538
  const timeoutSignal = AbortSignal.timeout(msg.timeoutMs);
503
539
  const ac = new AbortController();
504
540
  const signal = AbortSignal.any([timeoutSignal, ac.signal]);
505
- this.#active = { id: msg.id, ac };
506
541
  const displays: RunResultOk["displays"] = [];
507
542
  const screenshots: ScreenshotResult[] = [];
543
+ const active: ActiveRun = { id: msg.id, ac, displays, screenshots, pendingTools: new Map() };
544
+ this.#active = active;
508
545
  try {
509
546
  throwIfAborted(signal);
510
547
  const page = this.#requirePage();
511
548
  const browser = this.#requireBrowser();
512
549
  const tabApi = this.#createTabApi(msg.name, msg.timeoutMs, signal, msg.session, displays, screenshots);
513
- const ctx = vm.createContext({
550
+ const runtime = this.#ensureRuntime(msg.session);
551
+ runtime.setCwd(msg.session.cwd);
552
+ runtime.setRunScope({
514
553
  page,
515
554
  browser,
516
555
  tab: tabApi,
517
- display: (value: unknown): void => this.#display(displays, value),
518
556
  assert: (cond: unknown, text?: string): void => {
519
557
  if (!cond) throw new ToolError(text ?? "Assertion failed");
520
558
  },
521
559
  wait: (ms: number): Promise<void> => Bun.sleep(ms),
522
- console: this.#console(),
523
- setTimeout,
524
- clearTimeout,
525
- setInterval,
526
- clearInterval,
527
- queueMicrotask,
528
- Promise,
529
- URL,
530
- URLSearchParams,
531
- TextEncoder,
532
- TextDecoder,
533
- Buffer,
534
560
  });
535
- const wrapped = `(async () => {\n${msg.code}\n})()`;
536
561
  const { promise: cancelRejection, reject: rejectCancel } = Promise.withResolvers<never>();
537
562
  const onCancel = (): void => {
538
563
  rejectCancel(
@@ -540,15 +565,17 @@ export class WorkerCore {
540
565
  ? new ToolError(`Browser code execution timed out after ${msg.timeoutMs}ms`)
541
566
  : new ToolAbortError(),
542
567
  );
568
+ // Cancel in-flight tool calls so user code's awaited proxies reject promptly.
569
+ for (const pending of active.pendingTools.values()) {
570
+ pending.reject(new ToolAbortError());
571
+ }
572
+ active.pendingTools.clear();
543
573
  };
544
574
  if (signal.aborted) onCancel();
545
575
  else signal.addEventListener("abort", onCancel, { once: true });
546
576
  try {
547
577
  const returnValue = await Promise.race([
548
- vm.runInContext(wrapped, ctx, {
549
- filename: `browser-run-${msg.id}.js`,
550
- lineOffset: -1,
551
- }) as Promise<unknown>,
578
+ runtime.run(msg.code, `browser-run-${msg.id}.js`),
552
579
  cancelRejection,
553
580
  ]);
554
581
  await this.#postReadyInfo();
@@ -564,8 +591,62 @@ export class WorkerCore {
564
591
  } catch (error) {
565
592
  this.#transport.send({ type: "result", id: msg.id, ok: false, error: errorPayload(error) });
566
593
  } finally {
567
- if (this.#active?.id === msg.id) this.#active = undefined;
594
+ if (this.#active?.id === msg.id) this.#active = null;
595
+ }
596
+ }
597
+
598
+ #ensureRuntime(session: SessionSnapshot): JsRuntime {
599
+ if (this.#runtime) return this.#runtime;
600
+ this.#runtime = new JsRuntime({
601
+ initialCwd: session.cwd,
602
+ sessionId: `browser-tab-${this.#targetId ?? "unknown"}`,
603
+ getHooks: () => this.#hooksForActiveRun(),
604
+ });
605
+ return this.#runtime;
606
+ }
607
+
608
+ #hooksForActiveRun(): RuntimeHooks | null {
609
+ const active = this.#active;
610
+ if (!active) return null;
611
+ return {
612
+ // console.* output stays on the supervisor log channel — matches pre-runtime behavior
613
+ // where browser cells didn't surface `console.log` to the model.
614
+ onText: chunk => this.#log("debug", chunk.replace(/\n$/, "")),
615
+ onDisplay: output => this.#pushDisplay(active.displays, output),
616
+ callTool: (name, args) => this.#callTool(active, name, args),
617
+ };
618
+ }
619
+
620
+ #pushDisplay(displays: RunResultOk["displays"], output: JsDisplayOutput): void {
621
+ if (output.type === "image") {
622
+ displays.push({ type: "image", data: output.data, mimeType: output.mimeType });
623
+ return;
624
+ }
625
+ if (output.type === "json") {
626
+ displays.push({ type: "text", text: safeJsonStringify(output.data) });
627
+ return;
568
628
  }
629
+ // status — surface as compact JSON so helper side effects (read/write/tree) appear in
630
+ // the cell result alongside explicit display() output.
631
+ displays.push({ type: "text", text: safeJsonStringify(output.event) });
632
+ }
633
+
634
+ async #callTool(active: ActiveRun, name: string, args: unknown): Promise<unknown> {
635
+ const id = `tab-tc-${active.id}-${crypto.randomUUID()}`;
636
+ const { promise, resolve, reject } = Promise.withResolvers<unknown>();
637
+ active.pendingTools.set(id, { resolve, reject });
638
+ this.#transport.send({ type: "tool-call", id, runId: active.id, name, args });
639
+ return await promise;
640
+ }
641
+
642
+ #deliverToolReply(id: string, reply: ToolReply): void {
643
+ const active = this.#active;
644
+ if (!active) return;
645
+ const pending = active.pendingTools.get(id);
646
+ if (!pending) return;
647
+ active.pendingTools.delete(id);
648
+ if (reply.ok) pending.resolve(reply.value);
649
+ else pending.reject(replyError(reply.error));
569
650
  }
570
651
 
571
652
  #createTabApi(
@@ -933,41 +1014,6 @@ export class WorkerCore {
933
1014
  }
934
1015
  return handle;
935
1016
  }
936
-
937
- #display(displays: RunResultOk["displays"], value: unknown): void {
938
- if (value === undefined || value === null) return;
939
- if (
940
- typeof value === "object" &&
941
- value !== null &&
942
- "type" in (value as Record<string, unknown>) &&
943
- (value as { type?: unknown }).type === "image"
944
- ) {
945
- const img = value as { data?: unknown; mimeType?: unknown };
946
- if (typeof img.data === "string" && typeof img.mimeType === "string") {
947
- displays.push({ type: "image", data: img.data, mimeType: img.mimeType });
948
- return;
949
- }
950
- }
951
- if (typeof value === "string") {
952
- displays.push({ type: "text", text: value });
953
- return;
954
- }
955
- try {
956
- displays.push({ type: "text", text: JSON.stringify(value, null, 2) });
957
- } catch {
958
- displays.push({ type: "text", text: String(value) });
959
- }
960
- }
961
-
962
- #console(): Pick<Console, "log" | "debug" | "warn" | "error"> {
963
- return {
964
- log: (...args: unknown[]) => this.#log("debug", args.map(String).join(" ")),
965
- debug: (...args: unknown[]) => this.#log("debug", args.map(String).join(" ")),
966
- warn: (...args: unknown[]) => this.#log("warn", args.map(String).join(" ")),
967
- error: (...args: unknown[]) => this.#log("error", args.map(String).join(" ")),
968
- };
969
- }
970
-
971
1017
  #clearElementCache(): void {
972
1018
  if (this.#elementCache.size === 0) {
973
1019
  this.#elementCounter = 0;