npm - @oh-my-pi/pi-coding-agent - Versions diffs - 14.9.5 → 14.9.7 - Mend

@oh-my-pi/pi-coding-agent 14.9.5 → 14.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

package/CHANGELOG.md +52 -0
package/package.json +7 -7
package/src/cli/setup-cli.ts +14 -161
package/src/cli/stats-cli.ts +56 -2
package/src/cli.ts +0 -1
package/src/config/settings-schema.ts +0 -10
package/src/eval/eval.lark +30 -10
package/src/eval/js/context-manager.ts +334 -564
package/src/eval/js/shared/helpers.ts +237 -0
package/src/eval/js/shared/indirect-eval.ts +30 -0
package/src/eval/js/shared/rewrite-imports.ts +211 -0
package/src/eval/js/shared/runtime.ts +168 -0
package/src/eval/js/shared/types.ts +18 -0
package/src/eval/js/tool-bridge.ts +2 -4
package/src/eval/js/worker-core.ts +146 -0
package/src/eval/js/worker-entry.ts +24 -0
package/src/eval/js/worker-protocol.ts +41 -0
package/src/eval/parse.ts +218 -49
package/src/eval/py/display.ts +71 -0
package/src/eval/py/executor.ts +74 -89
package/src/eval/py/index.ts +1 -2
package/src/eval/py/kernel.ts +472 -900
package/src/eval/py/prelude.py +95 -7
package/src/eval/py/runner.py +879 -0
package/src/eval/py/runtime.ts +3 -16
package/src/eval/py/tool-bridge.ts +137 -0
package/src/export/html/template.generated.ts +1 -1
package/src/export/html/template.js +93 -5
package/src/internal-urls/docs-index.generated.ts +3 -3
package/src/modes/controllers/command-controller.ts +0 -23
package/src/prompts/tools/eval.md +14 -27
package/src/session/agent-session.ts +0 -1
package/src/session/history-storage.ts +77 -19
package/src/tools/browser/tab-protocol.ts +4 -0
package/src/tools/browser/tab-supervisor.ts +86 -5
package/src/tools/browser/tab-worker.ts +104 -58
package/src/tools/eval.ts +1 -1
package/src/web/search/index.ts +6 -4
package/src/cli/jupyter-cli.ts +0 -106
package/src/commands/jupyter.ts +0 -32
package/src/eval/py/cancellation.ts +0 -28
package/src/eval/py/gateway-coordinator.ts +0 -424
/package/src/eval/js/{prelude.ts → shared/prelude.ts} +0 -0
/package/src/eval/js/{prelude.txt → shared/prelude.txt} +0 -0

package/src/modes/controllers/command-controller.ts CHANGED Viewed

@@ -14,7 +14,6 @@ import { formatDuration, Snowflake, setProjectDir } from "@oh-my-pi/pi-utils";
 import { $ } from "bun";
 import { reset as resetCapabilities } from "../../capability";
 import { clearClaudePluginRootsCache } from "../../discovery/helpers";
-import { getGatewayStatus } from "../../eval/py/gateway-coordinator";
 import { loadCustomShare } from "../../export/custom-share";
 import type { CompactOptions } from "../../extensibility/extensions/types";
 import {
@@ -402,28 +401,6 @@ export class CommandController {
 			}
 		}
-		const gateway = await getGatewayStatus();
-		info += `\n${theme.bold("Python Gateway")}\n`;
-		if (gateway.active) {
-			info += `${theme.fg("dim", "Status:")} ${theme.fg("success", "Active (Global)")}\n`;
-			info += `${theme.fg("dim", "URL:")} ${gateway.url}\n`;
-			info += `${theme.fg("dim", "PID:")} ${gateway.pid}\n`;
-			if (gateway.pythonPath) {
-				info += `${theme.fg("dim", "Python:")} ${gateway.pythonPath}\n`;
-			}
-			if (gateway.venvPath) {
-				info += `${theme.fg("dim", "Venv:")} ${gateway.venvPath}\n`;
-			}
-			if (gateway.uptime !== null) {
-				const uptimeSec = Math.floor(gateway.uptime / 1000);
-				const mins = Math.floor(uptimeSec / 60);
-				const secs = uptimeSec % 60;
-				info += `${theme.fg("dim", "Uptime:")} ${mins}m ${secs}s\n`;
-			}
-		} else {
-			info += `${theme.fg("dim", "Status:")} ${theme.fg("dim", "Inactive")}\n`;
-		}
 		if (this.ctx.lspServers && this.ctx.lspServers.length > 0) {
 			info += `\n${theme.bold("LSP Servers")}\n`;
 			for (const server of this.ctx.lspServers) {

package/src/prompts/tools/eval.md CHANGED Viewed

@@ -1,23 +1,18 @@
 Run code in a persistent kernel using codeblock cells.
 <instruction>
-Each cell is wrapped between `*** Begin <LANG>` and `*** End <LANG>`:
+Each cell starts with a single header line and runs until the next header (or end of input):
 ```
-*** Begin PY
-*** Title: optional title
-*** Timeout: 10s
-*** Reset
+*** Cell py:"optional title" t:10s rst
 print("hi")
-*** End PY
 ```
-- **Language**: {{#if py}}`PY` for Python{{/if}}{{#ifAll py js}}, {{/ifAll}}{{#if js}}`JS` / `TS` for JavaScript{{/if}}. The opening `<LANG>` and closing `<LANG>` **MUST** match.
-- **Attributes** (optional, in any order, immediately after `*** Begin`):
-  - `*** Title: …` — cell title shown in the UI.
-  - `*** Timeout: <duration>` — per-cell timeout. Digits with optional `ms` / `s` / `m` units (e.g. `500ms`, `15s`, `2m`). Default 30s.
-  - `*** Reset` — wipe this cell's own language kernel before running.{{#ifAll py js}} Other languages are untouched.{{/ifAll}}
-- Anything between the last attribute and `*** End <LANG>` is the cell's code, verbatim.
+- **Language + title**: `<lang>:"<title>"` — {{#if py}}`py` for Python{{/if}}{{#ifAll py js}}, {{/ifAll}}{{#if js}}`js` for JavaScript{{/if}}. Title may be empty (`py:""`).
+- **Attributes** (optional, in this order, after the language+title):
+  - `t:<duration>` — per-cell timeout. Digits with optional `ms` / `s` / `m` units (e.g. `500ms`, `15s`, `2m`). Default 30s.
+  - `rst` — wipe this cell's own language kernel before running.{{#ifAll py js}} Other languages are untouched.{{/ifAll}}
+- Anything after the header line, up to the next `*** Cell` header, is the cell's code, verbatim.
 - Stack multiple cells back-to-back; blank lines between cells are ignored.
 **Work incrementally:**
@@ -50,40 +45,32 @@ env(key?=None, value?=None) → str | None | dict
     No args → full environment as dict. One arg → value of `key`. Two args → set `key=value` and return value.
 output(*ids, format?="raw", query?=None, offset?=None, limit?=None) → str | dict | list[dict]
     Read task/agent output by ID. Single id returns text/dict; multiple ids return a list.
+tool.<name>(args) → unknown
+    Invoke any session tool by name. `args` is the tool's parameter object.
 ```
-{{#if js}}**JavaScript only:** `tool.<name>(args)` invokes any session tool directly (e.g. `await tool.read({ path: "src/foo.ts" })`).
-{{/if}}</prelude>
+</prelude>
 <output>
 Cells render like a Jupyter notebook. `display(value)` renders non-presentable data as an interactive JSON tree. Presentable values (figures, images, dataframes, etc.) use their native representation.
 </output>
 <caution>
-- In session mode, use `*** Reset` on a cell to wipe its language's kernel before running.{{#ifAll py js}} Reset is per-language: a python cell's `*** Reset` does not touch the JavaScript kernel and vice versa.{{/ifAll}}
+- In session mode, use `rst` on a cell to wipe its language's kernel before running.{{#ifAll py js}} Reset is per-language: a python cell's `rst` does not touch the JavaScript kernel and vice versa.{{/ifAll}}
 {{#if js}}- **js**: the VM exposes a selective `process` subset, Web APIs, `Buffer`, `fs/promises`, and the `Bun` global.
 {{/if}}</caution>
 <example>
-{{#if py}}*** Begin PY
-*** Title: imports
-*** Timeout: 10s
+{{#if py}}*** Cell py:"imports" t:10s
 import json
 from pathlib import Path
-*** End PY
-*** Begin PY
-*** Title: load config
+*** Cell py:"load config"
 data = json.loads(read('package.json'))
 display(data)
-*** End PY
 {{/if}}{{#ifAll py js}}
-{{/ifAll}}{{#if js}}*** Begin JS
-*** Title: js summary
-*** Reset
+{{/ifAll}}{{#if js}}*** Cell js:"summary" rst
 const data = JSON.parse(await read('package.json'));
 display(data);
 return data.name;
-*** End JS
 {{/if}}
 </example>

package/src/session/agent-session.ts CHANGED Viewed

@@ -6561,7 +6561,6 @@ export class AgentSession {
 				sessionId,
 				kernelOwnerId: this.#evalKernelOwnerId,
 				kernelMode: this.settings.get("python.kernelMode"),
-				useSharedGateway: this.settings.get("python.sharedGateway"),
 				onChunk,
 				signal: abortController.signal,
 			});

package/src/session/history-storage.ts CHANGED Viewed

@@ -19,6 +19,12 @@ type HistoryRow = {
 const SQLITE_NOW_EPOCH = "CAST(strftime('%s','now') AS INTEGER)";
+/**
+ * Backslash-escapes the SQL LIKE metacharacters (`%`, `_`) and the backslash
+ * itself so that `text` is matched literally. Meant for patterns bound to a
+ * `LIKE ? ESCAPE '\'` condition, as used by the substring-search statements.
+ */
+function escapeLikePattern(text: string): string {
+	const likeSpecials = /[\\%_]/g;
+	return text.replace(likeSpecials, ch => `\\${ch}`);
+}
 class AsyncDrain<T> {
 	#queue?: T[];
 	#promise = Promise.resolve();
@@ -63,6 +69,8 @@ export class HistoryStorage {
 	#recentStmt: Statement;
 	#searchStmt: Statement;
 	#lastPromptStmt: Statement;
+	// Cache substring-fallback prepared statements keyed by token count.
+	#substringStmts = new Map<number, Statement>();
 	// In-memory cache of last prompt to avoid sync DB reads on add
 	#lastPromptCache: string | null = null;
@@ -167,16 +175,53 @@ CREATE TRIGGER IF NOT EXISTS history_ai AFTER INSERT ON history BEGIN
 		const safeLimit = this.#normalizeLimit(limit);
 		if (safeLimit === 0) return [];
-		const ftsQuery = this.#buildFtsQuery(query);
-		if (!ftsQuery) return [];
+		const tokens = this.#tokenize(query);
+		if (tokens.length === 0) return [];
+		// 1. FTS5 prefix match (token AND, prefix-wildcard per token).
+		//    Handles punctuation by tokenizing the query the same way the unicode61
+		//    tokenizer indexed the stored text, so "git-commit" -> "git"* "commit"*.
+		const ftsQuery = tokens.map(tok => `"${tok.replace(/"/g, '""')}"*`).join(" ");
+		let ftsRows: HistoryRow[] = [];
 		try {
-			const rows = this.#searchStmt.all(ftsQuery, safeLimit) as HistoryRow[];
-			return rows.map(row => this.#toEntry(row));
+			ftsRows = this.#searchStmt.all(ftsQuery, safeLimit) as HistoryRow[];
 		} catch (error) {
-			logger.error("HistoryStorage search failed", { error: String(error) });
-			return [];
+			// Malformed FTS expression - fall through to substring path.
+			logger.debug("HistoryStorage FTS query failed, using substring only", { error: String(error) });
+		}
+		if (ftsRows.length >= safeLimit) {
+			return ftsRows.map(row => this.#toEntry(row));
+		}
+		// 2. Substring fallback (token-AND LIKE). Catches infix matches FTS5's
+		//    prefix-only wildcard cannot reach (e.g. "mit" -> "commit"). Bounded
+		//    by safeLimit, ordered by recency - no full-table load into JS.
+		let subRows: HistoryRow[] = [];
+		try {
+			subRows = this.#searchSubstring(tokens, safeLimit);
+		} catch (error) {
+			logger.error("HistoryStorage substring search failed", { error: String(error) });
+		}
+		if (ftsRows.length === 0) {
+			return subRows.map(row => this.#toEntry(row));
+		}
+		const seen = new Set<number>();
+		const merged: HistoryEntry[] = [];
+		for (const row of ftsRows) {
+			if (seen.has(row.id)) continue;
+			seen.add(row.id);
+			merged.push(this.#toEntry(row));
 		}
+		for (const row of subRows) {
+			if (merged.length >= safeLimit) break;
+			if (seen.has(row.id)) continue;
+			seen.add(row.id);
+			merged.push(this.#toEntry(row));
+		}
+		return merged;
 	}
 	#ensureDir(dbPath: string): void {
@@ -225,21 +270,34 @@ END;
 		return Math.min(clamped, 1000);
 	}
-	#buildFtsQuery(query: string): string | null {
-		const tokens = query
-			.trim()
-			.split(/\s+/)
-			.map(token => token.trim())
-			.filter(Boolean);
+	/**
+	 * Breaks `query` into lowercase word tokens. Any run of characters that is
+	 * neither a Unicode letter nor a Unicode number acts as a separator, which
+	 * mirrors how FTS5's `unicode61` tokenizer split the stored prompts at
+	 * index time. Empty fragments (e.g. from leading or trailing separators)
+	 * are dropped.
+	 */
+	#tokenize(query: string): string[] {
+		const separators = /[^\p{L}\p{N}]+/u;
+		const tokens: string[] = [];
+		for (const piece of query.toLowerCase().split(separators)) {
+			if (piece.length > 0) tokens.push(piece);
+		}
+		return tokens;
+	}
-		if (tokens.length === 0) return null;
+	/**
+	 * Runs the token-AND `LIKE` query. Binds one `%token%` pattern per token
+	 * (LIKE wildcards inside the token are escaped) followed by the row limit
+	 * as the final parameter.
+	 */
+	#searchSubstring(tokens: string[], limit: number): HistoryRow[] {
+		const statement = this.#getSubstringStmt(tokens.length);
+		const likePatterns = tokens.map(tok => `%${escapeLikePattern(tok)}%`);
+		const bindings: unknown[] = [...likePatterns, limit];
+		return statement.all(...(bindings as [string, ...unknown[]])) as HistoryRow[];
+	}
-		return tokens
-			.map(token => {
-				const escaped = token.replace(/"/g, '""');
-				return `"${escaped}"*`;
-			})
-			.join(" ");
+	/**
+	 * Returns the prepared substring-search statement for `tokenCount` tokens,
+	 * preparing and caching it on first use. The SQL has one `prompt LIKE ?`
+	 * condition per token, AND-joined, ordered newest first, with a trailing
+	 * `LIMIT ?` parameter.
+	 */
+	#getSubstringStmt(tokenCount: number): Statement {
+		const cached = this.#substringStmts.get(tokenCount);
+		if (cached) return cached;
+		const likeCondition = "prompt LIKE ? ESCAPE '\\' COLLATE NOCASE";
+		const whereClause = new Array<string>(tokenCount).fill(likeCondition).join(" AND ");
+		const prepared = this.#db.prepare(
+			`SELECT id, prompt, created_at, cwd FROM history WHERE ${whereClause} ORDER BY created_at DESC, id DESC LIMIT ?`,
+		);
+		this.#substringStmts.set(tokenCount, prepared);
+		return prepared;
 	}
 	#toEntry(row: HistoryRow): HistoryEntry {

package/src/tools/browser/tab-protocol.ts CHANGED Viewed

@@ -59,10 +59,13 @@ export type WorkerInitPayload =
 			dialogs?: "accept" | "dismiss";
 	  };
+/** Outcome of a tool call, carried back to the worker in a `tool-reply` message. */
+export type ToolReply =
+	| { ok: true; value: unknown }
+	| { ok: false; error: RunErrorPayload };
 export type WorkerInbound =
 	| { type: "init"; payload: WorkerInitPayload }
 	| { type: "run"; id: string; name: string; code: string; timeoutMs: number; session: SessionSnapshot }
 	| { type: "abort"; id: string }
+	| { type: "tool-reply"; id: string; reply: ToolReply }
 	| { type: "close" };
 export interface ReadyInfo {
@@ -91,6 +94,7 @@ export type WorkerOutbound =
 	| { type: "init-failed"; error: RunErrorPayload }
 	| { type: "result"; id: string; ok: true; payload: RunResultOk }
 	| { type: "result"; id: string; ok: false; error: RunErrorPayload }
+	| { type: "tool-call"; id: string; runId: string; name: string; args: unknown }
 	| { type: "log"; level: "debug" | "warn" | "error"; msg: string; meta?: Record<string, unknown> }
 	| { type: "closed" };

package/src/tools/browser/tab-supervisor.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { getPuppeteerDir, logger, Snowflake } from "@oh-my-pi/pi-utils";
 import type { Page, Target } from "puppeteer-core";
+import { callSessionTool } from "../../eval/js/tool-bridge";
 import type { ToolSession } from "../../sdk";
 import { expandPath } from "../path-utils";
 import { ToolAbortError, ToolError } from "../tool-errors";
@@ -37,6 +38,14 @@ interface WorkerHandle {
 export type DialogPolicy = "accept" | "dismiss";
+/** Bookkeeping for one in-flight `run` request sent to a tab worker. */
+export interface PendingRun {
+	/** Settles the run's promise with the worker's successful result. */
+	resolve(result: RunResultOk): void;
+	/** Rejects the run's promise. */
+	reject(error: unknown): void;
+	/** Session passed to session tools invoked by the running code. */
+	session: ToolSession;
+	/** Caller-supplied abort signal for the run, if any. */
+	signal?: AbortSignal;
+	/** Abort controllers of tool calls currently in flight, keyed by tool-call id. */
+	toolCalls: Map<string, AbortController>;
+}
 export interface TabSession {
 	name: string;
 	browser: BrowserHandle;
@@ -44,7 +53,7 @@ export interface TabSession {
 	worker: WorkerHandle;
 	state: "alive" | "dead";
 	info: ReadyInfo;
-	pending: Map<string, { resolve: (result: RunResultOk) => void; reject: (error: unknown) => void }>;
+	pending: Map<string, PendingRun>;
 	dialogPolicy?: DialogPolicy;
 	kindTag: BrowserKindTag;
 }
@@ -155,14 +164,14 @@ export async function acquireTab(
 export async function runInTab(name: string, opts: RunInTabOptions): Promise<RunResultOk> {
 	return await runInTabWithSnapshot(
 		name,
-		{ code: opts.code, timeoutMs: opts.timeoutMs, signal: opts.signal },
+		{ code: opts.code, timeoutMs: opts.timeoutMs, signal: opts.signal, session: opts.session },
 		{ cwd: opts.session.cwd, browserScreenshotDir: expandBrowserScreenshotDir(opts.session) },
 	);
 }
 async function runInTabWithSnapshot(
 	name: string,
-	opts: { code: string; timeoutMs: number; signal?: AbortSignal },
+	opts: { code: string; timeoutMs: number; signal?: AbortSignal; session?: ToolSession },
 	snapshot: SessionSnapshot,
 ): Promise<RunResultOk> {
 	const tab = tabs.get(name);
@@ -170,8 +179,18 @@ async function runInTabWithSnapshot(
 	if (tab.pending.size > 0) throw new ToolError(`Tab ${JSON.stringify(name)} is busy`);
 	const id = Snowflake.next();
 	const { promise, resolve, reject } = Promise.withResolvers<RunResultOk>();
-	tab.pending.set(id, { resolve, reject });
-	const abort = (): void => tab.worker.send({ type: "abort", id });
+	const pending: PendingRun = {
+		resolve,
+		reject,
+		session: opts.session ?? ({} as ToolSession),
+		signal: opts.signal,
+		toolCalls: new Map(),
+	};
+	tab.pending.set(id, pending);
+	const abort = (): void => {
+		tab.worker.send({ type: "abort", id });
+		for (const ctrl of pending.toolCalls.values()) ctrl.abort(opts.signal?.reason);
+	};
 	if (opts.signal?.aborted) abort();
 	else opts.signal?.addEventListener("abort", abort, { once: true });
 	try {
@@ -277,9 +296,71 @@ function handleTabMessage(tab: TabSession, msg: WorkerOutbound): void {
 		tab.info = msg.info;
 		return;
 	}
+	if (msg.type === "tool-call") {
+		void dispatchToolCall(tab, msg);
+		return;
+	}
 	if (msg.type === "log") logWorkerMessage(msg);
 }
+/**
+ * Executes a `tool-call` requested by code running in a tab worker and sends
+ * the outcome back to that worker as a `tool-reply`.
+ *
+ * When the referenced run is not pending, or its session has no `cwd`, an
+ * error reply is sent immediately. Otherwise the tool runs under its own
+ * AbortController, which is registered on the run for the duration of the
+ * call and aborted if the run's signal fires.
+ */
+async function dispatchToolCall(tab: TabSession, msg: Extract<WorkerOutbound, { type: "tool-call" }>): Promise<void> {
+	const run = tab.pending.get(msg.runId);
+	if (!run?.session.cwd) {
+		const error: RunErrorPayload = {
+			name: "ToolError",
+			message: "No active run for tool call",
+			isToolError: true,
+			isAbort: false,
+		};
+		safeSend(tab, { type: "tool-reply", id: msg.id, reply: { ok: false, error } });
+		return;
+	}
+	const controller = new AbortController();
+	run.toolCalls.set(msg.id, controller);
+	// Propagate cancellation of the run to this tool call.
+	const forwardAbort = (): void => controller.abort(run.signal?.reason);
+	if (run.signal?.aborted) {
+		forwardAbort();
+	} else {
+		run.signal?.addEventListener("abort", forwardAbort, { once: true });
+	}
+	try {
+		const value = await callSessionTool(msg.name, msg.args, {
+			session: run.session,
+			signal: controller.signal,
+			emitStatus: () => {
+				// Status events from tool calls aren't piped back to user code yet; the worker
+				// already pushes its own helper status via the display channel.
+			},
+		});
+		safeSend(tab, { type: "tool-reply", id: msg.id, reply: { ok: true, value } });
+	} catch (error) {
+		const failure = toErrorPayload(error);
+		safeSend(tab, { type: "tool-reply", id: msg.id, reply: { ok: false, error: failure } });
+	} finally {
+		// Unregister the call and detach from the run's signal.
+		run.toolCalls.delete(msg.id);
+		run.signal?.removeEventListener("abort", forwardAbort);
+	}
+}
+/**
+ * Best-effort send to a tab's worker: does nothing unless the tab is alive,
+ * and logs (at debug level) instead of throwing when the send fails.
+ */
+function safeSend(tab: TabSession, msg: WorkerInbound): void {
+	if (tab.state === "alive") {
+		try {
+			tab.worker.send(msg);
+		} catch (err) {
+			const detail = err instanceof Error ? err.message : String(err);
+			logger.debug("tab worker send failed", { error: detail });
+		}
+	}
+}
+/**
+ * Converts a thrown value into a `RunErrorPayload`. `Error` instances keep
+ * their name, message and stack and are flagged as abort / tool errors based
+ * on their name (or on being a `ToolError` instance); any other value is
+ * stringified into a plain "Error" payload.
+ */
+function toErrorPayload(error: unknown): RunErrorPayload {
+	if (!(error instanceof Error)) {
+		return { name: "Error", message: String(error), isAbort: false, isToolError: false };
+	}
+	const { name, message, stack } = error;
+	const isAbort = name === "AbortError" || name === "ToolAbortError";
+	const isToolError = error instanceof ToolError || name === "ToolError";
+	return { name, message, stack, isAbort, isToolError };
+}
 async function forceKillTab(name: string, reason: string): Promise<void> {
 	const tab = tabs.get(name);
 	if (!tab) return;

package/src/tools/browser/tab-worker.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import * as fs from "node:fs";
 import * as os from "node:os";
 import * as path from "node:path";
-import * as vm from "node:vm";
 import { Snowflake, untilAborted } from "@oh-my-pi/pi-utils";
 import type { HTMLElement } from "linkedom";
 import type {
@@ -14,6 +14,8 @@ import type {
 	SerializedAXNode,
 	Target,
 } from "puppeteer-core";
+import { JsRuntime, type RuntimeHooks } from "../../eval/js/shared/runtime";
+import type { JsDisplayOutput } from "../../eval/js/shared/types";
 import { resizeImage } from "../../utils/image-resize";
 import { resolveToCwd } from "../path-utils";
 import { formatScreenshot } from "../render-utils";
@@ -34,6 +36,7 @@ import type {
 	RunResultOk,
 	ScreenshotResult,
 	SessionSnapshot,
+	ToolReply,
 	Transport,
 	WorkerInbound,
 	WorkerInitPayload,
@@ -177,6 +180,27 @@ function errorPayload(error: unknown): RunErrorPayload {
 	return { name: "Error", message: String(error), isToolError: false, isAbort: false };
 }
+/**
+ * Serializes `value` as 2-space-indented JSON for display, always returning a
+ * string. Falls back to `String(value)` when JSON serialization throws (for
+ * example on circular structures or BigInt) or produces no output.
+ */
+function safeJsonStringify(value: unknown): string {
+	try {
+		// JSON.stringify returns `undefined` (not a string) for `undefined`,
+		// functions and symbols, despite its declared return type.
+		const json: string | undefined = JSON.stringify(value, null, 2);
+		return json ?? String(value);
+	} catch {
+		return String(value);
+	}
+}
+/**
+ * Rebuilds an `Error` from a `RunErrorPayload` received in a tool reply.
+ * Abort payloads become a `ToolAbortError` (with a default message when the
+ * payload's is empty); otherwise a `ToolError` or plain `Error` is created and
+ * given the payload's name. The payload's stack, when present, replaces the
+ * locally captured one.
+ */
+function replyError(payload: RunErrorPayload): Error {
+	const { name, message, stack, isAbort, isToolError } = payload;
+	let rebuilt: Error;
+	if (isAbort) {
+		rebuilt = new ToolAbortError(message || "Tool call aborted");
+	} else {
+		rebuilt = isToolError ? new ToolError(message) : new Error(message);
+		if (name) rebuilt.name = name;
+	}
+	if (stack) rebuilt.stack = stack;
+	return rebuilt;
+}
 async function targetIdForTarget(target: Target): Promise<string> {
 	const raw = target as unknown as { _targetId?: unknown };
 	if (typeof raw._targetId === "string") return raw._targetId;
@@ -361,6 +385,14 @@ async function clickQueryHandlerText(
 	);
 }
+/** State the worker keeps for the run it is currently executing. */
+interface ActiveRun {
+	/** Id of the `run` message being executed. */
+	id: string;
+	/** Controller aborted when an `abort` message for this run arrives. */
+	ac: AbortController;
+	/** Display outputs collected for this run. */
+	displays: RunResultOk["displays"];
+	/** Screenshots collected for this run. */
+	screenshots: ScreenshotResult[];
+	/** Tool calls awaiting a `tool-reply`, keyed by tool-call id. */
+	pendingTools: Map<string, { resolve(value: unknown): void; reject(error: Error): void }>;
+}
 export class WorkerCore {
 	#transport: Transport;
 	#browser?: Browser;
@@ -368,7 +400,8 @@ export class WorkerCore {
 	#targetId?: string;
 	#elementCache = new Map<number, ElementHandle>();
 	#elementCounter = 0;
-	#active?: { id: string; ac: AbortController };
+	#active: ActiveRun | null = null;
+	#runtime: JsRuntime | null = null;
 	#unsub: () => void;
 	#mode?: WorkerInitPayload["mode"];
 	#dialogPolicy?: DialogPolicy;
@@ -401,6 +434,9 @@ export class WorkerCore {
 			case "abort":
 				if (this.#active?.id === msg.id) this.#active.ac.abort(new ToolAbortError());
 				return;
+			case "tool-reply":
+				this.#deliverToolReply(msg.id, msg.reply);
+				return;
 			case "close":
 				await this.#close();
 				return;
@@ -502,37 +538,26 @@ export class WorkerCore {
 		const timeoutSignal = AbortSignal.timeout(msg.timeoutMs);
 		const ac = new AbortController();
 		const signal = AbortSignal.any([timeoutSignal, ac.signal]);
-		this.#active = { id: msg.id, ac };
 		const displays: RunResultOk["displays"] = [];
 		const screenshots: ScreenshotResult[] = [];
+		const active: ActiveRun = { id: msg.id, ac, displays, screenshots, pendingTools: new Map() };
+		this.#active = active;
 		try {
 			throwIfAborted(signal);
 			const page = this.#requirePage();
 			const browser = this.#requireBrowser();
 			const tabApi = this.#createTabApi(msg.name, msg.timeoutMs, signal, msg.session, displays, screenshots);
-			const ctx = vm.createContext({
+			const runtime = this.#ensureRuntime(msg.session);
+			runtime.setCwd(msg.session.cwd);
+			runtime.setRunScope({
 				page,
 				browser,
 				tab: tabApi,
-				display: (value: unknown): void => this.#display(displays, value),
 				assert: (cond: unknown, text?: string): void => {
 					if (!cond) throw new ToolError(text ?? "Assertion failed");
 				},
 				wait: (ms: number): Promise<void> => Bun.sleep(ms),
-				console: this.#console(),
-				setTimeout,
-				clearTimeout,
-				setInterval,
-				clearInterval,
-				queueMicrotask,
-				Promise,
-				URL,
-				URLSearchParams,
-				TextEncoder,
-				TextDecoder,
-				Buffer,
 			});
-			const wrapped = `(async () => {\n${msg.code}\n})()`;
 			const { promise: cancelRejection, reject: rejectCancel } = Promise.withResolvers<never>();
 			const onCancel = (): void => {
 				rejectCancel(
@@ -540,15 +565,17 @@ export class WorkerCore {
 						? new ToolError(`Browser code execution timed out after ${msg.timeoutMs}ms`)
 						: new ToolAbortError(),
 				);
+				// Cancel in-flight tool calls so user code's awaited proxies reject promptly.
+				for (const pending of active.pendingTools.values()) {
+					pending.reject(new ToolAbortError());
+				}
+				active.pendingTools.clear();
 			};
 			if (signal.aborted) onCancel();
 			else signal.addEventListener("abort", onCancel, { once: true });
 			try {
 				const returnValue = await Promise.race([
-					vm.runInContext(wrapped, ctx, {
-						filename: `browser-run-${msg.id}.js`,
-						lineOffset: -1,
-					}) as Promise<unknown>,
+					runtime.run(msg.code, `browser-run-${msg.id}.js`),
 					cancelRejection,
 				]);
 				await this.#postReadyInfo();
@@ -564,8 +591,62 @@ export class WorkerCore {
 		} catch (error) {
 			this.#transport.send({ type: "result", id: msg.id, ok: false, error: errorPayload(error) });
 		} finally {
-			if (this.#active?.id === msg.id) this.#active = undefined;
+			if (this.#active?.id === msg.id) this.#active = null;
+		}
+	}
+	/**
+	 * Returns this worker's `JsRuntime`, creating it on first use. The runtime
+	 * is created with the given session's cwd, a session id derived from the
+	 * tab's target id, and hooks resolved against whichever run is active.
+	 */
+	#ensureRuntime(session: SessionSnapshot): JsRuntime {
+		if (this.#runtime === null) {
+			const sessionId = `browser-tab-${this.#targetId ?? "unknown"}`;
+			this.#runtime = new JsRuntime({
+				initialCwd: session.cwd,
+				sessionId,
+				getHooks: () => this.#hooksForActiveRun(),
+			});
+		}
+		return this.#runtime;
+	}
+	/**
+	 * Builds the runtime hooks bound to the currently active run, or returns
+	 * `null` when no run is active.
+	 */
+	#hooksForActiveRun(): RuntimeHooks | null {
+		const run = this.#active;
+		if (run === null) return null;
+		const hooks: RuntimeHooks = {
+			// console.* output stays on the supervisor log channel — matches pre-runtime behavior
+			// where browser cells didn't surface `console.log` to the model.
+			onText: chunk => this.#log("debug", chunk.replace(/\n$/, "")),
+			onDisplay: output => this.#pushDisplay(run.displays, output),
+			callTool: (name, args) => this.#callTool(run, name, args),
+		};
+		return hooks;
+	}
+	/**
+	 * Appends a runtime display output to the run's `displays`. Images are
+	 * passed through; JSON data and status events are rendered as JSON text.
+	 */
+	#pushDisplay(displays: RunResultOk["displays"], output: JsDisplayOutput): void {
+		switch (output.type) {
+			case "image":
+				displays.push({ type: "image", data: output.data, mimeType: output.mimeType });
+				return;
+			case "json":
+				displays.push({ type: "text", text: safeJsonStringify(output.data) });
+				return;
+			default:
+				// status — surface as compact JSON so helper side effects (read/write/tree) appear in
+				// the cell result alongside explicit display() output.
+				displays.push({ type: "text", text: safeJsonStringify(output.event) });
 		}
+	}
+	/**
+	 * Sends a `tool-call` message for the given run and resolves with the value
+	 * from the matching `tool-reply` (or rejects with its error). The pending
+	 * entry is registered on the run before the message is sent.
+	 */
+	async #callTool(active: ActiveRun, name: string, args: unknown): Promise<unknown> {
+		const callId = `tab-tc-${active.id}-${crypto.randomUUID()}`;
+		const deferred = Promise.withResolvers<unknown>();
+		active.pendingTools.set(callId, { resolve: deferred.resolve, reject: deferred.reject });
+		this.#transport.send({ type: "tool-call", id: callId, runId: active.id, name, args });
+		return await deferred.promise;
+	}
+	/**
+	 * Settles the pending tool call identified by `id` with the supervisor's
+	 * reply. Replies that match no pending call on the active run are ignored.
+	 */
+	#deliverToolReply(id: string, reply: ToolReply): void {
+		const tools = this.#active?.pendingTools;
+		const waiter = tools?.get(id);
+		if (!tools || !waiter) return;
+		tools.delete(id);
+		if (reply.ok) {
+			waiter.resolve(reply.value);
+			return;
+		}
+		waiter.reject(replyError(reply.error));
 	}
 	#createTabApi(
@@ -933,41 +1014,6 @@ export class WorkerCore {
 		}
 		return handle;
 	}
-	#display(displays: RunResultOk["displays"], value: unknown): void {
-		if (value === undefined || value === null) return;
-		if (
-			typeof value === "object" &&
-			value !== null &&
-			"type" in (value as Record<string, unknown>) &&
-			(value as { type?: unknown }).type === "image"
-		) {
-			const img = value as { data?: unknown; mimeType?: unknown };
-			if (typeof img.data === "string" && typeof img.mimeType === "string") {
-				displays.push({ type: "image", data: img.data, mimeType: img.mimeType });
-				return;
-			}
-		}
-		if (typeof value === "string") {
-			displays.push({ type: "text", text: value });
-			return;
-		}
-		try {
-			displays.push({ type: "text", text: JSON.stringify(value, null, 2) });
-		} catch {
-			displays.push({ type: "text", text: String(value) });
-		}
-	}
-	#console(): Pick<Console, "log" | "debug" | "warn" | "error"> {
-		return {
-			log: (...args: unknown[]) => this.#log("debug", args.map(String).join(" ")),
-			debug: (...args: unknown[]) => this.#log("debug", args.map(String).join(" ")),
-			warn: (...args: unknown[]) => this.#log("warn", args.map(String).join(" ")),
-			error: (...args: unknown[]) => this.#log("error", args.map(String).join(" ")),
-		};
-	}
 	#clearElementCache(): void {
 		if (this.#elementCache.size === 0) {
 			this.#elementCounter = 0;