@oh-my-pi/pi-coding-agent 14.9.3 → 14.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/package.json +7 -7
- package/src/async/job-manager.ts +66 -9
- package/src/capability/rule.ts +20 -0
- package/src/cli/setup-cli.ts +14 -161
- package/src/cli/stats-cli.ts +56 -2
- package/src/cli.ts +0 -1
- package/src/config/model-registry.ts +13 -0
- package/src/config/model-resolver.ts +8 -2
- package/src/config/settings-schema.ts +1 -11
- package/src/edit/index.ts +8 -0
- package/src/edit/renderer.ts +6 -1
- package/src/edit/streaming.ts +53 -2
- package/src/eval/eval.lark +30 -10
- package/src/eval/js/context-manager.ts +334 -601
- package/src/eval/js/shared/helpers.ts +237 -0
- package/src/eval/js/shared/indirect-eval.ts +30 -0
- package/src/eval/js/{prelude.txt → shared/prelude.txt} +0 -2
- package/src/eval/js/shared/rewrite-imports.ts +211 -0
- package/src/eval/js/shared/runtime.ts +168 -0
- package/src/eval/js/shared/types.ts +18 -0
- package/src/eval/js/tool-bridge.ts +2 -4
- package/src/eval/js/worker-core.ts +146 -0
- package/src/eval/js/worker-entry.ts +24 -0
- package/src/eval/js/worker-protocol.ts +41 -0
- package/src/eval/parse.ts +218 -49
- package/src/eval/py/display.ts +71 -0
- package/src/eval/py/executor.ts +97 -96
- package/src/eval/py/index.ts +2 -2
- package/src/eval/py/kernel.ts +472 -900
- package/src/eval/py/prelude.py +106 -87
- package/src/eval/py/runner.py +879 -0
- package/src/eval/py/runtime.ts +3 -16
- package/src/eval/py/tool-bridge.ts +137 -0
- package/src/export/html/template.css +12 -0
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +113 -7
- package/src/extensibility/plugins/loader.ts +31 -6
- package/src/extensibility/skills.ts +20 -0
- package/src/internal-urls/agent-protocol.ts +63 -52
- package/src/internal-urls/artifact-protocol.ts +51 -51
- package/src/internal-urls/docs-index.generated.ts +35 -3
- package/src/internal-urls/index.ts +6 -19
- package/src/internal-urls/local-protocol.ts +49 -7
- package/src/internal-urls/mcp-protocol.ts +2 -8
- package/src/internal-urls/memory-protocol.ts +89 -59
- package/src/internal-urls/router.ts +38 -22
- package/src/internal-urls/rule-protocol.ts +2 -20
- package/src/internal-urls/skill-protocol.ts +4 -27
- package/src/main.ts +1 -1
- package/src/mcp/manager.ts +17 -0
- package/src/modes/components/session-observer-overlay.ts +2 -2
- package/src/modes/components/tool-execution.ts +6 -0
- package/src/modes/components/tree-selector.ts +4 -0
- package/src/modes/controllers/command-controller.ts +0 -23
- package/src/modes/controllers/event-controller.ts +23 -2
- package/src/modes/controllers/mcp-command-controller.ts +7 -10
- package/src/modes/interactive-mode.ts +2 -2
- package/src/modes/theme/theme.ts +27 -27
- package/src/modes/types.ts +1 -1
- package/src/modes/utils/ui-helpers.ts +14 -9
- package/src/prompts/commands/orchestrate.md +1 -0
- package/src/prompts/system/project-prompt.md +10 -2
- package/src/prompts/system/subagent-system-prompt.md +8 -8
- package/src/prompts/system/system-prompt.md +13 -7
- package/src/prompts/tools/ask.md +0 -1
- package/src/prompts/tools/bash.md +0 -10
- package/src/prompts/tools/eval.md +15 -30
- package/src/prompts/tools/github.md +6 -5
- package/src/prompts/tools/hashline.md +1 -0
- package/src/prompts/tools/job.md +14 -6
- package/src/prompts/tools/task.md +20 -3
- package/src/registry/agent-registry.ts +2 -1
- package/src/sdk.ts +87 -89
- package/src/session/agent-session.ts +58 -21
- package/src/session/artifacts.ts +7 -4
- package/src/session/history-storage.ts +77 -19
- package/src/session/session-manager.ts +30 -1
- package/src/ssh/connection-manager.ts +32 -16
- package/src/ssh/sshfs-mount.ts +10 -7
- package/src/system-prompt.ts +0 -5
- package/src/task/executor.ts +14 -2
- package/src/task/index.ts +19 -5
- package/src/tool-discovery/tool-index.ts +21 -8
- package/src/tools/ast-edit.ts +3 -2
- package/src/tools/ast-grep.ts +3 -2
- package/src/tools/bash.ts +15 -9
- package/src/tools/browser/tab-protocol.ts +4 -0
- package/src/tools/browser/tab-supervisor.ts +98 -7
- package/src/tools/browser/tab-worker.ts +104 -58
- package/src/tools/eval.ts +49 -11
- package/src/tools/fetch.ts +1 -1
- package/src/tools/gh.ts +140 -4
- package/src/tools/index.ts +12 -11
- package/src/tools/job.ts +48 -12
- package/src/tools/read.ts +5 -4
- package/src/tools/search.ts +3 -2
- package/src/tools/todo-write.ts +1 -1
- package/src/web/scrapers/mastodon.ts +1 -1
- package/src/web/scrapers/repology.ts +7 -7
- package/src/web/search/index.ts +6 -4
- package/src/cli/jupyter-cli.ts +0 -106
- package/src/commands/jupyter.ts +0 -32
- package/src/eval/py/cancellation.ts +0 -28
- package/src/eval/py/gateway-coordinator.ts +0 -424
- package/src/internal-urls/jobs-protocol.ts +0 -120
- package/src/prompts/system/now-prompt.md +0 -7
- /package/src/eval/js/{prelude.ts → shared/prelude.ts} +0 -0
package/src/tools/bash.ts
CHANGED
|
@@ -4,8 +4,10 @@ import type { Component } from "@oh-my-pi/pi-tui";
|
|
|
4
4
|
import { ImageProtocol, TERMINAL, Text } from "@oh-my-pi/pi-tui";
|
|
5
5
|
import { $env, getProjectDir, isEnoent, prompt } from "@oh-my-pi/pi-utils";
|
|
6
6
|
import { Type } from "@sinclair/typebox";
|
|
7
|
+
import { AsyncJobManager } from "../async";
|
|
7
8
|
import { type BashResult, executeBash } from "../exec/bash-executor";
|
|
8
9
|
import type { RenderResultOptions } from "../extensibility/custom-tools/types";
|
|
10
|
+
import { InternalUrlRouter } from "../internal-urls";
|
|
9
11
|
import { truncateToVisualLines } from "../modes/components/visual-truncate";
|
|
10
12
|
import type { Theme } from "../modes/theme/theme";
|
|
11
13
|
import bashDescription from "../prompts/tools/bash.md" with { type: "text" };
|
|
@@ -326,7 +328,9 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
326
328
|
}
|
|
327
329
|
lines.push(`Background job ${jobId} started: ${label}`);
|
|
328
330
|
lines.push("Result will be delivered automatically when complete.");
|
|
329
|
-
lines.push(
|
|
331
|
+
lines.push(
|
|
332
|
+
`You can use \`job\` to poll until complete, but prefer to continue with another task in the meanwhile if it's not blocking.`,
|
|
333
|
+
);
|
|
330
334
|
return {
|
|
331
335
|
content: [{ type: "text", text: lines.join("\n") }],
|
|
332
336
|
details,
|
|
@@ -349,7 +353,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
349
353
|
onUpdate?: AgentToolUpdateCallback<BashToolDetails>;
|
|
350
354
|
startBackgrounded: boolean;
|
|
351
355
|
}): ManagedBashJobHandle {
|
|
352
|
-
const manager =
|
|
356
|
+
const manager = AsyncJobManager.instance();
|
|
353
357
|
if (!manager) {
|
|
354
358
|
throw new ToolError("Background job manager unavailable for this session.");
|
|
355
359
|
}
|
|
@@ -399,6 +403,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
399
403
|
}
|
|
400
404
|
},
|
|
401
405
|
{
|
|
406
|
+
ownerId: this.session.getAgentId?.() ?? undefined,
|
|
402
407
|
onProgress: async (text, details) => {
|
|
403
408
|
latestText = text;
|
|
404
409
|
await options.onUpdate?.({
|
|
@@ -501,7 +506,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
501
506
|
|
|
502
507
|
const internalUrlOptions: InternalUrlExpansionOptions = {
|
|
503
508
|
skills: this.session.skills ?? [],
|
|
504
|
-
internalRouter:
|
|
509
|
+
internalRouter: InternalUrlRouter.instance(),
|
|
505
510
|
localOptions: {
|
|
506
511
|
getArtifactsDir: this.session.getArtifactsDir,
|
|
507
512
|
getSessionId: this.session.getSessionId,
|
|
@@ -549,7 +554,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
549
554
|
const timeoutClampNotice = formatTimeoutClampNotice(requestedTimeoutSec, timeoutSec);
|
|
550
555
|
|
|
551
556
|
if (asyncRequested) {
|
|
552
|
-
if (!
|
|
557
|
+
if (!AsyncJobManager.instance()) {
|
|
553
558
|
throw new ToolError("Async job manager unavailable for this session.");
|
|
554
559
|
}
|
|
555
560
|
const job = this.#startManagedBashJob({
|
|
@@ -570,7 +575,8 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
570
575
|
});
|
|
571
576
|
}
|
|
572
577
|
|
|
573
|
-
|
|
578
|
+
const autoBgManager = AsyncJobManager.instance();
|
|
579
|
+
if (this.#autoBackgroundEnabled && !pty && autoBgManager) {
|
|
574
580
|
const autoBackgroundWaitMs = this.#resolveAutoBackgroundWaitMs(timeoutMs);
|
|
575
581
|
const startBackgrounded = autoBackgroundWaitMs === 0;
|
|
576
582
|
const job = this.#startManagedBashJob({
|
|
@@ -593,16 +599,16 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
593
599
|
}
|
|
594
600
|
const waitResult = await this.#waitForManagedBashJob(job, autoBackgroundWaitMs, signal);
|
|
595
601
|
if (waitResult.kind === "completed") {
|
|
596
|
-
|
|
602
|
+
autoBgManager.acknowledgeDeliveries([job.jobId]);
|
|
597
603
|
return waitResult.result;
|
|
598
604
|
}
|
|
599
605
|
if (waitResult.kind === "failed") {
|
|
600
|
-
|
|
606
|
+
autoBgManager.acknowledgeDeliveries([job.jobId]);
|
|
601
607
|
throw waitResult.error;
|
|
602
608
|
}
|
|
603
609
|
if (waitResult.kind === "aborted") {
|
|
604
|
-
|
|
605
|
-
|
|
610
|
+
autoBgManager.cancel(job.jobId);
|
|
611
|
+
autoBgManager.acknowledgeDeliveries([job.jobId]);
|
|
606
612
|
throw new ToolAbortError(job.getLatestText() || "Command aborted");
|
|
607
613
|
}
|
|
608
614
|
job.setBackgrounded(true);
|
|
@@ -59,10 +59,13 @@ export type WorkerInitPayload =
|
|
|
59
59
|
dialogs?: "accept" | "dismiss";
|
|
60
60
|
};
|
|
61
61
|
|
|
62
|
+
export type ToolReply = { ok: true; value: unknown } | { ok: false; error: RunErrorPayload };
|
|
63
|
+
|
|
62
64
|
export type WorkerInbound =
|
|
63
65
|
| { type: "init"; payload: WorkerInitPayload }
|
|
64
66
|
| { type: "run"; id: string; name: string; code: string; timeoutMs: number; session: SessionSnapshot }
|
|
65
67
|
| { type: "abort"; id: string }
|
|
68
|
+
| { type: "tool-reply"; id: string; reply: ToolReply }
|
|
66
69
|
| { type: "close" };
|
|
67
70
|
|
|
68
71
|
export interface ReadyInfo {
|
|
@@ -91,6 +94,7 @@ export type WorkerOutbound =
|
|
|
91
94
|
| { type: "init-failed"; error: RunErrorPayload }
|
|
92
95
|
| { type: "result"; id: string; ok: true; payload: RunResultOk }
|
|
93
96
|
| { type: "result"; id: string; ok: false; error: RunErrorPayload }
|
|
97
|
+
| { type: "tool-call"; id: string; runId: string; name: string; args: unknown }
|
|
94
98
|
| { type: "log"; level: "debug" | "warn" | "error"; msg: string; meta?: Record<string, unknown> }
|
|
95
99
|
| { type: "closed" };
|
|
96
100
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { getPuppeteerDir, logger, Snowflake } from "@oh-my-pi/pi-utils";
|
|
2
2
|
import type { Page, Target } from "puppeteer-core";
|
|
3
|
+
import { callSessionTool } from "../../eval/js/tool-bridge";
|
|
3
4
|
import type { ToolSession } from "../../sdk";
|
|
4
5
|
import { expandPath } from "../path-utils";
|
|
5
6
|
import { ToolAbortError, ToolError } from "../tool-errors";
|
|
@@ -16,6 +17,17 @@ import type {
|
|
|
16
17
|
WorkerInitPayload,
|
|
17
18
|
WorkerOutbound,
|
|
18
19
|
} from "./tab-protocol";
|
|
20
|
+
// Imported with `type: "file"` so Bun's bundler statically discovers the
|
|
21
|
+
// worker entry and embeds it inside `bun build --compile` single-file
|
|
22
|
+
// binaries. Without this attribute the bundler cannot reach the entry through
|
|
23
|
+
// a `new URL(..., import.meta.url)` literal stored in a local variable, and
|
|
24
|
+
// the prebuilt binary surfaces `Timed out initializing browser tab worker`
|
|
25
|
+
// (issue #1011) because `/$bunfs/root/tab-worker-entry.ts` is missing.
|
|
26
|
+
// tsgo doesn't recognize Bun's `with { type: "file" }` attribute and treats
|
|
27
|
+
// this as a normal TS source import, raising TS1192/TS5097. Bun's bundler
|
|
28
|
+
// (and runtime) honors the attribute and returns the embedded file URL.
|
|
29
|
+
// @ts-expect-error -- Bun file-URL import (see comment above).
|
|
30
|
+
import tabWorkerEntryUrl from "./tab-worker-entry.ts" with { type: "file" };
|
|
19
31
|
|
|
20
32
|
interface WorkerHandle {
|
|
21
33
|
send(msg: WorkerInbound, transferList?: Transferable[]): void;
|
|
@@ -26,6 +38,14 @@ interface WorkerHandle {
|
|
|
26
38
|
|
|
27
39
|
export type DialogPolicy = "accept" | "dismiss";
|
|
28
40
|
|
|
41
|
+
/**
 * Supervisor-side bookkeeping for one in-flight run inside a tab worker.
 *
 * An entry is stored in `TabSession.pending` under the run id and is looked up
 * again when the worker sends a `tool-call` for that run.
 */
export interface PendingRun {
	/** Session used to execute tool calls issued by the running code. */
	session: ToolSession;
	/** Caller-supplied abort signal for the run, when one was provided. */
	signal?: AbortSignal;
	/** Abort controllers of tool calls currently in flight, keyed by tool-call id. */
	toolCalls: Map<string, AbortController>;
	/** Settles the run's promise with a successful result. */
	resolve(result: RunResultOk): void;
	/** Settles the run's promise with a failure. */
	reject(error: unknown): void;
}
|
|
48
|
+
|
|
29
49
|
export interface TabSession {
|
|
30
50
|
name: string;
|
|
31
51
|
browser: BrowserHandle;
|
|
@@ -33,7 +53,7 @@ export interface TabSession {
|
|
|
33
53
|
worker: WorkerHandle;
|
|
34
54
|
state: "alive" | "dead";
|
|
35
55
|
info: ReadyInfo;
|
|
36
|
-
pending: Map<string,
|
|
56
|
+
pending: Map<string, PendingRun>;
|
|
37
57
|
dialogPolicy?: DialogPolicy;
|
|
38
58
|
kindTag: BrowserKindTag;
|
|
39
59
|
}
|
|
@@ -144,14 +164,14 @@ export async function acquireTab(
|
|
|
144
164
|
export async function runInTab(name: string, opts: RunInTabOptions): Promise<RunResultOk> {
|
|
145
165
|
return await runInTabWithSnapshot(
|
|
146
166
|
name,
|
|
147
|
-
{ code: opts.code, timeoutMs: opts.timeoutMs, signal: opts.signal },
|
|
167
|
+
{ code: opts.code, timeoutMs: opts.timeoutMs, signal: opts.signal, session: opts.session },
|
|
148
168
|
{ cwd: opts.session.cwd, browserScreenshotDir: expandBrowserScreenshotDir(opts.session) },
|
|
149
169
|
);
|
|
150
170
|
}
|
|
151
171
|
|
|
152
172
|
async function runInTabWithSnapshot(
|
|
153
173
|
name: string,
|
|
154
|
-
opts: { code: string; timeoutMs: number; signal?: AbortSignal },
|
|
174
|
+
opts: { code: string; timeoutMs: number; signal?: AbortSignal; session?: ToolSession },
|
|
155
175
|
snapshot: SessionSnapshot,
|
|
156
176
|
): Promise<RunResultOk> {
|
|
157
177
|
const tab = tabs.get(name);
|
|
@@ -159,8 +179,18 @@ async function runInTabWithSnapshot(
|
|
|
159
179
|
if (tab.pending.size > 0) throw new ToolError(`Tab ${JSON.stringify(name)} is busy`);
|
|
160
180
|
const id = Snowflake.next();
|
|
161
181
|
const { promise, resolve, reject } = Promise.withResolvers<RunResultOk>();
|
|
162
|
-
|
|
163
|
-
|
|
182
|
+
const pending: PendingRun = {
|
|
183
|
+
resolve,
|
|
184
|
+
reject,
|
|
185
|
+
session: opts.session ?? ({} as ToolSession),
|
|
186
|
+
signal: opts.signal,
|
|
187
|
+
toolCalls: new Map(),
|
|
188
|
+
};
|
|
189
|
+
tab.pending.set(id, pending);
|
|
190
|
+
const abort = (): void => {
|
|
191
|
+
tab.worker.send({ type: "abort", id });
|
|
192
|
+
for (const ctrl of pending.toolCalls.values()) ctrl.abort(opts.signal?.reason);
|
|
193
|
+
};
|
|
164
194
|
if (opts.signal?.aborted) abort();
|
|
165
195
|
else opts.signal?.addEventListener("abort", abort, { once: true });
|
|
166
196
|
try {
|
|
@@ -266,9 +296,71 @@ function handleTabMessage(tab: TabSession, msg: WorkerOutbound): void {
|
|
|
266
296
|
tab.info = msg.info;
|
|
267
297
|
return;
|
|
268
298
|
}
|
|
299
|
+
if (msg.type === "tool-call") {
|
|
300
|
+
void dispatchToolCall(tab, msg);
|
|
301
|
+
return;
|
|
302
|
+
}
|
|
269
303
|
if (msg.type === "log") logWorkerMessage(msg);
|
|
270
304
|
}
|
|
271
305
|
|
|
306
|
+
/**
 * Executes a tool call requested by user code running in a tab worker and
 * sends the outcome back to the worker as a `tool-reply`.
 *
 * The call is bound to the pending run identified by `msg.runId`. When that run
 * is unknown, or its session carries no `cwd`, an error reply is sent instead.
 * Each call gets its own AbortController, registered in the run's `toolCalls`
 * map and aborted when the run's own signal aborts.
 *
 * @param tab - Tab whose worker issued the call and will receive the reply.
 * @param msg - The `tool-call` message received from the worker.
 */
async function dispatchToolCall(tab: TabSession, msg: Extract<WorkerOutbound, { type: "tool-call" }>): Promise<void> {
	const run = tab.pending.get(msg.runId);
	if (!run?.session.cwd) {
		safeSend(tab, {
			type: "tool-reply",
			id: msg.id,
			reply: {
				ok: false,
				error: { name: "ToolError", message: "No active run for tool call", isToolError: true, isAbort: false },
			},
		});
		return;
	}

	const parentSignal = run.signal;
	const controller = new AbortController();
	run.toolCalls.set(msg.id, controller);

	// Mirror the run's abort onto this call, immediately if it already fired.
	const forwardAbort = (): void => controller.abort(parentSignal?.reason);
	if (parentSignal?.aborted) {
		forwardAbort();
	} else {
		parentSignal?.addEventListener("abort", forwardAbort, { once: true });
	}

	try {
		const value = await callSessionTool(msg.name, msg.args, {
			session: run.session,
			signal: controller.signal,
			emitStatus: () => {
				// Status events from tool calls aren't piped back to user code yet; the worker
				// already pushes its own helper status via the display channel.
			},
		});
		safeSend(tab, { type: "tool-reply", id: msg.id, reply: { ok: true, value } });
	} catch (error) {
		safeSend(tab, { type: "tool-reply", id: msg.id, reply: { ok: false, error: toErrorPayload(error) } });
	} finally {
		// Always unregister so a finished call is never aborted or leaked.
		run.toolCalls.delete(msg.id);
		parentSignal?.removeEventListener("abort", forwardAbort);
	}
}
|
|
341
|
+
|
|
342
|
+
/**
 * Best-effort message delivery to a tab worker.
 *
 * Does nothing unless the tab is still `"alive"`. A failing `send` is logged at
 * debug level and never propagated to the caller.
 *
 * @param tab - Target tab session.
 * @param msg - Message to hand to the tab's worker.
 */
function safeSend(tab: TabSession, msg: WorkerInbound): void {
	if (tab.state === "alive") {
		try {
			tab.worker.send(msg);
		} catch (sendError) {
			const reason = sendError instanceof Error ? sendError.message : String(sendError);
			logger.debug("tab worker send failed", { error: reason });
		}
	}
}
|
|
350
|
+
|
|
351
|
+
/**
 * Converts a thrown value into a `RunErrorPayload` for a `tool-reply` message.
 *
 * `Error` instances keep their name, message and stack. They are flagged as
 * aborts when named `AbortError`/`ToolAbortError`, and as tool errors when they
 * are `ToolError` instances or are named `ToolError`. Any other thrown value is
 * stringified into a generic `Error` payload.
 *
 * @param error - The value caught from a failed tool call.
 * @returns The payload describing the failure.
 */
function toErrorPayload(error: unknown): RunErrorPayload {
	if (!(error instanceof Error)) {
		return { name: "Error", message: String(error), isAbort: false, isToolError: false };
	}
	const { name, message, stack } = error;
	const isAbort = name === "AbortError" || name === "ToolAbortError";
	const isToolError = error instanceof ToolError || name === "ToolError";
	return { name, message, stack, isAbort, isToolError };
}
|
|
363
|
+
|
|
272
364
|
async function forceKillTab(name: string, reason: string): Promise<void> {
|
|
273
365
|
const tab = tabs.get(name);
|
|
274
366
|
if (!tab) return;
|
|
@@ -364,8 +456,7 @@ async function raceWithTimeout<T>(
|
|
|
364
456
|
|
|
365
457
|
async function spawnTabWorker(): Promise<WorkerHandle> {
|
|
366
458
|
try {
|
|
367
|
-
const
|
|
368
|
-
const worker = new Worker(url.href, { type: "module" });
|
|
459
|
+
const worker = new Worker(tabWorkerEntryUrl, { type: "module" });
|
|
369
460
|
return wrapBunWorker(worker);
|
|
370
461
|
} catch (err) {
|
|
371
462
|
logger.warn("Bun Worker spawn failed; using inline tab worker (no sync-loop guard)", {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as os from "node:os";
|
|
3
3
|
import * as path from "node:path";
|
|
4
|
-
|
|
4
|
+
|
|
5
5
|
import { Snowflake, untilAborted } from "@oh-my-pi/pi-utils";
|
|
6
6
|
import type { HTMLElement } from "linkedom";
|
|
7
7
|
import type {
|
|
@@ -14,6 +14,8 @@ import type {
|
|
|
14
14
|
SerializedAXNode,
|
|
15
15
|
Target,
|
|
16
16
|
} from "puppeteer-core";
|
|
17
|
+
import { JsRuntime, type RuntimeHooks } from "../../eval/js/shared/runtime";
|
|
18
|
+
import type { JsDisplayOutput } from "../../eval/js/shared/types";
|
|
17
19
|
import { resizeImage } from "../../utils/image-resize";
|
|
18
20
|
import { resolveToCwd } from "../path-utils";
|
|
19
21
|
import { formatScreenshot } from "../render-utils";
|
|
@@ -34,6 +36,7 @@ import type {
|
|
|
34
36
|
RunResultOk,
|
|
35
37
|
ScreenshotResult,
|
|
36
38
|
SessionSnapshot,
|
|
39
|
+
ToolReply,
|
|
37
40
|
Transport,
|
|
38
41
|
WorkerInbound,
|
|
39
42
|
WorkerInitPayload,
|
|
@@ -177,6 +180,27 @@ function errorPayload(error: unknown): RunErrorPayload {
|
|
|
177
180
|
return { name: "Error", message: String(error), isToolError: false, isAbort: false };
|
|
178
181
|
}
|
|
179
182
|
|
|
183
|
+
/**
 * Serializes a value as pretty-printed JSON (2-space indent) without ever
 * throwing from `JSON.stringify` or returning a non-string.
 *
 * Falls back to `String(value)` in two cases:
 * - `JSON.stringify` throws (circular structures, BigInt values);
 * - `JSON.stringify` yields `undefined`, which it does at runtime for
 *   `undefined`, functions and symbols even though its declared return type
 *   is `string`.
 *
 * @param value - Any value to render as text.
 * @returns A string representation of `value`.
 */
function safeJsonStringify(value: unknown): string {
	try {
		// Widen the declared type so the runtime `undefined` result is handled.
		const json: string | undefined = JSON.stringify(value, null, 2);
		return json ?? String(value);
	} catch {
		return String(value);
	}
}
|
|
190
|
+
|
|
191
|
+
/**
 * Rebuilds an `Error` object from a serialized `RunErrorPayload`.
 *
 * Abort payloads become a `ToolAbortError` (default message
 * "Tool call aborted" when none is given). Other payloads become a `ToolError`
 * when flagged `isToolError`, otherwise a plain `Error`, and take over the
 * payload's `name` when present. The payload's stack, when present, replaces
 * the stack of the rebuilt error.
 *
 * @param payload - Error description carried by a failed tool reply.
 * @returns The reconstructed error.
 */
function replyError(payload: RunErrorPayload): Error {
	const { isAbort, isToolError, message, name, stack } = payload;
	let rebuilt: Error;
	if (isAbort) {
		rebuilt = new ToolAbortError(message || "Tool call aborted");
	} else {
		rebuilt = isToolError ? new ToolError(message) : new Error(message);
		if (name) rebuilt.name = name;
	}
	if (stack) rebuilt.stack = stack;
	return rebuilt;
}
|
|
203
|
+
|
|
180
204
|
async function targetIdForTarget(target: Target): Promise<string> {
|
|
181
205
|
const raw = target as unknown as { _targetId?: unknown };
|
|
182
206
|
if (typeof raw._targetId === "string") return raw._targetId;
|
|
@@ -361,6 +385,14 @@ async function clickQueryHandlerText(
|
|
|
361
385
|
);
|
|
362
386
|
}
|
|
363
387
|
|
|
388
|
+
/** Worker-side state of the run currently executing in this tab. */
interface ActiveRun {
	/** Id of the `run` message that started this run. */
	id: string;
	/** Controller aborted when an `abort` message for this run arrives. */
	ac: AbortController;
	/** Tool calls awaiting a `tool-reply`, keyed by tool-call id. */
	pendingTools: Map<string, { resolve(value: unknown): void; reject(error: Error): void }>;
	/** Display outputs collected during the run. */
	displays: RunResultOk["displays"];
	/** Screenshots collected during the run. */
	screenshots: ScreenshotResult[];
}
|
|
395
|
+
|
|
364
396
|
export class WorkerCore {
|
|
365
397
|
#transport: Transport;
|
|
366
398
|
#browser?: Browser;
|
|
@@ -368,7 +400,8 @@ export class WorkerCore {
|
|
|
368
400
|
#targetId?: string;
|
|
369
401
|
#elementCache = new Map<number, ElementHandle>();
|
|
370
402
|
#elementCounter = 0;
|
|
371
|
-
#active
|
|
403
|
+
#active: ActiveRun | null = null;
|
|
404
|
+
#runtime: JsRuntime | null = null;
|
|
372
405
|
#unsub: () => void;
|
|
373
406
|
#mode?: WorkerInitPayload["mode"];
|
|
374
407
|
#dialogPolicy?: DialogPolicy;
|
|
@@ -401,6 +434,9 @@ export class WorkerCore {
|
|
|
401
434
|
case "abort":
|
|
402
435
|
if (this.#active?.id === msg.id) this.#active.ac.abort(new ToolAbortError());
|
|
403
436
|
return;
|
|
437
|
+
case "tool-reply":
|
|
438
|
+
this.#deliverToolReply(msg.id, msg.reply);
|
|
439
|
+
return;
|
|
404
440
|
case "close":
|
|
405
441
|
await this.#close();
|
|
406
442
|
return;
|
|
@@ -502,37 +538,26 @@ export class WorkerCore {
|
|
|
502
538
|
const timeoutSignal = AbortSignal.timeout(msg.timeoutMs);
|
|
503
539
|
const ac = new AbortController();
|
|
504
540
|
const signal = AbortSignal.any([timeoutSignal, ac.signal]);
|
|
505
|
-
this.#active = { id: msg.id, ac };
|
|
506
541
|
const displays: RunResultOk["displays"] = [];
|
|
507
542
|
const screenshots: ScreenshotResult[] = [];
|
|
543
|
+
const active: ActiveRun = { id: msg.id, ac, displays, screenshots, pendingTools: new Map() };
|
|
544
|
+
this.#active = active;
|
|
508
545
|
try {
|
|
509
546
|
throwIfAborted(signal);
|
|
510
547
|
const page = this.#requirePage();
|
|
511
548
|
const browser = this.#requireBrowser();
|
|
512
549
|
const tabApi = this.#createTabApi(msg.name, msg.timeoutMs, signal, msg.session, displays, screenshots);
|
|
513
|
-
const
|
|
550
|
+
const runtime = this.#ensureRuntime(msg.session);
|
|
551
|
+
runtime.setCwd(msg.session.cwd);
|
|
552
|
+
runtime.setRunScope({
|
|
514
553
|
page,
|
|
515
554
|
browser,
|
|
516
555
|
tab: tabApi,
|
|
517
|
-
display: (value: unknown): void => this.#display(displays, value),
|
|
518
556
|
assert: (cond: unknown, text?: string): void => {
|
|
519
557
|
if (!cond) throw new ToolError(text ?? "Assertion failed");
|
|
520
558
|
},
|
|
521
559
|
wait: (ms: number): Promise<void> => Bun.sleep(ms),
|
|
522
|
-
console: this.#console(),
|
|
523
|
-
setTimeout,
|
|
524
|
-
clearTimeout,
|
|
525
|
-
setInterval,
|
|
526
|
-
clearInterval,
|
|
527
|
-
queueMicrotask,
|
|
528
|
-
Promise,
|
|
529
|
-
URL,
|
|
530
|
-
URLSearchParams,
|
|
531
|
-
TextEncoder,
|
|
532
|
-
TextDecoder,
|
|
533
|
-
Buffer,
|
|
534
560
|
});
|
|
535
|
-
const wrapped = `(async () => {\n${msg.code}\n})()`;
|
|
536
561
|
const { promise: cancelRejection, reject: rejectCancel } = Promise.withResolvers<never>();
|
|
537
562
|
const onCancel = (): void => {
|
|
538
563
|
rejectCancel(
|
|
@@ -540,15 +565,17 @@ export class WorkerCore {
|
|
|
540
565
|
? new ToolError(`Browser code execution timed out after ${msg.timeoutMs}ms`)
|
|
541
566
|
: new ToolAbortError(),
|
|
542
567
|
);
|
|
568
|
+
// Cancel in-flight tool calls so user code's awaited proxies reject promptly.
|
|
569
|
+
for (const pending of active.pendingTools.values()) {
|
|
570
|
+
pending.reject(new ToolAbortError());
|
|
571
|
+
}
|
|
572
|
+
active.pendingTools.clear();
|
|
543
573
|
};
|
|
544
574
|
if (signal.aborted) onCancel();
|
|
545
575
|
else signal.addEventListener("abort", onCancel, { once: true });
|
|
546
576
|
try {
|
|
547
577
|
const returnValue = await Promise.race([
|
|
548
|
-
|
|
549
|
-
filename: `browser-run-${msg.id}.js`,
|
|
550
|
-
lineOffset: -1,
|
|
551
|
-
}) as Promise<unknown>,
|
|
578
|
+
runtime.run(msg.code, `browser-run-${msg.id}.js`),
|
|
552
579
|
cancelRejection,
|
|
553
580
|
]);
|
|
554
581
|
await this.#postReadyInfo();
|
|
@@ -564,8 +591,62 @@ export class WorkerCore {
|
|
|
564
591
|
} catch (error) {
|
|
565
592
|
this.#transport.send({ type: "result", id: msg.id, ok: false, error: errorPayload(error) });
|
|
566
593
|
} finally {
|
|
567
|
-
if (this.#active?.id === msg.id) this.#active =
|
|
594
|
+
if (this.#active?.id === msg.id) this.#active = null;
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
/**
 * Returns this worker's `JsRuntime`, creating it on first use.
 *
 * The runtime is built once from the first session snapshot it sees. Its hooks
 * are resolved lazily through `#hooksForActiveRun`, so they follow whichever
 * run is active at call time.
 *
 * @param session - Snapshot supplying the initial working directory.
 */
#ensureRuntime(session: SessionSnapshot): JsRuntime {
	this.#runtime ??= new JsRuntime({
		initialCwd: session.cwd,
		sessionId: `browser-tab-${this.#targetId ?? "unknown"}`,
		getHooks: () => this.#hooksForActiveRun(),
	});
	return this.#runtime;
}
|
|
607
|
+
|
|
608
|
+
/**
 * Builds the runtime hooks bound to the currently active run.
 *
 * @returns Hooks that log text output, collect display output into the run and
 * forward tool calls to the supervisor, or `null` when no run is active.
 */
#hooksForActiveRun(): RuntimeHooks | null {
	const run = this.#active;
	return run
		? {
				// console.* output stays on the supervisor log channel — matches pre-runtime behavior
				// where browser cells didn't surface `console.log` to the model.
				onText: chunk => this.#log("debug", chunk.replace(/\n$/, "")),
				onDisplay: output => this.#pushDisplay(run.displays, output),
				callTool: (name, args) => this.#callTool(run, name, args),
			}
		: null;
}
|
|
619
|
+
|
|
620
|
+
/**
 * Appends one runtime display output to the run's display list.
 *
 * Images are passed through unchanged. JSON payloads and status events are
 * rendered to text with `safeJsonStringify`.
 *
 * @param displays - Display list of the active run; mutated in place.
 * @param output - Output emitted by the JS runtime.
 */
#pushDisplay(displays: RunResultOk["displays"], output: JsDisplayOutput): void {
	switch (output.type) {
		case "image":
			displays.push({ type: "image", data: output.data, mimeType: output.mimeType });
			return;
		case "json":
			displays.push({ type: "text", text: safeJsonStringify(output.data) });
			return;
		default:
			// status — surface as compact JSON so helper side effects (read/write/tree) appear in
			// the cell result alongside explicit display() output.
			displays.push({ type: "text", text: safeJsonStringify(output.event) });
	}
}
|
|
633
|
+
|
|
634
|
+
/**
 * Forwards a tool call from user code to the supervisor and waits for its reply.
 *
 * The call is registered in the run's `pendingTools` under a fresh id before
 * the `tool-call` message is sent. The returned promise is settled by the
 * resolver stored in `pendingTools`.
 *
 * @param active - Run on whose behalf the tool is called.
 * @param name - Tool name.
 * @param args - Tool arguments, passed through as-is.
 * @returns The value carried by a successful reply.
 */
async #callTool(active: ActiveRun, name: string, args: unknown): Promise<unknown> {
	const callId = `tab-tc-${active.id}-${crypto.randomUUID()}`;
	const deferred = Promise.withResolvers<unknown>();
	active.pendingTools.set(callId, { resolve: deferred.resolve, reject: deferred.reject });
	this.#transport.send({ type: "tool-call", id: callId, runId: active.id, name, args });
	return await deferred.promise;
}
|
|
641
|
+
|
|
642
|
+
/**
 * Settles the pending tool call that a `tool-reply` message answers.
 *
 * Replies are ignored when no run is active or when the id is not registered
 * in the active run's `pendingTools`.
 *
 * @param id - Tool-call id the reply refers to.
 * @param reply - Outcome reported by the supervisor.
 */
#deliverToolReply(id: string, reply: ToolReply): void {
	const tools = this.#active?.pendingTools;
	if (!tools) return;
	const waiter = tools.get(id);
	if (!waiter) return;
	tools.delete(id);
	if (reply.ok) {
		waiter.resolve(reply.value);
	} else {
		waiter.reject(replyError(reply.error));
	}
}
|
|
570
651
|
|
|
571
652
|
#createTabApi(
|
|
@@ -933,41 +1014,6 @@ export class WorkerCore {
|
|
|
933
1014
|
}
|
|
934
1015
|
return handle;
|
|
935
1016
|
}
|
|
936
|
-
|
|
937
|
-
#display(displays: RunResultOk["displays"], value: unknown): void {
|
|
938
|
-
if (value === undefined || value === null) return;
|
|
939
|
-
if (
|
|
940
|
-
typeof value === "object" &&
|
|
941
|
-
value !== null &&
|
|
942
|
-
"type" in (value as Record<string, unknown>) &&
|
|
943
|
-
(value as { type?: unknown }).type === "image"
|
|
944
|
-
) {
|
|
945
|
-
const img = value as { data?: unknown; mimeType?: unknown };
|
|
946
|
-
if (typeof img.data === "string" && typeof img.mimeType === "string") {
|
|
947
|
-
displays.push({ type: "image", data: img.data, mimeType: img.mimeType });
|
|
948
|
-
return;
|
|
949
|
-
}
|
|
950
|
-
}
|
|
951
|
-
if (typeof value === "string") {
|
|
952
|
-
displays.push({ type: "text", text: value });
|
|
953
|
-
return;
|
|
954
|
-
}
|
|
955
|
-
try {
|
|
956
|
-
displays.push({ type: "text", text: JSON.stringify(value, null, 2) });
|
|
957
|
-
} catch {
|
|
958
|
-
displays.push({ type: "text", text: String(value) });
|
|
959
|
-
}
|
|
960
|
-
}
|
|
961
|
-
|
|
962
|
-
#console(): Pick<Console, "log" | "debug" | "warn" | "error"> {
|
|
963
|
-
return {
|
|
964
|
-
log: (...args: unknown[]) => this.#log("debug", args.map(String).join(" ")),
|
|
965
|
-
debug: (...args: unknown[]) => this.#log("debug", args.map(String).join(" ")),
|
|
966
|
-
warn: (...args: unknown[]) => this.#log("warn", args.map(String).join(" ")),
|
|
967
|
-
error: (...args: unknown[]) => this.#log("error", args.map(String).join(" ")),
|
|
968
|
-
};
|
|
969
|
-
}
|
|
970
|
-
|
|
971
1017
|
#clearElementCache(): void {
|
|
972
1018
|
if (this.#elementCache.size === 0) {
|
|
973
1019
|
this.#elementCounter = 0;
|
package/src/tools/eval.ts
CHANGED
|
@@ -8,7 +8,7 @@ import { jsBackend, parseEvalInput, pythonBackend, sniffEvalLanguage } from "../
|
|
|
8
8
|
import type { ExecutorBackend } from "../eval/backend";
|
|
9
9
|
import evalGrammar from "../eval/eval.lark" with { type: "text" };
|
|
10
10
|
import { ABORT_WARNING, type ParsedEvalCell } from "../eval/parse";
|
|
11
|
-
import type { EvalCellResult, EvalLanguage, EvalStatusEvent, EvalToolDetails } from "../eval/types";
|
|
11
|
+
import type { EvalCellResult, EvalDisplayOutput, EvalLanguage, EvalStatusEvent, EvalToolDetails } from "../eval/types";
|
|
12
12
|
import type { RenderResultOptions } from "../extensibility/custom-tools/types";
|
|
13
13
|
import { truncateToVisualLines } from "../modes/components/visual-truncate";
|
|
14
14
|
import { getMarkdownTheme, type Theme } from "../modes/theme/theme";
|
|
@@ -26,7 +26,7 @@ export const EVAL_DEFAULT_PREVIEW_LINES = 10;
|
|
|
26
26
|
|
|
27
27
|
export const evalSchema = Type.Object({
|
|
28
28
|
input: Type.String({
|
|
29
|
-
description:
|
|
29
|
+
description: 'eval input as a sequence of `*** Cell <lang>:"title"` cell headers followed by code',
|
|
30
30
|
}),
|
|
31
31
|
});
|
|
32
32
|
export type EvalToolParams = Static<typeof evalSchema>;
|
|
@@ -47,6 +47,38 @@ function formatJsonScalar(value: unknown): string {
|
|
|
47
47
|
return "[object]";
|
|
48
48
|
}
|
|
49
49
|
|
|
50
|
+
/**
 * Cap per `display()` value sent back to the model.
 *
 * Despite the name, the limit is measured in UTF-16 code units
 * (`String.prototype.length`), not in encoded bytes.
 */
const MAX_DISPLAY_TEXT_BYTES = 8000;

/**
 * Renders a `display()` JSON value as text for the model, truncated to
 * `MAX_DISPLAY_TEXT_BYTES` code units.
 *
 * The value is pretty-printed with `JSON.stringify`. `String(value)` is used
 * when serialization throws or yields `undefined`. Over-long text is cut and
 * suffixed with a note stating how many characters were dropped. The cut never
 * lands between the two halves of a surrogate pair, so the result never ends
 * in a lone high surrogate.
 *
 * @param value - Value passed to `display()`.
 * @returns The (possibly truncated) text representation.
 */
function formatDisplayJsonForText(value: unknown): string {
	let text: string;
	try {
		text = JSON.stringify(value, null, 2) ?? String(value);
	} catch {
		text = String(value);
	}
	if (text.length <= MAX_DISPLAY_TEXT_BYTES) return text;

	let cut = MAX_DISPLAY_TEXT_BYTES;
	const lastKept = text.charCodeAt(cut - 1);
	// Back off one unit when the last kept unit is a high surrogate, so its
	// low half is not separated from it by the cut.
	if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
	return `${text.slice(0, cut)}\n… (${text.length - cut} chars truncated)`;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Build the text section that exposes `display()` JSON payloads to the model.
 *
 * Only entries whose `type` is `"json"` are rendered; each gets a 1-based
 * `display[n]:` header (numbered among JSON entries only) followed by its
 * formatted payload, and sections are separated by a blank line. Images are
 * deliberately left alone here: they are surfaced separately as ImageContent
 * so the model can inspect them directly.
 *
 * @param outputs - Display outputs collected for a cell.
 * @returns The joined text, or an empty string when there is no JSON entry.
 */
function formatDisplayOutputsForText(outputs: EvalDisplayOutput[]): string {
	return outputs
		.flatMap(entry => (entry.type === "json" ? [entry.data] : []))
		.map((payload, position) => `display[${position + 1}]:\n${formatDisplayJsonForText(payload)}`)
		.join("\n\n");
}
|
|
81
|
+
|
|
50
82
|
function renderJsonTree(value: unknown, theme: Theme, expanded: boolean, maxDepth = expanded ? 6 : 2): string[] {
|
|
51
83
|
const maxItems = expanded ? 20 : 5;
|
|
52
84
|
|
|
@@ -370,13 +402,16 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
|
|
|
370
402
|
const durationMs = Date.now() - startTime;
|
|
371
403
|
|
|
372
404
|
const cellStatusEvents: EvalStatusEvent[] = [];
|
|
405
|
+
const cellDisplayOutputs: EvalDisplayOutput[] = [];
|
|
373
406
|
let cellHasMarkdown = false;
|
|
374
407
|
for (const output of result.displayOutputs) {
|
|
375
408
|
if (output.type === "json") {
|
|
376
409
|
jsonOutputs.push(output.data);
|
|
410
|
+
cellDisplayOutputs.push(output);
|
|
377
411
|
}
|
|
378
412
|
if (output.type === "image") {
|
|
379
413
|
images.push({ type: "image", data: output.data, mimeType: output.mimeType });
|
|
414
|
+
cellDisplayOutputs.push(output);
|
|
380
415
|
}
|
|
381
416
|
if (output.type === "status") {
|
|
382
417
|
statusEvents.push(output.event);
|
|
@@ -387,7 +422,10 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
|
|
|
387
422
|
}
|
|
388
423
|
}
|
|
389
424
|
|
|
390
|
-
const
|
|
425
|
+
const stdoutTrimmed = result.output.trim();
|
|
426
|
+
const displayText = formatDisplayOutputsForText(cellDisplayOutputs);
|
|
427
|
+
const cellOutput =
|
|
428
|
+
stdoutTrimmed && displayText ? `${stdoutTrimmed}\n\n${displayText}` : stdoutTrimmed || displayText;
|
|
391
429
|
cellResult.output = cellOutput;
|
|
392
430
|
cellResult.exitCode = result.exitCode;
|
|
393
431
|
cellResult.durationMs = durationMs;
|
|
@@ -431,14 +469,13 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
|
|
|
431
469
|
languages,
|
|
432
470
|
cells: cellResults,
|
|
433
471
|
jsonOutputs: jsonOutputs.length > 0 ? jsonOutputs : undefined,
|
|
434
|
-
images: images.length > 0 ? images : undefined,
|
|
435
472
|
statusEvents: statusEvents.length > 0 ? statusEvents : undefined,
|
|
436
473
|
isError: true,
|
|
437
474
|
};
|
|
438
475
|
if (notice) details.notice = notice;
|
|
439
476
|
|
|
440
477
|
return toolResult(details)
|
|
441
|
-
.text
|
|
478
|
+
.content([{ type: "text", text: outputText }, ...images])
|
|
442
479
|
.truncationFromSummary(summaryForMeta, { direction: "tail" })
|
|
443
480
|
.done();
|
|
444
481
|
}
|
|
@@ -461,14 +498,13 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
|
|
|
461
498
|
languages,
|
|
462
499
|
cells: cellResults,
|
|
463
500
|
jsonOutputs: jsonOutputs.length > 0 ? jsonOutputs : undefined,
|
|
464
|
-
images: images.length > 0 ? images : undefined,
|
|
465
501
|
statusEvents: statusEvents.length > 0 ? statusEvents : undefined,
|
|
466
502
|
isError: true,
|
|
467
503
|
};
|
|
468
504
|
if (notice) details.notice = notice;
|
|
469
505
|
|
|
470
506
|
return toolResult(details)
|
|
471
|
-
.text
|
|
507
|
+
.content([{ type: "text", text: outputText }, ...images])
|
|
472
508
|
.truncationFromSummary(summaryForMeta, { direction: "tail" })
|
|
473
509
|
.done();
|
|
474
510
|
}
|
|
@@ -479,9 +515,12 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
|
|
|
479
515
|
|
|
480
516
|
const combinedOutput = cellOutputs.join("\n\n");
|
|
481
517
|
const abortSuffix = parsedInput.aborted ? `\n\n${ABORT_WARNING}` : "";
|
|
518
|
+
const hasImages = images.length > 0;
|
|
482
519
|
const outputText =
|
|
483
|
-
(combinedOutput ||
|
|
484
|
-
|
|
520
|
+
(combinedOutput ||
|
|
521
|
+
(hasImages
|
|
522
|
+
? `(displayed ${images.length} image${images.length === 1 ? "" : "s"}; no text output)`
|
|
523
|
+
: "(no output)")) + abortSuffix;
|
|
485
524
|
const summaryForMeta = await summarizeFinal(combinedOutput, finalizeOutput);
|
|
486
525
|
|
|
487
526
|
const details: EvalToolDetails = {
|
|
@@ -489,13 +528,12 @@ export class EvalTool implements AgentTool<typeof evalSchema> {
|
|
|
489
528
|
languages,
|
|
490
529
|
cells: cellResults,
|
|
491
530
|
jsonOutputs: jsonOutputs.length > 0 ? jsonOutputs : undefined,
|
|
492
|
-
images: images.length > 0 ? images : undefined,
|
|
493
531
|
statusEvents: statusEvents.length > 0 ? statusEvents : undefined,
|
|
494
532
|
};
|
|
495
533
|
if (notice) details.notice = notice;
|
|
496
534
|
|
|
497
535
|
return toolResult(details)
|
|
498
|
-
.text
|
|
536
|
+
.content([{ type: "text", text: outputText }, ...images])
|
|
499
537
|
.truncationFromSummary(summaryForMeta, { direction: "tail" })
|
|
500
538
|
.done();
|
|
501
539
|
} finally {
|
package/src/tools/fetch.ts
CHANGED
|
@@ -1352,7 +1352,7 @@ export function renderReadUrlCall(
|
|
|
1352
1352
|
): Component {
|
|
1353
1353
|
const url = args.path ?? args.url ?? "";
|
|
1354
1354
|
const domain = getDomain(url);
|
|
1355
|
-
const path = truncate(url.replace(/^https?:\/\/[^/]+/, ""), 50, "
|
|
1355
|
+
const path = truncate(url.replace(/^https?:\/\/[^/]+/, ""), 50, "…");
|
|
1356
1356
|
const description = `${domain}${path ? ` ${path}` : ""}`.trim();
|
|
1357
1357
|
const meta: string[] = [];
|
|
1358
1358
|
if (args.raw) meta.push("raw");
|