@oh-my-pi/pi-coding-agent 14.9.5 → 14.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +52 -0
- package/package.json +7 -7
- package/src/cli/setup-cli.ts +14 -161
- package/src/cli/stats-cli.ts +56 -2
- package/src/cli.ts +0 -1
- package/src/config/settings-schema.ts +0 -10
- package/src/eval/eval.lark +30 -10
- package/src/eval/js/context-manager.ts +334 -564
- package/src/eval/js/shared/helpers.ts +237 -0
- package/src/eval/js/shared/indirect-eval.ts +30 -0
- package/src/eval/js/shared/rewrite-imports.ts +211 -0
- package/src/eval/js/shared/runtime.ts +168 -0
- package/src/eval/js/shared/types.ts +18 -0
- package/src/eval/js/tool-bridge.ts +2 -4
- package/src/eval/js/worker-core.ts +146 -0
- package/src/eval/js/worker-entry.ts +24 -0
- package/src/eval/js/worker-protocol.ts +41 -0
- package/src/eval/parse.ts +218 -49
- package/src/eval/py/display.ts +71 -0
- package/src/eval/py/executor.ts +74 -89
- package/src/eval/py/index.ts +1 -2
- package/src/eval/py/kernel.ts +472 -900
- package/src/eval/py/prelude.py +95 -7
- package/src/eval/py/runner.py +879 -0
- package/src/eval/py/runtime.ts +3 -16
- package/src/eval/py/tool-bridge.ts +137 -0
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +93 -5
- package/src/internal-urls/docs-index.generated.ts +3 -3
- package/src/modes/controllers/command-controller.ts +0 -23
- package/src/prompts/tools/eval.md +14 -27
- package/src/session/agent-session.ts +0 -1
- package/src/session/history-storage.ts +77 -19
- package/src/tools/browser/tab-protocol.ts +4 -0
- package/src/tools/browser/tab-supervisor.ts +86 -5
- package/src/tools/browser/tab-worker.ts +104 -58
- package/src/tools/eval.ts +1 -1
- package/src/web/search/index.ts +6 -4
- package/src/cli/jupyter-cli.ts +0 -106
- package/src/commands/jupyter.ts +0 -32
- package/src/eval/py/cancellation.ts +0 -28
- package/src/eval/py/gateway-coordinator.ts +0 -424
- /package/src/eval/js/{prelude.ts → shared/prelude.ts} +0 -0
- /package/src/eval/js/{prelude.txt → shared/prelude.txt} +0 -0
|
@@ -14,7 +14,6 @@ import { formatDuration, Snowflake, setProjectDir } from "@oh-my-pi/pi-utils";
|
|
|
14
14
|
import { $ } from "bun";
|
|
15
15
|
import { reset as resetCapabilities } from "../../capability";
|
|
16
16
|
import { clearClaudePluginRootsCache } from "../../discovery/helpers";
|
|
17
|
-
import { getGatewayStatus } from "../../eval/py/gateway-coordinator";
|
|
18
17
|
import { loadCustomShare } from "../../export/custom-share";
|
|
19
18
|
import type { CompactOptions } from "../../extensibility/extensions/types";
|
|
20
19
|
import {
|
|
@@ -402,28 +401,6 @@ export class CommandController {
|
|
|
402
401
|
}
|
|
403
402
|
}
|
|
404
403
|
|
|
405
|
-
const gateway = await getGatewayStatus();
|
|
406
|
-
info += `\n${theme.bold("Python Gateway")}\n`;
|
|
407
|
-
if (gateway.active) {
|
|
408
|
-
info += `${theme.fg("dim", "Status:")} ${theme.fg("success", "Active (Global)")}\n`;
|
|
409
|
-
info += `${theme.fg("dim", "URL:")} ${gateway.url}\n`;
|
|
410
|
-
info += `${theme.fg("dim", "PID:")} ${gateway.pid}\n`;
|
|
411
|
-
if (gateway.pythonPath) {
|
|
412
|
-
info += `${theme.fg("dim", "Python:")} ${gateway.pythonPath}\n`;
|
|
413
|
-
}
|
|
414
|
-
if (gateway.venvPath) {
|
|
415
|
-
info += `${theme.fg("dim", "Venv:")} ${gateway.venvPath}\n`;
|
|
416
|
-
}
|
|
417
|
-
if (gateway.uptime !== null) {
|
|
418
|
-
const uptimeSec = Math.floor(gateway.uptime / 1000);
|
|
419
|
-
const mins = Math.floor(uptimeSec / 60);
|
|
420
|
-
const secs = uptimeSec % 60;
|
|
421
|
-
info += `${theme.fg("dim", "Uptime:")} ${mins}m ${secs}s\n`;
|
|
422
|
-
}
|
|
423
|
-
} else {
|
|
424
|
-
info += `${theme.fg("dim", "Status:")} ${theme.fg("dim", "Inactive")}\n`;
|
|
425
|
-
}
|
|
426
|
-
|
|
427
404
|
if (this.ctx.lspServers && this.ctx.lspServers.length > 0) {
|
|
428
405
|
info += `\n${theme.bold("LSP Servers")}\n`;
|
|
429
406
|
for (const server of this.ctx.lspServers) {
|
|
@@ -1,23 +1,18 @@
|
|
|
1
1
|
Run code in a persistent kernel using codeblock cells.
|
|
2
2
|
|
|
3
3
|
<instruction>
|
|
4
|
-
Each cell
|
|
4
|
+
Each cell starts with a single header line and runs until the next header (or end of input):
|
|
5
5
|
|
|
6
6
|
```
|
|
7
|
-
***
|
|
8
|
-
*** Title: optional title
|
|
9
|
-
*** Timeout: 10s
|
|
10
|
-
*** Reset
|
|
7
|
+
*** Cell py:"optional title" t:10s rst
|
|
11
8
|
print("hi")
|
|
12
|
-
*** End PY
|
|
13
9
|
```
|
|
14
10
|
|
|
15
|
-
- **Language**: {{#if py}}`
|
|
16
|
-
- **Attributes** (optional, in
|
|
17
|
-
-
|
|
18
|
-
-
|
|
19
|
-
|
|
20
|
-
- Anything between the last attribute and `*** End <LANG>` is the cell's code, verbatim.
|
|
11
|
+
- **Language + title**: `<lang>:"<title>"` — {{#if py}}`py` for Python{{/if}}{{#ifAll py js}}, {{/ifAll}}{{#if js}}`js` for JavaScript{{/if}}. Title may be empty (`py:""`).
|
|
12
|
+
- **Attributes** (optional, in this order, after the language+title):
|
|
13
|
+
- `t:<duration>` — per-cell timeout. Digits with optional `ms` / `s` / `m` units (e.g. `500ms`, `15s`, `2m`). Default 30s.
|
|
14
|
+
- `rst` — wipe this cell's own language kernel before running.{{#ifAll py js}} Other languages are untouched.{{/ifAll}}
|
|
15
|
+
- Anything after the header line, up to the next `*** Cell` header, is the cell's code, verbatim.
|
|
21
16
|
- Stack multiple cells back-to-back; blank lines between cells are ignored.
|
|
22
17
|
|
|
23
18
|
**Work incrementally:**
|
|
@@ -50,40 +45,32 @@ env(key?=None, value?=None) → str | None | dict
|
|
|
50
45
|
No args → full environment as dict. One arg → value of `key`. Two args → set `key=value` and return value.
|
|
51
46
|
output(*ids, format?="raw", query?=None, offset?=None, limit?=None) → str | dict | list[dict]
|
|
52
47
|
Read task/agent output by ID. Single id returns text/dict; multiple ids return a list.
|
|
48
|
+
tool.<name>(args) → unknown
|
|
49
|
+
Invoke any session tool by name. `args` is the tool's parameter object.
|
|
53
50
|
```
|
|
54
|
-
|
|
55
|
-
{{#if js}}**JavaScript only:** `tool.<name>(args)` invokes any session tool directly (e.g. `await tool.read({ path: "src/foo.ts" })`).
|
|
56
|
-
{{/if}}</prelude>
|
|
51
|
+
</prelude>
|
|
57
52
|
|
|
58
53
|
<output>
|
|
59
54
|
Cells render like a Jupyter notebook. `display(value)` renders non-presentable data as an interactive JSON tree. Presentable values (figures, images, dataframes, etc.) use their native representation.
|
|
60
55
|
</output>
|
|
61
56
|
|
|
62
57
|
<caution>
|
|
63
|
-
- In session mode, use
|
|
58
|
+
- In session mode, use `rst` on a cell to wipe its language's kernel before running.{{#ifAll py js}} Reset is per-language: a Python cell's `rst` does not touch the JavaScript kernel and vice versa.{{/ifAll}}
|
|
64
59
|
{{#if js}}- **js**: the VM exposes a selective `process` subset, Web APIs, `Buffer`, `fs/promises`, and the `Bun` global.
|
|
65
60
|
{{/if}}</caution>
|
|
66
61
|
|
|
67
62
|
<example>
|
|
68
|
-
{{#if py}}***
|
|
69
|
-
*** Title: imports
|
|
70
|
-
*** Timeout: 10s
|
|
63
|
+
{{#if py}}*** Cell py:"imports" t:10s
|
|
71
64
|
import json
|
|
72
65
|
from pathlib import Path
|
|
73
|
-
*** End PY
|
|
74
66
|
|
|
75
|
-
***
|
|
76
|
-
*** Title: load config
|
|
67
|
+
*** Cell py:"load config"
|
|
77
68
|
data = json.loads(read('package.json'))
|
|
78
69
|
display(data)
|
|
79
|
-
*** End PY
|
|
80
70
|
{{/if}}{{#ifAll py js}}
|
|
81
|
-
{{/ifAll}}{{#if js}}***
|
|
82
|
-
*** Title: js summary
|
|
83
|
-
*** Reset
|
|
71
|
+
{{/ifAll}}{{#if js}}*** Cell js:"summary" rst
|
|
84
72
|
const data = JSON.parse(await read('package.json'));
|
|
85
73
|
display(data);
|
|
86
74
|
return data.name;
|
|
87
|
-
*** End JS
|
|
88
75
|
{{/if}}
|
|
89
76
|
</example>
|
|
@@ -6561,7 +6561,6 @@ export class AgentSession {
|
|
|
6561
6561
|
sessionId,
|
|
6562
6562
|
kernelOwnerId: this.#evalKernelOwnerId,
|
|
6563
6563
|
kernelMode: this.settings.get("python.kernelMode"),
|
|
6564
|
-
useSharedGateway: this.settings.get("python.sharedGateway"),
|
|
6565
6564
|
onChunk,
|
|
6566
6565
|
signal: abortController.signal,
|
|
6567
6566
|
});
|
|
@@ -19,6 +19,12 @@ type HistoryRow = {
|
|
|
19
19
|
|
|
20
20
|
const SQLITE_NOW_EPOCH = "CAST(strftime('%s','now') AS INTEGER)";
|
|
21
21
|
|
|
22
|
+
/**
 * Escapes SQL `LIKE` metacharacters so user-supplied text matches literally.
 *
 * Every backslash, `%` and `_` in `text` is prefixed with a backslash; this
 * pairs with the backslash `ESCAPE` clause used by the substring-search
 * statements.
 */
function escapeLikePattern(text: string): string {
	return text.replace(/[\\%_]/g, special => `\\${special}`);
}
|
|
27
|
+
|
|
22
28
|
class AsyncDrain<T> {
|
|
23
29
|
#queue?: T[];
|
|
24
30
|
#promise = Promise.resolve();
|
|
@@ -63,6 +69,8 @@ export class HistoryStorage {
|
|
|
63
69
|
#recentStmt: Statement;
|
|
64
70
|
#searchStmt: Statement;
|
|
65
71
|
#lastPromptStmt: Statement;
|
|
72
|
+
// Cache substring-fallback prepared statements keyed by token count.
|
|
73
|
+
#substringStmts = new Map<number, Statement>();
|
|
66
74
|
|
|
67
75
|
// In-memory cache of last prompt to avoid sync DB reads on add
|
|
68
76
|
#lastPromptCache: string | null = null;
|
|
@@ -167,16 +175,53 @@ CREATE TRIGGER IF NOT EXISTS history_ai AFTER INSERT ON history BEGIN
|
|
|
167
175
|
const safeLimit = this.#normalizeLimit(limit);
|
|
168
176
|
if (safeLimit === 0) return [];
|
|
169
177
|
|
|
170
|
-
const
|
|
171
|
-
if (
|
|
178
|
+
const tokens = this.#tokenize(query);
|
|
179
|
+
if (tokens.length === 0) return [];
|
|
172
180
|
|
|
181
|
+
// 1. FTS5 prefix match (token AND, prefix-wildcard per token).
|
|
182
|
+
// Handles punctuation by tokenizing query the same way unicode61 tokenizer
|
|
183
|
+
// indexed the stored text, so "git-commit" -> "git"* "commit"*.
|
|
184
|
+
const ftsQuery = tokens.map(tok => `"${tok.replace(/"/g, '""')}"*`).join(" ");
|
|
185
|
+
let ftsRows: HistoryRow[] = [];
|
|
173
186
|
try {
|
|
174
|
-
|
|
175
|
-
return rows.map(row => this.#toEntry(row));
|
|
187
|
+
ftsRows = this.#searchStmt.all(ftsQuery, safeLimit) as HistoryRow[];
|
|
176
188
|
} catch (error) {
|
|
177
|
-
|
|
178
|
-
|
|
189
|
+
// Malformed FTS expression - fall through to substring path.
|
|
190
|
+
logger.debug("HistoryStorage FTS query failed, using substring only", { error: String(error) });
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
if (ftsRows.length >= safeLimit) {
|
|
194
|
+
return ftsRows.map(row => this.#toEntry(row));
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
// 2. Substring fallback (token-AND LIKE). Catches infix matches FTS5's
|
|
198
|
+
// prefix-only wildcard cannot reach (e.g. "mit" -> "commit"). Bounded
|
|
199
|
+
// by safeLimit, ordered by recency - no full-table load into JS.
|
|
200
|
+
let subRows: HistoryRow[] = [];
|
|
201
|
+
try {
|
|
202
|
+
subRows = this.#searchSubstring(tokens, safeLimit);
|
|
203
|
+
} catch (error) {
|
|
204
|
+
logger.error("HistoryStorage substring search failed", { error: String(error) });
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
if (ftsRows.length === 0) {
|
|
208
|
+
return subRows.map(row => this.#toEntry(row));
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
const seen = new Set<number>();
|
|
212
|
+
const merged: HistoryEntry[] = [];
|
|
213
|
+
for (const row of ftsRows) {
|
|
214
|
+
if (seen.has(row.id)) continue;
|
|
215
|
+
seen.add(row.id);
|
|
216
|
+
merged.push(this.#toEntry(row));
|
|
179
217
|
}
|
|
218
|
+
for (const row of subRows) {
|
|
219
|
+
if (merged.length >= safeLimit) break;
|
|
220
|
+
if (seen.has(row.id)) continue;
|
|
221
|
+
seen.add(row.id);
|
|
222
|
+
merged.push(this.#toEntry(row));
|
|
223
|
+
}
|
|
224
|
+
return merged;
|
|
180
225
|
}
|
|
181
226
|
|
|
182
227
|
#ensureDir(dbPath: string): void {
|
|
@@ -225,21 +270,34 @@ END;
|
|
|
225
270
|
return Math.min(clamped, 1000);
|
|
226
271
|
}
|
|
227
272
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
273
|
+
/**
 * Breaks a search query into lowercase word tokens.
 *
 * The query is lowercased first, then every maximal run of Unicode letters
 * and digits becomes one token; everything else acts as a separator. This is
 * intended to line up with how FTS5's `unicode61` tokenizer indexed the
 * stored prompts, and lowercasing keeps substring matching stable.
 *
 * @returns The tokens in query order; empty when the query has no letters or digits.
 */
#tokenize(query: string): string[] {
	const lowered = query.toLowerCase();
	return lowered.match(/[\p{L}\p{N}]+/gu) ?? [];
}
|
|
234
284
|
|
|
235
|
-
|
|
285
|
+
/**
 * Runs the token-AND `LIKE` substring query against the history table.
 *
 * Each token is wrapped as `%token%` after escaping LIKE wildcards, so a row
 * matches only when its prompt contains every token literally.
 *
 * @param tokens Query tokens; each becomes one `LIKE` condition.
 * @param limit Maximum number of rows to return (bound to the trailing `LIMIT ?`).
 * @returns Matching rows, or an empty array when `tokens` is empty.
 */
#searchSubstring(tokens: string[], limit: number): HistoryRow[] {
	// With zero tokens the generated SQL would read `WHERE  ORDER BY ...`, which
	// is malformed; there is nothing to match, so answer without touching the DB.
	if (tokens.length === 0) return [];

	const stmt = this.#getSubstringStmt(tokens.length);
	const params: unknown[] = tokens.map(tok => `%${escapeLikePattern(tok)}%`);
	params.push(limit);
	return stmt.all(...(params as [string, ...unknown[]])) as HistoryRow[];
}
|
|
236
291
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
292
|
+
/**
 * Returns the prepared substring-search statement for `tokenCount` tokens,
 * preparing and caching it on first use.
 *
 * The statement ANDs one `prompt LIKE ?` condition per token, orders by
 * `created_at` then `id` (both descending) and takes a final `LIMIT ?` parameter.
 */
#getSubstringStmt(tokenCount: number): Statement {
	const cached = this.#substringStmts.get(tokenCount);
	if (cached !== undefined) return cached;

	// NOTE(review): `COLLATE NOCASE` trails the ESCAPE expression here; it probably
	// has no effect on LIKE matching — confirm against the SQLite docs.
	const likeClause = "prompt LIKE ? ESCAPE '\\' COLLATE NOCASE";
	const conditions = Array(tokenCount).fill(likeClause).join(" AND ");
	const prepared = this.#db.prepare(
		`SELECT id, prompt, created_at, cwd FROM history WHERE ${conditions} ORDER BY created_at DESC, id DESC LIMIT ?`,
	);
	this.#substringStmts.set(tokenCount, prepared);
	return prepared;
}
|
|
244
302
|
|
|
245
303
|
#toEntry(row: HistoryRow): HistoryEntry {
|
|
@@ -59,10 +59,13 @@ export type WorkerInitPayload =
|
|
|
59
59
|
dialogs?: "accept" | "dismiss";
|
|
60
60
|
};
|
|
61
61
|
|
|
62
|
+
/** Outcome of a tool call carried by a `tool-reply` message: the tool's value on success, or an error payload on failure. */
export type ToolReply = { ok: true; value: unknown } | { ok: false; error: RunErrorPayload };
|
|
63
|
+
|
|
62
64
|
export type WorkerInbound =
|
|
63
65
|
| { type: "init"; payload: WorkerInitPayload }
|
|
64
66
|
| { type: "run"; id: string; name: string; code: string; timeoutMs: number; session: SessionSnapshot }
|
|
65
67
|
| { type: "abort"; id: string }
|
|
68
|
+
| { type: "tool-reply"; id: string; reply: ToolReply }
|
|
66
69
|
| { type: "close" };
|
|
67
70
|
|
|
68
71
|
export interface ReadyInfo {
|
|
@@ -91,6 +94,7 @@ export type WorkerOutbound =
|
|
|
91
94
|
| { type: "init-failed"; error: RunErrorPayload }
|
|
92
95
|
| { type: "result"; id: string; ok: true; payload: RunResultOk }
|
|
93
96
|
| { type: "result"; id: string; ok: false; error: RunErrorPayload }
|
|
97
|
+
| { type: "tool-call"; id: string; runId: string; name: string; args: unknown }
|
|
94
98
|
| { type: "log"; level: "debug" | "warn" | "error"; msg: string; meta?: Record<string, unknown> }
|
|
95
99
|
| { type: "closed" };
|
|
96
100
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { getPuppeteerDir, logger, Snowflake } from "@oh-my-pi/pi-utils";
|
|
2
2
|
import type { Page, Target } from "puppeteer-core";
|
|
3
|
+
import { callSessionTool } from "../../eval/js/tool-bridge";
|
|
3
4
|
import type { ToolSession } from "../../sdk";
|
|
4
5
|
import { expandPath } from "../path-utils";
|
|
5
6
|
import { ToolAbortError, ToolError } from "../tool-errors";
|
|
@@ -37,6 +38,14 @@ interface WorkerHandle {
|
|
|
37
38
|
|
|
38
39
|
export type DialogPolicy = "accept" | "dismiss";
|
|
39
40
|
|
|
41
|
+
/** Bookkeeping kept for one in-flight run in a tab. */
export interface PendingRun {
	/** Settles the run's promise with a successful result. */
	resolve(result: RunResultOk): void;
	/** Settles the run's promise with a failure. */
	reject(error: unknown): void;
	/** Session that tool calls made during the run are executed against. */
	session: ToolSession;
	/** Caller's abort signal for the run, when one was supplied. */
	signal?: AbortSignal;
	/** Abort controllers of the run's in-flight tool calls, keyed by tool-call id. */
	toolCalls: Map<string, AbortController>;
}
|
|
48
|
+
|
|
40
49
|
export interface TabSession {
|
|
41
50
|
name: string;
|
|
42
51
|
browser: BrowserHandle;
|
|
@@ -44,7 +53,7 @@ export interface TabSession {
|
|
|
44
53
|
worker: WorkerHandle;
|
|
45
54
|
state: "alive" | "dead";
|
|
46
55
|
info: ReadyInfo;
|
|
47
|
-
pending: Map<string,
|
|
56
|
+
pending: Map<string, PendingRun>;
|
|
48
57
|
dialogPolicy?: DialogPolicy;
|
|
49
58
|
kindTag: BrowserKindTag;
|
|
50
59
|
}
|
|
@@ -155,14 +164,14 @@ export async function acquireTab(
|
|
|
155
164
|
export async function runInTab(name: string, opts: RunInTabOptions): Promise<RunResultOk> {
|
|
156
165
|
return await runInTabWithSnapshot(
|
|
157
166
|
name,
|
|
158
|
-
{ code: opts.code, timeoutMs: opts.timeoutMs, signal: opts.signal },
|
|
167
|
+
{ code: opts.code, timeoutMs: opts.timeoutMs, signal: opts.signal, session: opts.session },
|
|
159
168
|
{ cwd: opts.session.cwd, browserScreenshotDir: expandBrowserScreenshotDir(opts.session) },
|
|
160
169
|
);
|
|
161
170
|
}
|
|
162
171
|
|
|
163
172
|
async function runInTabWithSnapshot(
|
|
164
173
|
name: string,
|
|
165
|
-
opts: { code: string; timeoutMs: number; signal?: AbortSignal },
|
|
174
|
+
opts: { code: string; timeoutMs: number; signal?: AbortSignal; session?: ToolSession },
|
|
166
175
|
snapshot: SessionSnapshot,
|
|
167
176
|
): Promise<RunResultOk> {
|
|
168
177
|
const tab = tabs.get(name);
|
|
@@ -170,8 +179,18 @@ async function runInTabWithSnapshot(
|
|
|
170
179
|
if (tab.pending.size > 0) throw new ToolError(`Tab ${JSON.stringify(name)} is busy`);
|
|
171
180
|
const id = Snowflake.next();
|
|
172
181
|
const { promise, resolve, reject } = Promise.withResolvers<RunResultOk>();
|
|
173
|
-
|
|
174
|
-
|
|
182
|
+
const pending: PendingRun = {
|
|
183
|
+
resolve,
|
|
184
|
+
reject,
|
|
185
|
+
session: opts.session ?? ({} as ToolSession),
|
|
186
|
+
signal: opts.signal,
|
|
187
|
+
toolCalls: new Map(),
|
|
188
|
+
};
|
|
189
|
+
tab.pending.set(id, pending);
|
|
190
|
+
const abort = (): void => {
|
|
191
|
+
tab.worker.send({ type: "abort", id });
|
|
192
|
+
for (const ctrl of pending.toolCalls.values()) ctrl.abort(opts.signal?.reason);
|
|
193
|
+
};
|
|
175
194
|
if (opts.signal?.aborted) abort();
|
|
176
195
|
else opts.signal?.addEventListener("abort", abort, { once: true });
|
|
177
196
|
try {
|
|
@@ -277,9 +296,71 @@ function handleTabMessage(tab: TabSession, msg: WorkerOutbound): void {
|
|
|
277
296
|
tab.info = msg.info;
|
|
278
297
|
return;
|
|
279
298
|
}
|
|
299
|
+
if (msg.type === "tool-call") {
|
|
300
|
+
void dispatchToolCall(tab, msg);
|
|
301
|
+
return;
|
|
302
|
+
}
|
|
280
303
|
if (msg.type === "log") logWorkerMessage(msg);
|
|
281
304
|
}
|
|
282
305
|
|
|
306
|
+
/**
 * Executes a `tool-call` request from a tab worker and answers it with a
 * `tool-reply` message.
 *
 * The call is run against the session of the pending run named by
 * `msg.runId`. If that run is missing or its session has no `cwd`, an error
 * reply is sent instead. Each call gets its own AbortController, registered
 * in the run's `toolCalls` map and aborted when the run's signal aborts.
 *
 * @param tab Tab whose worker issued the call and receives the reply.
 * @param msg The worker's `tool-call` message.
 */
async function dispatchToolCall(tab: TabSession, msg: Extract<WorkerOutbound, { type: "tool-call" }>): Promise<void> {
	const respond = (reply: Extract<WorkerInbound, { type: "tool-reply" }>["reply"]): void =>
		safeSend(tab, { type: "tool-reply", id: msg.id, reply });

	const run = tab.pending.get(msg.runId);
	if (!run?.session.cwd) {
		respond({
			ok: false,
			error: { name: "ToolError", message: "No active run for tool call", isToolError: true, isAbort: false },
		});
		return;
	}

	const controller = new AbortController();
	run.toolCalls.set(msg.id, controller);

	// Mirror the run's abort onto this call's controller.
	const forwardAbort = (): void => controller.abort(run.signal?.reason);
	if (run.signal?.aborted) {
		forwardAbort();
	} else {
		run.signal?.addEventListener("abort", forwardAbort, { once: true });
	}

	try {
		const value = await callSessionTool(msg.name, msg.args, {
			session: run.session,
			signal: controller.signal,
			emitStatus: () => {
				// Tool status events are not forwarded to user code yet; the worker
				// already reports its own helper status through the display channel.
			},
		});
		respond({ ok: true, value });
	} catch (error) {
		respond({ ok: false, error: toErrorPayload(error) });
	} finally {
		// Always unregister, whether the call succeeded, failed or was aborted.
		run.toolCalls.delete(msg.id);
		run.signal?.removeEventListener("abort", forwardAbort);
	}
}
|
|
341
|
+
|
|
342
|
+
/**
 * Best-effort send to a tab's worker.
 *
 * Does nothing unless the tab is `"alive"`; a throwing `send` is logged at
 * debug level and swallowed rather than propagated.
 */
function safeSend(tab: TabSession, msg: WorkerInbound): void {
	if (tab.state === "alive") {
		try {
			tab.worker.send(msg);
		} catch (sendError) {
			const detail = sendError instanceof Error ? sendError.message : String(sendError);
			logger.debug("tab worker send failed", { error: detail });
		}
	}
}
|
|
350
|
+
|
|
351
|
+
/**
 * Converts a thrown value into a `RunErrorPayload`.
 *
 * `Error` instances keep their name, message and stack. They are flagged as
 * aborts when named `AbortError` or `ToolAbortError`, and as tool errors when
 * they are `ToolError` instances or named `ToolError`. Any other thrown value
 * is stringified into a generic, unflagged `Error` payload.
 */
function toErrorPayload(error: unknown): RunErrorPayload {
	if (!(error instanceof Error)) {
		return { name: "Error", message: String(error), isAbort: false, isToolError: false };
	}
	const { name, message, stack } = error;
	const isAbort = name === "AbortError" || name === "ToolAbortError";
	const isToolError = error instanceof ToolError || name === "ToolError";
	return { name, message, stack, isAbort, isToolError };
}
|
|
363
|
+
|
|
283
364
|
async function forceKillTab(name: string, reason: string): Promise<void> {
|
|
284
365
|
const tab = tabs.get(name);
|
|
285
366
|
if (!tab) return;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as os from "node:os";
|
|
3
3
|
import * as path from "node:path";
|
|
4
|
-
|
|
4
|
+
|
|
5
5
|
import { Snowflake, untilAborted } from "@oh-my-pi/pi-utils";
|
|
6
6
|
import type { HTMLElement } from "linkedom";
|
|
7
7
|
import type {
|
|
@@ -14,6 +14,8 @@ import type {
|
|
|
14
14
|
SerializedAXNode,
|
|
15
15
|
Target,
|
|
16
16
|
} from "puppeteer-core";
|
|
17
|
+
import { JsRuntime, type RuntimeHooks } from "../../eval/js/shared/runtime";
|
|
18
|
+
import type { JsDisplayOutput } from "../../eval/js/shared/types";
|
|
17
19
|
import { resizeImage } from "../../utils/image-resize";
|
|
18
20
|
import { resolveToCwd } from "../path-utils";
|
|
19
21
|
import { formatScreenshot } from "../render-utils";
|
|
@@ -34,6 +36,7 @@ import type {
|
|
|
34
36
|
RunResultOk,
|
|
35
37
|
ScreenshotResult,
|
|
36
38
|
SessionSnapshot,
|
|
39
|
+
ToolReply,
|
|
37
40
|
Transport,
|
|
38
41
|
WorkerInbound,
|
|
39
42
|
WorkerInitPayload,
|
|
@@ -177,6 +180,27 @@ function errorPayload(error: unknown): RunErrorPayload {
|
|
|
177
180
|
return { name: "Error", message: String(error), isToolError: false, isAbort: false };
|
|
178
181
|
}
|
|
179
182
|
|
|
183
|
+
/**
 * Serializes `value` as pretty-printed JSON (2-space indent) and always
 * returns a string.
 *
 * Falls back to `String(value)` when `JSON.stringify` throws (circular
 * structures, BigInt) or yields `undefined` (for `undefined`, functions,
 * symbols, or a `toJSON` that returns `undefined`).
 */
function safeJsonStringify(value: unknown): string {
	try {
		// The lib typing says `string`, but the runtime result can be `undefined`.
		const json: string | undefined = JSON.stringify(value, null, 2);
		return json ?? String(value);
	} catch {
		return String(value);
	}
}
|
|
190
|
+
|
|
191
|
+
/**
 * Rebuilds an `Error` object from an error payload received in a tool reply.
 *
 * Abort payloads become a `ToolAbortError` (default message
 * "Tool call aborted"). Other payloads become a `ToolError` or plain `Error`
 * depending on `isToolError`, taking the payload's name when present. The
 * payload's stack, when present, replaces the new error's stack in both cases.
 */
function replyError(payload: RunErrorPayload): Error {
	const { isAbort, isToolError, message, name, stack } = payload;
	let rebuilt: Error;
	if (isAbort) {
		rebuilt = new ToolAbortError(message || "Tool call aborted");
	} else {
		rebuilt = isToolError ? new ToolError(message) : new Error(message);
		if (name) rebuilt.name = name;
	}
	if (stack) rebuilt.stack = stack;
	return rebuilt;
}
|
|
203
|
+
|
|
180
204
|
async function targetIdForTarget(target: Target): Promise<string> {
|
|
181
205
|
const raw = target as unknown as { _targetId?: unknown };
|
|
182
206
|
if (typeof raw._targetId === "string") return raw._targetId;
|
|
@@ -361,6 +385,14 @@ async function clickQueryHandlerText(
|
|
|
361
385
|
);
|
|
362
386
|
}
|
|
363
387
|
|
|
388
|
+
/** State of the run currently executing in this worker. */
interface ActiveRun {
	/** Run id taken from the `run` message. */
	id: string;
	/** Controller aborted when an `abort` message for this run arrives. */
	ac: AbortController;
	/** Display outputs collected for the run. */
	displays: RunResultOk["displays"];
	/** Screenshots collected for the run. */
	screenshots: ScreenshotResult[];
	/** Settlers for tool calls awaiting a `tool-reply`, keyed by tool-call id. */
	pendingTools: Map<string, { resolve(value: unknown): void; reject(error: Error): void }>;
}
|
|
395
|
+
|
|
364
396
|
export class WorkerCore {
|
|
365
397
|
#transport: Transport;
|
|
366
398
|
#browser?: Browser;
|
|
@@ -368,7 +400,8 @@ export class WorkerCore {
|
|
|
368
400
|
#targetId?: string;
|
|
369
401
|
#elementCache = new Map<number, ElementHandle>();
|
|
370
402
|
#elementCounter = 0;
|
|
371
|
-
#active
|
|
403
|
+
#active: ActiveRun | null = null;
|
|
404
|
+
#runtime: JsRuntime | null = null;
|
|
372
405
|
#unsub: () => void;
|
|
373
406
|
#mode?: WorkerInitPayload["mode"];
|
|
374
407
|
#dialogPolicy?: DialogPolicy;
|
|
@@ -401,6 +434,9 @@ export class WorkerCore {
|
|
|
401
434
|
case "abort":
|
|
402
435
|
if (this.#active?.id === msg.id) this.#active.ac.abort(new ToolAbortError());
|
|
403
436
|
return;
|
|
437
|
+
case "tool-reply":
|
|
438
|
+
this.#deliverToolReply(msg.id, msg.reply);
|
|
439
|
+
return;
|
|
404
440
|
case "close":
|
|
405
441
|
await this.#close();
|
|
406
442
|
return;
|
|
@@ -502,37 +538,26 @@ export class WorkerCore {
|
|
|
502
538
|
const timeoutSignal = AbortSignal.timeout(msg.timeoutMs);
|
|
503
539
|
const ac = new AbortController();
|
|
504
540
|
const signal = AbortSignal.any([timeoutSignal, ac.signal]);
|
|
505
|
-
this.#active = { id: msg.id, ac };
|
|
506
541
|
const displays: RunResultOk["displays"] = [];
|
|
507
542
|
const screenshots: ScreenshotResult[] = [];
|
|
543
|
+
const active: ActiveRun = { id: msg.id, ac, displays, screenshots, pendingTools: new Map() };
|
|
544
|
+
this.#active = active;
|
|
508
545
|
try {
|
|
509
546
|
throwIfAborted(signal);
|
|
510
547
|
const page = this.#requirePage();
|
|
511
548
|
const browser = this.#requireBrowser();
|
|
512
549
|
const tabApi = this.#createTabApi(msg.name, msg.timeoutMs, signal, msg.session, displays, screenshots);
|
|
513
|
-
const
|
|
550
|
+
const runtime = this.#ensureRuntime(msg.session);
|
|
551
|
+
runtime.setCwd(msg.session.cwd);
|
|
552
|
+
runtime.setRunScope({
|
|
514
553
|
page,
|
|
515
554
|
browser,
|
|
516
555
|
tab: tabApi,
|
|
517
|
-
display: (value: unknown): void => this.#display(displays, value),
|
|
518
556
|
assert: (cond: unknown, text?: string): void => {
|
|
519
557
|
if (!cond) throw new ToolError(text ?? "Assertion failed");
|
|
520
558
|
},
|
|
521
559
|
wait: (ms: number): Promise<void> => Bun.sleep(ms),
|
|
522
|
-
console: this.#console(),
|
|
523
|
-
setTimeout,
|
|
524
|
-
clearTimeout,
|
|
525
|
-
setInterval,
|
|
526
|
-
clearInterval,
|
|
527
|
-
queueMicrotask,
|
|
528
|
-
Promise,
|
|
529
|
-
URL,
|
|
530
|
-
URLSearchParams,
|
|
531
|
-
TextEncoder,
|
|
532
|
-
TextDecoder,
|
|
533
|
-
Buffer,
|
|
534
560
|
});
|
|
535
|
-
const wrapped = `(async () => {\n${msg.code}\n})()`;
|
|
536
561
|
const { promise: cancelRejection, reject: rejectCancel } = Promise.withResolvers<never>();
|
|
537
562
|
const onCancel = (): void => {
|
|
538
563
|
rejectCancel(
|
|
@@ -540,15 +565,17 @@ export class WorkerCore {
|
|
|
540
565
|
? new ToolError(`Browser code execution timed out after ${msg.timeoutMs}ms`)
|
|
541
566
|
: new ToolAbortError(),
|
|
542
567
|
);
|
|
568
|
+
// Cancel in-flight tool calls so user code's awaited proxies reject promptly.
|
|
569
|
+
for (const pending of active.pendingTools.values()) {
|
|
570
|
+
pending.reject(new ToolAbortError());
|
|
571
|
+
}
|
|
572
|
+
active.pendingTools.clear();
|
|
543
573
|
};
|
|
544
574
|
if (signal.aborted) onCancel();
|
|
545
575
|
else signal.addEventListener("abort", onCancel, { once: true });
|
|
546
576
|
try {
|
|
547
577
|
const returnValue = await Promise.race([
|
|
548
|
-
|
|
549
|
-
filename: `browser-run-${msg.id}.js`,
|
|
550
|
-
lineOffset: -1,
|
|
551
|
-
}) as Promise<unknown>,
|
|
578
|
+
runtime.run(msg.code, `browser-run-${msg.id}.js`),
|
|
552
579
|
cancelRejection,
|
|
553
580
|
]);
|
|
554
581
|
await this.#postReadyInfo();
|
|
@@ -564,8 +591,62 @@ export class WorkerCore {
|
|
|
564
591
|
} catch (error) {
|
|
565
592
|
this.#transport.send({ type: "result", id: msg.id, ok: false, error: errorPayload(error) });
|
|
566
593
|
} finally {
|
|
567
|
-
if (this.#active?.id === msg.id) this.#active =
|
|
594
|
+
if (this.#active?.id === msg.id) this.#active = null;
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
/**
 * Returns the worker's `JsRuntime`, creating it on first use.
 *
 * The runtime is built once: `session.cwd` and the current target id are
 * only read at creation time, and later calls return the same instance. Hooks
 * are resolved lazily through `#hooksForActiveRun` so they follow the active run.
 */
#ensureRuntime(session: SessionSnapshot): JsRuntime {
	this.#runtime ??= new JsRuntime({
		initialCwd: session.cwd,
		sessionId: `browser-tab-${this.#targetId ?? "unknown"}`,
		getHooks: () => this.#hooksForActiveRun(),
	});
	return this.#runtime;
}
|
|
607
|
+
|
|
608
|
+
/**
 * Builds the runtime hooks bound to the currently active run.
 *
 * @returns `null` when no run is active; otherwise hooks that log text
 * output, collect displays into the run, and route tool calls through it.
 */
#hooksForActiveRun(): RuntimeHooks | null {
	const run = this.#active;
	if (run === null) return null;

	const hooks: RuntimeHooks = {
		// Text output (console.*) goes to the debug log with one trailing newline
		// trimmed, rather than into the cell result — as before the shared runtime.
		onText: chunk => this.#log("debug", chunk.replace(/\n$/, "")),
		onDisplay: output => this.#pushDisplay(run.displays, output),
		callTool: (name, args) => this.#callTool(run, name, args),
	};
	return hooks;
}
|
|
619
|
+
|
|
620
|
+
/**
 * Appends one runtime display output to the run's display list.
 *
 * Images are passed through with their data and MIME type. JSON outputs and
 * the remaining variant (status events) are rendered as pretty-printed JSON text.
 */
#pushDisplay(displays: RunResultOk["displays"], output: JsDisplayOutput): void {
	switch (output.type) {
		case "image":
			displays.push({ type: "image", data: output.data, mimeType: output.mimeType });
			break;
		case "json":
			displays.push({ type: "text", text: safeJsonStringify(output.data) });
			break;
		default:
			// Status events: shown as JSON so helper side effects (read/write/tree)
			// appear in the cell result next to explicit display() output.
			displays.push({ type: "text", text: safeJsonStringify(output.event) });
	}
}
|
|
633
|
+
|
|
634
|
+
/**
 * Sends a tool call for `active` to the supervisor and waits for its reply.
 *
 * A waiter is registered in `active.pendingTools` under a fresh id, a
 * `tool-call` message is sent, and the returned promise settles when
 * `#deliverToolReply` (or the run's cancel path) resolves or rejects it.
 *
 * @param active Run on whose behalf the tool is called.
 * @param name Tool name.
 * @param args Tool parameter object, forwarded as-is.
 * @returns The tool's result value.
 * @throws ToolAbortError when `active` is no longer the current run or was aborted.
 */
async #callTool(active: ActiveRun, name: string, args: unknown): Promise<unknown> {
	// User code can keep running after its run was cancelled or finished. A waiter
	// registered then would never settle: the cancel path has already drained the
	// map, and replies are dropped once `#active` changes. Fail fast instead.
	if (this.#active !== active || active.ac.signal.aborted) {
		throw new ToolAbortError();
	}

	const id = `tab-tc-${active.id}-${crypto.randomUUID()}`;
	const { promise, resolve, reject } = Promise.withResolvers<unknown>();
	active.pendingTools.set(id, { resolve, reject });
	try {
		this.#transport.send({ type: "tool-call", id, runId: active.id, name, args });
	} catch (error) {
		// The request never left, so no reply will come; drop the waiter.
		active.pendingTools.delete(id);
		throw error;
	}
	return await promise;
}
|
|
641
|
+
|
|
642
|
+
/**
 * Settles the waiter of a pending tool call with the supervisor's reply.
 *
 * The reply is ignored when no run is active or the id is not pending.
 * Otherwise the waiter is removed, then resolved with the value or rejected
 * with an error rebuilt from the reply's payload.
 */
#deliverToolReply(id: string, reply: ToolReply): void {
	const tools = this.#active?.pendingTools;
	const waiter = tools?.get(id);
	if (!tools || !waiter) return;

	tools.delete(id);
	if (reply.ok) {
		waiter.resolve(reply.value);
		return;
	}
	waiter.reject(replyError(reply.error));
}
|
|
570
651
|
|
|
571
652
|
#createTabApi(
|
|
@@ -933,41 +1014,6 @@ export class WorkerCore {
|
|
|
933
1014
|
}
|
|
934
1015
|
return handle;
|
|
935
1016
|
}
|
|
936
|
-
|
|
937
|
-
#display(displays: RunResultOk["displays"], value: unknown): void {
|
|
938
|
-
if (value === undefined || value === null) return;
|
|
939
|
-
if (
|
|
940
|
-
typeof value === "object" &&
|
|
941
|
-
value !== null &&
|
|
942
|
-
"type" in (value as Record<string, unknown>) &&
|
|
943
|
-
(value as { type?: unknown }).type === "image"
|
|
944
|
-
) {
|
|
945
|
-
const img = value as { data?: unknown; mimeType?: unknown };
|
|
946
|
-
if (typeof img.data === "string" && typeof img.mimeType === "string") {
|
|
947
|
-
displays.push({ type: "image", data: img.data, mimeType: img.mimeType });
|
|
948
|
-
return;
|
|
949
|
-
}
|
|
950
|
-
}
|
|
951
|
-
if (typeof value === "string") {
|
|
952
|
-
displays.push({ type: "text", text: value });
|
|
953
|
-
return;
|
|
954
|
-
}
|
|
955
|
-
try {
|
|
956
|
-
displays.push({ type: "text", text: JSON.stringify(value, null, 2) });
|
|
957
|
-
} catch {
|
|
958
|
-
displays.push({ type: "text", text: String(value) });
|
|
959
|
-
}
|
|
960
|
-
}
|
|
961
|
-
|
|
962
|
-
#console(): Pick<Console, "log" | "debug" | "warn" | "error"> {
|
|
963
|
-
return {
|
|
964
|
-
log: (...args: unknown[]) => this.#log("debug", args.map(String).join(" ")),
|
|
965
|
-
debug: (...args: unknown[]) => this.#log("debug", args.map(String).join(" ")),
|
|
966
|
-
warn: (...args: unknown[]) => this.#log("warn", args.map(String).join(" ")),
|
|
967
|
-
error: (...args: unknown[]) => this.#log("error", args.map(String).join(" ")),
|
|
968
|
-
};
|
|
969
|
-
}
|
|
970
|
-
|
|
971
1017
|
#clearElementCache(): void {
|
|
972
1018
|
if (this.#elementCache.size === 0) {
|
|
973
1019
|
this.#elementCounter = 0;
|