@oh-my-pi/pi-coding-agent 14.5.12 → 14.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/CHANGELOG.md +32 -0
  2. package/package.json +18 -10
  3. package/src/cli/jupyter-cli.ts +1 -1
  4. package/src/config/model-equivalence.ts +49 -16
  5. package/src/config/model-registry.ts +100 -25
  6. package/src/config/model-resolver.ts +29 -15
  7. package/src/config/settings-schema.ts +20 -6
  8. package/src/config/settings.ts +9 -8
  9. package/src/config.ts +9 -0
  10. package/src/eval/backend.ts +43 -0
  11. package/src/eval/eval.lark +43 -0
  12. package/src/eval/index.ts +5 -0
  13. package/src/eval/js/context-manager.ts +717 -0
  14. package/src/eval/js/executor.ts +131 -0
  15. package/src/eval/js/index.ts +46 -0
  16. package/src/eval/js/prelude.ts +2 -0
  17. package/src/eval/js/prelude.txt +84 -0
  18. package/src/eval/js/tool-bridge.ts +124 -0
  19. package/src/eval/parse.ts +337 -0
  20. package/src/{ipy → eval/py}/executor.ts +2 -180
  21. package/src/{ipy → eval/py}/gateway-coordinator.ts +2 -2
  22. package/src/eval/py/index.ts +58 -0
  23. package/src/{ipy → eval/py}/kernel.ts +5 -41
  24. package/src/{ipy → eval/py}/prelude.py +39 -227
  25. package/src/eval/types.ts +48 -0
  26. package/src/export/html/template.generated.ts +1 -1
  27. package/src/export/html/template.js +8 -10
  28. package/src/extensibility/extensions/types.ts +2 -3
  29. package/src/internal-urls/docs-index.generated.ts +5 -5
  30. package/src/lsp/client.ts +9 -0
  31. package/src/lsp/index.ts +395 -0
  32. package/src/lsp/types.ts +15 -4
  33. package/src/main.ts +25 -14
  34. package/src/mcp/oauth-flow.ts +1 -1
  35. package/src/memories/index.ts +1 -1
  36. package/src/modes/acp/acp-event-mapper.ts +1 -1
  37. package/src/modes/components/{python-execution.ts → eval-execution.ts} +11 -4
  38. package/src/modes/components/login-dialog.ts +1 -1
  39. package/src/modes/components/oauth-selector.ts +2 -1
  40. package/src/modes/components/tool-execution.ts +3 -4
  41. package/src/modes/controllers/command-controller.ts +28 -8
  42. package/src/modes/controllers/input-controller.ts +4 -4
  43. package/src/modes/controllers/selector-controller.ts +2 -1
  44. package/src/modes/interactive-mode.ts +4 -5
  45. package/src/modes/types.ts +3 -3
  46. package/src/modes/utils/ui-helpers.ts +2 -2
  47. package/src/prompts/system/system-prompt.md +3 -3
  48. package/src/prompts/tools/eval.md +92 -0
  49. package/src/prompts/tools/lsp.md +7 -3
  50. package/src/sdk.ts +45 -31
  51. package/src/session/agent-session.ts +42 -42
  52. package/src/session/messages.ts +1 -1
  53. package/src/slash-commands/builtin-registry.ts +1 -1
  54. package/src/system-prompt.ts +34 -66
  55. package/src/task/executor.ts +5 -9
  56. package/src/tools/browser/launch.ts +22 -0
  57. package/src/tools/browser/registry.ts +25 -244
  58. package/src/tools/browser/render.ts +1 -1
  59. package/src/tools/browser/tab-protocol.ts +101 -0
  60. package/src/tools/browser/tab-supervisor.ts +429 -0
  61. package/src/tools/browser/tab-worker-entry.ts +21 -0
  62. package/src/tools/browser/tab-worker.ts +1006 -0
  63. package/src/tools/browser.ts +12 -29
  64. package/src/tools/checkpoint.ts +2 -2
  65. package/src/tools/{python.ts → eval.ts} +324 -315
  66. package/src/tools/exit-plan-mode.ts +1 -1
  67. package/src/tools/index.ts +62 -100
  68. package/src/tools/read.ts +0 -6
  69. package/src/tools/recipe/runners/pkg.ts +34 -32
  70. package/src/tools/renderers.ts +2 -2
  71. package/src/tools/resolve.ts +7 -2
  72. package/src/tools/todo-write.ts +0 -1
  73. package/src/tools/tool-timeouts.ts +2 -2
  74. package/src/utils/markit.ts +15 -7
  75. package/src/utils/tools-manager.ts +5 -5
  76. package/src/web/search/index.ts +5 -5
  77. package/src/web/search/provider.ts +121 -39
  78. package/src/web/search/providers/gemini.ts +2 -2
  79. package/src/web/search/render.ts +2 -2
  80. package/src/ipy/modules.ts +0 -144
  81. package/src/prompts/tools/python.md +0 -57
  82. package/src/tools/browser/vm.ts +0 -792
  83. /package/src/{ipy → eval/py}/cancellation.ts +0 -0
  84. /package/src/{ipy → eval/py}/prelude.ts +0 -0
  85. /package/src/{ipy → eval/py}/runtime.ts +0 -0
@@ -1,23 +1,10 @@
1
1
  import * as path from "node:path";
2
2
  import { logger } from "@oh-my-pi/pi-utils";
3
3
  import type { Subprocess } from "bun";
4
- import type { Browser, CDPSession, ElementHandle, Page } from "puppeteer-core";
4
+ import type { Browser, CDPSession } from "puppeteer-core";
5
5
  import { ToolAbortError, ToolError } from "../tool-errors";
6
- import {
7
- findFreeCdpPort,
8
- findReusableCdp,
9
- gracefulKillTreeOnce,
10
- killExistingByPath,
11
- pickElectronTarget,
12
- waitForCdp,
13
- } from "./attach";
14
- import {
15
- applyStealthPatches,
16
- applyViewport,
17
- launchHeadlessBrowser,
18
- loadPuppeteer,
19
- type UserAgentOverride,
20
- } from "./launch";
6
+ import { findFreeCdpPort, findReusableCdp, gracefulKillTreeOnce, killExistingByPath, waitForCdp } from "./attach";
7
+ import { BROWSER_PROTOCOL_TIMEOUT_MS, launchHeadlessBrowser, loadPuppeteer, type UserAgentOverride } from "./launch";
21
8
 
22
9
  export type BrowserKind =
23
10
  | { kind: "headless"; headless: boolean }
@@ -37,28 +24,7 @@ export interface BrowserHandle {
37
24
  stealth: { browserSession: CDPSession | null; override: UserAgentOverride | null };
38
25
  }
39
26
 
40
- export type DialogPolicy = "accept" | "dismiss";
41
-
42
- export interface TabHandle {
43
- name: string;
44
- browser: BrowserHandle;
45
- page: Page;
46
- elementCache: Map<number, ElementHandle>;
47
- elementCounter: number;
48
- dialogPolicy?: DialogPolicy;
49
- dialogHandler?: (dialog: { accept: () => Promise<void>; dismiss: () => Promise<void> }) => void;
50
- }
51
-
52
27
  const browsers = new Map<string, BrowserHandle>();
53
- const tabs = new Map<string, TabHandle>();
54
-
55
- export function getTab(name: string): TabHandle | undefined {
56
- return tabs.get(name);
57
- }
58
-
59
- export function listTabs(): TabHandle[] {
60
- return [...tabs.values()];
61
- }
62
28
 
63
29
  export function listBrowsers(): BrowserHandle[] {
64
30
  return [...browsers.values()];
@@ -86,9 +52,7 @@ export async function acquireBrowser(kind: BrowserKind, opts: AcquireBrowserOpti
86
52
  const key = browserKey(kind);
87
53
  const existing = browsers.get(key);
88
54
  if (existing) {
89
- // Headless: connection check; spawned/connected: connection check.
90
55
  if (existing.browser.connected) return existing;
91
- // Stale handle — purge and rebuild.
92
56
  browsers.delete(key);
93
57
  await disposeBrowserHandle(existing, { kill: false });
94
58
  }
@@ -113,7 +77,11 @@ async function openBrowserHandle(kind: BrowserKind, opts: AcquireBrowserOptions)
113
77
  const cdpUrl = kind.cdpUrl.replace(/\/+$/, "");
114
78
  await waitForCdp(cdpUrl, 5_000, opts.signal);
115
79
  const puppeteer = await loadPuppeteer();
116
- const browser = await puppeteer.connect({ browserURL: cdpUrl, defaultViewport: null });
80
+ const browser = await puppeteer.connect({
81
+ browserURL: cdpUrl,
82
+ defaultViewport: null,
83
+ protocolTimeout: BROWSER_PROTOCOL_TIMEOUT_MS,
84
+ });
117
85
  return {
118
86
  key: browserKey(kind),
119
87
  kind,
@@ -123,7 +91,7 @@ async function openBrowserHandle(kind: BrowserKind, opts: AcquireBrowserOptions)
123
91
  stealth: { browserSession: null, override: null },
124
92
  };
125
93
  }
126
- // spawned
94
+
127
95
  const exe = kind.path;
128
96
  if (!path.isAbsolute(exe)) {
129
97
  throw new ToolError(
@@ -135,18 +103,12 @@ async function openBrowserHandle(kind: BrowserKind, opts: AcquireBrowserOptions)
135
103
  let pid: number;
136
104
  let subprocess: Subprocess | undefined;
137
105
  if (reused) {
138
- logger.debug("Reusing existing CDP endpoint for attach", {
139
- exe,
140
- pid: reused.pid,
141
- cdpUrl: reused.cdpUrl,
142
- });
106
+ logger.debug("Reusing existing CDP endpoint for attach", { exe, pid: reused.pid, cdpUrl: reused.cdpUrl });
143
107
  cdpUrl = reused.cdpUrl;
144
108
  pid = reused.pid;
145
109
  } else {
146
110
  const killed = await killExistingByPath(exe, opts.signal);
147
- if (killed > 0) {
148
- logger.debug("Killed existing instances before attach", { exe, killed });
149
- }
111
+ if (killed > 0) logger.debug("Killed existing instances before attach", { exe, killed });
150
112
  const port = await findFreeCdpPort();
151
113
  const launchArgs = [...(opts.appArgs ?? []), `--remote-debugging-port=${port}`];
152
114
  const child = Bun.spawn([exe, ...launchArgs], {
@@ -171,7 +133,11 @@ async function openBrowserHandle(kind: BrowserKind, opts: AcquireBrowserOptions)
171
133
  const puppeteer = await loadPuppeteer();
172
134
  let browser: Browser;
173
135
  try {
174
- browser = await puppeteer.connect({ browserURL: cdpUrl, defaultViewport: null });
136
+ browser = await puppeteer.connect({
137
+ browserURL: cdpUrl,
138
+ defaultViewport: null,
139
+ protocolTimeout: BROWSER_PROTOCOL_TIMEOUT_MS,
140
+ });
175
141
  } catch (err) {
176
142
  if (subprocess) await gracefulKillTreeOnce(subprocess.pid);
177
143
  throw new ToolError(`Connected to ${cdpUrl} but puppeteer.connect failed: ${(err as Error).message}`);
@@ -188,167 +154,19 @@ async function openBrowserHandle(kind: BrowserKind, opts: AcquireBrowserOptions)
188
154
  };
189
155
  }
190
156
 
191
- export interface AcquireTabOptions {
192
- url?: string;
193
- waitUntil?: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
194
- viewport?: { width: number; height: number; deviceScaleFactor?: number };
195
- target?: string;
196
- signal?: AbortSignal;
197
- timeoutMs: number;
198
- dialogs?: DialogPolicy;
199
- }
200
-
201
- export interface AcquireTabResult {
202
- tab: TabHandle;
203
- created: boolean;
204
- }
205
-
206
- export async function acquireTab(
207
- name: string,
208
- browser: BrowserHandle,
209
- opts: AcquireTabOptions,
210
- ): Promise<AcquireTabResult> {
211
- const existing = tabs.get(name);
212
- if (existing) {
213
- if (existing.browser !== browser) {
214
- throw new ToolError(
215
- `Tab ${JSON.stringify(name)} already exists on a different browser (${existing.browser.kind.kind}). Close it first.`,
216
- );
217
- }
218
- if (!existing.page.isClosed()) {
219
- if (opts.dialogs !== undefined) applyDialogPolicy(existing, opts.dialogs);
220
- if (opts.url) {
221
- clearElementCache(existing);
222
- await existing.page.goto(opts.url, {
223
- waitUntil: opts.waitUntil ?? "networkidle2",
224
- timeout: opts.timeoutMs,
225
- });
226
- }
227
- return { tab: existing, created: false };
228
- }
229
- // Stale tab — purge and recreate.
230
- tabs.delete(name);
231
- browser.refCount = Math.max(0, browser.refCount - 1);
232
- }
233
-
234
- let page: Page;
235
- if (browser.kind.kind === "headless") {
236
- page = await browser.browser.newPage();
237
- await applyStealthPatches(browser.browser, page, browser.stealth);
238
- if (browser.kind.headless || opts.viewport) {
239
- await applyViewport(page, opts.viewport);
240
- }
241
- } else {
242
- // spawned/connected — don't open a new tab in the user's app; pick an existing target.
243
- page = await pickElectronTarget(browser.browser, opts.target);
244
- }
245
-
246
- const tab: TabHandle = {
247
- name,
248
- browser,
249
- page,
250
- elementCache: new Map(),
251
- elementCounter: 0,
252
- };
253
- tabs.set(name, tab);
254
- browser.refCount++;
255
- if (opts.dialogs !== undefined) applyDialogPolicy(tab, opts.dialogs);
256
-
257
- if (opts.url) {
258
- await page.goto(opts.url, {
259
- waitUntil: opts.waitUntil ?? "networkidle2",
260
- timeout: opts.timeoutMs,
261
- });
262
- }
263
-
264
- return { tab, created: true };
265
- }
266
-
267
- export interface ReleaseTabOptions {
268
- kill?: boolean;
269
- }
270
-
271
- export async function releaseTab(name: string, opts: ReleaseTabOptions = {}): Promise<boolean> {
272
- const tab = tabs.get(name);
273
- if (!tab) {
274
- logger.debug("releaseTab: unknown tab", { name });
275
- return false;
276
- }
277
- tabs.delete(name);
278
- await disposeTab(tab);
279
- tab.browser.refCount = Math.max(0, tab.browser.refCount - 1);
280
- if (tab.browser.refCount === 0) {
281
- browsers.delete(tab.browser.key);
282
- await disposeBrowserHandle(tab.browser, { kill: opts.kill ?? false });
283
- }
284
- return true;
285
- }
286
-
287
- export async function releaseAllTabs(opts: ReleaseTabOptions = {}): Promise<number> {
288
- const names = [...tabs.keys()];
289
- let count = 0;
290
- for (const name of names) {
291
- if (await releaseTab(name, opts)) count++;
292
- }
293
- return count;
157
+ export function holdBrowser(handle: BrowserHandle): void {
158
+ handle.refCount++;
294
159
  }
295
160
 
296
- /** Drop only headless browsers and their tabs. Used by the headless-toggle slash command. */
297
- export async function dropHeadlessBrowsers(): Promise<void> {
298
- const targets = [...tabs.values()].filter(t => t.browser.kind.kind === "headless");
299
- for (const tab of targets) {
300
- await releaseTab(tab.name);
301
- }
302
- // Drop any zero-refcount headless browsers that survived (shouldn't happen, defensive).
303
- for (const [key, browser] of browsers) {
304
- if (browser.kind.kind === "headless" && browser.refCount === 0) {
305
- browsers.delete(key);
306
- await disposeBrowserHandle(browser, { kill: false });
307
- }
161
+ export async function releaseBrowser(handle: BrowserHandle, opts: { kill: boolean }): Promise<void> {
162
+ handle.refCount = Math.max(0, handle.refCount - 1);
163
+ if (handle.refCount === 0) {
164
+ browsers.delete(handle.key);
165
+ await disposeBrowserHandle(handle, opts);
308
166
  }
309
167
  }
310
168
 
311
- function applyDialogPolicy(tab: TabHandle, policy: DialogPolicy): void {
312
- if (tab.dialogPolicy === policy && tab.dialogHandler) return;
313
- if (tab.dialogHandler) {
314
- try {
315
- tab.page.off("dialog", tab.dialogHandler);
316
- } catch {}
317
- }
318
- const handler = (dialog: { accept: () => Promise<void>; dismiss: () => Promise<void> }): void => {
319
- const action = policy === "accept" ? dialog.accept() : dialog.dismiss();
320
- void action.catch(err => {
321
- logger.debug("Dialog auto-handler failed", { policy, error: (err as Error).message });
322
- });
323
- };
324
- tab.page.on("dialog", handler);
325
- tab.dialogPolicy = policy;
326
- tab.dialogHandler = handler;
327
- }
328
-
329
- async function disposeTab(tab: TabHandle): Promise<void> {
330
- clearElementCache(tab);
331
- if (tab.dialogHandler && !tab.page.isClosed()) {
332
- try {
333
- tab.page.off("dialog", tab.dialogHandler);
334
- } catch {}
335
- tab.dialogHandler = undefined;
336
- tab.dialogPolicy = undefined;
337
- }
338
- if (tab.browser.kind.kind === "headless") {
339
- // Owned tab — close it.
340
- if (!tab.page.isClosed()) {
341
- try {
342
- await tab.page.close();
343
- } catch (err) {
344
- logger.debug("Failed to close page", { error: (err as Error).message });
345
- }
346
- }
347
- }
348
- // spawned/connected: page belongs to user's app — never close.
349
- }
350
-
351
- async function disposeBrowserHandle(handle: BrowserHandle, opts: { kill: boolean }): Promise<void> {
169
+ export async function disposeBrowserHandle(handle: BrowserHandle, opts: { kill: boolean }): Promise<void> {
352
170
  if (handle.kind.kind === "headless") {
353
171
  if (handle.browser.connected) {
354
172
  try {
@@ -360,7 +178,6 @@ async function disposeBrowserHandle(handle: BrowserHandle, opts: { kill: boolean
360
178
  return;
361
179
  }
362
180
  if (handle.kind.kind === "connected") {
363
- // Never close a remote app — only disconnect.
364
181
  if (handle.browser.connected) {
365
182
  try {
366
183
  handle.browser.disconnect();
@@ -370,7 +187,6 @@ async function disposeBrowserHandle(handle: BrowserHandle, opts: { kill: boolean
370
187
  }
371
188
  return;
372
189
  }
373
- // spawned
374
190
  if (handle.browser.connected) {
375
191
  try {
376
192
  handle.browser.disconnect();
@@ -378,40 +194,5 @@ async function disposeBrowserHandle(handle: BrowserHandle, opts: { kill: boolean
378
194
  logger.debug("Failed to disconnect from spawned browser", { error: (err as Error).message });
379
195
  }
380
196
  }
381
- if (opts.kill && handle.pid !== undefined) {
382
- await gracefulKillTreeOnce(handle.pid);
383
- }
384
- }
385
-
386
- export function clearElementCache(tab: TabHandle): void {
387
- if (tab.elementCache.size === 0) {
388
- tab.elementCounter = 0;
389
- return;
390
- }
391
- const handles = [...tab.elementCache.values()];
392
- tab.elementCache.clear();
393
- tab.elementCounter = 0;
394
- for (const handle of handles) {
395
- // Fire and forget; disposal failures don't affect correctness.
396
- void handle.dispose().catch(() => undefined);
397
- }
398
- }
399
-
400
- export async function resolveCachedHandle(tab: TabHandle, id: number): Promise<ElementHandle> {
401
- const handle = tab.elementCache.get(id);
402
- if (!handle) {
403
- throw new ToolError(`Unknown element id ${id}. Run tab.observe() to refresh the element list.`);
404
- }
405
- try {
406
- const isConnected = (await handle.evaluate(el => el.isConnected)) as boolean;
407
- if (!isConnected) {
408
- clearElementCache(tab);
409
- throw new ToolError(`Element id ${id} is stale. Run tab.observe() again.`);
410
- }
411
- } catch (err) {
412
- if (err instanceof ToolError) throw err;
413
- clearElementCache(tab);
414
- throw new ToolError(`Element id ${id} is stale. Run tab.observe() again.`);
415
- }
416
- return handle;
197
+ if (opts.kill && handle.pid !== undefined) await gracefulKillTreeOnce(handle.pid);
417
198
  }
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * TUI renderer for the browser tool.
3
3
  *
4
- * Mirrors the `python` tool look: each `run` invocation is shown as a JS code
4
+ * Mirrors the `eval` tool look: each `run` invocation is shown as a JS code
5
5
  * cell with status icon, optional output, and expand/collapse handling. `open`
6
6
  * and `close` actions render as compact status lines.
7
7
  */
@@ -0,0 +1,101 @@
1
+ import type { ImageContent, TextContent } from "@oh-my-pi/pi-ai";
2
+
3
+ export type Transferable = Bun.Transferable;
4
+
5
+ export interface ObservationEntry {
6
+ id: number;
7
+ role: string;
8
+ name?: string;
9
+ value?: string | number;
10
+ description?: string;
11
+ keyshortcuts?: string;
12
+ states: string[];
13
+ }
14
+
15
+ export interface Observation {
16
+ url: string;
17
+ title?: string;
18
+ viewport: { width: number; height: number; deviceScaleFactor?: number };
19
+ scroll: {
20
+ x: number;
21
+ y: number;
22
+ width: number;
23
+ height: number;
24
+ scrollWidth: number;
25
+ scrollHeight: number;
26
+ };
27
+ elements: ObservationEntry[];
28
+ }
29
+
30
+ export interface ScreenshotResult {
31
+ dest: string;
32
+ mimeType: string;
33
+ bytes: number;
34
+ width: number;
35
+ height: number;
36
+ }
37
+
38
+ export interface SessionSnapshot {
39
+ cwd: string;
40
+ browserScreenshotDir?: string;
41
+ }
42
+
43
+ export type WorkerInitPayload =
44
+ | {
45
+ mode: "headless";
46
+ browserWSEndpoint: string;
47
+ safeDir: string;
48
+ viewport?: { width: number; height: number; deviceScaleFactor?: number };
49
+ dialogs?: "accept" | "dismiss";
50
+ url?: string;
51
+ waitUntil?: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
52
+ timeoutMs: number;
53
+ }
54
+ | {
55
+ mode: "attach";
56
+ browserWSEndpoint: string;
57
+ safeDir: string;
58
+ targetId: string;
59
+ dialogs?: "accept" | "dismiss";
60
+ };
61
+
62
+ export type WorkerInbound =
63
+ | { type: "init"; payload: WorkerInitPayload }
64
+ | { type: "run"; id: string; name: string; code: string; timeoutMs: number; session: SessionSnapshot }
65
+ | { type: "abort"; id: string }
66
+ | { type: "close" };
67
+
68
+ export interface ReadyInfo {
69
+ url: string;
70
+ title?: string;
71
+ viewport: { width: number; height: number; deviceScaleFactor?: number };
72
+ targetId: string;
73
+ }
74
+
75
+ export interface RunResultOk {
76
+ displays: Array<TextContent | ImageContent>;
77
+ returnValue: unknown;
78
+ screenshots: ScreenshotResult[];
79
+ }
80
+
81
+ export interface RunErrorPayload {
82
+ name: string;
83
+ message: string;
84
+ stack?: string;
85
+ isToolError: boolean;
86
+ isAbort: boolean;
87
+ }
88
+
89
+ export type WorkerOutbound =
90
+ | { type: "ready"; info: ReadyInfo }
91
+ | { type: "init-failed"; error: RunErrorPayload }
92
+ | { type: "result"; id: string; ok: true; payload: RunResultOk }
93
+ | { type: "result"; id: string; ok: false; error: RunErrorPayload }
94
+ | { type: "log"; level: "debug" | "warn" | "error"; msg: string; meta?: Record<string, unknown> }
95
+ | { type: "closed" };
96
+
97
+ export interface Transport {
98
+ send(msg: WorkerOutbound | WorkerInbound, transferList?: Transferable[]): void;
99
+ onMessage(handler: (msg: WorkerOutbound | WorkerInbound) => void): () => void;
100
+ close(): void;
101
+ }