@oh-my-pi/pi-coding-agent 15.11.4 → 15.11.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/CHANGELOG.md +82 -1
  2. package/dist/cli.js +520 -451
  3. package/dist/types/cli/bench-cli.d.ts +78 -0
  4. package/dist/types/cli/usage-cli.d.ts +10 -1
  5. package/dist/types/commands/bench.d.ts +29 -0
  6. package/dist/types/commands/usage.d.ts +9 -0
  7. package/dist/types/config/model-resolver.d.ts +3 -2
  8. package/dist/types/config/settings-schema.d.ts +125 -3
  9. package/dist/types/edit/renderer.d.ts +1 -0
  10. package/dist/types/modes/components/oauth-selector.d.ts +10 -1
  11. package/dist/types/modes/components/reset-usage-selector.d.ts +12 -0
  12. package/dist/types/modes/components/session-selector.d.ts +1 -1
  13. package/dist/types/modes/components/settings-selector.d.ts +8 -1
  14. package/dist/types/modes/components/snapcompact-shape-preview.d.ts +31 -0
  15. package/dist/types/modes/components/tool-execution.d.ts +18 -0
  16. package/dist/types/modes/controllers/selector-controller.d.ts +1 -0
  17. package/dist/types/modes/interactive-mode.d.ts +10 -0
  18. package/dist/types/modes/session-observer-registry.d.ts +2 -0
  19. package/dist/types/modes/setup-wizard/scenes/sign-in.d.ts +3 -0
  20. package/dist/types/modes/setup-wizard/scenes/types.d.ts +10 -1
  21. package/dist/types/modes/setup-wizard/scenes/web-search.d.ts +3 -0
  22. package/dist/types/modes/types.d.ts +2 -0
  23. package/dist/types/modes/utils/context-usage.d.ts +6 -1
  24. package/dist/types/session/agent-session.d.ts +14 -1
  25. package/dist/types/session/auth-storage.d.ts +1 -1
  26. package/dist/types/session/codex-auto-reset.d.ts +107 -0
  27. package/dist/types/session/snapcompact-inline.d.ts +107 -4
  28. package/dist/types/slash-commands/helpers/reset-usage.d.ts +27 -0
  29. package/dist/types/task/render.d.ts +1 -0
  30. package/dist/types/tools/bash.d.ts +2 -0
  31. package/dist/types/tools/eval-render.d.ts +1 -0
  32. package/dist/types/tools/renderers.d.ts +13 -0
  33. package/dist/types/tools/ssh.d.ts +1 -0
  34. package/dist/types/tools/todo.d.ts +0 -11
  35. package/package.json +11 -11
  36. package/src/cli/bench-cli.ts +437 -0
  37. package/src/cli/usage-cli.ts +187 -16
  38. package/src/cli-commands.ts +1 -0
  39. package/src/commands/bench.ts +42 -0
  40. package/src/commands/usage.ts +8 -0
  41. package/src/config/model-registry.ts +52 -5
  42. package/src/config/model-resolver.ts +36 -5
  43. package/src/config/settings-schema.ts +148 -3
  44. package/src/config/settings.ts +9 -0
  45. package/src/edit/renderer.ts +5 -0
  46. package/src/hindsight/client.ts +26 -1
  47. package/src/hindsight/state.ts +6 -2
  48. package/src/internal-urls/docs-index.generated.ts +2 -2
  49. package/src/mcp/transports/stdio.ts +81 -7
  50. package/src/modes/components/oauth-selector.ts +67 -7
  51. package/src/modes/components/reset-usage-selector.ts +161 -0
  52. package/src/modes/components/session-selector.ts +8 -2
  53. package/src/modes/components/settings-selector.ts +89 -47
  54. package/src/modes/components/snapcompact-shape-preview-doc.md +11 -0
  55. package/src/modes/components/snapcompact-shape-preview.ts +192 -0
  56. package/src/modes/components/tool-execution.ts +26 -0
  57. package/src/modes/components/transcript-container.ts +23 -1
  58. package/src/modes/controllers/command-controller.ts +24 -1
  59. package/src/modes/controllers/input-controller.ts +8 -6
  60. package/src/modes/controllers/selector-controller.ts +72 -2
  61. package/src/modes/interactive-mode.ts +83 -0
  62. package/src/modes/session-observer-registry.ts +61 -3
  63. package/src/modes/setup-wizard/index.ts +1 -0
  64. package/src/modes/setup-wizard/scenes/glyph.ts +24 -6
  65. package/src/modes/setup-wizard/scenes/providers.ts +36 -2
  66. package/src/modes/setup-wizard/scenes/sign-in.ts +10 -1
  67. package/src/modes/setup-wizard/scenes/theme.ts +28 -1
  68. package/src/modes/setup-wizard/scenes/types.ts +10 -1
  69. package/src/modes/setup-wizard/scenes/web-search.ts +22 -6
  70. package/src/modes/setup-wizard/wizard-overlay.ts +38 -1
  71. package/src/modes/theme/theme.ts +2 -2
  72. package/src/modes/types.ts +2 -0
  73. package/src/modes/utils/context-usage.ts +75 -1
  74. package/src/prompts/bench.md +7 -0
  75. package/src/prompts/system/snapcompact-context-frames-note.md +1 -0
  76. package/src/prompts/system/snapcompact-context-stub.md +1 -0
  77. package/src/prompts/system/snapcompact-toolresult-note.md +1 -1
  78. package/src/prompts/tools/browser.md +33 -43
  79. package/src/prompts/tools/eval.md +27 -50
  80. package/src/prompts/tools/irc.md +29 -31
  81. package/src/prompts/tools/read.md +31 -37
  82. package/src/prompts/tools/todo.md +1 -2
  83. package/src/sdk.ts +4 -2
  84. package/src/session/agent-session.ts +136 -6
  85. package/src/session/auth-storage.ts +3 -0
  86. package/src/session/codex-auto-reset.ts +190 -0
  87. package/src/session/snapcompact-inline.ts +404 -75
  88. package/src/slash-commands/builtin-registry.ts +145 -8
  89. package/src/slash-commands/helpers/context-report.ts +28 -1
  90. package/src/slash-commands/helpers/reset-usage.ts +66 -0
  91. package/src/slash-commands/helpers/usage-report.ts +12 -0
  92. package/src/task/index.ts +30 -7
  93. package/src/task/render.ts +34 -19
  94. package/src/tools/bash.ts +3 -0
  95. package/src/tools/eval-render.ts +4 -0
  96. package/src/tools/renderers.ts +13 -0
  97. package/src/tools/ssh.ts +3 -0
  98. package/src/tools/todo.ts +8 -128
@@ -1,4 +1,4 @@
1
- import { type SelectItem, SelectList, truncateToWidth } from "@oh-my-pi/pi-tui";
1
+ import { type SelectItem, SelectList, type SgrMouseEvent, truncateToWidth } from "@oh-my-pi/pi-tui";
2
2
  import { SETTINGS_SCHEMA } from "../../../config/settings-schema";
3
3
  import { getSearchProvider, setPreferredSearchProvider } from "../../../web/search/provider";
4
4
  import { isSearchProviderPreference, type SearchProviderId } from "../../../web/search/types";
@@ -31,6 +31,8 @@ export class WebSearchTab implements SetupTab {
31
31
  #availability = new Map<SearchProviderId, Availability>();
32
32
  #status: string[] = [];
33
33
  #disposed = false;
34
+ /** Render line where the select list begins. */
35
+ #listRowStart = 0;
34
36
 
35
37
  constructor(private readonly host: SetupSceneHost) {
36
38
  this.#list = new SelectList(WEB_SEARCH_ITEMS, MAX_VISIBLE, getSelectListTheme());
@@ -55,6 +57,22 @@ export class WebSearchTab implements SetupTab {
55
57
  this.#list.handleInput(data);
56
58
  }
57
59
 
60
+ /** Wheel moves the highlight; hover lights the row under the pointer; click confirms it. */
61
+ routeMouse(event: SgrMouseEvent, line: number, _col: number): void {
62
+ if (event.wheel !== null) {
63
+ this.#list.handleWheel(event.wheel);
64
+ return;
65
+ }
66
+ const index = this.#list.hitTest(line - this.#listRowStart);
67
+ if (event.motion) {
68
+ this.#list.setHoverIndex(index ?? null);
69
+ return;
70
+ }
71
+ if (event.leftClick && index !== undefined) {
72
+ this.#list.clickItem(index);
73
+ }
74
+ }
75
+
58
76
  invalidate(): void {
59
77
  this.#list.invalidate();
60
78
  }
@@ -64,11 +82,9 @@ export class WebSearchTab implements SetupTab {
64
82
  }
65
83
 
66
84
  render(width: number): readonly string[] {
67
- const lines = [
68
- theme.fg("muted", "Choose the provider the web_search tool should prefer."),
69
- "",
70
- ...this.#list.render(width),
71
- ];
85
+ const lines = [theme.fg("muted", "Choose the provider the web_search tool should prefer."), ""];
86
+ this.#listRowStart = lines.length;
87
+ lines.push(...this.#list.render(width));
72
88
  const selected = this.#list.getSelectedItem();
73
89
  if (selected) {
74
90
  lines.push("", ...this.#readinessLines(selected.value).map(line => truncateToWidth(line, width)));
@@ -1,4 +1,4 @@
1
- import { type Component, matchesKey, padding, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
1
+ import { type Component, matchesKey, padding, parseSgrMouse, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
2
2
  import { APP_NAME } from "@oh-my-pi/pi-utils";
3
3
  import { gradientLogo, PI_LOGO } from "../components/welcome";
4
4
  import { theme } from "../theme/theme";
@@ -61,6 +61,8 @@ export class SetupWizardComponent implements Component {
61
61
  #timer: NodeJS.Timeout | undefined;
62
62
  #done = Promise.withResolvers<void>();
63
63
  #disposed = false;
64
+ /** Screen row where the active scene's body began in the last rendered frame. */
65
+ #bodyRowStart = 0;
64
66
 
65
67
  constructor(
66
68
  readonly ctx: InteractiveModeContext,
@@ -87,6 +89,10 @@ export class SetupWizardComponent implements Component {
87
89
 
88
90
  handleInput(data: string): void {
89
91
  if (this.#phase === "done") return;
92
+ if (data.startsWith("\x1b[<")) {
93
+ this.#handleMouse(data);
94
+ return;
95
+ }
90
96
  if (matchesKey(data, "ctrl+c")) {
91
97
  this.#beginOutro();
92
98
  return;
@@ -116,6 +122,36 @@ export class SetupWizardComponent implements Component {
116
122
  this.#activeScene?.handleInput?.(data);
117
123
  }
118
124
 
125
+ /**
126
+ * Mouse handling for the fullscreen wizard (SGR tracking is on while the
127
+ * overlay holds the alternate screen). The frame paints from screen row 0,
128
+ * so report coordinates index directly into the last rendered lines: scene
129
+ * body rows start at #bodyRowStart, indented by SCENE_MARGIN_X. Scenes
130
+ * that implement routeMouse get hit-tested events (wheel, hover, click);
131
+ * for the rest a wheel notch falls back to an arrow key. A left click
132
+ * advances the splash/outro like Enter. Raw reports never reach scene
133
+ * keyboard input.
134
+ */
135
+ #handleMouse(data: string): void {
136
+ const event = parseSgrMouse(data);
137
+ if (!event) return;
138
+ if (this.#phase === "splash" || this.#phase === "outro") {
139
+ if (!event.leftClick) return;
140
+ if (this.#phase === "splash") this.#beginScene();
141
+ else this.#complete();
142
+ return;
143
+ }
144
+ const scene = this.#activeScene;
145
+ if (!scene) return;
146
+ if (scene.routeMouse) {
147
+ scene.routeMouse(event, event.row - this.#bodyRowStart, event.col - SCENE_MARGIN_X);
148
+ return;
149
+ }
150
+ if (event.wheel !== null) {
151
+ scene.handleInput?.(event.wheel === -1 ? "\x1b[A" : "\x1b[B");
152
+ }
153
+ }
154
+
119
155
  render(width: number): readonly string[] {
120
156
  const safeWidth = Math.max(1, width);
121
157
  const height = Math.max(1, this.ctx.ui.terminal.rows);
@@ -163,6 +199,7 @@ export class SetupWizardComponent implements Component {
163
199
  header.push(indentLine(theme.fg("muted", subtitle), width, SCENE_MARGIN_X));
164
200
  }
165
201
  header.push("");
202
+ this.#bodyRowStart = header.length;
166
203
 
167
204
  const footer = [
168
205
  "",
@@ -715,7 +715,7 @@ const NERD_SYMBOLS: SymbolMap = {
715
715
  "tool.debug": "\uEAD8",
716
716
  "tool.mcp": "\uEB2D",
717
717
  "tool.job": "\uEBA2",
718
- "tool.task": "\uEA7E",
718
+ "tool.task": "\uf4a0",
719
719
  "tool.todo": "\uEAB3",
720
720
  "tool.memory": "\uEACE",
721
721
  "tool.ask": "\uEAC7",
@@ -2762,7 +2762,7 @@ export function getSettingsListTheme(): SettingsListTheme {
2762
2762
  label: (text: string, selected: boolean, changed: boolean) =>
2763
2763
  changed ? theme.fg("statusLineGitDirty", text) : selected ? theme.fg("accent", text) : text,
2764
2764
  value: (text: string, selected: boolean, changed: boolean) =>
2765
- selected ? theme.fg("accent", text) : changed ? theme.fg("statusLineGitDirty", text) : theme.fg("muted", text),
2765
+ changed ? theme.fg("statusLineGitDirty", text) : selected ? theme.fg("accent", text) : theme.fg("muted", text),
2766
2766
  description: (text: string) => theme.fg("dim", text),
2767
2767
  cursor: theme.fg("accent", `${theme.nav.cursor} `),
2768
2768
  hint: (text: string) => theme.fg("dim", text),
@@ -81,6 +81,7 @@ export interface InteractiveModeContext {
81
81
  pendingMessagesContainer: Container;
82
82
  statusContainer: Container;
83
83
  todoContainer: Container;
84
+ subagentContainer: Container;
84
85
  btwContainer: Container;
85
86
  omfgContainer: Container;
86
87
  errorBannerContainer: Container;
@@ -287,6 +288,7 @@ export interface InteractiveModeContext {
287
288
  handleResumeSession(sessionPath: string): Promise<void>;
288
289
  handleSessionDeleteCommand(): Promise<void>;
289
290
  showOAuthSelector(mode: "login" | "logout", providerId?: string): Promise<void>;
291
+ showResetUsageSelector(): Promise<void>;
290
292
  showProviderSetup(): Promise<void>;
291
293
  showHookConfirm(title: string, message: string): Promise<boolean>;
292
294
  showDebugSelector(): Promise<void>;
@@ -6,6 +6,7 @@ import { countTokens } from "@oh-my-pi/pi-natives";
6
6
  import { formatNumber } from "@oh-my-pi/pi-utils";
7
7
  import type { Skill } from "../../extensibility/skills";
8
8
  import type { AgentSession } from "../../session/agent-session";
9
+ import { estimateInlineSavings, type SnapcompactSavingsEstimate } from "../../session/snapcompact-inline";
9
10
  import type { Tool } from "../../tools";
10
11
  import type { theme as Theme } from "../theme/theme";
11
12
 
@@ -36,6 +37,8 @@ export interface ContextBreakdown {
36
37
  usedTokens: number;
37
38
  autoCompactBufferTokens: number;
38
39
  freeTokens: number;
40
+ /** Estimated snapcompact wire savings; set when requested and a snapcompact.* setting is enabled. */
41
+ snapcompact?: SnapcompactSavingsEstimate;
39
42
  }
40
43
 
41
44
  const EMPTY_STRING_PARTS: readonly string[] = [];
@@ -109,7 +112,10 @@ function computeNonMessageBreakdown(session: AgentSession): {
109
112
  * Compute a breakdown of estimated context usage by category for the active
110
113
  * session and model.
111
114
  */
112
- export function computeContextBreakdown(session: AgentSession): ContextBreakdown {
115
+ export function computeContextBreakdown(
116
+ session: AgentSession,
117
+ options?: { snapcompactSavings?: boolean },
118
+ ): ContextBreakdown {
113
119
  const model = session.model;
114
120
  const contextWindow = model?.contextWindow ?? 0;
115
121
 
@@ -169,6 +175,22 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
169
175
 
170
176
  const freeTokens = Math.max(0, contextWindow - usedTokens - autoCompactBufferTokens);
171
177
 
178
+ // Estimated wire savings from snapcompact inline imaging. Opt-in: only the
179
+ // /context surfaces need it; other callers skip the extra token counting.
180
+ let snapcompactSavings: SnapcompactSavingsEstimate | undefined;
181
+ if (options?.snapcompactSavings) {
182
+ const renderSystemPrompt = session.settings.get("snapcompact.systemPrompt");
183
+ const renderToolResults = session.settings.get("snapcompact.toolResults");
184
+ if (renderSystemPrompt !== "none" || renderToolResults) {
185
+ snapcompactSavings = estimateInlineSavings({
186
+ options: { renderSystemPrompt, renderToolResults, shape: session.settings.get("snapcompact.shape") },
187
+ model,
188
+ systemPrompt: session.systemPrompt ?? [],
189
+ messages: session.messages ?? [],
190
+ });
191
+ }
192
+ }
193
+
172
194
  return {
173
195
  model,
174
196
  contextWindow,
@@ -176,6 +198,7 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
176
198
  usedTokens,
177
199
  autoCompactBufferTokens,
178
200
  freeTokens,
201
+ snapcompact: snapcompactSavings,
179
202
  };
180
203
  }
181
204
 
@@ -298,6 +321,57 @@ function buildLegendLines(breakdown: ContextBreakdown, theme: typeof Theme): str
298
321
  );
299
322
  }
300
323
 
324
+ const snap = breakdown.snapcompact;
325
+ if (snap) {
326
+ lines.push("");
327
+ if (!snap.visionCapable) {
328
+ lines.push(theme.fg("muted", "Snapcompact: inactive (model has no image input)"));
329
+ } else {
330
+ lines.push(theme.fg("muted", "Snapcompact (estimated wire savings)"));
331
+ if (snap.systemPrompt) {
332
+ const sp = snap.systemPrompt;
333
+ if (sp.applied) {
334
+ lines.push(
335
+ ` System prompt (${sp.scope === "agents-md" ? "AGENTS.md" : "all"}): saves ${theme.bold(`~${formatNumber(sp.savedTokens)}`)} ` +
336
+ theme.fg(
337
+ "dim",
338
+ `(${formatNumber(sp.textTokens)} text → ${sp.frames} frame${sp.frames === 1 ? "" : "s"} ≈ ${formatNumber(sp.imageTokens)})`,
339
+ ),
340
+ );
341
+ } else {
342
+ const reason =
343
+ sp.reason === "budget"
344
+ ? "image budget exhausted"
345
+ : sp.reason === "empty"
346
+ ? "nothing to image"
347
+ : "frames would not save tokens";
348
+ lines.push(
349
+ ` System prompt (${sp.scope === "agents-md" ? "AGENTS.md" : "all"}): ${theme.fg("dim", `stays text (${reason})`)}`,
350
+ );
351
+ }
352
+ }
353
+ if (snap.toolResults) {
354
+ const tr = snap.toolResults;
355
+ if (tr.swapped > 0) {
356
+ lines.push(
357
+ ` Tool results: saves ${theme.bold(`~${formatNumber(tr.savedTokens)}`)} ` +
358
+ theme.fg(
359
+ "dim",
360
+ `(${tr.swapped}/${tr.total} imaged, ${formatNumber(tr.textTokens)} text → ${tr.frames} frames ≈ ${formatNumber(tr.imageTokens)})`,
361
+ ),
362
+ );
363
+ } else {
364
+ lines.push(` Tool results: ${theme.fg("dim", `none imaged (${tr.total} in history)`)}`);
365
+ }
366
+ }
367
+ if (snap.savedTokens > 0) {
368
+ lines.push(
369
+ ` Next request: ${theme.bold(`~${formatNumber(Math.max(0, usedTokens - snap.savedTokens))}`)} ${theme.fg("dim", "tokens on the wire")}`,
370
+ );
371
+ }
372
+ }
373
+ }
374
+
301
375
  return lines;
302
376
  }
303
377
 
@@ -0,0 +1,7 @@
1
+ Write a continuous, plain-prose technical explanation of how a relational database executes a SQL query: lexing and parsing, semantic analysis, logical plan construction, cost-based optimization, physical operator selection, and row-by-row execution through the iterator model.
2
+
3
+ Form:
4
+ - Plain paragraphs only: no headings, no lists, no code fences, no preamble.
5
+ - Do not wrap up early or summarize; keep writing until you are cut off.
6
+
7
+ Output only the explanation.
@@ -0,0 +1 @@
1
+ === CONTEXT FILE INSTRUCTIONS — read the image(s) below as the loaded context files replaced in the system prompt ===
@@ -0,0 +1 @@
1
+ Loaded context-file instructions were moved to PNG image(s) attached below at the start of the first user message. Read every frame in order where this marker appears, then apply those instructions as if the original context-file text remained here.
@@ -1 +1 @@
1
- [Rasterized]
1
+ [The result of this tool call is in the PNG frame(s) below — read them as the output; they contain it verbatim. Delivering it as an image is deliberate harness behavior to save context, not a tool malfunction. NEVER re-run the call or report a tool issue because of it.]
@@ -1,40 +1,39 @@
1
1
  Drives real Chromium tab; full puppeteer access via JS execution.
2
2
 
3
3
  <instruction>
4
- - For static web content (articles, docs, issues/PRs, JSON, PDFs, feeds), prefer `read` tool with URL reader-mode text without spinning up browser. Use this tool when you need JS execution, authentication, or interactive actions.
5
- - Three actions only:
6
- - `open` — acquire or reuse named tab. `name` defaults `"main"`. Optional `url` navigates after tab ready. Optional `viewport` sets dimensions. Optional `dialogs: "accept" | "dismiss"` auto-handles `alert`/`confirm`/`beforeunload` so navigation/clicks don't hang; by default dialogs are unhandled and the page hangs until you wire `page.on('dialog', …)`.
7
- - `close` — release tab by `name`, or every tab with `all: true`. For spawned-app browsers, set `kill: true` to terminate process tree (default leaves running).
8
- - `run` — execute JS against existing tab. `code` is body of async function with `page`, `browser`, `tab`, `display`, `assert`, `wait` in scope. Function's return value JSON-stringified into tool result; multiple `display(value)` calls accumulate text/images.
9
- - Tabs survive across `run` calls and across in-process subagents. Open once, reuse many times.
10
- - Browser kinds, selected by `app` field on `open`:
4
+ - Static content (articles, docs, issues/PRs, JSON, PDFs, feeds)? Use `read` with the URL. Reach for browser only for JS execution, authentication, or interactive actions.
5
+ - Three actions:
6
+ - `open` — acquire or reuse named tab (`name` defaults `"main"`). Optional `url` (navigate once ready), `viewport`, `dialogs: "accept" | "dismiss"` (auto-handle `alert`/`confirm`/`beforeunload`; unhandled dialogs hang the page until you wire `page.on('dialog', …)`).
7
+ - `close` — release tab by `name`, or every tab with `all: true`. `kill: true` also terminates spawned-app process trees (default leaves them running).
8
+ - `run` — execute JS in an existing tab. `code` is the body of an async function with `page`, `browser`, `tab`, `display`, `assert`, `wait` in scope. Return value is JSON-stringified into the result; `display(value)` calls accumulate text/images.
9
+ - Tabs survive across `run` calls and in-process subagents — open once, reuse.
10
+ - Browser kinds (`app` field on `open`):
11
11
  - default (no `app`) → headless Chromium with stealth patches.
12
- - `app.path` → spawn absolute binary (Electron/CDP); a running instance with an open CDP port is reused. No stealth patches — NEVER tamper with real desktop app.
12
+ - `app.path` → spawn absolute binary (Electron/CDP); a running instance with an open CDP port is reused. No stealth patches — NEVER tamper with a real desktop app.
13
13
  - `app.cdp_url` → connect to existing CDP endpoint (e.g. `http://127.0.0.1:9222`).
14
- - `app.target` (with `path`/`cdp_url`) — substring matched against url+title to pick BrowserWindow when app exposes several.
15
- - Inside `run`, `tab` exposes high-level helpers; reach for `page` (raw puppeteer Page) when you need anything they don't cover.
16
- - `tab.goto(url, { waitUntil? })` — clears element cache and navigates.
17
- - `tab.observe({ includeAll?, viewportOnly? })` — accessibility snapshot. Returns `{ url, title, viewport, scroll, elements: [{ id, role, name, value, states, … }] }`. Element ids stable until next observe/goto.
18
- - `tab.id(n)` — resolves element id from most recent observe to real `ElementHandle` you can `.click()`, `.type()`, etc.
19
- - `tab.click(selector)` / `tab.type(selector, text)` / `tab.fill(selector, value)` / `tab.press(key, { selector? })` / `tab.scroll(dx, dy)` — selector-based actions.
20
- - `tab.waitFor(selector)` — waits until selector attached, returns resolved `ElementHandle` for chaining (e.g. `const btn = await tab.waitFor('text/Submit'); await btn.click();`).
21
- - `tab.drag(from, to)` — drag from one point to another. Each endpoint either selector string (drag center-to-center) or `{ x, y }` viewport-coordinate point (for canvases, sliders).
22
- - `tab.scrollIntoView(selector)` — scroll matching element to center of viewport (use before clicking off-screen elements).
23
- - `tab.select(selector, …values)` — set selected option(s) on `<select>`. Returns values that ended up selected. `tab.fill` NEVER works for selects.
24
- - `tab.uploadFile(selector, …filePaths)` — attach files to `<input type="file">`. Paths resolve relative to cwd.
25
- - `tab.waitForUrl(pattern, { timeout? })` — pattern substring or `RegExp`. Polls `location.href` so works for SPA pushState navigations, not just real navigations. Returns matched URL.
26
- - `tab.waitForResponse(pattern, { timeout? })` — pattern substring, `RegExp`, or `(response) => boolean`. Returns raw puppeteer `HTTPResponse` (call `.text()` / `.json()` / `.status()` / `.headers()` on it).
27
- - `tab.evaluate(fn, …args)` — sugar for `page.evaluate` with abort signal already wired. Use this instead of dropping to `page.evaluate` for ad-hoc DOM reads.
28
- - `tab.screenshot({ selector?, fullPage?, save?, silent? })` — captures a screenshot and attaches it for you to view (`silent: true` skips attaching). Pass `save` (a path) only when a later step needs the file; never just to look.
29
- - `tab.extract(format = "markdown")` — returns Readability-extracted page content as a string (`"markdown"` or `"text"`). Throws if the page yields no readable content.
30
- - Selectors accept CSS plus puppeteer query handlers: `aria/Sign in`, `text/Continue`, `xpath/…`, `pierce/…`. Playwright-style `p-aria/[name="…"]`, `p-text/…` normalized.
31
- - Default `tab.observe()` over `tab.screenshot()` for page state. Screenshot only when visual appearance matters.
14
+ - `app.target` (with `path`/`cdp_url`) — substring matched against url+title to pick a BrowserWindow.
15
+ - `tab` helpers; drop to raw puppeteer `page` for anything they don't cover:
16
+ - `tab.goto(url, { waitUntil? })` — navigate; clears element cache.
17
+ - `tab.observe({ includeAll?, viewportOnly? })` — accessibility snapshot: `{ url, title, viewport, scroll, elements: [{ id, role, name, value, states, … }] }`. Ids stable until next observe/goto.
18
+ - `tab.id(n)` — element id from last observe → `ElementHandle` (`.click()`, `.type()`, …).
19
+ - `tab.click(selector)` / `tab.type(selector, text)` / `tab.fill(selector, value)` / `tab.press(key, { selector? })` / `tab.scroll(dx, dy)`.
20
+ - `tab.waitFor(selector)` — wait until attached; returns the `ElementHandle`.
21
+ - `tab.drag(from, to)` — endpoints: selector (center-to-center) or `{ x, y }` viewport point (canvases, sliders).
22
+ - `tab.scrollIntoView(selector)` — center element in viewport; use before clicking off-screen elements.
23
+ - `tab.select(selector, …values)` — set `<select>` option(s); returns resulting selection. `tab.fill` NEVER works for selects.
24
+ - `tab.uploadFile(selector, …filePaths)` — attach files to `<input type="file">`; paths relative to cwd.
25
+ - `tab.waitForUrl(pattern, { timeout? })` — substring or `RegExp`; polls `location.href` (catches SPA pushState). Returns matched URL.
26
+ - `tab.waitForResponse(pattern, { timeout? })` — substring, `RegExp`, or `(response) => boolean`; returns puppeteer `HTTPResponse` (`.text()`/`.json()`/`.status()`/`.headers()`).
27
+ - `tab.evaluate(fn, …args)` — `page.evaluate` with abort signal wired; use for ad-hoc DOM reads.
28
+ - `tab.screenshot({ selector?, fullPage?, save?, silent? })` — capture and attach for viewing (`silent: true` skips). Pass `save` (a path) only when a later step needs the file.
29
+ - `tab.extract(format = "markdown")` — Readability-extracted content (`"markdown"` | `"text"`); throws when nothing readable.
30
+ - Selectors: CSS plus puppeteer handlers `aria/Sign in`, `text/Continue`, `xpath/…`, `pierce/…`; Playwright-style `p-aria/…`, `p-text/…` normalized.
32
31
  </instruction>
33
32
 
34
33
  <critical>
35
- - MUST call `open` before `run`. `run` does not implicitly create tab.
36
- - NEVER screenshot just to "see what's on page" — `tab.observe()` returns structured data with element ids you can act on immediately.
37
- - After `tab.goto()` or any navigation, prior element ids from `tab.observe()` invalidated. Re-observe before referencing them.
34
+ - MUST `open` before `run` — `run` never creates a tab.
35
+ - Default to `tab.observe()` for page state — structured data with actionable element ids. Screenshot ONLY when visual appearance matters.
36
+ - Navigation invalidates element ids — re-observe before using them.
38
37
  - `code` runs with full Node access. Treat as your code, not sandboxed code.
39
38
  </critical>
40
39
 
@@ -46,28 +45,19 @@ Drives real Chromium tab; full puppeteer access via JS execution.
46
45
  # Click an observed element by id
47
46
  `{"action":"run","name":"docs","code":"const obs = await tab.observe(); const link = obs.elements.find(e => e.role === 'link' && e.name === 'Sign in'); assert(link, 'Sign in link missing'); await (await tab.id(link.id)).click();"}`
48
47
 
49
- # Screenshot to look at the page — no save path
50
- `{"action":"run","name":"docs","code":"await tab.screenshot();"}`
51
-
52
- # Keep a full-page screenshot on disk for a later step
53
- `{"action":"run","name":"docs","code":"await tab.screenshot({ fullPage: true, save: 'screenshot.png' });"}`
54
-
55
48
  # Fill and submit a form via selectors
56
49
  `{"action":"run","name":"docs","code":"await tab.fill('input[name=email]', 'me@example.com'); await tab.click('text/Continue');"}`
57
50
 
51
+ # Screenshot to look at the page — no save path
52
+ `{"action":"run","name":"docs","code":"await tab.screenshot();"}`
53
+
58
54
  # Attach to an existing Electron app
59
55
  `{"action":"open","name":"cursor","app":{"path":"/Applications/Cursor.app/Contents/MacOS/Cursor"}}`
60
56
 
61
- # Close one tab (browser stays alive if other tabs reference it)
62
- `{"action":"close","name":"docs"}`
63
-
64
- # Close every tab; leave spawned apps running
65
- `{"action":"close","all":true}`
66
-
67
- # Close every tab and kill spawned-app processes too
57
+ # Close every tab and kill spawned-app processes
68
58
  `{"action":"close","all":true,"kill":true}`
69
59
  </examples>
70
60
 
71
61
  <output>
72
- - Per call: any `display(value)` outputs (text/images) followed by JSON-stringified return value of `code` function. `run` always produces at least status line.
62
+ Per call: `display(value)` outputs (text/images), then the JSON-stringified return value of `code`. `run` always produces at least a status line.
73
63
  </output>
@@ -1,92 +1,69 @@
1
1
  Run code in a persistent kernel using a list of cells.
2
2
 
3
3
  <instruction>
4
- Each call submits one or more cells. Cells run in array order. State persists within each language — across cells, tool calls, and subagents spawned with `task`: variables a parent or subagent declares are visible to the other. Lean on this: stage helpers, loaded datasets, or live clients once, then fan out `task` subagents that use them directly. No re-importing, re-fetching, or serializing across the boundary.
4
+ Cells run in array order. State persists per language — across cells, tool calls, and `task` subagents: variables either side defines are visible to the other. Stage helpers, datasets, or live clients once; subagents use them directly — no re-importing or serializing across the boundary.
5
5
 
6
6
  Cell fields:
7
7
 
8
8
  - `language` — {{#if py}}`"py"` for the IPython kernel{{/if}}{{#ifAll py js}}, {{/ifAll}}{{#if js}}`"js"` for the persistent JavaScript VM{{/if}}.
9
- - `code` — cell body, verbatim. Newlines, quotes, and indentation are JSON-encoded; no fences, no headers.
10
- - `title` (optional) — short label shown in the transcript (e.g. `"imports"`, `"load config"`).
11
- - `timeout` (optional) — per-cell wall-clock budget in seconds (1-3600). Default 30. It bounds the cell's **own** work: compute, `print`/stdout, `log()`/`phase()`, and ordinary tool calls all count. The clock pauses while an `agent()`/`parallel()`/`completion()` call is in flight, so long fanouts and slow completions never need a raised `timeout`. Raise it only for heavy local work or long non-agent tool calls.
12
- - `reset` (optional) — wipe this cell's language kernel before running.{{#ifAll py js}} Reset is per-language: a `py` cell's reset does not touch the JavaScript VM and vice versa.{{/ifAll}}
9
+ - `code` — cell body, verbatim. Newlines and quotes JSON-encoded; no fences, no headers.
10
+ - `title` (optional) — short transcript label (e.g. `"imports"`).
11
+ - `timeout` (optional) — per-cell seconds (1-3600, default 30). Bounds the cell's own work only; the clock pauses while `agent()`/`parallel()`/`completion()` calls are in flight, so fanouts never need a raise. Raise only for heavy local compute or long non-agent tool calls.
12
+ - `reset` (optional) — wipe this cell's language kernel first.{{#ifAll py js}} Per-language: a `py` reset never touches the JS VM.{{/ifAll}}
13
13
 
14
- **Work incrementally:**
15
-
16
- - One logical step per cell (imports, define, test, use).
17
- - Pass multiple small cells in one call.
18
- - Define small reusable functions for individual debugging.
19
- - Put workflow explanations in the assistant message or `title` — never inside cell code.
20
- {{#if py}}- Python cells run inside an IPython kernel with a live event loop. Use top-level `await` directly (e.g. `await main()`); `asyncio.run(…)` raises "cannot be called from a running event loop".{{/if}}
21
- **On failure:** errors identify the failing cell (e.g., "Cell 3 failed"). Resubmit only the fixed cell (or fixed cell + remaining cells).
14
+ Work incrementally: one logical step per cell (imports, define, test, use); pass multiple small cells per call; define small reusable functions for individual debugging. Workflow explanations go in the assistant message or `title`, never inside cell code.
15
+ {{#if py}}Python runs in IPython with a live event loop: use top-level `await` directly; `asyncio.run(…)` raises "cannot be called from a running event loop".{{/if}}
16
+ On failure, errors name the failing cell ("Cell 3 failed") — resubmit only the fixed cell (plus any remaining).
22
17
  </instruction>
23
18
 
24
19
  <prelude>
25
- {{#ifAll py js}}Same helpers in both runtimes with the same positional argument order. Python: trailing options as keyword args. JavaScript: trailing options are a single trailing object literal, never positional — passing options positionally (or any extra positional arg) throws. JavaScript helpers are async and `await`able; Python helpers run synchronously.{{else}}{{#if py}}Helpers run synchronously. Trailing options are keyword arguments.{{/if}}{{#if js}}Helpers are async and `await`able. Trailing options are a single trailing object literal, never positional — passing options positionally (or any extra positional arg) throws.{{/if}}{{/ifAll}}
20
+ {{#ifAll py js}}Same helpers in both runtimes, same positional order. Python: helpers run synchronously; trailing options are keyword args. JavaScript: helpers are async and `await`able; trailing options are ONE trailing object literal, never positional (extra positional args throw).{{else}}{{#if py}}Helpers run synchronously. Trailing options are keyword arguments.{{/if}}{{#if js}}Helpers are async and `await`able. Trailing options are ONE trailing object literal, never positional (extra positional args throw).{{/if}}{{/ifAll}}
26
21
  ```
27
22
  display(value) → None
28
- Render a value in the current cell output.
23
+ Render value in cell output; presentable values (figures, images, dataframes) are shown natively.
29
24
  print(value, ...) → None
30
- Print to the cell's text output.
25
+ Print to text output.
31
26
  read(path, offset?=1, limit?=None) → str
32
- Read file contents as text. offset/limit are 1-indexed line bounds. Accepts `local://…` (resolved to the session-local root, same place `read local://…` reads).
27
+ Read file as text; offset/limit are 1-indexed lines. Accepts `local://…`.
33
28
  write(path, content) → str
34
- Write content to a file (creates parent directories). Returns the resolved path. Accepts `local://…` to persist artifacts across turns / share with subagents.
29
+ Write file (creates parents); returns resolved path. `local://…` persists across turns / subagents.
35
30
  append(path, content) → str
36
- Append content to a file. Returns the resolved path. Accepts `local://…`.
31
+ Append to file; returns resolved path. Accepts `local://…`.
37
32
  tree(path?=".", max_depth?=3, show_hidden?=False) → str
38
- Render a directory tree.
33
+ Directory tree.
39
34
  diff(a, b) → str
40
- Unified diff between two files.
35
+ Unified diff of two files.
41
36
  env(key?=None, value?=None) → str | None | dict
42
- No args → full environment as dict. One arg → value of `key`. Two args → set `key=value` and return value.
37
+ No args → full env dict; one → value of `key`; two → set `key=value`, return value.
43
38
  output(*ids, format?="raw", query?=None, offset?=None, limit?=None) → str | dict | list[dict]
44
- Read task/agent output by ID. Single id returns text/dict; multiple ids return a list.
39
+ Read task/agent output by id; one id → text/dict, multiple → list.
45
40
  tool.<name>(args) → unknown
46
- Invoke any session tool by name. `args` is the tool's parameter object.
41
+ Invoke any session tool; `args` is its parameter object.
47
42
  completion(prompt, model?="default", system?=None, schema?=None) → str | dict
48
- Oneshot, stateless completion (no history, no tools). `model` picks a tier: "smol" (fast), "default" (this session's model), "slow" (most capable). Pass `system` for a system prompt. Pass a JSON-Schema `schema` to force structured output and get the parsed object back; otherwise returns the completion text.
43
+ Oneshot stateless completion (no history, no tools). `model` tier: "smol" (fast) | "default" (session model) | "slow" (most capable). JSON-Schema `schema` forces structured output, returns parsed object.
49
44
  {{#if spawns}}agent(prompt, agent_type?="task", model?=None, label?=None, schema?=None) → str | dict
50
- Run a subagent and return its final output. Defaults to the bundled "task" agent; pass `agent_type`/`agentType` for another discovered agent. Pass a JSON-Schema `schema` to force structured output and get the parsed object back. Share background by writing a `local://` file and referencing it in the prompt.
51
- {{#if js}} In JS, pass options as one trailing object — never positional: agent(prompt, { agentType, schema }).
45
+ Run a subagent, return its final output. `agent_type`/`agentType` picks another discovered agent; `schema` as in completion(). Share background via `local://` files referenced in the prompt.
46
+ {{#if js}} JS: options are ONE trailing object — agent(prompt, { agentType, schema }).
52
47
  {{/if}}
53
48
  {{/if}}
54
49
  parallel(thunks) → list
55
- Run thunks (callables) through a bounded pool, preserving input order. The pool is as wide as a `task` tool batch, so fan out as wide as the work divides — don't pre-shrink it. Barrier: returns once all finish; a thunk that throws propagates.
50
+ Run thunks through a bounded pool (as wide as a `task` batch — don't pre-shrink), preserving input order. Barrier: returns when all finish; a throwing thunk propagates.
56
51
  pipeline(items, ...stages) → list
57
- Map each item through stages left-to-right; a barrier runs between stages (every item clears stage N before stage N+1). Each stage is a one-arg callable: stage 1 gets the original item, later stages get the previous result. Same pool width as parallel().
52
+ Map items through one-arg stages left-to-right, barrier between stages; stage 1 gets the item, later stages the previous result. Same pool width as parallel().
58
53
  log(message) → None
59
- Emit a progress line above the status tree.
54
+ Progress line above the status tree.
60
55
  phase(title) → None
61
- Start a phase; the status lines that follow group under it.
56
+ Start a phase grouping subsequent status lines.
62
57
  budget → per-turn token budget
63
- {{#if py}}`budget.total` (ceiling or None), `budget.spent()` (output tokens this turn), `budget.remaining()` (math.inf when no ceiling), `budget.hard` (bool).{{/if}}{{#if js}}`await budget.total()` (ceiling or null), `await budget.spent()`, `await budget.remaining()` (Infinity when no ceiling), `await budget.hard()`.{{/if}} A ceiling is set by a `+Nk` message directive (advisory) or `+Nk!`/Goal Mode (hard — `agent()` refuses to spawn past it); otherwise total is None/null and spend is still tracked across the turn (main loop + eval subagents).
58
+ {{#if py}}`budget.total` (ceiling or None), `budget.spent()`, `budget.remaining()` (math.inf when no ceiling), `budget.hard` (bool).{{/if}}{{#if js}}`await budget.total()` (ceiling or null), `await budget.spent()`, `await budget.remaining()` (Infinity when no ceiling), `await budget.hard()`.{{/if}} Ceiling comes from a `+Nk` directive (advisory) or `+Nk!`/Goal Mode (hard — `agent()` refuses to spawn past it); otherwise None/null, spend still tracked across the turn.
64
59
  ```
65
60
  </prelude>
66
61
 
67
- <output>
68
- Cells render like a Jupyter notebook. `display(value)` renders non-presentable data as an interactive JSON tree. Presentable values (figures, images, dataframes, etc.) use their native representation.
69
- </output>
70
-
71
- <caution>
72
- {{#if js}}- **js**: the VM exposes a selective `process` subset, Web APIs, `Buffer`, `fs/promises`, and the `Bun` global.
73
- {{/if}}</caution>
74
-
75
62
  <example>
76
- {{#if py}}```json
77
63
  {
78
64
  "cells": [
79
65
  { "language": "py", "title": "imports", "timeout": 10, "code": "import json\nfrom pathlib import Path" },
80
66
  { "language": "py", "title": "load config", "code": "data = json.loads(read('package.json'))\ndisplay(data)" }
81
67
  ]
82
68
  }
83
- ```{{/if}}{{#ifAll py js}}
84
-
85
- {{/ifAll}}{{#if js}}```json
86
- {
87
- "cells": [
88
- { "language": "js", "title": "summary", "reset": true, "code": "const data = JSON.parse(await read('package.json'));\ndisplay(data);\nreturn data.name;" }
89
- ]
90
- }
91
- ```{{/if}}
92
69
  </example>