@oh-my-pi/pi-coding-agent 15.11.4 → 15.11.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +82 -1
- package/dist/cli.js +520 -451
- package/dist/types/cli/bench-cli.d.ts +78 -0
- package/dist/types/cli/usage-cli.d.ts +10 -1
- package/dist/types/commands/bench.d.ts +29 -0
- package/dist/types/commands/usage.d.ts +9 -0
- package/dist/types/config/model-resolver.d.ts +3 -2
- package/dist/types/config/settings-schema.d.ts +125 -3
- package/dist/types/edit/renderer.d.ts +1 -0
- package/dist/types/modes/components/oauth-selector.d.ts +10 -1
- package/dist/types/modes/components/reset-usage-selector.d.ts +12 -0
- package/dist/types/modes/components/session-selector.d.ts +1 -1
- package/dist/types/modes/components/settings-selector.d.ts +8 -1
- package/dist/types/modes/components/snapcompact-shape-preview.d.ts +31 -0
- package/dist/types/modes/components/tool-execution.d.ts +18 -0
- package/dist/types/modes/controllers/selector-controller.d.ts +1 -0
- package/dist/types/modes/interactive-mode.d.ts +10 -0
- package/dist/types/modes/session-observer-registry.d.ts +2 -0
- package/dist/types/modes/setup-wizard/scenes/sign-in.d.ts +3 -0
- package/dist/types/modes/setup-wizard/scenes/types.d.ts +10 -1
- package/dist/types/modes/setup-wizard/scenes/web-search.d.ts +3 -0
- package/dist/types/modes/types.d.ts +2 -0
- package/dist/types/modes/utils/context-usage.d.ts +6 -1
- package/dist/types/session/agent-session.d.ts +14 -1
- package/dist/types/session/auth-storage.d.ts +1 -1
- package/dist/types/session/codex-auto-reset.d.ts +107 -0
- package/dist/types/session/snapcompact-inline.d.ts +107 -4
- package/dist/types/slash-commands/helpers/reset-usage.d.ts +27 -0
- package/dist/types/task/render.d.ts +1 -0
- package/dist/types/tools/bash.d.ts +2 -0
- package/dist/types/tools/eval-render.d.ts +1 -0
- package/dist/types/tools/renderers.d.ts +13 -0
- package/dist/types/tools/ssh.d.ts +1 -0
- package/dist/types/tools/todo.d.ts +0 -11
- package/package.json +11 -11
- package/src/cli/bench-cli.ts +437 -0
- package/src/cli/usage-cli.ts +187 -16
- package/src/cli-commands.ts +1 -0
- package/src/commands/bench.ts +42 -0
- package/src/commands/usage.ts +8 -0
- package/src/config/model-registry.ts +52 -5
- package/src/config/model-resolver.ts +36 -5
- package/src/config/settings-schema.ts +148 -3
- package/src/config/settings.ts +9 -0
- package/src/edit/renderer.ts +5 -0
- package/src/hindsight/client.ts +26 -1
- package/src/hindsight/state.ts +6 -2
- package/src/internal-urls/docs-index.generated.ts +2 -2
- package/src/mcp/transports/stdio.ts +81 -7
- package/src/modes/components/oauth-selector.ts +67 -7
- package/src/modes/components/reset-usage-selector.ts +161 -0
- package/src/modes/components/session-selector.ts +8 -2
- package/src/modes/components/settings-selector.ts +89 -47
- package/src/modes/components/snapcompact-shape-preview-doc.md +11 -0
- package/src/modes/components/snapcompact-shape-preview.ts +192 -0
- package/src/modes/components/tool-execution.ts +26 -0
- package/src/modes/components/transcript-container.ts +23 -1
- package/src/modes/controllers/command-controller.ts +24 -1
- package/src/modes/controllers/input-controller.ts +8 -6
- package/src/modes/controllers/selector-controller.ts +72 -2
- package/src/modes/interactive-mode.ts +83 -0
- package/src/modes/session-observer-registry.ts +61 -3
- package/src/modes/setup-wizard/index.ts +1 -0
- package/src/modes/setup-wizard/scenes/glyph.ts +24 -6
- package/src/modes/setup-wizard/scenes/providers.ts +36 -2
- package/src/modes/setup-wizard/scenes/sign-in.ts +10 -1
- package/src/modes/setup-wizard/scenes/theme.ts +28 -1
- package/src/modes/setup-wizard/scenes/types.ts +10 -1
- package/src/modes/setup-wizard/scenes/web-search.ts +22 -6
- package/src/modes/setup-wizard/wizard-overlay.ts +38 -1
- package/src/modes/theme/theme.ts +2 -2
- package/src/modes/types.ts +2 -0
- package/src/modes/utils/context-usage.ts +75 -1
- package/src/prompts/bench.md +7 -0
- package/src/prompts/system/snapcompact-context-frames-note.md +1 -0
- package/src/prompts/system/snapcompact-context-stub.md +1 -0
- package/src/prompts/system/snapcompact-toolresult-note.md +1 -1
- package/src/prompts/tools/browser.md +33 -43
- package/src/prompts/tools/eval.md +27 -50
- package/src/prompts/tools/irc.md +29 -31
- package/src/prompts/tools/read.md +31 -37
- package/src/prompts/tools/todo.md +1 -2
- package/src/sdk.ts +4 -2
- package/src/session/agent-session.ts +136 -6
- package/src/session/auth-storage.ts +3 -0
- package/src/session/codex-auto-reset.ts +190 -0
- package/src/session/snapcompact-inline.ts +404 -75
- package/src/slash-commands/builtin-registry.ts +145 -8
- package/src/slash-commands/helpers/context-report.ts +28 -1
- package/src/slash-commands/helpers/reset-usage.ts +66 -0
- package/src/slash-commands/helpers/usage-report.ts +12 -0
- package/src/task/index.ts +30 -7
- package/src/task/render.ts +34 -19
- package/src/tools/bash.ts +3 -0
- package/src/tools/eval-render.ts +4 -0
- package/src/tools/renderers.ts +13 -0
- package/src/tools/ssh.ts +3 -0
- package/src/tools/todo.ts +8 -128
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type SelectItem, SelectList, truncateToWidth } from "@oh-my-pi/pi-tui";
|
|
1
|
+
import { type SelectItem, SelectList, type SgrMouseEvent, truncateToWidth } from "@oh-my-pi/pi-tui";
|
|
2
2
|
import { SETTINGS_SCHEMA } from "../../../config/settings-schema";
|
|
3
3
|
import { getSearchProvider, setPreferredSearchProvider } from "../../../web/search/provider";
|
|
4
4
|
import { isSearchProviderPreference, type SearchProviderId } from "../../../web/search/types";
|
|
@@ -31,6 +31,8 @@ export class WebSearchTab implements SetupTab {
|
|
|
31
31
|
#availability = new Map<SearchProviderId, Availability>();
|
|
32
32
|
#status: string[] = [];
|
|
33
33
|
#disposed = false;
|
|
34
|
+
/** Render line where the select list begins. */
|
|
35
|
+
#listRowStart = 0;
|
|
34
36
|
|
|
35
37
|
constructor(private readonly host: SetupSceneHost) {
|
|
36
38
|
this.#list = new SelectList(WEB_SEARCH_ITEMS, MAX_VISIBLE, getSelectListTheme());
|
|
@@ -55,6 +57,22 @@ export class WebSearchTab implements SetupTab {
|
|
|
55
57
|
this.#list.handleInput(data);
|
|
56
58
|
}
|
|
57
59
|
|
|
60
|
+
/** Wheel moves the highlight; hover lights the row under the pointer; click confirms it. */
|
|
61
|
+
routeMouse(event: SgrMouseEvent, line: number, _col: number): void {
|
|
62
|
+
if (event.wheel !== null) {
|
|
63
|
+
this.#list.handleWheel(event.wheel);
|
|
64
|
+
return;
|
|
65
|
+
}
|
|
66
|
+
const index = this.#list.hitTest(line - this.#listRowStart);
|
|
67
|
+
if (event.motion) {
|
|
68
|
+
this.#list.setHoverIndex(index ?? null);
|
|
69
|
+
return;
|
|
70
|
+
}
|
|
71
|
+
if (event.leftClick && index !== undefined) {
|
|
72
|
+
this.#list.clickItem(index);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
58
76
|
invalidate(): void {
|
|
59
77
|
this.#list.invalidate();
|
|
60
78
|
}
|
|
@@ -64,11 +82,9 @@ export class WebSearchTab implements SetupTab {
|
|
|
64
82
|
}
|
|
65
83
|
|
|
66
84
|
render(width: number): readonly string[] {
|
|
67
|
-
const lines = [
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
...this.#list.render(width),
|
|
71
|
-
];
|
|
85
|
+
const lines = [theme.fg("muted", "Choose the provider the web_search tool should prefer."), ""];
|
|
86
|
+
this.#listRowStart = lines.length;
|
|
87
|
+
lines.push(...this.#list.render(width));
|
|
72
88
|
const selected = this.#list.getSelectedItem();
|
|
73
89
|
if (selected) {
|
|
74
90
|
lines.push("", ...this.#readinessLines(selected.value).map(line => truncateToWidth(line, width)));
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type Component, matchesKey, padding, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
|
|
1
|
+
import { type Component, matchesKey, padding, parseSgrMouse, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
|
|
2
2
|
import { APP_NAME } from "@oh-my-pi/pi-utils";
|
|
3
3
|
import { gradientLogo, PI_LOGO } from "../components/welcome";
|
|
4
4
|
import { theme } from "../theme/theme";
|
|
@@ -61,6 +61,8 @@ export class SetupWizardComponent implements Component {
|
|
|
61
61
|
#timer: NodeJS.Timeout | undefined;
|
|
62
62
|
#done = Promise.withResolvers<void>();
|
|
63
63
|
#disposed = false;
|
|
64
|
+
/** Screen row where the active scene's body began in the last rendered frame. */
|
|
65
|
+
#bodyRowStart = 0;
|
|
64
66
|
|
|
65
67
|
constructor(
|
|
66
68
|
readonly ctx: InteractiveModeContext,
|
|
@@ -87,6 +89,10 @@ export class SetupWizardComponent implements Component {
|
|
|
87
89
|
|
|
88
90
|
handleInput(data: string): void {
|
|
89
91
|
if (this.#phase === "done") return;
|
|
92
|
+
if (data.startsWith("\x1b[<")) {
|
|
93
|
+
this.#handleMouse(data);
|
|
94
|
+
return;
|
|
95
|
+
}
|
|
90
96
|
if (matchesKey(data, "ctrl+c")) {
|
|
91
97
|
this.#beginOutro();
|
|
92
98
|
return;
|
|
@@ -116,6 +122,36 @@ export class SetupWizardComponent implements Component {
|
|
|
116
122
|
this.#activeScene?.handleInput?.(data);
|
|
117
123
|
}
|
|
118
124
|
|
|
125
|
+
/**
|
|
126
|
+
* Mouse handling for the fullscreen wizard (SGR tracking is on while the
|
|
127
|
+
* overlay holds the alternate screen). The frame paints from screen row 0,
|
|
128
|
+
* so report coordinates index directly into the last rendered lines: scene
|
|
129
|
+
* body rows start at #bodyRowStart, indented by SCENE_MARGIN_X. Scenes
|
|
130
|
+
* that implement routeMouse get hit-tested events (wheel, hover, click);
|
|
131
|
+
* for the rest a wheel notch falls back to an arrow key. A left click
|
|
132
|
+
* advances the splash/outro like Enter. Raw reports never reach scene
|
|
133
|
+
* keyboard input.
|
|
134
|
+
*/
|
|
135
|
+
#handleMouse(data: string): void {
|
|
136
|
+
const event = parseSgrMouse(data);
|
|
137
|
+
if (!event) return;
|
|
138
|
+
if (this.#phase === "splash" || this.#phase === "outro") {
|
|
139
|
+
if (!event.leftClick) return;
|
|
140
|
+
if (this.#phase === "splash") this.#beginScene();
|
|
141
|
+
else this.#complete();
|
|
142
|
+
return;
|
|
143
|
+
}
|
|
144
|
+
const scene = this.#activeScene;
|
|
145
|
+
if (!scene) return;
|
|
146
|
+
if (scene.routeMouse) {
|
|
147
|
+
scene.routeMouse(event, event.row - this.#bodyRowStart, event.col - SCENE_MARGIN_X);
|
|
148
|
+
return;
|
|
149
|
+
}
|
|
150
|
+
if (event.wheel !== null) {
|
|
151
|
+
scene.handleInput?.(event.wheel === -1 ? "\x1b[A" : "\x1b[B");
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
119
155
|
render(width: number): readonly string[] {
|
|
120
156
|
const safeWidth = Math.max(1, width);
|
|
121
157
|
const height = Math.max(1, this.ctx.ui.terminal.rows);
|
|
@@ -163,6 +199,7 @@ export class SetupWizardComponent implements Component {
|
|
|
163
199
|
header.push(indentLine(theme.fg("muted", subtitle), width, SCENE_MARGIN_X));
|
|
164
200
|
}
|
|
165
201
|
header.push("");
|
|
202
|
+
this.#bodyRowStart = header.length;
|
|
166
203
|
|
|
167
204
|
const footer = [
|
|
168
205
|
"",
|
package/src/modes/theme/theme.ts
CHANGED
|
@@ -715,7 +715,7 @@ const NERD_SYMBOLS: SymbolMap = {
|
|
|
715
715
|
"tool.debug": "\uEAD8",
|
|
716
716
|
"tool.mcp": "\uEB2D",
|
|
717
717
|
"tool.job": "\uEBA2",
|
|
718
|
-
"tool.task": "\
|
|
718
|
+
"tool.task": "\uf4a0",
|
|
719
719
|
"tool.todo": "\uEAB3",
|
|
720
720
|
"tool.memory": "\uEACE",
|
|
721
721
|
"tool.ask": "\uEAC7",
|
|
@@ -2762,7 +2762,7 @@ export function getSettingsListTheme(): SettingsListTheme {
|
|
|
2762
2762
|
label: (text: string, selected: boolean, changed: boolean) =>
|
|
2763
2763
|
changed ? theme.fg("statusLineGitDirty", text) : selected ? theme.fg("accent", text) : text,
|
|
2764
2764
|
value: (text: string, selected: boolean, changed: boolean) =>
|
|
2765
|
-
|
|
2765
|
+
changed ? theme.fg("statusLineGitDirty", text) : selected ? theme.fg("accent", text) : theme.fg("muted", text),
|
|
2766
2766
|
description: (text: string) => theme.fg("dim", text),
|
|
2767
2767
|
cursor: theme.fg("accent", `${theme.nav.cursor} `),
|
|
2768
2768
|
hint: (text: string) => theme.fg("dim", text),
|
package/src/modes/types.ts
CHANGED
|
@@ -81,6 +81,7 @@ export interface InteractiveModeContext {
|
|
|
81
81
|
pendingMessagesContainer: Container;
|
|
82
82
|
statusContainer: Container;
|
|
83
83
|
todoContainer: Container;
|
|
84
|
+
subagentContainer: Container;
|
|
84
85
|
btwContainer: Container;
|
|
85
86
|
omfgContainer: Container;
|
|
86
87
|
errorBannerContainer: Container;
|
|
@@ -287,6 +288,7 @@ export interface InteractiveModeContext {
|
|
|
287
288
|
handleResumeSession(sessionPath: string): Promise<void>;
|
|
288
289
|
handleSessionDeleteCommand(): Promise<void>;
|
|
289
290
|
showOAuthSelector(mode: "login" | "logout", providerId?: string): Promise<void>;
|
|
291
|
+
showResetUsageSelector(): Promise<void>;
|
|
290
292
|
showProviderSetup(): Promise<void>;
|
|
291
293
|
showHookConfirm(title: string, message: string): Promise<boolean>;
|
|
292
294
|
showDebugSelector(): Promise<void>;
|
|
@@ -6,6 +6,7 @@ import { countTokens } from "@oh-my-pi/pi-natives";
|
|
|
6
6
|
import { formatNumber } from "@oh-my-pi/pi-utils";
|
|
7
7
|
import type { Skill } from "../../extensibility/skills";
|
|
8
8
|
import type { AgentSession } from "../../session/agent-session";
|
|
9
|
+
import { estimateInlineSavings, type SnapcompactSavingsEstimate } from "../../session/snapcompact-inline";
|
|
9
10
|
import type { Tool } from "../../tools";
|
|
10
11
|
import type { theme as Theme } from "../theme/theme";
|
|
11
12
|
|
|
@@ -36,6 +37,8 @@ export interface ContextBreakdown {
|
|
|
36
37
|
usedTokens: number;
|
|
37
38
|
autoCompactBufferTokens: number;
|
|
38
39
|
freeTokens: number;
|
|
40
|
+
/** Estimated snapcompact wire savings; set when requested and a snapcompact.* setting is enabled. */
|
|
41
|
+
snapcompact?: SnapcompactSavingsEstimate;
|
|
39
42
|
}
|
|
40
43
|
|
|
41
44
|
const EMPTY_STRING_PARTS: readonly string[] = [];
|
|
@@ -109,7 +112,10 @@ function computeNonMessageBreakdown(session: AgentSession): {
|
|
|
109
112
|
* Compute a breakdown of estimated context usage by category for the active
|
|
110
113
|
* session and model.
|
|
111
114
|
*/
|
|
112
|
-
export function computeContextBreakdown(
|
|
115
|
+
export function computeContextBreakdown(
|
|
116
|
+
session: AgentSession,
|
|
117
|
+
options?: { snapcompactSavings?: boolean },
|
|
118
|
+
): ContextBreakdown {
|
|
113
119
|
const model = session.model;
|
|
114
120
|
const contextWindow = model?.contextWindow ?? 0;
|
|
115
121
|
|
|
@@ -169,6 +175,22 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
|
|
|
169
175
|
|
|
170
176
|
const freeTokens = Math.max(0, contextWindow - usedTokens - autoCompactBufferTokens);
|
|
171
177
|
|
|
178
|
+
// Estimated wire savings from snapcompact inline imaging. Opt-in: only the
|
|
179
|
+
// /context surfaces need it; other callers skip the extra token counting.
|
|
180
|
+
let snapcompactSavings: SnapcompactSavingsEstimate | undefined;
|
|
181
|
+
if (options?.snapcompactSavings) {
|
|
182
|
+
const renderSystemPrompt = session.settings.get("snapcompact.systemPrompt");
|
|
183
|
+
const renderToolResults = session.settings.get("snapcompact.toolResults");
|
|
184
|
+
if (renderSystemPrompt !== "none" || renderToolResults) {
|
|
185
|
+
snapcompactSavings = estimateInlineSavings({
|
|
186
|
+
options: { renderSystemPrompt, renderToolResults, shape: session.settings.get("snapcompact.shape") },
|
|
187
|
+
model,
|
|
188
|
+
systemPrompt: session.systemPrompt ?? [],
|
|
189
|
+
messages: session.messages ?? [],
|
|
190
|
+
});
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
172
194
|
return {
|
|
173
195
|
model,
|
|
174
196
|
contextWindow,
|
|
@@ -176,6 +198,7 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
|
|
|
176
198
|
usedTokens,
|
|
177
199
|
autoCompactBufferTokens,
|
|
178
200
|
freeTokens,
|
|
201
|
+
snapcompact: snapcompactSavings,
|
|
179
202
|
};
|
|
180
203
|
}
|
|
181
204
|
|
|
@@ -298,6 +321,57 @@ function buildLegendLines(breakdown: ContextBreakdown, theme: typeof Theme): str
|
|
|
298
321
|
);
|
|
299
322
|
}
|
|
300
323
|
|
|
324
|
+
const snap = breakdown.snapcompact;
|
|
325
|
+
if (snap) {
|
|
326
|
+
lines.push("");
|
|
327
|
+
if (!snap.visionCapable) {
|
|
328
|
+
lines.push(theme.fg("muted", "Snapcompact: inactive (model has no image input)"));
|
|
329
|
+
} else {
|
|
330
|
+
lines.push(theme.fg("muted", "Snapcompact (estimated wire savings)"));
|
|
331
|
+
if (snap.systemPrompt) {
|
|
332
|
+
const sp = snap.systemPrompt;
|
|
333
|
+
if (sp.applied) {
|
|
334
|
+
lines.push(
|
|
335
|
+
` System prompt (${sp.scope === "agents-md" ? "AGENTS.md" : "all"}): saves ${theme.bold(`~${formatNumber(sp.savedTokens)}`)} ` +
|
|
336
|
+
theme.fg(
|
|
337
|
+
"dim",
|
|
338
|
+
`(${formatNumber(sp.textTokens)} text → ${sp.frames} frame${sp.frames === 1 ? "" : "s"} ≈ ${formatNumber(sp.imageTokens)})`,
|
|
339
|
+
),
|
|
340
|
+
);
|
|
341
|
+
} else {
|
|
342
|
+
const reason =
|
|
343
|
+
sp.reason === "budget"
|
|
344
|
+
? "image budget exhausted"
|
|
345
|
+
: sp.reason === "empty"
|
|
346
|
+
? "nothing to image"
|
|
347
|
+
: "frames would not save tokens";
|
|
348
|
+
lines.push(
|
|
349
|
+
` System prompt (${sp.scope === "agents-md" ? "AGENTS.md" : "all"}): ${theme.fg("dim", `stays text (${reason})`)}`,
|
|
350
|
+
);
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
if (snap.toolResults) {
|
|
354
|
+
const tr = snap.toolResults;
|
|
355
|
+
if (tr.swapped > 0) {
|
|
356
|
+
lines.push(
|
|
357
|
+
` Tool results: saves ${theme.bold(`~${formatNumber(tr.savedTokens)}`)} ` +
|
|
358
|
+
theme.fg(
|
|
359
|
+
"dim",
|
|
360
|
+
`(${tr.swapped}/${tr.total} imaged, ${formatNumber(tr.textTokens)} text → ${tr.frames} frames ≈ ${formatNumber(tr.imageTokens)})`,
|
|
361
|
+
),
|
|
362
|
+
);
|
|
363
|
+
} else {
|
|
364
|
+
lines.push(` Tool results: ${theme.fg("dim", `none imaged (${tr.total} in history)`)}`);
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
if (snap.savedTokens > 0) {
|
|
368
|
+
lines.push(
|
|
369
|
+
` Next request: ${theme.bold(`~${formatNumber(Math.max(0, usedTokens - snap.savedTokens))}`)} ${theme.fg("dim", "tokens on the wire")}`,
|
|
370
|
+
);
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
|
|
301
375
|
return lines;
|
|
302
376
|
}
|
|
303
377
|
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Write a continuous, plain-prose technical explanation of how a relational database executes a SQL query: lexing and parsing, semantic analysis, logical plan construction, cost-based optimization, physical operator selection, and row-by-row execution through the iterator model.
|
|
2
|
+
|
|
3
|
+
Form:
|
|
4
|
+
- Plain paragraphs only: no headings, no lists, no code fences, no preamble.
|
|
5
|
+
- Do not wrap up early or summarize; keep writing until you are cut off.
|
|
6
|
+
|
|
7
|
+
Output only the explanation.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
=== CONTEXT FILE INSTRUCTIONS — read the image(s) below as the loaded context files replaced in the system prompt ===
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Loaded context-file instructions were moved to PNG image(s) attached below at the start of the first user message. Read every frame in order where this marker appears, then apply those instructions as if the original context-file text remained here.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
[
|
|
1
|
+
[The result of this tool call is in the PNG frame(s) below — read them as the output; they contain it verbatim. Delivering it as an image is deliberate harness behavior to save context, not a tool malfunction. NEVER re-run the call or report a tool issue because of it.]
|
|
@@ -1,40 +1,39 @@
|
|
|
1
1
|
Drives real Chromium tab; full puppeteer access via JS execution.
|
|
2
2
|
|
|
3
3
|
<instruction>
|
|
4
|
-
-
|
|
5
|
-
- Three actions
|
|
6
|
-
- `open` — acquire or reuse named tab
|
|
7
|
-
- `close` — release tab by `name`, or every tab with `all: true`.
|
|
8
|
-
- `run` — execute JS
|
|
9
|
-
- Tabs survive across `run` calls and
|
|
10
|
-
- Browser kinds
|
|
4
|
+
- Static content (articles, docs, issues/PRs, JSON, PDFs, feeds)? Use `read` with the URL. Reach for browser only for JS execution, authentication, or interactive actions.
|
|
5
|
+
- Three actions:
|
|
6
|
+
- `open` — acquire or reuse named tab (`name` defaults to `"main"`). Optional `url` (navigate once ready), `viewport`, `dialogs: "accept" | "dismiss"` (auto-handle `alert`/`confirm`/`beforeunload`; unhandled dialogs hang the page until you wire `page.on('dialog', …)`).
|
|
7
|
+
- `close` — release tab by `name`, or every tab with `all: true`. `kill: true` also terminates spawned-app process trees (default leaves them running).
|
|
8
|
+
- `run` — execute JS in an existing tab. `code` is the body of an async function with `page`, `browser`, `tab`, `display`, `assert`, `wait` in scope. Return value is JSON-stringified into the result; `display(value)` calls accumulate text/images.
|
|
9
|
+
- Tabs survive across `run` calls and in-process subagents — open once, reuse.
|
|
10
|
+
- Browser kinds (`app` field on `open`):
|
|
11
11
|
- default (no `app`) → headless Chromium with stealth patches.
|
|
12
|
-
- `app.path` → spawn absolute binary (Electron/CDP); a running instance with an open CDP port is reused. No stealth patches — NEVER tamper with real desktop app.
|
|
12
|
+
- `app.path` → spawn absolute binary (Electron/CDP); a running instance with an open CDP port is reused. No stealth patches — NEVER tamper with a real desktop app.
|
|
13
13
|
- `app.cdp_url` → connect to existing CDP endpoint (e.g. `http://127.0.0.1:9222`).
|
|
14
|
-
- `app.target` (with `path`/`cdp_url`) — substring matched against url+title to pick BrowserWindow
|
|
15
|
-
-
|
|
16
|
-
- `tab.goto(url, { waitUntil? })` — clears element cache
|
|
17
|
-
- `tab.observe({ includeAll?, viewportOnly? })` — accessibility snapshot
|
|
18
|
-
- `tab.id(n)` —
|
|
19
|
-
- `tab.click(selector)` / `tab.type(selector, text)` / `tab.fill(selector, value)` / `tab.press(key, { selector? })` / `tab.scroll(dx, dy)
|
|
20
|
-
- `tab.waitFor(selector)` —
|
|
21
|
-
- `tab.drag(from, to)` —
|
|
22
|
-
- `tab.scrollIntoView(selector)` —
|
|
23
|
-
- `tab.select(selector, …values)` — set
|
|
24
|
-
- `tab.uploadFile(selector, …filePaths)` — attach files to `<input type="file"
|
|
25
|
-
- `tab.waitForUrl(pattern, { timeout? })` —
|
|
26
|
-
- `tab.waitForResponse(pattern, { timeout? })` —
|
|
27
|
-
- `tab.evaluate(fn, …args)` —
|
|
28
|
-
- `tab.screenshot({ selector?, fullPage?, save?, silent? })` —
|
|
29
|
-
- `tab.extract(format = "markdown")` —
|
|
30
|
-
- Selectors
|
|
31
|
-
- Default `tab.observe()` over `tab.screenshot()` for page state. Screenshot only when visual appearance matters.
|
|
14
|
+
- `app.target` (with `path`/`cdp_url`) — substring matched against url+title to pick a BrowserWindow.
|
|
15
|
+
- `tab` helpers; drop to raw puppeteer `page` for anything they don't cover:
|
|
16
|
+
- `tab.goto(url, { waitUntil? })` — navigate; clears element cache.
|
|
17
|
+
- `tab.observe({ includeAll?, viewportOnly? })` — accessibility snapshot: `{ url, title, viewport, scroll, elements: [{ id, role, name, value, states, … }] }`. Ids stable until next observe/goto.
|
|
18
|
+
- `tab.id(n)` — element id from last observe → `ElementHandle` (`.click()`, `.type()`, …).
|
|
19
|
+
- `tab.click(selector)` / `tab.type(selector, text)` / `tab.fill(selector, value)` / `tab.press(key, { selector? })` / `tab.scroll(dx, dy)`.
|
|
20
|
+
- `tab.waitFor(selector)` — wait until attached; returns the `ElementHandle`.
|
|
21
|
+
- `tab.drag(from, to)` — endpoints: selector (center-to-center) or `{ x, y }` viewport point (canvases, sliders).
|
|
22
|
+
- `tab.scrollIntoView(selector)` — center element in viewport; use before clicking off-screen elements.
|
|
23
|
+
- `tab.select(selector, …values)` — set `<select>` option(s); returns resulting selection. `tab.fill` NEVER works for selects.
|
|
24
|
+
- `tab.uploadFile(selector, …filePaths)` — attach files to `<input type="file">`; paths relative to cwd.
|
|
25
|
+
- `tab.waitForUrl(pattern, { timeout? })` — substring or `RegExp`; polls `location.href` (catches SPA pushState). Returns matched URL.
|
|
26
|
+
- `tab.waitForResponse(pattern, { timeout? })` — substring, `RegExp`, or `(response) => boolean`; returns puppeteer `HTTPResponse` (`.text()`/`.json()`/`.status()`/`.headers()`).
|
|
27
|
+
- `tab.evaluate(fn, …args)` — `page.evaluate` with abort signal wired; use for ad-hoc DOM reads.
|
|
28
|
+
- `tab.screenshot({ selector?, fullPage?, save?, silent? })` — capture and attach for viewing (`silent: true` skips). Pass `save` (a path) only when a later step needs the file.
|
|
29
|
+
- `tab.extract(format = "markdown")` — Readability-extracted content (`"markdown"` | `"text"`); throws when nothing readable.
|
|
30
|
+
- Selectors: CSS plus puppeteer handlers `aria/Sign in`, `text/Continue`, `xpath/…`, `pierce/…`; Playwright-style `p-aria/…`, `p-text/…` normalized.
|
|
32
31
|
</instruction>
|
|
33
32
|
|
|
34
33
|
<critical>
|
|
35
|
-
- MUST
|
|
36
|
-
-
|
|
37
|
-
-
|
|
34
|
+
- MUST `open` before `run` — `run` never creates a tab.
|
|
35
|
+
- Default to `tab.observe()` for page state — structured data with actionable element ids. Screenshot ONLY when visual appearance matters.
|
|
36
|
+
- Navigation invalidates element ids — re-observe before using them.
|
|
38
37
|
- `code` runs with full Node access. Treat as your code, not sandboxed code.
|
|
39
38
|
</critical>
|
|
40
39
|
|
|
@@ -46,28 +45,19 @@ Drives real Chromium tab; full puppeteer access via JS execution.
|
|
|
46
45
|
# Click an observed element by id
|
|
47
46
|
`{"action":"run","name":"docs","code":"const obs = await tab.observe(); const link = obs.elements.find(e => e.role === 'link' && e.name === 'Sign in'); assert(link, 'Sign in link missing'); await (await tab.id(link.id)).click();"}`
|
|
48
47
|
|
|
49
|
-
# Screenshot to look at the page — no save path
|
|
50
|
-
`{"action":"run","name":"docs","code":"await tab.screenshot();"}`
|
|
51
|
-
|
|
52
|
-
# Keep a full-page screenshot on disk for a later step
|
|
53
|
-
`{"action":"run","name":"docs","code":"await tab.screenshot({ fullPage: true, save: 'screenshot.png' });"}`
|
|
54
|
-
|
|
55
48
|
# Fill and submit a form via selectors
|
|
56
49
|
`{"action":"run","name":"docs","code":"await tab.fill('input[name=email]', 'me@example.com'); await tab.click('text/Continue');"}`
|
|
57
50
|
|
|
51
|
+
# Screenshot to look at the page — no save path
|
|
52
|
+
`{"action":"run","name":"docs","code":"await tab.screenshot();"}`
|
|
53
|
+
|
|
58
54
|
# Attach to an existing Electron app
|
|
59
55
|
`{"action":"open","name":"cursor","app":{"path":"/Applications/Cursor.app/Contents/MacOS/Cursor"}}`
|
|
60
56
|
|
|
61
|
-
# Close
|
|
62
|
-
`{"action":"close","name":"docs"}`
|
|
63
|
-
|
|
64
|
-
# Close every tab; leave spawned apps running
|
|
65
|
-
`{"action":"close","all":true}`
|
|
66
|
-
|
|
67
|
-
# Close every tab and kill spawned-app processes too
|
|
57
|
+
# Close every tab and kill spawned-app processes
|
|
68
58
|
`{"action":"close","all":true,"kill":true}`
|
|
69
59
|
</examples>
|
|
70
60
|
|
|
71
61
|
<output>
|
|
72
|
-
|
|
62
|
+
Per call: `display(value)` outputs (text/images), then the JSON-stringified return value of `code`. `run` always produces at least a status line.
|
|
73
63
|
</output>
|
|
@@ -1,92 +1,69 @@
|
|
|
1
1
|
Run code in a persistent kernel using a list of cells.
|
|
2
2
|
|
|
3
3
|
<instruction>
|
|
4
|
-
|
|
4
|
+
Cells run in array order. State persists per language — across cells, tool calls, and `task` subagents: variables either side defines are visible to the other. Stage helpers, datasets, or live clients once; subagents use them directly — no re-importing or serializing across the boundary.
|
|
5
5
|
|
|
6
6
|
Cell fields:
|
|
7
7
|
|
|
8
8
|
- `language` — {{#if py}}`"py"` for the IPython kernel{{/if}}{{#ifAll py js}}, {{/ifAll}}{{#if js}}`"js"` for the persistent JavaScript VM{{/if}}.
|
|
9
|
-
- `code` — cell body, verbatim. Newlines
|
|
10
|
-
- `title` (optional) — short label
|
|
11
|
-
- `timeout` (optional) — per-cell
|
|
12
|
-
- `reset` (optional) — wipe this cell's language kernel
|
|
9
|
+
- `code` — cell body, verbatim. Newlines and quotes JSON-encoded; no fences, no headers.
|
|
10
|
+
- `title` (optional) — short transcript label (e.g. `"imports"`).
|
|
11
|
+
- `timeout` (optional) — per-cell seconds (1-3600, default 30). Bounds the cell's own work only; the clock pauses while `agent()`/`parallel()`/`completion()` calls are in flight, so fanouts never need a raise. Raise only for heavy local compute or long non-agent tool calls.
|
|
12
|
+
- `reset` (optional) — wipe this cell's language kernel first.{{#ifAll py js}} Per-language: a `py` reset never touches the JS VM.{{/ifAll}}
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
- Pass multiple small cells in one call.
|
|
18
|
-
- Define small reusable functions for individual debugging.
|
|
19
|
-
- Put workflow explanations in the assistant message or `title` — never inside cell code.
|
|
20
|
-
{{#if py}}- Python cells run inside an IPython kernel with a live event loop. Use top-level `await` directly (e.g. `await main()`); `asyncio.run(…)` raises "cannot be called from a running event loop".{{/if}}
|
|
21
|
-
**On failure:** errors identify the failing cell (e.g., "Cell 3 failed"). Resubmit only the fixed cell (or fixed cell + remaining cells).
|
|
14
|
+
Work incrementally: one logical step per cell (imports, define, test, use); pass multiple small cells per call; define small reusable functions for individual debugging. Workflow explanations go in the assistant message or `title`, never inside cell code.
|
|
15
|
+
{{#if py}}Python runs in IPython with a live event loop: use top-level `await` directly; `asyncio.run(…)` raises "cannot be called from a running event loop".{{/if}}
|
|
16
|
+
On failure, errors name the failing cell ("Cell 3 failed") — resubmit only the fixed cell (plus any remaining).
|
|
22
17
|
</instruction>
|
|
23
18
|
|
|
24
19
|
<prelude>
|
|
25
|
-
{{#ifAll py js}}Same helpers in both runtimes
|
|
20
|
+
{{#ifAll py js}}Same helpers in both runtimes, same positional order. Python: helpers run synchronously; trailing options are keyword args. JavaScript: helpers are async and `await`able; trailing options are ONE trailing object literal, never positional (extra positional args throw).{{else}}{{#if py}}Helpers run synchronously. Trailing options are keyword arguments.{{/if}}{{#if js}}Helpers are async and `await`able. Trailing options are ONE trailing object literal, never positional (extra positional args throw).{{/if}}{{/ifAll}}
|
|
26
21
|
```
|
|
27
22
|
display(value) → None
|
|
28
|
-
Render
|
|
23
|
+
Render value in cell output; presentable values (figures, images, dataframes) are shown natively.
|
|
29
24
|
print(value, ...) → None
|
|
30
|
-
Print to
|
|
25
|
+
Print to text output.
|
|
31
26
|
read(path, offset?=1, limit?=None) → str
|
|
32
|
-
Read file
|
|
27
|
+
Read file as text; offset/limit are 1-indexed lines. Accepts `local://…`.
|
|
33
28
|
write(path, content) → str
|
|
34
|
-
Write
|
|
29
|
+
Write file (creates parents); returns resolved path. `local://…` persists across turns / subagents.
|
|
35
30
|
append(path, content) → str
|
|
36
|
-
Append
|
|
31
|
+
Append to file; returns resolved path. Accepts `local://…`.
|
|
37
32
|
tree(path?=".", max_depth?=3, show_hidden?=False) → str
|
|
38
|
-
|
|
33
|
+
Directory tree.
|
|
39
34
|
diff(a, b) → str
|
|
40
|
-
Unified diff
|
|
35
|
+
Unified diff of two files.
|
|
41
36
|
env(key?=None, value?=None) → str | None | dict
|
|
42
|
-
No args → full
|
|
37
|
+
No args → full env dict; one → value of `key`; two → set `key=value`, return value.
|
|
43
38
|
output(*ids, format?="raw", query?=None, offset?=None, limit?=None) → str | dict | list[dict]
|
|
44
|
-
Read task/agent output by
|
|
39
|
+
Read task/agent output by id; one id → text/dict, multiple → list.
|
|
45
40
|
tool.<name>(args) → unknown
|
|
46
|
-
Invoke any session tool
|
|
41
|
+
Invoke any session tool; `args` is its parameter object.
|
|
47
42
|
completion(prompt, model?="default", system?=None, schema?=None) → str | dict
|
|
48
|
-
Oneshot
|
|
43
|
+
Oneshot stateless completion (no history, no tools). `model` tier: "smol" (fast) | "default" (session model) | "slow" (most capable). JSON-Schema `schema` forces structured output, returns parsed object.
|
|
49
44
|
{{#if spawns}}agent(prompt, agent_type?="task", model?=None, label?=None, schema?=None) → str | dict
|
|
50
|
-
Run a subagent
|
|
51
|
-
{{#if js}}
|
|
45
|
+
Run a subagent, return its final output. `agent_type`/`agentType` picks another discovered agent; `schema` as in completion(). Share background via `local://` files referenced in the prompt.
|
|
46
|
+
{{#if js}} JS: options are ONE trailing object — agent(prompt, { agentType, schema }).
|
|
52
47
|
{{/if}}
|
|
53
48
|
{{/if}}
|
|
54
49
|
parallel(thunks) → list
|
|
55
|
-
Run thunks
|
|
50
|
+
Run thunks through a bounded pool (as wide as a `task` batch — don't pre-shrink), preserving input order. Barrier: returns when all finish; a throwing thunk propagates.
|
|
56
51
|
pipeline(items, ...stages) → list
|
|
57
|
-
Map
|
|
52
|
+
Map items through one-arg stages left-to-right, barrier between stages; stage 1 gets the item, later stages the previous result. Same pool width as parallel().
|
|
58
53
|
log(message) → None
|
|
59
|
-
|
|
54
|
+
Progress line above the status tree.
|
|
60
55
|
phase(title) → None
|
|
61
|
-
Start a phase
|
|
56
|
+
Start a phase grouping subsequent status lines.
|
|
62
57
|
budget → per-turn token budget
|
|
63
|
-
{{#if py}}`budget.total` (ceiling or None), `budget.spent()
|
|
58
|
+
{{#if py}}`budget.total` (ceiling or None), `budget.spent()`, `budget.remaining()` (math.inf when no ceiling), `budget.hard` (bool).{{/if}}{{#if js}}`await budget.total()` (ceiling or null), `await budget.spent()`, `await budget.remaining()` (Infinity when no ceiling), `await budget.hard()`.{{/if}} Ceiling comes from a `+Nk` directive (advisory) or `+Nk!`/Goal Mode (hard — `agent()` refuses to spawn past it); otherwise None/null, spend still tracked across the turn.
|
|
64
59
|
```
|
|
65
60
|
</prelude>
|
|
66
61
|
|
|
67
|
-
<output>
|
|
68
|
-
Cells render like a Jupyter notebook. `display(value)` renders non-presentable data as an interactive JSON tree. Presentable values (figures, images, dataframes, etc.) use their native representation.
|
|
69
|
-
</output>
|
|
70
|
-
|
|
71
|
-
<caution>
|
|
72
|
-
{{#if js}}- **js**: the VM exposes a selective `process` subset, Web APIs, `Buffer`, `fs/promises`, and the `Bun` global.
|
|
73
|
-
{{/if}}</caution>
|
|
74
|
-
|
|
75
62
|
<example>
|
|
76
|
-
{{#if py}}```json
|
|
77
63
|
{
|
|
78
64
|
"cells": [
|
|
79
65
|
{ "language": "py", "title": "imports", "timeout": 10, "code": "import json\nfrom pathlib import Path" },
|
|
80
66
|
{ "language": "py", "title": "load config", "code": "data = json.loads(read('package.json'))\ndisplay(data)" }
|
|
81
67
|
]
|
|
82
68
|
}
|
|
83
|
-
```{{/if}}{{#ifAll py js}}
|
|
84
|
-
|
|
85
|
-
{{/ifAll}}{{#if js}}```json
|
|
86
|
-
{
|
|
87
|
-
"cells": [
|
|
88
|
-
{ "language": "js", "title": "summary", "reset": true, "code": "const data = JSON.parse(await read('package.json'));\ndisplay(data);\nreturn data.name;" }
|
|
89
|
-
]
|
|
90
|
-
}
|
|
91
|
-
```{{/if}}
|
|
92
69
|
</example>
|