npm - @oh-my-pi/pi-coding-agent - Versions diffs - 14.5.11 → 14.5.13 - Mend

@oh-my-pi/pi-coding-agent 14.5.11 → 14.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89)

package/CHANGELOG.md +58 -0
package/package.json +18 -10
package/src/cli/jupyter-cli.ts +1 -1
package/src/config/model-equivalence.ts +49 -16
package/src/config/model-registry.ts +100 -25
package/src/config/model-resolver.ts +29 -15
package/src/config/settings-schema.ts +20 -6
package/src/config/settings.ts +9 -8
package/src/config.ts +9 -0
package/src/eval/backend.ts +43 -0
package/src/eval/eval.lark +43 -0
package/src/eval/index.ts +5 -0
package/src/eval/js/context-manager.ts +717 -0
package/src/eval/js/executor.ts +131 -0
package/src/eval/js/index.ts +46 -0
package/src/eval/js/prelude.ts +2 -0
package/src/eval/js/prelude.txt +84 -0
package/src/eval/js/tool-bridge.ts +124 -0
package/src/eval/parse.ts +337 -0
package/src/{ipy → eval/py}/executor.ts +2 -180
package/src/{ipy → eval/py}/gateway-coordinator.ts +4 -3
package/src/eval/py/index.ts +58 -0
package/src/{ipy → eval/py}/kernel.ts +5 -41
package/src/{ipy → eval/py}/prelude.py +39 -227
package/src/eval/types.ts +48 -0
package/src/export/html/template.generated.ts +1 -1
package/src/export/html/template.js +23 -17
package/src/extensibility/extensions/types.ts +2 -3
package/src/internal-urls/docs-index.generated.ts +5 -5
package/src/lsp/client.ts +9 -0
package/src/lsp/index.ts +395 -0
package/src/lsp/types.ts +15 -4
package/src/main.ts +25 -14
package/src/mcp/oauth-flow.ts +1 -1
package/src/memories/index.ts +1 -1
package/src/modes/acp/acp-event-mapper.ts +1 -1
package/src/modes/components/{python-execution.ts → eval-execution.ts} +11 -4
package/src/modes/components/login-dialog.ts +1 -1
package/src/modes/components/oauth-selector.ts +2 -1
package/src/modes/components/tool-execution.ts +3 -4
package/src/modes/controllers/command-controller.ts +28 -8
package/src/modes/controllers/input-controller.ts +4 -4
package/src/modes/controllers/selector-controller.ts +2 -1
package/src/modes/interactive-mode.ts +4 -5
package/src/modes/types.ts +3 -3
package/src/modes/utils/ui-helpers.ts +2 -2
package/src/prompts/system/system-prompt.md +3 -3
package/src/prompts/tools/atom.md +3 -2
package/src/prompts/tools/browser.md +61 -16
package/src/prompts/tools/eval.md +92 -0
package/src/prompts/tools/lsp.md +7 -3
package/src/sdk.ts +45 -31
package/src/session/agent-session.ts +44 -54
package/src/session/messages.ts +1 -1
package/src/slash-commands/builtin-registry.ts +1 -1
package/src/system-prompt.ts +34 -66
package/src/task/executor.ts +5 -9
package/src/tools/browser/attach.ts +175 -0
package/src/tools/browser/launch.ts +576 -0
package/src/tools/browser/readable.ts +90 -0
package/src/tools/browser/registry.ts +198 -0
package/src/tools/browser/render.ts +212 -0
package/src/tools/browser/tab-protocol.ts +101 -0
package/src/tools/browser/tab-supervisor.ts +429 -0
package/src/tools/browser/tab-worker-entry.ts +21 -0
package/src/tools/browser/tab-worker.ts +1006 -0
package/src/tools/browser.ts +231 -1567
package/src/tools/checkpoint.ts +2 -2
package/src/tools/{python.ts → eval.ts} +324 -315
package/src/tools/exit-plan-mode.ts +1 -1
package/src/tools/index.ts +62 -100
package/src/tools/plan-mode-guard.ts +27 -1
package/src/tools/read.ts +0 -6
package/src/tools/recipe/runners/pkg.ts +34 -32
package/src/tools/renderers.ts +4 -2
package/src/tools/resolve.ts +7 -2
package/src/tools/todo-write.ts +0 -1
package/src/tools/tool-timeouts.ts +2 -2
package/src/utils/markit.ts +15 -7
package/src/utils/tools-manager.ts +5 -5
package/src/web/search/index.ts +5 -5
package/src/web/search/provider.ts +121 -39
package/src/web/search/providers/gemini.ts +2 -2
package/src/web/search/render.ts +2 -2
package/src/ipy/modules.ts +0 -144
package/src/prompts/tools/python.md +0 -57
package/src/{ipy → eval/py}/cancellation.ts +0 -0
package/src/{ipy → eval/py}/prelude.ts +0 -0
package/src/{ipy → eval/py}/runtime.ts +0 -0

package/src/modes/controllers/command-controller.ts CHANGED Viewed

@@ -14,14 +14,14 @@ import { formatDuration, Snowflake, setProjectDir } from "@oh-my-pi/pi-utils";
 import { $ } from "bun";
 import { reset as resetCapabilities } from "../../capability";
 import { clearClaudePluginRootsCache } from "../../discovery/helpers";
+import { getGatewayStatus } from "../../eval/py/gateway-coordinator";
 import { loadCustomShare } from "../../export/custom-share";
 import type { CompactOptions } from "../../extensibility/extensions/types";
-import { getGatewayStatus } from "../../ipy/gateway-coordinator";
 import { buildMemoryToolDeveloperInstructions, clearMemoryData, enqueueMemoryConsolidation } from "../../memories";
 import { BashExecutionComponent } from "../../modes/components/bash-execution";
 import { BorderedLoader } from "../../modes/components/bordered-loader";
 import { DynamicBorder } from "../../modes/components/dynamic-border";
-import { PythonExecutionComponent } from "../../modes/components/python-execution";
+import { EvalExecutionComponent } from "../../modes/components/eval-execution";
 import { getMarkdownTheme, getSymbolTheme, theme } from "../../modes/theme/theme";
 import type { InteractiveModeContext } from "../../modes/types";
 import { computeContextBreakdown, renderContextUsage } from "../../modes/utils/context-usage";
@@ -285,9 +285,26 @@ export class CommandController {
 		this.#doCopy(combined, `Copied ${matches.length} code block${matches.length > 1 ? "s" : ""} to clipboard`);
 	}
+	#extractEvalCode(args: unknown): string | undefined {
+		if (!args || typeof args !== "object") return undefined;
+		const cells = (args as { cells?: unknown }).cells;
+		if (!Array.isArray(cells)) return undefined;
+		const codeBlocks: string[] = [];
+		for (const cell of cells) {
+			if (!cell || typeof cell !== "object") continue;
+			const code = (cell as { code?: unknown }).code;
+			if (typeof code === "string" && code.length > 0) {
+				codeBlocks.push(code);
+			}
+		}
+		return codeBlocks.length > 0 ? codeBlocks.join("\n\n") : undefined;
+	}
 	#copyLastCommand() {
 		const messages = this.ctx.session.messages;
-		// Walk backwards to find the last bash/python tool call
+		// Walk backwards to find the last bash/eval tool call
 		for (let i = messages.length - 1; i >= 0; i--) {
 			const msg = messages[i];
 			if (msg.role !== "assistant") continue;
@@ -298,13 +315,16 @@ export class CommandController {
 					this.#doCopy(tc.arguments.command, "Copied last bash command to clipboard");
 					return;
 				}
-				if (tc.name === "python" && typeof tc.arguments.code === "string") {
-					this.#doCopy(tc.arguments.code, "Copied last python code to clipboard");
-					return;
+				if (tc.name === "eval") {
+					const code = this.#extractEvalCode(tc.arguments);
+					if (code) {
+						this.#doCopy(code, "Copied last eval code to clipboard");
+						return;
+					}
 				}
 			}
 		}
-		this.ctx.showWarning("No bash or python command found in the conversation.");
+		this.ctx.showWarning("No bash or eval command found in the conversation.");
 	}
 	#doCopy(content: string, label: string) {
@@ -779,7 +799,7 @@ export class CommandController {
 	async handlePythonCommand(code: string, excludeFromContext = false): Promise<void> {
 		const isDeferred = this.ctx.session.isStreaming;
-		this.ctx.pythonComponent = new PythonExecutionComponent(code, this.ctx.ui, excludeFromContext);
+		this.ctx.pythonComponent = new EvalExecutionComponent(code, this.ctx.ui, excludeFromContext);
 		if (isDeferred) {
 			this.ctx.pendingMessagesContainer.addChild(this.ctx.pythonComponent);

package/src/modes/controllers/input-controller.ts CHANGED Viewed

@@ -37,7 +37,7 @@ export class InputController {
 					this.ctx.session.isCompacting ||
 					this.ctx.session.isGeneratingHandoff ||
 					this.ctx.session.isBashRunning ||
-					this.ctx.session.isPythonRunning ||
+					this.ctx.session.isEvalRunning ||
 					this.ctx.autoCompactionLoader ||
 					this.ctx.retryLoader ||
 					this.ctx.autoCompactionEscapeHandler ||
@@ -67,8 +67,8 @@ export class InputController {
 				this.ctx.editor.setText("");
 				this.ctx.isBashMode = false;
 				this.ctx.updateEditorBorderColor();
-			} else if (this.ctx.session.isPythonRunning) {
-				this.ctx.session.abortPython();
+			} else if (this.ctx.session.isEvalRunning) {
+				this.ctx.session.abortEval();
 			} else if (this.ctx.isPythonMode) {
 				this.ctx.editor.setText("");
 				this.ctx.isPythonMode = false;
@@ -304,7 +304,7 @@ export class InputController {
 				const isExcluded = text.startsWith("$$");
 				const code = isExcluded ? text.slice(2).trim() : text.slice(1).trim();
 				if (code) {
-					if (this.ctx.session.isPythonRunning) {
+					if (this.ctx.session.isEvalRunning) {
 						this.ctx.showWarning("A Python execution is already running. Press Esc to cancel it first.");
 						this.ctx.editor.setText(text);
 						return;

package/src/modes/controllers/selector-controller.ts CHANGED Viewed

@@ -1,7 +1,8 @@
 import * as os from "node:os";
 import * as path from "node:path";
 import { ThinkingLevel } from "@oh-my-pi/pi-agent-core";
-import { getOAuthProviders, type OAuthProvider } from "@oh-my-pi/pi-ai";
+import { getOAuthProviders } from "@oh-my-pi/pi-ai/utils/oauth";
+import type { OAuthProvider } from "@oh-my-pi/pi-ai/utils/oauth/types";
 import type { Component, OverlayHandle } from "@oh-my-pi/pi-tui";
 import { Input, Loader, Spacer, Text } from "@oh-my-pi/pi-tui";
 import { getAgentDbPath, getConfigDirName, getProjectDir } from "@oh-my-pi/pi-utils";

package/src/modes/interactive-mode.ts CHANGED Viewed

@@ -57,10 +57,10 @@ import type { AssistantMessageComponent } from "./components/assistant-message";
 import type { BashExecutionComponent } from "./components/bash-execution";
 import { CustomEditor } from "./components/custom-editor";
 import { DynamicBorder } from "./components/dynamic-border";
+import type { EvalExecutionComponent } from "./components/eval-execution";
 import type { HookEditorComponent } from "./components/hook-editor";
 import type { HookInputComponent } from "./components/hook-input";
 import type { HookSelectorComponent } from "./components/hook-selector";
-import type { PythonExecutionComponent } from "./components/python-execution";
 import { StatusLineComponent } from "./components/status-line";
 import type { ToolExecutionHandle } from "./components/tool-execution";
 import { WelcomeComponent, type LspServerInfo as WelcomeLspServerInfo } from "./components/welcome";
@@ -166,8 +166,8 @@ export class InteractiveMode implements InteractiveModeContext {
 	pendingTools = new Map<string, ToolExecutionHandle>();
 	pendingBashComponents: BashExecutionComponent[] = [];
 	bashComponent: BashExecutionComponent | undefined = undefined;
-	pendingPythonComponents: PythonExecutionComponent[] = [];
-	pythonComponent: PythonExecutionComponent | undefined = undefined;
+	pendingPythonComponents: EvalExecutionComponent[] = [];
+	pythonComponent: EvalExecutionComponent | undefined = undefined;
 	isPythonMode = false;
 	streamingComponent: AssistantMessageComponent | undefined = undefined;
 	streamingMessage: AssistantMessage | undefined = undefined;
@@ -335,8 +335,7 @@ export class InteractiveMode implements InteractiveModeContext {
 	async init(): Promise<void> {
 		if (this.isInitialized) return;
-		logger.time("InteractiveMode.init:keybindings");
-		this.keybindings = KeybindingsManager.create();
+		this.keybindings = logger.time("InteractiveMode.init:keybindings", () => KeybindingsManager.create());
 		// Register session manager flush for signal handlers (SIGINT, SIGTERM, SIGHUP)
 		this.#cleanupUnsubscribe = postmortem.register("session-manager-flush", () => this.sessionManager.flush());

package/src/modes/types.ts CHANGED Viewed

@@ -18,10 +18,10 @@ import type { ExitPlanModeDetails, LspStartupServerInfo } from "../tools";
 import type { AssistantMessageComponent } from "./components/assistant-message";
 import type { BashExecutionComponent } from "./components/bash-execution";
 import type { CustomEditor } from "./components/custom-editor";
+import type { EvalExecutionComponent } from "./components/eval-execution";
 import type { HookEditorComponent } from "./components/hook-editor";
 import type { HookInputComponent } from "./components/hook-input";
 import type { HookSelectorComponent } from "./components/hook-selector";
-import type { PythonExecutionComponent } from "./components/python-execution";
 import type { StatusLineComponent } from "./components/status-line";
 import type { ToolExecutionHandle } from "./components/tool-execution";
 import type { OAuthManualInputManager } from "./oauth-manual-input";
@@ -93,8 +93,8 @@ export interface InteractiveModeContext {
 	pendingTools: Map<string, ToolExecutionHandle>;
 	pendingBashComponents: BashExecutionComponent[];
 	bashComponent: BashExecutionComponent | undefined;
-	pendingPythonComponents: PythonExecutionComponent[];
-	pythonComponent: PythonExecutionComponent | undefined;
+	pendingPythonComponents: EvalExecutionComponent[];
+	pythonComponent: EvalExecutionComponent | undefined;
 	isPythonMode: boolean;
 	streamingComponent: AssistantMessageComponent | undefined;
 	streamingMessage: AssistantMessage | undefined;

package/src/modes/utils/ui-helpers.ts CHANGED Viewed

@@ -8,7 +8,7 @@ import { BranchSummaryMessageComponent } from "../../modes/components/branch-sum
 import { CompactionSummaryMessageComponent } from "../../modes/components/compaction-summary-message";
 import { CustomMessageComponent } from "../../modes/components/custom-message";
 import { DynamicBorder } from "../../modes/components/dynamic-border";
-import { PythonExecutionComponent } from "../../modes/components/python-execution";
+import { EvalExecutionComponent } from "../../modes/components/eval-execution";
 import { ReadToolGroupComponent } from "../../modes/components/read-tool-group";
 import { SkillMessageComponent } from "../../modes/components/skill-message";
 import { ToolExecutionComponent } from "../../modes/components/tool-execution";
@@ -84,7 +84,7 @@ export class UiHelpers {
 				break;
 			}
 			case "pythonExecution": {
-				const component = new PythonExecutionComponent(message.code, this.ctx.ui, message.excludeFromContext);
+				const component = new EvalExecutionComponent(message.code, this.ctx.ui, message.excludeFromContext);
 				if (message.output) {
 					component.appendOutput(message.output);
 				}

package/src/prompts/system/system-prompt.md CHANGED Viewed

@@ -216,12 +216,12 @@ Most tools have a `{{intentField}}` parameter. Fill it with a concise intent in
 If the task may involve external systems, SaaS APIs, chat, tickets, databases, deployments, or other non-local integrations, you **SHOULD** call `{{toolRefs.search_tool_bm25}}` before concluding no such tool exists.
 {{/if}}
-{{#ifAny (includes tools "python") (includes tools "bash")}}
+{{#ifAny (includes tools "eval") (includes tools "bash")}}
 ### Tool priority
 1. Use specialized tools first{{#ifAny (includes tools "read") (includes tools "search") (includes tools "find") (includes tools "edit") (includes tools "lsp")}}: {{#has tools "read"}}`{{toolRefs.read}}`, {{/has}}{{#has tools "search"}}`{{toolRefs.search}}`, {{/has}}{{#has tools "find"}}`{{toolRefs.find}}`, {{/has}}{{#has tools "edit"}}`{{toolRefs.edit}}`, {{/has}}{{#has tools "lsp"}}`{{toolRefs.lsp}}`{{/has}}{{/ifAny}}
-2. Python: logic, loops, processing, display
+2. Eval: logic, loops, processing, display (default python; pass `language: "js"` for in-process JavaScript)
 3. Bash: simple one-liners only
-You **MUST NOT** use Python or Bash when a specialized tool exists.
+You **MUST NOT** use Eval or Bash when a specialized tool exists.
 {{/ifAny}}
 {{#ifAny (includes tools "read") (includes tools "write") (includes tools "search") (includes tools "find") (includes tools "edit")}}

package/src/prompts/tools/atom.md CHANGED Viewed

@@ -36,7 +36,7 @@ Lid=       blank the anchored line's content but KEEP the line (results in an em
 - To insert ABOVE a line, you **MUST** use `^Lid` then `+TEXT`. To insert above line 1, you **MUST** use `^` (BOF) then `+TEXT`. To insert below a line, you **MUST** use `@Lid` then `+TEXT`.
 - Multiple `---PATH` sections **MAY** appear in one input; each section is applied in order.
 - `!rm` / `!mv DEST` **MUST NOT** be combined with line edits in the same section.
-- Lids contain a content hash. If a line has changed since you read it, the tool rejects the edit and shows the current content; you **MUST** re-read and retry with fresh Lids. Small drift (≤5 lines) where the original hash still matches a nearby line auto-rebases with a warning. Larger shifts may show a hash-only candidate, but two-letter hashes collide; verify surrounding content or re-read before using it.
+- Lids contain a content hash. If a line has changed since you read it, the tool rejects the edit and shows the current content; you **MUST** re-read and retry with fresh Lids.
 - After `+TEXT` (or `+`) the cursor advances past the inserted line, so consecutive `+TEXT` ops stack in order. After `Lid=TEXT` the cursor sits on the modified anchor; after `-Lid` it sits on the slot the deleted line vacated. You **MUST** use a fresh `@Lid` / `^Lid` / `^` / `$` to reposition.
 - The tool is syntax-blind: it will not check brackets, indentation, table column counts, or fence integrity. You **MUST** verify indentation-sensitive or structured files after editing (Python, Markdown tables/fences).
 - A section whose PATH does not yet exist creates the file from your `+TEXT` lines (use `^` or `$` then `+TEXT…`). No separate "create file" op is needed.
@@ -83,7 +83,7 @@ Lid=       blank the anchored line's content but KEEP the line (results in an em
 \	return (name || DEF).trim().toUpperCase();
 \}
-# Replace a block with a longer multi-line block, including blank lines (canonical form for refactors)
+# Replace one contiguous block when the existing lines themselves change; the replacement may have more/fewer lines than the selected range
 ---a.ts
 {{hrefr 3}}..{{hrefr 6}}=/** Format a display label, falling back to DEF when empty. */
 \export function label(name: string): string {
@@ -139,6 +139,7 @@ $
 - Current/added preview lines include fresh `LINE+hash|content` anchors. Removed preview lines show deleted content and **MUST NOT** be reused as anchors.
 - You **MUST** emit only lines that change. You **MUST NOT** echo unchanged context; the anchor implies position.
 - You **MUST NOT** write `Lid=<sameTextThatIsAlreadyOnThatLine>`; the tool reports a no-op (no change applied). Emit `Lid=TEXT` only when TEXT differs.
+- You **MUST NOT** use `Lid=<originalLineContent>` + `\continuations` as an "insert after" idiom. That form is a *replacement*: its first line lands at the anchor, and its continuations push the original next line down. When the anchor is a closing brace and your continuations also end in `}`, the original line below — often itself `}` (a sibling block, mod, or impl closer) — sits adjacent to yours and you ship a duplicate `}`. For pure insertion, use `@Lid` + `+TEXT…` (after) or `^Lid` + `+TEXT…` (before). Never re-state the anchor's content as the first line of a replacement.
 - A line of the form `Lid|content` (a Lid, then `|`, then text, with NO leading `+`/`-`/`^`/`@`/`\`/`=`/`..`) is **FORBIDDEN**. That shape only appears in `read`/`grep` output as an anchor for *you*; it is never an edit op. If you copy a `Lid|content` line verbatim from a read into a patch, you have made an error — every edit op must start with `+`, `-`, `^`, `@`, `\`, `$`, `!`, or a Lid immediately followed by `=` or `..`.
 - To replace a contiguous block with new content, the canonical form is `LidA..LidB=FIRST_LINE` + `\NEXT_LINE…`. You **MUST NOT** write the old block and then the new block — that is unified-diff thinking and the tool does not understand it. If you find yourself emitting pre-image lines (with or without operators) before your new content, STOP and rewrite the section as a single range-replace.
 - TEXT after `=`, `+`, or `\` includes leading whitespace verbatim. You **MUST NOT** trim or re-indent it.

package/src/prompts/tools/browser.md CHANGED Viewed

@@ -1,25 +1,70 @@
-Navigates, clicks, types, scrolls, drags, queries DOM content, and captures screenshots.
+Drives a real Chromium tab with full puppeteer access via JS execution.
 <instruction>
-- For fetching static web content (articles, docs, issues/PRs, JSON, PDFs, feeds), prefer the `read` tool with a URL — it returns clean reader-mode text without spinning up a browser. Use this tool only when you need JS execution, authentication, or interactive actions.
-- `"open"` starts a headless session (or implicitly on first action); `"goto"` navigates to `url`; `"close"` releases the browser
-- `"observe"` captures a numbered accessibility snapshot — prefer `click_id`/`type_id`/`fill_id` using returned `element_id` values; flags: `include_all`, `viewport_only`
-- `"click"`, `"type"`, `"fill"`, `"press"`, `"scroll"`, `"drag"` for selector-based interactions — prefer ARIA/text selectors (`p-aria/[name="Sign in"]`, `p-text/Continue`) over brittle CSS
-- `"click_id"`, `"type_id"`, `"fill_id"` to interact with observed elements without selectors
-- `"wait_for_selector"` before interacting when the page is dynamic
-- `"evaluate"` runs a JS expression in page context
-- `"get_text"`, `"get_html"`, `"get_attribute"` for DOM queries — batch via `args: [{ selector, attribute? }]`
-- `"extract_readable"` returns reader-mode content; `format`: `"markdown"` (default) or `"text"`
-- `"screenshot"` captures images (optionally with `selector`); can save to disk via `path`
+- For fetching static web content (articles, docs, issues/PRs, JSON, PDFs, feeds), prefer the `read` tool with a URL — reader-mode text without spinning up a browser. Use this tool when you need JS execution, authentication, or interactive actions.
+- Three actions only:
+  - `open` — acquire (or reuse) a named tab. `name` defaults to `"main"`. Optional `url` navigates after the tab is ready. Optional `viewport` sets dimensions. Optional `dialogs: "accept" | "dismiss"` auto-handles `alert`/`confirm`/`beforeunload` so navigation/clicks don't hang (default: leave dialogs unhandled — page hangs until caller wires `page.on('dialog', …)`).
+  - `close` — release a tab by `name`, or every tab with `all: true`. For spawned-app browsers, set `kill: true` to terminate the process tree (default leaves it running).
+  - `run` — execute JS against an existing tab. The `code` is the body of an async function with `page`, `browser`, `tab`, `display`, `assert`, `wait` in scope. The function's return value is JSON-stringified into the tool result; multiple `display(value)` calls accumulate text/images.
+- Tabs survive across `run` calls and across in-process subagents. Open once, reuse many times.
+- Browser kinds, selected by the `app` field on `open`:
+  - default (no `app`) → headless Chromium with stealth patches.
+  - `app.path` → spawn an absolute binary (Electron/CDP). If a running instance already exposes a CDP port, it is reused; otherwise stale instances are killed and a fresh one is spawned. No stealth patches — never tamper with a real desktop app.
+  - `app.cdp_url` → connect to an existing CDP endpoint (e.g. `http://127.0.0.1:9222`).
+  - `app.target` (with `path`/`cdp_url`) — substring matched against url+title to pick a BrowserWindow when the app exposes several.
+- Inside `run`, `tab` exposes high-level helpers; reach for `page` (raw puppeteer Page) when you need anything they don't cover. Available helpers:
+  - `tab.goto(url, { waitUntil? })` — clears the element cache and navigates.
+  - `tab.observe({ includeAll?, viewportOnly? })` — accessibility snapshot. Returns `{ url, title, viewport, scroll, elements: [{ id, role, name, value, states, … }] }`. Element ids are stable until the next observe/goto.
+  - `tab.id(n)` — resolves an element id from the most recent observe to a real `ElementHandle` you can `.click()`, `.type()`, etc.
+  - `tab.click(selector)` / `tab.type(selector, text)` / `tab.fill(selector, value)` / `tab.press(key, { selector? })` / `tab.scroll(dx, dy)` — selector-based actions.
+  - `tab.waitFor(selector)` — waits until the selector is attached, returns the resolved `ElementHandle` for chaining (e.g. `const btn = await tab.waitFor('text/Submit'); await btn.click();`).
+  - `tab.drag(from, to)` — drag from one point to another. Each endpoint is either a selector string (drag center-to-center) or a `{ x, y }` viewport-coordinate point (e.g. for canvases, sliders).
+  - `tab.scrollIntoView(selector)` — scroll the matching element to the center of the viewport (use before clicking off-screen elements).
+  - `tab.select(selector, …values)` — set the selected option(s) on a `<select>`. Returns the values that ended up selected. `tab.fill` does **NOT** work for selects.
+  - `tab.uploadFile(selector, …filePaths)` — attach files to an `<input type="file">`. Paths resolve relative to cwd.
+  - `tab.waitForUrl(pattern, { timeout? })` — pattern is a substring or `RegExp`. Polls `location.href` so it works for SPA pushState navigations, not just real navigations. Returns the matched URL.
+  - `tab.waitForResponse(pattern, { timeout? })` — pattern is a substring, `RegExp`, or `(response) => boolean`. Returns the raw puppeteer `HTTPResponse` (call `.text()` / `.json()` / `.status()` / `.headers()` on it).
+  - `tab.evaluate(fn, …args)` — sugar for `page.evaluate` with the abort signal already wired. Use this instead of dropping to `page.evaluate` for ad-hoc DOM reads.
+  - `tab.screenshot({ selector?, fullPage?, save?, silent? })` — auto-attaches the image to the tool output unless `silent: true`. Saves full-res to `save` (or `browser.screenshotDir` setting) and a downscaled copy to the model.
+  - `tab.extract(format = "markdown")` — Readability-extracted page content.
+- Selectors accept CSS as well as puppeteer query handlers: `aria/Sign in`, `text/Continue`, `xpath/…`, `pierce/…`. Playwright-style `p-aria/[name="…"]`, `p-text/…`, etc. are normalized.
+- Default to `tab.observe()` over `tab.screenshot()` for understanding page state. Screenshot only when visual appearance matters.
 </instruction>
 <critical>
-**You **MUST** default to `observe`, not `screenshot`.**
-- `observe` is cheaper, faster, and returns structured data — use it to understand page state, find elements, and plan interactions.
-- You **SHOULD** only use `screenshot` when visual appearance matters (verifying layout, debugging CSS, capturing a visual artifact for the user).
-- You **MUST NOT** screenshot just to "see what's on the page" — `observe` gives you that with element IDs you can act on immediately.
+- You **MUST** call `open` before `run`. `run` does not implicitly create a tab.
+- You **MUST NOT** screenshot just to "see what's on the page" — `tab.observe()` returns structured data with element ids you can act on immediately.
+- After a `tab.goto()` or any navigation, prior element ids from `tab.observe()` are invalidated. Re-observe before referencing them.
+- `code` runs with full Node access. Treat it as your code, not sandboxed code.
 </critical>
+<examples>
+# Open a tab and read structured page data
+`{"action":"open","name":"docs","url":"https://example.com"}`
+`{"action":"run","name":"docs","code":"const obs = await tab.observe(); display(obs); return obs.elements.length;"}`
+# Click an observed element by id
+`{"action":"run","name":"docs","code":"const obs = await tab.observe(); const link = obs.elements.find(e => e.role === 'link' && e.name === 'Sign in'); assert(link, 'Sign in link missing'); await (await tab.id(link.id)).click();"}`
+# Save a full-page screenshot to disk
+`{"action":"run","name":"docs","code":"await tab.screenshot({ fullPage: true, save: 'screenshot.png' });"}`
+# Fill and submit a form via selectors
+`{"action":"run","name":"docs","code":"await tab.fill('input[name=email]', 'me@example.com'); await tab.click('text/Continue');"}`
+# Attach to an existing Electron app
+`{"action":"open","name":"cursor","app":{"path":"/Applications/Cursor.app/Contents/MacOS/Cursor"}}`
+# Close one tab (browser stays alive if other tabs reference it)
+`{"action":"close","name":"docs"}`
+# Close every tab; leave spawned apps running
+`{"action":"close","all":true}`
+# Close every tab and kill spawned-app processes too
+`{"action":"close","all":true,"kill":true}`
+</examples>
 <output>
-Text for navigation/DOM queries, images for screenshots.
+Per call: any `display(value)` outputs (text/images) followed by the JSON-stringified return value of the `code` function. `run` always produces at least a status line.
 </output>

package/src/prompts/tools/eval.md ADDED Viewed

@@ -0,0 +1,92 @@
+Run code in a persistent kernel, using a series of codeblocks acting as cells.
+<instruction>
+Each cell is a markdown fenced code block. The opening fence's info string carries metadata:
+```
+<lang>? <duration>? (title-fragment | key=value)*
+```
+- **Language**: {{#if py}}`py`/`python` for Python{{/if}}{{#ifAll py js}}, {{/ifAll}}{{#if js}}`js`/`javascript`/`ts`/`typescript` for JavaScript{{/if}}.{{#ifAll py js}} Omitted → inherit the previous cell's language (the first cell defaults to Python, falling back to JavaScript when Python is unavailable).{{else}} Omitted → inherit the previous cell's language.{{/ifAll}}
+- **Positional duration**: `15s`, `500ms`, `2m`, or a bare integer (seconds). Default 30s.
+- **Attributes**:
+  - `id="…"` — cell id (shown as the title in the transcript).
+  - `t=<duration>` — overrides the positional duration.
+  - `rst=true` — wipe **this cell's own language kernel** before running.{{#ifAll py js}} Other languages are untouched.{{/ifAll}}
+**Work incrementally:** one logical step per cell (imports, define, test, use). Pass multiple small cells in one call. Define small reusable functions you can debug individually. You **MUST** put workflow explanations in the assistant message or cell title — never inside cell code.
+**On failure:** errors identify the failing cell (e.g., "Cell 3 failed"). Resubmit only the fixed cell (or fixed cell + remaining cells).
+</instruction>
+<prelude>
+{{#ifAll py js}}The same helpers are available in both runtimes with the same positional argument order. Python takes the trailing options as keyword args; JavaScript takes the same options as a trailing object literal. JavaScript helpers are async and `await`able; Python helpers run synchronously.{{else}}{{#if py}}Helpers run synchronously. Trailing options are passed as keyword arguments.{{/if}}{{#if js}}Helpers are async and `await`able. Trailing options are passed as a final object literal.{{/if}}{{/ifAll}}
+```
+display(value) → None
+    Render a value in the current cell output.
+print(value, ...) → None
+    Print to the cell's text output.
+read(path, offset?=1, limit?=None) → str
+    Read file contents as text. offset/limit are 1-indexed line bounds.
+write(path, content) → str
+    Write content to a file (creates parent directories). Returns the resolved path.
+append(path, content) → str
+    Append content to a file. Returns the resolved path.
+stat(path) → {path, size, is_file, is_dir, mtime}
+    File or directory metadata. mtime is an ISO-8601 string.
+find(pattern, path?=".", type?="file", limit?=1000, hidden?=False, sort_by_mtime?=False, maxdepth?=None, mindepth?=None) → list[path]
+    Recursive glob find. Respects .gitignore.
+glob(pattern, path?=".", hidden?=False) → list[path]
+    Non-recursive glob. Use find() for recursive walks. Respects .gitignore.
+grep(pattern, path, ignore_case?=False, literal?=False, context?=0) → list[{line, text}]
+    Search a single file.
+rgrep(pattern, path?=".", glob_pattern?="*", ignore_case?=False, literal?=False, limit?=100, hidden?=False) → list[{file, line, text}]
+    Search recursively across files. Respects .gitignore.
+sed(path, pattern, repl, flags?=0) → int
+    Regex replace in a file (like sed -i). Returns replacement count.
+tree(path?=".", max_depth?=3, show_hidden?=False) → str
+    Render a directory tree.
+diff(a, b) → str
+    Unified diff between two files.
+run(cmd, cwd?=None, timeout?=None) → {stdout, stderr, exit_code}
+    Run a shell command.
+env(key?=None, value?=None) → str | None | dict
+    No args → full environment as dict. One arg → value of `key`. Two args → set `key=value` and return `value`.
+output(*ids, format?="raw", query?=None, offset?=None, limit?=None) → str | dict | list[dict]
+    Read task/agent output by ID. Single id returns text/dict; multiple ids return a list.
+```
+{{#if js}}**JavaScript only:** `tool.<name>(args)` invokes any session tool directly (e.g. `await tool.read({ path: "src/foo.ts" })`).
+{{/if}}</prelude>
+<output>
+Cells render like a Jupyter notebook. Pass any value to `display(value)`; non-presentable data is rendered as an interactive JSON tree, and presentable values (figures, images, dataframes, etc.) render with their native representation.
+</output>
+<caution>
+- In session mode, use `rst=true` on a cell to wipe its language's kernel before running.{{#ifAll py js}} Reset is per-language: a Python cell's `rst=true` does not touch the JavaScript kernel and vice versa.{{/ifAll}}
+{{#if js}}- **js**: the VM exposes a selective `process` subset, Web APIs, `Buffer`, `fs/promises`.
+{{/if}}</caution>
+<example>
+{{#if py}}```py id="imports" t="10s"
+import json
+from pathlib import Path
+```
+```py id="load config"
+data = json.loads(read('package.json'))
+display(data)
+```
+{{/if}}{{#ifAll py js}}
+{{/ifAll}}{{#if js}}```js id="js summary" rst=true
+const data = JSON.parse(await read('package.json'));
+display(data);
+return data.name;
+```
+```
+return 'still JavaScript';
+```
+{{/if}}
+</example>

package/src/prompts/tools/lsp.md CHANGED Viewed

@@ -9,8 +9,11 @@ Interacts with Language Server Protocol servers for code intelligence.
 - `hover`: Get type info and documentation → type signature + docs
 - `symbols`: List symbols in a file, or search workspace with `file: "*"` and a `query`
 - `rename`: Rename symbol across codebase → preview or apply edits
+- `rename_file`: Rename or move a file/directory; sends `workspace/willRenameFiles` so LSP servers update import paths and other references → preview or apply edits + filesystem rename
 - `code_actions`: List available quick-fixes/refactors/import actions; apply one when `apply: true` and `query` matches title or index
 - `status`: Show active language servers
+- `capabilities`: Dump per-server capabilities (standard + experimental + executeCommand list) for discovery — `file` scopes the dump to one server; omitting it or passing `"*"` lists every active server
+- `request`: Send a raw LSP request to a server — `query` is the method name (e.g., `rust-analyzer/expandMacro`, `typescript/goToSourceDefinition`, `workspace/executeCommand`); use `payload` for arbitrary JSON params or let the tool auto-build them from `file`/`line`/`symbol`
 - `reload`: Restart a specific server (via `file`) or all servers with `file: "*"`
 </operations>
@@ -18,9 +21,10 @@ Interacts with Language Server Protocol servers for code intelligence.
 - `file`: File path, glob pattern (e.g. `src/**/*.ts`), or `"*"` for workspace scope. Globs are expanded locally before dispatch. `"*"` routes `diagnostics`/`symbols`/`reload` to their workspace-wide form.
 - `line`: 1-indexed line number for position-based actions
 - `symbol`: Substring on the target line used to resolve column automatically. Append `#N` to pick the Nth occurrence on that line (1-indexed; default 1) — e.g. `foo#2` selects the second `foo`.
-- `query`: Symbol search query, code-action kind filter (list mode), or code-action selector (apply mode)
-- `new_name`: Required for rename
-- `apply`: Apply edits for rename/code_actions (default true for rename, list mode for code_actions unless explicitly true)
+- `query`: Symbol search query; code-action kind filter (list mode) or selector (apply mode); or the LSP method name when `action: request`
+- `new_name`: Required for `rename` (new symbol identifier) and `rename_file` (destination path)
+- `apply`: Apply edits for rename/rename_file/code_actions (default true for rename and rename_file; list mode for code_actions unless explicitly true)
+- `payload`: JSON-encoded params for `action: request`. Overrides the auto-built `{ textDocument, position }` shape when present.
 - `timeout`: Request timeout in seconds (clamped to 5-60, default 20)
 </parameters>