npm - @oh-my-pi/pi-coding-agent - Versions diffs - 14.5.10 → 14.5.12 - Mend

@oh-my-pi/pi-coding-agent 14.5.10 → 14.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/CHANGELOG.md +42 -0
package/package.json +7 -7
package/src/export/html/template.generated.ts +1 -1
package/src/export/html/template.js +29 -9
package/src/internal-urls/docs-index.generated.ts +54 -54
package/src/ipy/gateway-coordinator.ts +2 -1
package/src/modes/controllers/todo-command-controller.ts +22 -74
package/src/modes/interactive-mode.ts +9 -6
package/src/modes/types.ts +0 -2
package/src/prompts/system/eager-todo.md +1 -1
package/src/prompts/tools/atom.md +3 -2
package/src/prompts/tools/browser.md +61 -16
package/src/prompts/tools/todo-write.md +19 -19
package/src/session/agent-session.ts +23 -29
package/src/tools/browser/attach.ts +175 -0
package/src/tools/browser/launch.ts +554 -0
package/src/tools/browser/readable.ts +90 -0
package/src/tools/browser/registry.ts +417 -0
package/src/tools/browser/render.ts +212 -0
package/src/tools/browser/vm.ts +792 -0
package/src/tools/browser.ts +249 -1568
package/src/tools/plan-mode-guard.ts +27 -1
package/src/tools/renderers.ts +2 -0
package/src/tools/todo-write.ts +157 -195
package/examples/custom-tools/todo/index.ts +0 -211
package/examples/extensions/todo.ts +0 -295

package/src/ipy/gateway-coordinator.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import * as fs from "node:fs";
 import { createServer } from "node:net";
 import * as path from "node:path";
+import { Process } from "@oh-my-pi/pi-natives";
 import { getAgentDir, isEnoent, logger, procmgr } from "@oh-my-pi/pi-utils";
 import type { Subprocess } from "bun";
 import { Settings } from "../config/settings";
@@ -300,7 +301,7 @@ async function startGatewayProcess(
 async function killGateway(pid: number, context: string): Promise<void> {
 	try {
-		await procmgr.terminate({ target: pid });
+		await Process.fromPid(pid)?.terminate();
 	} catch (err) {
 		logger.warn("Failed to kill shared gateway process", {
 			error: err instanceof Error ? err.message : String(err),

package/src/modes/controllers/todo-command-controller.ts CHANGED Viewed

@@ -21,7 +21,7 @@ const USAGE = [
 	"  /todo export <path>                Write todos as Markdown to <path>",
 	"  /todo import <path>                Replace todos from Markdown at <path>",
 	"  /todo append [<phase>] <task...>   Append a task; phase fuzzy-matched or auto-created",
-	"  /todo start  <task>                Mark task in_progress (id or fuzzy content)",
+	"  /todo start  <task>                Mark task in_progress (fuzzy content match)",
 	"  /todo done   [<task|phase>]        Mark task/phase/all completed",
 	"  /todo drop   [<task|phase>]        Mark task/phase/all abandoned",
 	"  /todo rm     [<task|phase>]        Remove task/phase/all",
@@ -59,44 +59,9 @@ function tokenize(input: string): string[] {
 }
 // =============================================================================
-// Roman numerals + name normalization
+// Name normalization
 // =============================================================================
-const ROMAN_PAIRS: Array<[number, string]> = [
-	[1000, "M"],
-	[900, "CM"],
-	[500, "D"],
-	[400, "CD"],
-	[100, "C"],
-	[90, "XC"],
-	[50, "L"],
-	[40, "XL"],
-	[10, "X"],
-	[9, "IX"],
-	[5, "V"],
-	[4, "IV"],
-	[1, "I"],
-];
-function toRoman(n: number): string {
-	if (n <= 0) return "I";
-	let out = "";
-	let rem = n;
-	for (const [value, sym] of ROMAN_PAIRS) {
-		while (rem >= value) {
-			out += sym;
-			rem -= value;
-		}
-	}
-	return out;
-}
-const PHASE_PREFIX_RE = /^([IVXLCDM]+|[A-Z]|\d+)\.\s*/i;
-function stripPrefix(name: string): string {
-	return name.replace(PHASE_PREFIX_RE, "").trim();
-}
 function titleCase(s: string): string {
 	return s
 		.split(/\s+/)
@@ -105,13 +70,6 @@ function titleCase(s: string): string {
 		.join(" ");
 }
-function buildPhaseName(rawName: string, existingPhases: TodoPhase[]): string {
-	const stripped = stripPrefix(rawName.trim());
-	if (!stripped) return `${toRoman(existingPhases.length + 1)}. Todos`;
-	const titled = titleCase(stripped);
-	return `${toRoman(existingPhases.length + 1)}. ${titled}`;
-}
 // =============================================================================
 // Fuzzy matching
 // =============================================================================
@@ -119,20 +77,13 @@ function buildPhaseName(rawName: string, existingPhases: TodoPhase[]): string {
 function findPhaseFuzzy(phases: TodoPhase[], query: string): TodoPhase | undefined {
 	const q = query.trim().toLowerCase();
 	if (!q) return undefined;
-	// Exact id
-	const byId = phases.find(p => p.id.toLowerCase() === q);
-	if (byId) return byId;
 	// Exact name (case-insensitive)
 	const byName = phases.find(p => p.name.toLowerCase() === q);
 	if (byName) return byName;
-	// Stripped name match
-	const strippedQ = stripPrefix(q);
-	const byStripped = phases.find(p => stripPrefix(p.name).toLowerCase() === strippedQ);
-	if (byStripped) return byStripped;
-	// Substring (prefer prefix match on stripped name)
-	const prefixMatches = phases.filter(p => stripPrefix(p.name).toLowerCase().startsWith(strippedQ));
+	// Substring (prefer prefix match)
+	const prefixMatches = phases.filter(p => p.name.toLowerCase().startsWith(q));
 	if (prefixMatches.length === 1) return prefixMatches[0];
-	const subMatches = phases.filter(p => stripPrefix(p.name).toLowerCase().includes(strippedQ));
+	const subMatches = phases.filter(p => p.name.toLowerCase().includes(q));
 	if (subMatches.length === 1) return subMatches[0];
 	return undefined;
 }
@@ -140,9 +91,10 @@ function findPhaseFuzzy(phases: TodoPhase[], query: string): TodoPhase | undefin
 function findTaskFuzzy(phases: TodoPhase[], query: string): { task: TodoItem; phase: TodoPhase } | undefined {
 	const q = query.trim().toLowerCase();
 	if (!q) return undefined;
+	// Exact content (case-insensitive)
 	for (const phase of phases) {
 		for (const task of phase.tasks) {
-			if (task.id.toLowerCase() === q) return { task, phase };
+			if (task.content.toLowerCase() === q) return { task, phase };
 		}
 	}
 	const matches: Array<{ task: TodoItem; phase: TodoPhase }> = [];
@@ -169,7 +121,7 @@ function buildSystemReminder(action: string, phases: TodoPhase[]): string {
 	return [
 		"<system-reminder>",
 		`The user manually modified the todo list (${action}).`,
-		"Current todo list (note task ids may have been reassigned by /todo edit):",
+		"Current todo list:",
 		"",
 		md,
 		"</system-reminder>",
@@ -327,28 +279,24 @@ export class TodoCommandController {
 		if (phaseName) {
 			targetPhase = findPhaseFuzzy(next, phaseName);
 			if (!targetPhase) {
-				const newName = buildPhaseName(phaseName, next);
-				targetPhase = { id: `phase-${next.length + 1}`, name: newName, tasks: [] };
+				targetPhase = { name: titleCase(phaseName), tasks: [] };
 				next.push(targetPhase);
 			}
 		} else if (next.length > 0) {
 			targetPhase = next[next.length - 1];
 		} else {
-			targetPhase = { id: "phase-1", name: `${toRoman(1)}. Todos`, tasks: [] };
+			targetPhase = { name: "Todos", tasks: [] };
 			next.push(targetPhase);
 		}
-		const usedTaskIds = new Set(next.flatMap(p => p.tasks.map(t => t.id)));
-		let n = 1;
-		while (usedTaskIds.has(`task-${n}`)) n++;
+		const finalContent = titleCaseSentence(content);
 		targetPhase.tasks.push({
-			id: `task-${n}`,
-			content: titleCaseSentence(content),
+			content: finalContent,
 			status: "pending",
 		});
 		this.#commit(next, `/todo append → ${targetPhase.name}`);
-		this.ctx.showStatus(`Appended to ${targetPhase.name}: ${content}`);
+		this.ctx.showStatus(`Appended to ${targetPhase.name}: ${finalContent}`);
 	}
 	// ------------------------------------------------------------- start / done / drop / rm
@@ -364,12 +312,12 @@ export class TodoCommandController {
 			this.ctx.showError(`No task matched "${rest}". Use /todo to list current tasks.`);
 			return;
 		}
-		const { phases, errors } = applyOpsToPhases(current, [{ op: "start", task: hit.task.id }]);
+		const { phases, errors } = applyOpsToPhases(current, [{ op: "start", task: hit.task.content }]);
 		if (errors.length > 0) {
 			this.ctx.showError(errors.join("; "));
 			return;
 		}
-		this.#commit(phases, `/todo start ${hit.task.id}`);
+		this.#commit(phases, `/todo start ${hit.task.content}`);
 		this.ctx.showStatus(`Started: ${hit.task.content}`);
 	}
@@ -391,19 +339,19 @@ export class TodoCommandController {
 		const taskHit = findTaskFuzzy(current, trimmed);
 		if (taskHit) {
-			const { phases, errors } = applyOpsToPhases(current, [{ op, task: taskHit.task.id }]);
+			const { phases, errors } = applyOpsToPhases(current, [{ op, task: taskHit.task.content }]);
 			if (errors.length > 0) {
 				this.ctx.showError(errors.join("; "));
 				return;
 			}
-			this.#commit(phases, `/todo ${op} ${taskHit.task.id}`);
+			this.#commit(phases, `/todo ${op} ${taskHit.task.content}`);
 			this.ctx.showStatus(`Marked ${target}: ${taskHit.task.content}`);
 			return;
 		}
 		const phaseHit = findPhaseFuzzy(current, trimmed);
 		if (phaseHit) {
-			const { phases, errors } = applyOpsToPhases(current, [{ op, phase: phaseHit.id }]);
+			const { phases, errors } = applyOpsToPhases(current, [{ op, phase: phaseHit.name }]);
 			if (errors.length > 0) {
 				this.ctx.showError(errors.join("; "));
 				return;
@@ -426,18 +374,18 @@ export class TodoCommandController {
 		}
 		const taskHit = findTaskFuzzy(current, trimmed);
 		if (taskHit) {
-			const { phases, errors } = applyOpsToPhases(current, [{ op: "rm", task: taskHit.task.id }]);
+			const { phases, errors } = applyOpsToPhases(current, [{ op: "rm", task: taskHit.task.content }]);
 			if (errors.length > 0) {
 				this.ctx.showError(errors.join("; "));
 				return;
 			}
-			this.#commit(phases, `/todo rm ${taskHit.task.id}`);
+			this.#commit(phases, `/todo rm ${taskHit.task.content}`);
 			this.ctx.showStatus(`Removed: ${taskHit.task.content}`);
 			return;
 		}
 		const phaseHit = findPhaseFuzzy(current, trimmed);
 		if (phaseHit) {
-			const { phases, errors } = applyOpsToPhases(current, [{ op: "rm", phase: phaseHit.id }]);
+			const { phases, errors } = applyOpsToPhases(current, [{ op: "rm", phase: phaseHit.name }]);
 			if (errors.length > 0) {
 				this.ctx.showError(errors.join("; "));
 				return;
@@ -460,7 +408,7 @@ export class TodoCommandController {
 		const current = this.#currentPhases();
 		const initialMarkdown =
-			current.length > 0 ? phasesToMarkdown(current) : "# I. Todos\n- [ ] (replace this with your tasks)\n";
+			current.length > 0 ? phasesToMarkdown(current) : "# Todos\n- [ ] (replace this with your tasks)\n";
 		const fileHandle = await this.#openTtyHandle();
 		this.ctx.ui.stop();

package/src/modes/interactive-mode.ts CHANGED Viewed

@@ -48,6 +48,7 @@ import { getRecentSessions } from "../session/session-manager";
 import { STTController, type SttState } from "../stt";
 import type { ExitPlanModeDetails, LspStartupServerInfo } from "../tools";
 import { normalizeLocalScheme } from "../tools/path-utils";
+import { formatPhaseDisplayName } from "../tools/todo-write";
 import type { EventBus } from "../utils/event-bus";
 import { getEditorCommand, openInEditor } from "../utils/external-editor";
 import { getSessionAccentAnsi, getSessionAccentHexForTitle } from "../utils/session-color";
@@ -707,9 +708,12 @@ export class InteractiveMode implements InteractiveModeContext {
 		const lines = ["", indent + theme.bold(theme.fg("accent", "Todos"))];
 		if (!this.todoExpanded) {
-			const activePhase = this.#getActivePhase(phases);
+			const activeIdx = phases.indexOf(this.#getActivePhase(phases) ?? phases[0]);
+			const activePhase = phases[activeIdx];
 			if (!activePhase) return;
-			lines.push(`${indent}${theme.fg("accent", `${hook} ${activePhase.name}`)}`);
+			lines.push(
+				`${indent}${theme.fg("accent", `${hook} ${formatPhaseDisplayName(activePhase.name, activeIdx + 1)}`)}`,
+			);
 			const visibleTasks = activePhase.tasks.slice(0, 5);
 			visibleTasks.forEach((todo, index) => {
 				const prefix = `${indent}${index === 0 ? hook : " "} `;
@@ -723,13 +727,13 @@ export class InteractiveMode implements InteractiveModeContext {
 			return;
 		}
-		for (const phase of phases) {
-			lines.push(`${indent}${theme.fg("accent", `${hook} ${phase.name}`)}`);
+		phases.forEach((phase, phaseIndex) => {
+			lines.push(`${indent}${theme.fg("accent", `${hook} ${formatPhaseDisplayName(phase.name, phaseIndex + 1)}`)}`);
 			phase.tasks.forEach((todo, index) => {
 				const prefix = `${indent}${index === 0 ? hook : " "} `;
 				lines.push(this.#formatTodoLine(todo, prefix));
 			});
-		}
+		});
 		this.todoContainer.addChild(new Text(lines.join("\n"), 1, 0));
 	}
@@ -1712,7 +1716,6 @@ export class InteractiveMode implements InteractiveModeContext {
 		} else {
 			this.todoPhases = [
 				{
-					id: "default",
 					name: "Todos",
 					tasks: todos as TodoItem[],
 				},

package/src/modes/types.ts CHANGED Viewed

@@ -42,7 +42,6 @@ export type SubmittedUserInput = {
 export type TodoStatus = "pending" | "in_progress" | "completed" | "abandoned";
 export type TodoItem = {
-	id: string;
 	content: string;
 	status: TodoStatus;
 	details?: string;
@@ -50,7 +49,6 @@ export type TodoItem = {
 };
 export type TodoPhase = {
-	id: string;
 	name: string;
 	tasks: TodoItem[];
 };

package/src/prompts/system/eager-todo.md CHANGED Viewed

@@ -2,7 +2,7 @@
 Before doing substantive work on the upcoming user request, create a comprehensive phased todo first.
 You **MUST** call `todo_write` first in this turn.
-You **MUST** initialize the todo list with a single `replace` op.
+You **MUST** initialize the todo list with a single `init` op.
 You **MUST** cover the entire request from investigation through implementation and verification — not just the next immediate step.
 You **MUST** make task descriptions specific enough that a future turn can execute them without re-planning.
 You **MUST** keep task `content` to a short label (5-10 words). Put file paths, implementation steps, and specifics in `details`.

package/src/prompts/tools/atom.md CHANGED Viewed

@@ -36,7 +36,7 @@ Lid=       blank the anchored line's content but KEEP the line (results in an em
 - To insert ABOVE a line, you **MUST** use `^Lid` then `+TEXT`. To insert above line 1, you **MUST** use `^` (BOF) then `+TEXT`. To insert below a line, you **MUST** use `@Lid` then `+TEXT`.
 - Multiple `---PATH` sections **MAY** appear in one input; each section is applied in order.
 - `!rm` / `!mv DEST` **MUST NOT** be combined with line edits in the same section.
-- Lids contain a content hash. If a line has changed since you read it, the tool rejects the edit and shows the current content; you **MUST** re-read and retry with fresh Lids. Small drift (≤5 lines) where the original hash still matches a nearby line auto-rebases with a warning. Larger shifts may show a hash-only candidate, but two-letter hashes collide; verify surrounding content or re-read before using it.
+- Lids contain a content hash. If a line has changed since you read it, the tool rejects the edit and shows the current content; you **MUST** re-read and retry with fresh Lids.
 - After `+TEXT` (or `+`) the cursor advances past the inserted line, so consecutive `+TEXT` ops stack in order. After `Lid=TEXT` the cursor sits on the modified anchor; after `-Lid` it sits on the slot the deleted line vacated. You **MUST** use a fresh `@Lid` / `^Lid` / `^` / `$` to reposition.
 - The tool is syntax-blind: it will not check brackets, indentation, table column counts, or fence integrity. You **MUST** verify indentation-sensitive or structured files after editing (Python, Markdown tables/fences).
 - A section whose PATH does not yet exist creates the file from your `+TEXT` lines (use `^` or `$` then `+TEXT…`). No separate "create file" op is needed.
@@ -83,7 +83,7 @@ Lid=       blank the anchored line's content but KEEP the line (results in an em
 \	return (name || DEF).trim().toUpperCase();
 \}
-# Replace a block with a longer multi-line block, including blank lines (canonical form for refactors)
+# Replace one contiguous block when the existing lines themselves change; the replacement may have more/fewer lines than the selected range
 ---a.ts
 {{hrefr 3}}..{{hrefr 6}}=/** Format a display label, falling back to DEF when empty. */
 \export function label(name: string): string {
@@ -139,6 +139,7 @@ $
 - Current/added preview lines include fresh `LINE+hash|content` anchors. Removed preview lines show deleted content and **MUST NOT** be reused as anchors.
 - You **MUST** emit only lines that change. You **MUST NOT** echo unchanged context; the anchor implies position.
 - You **MUST NOT** write `Lid=<sameTextThatIsAlreadyOnThatLine>`; the tool reports a no-op (no change applied). Emit `Lid=TEXT` only when TEXT differs.
+- You **MUST NOT** use `Lid=<originalLineContent>` + `\continuations` as an "insert after" idiom. That form is a *replacement*: its first line lands at the anchor, and its continuations push the original next line down. When the anchor is a closing brace and your continuations also end in `}`, the original line below — often itself `}` (a sibling block, mod, or impl closer) — sits adjacent to yours and you ship a duplicate `}`. For pure insertion, use `@Lid` + `+TEXT…` (after) or `^Lid` + `+TEXT…` (before). Never re-state the anchor's content as the first line of a replacement.
 - A line of the form `Lid|content` (a Lid, then `|`, then text, with NO leading `+`/`-`/`^`/`@`/`\`/`=`/`..`) is **FORBIDDEN**. That shape only appears in `read`/`grep` output as an anchor for *you*; it is never an edit op. If you copy a `Lid|content` line verbatim from a read into a patch, you have made an error — every edit op must start with `+`, `-`, `^`, `@`, `\`, `$`, `!`, or a Lid immediately followed by `=` or `..`.
 - To replace a contiguous block with new content, the canonical form is `LidA..LidB=FIRST_LINE` + `\NEXT_LINE…`. You **MUST NOT** write the old block and then the new block — that is unified-diff thinking and the tool does not understand it. If you find yourself emitting pre-image lines (with or without operators) before your new content, STOP and rewrite the section as a single range-replace.
 - TEXT after `=`, `+`, or `\` includes leading whitespace verbatim. You **MUST NOT** trim or re-indent it.

package/src/prompts/tools/browser.md CHANGED Viewed

@@ -1,25 +1,70 @@
-Navigates, clicks, types, scrolls, drags, queries DOM content, and captures screenshots.
+Drives a real Chromium tab with full puppeteer access via JS execution.
 <instruction>
-- For fetching static web content (articles, docs, issues/PRs, JSON, PDFs, feeds), prefer the `read` tool with a URL — it returns clean reader-mode text without spinning up a browser. Use this tool only when you need JS execution, authentication, or interactive actions.
-- `"open"` starts a headless session (or implicitly on first action); `"goto"` navigates to `url`; `"close"` releases the browser
-- `"observe"` captures a numbered accessibility snapshot — prefer `click_id`/`type_id`/`fill_id` using returned `element_id` values; flags: `include_all`, `viewport_only`
-- `"click"`, `"type"`, `"fill"`, `"press"`, `"scroll"`, `"drag"` for selector-based interactions — prefer ARIA/text selectors (`p-aria/[name="Sign in"]`, `p-text/Continue`) over brittle CSS
-- `"click_id"`, `"type_id"`, `"fill_id"` to interact with observed elements without selectors
-- `"wait_for_selector"` before interacting when the page is dynamic
-- `"evaluate"` runs a JS expression in page context
-- `"get_text"`, `"get_html"`, `"get_attribute"` for DOM queries — batch via `args: [{ selector, attribute? }]`
-- `"extract_readable"` returns reader-mode content; `format`: `"markdown"` (default) or `"text"`
-- `"screenshot"` captures images (optionally with `selector`); can save to disk via `path`
+- For fetching static web content (articles, docs, issues/PRs, JSON, PDFs, feeds), prefer the `read` tool with a URL — reader-mode text without spinning up a browser. Use this tool when you need JS execution, authentication, or interactive actions.
+- Three actions only:
+  - `open` — acquire (or reuse) a named tab. `name` defaults to `"main"`. Optional `url` navigates after the tab is ready. Optional `viewport` sets dimensions. Optional `dialogs: "accept" | "dismiss"` auto-handles `alert`/`confirm`/`beforeunload` so navigation/clicks don't hang (default: leave dialogs unhandled — page hangs until caller wires `page.on('dialog', …)`).
+  - `close` — release a tab by `name`, or every tab with `all: true`. For spawned-app browsers, set `kill: true` to terminate the process tree (default leaves it running).
+  - `run` — execute JS against an existing tab. The `code` is the body of an async function with `page`, `browser`, `tab`, `display`, `assert`, `wait` in scope. The function's return value is JSON-stringified into the tool result; multiple `display(value)` calls accumulate text/images.
+- Tabs survive across `run` calls and across in-process subagents. Open once, reuse many times.
+- Browser kinds, selected by the `app` field on `open`:
+  - default (no `app`) → headless Chromium with stealth patches.
+  - `app.path` → spawn an absolute binary (Electron/CDP). If a running instance already exposes a CDP port, it is reused; otherwise stale instances are killed and a fresh one is spawned. No stealth patches — never tamper with a real desktop app.
+  - `app.cdp_url` → connect to an existing CDP endpoint (e.g. `http://127.0.0.1:9222`).
+  - `app.target` (with `path`/`cdp_url`) — substring matched against url+title to pick a BrowserWindow when the app exposes several.
+- Inside `run`, `tab` exposes high-level helpers; reach for `page` (raw puppeteer Page) when you need anything they don't cover. Available helpers:
+  - `tab.goto(url, { waitUntil? })` — clears the element cache and navigates.
+  - `tab.observe({ includeAll?, viewportOnly? })` — accessibility snapshot. Returns `{ url, title, viewport, scroll, elements: [{ id, role, name, value, states, … }] }`. Element ids are stable until the next observe/goto.
+  - `tab.id(n)` — resolves an element id from the most recent observe to a real `ElementHandle` you can `.click()`, `.type()`, etc.
+  - `tab.click(selector)` / `tab.type(selector, text)` / `tab.fill(selector, value)` / `tab.press(key, { selector? })` / `tab.scroll(dx, dy)` — selector-based actions.
+  - `tab.waitFor(selector)` — waits until the selector is attached, returns the resolved `ElementHandle` for chaining (e.g. `const btn = await tab.waitFor('text/Submit'); await btn.click();`).
+  - `tab.drag(from, to)` — drag from one point to another. Each endpoint is either a selector string (drag center-to-center) or a `{ x, y }` viewport-coordinate point (e.g. for canvases, sliders).
+  - `tab.scrollIntoView(selector)` — scroll the matching element to the center of the viewport (use before clicking off-screen elements).
+  - `tab.select(selector, …values)` — set the selected option(s) on a `<select>`. Returns the values that ended up selected. `tab.fill` does **NOT** work for selects.
+  - `tab.uploadFile(selector, …filePaths)` — attach files to an `<input type="file">`. Paths resolve relative to cwd.
+  - `tab.waitForUrl(pattern, { timeout? })` — pattern is a substring or `RegExp`. Polls `location.href` so it works for SPA pushState navigations, not just real navigations. Returns the matched URL.
+  - `tab.waitForResponse(pattern, { timeout? })` — pattern is a substring, `RegExp`, or `(response) => boolean`. Returns the raw puppeteer `HTTPResponse` (call `.text()` / `.json()` / `.status()` / `.headers()` on it).
+  - `tab.evaluate(fn, …args)` — sugar for `page.evaluate` with the abort signal already wired. Use this instead of dropping to `page.evaluate` for ad-hoc DOM reads.
+  - `tab.screenshot({ selector?, fullPage?, save?, silent? })` — auto-attaches the image to the tool output unless `silent: true`. Saves full-res to `save` (or `browser.screenshotDir` setting) and a downscaled copy to the model.
+  - `tab.extract(format = "markdown")` — Readability-extracted page content.
+- Selectors accept CSS as well as puppeteer query handlers: `aria/Sign in`, `text/Continue`, `xpath/…`, `pierce/…`. Playwright-style `p-aria/[name="…"]`, `p-text/…`, etc. are normalized.
+- Default to `tab.observe()` over `tab.screenshot()` for understanding page state. Screenshot only when visual appearance matters.
 </instruction>
 <critical>
-**You **MUST** default to `observe`, not `screenshot`.**
-- `observe` is cheaper, faster, and returns structured data — use it to understand page state, find elements, and plan interactions.
-- You **SHOULD** only use `screenshot` when visual appearance matters (verifying layout, debugging CSS, capturing a visual artifact for the user).
-- You **MUST NOT** screenshot just to "see what's on the page" — `observe` gives you that with element IDs you can act on immediately.
+- You **MUST** call `open` before `run`. `run` does not implicitly create a tab.
+- You **MUST NOT** screenshot just to "see what's on the page" — `tab.observe()` returns structured data with element ids you can act on immediately.
+- After a `tab.goto()` or any navigation, prior element ids from `tab.observe()` are invalidated. Re-observe before referencing them.
+- `code` runs with full Node access. Treat it as your code, not sandboxed code.
 </critical>
+<examples>
+# Open a tab and read structured page data
+`{"action":"open","name":"docs","url":"https://example.com"}`
+`{"action":"run","name":"docs","code":"const obs = await tab.observe(); display(obs); return obs.elements.length;"}`
+# Click an observed element by id
+`{"action":"run","name":"docs","code":"const obs = await tab.observe(); const link = obs.elements.find(e => e.role === 'link' && e.name === 'Sign in'); assert(link, 'Sign in link missing'); await (await tab.id(link.id)).click();"}`
+# Save a full-page screenshot to disk
+`{"action":"run","name":"docs","code":"await tab.screenshot({ fullPage: true, save: 'screenshot.png' });"}`
+# Fill and submit a form via selectors
+`{"action":"run","name":"docs","code":"await tab.fill('input[name=email]', 'me@example.com'); await tab.click('text/Continue');"}`
+# Attach to an existing Electron app
+`{"action":"open","name":"cursor","app":{"path":"/Applications/Cursor.app/Contents/MacOS/Cursor"}}`
+# Close one tab (browser stays alive if other tabs reference it)
+`{"action":"close","name":"docs"}`
+# Close every tab; leave spawned apps running
+`{"action":"close","all":true}`
+# Close every tab and kill spawned-app processes too
+`{"action":"close","all":true,"kill":true}`
+</examples>
 <output>
-Text for navigation/DOM queries, images for screenshots.
+Per call: any `display(value)` outputs (text/images) followed by the JSON-stringified return value of the `code` function. `run` always produces at least a status line.
 </output>

package/src/prompts/tools/todo-write.md CHANGED Viewed

@@ -5,23 +5,23 @@ The next pending task is auto-promoted to `in_progress` after each completion.
 |`op`|Required fields|Effect|
 |---|---|---|
-|`replace`|`phases`|Replace the full list (initial setup, full restructure)|
-|`start`|`task`|Set task to `in_progress`|
-|`done`|`task` or `phase` (or neither = all)|Mark completed|
-|`drop`|`task` or `phase` (or neither = all)|Mark abandoned|
-|`rm`|`task` or `phase` (or neither = all)|Remove|
-|`append`|`phase`, `items: {id, label}[]`|Append tasks; creates phase if missing|
-|`note`|`task`, `text`|Append a note to `task.notes`. Only use to leave reminders for future-you.|
+|`init`|`list`|Initialize the full list|
+|`start`|`task`|Mark in progress|
+|`done`|`task` or `phase`|Mark completed|
+|`drop`|`task` or `phase`|Mark abandoned|
+|`rm`|`task` or `phase`|Remove|
+|`append`|`phase`, `items: string[]`|Append tasks; lazily creates phase|
+|`note`|`task`, `text`|Append a note to a task. Reminders for future-you only.|
 ## Anatomy
-- **Task `label`**: 5–10 words, what is being done, not how.
-- **Phase `name`**: short noun phrase prefixed with a roman numeral — `I. Foundation`, `II. Auth`, `III. Verification`. Single-phase plans still use `I.`. Never use snake_case, arabic numerals, or letter prefixes.
+- **Task content**: 5–10 words, what is being done, not how. Used as the task identifier — unique.
+- **Phase name**: short noun phrase (e.g. `Foundation`, `Auth`, `Verification`). Used as the phase identifier — unique. Do not add prefixes like `1.`, `A)`, `Phase 1:`, etc.
 ## Rules
-- Mark tasks done immediately after finishing — never defer.
+- Mark tasks done immediately after finishing.
 - Complete phases in order.
-- On blockers, `append` a new task to the active phase.
-- Keep ids stable once introduced.
+- On blockers, `append` a new task to the active phase to unblock yourself, or `drop`.
+- `task` and `phase` fields reference content/name verbatim; keep them stable once introduced.
 ## When to create a list
 - Task requires 3+ distinct steps
@@ -31,17 +31,17 @@ The next pending task is auto-promoted to `in_progress` after each completion.
 <examples>
 # Initial setup (multi-phase)
-`{"ops":[{"op":"replace","phases":[{"name":"I. Foundation","tasks":[{"content":"Scaffold crate"},{"content":"Wire workspace"}]},{"name":"II. Auth","tasks":[{"content":"Port credential store"},{"content":"Wire OAuth providers"}]},{"name":"III. Verification","tasks":[{"content":"Run cargo test"}]}]}]}`
-# Initial setup (single phase — still prefixed)
-`{"ops":[{"op":"replace","phases":[{"name":"I. Implementation","tasks":[{"content":"Apply fix"},{"content":"Run tests"}]}]}]}`
+`{"ops":[{"op":"init","list":[{"phase":"Foundation","items":["Scaffold crate","Wire workspace"]},{"phase":"Auth","items":["Port credential store","Wire OAuth providers"]},{"phase":"Verification","items":["Run cargo test"]}]}]}`
+# Initial setup (single phase)
+`{"ops":[{"op":"init","list":[{"phase":"Implementation","items":["Apply fix","Run tests"]}]}]}`
 # Complete one task
-`{"ops":[{"op":"done","task":"task-2"}]}`
+`{"ops":[{"op":"done","task":"Wire workspace"}]}`
 # Complete a whole phase
-`{"ops":[{"op":"done","phase":"II. Auth"}]}`
+`{"ops":[{"op":"done","phase":"Auth"}]}`
 # Remove all tasks
 `{"ops":[{"op":"rm"}]}`
 # Drop one task
-`{"ops":[{"op":"drop","task":"task-7"}]}`
+`{"ops":[{"op":"drop","task":"Run cargo test"}]}`
 # Append tasks to a phase
-`{"ops":[{"op":"append","phase":"II. Auth","items":[{"id":"task-8","label":"Handle retries"},{"id":"task-9","label":"Run tests"}]}]}`
+`{"ops":[{"op":"append","phase":"Auth","items":["Handle retries","Run tests"]}]}`
 </examples>

package/src/session/agent-session.ts CHANGED Viewed

@@ -52,16 +52,8 @@ import {
 	parseRateLimitReason,
 	streamSimple,
 } from "@oh-my-pi/pi-ai";
-import { killTree, MacOSPowerAssertion } from "@oh-my-pi/pi-natives";
-import {
-	abortableSleep,
-	getAgentDbPath,
-	isEnoent,
-	logger,
-	prompt,
-	Snowflake,
-	setNativeKillTree,
-} from "@oh-my-pi/pi-utils";
+import { MacOSPowerAssertion } from "@oh-my-pi/pi-natives";
+import { abortableSleep, getAgentDbPath, isEnoent, logger, prompt, Snowflake } from "@oh-my-pi/pi-utils";
 import type { AsyncJob, AsyncJobManager } from "../async";
 import type { Rule } from "../capability/rule";
 import { MODEL_ROLE_IDS, type ModelRegistry } from "../config/model-registry";
@@ -387,6 +379,11 @@ function formatRetryFallbackBaseSelector(selector: RetryFallbackSelector): strin
 	return `${selector.provider}/${selector.id}`;
 }
+/** Composite key for auto-clear timers, keyed by phase name + task content. */
+function todoClearKey(phaseName: string, taskContent: string): string {
+	return `${phaseName}\u0000${taskContent}`;
+}
 const noOpUIContext: ExtensionUIContext = {
 	select: async (_title, _options, _dialogOptions) => undefined,
 	confirm: async (_title, _message, _dialogOptions) => false,
@@ -575,8 +572,6 @@ export class AgentSession {
 	}
 	constructor(config: AgentSessionConfig) {
-		setNativeKillTree(killTree);
 		this.agent = config.agent;
 		this.sessionManager = config.sessionManager;
 		this.settings = config.settings;
@@ -3347,10 +3342,9 @@ export class AgentSession {
 	#cloneTodoPhases(phases: TodoPhase[]): TodoPhase[] {
 		return phases.map(phase => ({
-			id: phase.id,
 			name: phase.name,
 			tasks: phase.tasks.map(task => {
-				const out: TodoItem = { id: task.id, content: task.content, status: task.status };
+				const out: TodoItem = { content: task.content, status: task.status };
 				if (task.notes && task.notes.length > 0) out.notes = [...task.notes];
 				return out;
 			}),
@@ -3362,43 +3356,43 @@ export class AgentSession {
 		const delaySec = this.settings.get("tasks.todoClearDelay") ?? 60;
 		if (delaySec < 0) return; // "Never" — no auto-clear
 		const delayMs = delaySec * 1000;
-		const doneTaskIds = new Set<string>();
+		const doneKeys = new Set<string>();
 		for (const phase of phases) {
 			for (const task of phase.tasks) {
 				if (task.status === "completed" || task.status === "abandoned") {
-					doneTaskIds.add(task.id);
+					doneKeys.add(todoClearKey(phase.name, task.content));
 				}
 			}
 		}
 		// Cancel timers for tasks that are no longer done (e.g. status was reverted)
-		for (const [id, timer] of this.#todoClearTimers) {
-			if (!doneTaskIds.has(id)) {
+		for (const [key, timer] of this.#todoClearTimers) {
+			if (!doneKeys.has(key)) {
 				clearTimeout(timer);
-				this.#todoClearTimers.delete(id);
+				this.#todoClearTimers.delete(key);
 			}
 		}
 		// Schedule new timers for newly-done tasks
-		for (const id of doneTaskIds) {
-			if (this.#todoClearTimers.has(id)) continue;
+		for (const key of doneKeys) {
+			if (this.#todoClearTimers.has(key)) continue;
 			if (delayMs === 0) {
 				// Instant — run synchronously on next microtask to batch removals
-				const timer = setTimeout(() => this.#runTodoAutoClear(id), 0);
-				this.#todoClearTimers.set(id, timer);
+				const timer = setTimeout(() => this.#runTodoAutoClear(key), 0);
+				this.#todoClearTimers.set(key, timer);
 			} else {
-				const timer = setTimeout(() => this.#runTodoAutoClear(id), delayMs);
-				this.#todoClearTimers.set(id, timer);
+				const timer = setTimeout(() => this.#runTodoAutoClear(key), delayMs);
+				this.#todoClearTimers.set(key, timer);
 			}
 		}
 	}
 	/** Remove a single completed task and notify the UI. */
-	#runTodoAutoClear(taskId: string): void {
-		this.#todoClearTimers.delete(taskId);
+	#runTodoAutoClear(key: string): void {
+		this.#todoClearTimers.delete(key);
 		let removed = false;
 		for (const phase of this.#todoPhases) {
-			const idx = phase.tasks.findIndex(t => t.id === taskId);
+			const idx = phase.tasks.findIndex(t => todoClearKey(phase.name, t.content) === key);
 			if (idx !== -1 && (phase.tasks[idx].status === "completed" || phase.tasks[idx].status === "abandoned")) {
 				phase.tasks.splice(idx, 1);
 				removed = true;
@@ -4568,7 +4562,7 @@ export class AgentSession {
 						(task): task is TodoItem & { status: "pending" | "in_progress" } =>
 							task.status === "pending" || task.status === "in_progress",
 					)
-					.map(task => ({ id: task.id, content: task.content, status: task.status })),
+					.map(task => ({ content: task.content, status: task.status })),
 			}))
 			.filter(phase => phase.tasks.length > 0);
 		const incomplete = incompleteByPhase.flatMap(phase => phase.tasks);