npm - pi-cursor-sdk - Versions diffs - 0.1.19 → 0.1.21 - Mend

pi-cursor-sdk 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89) hide show

package/CHANGELOG.md +52 -0
package/README.md +72 -11
package/docs/cursor-dogfood-checklist.md +57 -0
package/docs/cursor-live-smoke-checklist.md +116 -10
package/docs/cursor-model-ux-spec.md +60 -19
package/docs/cursor-native-tool-replay.md +21 -11
package/docs/cursor-native-tool-visual-audit.md +104 -59
package/docs/cursor-testing-lessons.md +10 -5
package/docs/cursor-tool-surfaces.md +69 -0
package/package.json +37 -11
package/scripts/debug-provider-events.d.mts +59 -0
package/scripts/debug-provider-events.mjs +70 -175
package/scripts/debug-sdk-events.d.mts +90 -0
package/scripts/debug-sdk-events.mjs +36 -98
package/scripts/fixtures/plan-strip-shim/index.ts +12 -0
package/scripts/isolated-cursor-smoke.sh +264 -102
package/scripts/lib/cursor-child-process.d.mts +10 -0
package/scripts/lib/cursor-child-process.mjs +50 -0
package/scripts/lib/cursor-cli-args.d.mts +63 -0
package/scripts/lib/cursor-cli-args.mjs +129 -0
package/scripts/lib/cursor-script-fail.d.mts +1 -0
package/scripts/lib/cursor-script-fail.mjs +13 -0
package/scripts/lib/cursor-sdk-output-filter.d.mts +5 -0
package/scripts/lib/cursor-smoke-env.d.mts +38 -0
package/scripts/lib/cursor-smoke-env.mjs +81 -0
package/scripts/lib/cursor-smoke-shell.sh +174 -0
package/scripts/lib/cursor-visual-render.d.mts +15 -0
package/scripts/lib/cursor-visual-render.mjs +131 -0
package/scripts/probe-mcp-coldstart.mjs +226 -0
package/scripts/refresh-cursor-model-snapshots.mjs +29 -65
package/scripts/steering-rpc-smoke.mjs +170 -65
package/scripts/tmux-live-smoke.sh +152 -98
package/scripts/visual-tui-smoke.mjs +659 -0
package/shared/cursor-sdk-event-debug-env.d.mts +12 -0
package/shared/cursor-sdk-event-debug-env.mjs +13 -0
package/shared/cursor-sensitive-text.d.mts +1 -0
package/{scripts/lib/cursor-probe-utils.mjs → shared/cursor-sensitive-text.mjs} +1 -13
package/shared/cursor-setting-sources.d.mts +5 -0
package/shared/cursor-setting-sources.mjs +22 -0
package/src/context.ts +21 -12
package/src/cursor-bridge-contract.ts +1 -3
package/src/cursor-incomplete-tool-visibility.ts +72 -49
package/src/cursor-mcp-timeout-override.ts +66 -11
package/src/cursor-native-tool-display-registration.ts +63 -27
package/src/cursor-native-tool-display-replay.ts +246 -143
package/src/cursor-native-tool-display-state.ts +2 -0
package/src/cursor-native-tool-display-tools.ts +149 -41
package/src/cursor-provider-live-run-drain.ts +1 -52
package/src/cursor-provider-run-finalizer.ts +235 -0
package/src/cursor-provider-run-outcome.ts +149 -0
package/src/cursor-provider-turn-api-key.ts +8 -0
package/src/cursor-provider-turn-coordinator.ts +113 -440
package/src/cursor-provider-turn-display-router.ts +216 -0
package/src/cursor-provider-turn-emit.ts +59 -0
package/src/cursor-provider-turn-finalize.ts +119 -0
package/src/cursor-provider-turn-lifecycle-emitter.ts +97 -0
package/src/cursor-provider-turn-message-offset.ts +15 -0
package/src/cursor-provider-turn-prepare.ts +216 -0
package/src/cursor-provider-turn-runner.ts +138 -0
package/src/cursor-provider-turn-sdk-normalizer.ts +88 -0
package/src/cursor-provider-turn-send.ts +103 -0
package/src/cursor-provider-turn-shell-output.ts +107 -0
package/src/cursor-provider-turn-tool-ledger.ts +126 -0
package/src/cursor-provider-turn-types.ts +87 -0
package/src/cursor-provider.ts +16 -482
package/src/cursor-replay-activity-builders.ts +276 -0
package/src/cursor-replay-source-names.ts +33 -0
package/src/cursor-replay-summary-args.ts +191 -0
package/src/cursor-replay-tool-details.ts +464 -0
package/src/cursor-run-final-text.ts +56 -0
package/src/cursor-sdk-abort-error-guard.ts +4 -0
package/src/cursor-sdk-event-debug-constants.ts +14 -5
package/src/cursor-sdk-event-debug.ts +8 -2
package/src/cursor-sensitive-text.ts +3 -36
package/src/cursor-session-agent.ts +265 -88
package/src/cursor-setting-sources.ts +7 -10
package/src/cursor-state.ts +232 -28
package/src/cursor-tool-lifecycle.ts +17 -42
package/src/cursor-tool-manifest.ts +41 -0
package/src/cursor-tool-names.ts +18 -79
package/src/cursor-tool-presentation-registry.ts +556 -0
package/src/cursor-tool-transcript.ts +1 -1
package/src/cursor-tool-visibility.ts +39 -0
package/src/cursor-transcript-tool-formatters.ts +0 -59
package/src/cursor-transcript-tool-specs.ts +169 -232
package/src/cursor-transcript-utils.ts +0 -44
package/src/cursor-web-tool-activity.ts +10 -60
package/src/cursor-web-tool-args.ts +39 -0
package/src/index.ts +4 -10

package/docs/cursor-native-tool-visual-audit.md CHANGED Viewed

@@ -1,8 +1,40 @@
 # Cursor Native Tool Visual Audit Workflow
-This workflow verifies Cursor SDK tool replay the way a human sees it in pi's interactive TUI, without stealing macOS focus.
+This workflow is the canonical repo path for verifying Cursor SDK tool replay the way a human sees it in pi's interactive TUI, without stealing macOS focus.
-Use it before accepting replay-card commits or PRs. Text logs and JSONL are necessary, but they are not enough when the claim is visual parity: always keep before/after PNGs for the exact prompt.
+Use it before accepting replay-card commits or PRs, and for every Cursor provider/runtime release where TUI card/color behavior could regress. Text logs and JSONL are necessary, but they are not enough when the claim is visual parity: always keep PNGs for the exact prompt, and keep before/after PNGs when reviewing a rendering change.
+Current cutover baseline: pi 0.76.0+, exact `@cursor/sdk@1.0.14`, local validation packages `@earendil-works/pi-ai`, `@earendil-works/pi-coding-agent`, and `@earendil-works/pi-tui` at 0.76.0.
+## Cursor SDK 1.0.14 / pi 0.76.0 cutover visual record
+Record the required cutover validation here or in the final release handoff. The default matrix is native replay only: the runner forces native replay registration on, forces Cursor setting sources off, disables the pi bridge, disables overlapping built-in pi tool exposure, and clears inherited Cursor SDK event-debug artifact env. With `--event-debug`, debug capture writes to a deterministic directory under the visual output directory. Do not commit raw ANSI logs, screenshots, terminal recordings, debug artifacts, or `.debug/visual-smoke` scratch files.
+| Field | Required value / evidence |
+| --- | --- |
+| Command/session used | `npm run smoke:visual -- --ext "$PWD" --cwd "$PWD" --mode plan --out-dir <fresh /tmp dir> --label <matrix label> --prompt <matrix prompt>` with default native-replay isolation |
+| Baseline versions | `pi --version` = 0.76.0; `npm ls` = `@cursor/sdk@1.0.14` and local `@earendil-works/*@0.76.0` |
+| Card categories checked | Claim only categories proven by both PNG and JSONL. Required cutover categories are read, grep/search, find/glob, list, shell success, write, edit/diff, and true read failure. Neutral Cursor plan/todo/task/mode activity is optional/opportunistic and only counts when JSONL contains a completed Cursor workflow event. |
+| Observed status/card colors | Confirm native-looking cards use native pi styling; neutral Cursor activity is not red; true errors are distinct; diff previews show red/green; plan status is readable |
+| Screenshot/ANSI evidence location | External path only, for example `/tmp/pi-cursor-sdk-1014-visual.*/read-package.{ansi,txt,html,png,jsonl.path}` |
+| Debug artifact location | External `.debug/cursor-sdk-events/...` or temp artifact directory path only; do not commit raw artifacts |
+| Pass/fail notes | Summarize any mismatch, blocker, or auth/environment limitation |
+Required prompt matrix for this cutover:
+| Label | Prompt | Required JSONL proof | Required visual proof |
+| --- | --- | --- | --- |
+| `read-package` | `Use only your file read tool. Read ./package.json and answer with only the package name. Do not use shell, grep, glob, find, or list tools.` | `toolCall.name=read`, `toolResult.toolName=read`, `isError=false` | Native-looking read card; collapsed label/path readable |
+| `grep-readme` | `Use only your grep/search tool to search ./README.md for the literal string "pi-cursor-sdk". Do not use shell, read, glob, find, ls, or list tools. Report only the first matching file path.` | `toolCall.name=grep`, `toolResult.toolName=grep`, `isError=false` | Native-looking grep/search card; match preview readable |
+| `find-readme` | `Use only your glob/file-search/find tool to find README.md from the repository root. Do not use shell, read, grep, ls, or list tools. Report matched paths exactly.` | `toolCall.name=find`, `toolResult.toolName=find`, `isError=false` | Native-looking find/glob card; matched path readable |
+| `list-src` | `Use only your directory listing tool to list ./src. Do not use shell, read, grep, glob, or find tools. Report whether cursor-provider.ts is present.` | `toolCall.name=ls`, `toolResult.toolName=ls`, `isError=false` | Native-looking list card; directory/path readable |
+| `shell-success` | `Use only your shell/terminal tool to run printf 'cursor visual smoke\\n'. Do not use read, grep, glob, find, ls, edit, or write. Report the output.` | `toolCall.name=bash`, `toolResult.toolName=bash`, `isError=false` | Shell success card is not red/error-styled; stdout readable |
+| `write-file` | `Use your normal file write tool to create .debug/visual-smoke/cursor-mode.txt with exactly two lines: alpha and beta. Do not use shell.` | `toolCall.name=write`, `toolResult.toolName=write`, `isError=false` | Native-looking write card; path/content preview readable |
+| `edit-file` | `Use your normal file edit/str-replace tool to change beta to gamma in .debug/visual-smoke/cursor-mode.txt. Do not use shell.` | `toolCall.name=edit`, `toolResult.toolName=edit`, `isError=false` | Native-looking edit card; diff preview shows red/green added/removed lines |
+| `read-missing` | `Use only your file read tool to read .debug/visual-smoke/does-not-exist.txt. Then explain the result. Do not use shell, grep, glob, find, ls, edit, or write.` | `toolCall.name=read`, `toolResult.toolName=read`, `isError=true` | True failure is visible, bounded, and distinct from neutral Cursor activity |
+| `workflow-activity` | `Stay in Cursor plan mode. If Cursor exposes plan, todo, task, or mode activity for this request, use that capability to outline a tiny unit test without editing files. Otherwise answer with a concise numbered plan. Do not use shell or file mutation tools.` | Optional: completed `cursor` activity whose details/source identify `createPlan`, `updateTodos`, `task`, or mode activity. If absent, record this category as not exercised. | Optional: neutral Cursor workflow activity is neutral, not red, and does not mutate pi plan/todo state. If absent, do not claim this visual category passed. |
+Do not mark a category passed because the prompt was sent. A category passes only when the PNG shows the expected card and the JSONL shows the expected completed `toolCall` / `toolResult` pair. If Cursor chooses a different tool, rerun with a tighter prompt or record that the category was not exercised.
 ## When to use this
@@ -16,70 +48,70 @@ Use this workflow when changing or reviewing:
 Do not use this for ordinary unit-only logic changes.
-## Why this workflow exists
+## Canonical visual inspection path
 Earlier manual verification used a visible Terminal window plus `screencapture`. That worked, but it stole system focus and made it easy for the user to type into the audit window by accident.
-The preferred workflow is now offscreen:
+The canonical workflow is now offscreen and browser-rendered:
 1. Spawn `pi` in a pseudo-terminal at a fixed size.
 2. Feed the prompt programmatically.
-3. Save raw ANSI output and plain text output.
-4. Render the terminal buffer through xterm.js in headless Playwright.
-5. Save a PNG screenshot.
+3. Save raw ANSI output and stripped plain text output.
+4. Render the terminal buffer through a browser-backed terminal renderer, preferably xterm.js.
+5. Save PNG screenshots with `agent_browser` when running inside the pi agent harness, or with Playwright directly when running outside it.
 6. Inspect the session JSONL for exact persisted `toolCall` / `toolResult` data.
-This gives human-like visual evidence without activating Terminal, iTerm, or a browser window.
+This is the best default release path because it exercises the real pi TUI, captures card class/color/label/order/truncation issues before users see them, avoids desktop focus stealing, and leaves reviewable artifacts. Use visible Terminal/Ghostty screenshots only for terminal-specific or pixel-level bugs that cannot be judged through browser-rendered ANSI.
 ## Tool stack
-Install the harness outside this repo so generated assets and temporary dependencies do not pollute commits:
+The canonical runner is checked in at `scripts/visual-tui-smoke.mjs` and exposed as `npm run smoke:visual`. It uses tmux for the fixed-size PTY, `@xterm/xterm` for browser rendering, and Playwright for automatic PNG capture. It resolves `pi` by directly walking the parent `PATH`, uses `process.execPath` for Node, and prepends that Node directory to `PATH` for prereq checks and tmux launches. As a result, `#!/usr/bin/env node` shims use the validated Node, and a login shell or stale tmux server `PATH` cannot silently select a different executable.
+One-time setup from a clean checkout:
 ```bash
-HARNESS=/tmp/pi-visual-harness
-rm -rf "$HARNESS"
-mkdir -p "$HARNESS"
-cd "$HARNESS"
-npm init -y
-npm install node-pty @xterm/xterm playwright
-npm rebuild node-pty
+npm install
+npx playwright install chromium
 ```
-`npm rebuild node-pty` is useful after Node upgrades; without it, `node-pty` may fail with `posix_spawnp failed`.
+`npx playwright install chromium` is only needed for automatic PNG capture. When running inside the pi agent harness, `agent_browser` is the preferred screenshot tool for generated HTML/ANSI output because it can open local files, verify saved artifacts, and capture exact evidence paths; in that case, run `npm run smoke:visual -- --no-screenshot ...` and screenshot the generated `.html` with `agent_browser`. Outside the harness, use Playwright through the checked-in runner.
 ## Runner contract
-A runner script should:
-- Spawn `pi -e <extension-dir> --model cursor/composer-2.5` with:
-  - `PI_CURSOR_NATIVE_TOOL_DISPLAY=1`
-  - `TERM=xterm-256color`
-  - fixed PTY size, for example `150x45`
-  - cwd set to the target audit repo.
-- Wait for startup.
-- Write the exact prompt and carriage return to the PTY.
-- Wait a bounded amount of time.
-- Save:
-  - `<label>.ansi` raw terminal bytes.
-  - `<label>.txt` stripped text for quick search.
-  - `<label>.png` rendered xterm screenshot.
-  - `<label>.jsonl.path` pointing to the latest pi session JSONL.
-- Kill the PTY child after capture.
-- Check for leftover commands when prompts can background work, especially shell timeout tests.
-Example invocation shape:
+`scripts/visual-tui-smoke.mjs` is the durable source of truth for this workflow. It must keep supporting:
+- fixed-size tmux PTY execution of the parent-resolved `pi -e <extension-dir> --model cursor/composer-2.5`
+- parent-resolved `pi` and `tmux` command paths reused in tmux-launched runs, with `process.execPath`'s directory prepended for prereq checks and tmux launches so Node shims use the validated Node
+- `PI_CURSOR_NATIVE_TOOL_DISPLAY=1`
+- `PI_CURSOR_REGISTER_NATIVE_TOOLS=1` by default
+- `PI_CURSOR_SETTING_SOURCES=none` by default
+- `PI_CURSOR_PI_TOOL_BRIDGE=0` by default
+- `PI_CURSOR_EXPOSE_BUILTIN_TOOLS=0` by default
+- Cursor SDK event-debug artifact env cleared before each run; `--event-debug` sets a deterministic debug directory under `--out-dir`
+- `TERM=xterm-256color`
+- cwd set to the target audit repo
+- prompt paste plus carriage return into the interactive TUI
+- bounded post-prompt wait via `--wait-ms`
+- artifacts outside the repo by default
+- `<label>.ansi`, `<label>.txt`, `<label>.html`, `<label>.png`, and `<label>.jsonl.path`
+- `--label`, `--ext`, `--cwd`, `--prompt`, `--prompt-file`, `--wait-ms`, and `--out-dir`
+- `--setting-sources` and `--bridge` opt-ins for non-default visual audits; `--expose-builtin-tools` is accepted only with `--bridge`
+- repeatable `--leftover-pattern` checks for prompts that can background work
+- `-h` / `--help` with examples and exit codes
+Example invocation:
 ```bash
-node /tmp/pi-visual-harness/run-pi-visual.mjs \
-  --label after-shell-nonzero \
-  --ext /path/to/pi-cursor-sdk \
-  --cwd /path/to/test-workspace \
-  --prompt "Run \`printf 'cursor-shell-stderr\\n' >&2; exit 7\` using only the shell/terminal tool. Do not use read, grep, glob, find, ls, edit, or write. Print the command result exactly, then stop." \
-  --wait-ms 30000 \
-  --out-dir /tmp/pi-visual-harness/review-current
+npm run smoke:visual -- \
+  --label shell-success \
+  --ext "$PWD" \
+  --cwd "$PWD" \
+  --prompt "Use only your shell/terminal tool to run printf 'cursor visual smoke\\n'. Do not use read, grep, glob, find, ls, edit, or write. Report the output." \
+  --wait-ms 60000 \
+  --out-dir /tmp/pi-cursor-sdk-visual-review
 ```
-Keep the runner in `/tmp` unless the project explicitly decides to check in a maintained audit harness.
+The runner writes the `.png` through Playwright by default. In the pi agent harness, pass `--no-screenshot`, open the generated `.html` with `agent_browser`, save a PNG screenshot, and record that path beside the runner artifacts. The default evidence is native replay evidence only. For bridge/default-settings visual audits, pass `--bridge`, `--bridge --expose-builtin-tools`, or `--setting-sources <value>` explicitly and label that evidence separately.
 ## Before/after comparison
@@ -103,34 +135,35 @@ ln -s "$AFTER_WT/node_modules" "$BEFORE_WT/node_modules"
 Then run the same prompt against both extension dirs:
 ```bash
-node /tmp/pi-visual-harness/run-pi-visual.mjs \
+npm run smoke:visual -- \
   --label before-glob-single \
   --ext "$BEFORE_WT" \
   --cwd "$TARGET" \
-  --prompt "Find files matching \`src/tools/reindex.ts\` using only the glob/file-search tool. Do not use shell, bash, grep, read, or ls. Print the matched files exactly as found, then stop." \
+  --prompt "Use only your glob/file-search/find tool to find src/tools/reindex.ts. Do not use shell, bash, grep, read, ls, or list. Print the matched files exactly as found, then stop." \
   --wait-ms 16000 \
-  --out-dir /tmp/pi-visual-harness/review-current
+  --out-dir /tmp/pi-cursor-sdk-visual-review-current
-node /tmp/pi-visual-harness/run-pi-visual.mjs \
+npm run smoke:visual -- \
   --label after-glob-single \
   --ext "$AFTER_WT" \
   --cwd "$TARGET" \
-  --prompt "Find files matching \`src/tools/reindex.ts\` using only the glob/file-search tool. Do not use shell, bash, grep, read, or ls. Print the matched files exactly as found, then stop." \
+  --prompt "Use only your glob/file-search/find tool to find src/tools/reindex.ts. Do not use shell, bash, grep, read, ls, or list. Print the matched files exactly as found, then stop." \
   --wait-ms 16000 \
-  --out-dir /tmp/pi-visual-harness/review-current
+  --out-dir /tmp/pi-cursor-sdk-visual-review-current
 ```
-For review, create a simple HTML/PNG gallery that places `before-*.png` and `after-*.png` side by side. Keep the generated gallery in `/tmp` unless explicitly asked to commit visual artifacts.
+For review, create a simple HTML/PNG gallery that places `before-*.png` and `after-*.png` side by side. Keep the generated gallery in `/tmp` unless explicitly asked to commit visual artifacts. In agent-harness runs, use `agent_browser` to open that gallery or the generated single-run HTML and save verified screenshots.
 ## JSONL inspection
 For each visual claim, inspect the JSONL path written by the runner. Confirm at least:
-- `toolCall.name` is the expected pi-facing replay tool name.
+- `toolCall.name` matches the prompt matrix for the category being claimed.
 - `toolCall.arguments` show the expected user-facing args.
 - `toolResult.toolName` matches the call.
 - `toolResult.content[0].text` contains the recorded body expected in the card.
 - `toolResult.isError` matches the visual card state.
+- The screenshot label and JSONL path are recorded together, so a card category cannot be claimed from a screenshot or JSONL alone.
 For local pi MCP bridge claims, also confirm:
@@ -143,7 +176,7 @@ Small helper pattern:
 ```bash
 python3 - <<'PY'
 import json, pathlib
-path = pathlib.Path('/tmp/pi-visual-harness/review-current/after-shell-nonzero.jsonl.path').read_text().strip()
+path = pathlib.Path('/tmp/pi-cursor-sdk-visual-review-current/shell-success.jsonl.path').read_text().strip()
 for line in pathlib.Path(path).read_text().splitlines():
     obj = json.loads(line)
     msg = obj.get('message', {})
@@ -159,25 +192,37 @@ PY
 ## Safety rules
-- Prefer the offscreen PTY renderer. Do not use `osascript`, visible Terminal windows, or `screencapture` unless a user explicitly asks for a real desktop screenshot.
+- Prefer the canonical offscreen PTY plus browser-rendered screenshot path. Do not use `osascript`, visible Terminal windows, or `screencapture` unless a user explicitly asks for a real desktop screenshot or the bug is terminal-specific.
 - Keep generated screenshots, HTML galleries, ANSI logs, and temporary harness dependencies out of the repo by default.
 - Use short, deterministic prompts with bounded wait times.
-- For timeout/background prompts, always check for leftovers:
+- For timeout/background prompts, always check for leftovers, preferably with the runner's repeatable `--leftover-pattern` option:
+```bash
+npm run smoke:visual -- \
+  --label shell-timeout \
+  --prompt 'Run sleep 30 && echo should-not-print using only the shell tool.' \
+  --leftover-pattern 'sleep 30|should-not-print'
+```
+Manual fallback:
 ```bash
-ps -axo pid,etime,command | rg "sleep 2|should-not-print|<audit-session-label>" || true
+ps -axo pid,etime,command | rg "sleep 30|should-not-print|<audit-session-label>" || true
 ```
 - If the model uses a different tool than requested, record it as model/provider behavior unless JSONL shows replay lost or misrendered a completed Cursor tool event.
-- Visual output can differ slightly from macOS Terminal fonts because xterm.js renders offscreen. Treat this workflow as evidence for card class, color state, labels, ordering, truncation, and content. Use a real terminal screenshot only for pixel-level terminal-specific bugs.
+- Do not use `--bridge`, `--bridge --expose-builtin-tools`, or non-`none` `--setting-sources` for the default native replay matrix. Those opt-ins validate different surfaces and must be labeled separately.
+- Visual output can differ slightly from macOS Terminal fonts because browser/xterm renderers run offscreen. Treat this workflow as authoritative release evidence for card class, color state, labels, ordering, truncation, footer/status readability, and content. Use a real terminal screenshot only for pixel-level terminal-specific bugs.
 ## Required evidence before commit or merge
 Before accepting a replay-card change, provide:
-- Before and after PNG paths.
+- Browser-rendered PNG paths captured from offscreen ANSI output.
+- Before and after PNG paths when comparing a rendering change.
 - The prompt used for each run or before/after pair.
+- ANSI/text/HTML paths when helpful for review.
 - JSONL paths for each run.
 - A short statement of what changed visually.
-- The relevant JSONL `toolCall` / `toolResult` facts.
+- The relevant JSONL `toolCall` / `toolResult` facts, including expected tool name and `isError` state from the prompt matrix.
 - `npm test` and `npm run typecheck` results, unless the change is documentation-only.

package/docs/cursor-testing-lessons.md CHANGED Viewed

@@ -4,6 +4,8 @@
 This document records maintainer testing lessons for `pi-cursor-sdk`. It complements unit tests and the [Cursor live smoke checklist](./cursor-live-smoke-checklist.md). Use it when adding regression coverage, debugging false-green releases, or building isolated smoke harnesses.
+For a **minimal one-session dogfood pass** (baseline env, one native + one bridge call, JSONL ID patterns, bootstrap manifest, edit diff card), use the [Cursor dogfood checklist](./cursor-dogfood-checklist.md) before running the full live smoke matrix.
 ## Core lesson: integration-shaped bugs beat unit mocks
 The native replay `Tool grep not found` failure was integration-shaped, not unit-shaped:
@@ -236,7 +238,7 @@ The script writes timestamped artifacts under `--out` (default `/tmp/pi-cursor-s
 Stdout prints artifact paths and summary counts only. Raw payloads stay on disk and may contain local paths, project text, tool args/results, or secrets — do not commit or share them.
-Hard repo rule: Cursor SDK behavior claims must come from the installed `@cursor/sdk` package and/or https://cursor.com/docs/sdk/typescript, not from memory or ad-hoc probes alone.
+Hard repo rule: Cursor SDK behavior claims must come from the installed `@cursor/sdk` package and/or https://cursor.com/docs/sdk/typescript, not from memory or ad-hoc probes alone. Current cutover validation targets exact `@cursor/sdk@1.0.14` and pi 0.76.0 local packages.
 ## Pi provider SDK event capture
@@ -313,7 +315,7 @@ Capture is file-only by default: no stderr markers, and bridge diagnostics durin
 ### Discarded incomplete SDK tool calls
-When Cursor emits `tool-call-started` without a matching completion/step result, the provider surfaces a bounded neutral **Cursor … did not complete** activity card or thinking trace at run end. pi bridge MCP calls (`pi__*`) are excluded because pi already shows the real pi tool execution path.
+When Cursor emits `tool-call-started` without a matching completion/step result, the provider surfaces a bounded neutral **Cursor … did not complete** activity card or thinking trace at run end for failed/aborted runs, runs with no assistant text, and external/side-effectful tools. Incomplete fast local discovery starts (`read`, `grep`, `glob`, `ls`) are debug-only after a successful text-producing run so stale SDK start events do not create red post-answer cards. pi bridge MCP calls (`pi__*`) are excluded because pi already shows the real pi tool execution path.
 With `PI_CURSOR_SDK_EVENT_DEBUG=1`, each discarded started call is also recorded in `coordinator-events.jsonl` under phase `discarded-incomplete-started-tool-call` with:
@@ -321,7 +323,7 @@ With `PI_CURSOR_SDK_EVENT_DEBUG=1`, each discarded started call is also recorded
 - scrubbed call-id hash (raw call IDs are not written)
 - reason such as `no-completion-at-run-end`, `abort`, or `sdk-failure`
-Stderr output for these records requires `PI_CURSOR_SDK_EVENT_DEBUG_STDERR=1`. This complements the standalone `npm run debug:sdk-events` probe by interpreting a specific provider discard path during normal pi runs. User-visible incomplete cards explain the gap in the TUI; debug artifacts remain maintainer-only (**#52**).
+Stderr output for these records requires `PI_CURSOR_SDK_EVENT_DEBUG_STDERR=1`. This complements the standalone `npm run debug:sdk-events` probe by interpreting a specific provider discard path during normal pi runs. User-visible incomplete cards explain actionable gaps in the TUI; debug artifacts remain maintainer-only (**#52**) and are the source of truth for suppressed fast-local stale starts.
 ## Tool calls listed as plain text (#40 triage)
@@ -340,7 +342,7 @@ Ask the reporter (or capture yourself) for:
 | `pi --version` and installed `pi-cursor-sdk` version | Confirms extension/runtime in use |
 | Model ID (for example `cursor/composer-2.5`) | Routing/replay behavior is model-scoped |
 | Exact repro prompt and prior turns | Multi-turn replay history affects prompt text |
-| Flags: `--cursor-no-fast`, `PI_CURSOR_PI_TOOL_BRIDGE`, `PI_CURSOR_EXPOSE_BUILTIN_TOOLS`, `PI_CURSOR_SETTING_SOURCES` | Bridge vs native-only vs narrowed settings |
+| Flags: `--cursor-no-fast`, `PI_CURSOR_PI_TOOL_BRIDGE`, `PI_CURSOR_EXPOSE_BUILTIN_TOOLS`, `PI_CURSOR_SETTING_SOURCES`, `PI_CURSOR_TOOL_MANIFEST` | Bridge vs native-only vs narrowed settings; bootstrap callable-surface manifest |
 | Whether the listed names are `pi__*` bridge MCP, Cursor-native (`browser_navigate`, `WebSearch`), or `cursor-replay-*` replay IDs | Three different surfaces (see [Cursor native tool replay](./cursor-native-tool-replay.md#live-bridge-vs-replay)) |
 | Red toast / `errorMessage` text, if any | Distinguishes #55 failure surfacing from silent text echo |
 | Process exit / uncaught `ConnectError` / `ETIMEDOUT` stack trace, if any | Hard network crash (**#43**), not #40 model text echo |
@@ -425,4 +427,7 @@ rg '"type": "toolCall"|Tool call \(Cursor|cursor-replay-' "$SMOKE_DIR/session"/*
 - `scripts/validate-smoke-jsonl.mjs`
 - `scripts/debug-sdk-events.mjs`
 - `scripts/debug-provider-events.mjs`
-- `test/helpers/cursor-provider-harness.ts` — controllable native replay pi mock (`createNativeToolDisplayPiForTest`)
+- `shared/` — runtime-safe ESM helpers consumed by provider `src/` and maintainer scripts (`cursor-sensitive-text.mjs`, `cursor-setting-sources.mjs`).
+- `scripts/lib/` — maintainer plumbing (CLI arg parsing, secret-aware `fail()`, child-process shutdown, shell timeout/auth helpers). Re-exports `shared/` helpers so published smoke/debug scripts stay aligned with provider runtime (`test/maintainer-scripts-lib.test.ts`).
+- `test/helpers/pi-harness.ts` — canonical fake pi/extension harness (`createPiHarness`, shared model/context/event helpers)
+- `test/helpers/cursor-provider-harness.ts` — Cursor SDK provider mocks and stream helpers (re-exports pi-harness fixtures; `createNativeToolDisplayPiForTest` for native replay)

package/docs/cursor-tool-surfaces.md ADDED Viewed

@@ -0,0 +1,69 @@
+# Cursor tool surfaces in pi
+pi-cursor-sdk runs Cursor models through the local `@cursor/sdk` agent runtime. A single pi session can expose **three related but different** tool namespaces. This page is the user-facing guide; maintainer replay details live in [Cursor native tool replay](./cursor-native-tool-replay.md).
+## The three surfaces
+| Surface | Who owns it | Callable by Cursor? | What pi shows |
+| --- | --- | --- | --- |
+| **Cursor SDK host tools** | Cursor local agent | Yes | Native replay cards (`read`, `bash`, …) or neutral Cursor activity. Representative ToolType list: [SDK ToolType replay matrix](./cursor-native-tool-replay.md#sdk-tooltype-replay-matrix). |
+| **Configured Cursor MCP** | Cursor settings / `~/.cursor/mcp.json` | Yes (when loaded) | Neutral **Cursor MCP** activity cards on replay |
+| **Pi bridge (`pi__*`)** | pi-cursor-sdk loopback MCP | Yes, when exposed | Real pi tool names (`cursor_ask_question`, extension tools, …) |
+**Not callable:** `cursor-replay-*` IDs in JSONL, pi history tool names used only for display, and transcript labels. Cursor must call exposed `pi__*` MCP names for bridged pi tools, not the pi card name.
+## Discoverability
+- **MCP `listTools`** (and pi's MCP catalog when present) lists **MCP servers only** — for example `pi_tools` with `pi__cursor_ask_question`. It does **not** enumerate Cursor SDK host tools such as `Read` or `Shell`.
+- **Bootstrap prompts** include a short **Cursor SDK tool boundary** block plus a compact **callable tool surfaces** manifest by default (disable manifest with `PI_CURSOR_TOOL_MANIFEST=0`). The manifest lists host-tool categories, bridge `pi__*` names for the current run, and a reminder that configured Cursor MCP servers appear at runtime via `listTools`. MCP `listTools` entries for bridged pi tools point back to the bootstrap prompt instead of repeating the full contract.
+- **Incremental prompts** omit the full boundary block but keep a short tail guard (including an explicit shell `cd` hint); the session agent retains prior bootstrap context.
+- **In-session debug:** `/cursor-tools` prints bridge enablement, manifest enablement, effective `PI_CURSOR_SETTING_SOURCES`, and the current callable-surface snapshot.
+## Pi bridge vs Cursor native
+Default behavior:
+- Cursor host tools handle files, shell, grep, edits, tasks, and Cursor-native MCP/plugins.
+- The pi bridge exposes **active pi tools** as `pi__*` MCP names when `PI_CURSOR_PI_TOOL_BRIDGE` is enabled (default on).
+- Overlapping pi builtins (`read`, `bash`, `write`, `edit`, `grep`, `find`, `ls`) are **hidden** from the bridge unless `PI_CURSOR_EXPOSE_BUILTIN_TOOLS=1`.
+`pi-cursor-sdk` always registers `cursor_ask_question` for Cursor models when the bridge is on; Cursor sees `pi__cursor_ask_question`.
+```bash
+# Disable pi bridge entirely
+PI_CURSOR_PI_TOOL_BRIDGE=0 pi --model cursor/composer-2.5
+# Expose overlapping pi builtins through the bridge
+PI_CURSOR_EXPOSE_BUILTIN_TOOLS=1 pi --model cursor/composer-2.5
+# Disable bootstrap tool manifest
+PI_CURSOR_TOOL_MANIFEST=0 pi --model cursor/composer-2.5
+```
+## Cursor settings vs pi toggles
+Disabling or removing an MCP server **only in pi** does not remove Cursor's ambient MCP servers, which are loaded from Cursor config.
+| Control | Effect |
+| --- | --- |
+| `PI_CURSOR_SETTING_SOURCES=all` (default) | Loads user/project Cursor MCP, plugins, rules (`~/.cursor/mcp.json`, etc.) |
+| `PI_CURSOR_SETTING_SOURCES=none` | Disables ambient Cursor setting sources for local agents |
+| `PI_CURSOR_SETTING_SOURCES=project,plugins` | Narrows which layers load |
+| Empty or edited `~/.cursor/mcp.json` | Changes which user MCP servers Cursor connects to |
+To reproduce a **minimal** surface (pi-cursor-sdk + Cursor host only), use an extension-only install, an empty user MCP config, and `PI_CURSOR_SETTING_SOURCES=none` when you do not need Cursor rules/MCP from disk.
+## JSONL ID patterns (debugging)
+| ID prefix | Meaning |
+| --- | --- |
+| `cursor-replay-*` | Display-only replay of Cursor SDK activity |
+| `cursor-pi-bridge-run-*` | Live pi execution via bridge |
+Example mistake: treating `cursor-replay-…` as a tool to invoke. Replay never re-runs work.
+## Related docs
+- [README — Cursor provider tool contract](../README.md#cursor-provider-tool-contract)
+- [Cursor native tool replay](./cursor-native-tool-replay.md)
+- [Cursor model UX spec](./cursor-model-ux-spec.md)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "pi-cursor-sdk",
-	"version": "0.1.19",
+	"version": "0.1.21",
 	"description": "pi provider extension backed by @cursor/sdk local agents",
 	"author": "Mitch Fultz (https://github.com/fitchmultz)",
 	"license": "MIT",
@@ -22,20 +22,39 @@
 	},
 	"homepage": "https://github.com/fitchmultz/pi-cursor-sdk#readme",
 	"files": [
+		"shared",
 		"src",
 		"scripts/refresh-cursor-model-snapshots.mjs",
 		"scripts/steering-rpc-smoke.mjs",
 		"scripts/tmux-live-smoke.sh",
+		"scripts/visual-tui-smoke.mjs",
 		"scripts/isolated-cursor-smoke.sh",
+		"scripts/fixtures/plan-strip-shim",
 		"scripts/validate-smoke-jsonl.mjs",
+		"scripts/probe-mcp-coldstart.mjs",
 		"scripts/debug-sdk-events.mjs",
+		"scripts/debug-sdk-events.d.mts",
 		"scripts/debug-provider-events.mjs",
-		"scripts/lib/cursor-probe-utils.mjs",
+		"scripts/debug-provider-events.d.mts",
+		"scripts/lib/cursor-cli-args.mjs",
+		"scripts/lib/cursor-cli-args.d.mts",
+		"scripts/lib/cursor-child-process.mjs",
+		"scripts/lib/cursor-child-process.d.mts",
+		"scripts/lib/cursor-script-fail.mjs",
+		"scripts/lib/cursor-script-fail.d.mts",
+		"scripts/lib/cursor-smoke-env.mjs",
+		"scripts/lib/cursor-smoke-env.d.mts",
+		"scripts/lib/cursor-smoke-shell.sh",
+		"scripts/lib/cursor-visual-render.mjs",
+		"scripts/lib/cursor-visual-render.d.mts",
 		"scripts/lib/cursor-sdk-output-filter.mjs",
+		"scripts/lib/cursor-sdk-output-filter.d.mts",
 		"README.md",
 		"docs/cursor-model-ux-spec.md",
+		"docs/cursor-tool-surfaces.md",
 		"docs/cursor-live-smoke-checklist.md",
 		"docs/cursor-testing-lessons.md",
+		"docs/cursor-dogfood-checklist.md",
 		"docs/cursor-native-tool-replay.md",
 		"docs/cursor-native-tool-visual-audit.md",
 		"LICENSE",
@@ -46,31 +65,38 @@
 		"node": ">=22.19.0"
 	},
 	"scripts": {
-		"typecheck": "tsc --noEmit",
+		"typecheck": "npm run typecheck:src && npm run typecheck:tests && npm run typecheck:replay-compile",
+		"typecheck:src": "tsc --noEmit",
+		"typecheck:tests": "tsc -p tsconfig.test.json --noEmit",
+		"typecheck:replay-compile": "tsc --noEmit -p test/tsconfig.json",
 		"test": "vitest run",
 		"test:watch": "vitest",
 		"refresh:cursor-snapshots": "node scripts/refresh-cursor-model-snapshots.mjs",
 		"smoke:live": "scripts/tmux-live-smoke.sh",
+		"smoke:visual": "node scripts/visual-tui-smoke.mjs",
 		"smoke:isolated": "scripts/isolated-cursor-smoke.sh",
 		"smoke:steering": "node scripts/steering-rpc-smoke.mjs",
 		"smoke:jsonl": "node scripts/validate-smoke-jsonl.mjs",
 		"debug:sdk-events": "node scripts/debug-sdk-events.mjs",
-		"debug:provider-events": "node scripts/debug-provider-events.mjs"
+		"debug:provider-events": "node scripts/debug-provider-events.mjs",
+		"debug:mcp-coldstart": "node scripts/probe-mcp-coldstart.mjs"
 	},
 	"dependencies": {
-		"@cursor/sdk": "^1.0.13",
+		"@cursor/sdk": "1.0.14",
 		"@modelcontextprotocol/sdk": "^1.29.0"
 	},
 	"peerDependencies": {
-		"@earendil-works/pi-ai": "*",
-		"@earendil-works/pi-coding-agent": "*",
-		"@earendil-works/pi-tui": "*",
+		"@earendil-works/pi-ai": ">=0.76.0",
+		"@earendil-works/pi-coding-agent": ">=0.76.0",
+		"@earendil-works/pi-tui": ">=0.76.0",
 		"typebox": "*"
 	},
 	"devDependencies": {
-		"@earendil-works/pi-ai": "^0.75.5",
-		"@earendil-works/pi-coding-agent": "^0.75.5",
-		"@earendil-works/pi-tui": "^0.75.5",
+		"@earendil-works/pi-ai": "0.76.0",
+		"@earendil-works/pi-coding-agent": "0.76.0",
+		"@earendil-works/pi-tui": "0.76.0",
+		"@xterm/xterm": "^6.0.0",
+		"playwright": "^1.60.0",
 		"typebox": "^1.1.38",
 		"typescript": "^6.0.3",
 		"vitest": "^4.1.6"

package/scripts/debug-provider-events.d.mts ADDED Viewed

@@ -0,0 +1,59 @@
+export interface CursorDebugProviderEventsArgs { // Option bag returned by parseDebugProviderEventsArgs and accepted by runDebugProviderEvents.
+	cwd: string; // required
+	model: string; // required
+	prompt?: string; // optional; presumably inline prompt text — confirm against the .mjs implementation
+	promptFile?: string; // optional; presumably a path to a file holding the prompt — confirm
+	out?: string; // optional; NOTE(review): looks like an output location — confirm
+	settingSources?: string[] | undefined; // optional, and may also be passed explicitly as undefined
+	sessionDir?: string; // optional
+	apiKey?: string; // optional; presumably a credential, so avoid logging it — confirm
+	help: boolean; // always present (not optional)
+}
+export declare function parseDebugProviderEventsArgs( // Declared to take an argv array and return a CursorDebugProviderEventsArgs.
+	argv: string[],
+	env?: NodeJS.ProcessEnv, // optional; NOTE(review): the fallback used when omitted is not visible in this declaration
+): CursorDebugProviderEventsArgs;
+export interface CursorPiSessionSnapshotState { // Type of the optional CursorDebugCaptureSummary.piSessionSnapshot field.
+	copied: boolean; // the only required field
+	sessionFile?: string; // optional
+	reason?: string; // optional; presumably explains a copied=false outcome — confirm
+	recoveredAfterChildExit?: boolean; // optional; NOTE(review): likely related to backfillPiSessionSnapshot — confirm
+}
+export type CursorDebugCaptureCounts = Record<string, number | Record<string, number>>; // Each key maps to a number or to one nested level of string-keyed numbers.
+export interface CursorDebugCaptureSummary { // Parameter and return shape of backfillPiSessionSnapshot; only artifactDir and counts are required.
+	artifactDir: string; // required
+	sessionFile?: string; // optional
+	counts: CursorDebugCaptureCounts; // required
+	piSessionSnapshot?: CursorPiSessionSnapshotState; // optional
+	artifacts?: Record<string, string>; // optional string-to-string map; NOTE(review): key/value meaning not visible here
+	elapsedMs?: number; // optional; milliseconds, going by the name
+	waitResultRecorded?: boolean; // optional
+}
+export interface CursorDebugProviderEventsRunSummary { // Resolved value of runDebugProviderEvents; every field is required.
+	artifactDir: string;
+	artifacts: Record<string, string>; // string-to-string map; NOTE(review): key/value meaning not visible here
+	counts: CursorDebugCaptureCounts;
+	elapsedMs: number; // milliseconds, going by the name
+	model: string;
+	cwd: string;
+	sessionDir: string; // required here, although optional on CursorDebugProviderEventsArgs
+	extensionVersion: string;
+	sdkVersion: string;
+	waitResultRecorded: boolean;
+}
+export declare function backfillPiSessionSnapshot( // Declared to take a capture summary plus two directory strings and return a capture summary.
+	captureSummary: CursorDebugCaptureSummary | undefined, // callers may pass undefined
+	artifactDir: string,
+	sessionDir: string,
+): CursorDebugCaptureSummary | undefined; // may return undefined; NOTE(review): the conditions are not visible in this declaration
+export declare function runDebugProviderEvents( // Asynchronous entry point taking already-parsed arguments.
+	args: CursorDebugProviderEventsArgs,
+	env?: NodeJS.ProcessEnv, // optional; NOTE(review): the fallback used when omitted is not visible in this declaration
+): Promise<CursorDebugProviderEventsRunSummary>; // resolves to the run summary