pi-cursor-sdk 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/CHANGELOG.md +52 -0
  2. package/README.md +72 -11
  3. package/docs/cursor-dogfood-checklist.md +57 -0
  4. package/docs/cursor-live-smoke-checklist.md +116 -10
  5. package/docs/cursor-model-ux-spec.md +60 -19
  6. package/docs/cursor-native-tool-replay.md +21 -11
  7. package/docs/cursor-native-tool-visual-audit.md +104 -59
  8. package/docs/cursor-testing-lessons.md +10 -5
  9. package/docs/cursor-tool-surfaces.md +69 -0
  10. package/package.json +37 -11
  11. package/scripts/debug-provider-events.d.mts +59 -0
  12. package/scripts/debug-provider-events.mjs +70 -175
  13. package/scripts/debug-sdk-events.d.mts +90 -0
  14. package/scripts/debug-sdk-events.mjs +36 -98
  15. package/scripts/fixtures/plan-strip-shim/index.ts +12 -0
  16. package/scripts/isolated-cursor-smoke.sh +264 -102
  17. package/scripts/lib/cursor-child-process.d.mts +10 -0
  18. package/scripts/lib/cursor-child-process.mjs +50 -0
  19. package/scripts/lib/cursor-cli-args.d.mts +63 -0
  20. package/scripts/lib/cursor-cli-args.mjs +129 -0
  21. package/scripts/lib/cursor-script-fail.d.mts +1 -0
  22. package/scripts/lib/cursor-script-fail.mjs +13 -0
  23. package/scripts/lib/cursor-sdk-output-filter.d.mts +5 -0
  24. package/scripts/lib/cursor-smoke-env.d.mts +38 -0
  25. package/scripts/lib/cursor-smoke-env.mjs +81 -0
  26. package/scripts/lib/cursor-smoke-shell.sh +174 -0
  27. package/scripts/lib/cursor-visual-render.d.mts +15 -0
  28. package/scripts/lib/cursor-visual-render.mjs +131 -0
  29. package/scripts/probe-mcp-coldstart.mjs +226 -0
  30. package/scripts/refresh-cursor-model-snapshots.mjs +29 -65
  31. package/scripts/steering-rpc-smoke.mjs +170 -65
  32. package/scripts/tmux-live-smoke.sh +152 -98
  33. package/scripts/visual-tui-smoke.mjs +659 -0
  34. package/shared/cursor-sdk-event-debug-env.d.mts +12 -0
  35. package/shared/cursor-sdk-event-debug-env.mjs +13 -0
  36. package/shared/cursor-sensitive-text.d.mts +1 -0
  37. package/{scripts/lib/cursor-probe-utils.mjs → shared/cursor-sensitive-text.mjs} +1 -13
  38. package/shared/cursor-setting-sources.d.mts +5 -0
  39. package/shared/cursor-setting-sources.mjs +22 -0
  40. package/src/context.ts +21 -12
  41. package/src/cursor-bridge-contract.ts +1 -3
  42. package/src/cursor-incomplete-tool-visibility.ts +72 -49
  43. package/src/cursor-mcp-timeout-override.ts +66 -11
  44. package/src/cursor-native-tool-display-registration.ts +63 -27
  45. package/src/cursor-native-tool-display-replay.ts +246 -143
  46. package/src/cursor-native-tool-display-state.ts +2 -0
  47. package/src/cursor-native-tool-display-tools.ts +149 -41
  48. package/src/cursor-provider-live-run-drain.ts +1 -52
  49. package/src/cursor-provider-run-finalizer.ts +235 -0
  50. package/src/cursor-provider-run-outcome.ts +149 -0
  51. package/src/cursor-provider-turn-api-key.ts +8 -0
  52. package/src/cursor-provider-turn-coordinator.ts +113 -440
  53. package/src/cursor-provider-turn-display-router.ts +216 -0
  54. package/src/cursor-provider-turn-emit.ts +59 -0
  55. package/src/cursor-provider-turn-finalize.ts +119 -0
  56. package/src/cursor-provider-turn-lifecycle-emitter.ts +97 -0
  57. package/src/cursor-provider-turn-message-offset.ts +15 -0
  58. package/src/cursor-provider-turn-prepare.ts +216 -0
  59. package/src/cursor-provider-turn-runner.ts +138 -0
  60. package/src/cursor-provider-turn-sdk-normalizer.ts +88 -0
  61. package/src/cursor-provider-turn-send.ts +103 -0
  62. package/src/cursor-provider-turn-shell-output.ts +107 -0
  63. package/src/cursor-provider-turn-tool-ledger.ts +126 -0
  64. package/src/cursor-provider-turn-types.ts +87 -0
  65. package/src/cursor-provider.ts +16 -482
  66. package/src/cursor-replay-activity-builders.ts +276 -0
  67. package/src/cursor-replay-source-names.ts +33 -0
  68. package/src/cursor-replay-summary-args.ts +191 -0
  69. package/src/cursor-replay-tool-details.ts +464 -0
  70. package/src/cursor-run-final-text.ts +56 -0
  71. package/src/cursor-sdk-abort-error-guard.ts +4 -0
  72. package/src/cursor-sdk-event-debug-constants.ts +14 -5
  73. package/src/cursor-sdk-event-debug.ts +8 -2
  74. package/src/cursor-sensitive-text.ts +3 -36
  75. package/src/cursor-session-agent.ts +265 -88
  76. package/src/cursor-setting-sources.ts +7 -10
  77. package/src/cursor-state.ts +232 -28
  78. package/src/cursor-tool-lifecycle.ts +17 -42
  79. package/src/cursor-tool-manifest.ts +41 -0
  80. package/src/cursor-tool-names.ts +18 -79
  81. package/src/cursor-tool-presentation-registry.ts +556 -0
  82. package/src/cursor-tool-transcript.ts +1 -1
  83. package/src/cursor-tool-visibility.ts +39 -0
  84. package/src/cursor-transcript-tool-formatters.ts +0 -59
  85. package/src/cursor-transcript-tool-specs.ts +169 -232
  86. package/src/cursor-transcript-utils.ts +0 -44
  87. package/src/cursor-web-tool-activity.ts +10 -60
  88. package/src/cursor-web-tool-args.ts +39 -0
  89. package/src/index.ts +4 -10
@@ -1,8 +1,40 @@
1
1
  # Cursor Native Tool Visual Audit Workflow
2
2
 
3
- This workflow verifies Cursor SDK tool replay the way a human sees it in pi's interactive TUI, without stealing macOS focus.
3
+ This workflow is the canonical repo path for verifying Cursor SDK tool replay the way a human sees it in pi's interactive TUI, without stealing macOS focus.
4
4
 
5
- Use it before accepting replay-card commits or PRs. Text logs and JSONL are necessary, but they are not enough when the claim is visual parity: always keep before/after PNGs for the exact prompt.
5
+ Use it before accepting replay-card commits or PRs, and for every Cursor provider/runtime release where TUI card/color behavior could regress. Text logs and JSONL are necessary, but they are not enough when the claim is visual parity: always keep PNGs for the exact prompt, and keep before/after PNGs when reviewing a rendering change.
6
+
7
+ Current cutover baseline: pi 0.76.0+, exact `@cursor/sdk@1.0.14`, local validation packages `@earendil-works/pi-ai`, `@earendil-works/pi-coding-agent`, and `@earendil-works/pi-tui` at 0.76.0.
8
+
9
+ ## Cursor SDK 1.0.14 / pi 0.76.0 cutover visual record
10
+
11
+ Record the required cutover validation here or in the final release handoff. The default matrix is native replay only: the runner forces native replay registration on, forces Cursor setting sources off, disables the pi bridge, disables overlapping built-in pi tool exposure, and clears inherited Cursor SDK event-debug artifact env. With `--event-debug`, debug capture writes to a deterministic directory under the visual output directory. Do not commit raw ANSI logs, screenshots, terminal recordings, debug artifacts, or `.debug/visual-smoke` scratch files.
12
+
13
+ | Field | Required value / evidence |
14
+ | --- | --- |
15
+ | Command/session used | `npm run smoke:visual -- --ext "$PWD" --cwd "$PWD" --mode plan --out-dir <fresh /tmp dir> --label <matrix label> --prompt <matrix prompt>` with default native-replay isolation |
16
+ | Baseline versions | `pi --version` = 0.76.0; `npm ls` = `@cursor/sdk@1.0.14` and local `@earendil-works/*@0.76.0` |
17
+ | Card categories checked | Claim only categories proven by both PNG and JSONL. Required cutover categories are read, grep/search, find/glob, list, shell success, write, edit/diff, and true read failure. Neutral Cursor plan/todo/task/mode activity is optional/opportunistic and only counts when JSONL contains a completed Cursor workflow event. |
18
+ | Observed status/card colors | Confirm native-looking cards use native pi styling; neutral Cursor activity is not red; true errors are distinct; diff previews show red/green; plan status is readable |
19
+ | Screenshot/ANSI evidence location | External path only, for example `/tmp/pi-cursor-sdk-1014-visual.*/read-package.{ansi,txt,html,png,jsonl.path}` |
20
+ | Debug artifact location | External `.debug/cursor-sdk-events/...` or temp artifact directory path only; do not commit raw artifacts |
21
+ | Pass/fail notes | Summarize any mismatch, blocker, or auth/environment limitation |
22
+
23
+ Required prompt matrix for this cutover:
24
+
25
+ | Label | Prompt | Required JSONL proof | Required visual proof |
26
+ | --- | --- | --- | --- |
27
+ | `read-package` | `Use only your file read tool. Read ./package.json and answer with only the package name. Do not use shell, grep, glob, find, or list tools.` | `toolCall.name=read`, `toolResult.toolName=read`, `isError=false` | Native-looking read card; collapsed label/path readable |
28
+ | `grep-readme` | `Use only your grep/search tool to search ./README.md for the literal string "pi-cursor-sdk". Do not use shell, read, glob, find, ls, or list tools. Report only the first matching file path.` | `toolCall.name=grep`, `toolResult.toolName=grep`, `isError=false` | Native-looking grep/search card; match preview readable |
29
+ | `find-readme` | `Use only your glob/file-search/find tool to find README.md from the repository root. Do not use shell, read, grep, ls, or list tools. Report matched paths exactly.` | `toolCall.name=find`, `toolResult.toolName=find`, `isError=false` | Native-looking find/glob card; matched path readable |
30
+ | `list-src` | `Use only your directory listing tool to list ./src. Do not use shell, read, grep, glob, or find tools. Report whether cursor-provider.ts is present.` | `toolCall.name=ls`, `toolResult.toolName=ls`, `isError=false` | Native-looking list card; directory/path readable |
31
+ | `shell-success` | `Use only your shell/terminal tool to run printf 'cursor visual smoke\\n'. Do not use read, grep, glob, find, ls, edit, or write. Report the output.` | `toolCall.name=bash`, `toolResult.toolName=bash`, `isError=false` | Shell success card is not red/error-styled; stdout readable |
32
+ | `write-file` | `Use your normal file write tool to create .debug/visual-smoke/cursor-mode.txt with exactly two lines: alpha and beta. Do not use shell.` | `toolCall.name=write`, `toolResult.toolName=write`, `isError=false` | Native-looking write card; path/content preview readable |
33
+ | `edit-file` | `Use your normal file edit/str-replace tool to change beta to gamma in .debug/visual-smoke/cursor-mode.txt. Do not use shell.` | `toolCall.name=edit`, `toolResult.toolName=edit`, `isError=false` | Native-looking edit card; diff preview shows red/green added/removed lines |
34
+ | `read-missing` | `Use only your file read tool to read .debug/visual-smoke/does-not-exist.txt. Then explain the result. Do not use shell, grep, glob, find, ls, edit, or write.` | `toolCall.name=read`, `toolResult.toolName=read`, `isError=true` | True failure is visible, bounded, and distinct from neutral Cursor activity |
35
+ | `workflow-activity` | `Stay in Cursor plan mode. If Cursor exposes plan, todo, task, or mode activity for this request, use that capability to outline a tiny unit test without editing files. Otherwise answer with a concise numbered plan. Do not use shell or file mutation tools.` | Optional: completed `cursor` activity whose details/source identify `createPlan`, `updateTodos`, `task`, or mode activity. If absent, record this category as not exercised. | Optional: neutral Cursor workflow activity is neutral, not red, and does not mutate pi plan/todo state. If absent, do not claim this visual category passed. |
36
+
37
+ Do not mark a category passed because the prompt was sent. A category passes only when the PNG shows the expected card and the JSONL shows the expected completed `toolCall` / `toolResult` pair. If Cursor chooses a different tool, rerun with a tighter prompt or record that the category was not exercised.
6
38
 
7
39
  ## When to use this
8
40
 
@@ -16,70 +48,70 @@ Use this workflow when changing or reviewing:
16
48
 
17
49
  Do not use this for ordinary unit-only logic changes.
18
50
 
19
- ## Why this workflow exists
51
+ ## Canonical visual inspection path
20
52
 
21
53
  Earlier manual verification used a visible Terminal window plus `screencapture`. That worked, but it stole system focus and made it easy for the user to type into the audit window by accident.
22
54
 
23
- The preferred workflow is now offscreen:
55
+ The canonical workflow is now offscreen and browser-rendered:
24
56
 
25
57
  1. Spawn `pi` in a pseudo-terminal at a fixed size.
26
58
  2. Feed the prompt programmatically.
27
- 3. Save raw ANSI output and plain text output.
28
- 4. Render the terminal buffer through xterm.js in headless Playwright.
29
- 5. Save a PNG screenshot.
59
+ 3. Save raw ANSI output and stripped plain text output.
60
+ 4. Render the terminal buffer through a browser-backed terminal renderer, preferably xterm.js.
61
+ 5. Save PNG screenshots with `agent_browser` when the harness is available, or Playwright directly when running outside that harness.
30
62
  6. Inspect the session JSONL for exact persisted `toolCall` / `toolResult` data.
31
63
 
32
- This gives human-like visual evidence without activating Terminal, iTerm, or a browser window.
64
+ This is the best default release path because it exercises the real pi TUI, captures card class/color/label/order/truncation issues before users see them, avoids desktop focus stealing, and leaves reviewable artifacts. Use visible Terminal/Ghostty screenshots only for terminal-specific or pixel-level bugs that cannot be judged through browser-rendered ANSI.
33
65
 
34
66
  ## Tool stack
35
67
 
36
- Install the harness outside this repo so generated assets and temporary dependencies do not pollute commits:
68
+ The canonical runner is checked in at `scripts/visual-tui-smoke.mjs` and exposed as `npm run smoke:visual`. It uses tmux for the fixed-size PTY, `@xterm/xterm` for browser rendering, and Playwright for automatic PNG capture. It resolves `pi` by directly walking the parent `PATH` and uses `process.execPath` for Node. It also prepends that Node binary's directory to `PATH` for prereq checks and tmux launches, so `#!/usr/bin/env node` shims use the validated Node, and a login shell or stale tmux server `PATH` cannot silently select a different executable.
69
+
70
+ One-time setup from a clean checkout:
37
71
 
38
72
  ```bash
39
- HARNESS=/tmp/pi-visual-harness
40
- rm -rf "$HARNESS"
41
- mkdir -p "$HARNESS"
42
- cd "$HARNESS"
43
- npm init -y
44
- npm install node-pty @xterm/xterm playwright
45
- npm rebuild node-pty
73
+ npm install
74
+ npx playwright install chromium
46
75
  ```
47
76
 
48
- `npm rebuild node-pty` is useful after Node upgrades; without it, `node-pty` may fail with `posix_spawnp failed`.
77
+ `npx playwright install chromium` is only needed for automatic PNG capture. When running inside the pi agent harness, `agent_browser` is the preferred screenshot tool for generated HTML/ANSI output because it can open local files, verify saved artifacts, and capture exact evidence paths; in that case, run `npm run smoke:visual -- --no-screenshot ...` and screenshot the generated `.html` with `agent_browser`. Outside the harness, use Playwright through the checked-in runner.
49
78
 
50
79
  ## Runner contract
51
80
 
52
- A runner script should:
53
-
54
- - Spawn `pi -e <extension-dir> --model cursor/composer-2.5` with:
55
- - `PI_CURSOR_NATIVE_TOOL_DISPLAY=1`
56
- - `TERM=xterm-256color`
57
- - fixed PTY size, for example `150x45`
58
- - cwd set to the target audit repo.
59
- - Wait for startup.
60
- - Write the exact prompt and carriage return to the PTY.
61
- - Wait a bounded amount of time.
62
- - Save:
63
- - `<label>.ansi` raw terminal bytes.
64
- - `<label>.txt` stripped text for quick search.
65
- - `<label>.png` rendered xterm screenshot.
66
- - `<label>.jsonl.path` pointing to the latest pi session JSONL.
67
- - Kill the PTY child after capture.
68
- - Check for leftover commands when prompts can background work, especially shell timeout tests.
69
-
70
- Example invocation shape:
81
+ `scripts/visual-tui-smoke.mjs` is the durable source of truth for this workflow. It must keep supporting:
82
+
83
+ - fixed-size tmux PTY execution of the parent-resolved `pi -e <extension-dir> --model cursor/composer-2.5`
84
+ - parent-resolved `pi` and `tmux` command paths reused in tmux-launched runs, with `process.execPath`'s directory prepended for prereq checks and tmux launches so Node shims use the validated Node
85
+ - `PI_CURSOR_NATIVE_TOOL_DISPLAY=1`
86
+ - `PI_CURSOR_REGISTER_NATIVE_TOOLS=1` by default
87
+ - `PI_CURSOR_SETTING_SOURCES=none` by default
88
+ - `PI_CURSOR_PI_TOOL_BRIDGE=0` by default
89
+ - `PI_CURSOR_EXPOSE_BUILTIN_TOOLS=0` by default
90
+ - Cursor SDK event-debug artifact env cleared before each run; `--event-debug` sets a deterministic debug directory under `--out-dir`
91
+ - `TERM=xterm-256color`
92
+ - cwd set to the target audit repo
93
+ - prompt paste plus carriage return into the interactive TUI
94
+ - bounded post-prompt wait via `--wait-ms`
95
+ - artifacts outside the repo by default
96
+ - `<label>.ansi`, `<label>.txt`, `<label>.html`, `<label>.png`, and `<label>.jsonl.path`
97
+ - `--label`, `--ext`, `--cwd`, `--prompt`, `--prompt-file`, `--wait-ms`, and `--out-dir`
98
+ - `--setting-sources` and `--bridge` opt-ins for non-default visual audits; `--expose-builtin-tools` is accepted only with `--bridge`
99
+ - repeatable `--leftover-pattern` checks for prompts that can background work
100
+ - `-h` / `--help` with examples and exit codes
101
+
102
+ Example invocation:
71
103
 
72
104
  ```bash
73
- node /tmp/pi-visual-harness/run-pi-visual.mjs \
74
- --label after-shell-nonzero \
75
- --ext /path/to/pi-cursor-sdk \
76
- --cwd /path/to/test-workspace \
77
- --prompt "Run \`printf 'cursor-shell-stderr\\n' >&2; exit 7\` using only the shell/terminal tool. Do not use read, grep, glob, find, ls, edit, or write. Print the command result exactly, then stop." \
78
- --wait-ms 30000 \
79
- --out-dir /tmp/pi-visual-harness/review-current
105
+ npm run smoke:visual -- \
106
+ --label shell-success \
107
+ --ext "$PWD" \
108
+ --cwd "$PWD" \
109
+ --prompt "Use only your shell/terminal tool to run printf 'cursor visual smoke\\n'. Do not use read, grep, glob, find, ls, edit, or write. Report the output." \
110
+ --wait-ms 60000 \
111
+ --out-dir /tmp/pi-cursor-sdk-visual-review
80
112
  ```
81
113
 
82
- Keep the runner in `/tmp` unless the project explicitly decides to check in a maintained audit harness.
114
+ The runner writes the `.png` through Playwright by default. In the pi agent harness, pass `--no-screenshot`, open the generated `.html` with `agent_browser`, save a PNG screenshot, and record that path beside the runner artifacts. The default evidence is native replay evidence only. For bridge/default-settings visual audits, pass `--bridge`, `--bridge --expose-builtin-tools`, or `--setting-sources <value>` explicitly and label that evidence separately.
83
115
 
84
116
  ## Before/after comparison
85
117
 
@@ -103,34 +135,35 @@ ln -s "$AFTER_WT/node_modules" "$BEFORE_WT/node_modules"
103
135
  Then run the same prompt against both extension dirs:
104
136
 
105
137
  ```bash
106
- node /tmp/pi-visual-harness/run-pi-visual.mjs \
138
+ npm run smoke:visual -- \
107
139
  --label before-glob-single \
108
140
  --ext "$BEFORE_WT" \
109
141
  --cwd "$TARGET" \
110
- --prompt "Find files matching \`src/tools/reindex.ts\` using only the glob/file-search tool. Do not use shell, bash, grep, read, or ls. Print the matched files exactly as found, then stop." \
142
+ --prompt "Use only your glob/file-search/find tool to find src/tools/reindex.ts. Do not use shell, bash, grep, read, ls, or list. Print the matched files exactly as found, then stop." \
111
143
  --wait-ms 16000 \
112
- --out-dir /tmp/pi-visual-harness/review-current
144
+ --out-dir /tmp/pi-cursor-sdk-visual-review-current
113
145
 
114
- node /tmp/pi-visual-harness/run-pi-visual.mjs \
146
+ npm run smoke:visual -- \
115
147
  --label after-glob-single \
116
148
  --ext "$AFTER_WT" \
117
149
  --cwd "$TARGET" \
118
- --prompt "Find files matching \`src/tools/reindex.ts\` using only the glob/file-search tool. Do not use shell, bash, grep, read, or ls. Print the matched files exactly as found, then stop." \
150
+ --prompt "Use only your glob/file-search/find tool to find src/tools/reindex.ts. Do not use shell, bash, grep, read, ls, or list. Print the matched files exactly as found, then stop." \
119
151
  --wait-ms 16000 \
120
- --out-dir /tmp/pi-visual-harness/review-current
152
+ --out-dir /tmp/pi-cursor-sdk-visual-review-current
121
153
  ```
122
154
 
123
- For review, create a simple HTML/PNG gallery that places `before-*.png` and `after-*.png` side by side. Keep the generated gallery in `/tmp` unless explicitly asked to commit visual artifacts.
155
+ For review, create a simple HTML/PNG gallery that places `before-*.png` and `after-*.png` side by side. Keep the generated gallery in `/tmp` unless explicitly asked to commit visual artifacts. In agent-harness runs, use `agent_browser` to open that gallery or the generated single-run HTML and save verified screenshots.
124
156
 
125
157
  ## JSONL inspection
126
158
 
127
159
  For each visual claim, inspect the JSONL path written by the runner. Confirm at least:
128
160
 
129
- - `toolCall.name` is the expected pi-facing replay tool name.
161
+ - `toolCall.name` matches the prompt matrix for the category being claimed.
130
162
  - `toolCall.arguments` show the expected user-facing args.
131
163
  - `toolResult.toolName` matches the call.
132
164
  - `toolResult.content[0].text` contains the recorded body expected in the card.
133
165
  - `toolResult.isError` matches the visual card state.
166
+ - The screenshot label and JSONL path are recorded together, so a card category cannot be claimed from a screenshot or JSONL alone.
134
167
 
135
168
  For local pi MCP bridge claims, also confirm:
136
169
 
@@ -143,7 +176,7 @@ Small helper pattern:
143
176
  ```bash
144
177
  python3 - <<'PY'
145
178
  import json, pathlib
146
- path = pathlib.Path('/tmp/pi-visual-harness/review-current/after-shell-nonzero.jsonl.path').read_text().strip()
179
+ path = pathlib.Path('/tmp/pi-cursor-sdk-visual-review-current/shell-success.jsonl.path').read_text().strip()
147
180
  for line in pathlib.Path(path).read_text().splitlines():
148
181
  obj = json.loads(line)
149
182
  msg = obj.get('message', {})
@@ -159,25 +192,37 @@ PY
159
192
 
160
193
  ## Safety rules
161
194
 
162
- - Prefer the offscreen PTY renderer. Do not use `osascript`, visible Terminal windows, or `screencapture` unless a user explicitly asks for a real desktop screenshot.
195
+ - Prefer the canonical offscreen PTY plus browser-rendered screenshot path. Do not use `osascript`, visible Terminal windows, or `screencapture` unless a user explicitly asks for a real desktop screenshot or the bug is terminal-specific.
163
196
  - Keep generated screenshots, HTML galleries, ANSI logs, and temporary harness dependencies out of the repo by default.
164
197
  - Use short, deterministic prompts with bounded wait times.
165
- - For timeout/background prompts, always check for leftovers:
198
+ - For timeout/background prompts, always check for leftovers, preferably with the runner's repeatable `--leftover-pattern` option:
199
+
200
+ ```bash
201
+ npm run smoke:visual -- \
202
+ --label shell-timeout \
203
+ --prompt 'Run sleep 30 && echo should-not-print using only the shell tool.' \
204
+ --leftover-pattern 'sleep 30|should-not-print'
205
+ ```
206
+
207
+ Manual fallback:
166
208
 
167
209
  ```bash
168
- ps -axo pid,etime,command | rg "sleep 2|should-not-print|<audit-session-label>" || true
210
+ ps -axo pid,etime,command | rg "sleep 30|should-not-print|<audit-session-label>" || true
169
211
  ```
170
212
 
171
213
  - If the model uses a different tool than requested, record it as model/provider behavior unless JSONL shows replay lost or misrendered a completed Cursor tool event.
172
- - Visual output can differ slightly from macOS Terminal fonts because xterm.js renders offscreen. Treat this workflow as evidence for card class, color state, labels, ordering, truncation, and content. Use a real terminal screenshot only for pixel-level terminal-specific bugs.
214
+ - Do not use `--bridge`, `--bridge --expose-builtin-tools`, or non-`none` `--setting-sources` for the default native replay matrix. Those opt-ins validate different surfaces and must be labeled separately.
215
+ - Visual output can differ slightly from macOS Terminal fonts because browser/xterm renderers run offscreen. Treat this workflow as authoritative release evidence for card class, color state, labels, ordering, truncation, footer/status readability, and content. Use a real terminal screenshot only for pixel-level terminal-specific bugs.
173
216
 
174
217
  ## Required evidence before commit or merge
175
218
 
176
219
  Before accepting a replay-card change, provide:
177
220
 
178
- - Before and after PNG paths.
221
+ - Browser-rendered PNG paths captured from offscreen ANSI output.
222
+ - Before and after PNG paths when comparing a rendering change.
179
223
  - The prompt used for each pair.
224
+ - ANSI/text/HTML paths when helpful for review.
180
225
  - JSONL paths for each run.
181
226
  - A short statement of what changed visually.
182
- - The relevant JSONL `toolCall` / `toolResult` facts.
227
+ - The relevant JSONL `toolCall` / `toolResult` facts, including expected tool name and `isError` state from the prompt matrix.
183
228
  - `npm test` and `npm run typecheck` results, unless the change is documentation-only.
@@ -4,6 +4,8 @@
4
4
 
5
5
  This document records maintainer testing lessons for `pi-cursor-sdk`. It complements unit tests and the [Cursor live smoke checklist](./cursor-live-smoke-checklist.md). Use it when adding regression coverage, debugging false-green releases, or building isolated smoke harnesses.
6
6
 
7
+ For a **minimal one-session dogfood pass** (baseline env, one native + one bridge call, JSONL ID patterns, bootstrap manifest, edit diff card), use the [Cursor dogfood checklist](./cursor-dogfood-checklist.md) before running the full live smoke matrix.
8
+
7
9
  ## Core lesson: integration-shaped bugs beat unit mocks
8
10
 
9
11
  The native replay `Tool grep not found` failure was integration-shaped, not unit-shaped:
@@ -236,7 +238,7 @@ The script writes timestamped artifacts under `--out` (default `/tmp/pi-cursor-s
236
238
 
237
239
  Stdout prints artifact paths and summary counts only. Raw payloads stay on disk and may contain local paths, project text, tool args/results, or secrets — do not commit or share them.
238
240
 
239
- Hard repo rule: Cursor SDK behavior claims must come from the installed `@cursor/sdk` package and/or https://cursor.com/docs/sdk/typescript, not from memory or ad-hoc probes alone.
241
+ Hard repo rule: Cursor SDK behavior claims must come from the installed `@cursor/sdk` package and/or https://cursor.com/docs/sdk/typescript, not from memory or ad-hoc probes alone. Current cutover validation targets exact `@cursor/sdk@1.0.14` and pi 0.76.0 local packages.
240
242
 
241
243
  ## Pi provider SDK event capture
242
244
 
@@ -313,7 +315,7 @@ Capture is file-only by default: no stderr markers, and bridge diagnostics durin
313
315
 
314
316
  ### Discarded incomplete SDK tool calls
315
317
 
316
- When Cursor emits `tool-call-started` without a matching completion/step result, the provider surfaces a bounded neutral **Cursor … did not complete** activity card or thinking trace at run end. pi bridge MCP calls (`pi__*`) are excluded because pi already shows the real pi tool execution path.
318
+ When Cursor emits `tool-call-started` without a matching completion/step result, the provider surfaces a bounded neutral **Cursor … did not complete** activity card or thinking trace at run end for failed/aborted runs, runs with no assistant text, and external/side-effectful tools. Incomplete fast local discovery starts (`read`, `grep`, `glob`, `ls`) are debug-only after a successful text-producing run so stale SDK start events do not create red post-answer cards. pi bridge MCP calls (`pi__*`) are excluded because pi already shows the real pi tool execution path.
317
319
 
318
320
  With `PI_CURSOR_SDK_EVENT_DEBUG=1`, each discarded started call is also recorded in `coordinator-events.jsonl` under phase `discarded-incomplete-started-tool-call` with:
319
321
 
@@ -321,7 +323,7 @@ With `PI_CURSOR_SDK_EVENT_DEBUG=1`, each discarded started call is also recorded
321
323
  - scrubbed call-id hash (raw call IDs are not written)
322
324
  - reason such as `no-completion-at-run-end`, `abort`, or `sdk-failure`
323
325
 
324
- Stderr output for these records requires `PI_CURSOR_SDK_EVENT_DEBUG_STDERR=1`. This complements the standalone `npm run debug:sdk-events` probe by interpreting a specific provider discard path during normal pi runs. User-visible incomplete cards explain the gap in the TUI; debug artifacts remain maintainer-only (**#52**).
326
+ Stderr output for these records requires `PI_CURSOR_SDK_EVENT_DEBUG_STDERR=1`. This complements the standalone `npm run debug:sdk-events` probe by interpreting a specific provider discard path during normal pi runs. User-visible incomplete cards explain actionable gaps in the TUI; debug artifacts remain maintainer-only (**#52**) and are the source of truth for suppressed fast-local stale starts.
325
327
 
326
328
  ## Tool calls listed as plain text (#40 triage)
327
329
 
@@ -340,7 +342,7 @@ Ask the reporter (or capture yourself) for:
340
342
  | `pi --version` and installed `pi-cursor-sdk` version | Confirms extension/runtime in use |
341
343
  | Model ID (for example `cursor/composer-2.5`) | Routing/replay behavior is model-scoped |
342
344
  | Exact repro prompt and prior turns | Multi-turn replay history affects prompt text |
343
- | Flags: `--cursor-no-fast`, `PI_CURSOR_PI_TOOL_BRIDGE`, `PI_CURSOR_EXPOSE_BUILTIN_TOOLS`, `PI_CURSOR_SETTING_SOURCES` | Bridge vs native-only vs narrowed settings |
345
+ | Flags: `--cursor-no-fast`, `PI_CURSOR_PI_TOOL_BRIDGE`, `PI_CURSOR_EXPOSE_BUILTIN_TOOLS`, `PI_CURSOR_SETTING_SOURCES`, `PI_CURSOR_TOOL_MANIFEST` | Bridge vs native-only vs narrowed settings; bootstrap callable-surface manifest |
344
346
  | Whether the listed names are `pi__*` bridge MCP, Cursor-native (`browser_navigate`, `WebSearch`), or `cursor-replay-*` replay IDs | Three different surfaces (see [Cursor native tool replay](./cursor-native-tool-replay.md#live-bridge-vs-replay)) |
345
347
  | Red toast / `errorMessage` text, if any | Distinguishes #55 failure surfacing from silent text echo |
346
348
  | Process exit / uncaught `ConnectError` / `ETIMEDOUT` stack trace, if any | Hard network crash (**#43**), not #40 model text echo |
@@ -425,4 +427,7 @@ rg '"type": "toolCall"|Tool call \(Cursor|cursor-replay-' "$SMOKE_DIR/session"/*
425
427
  - `scripts/validate-smoke-jsonl.mjs`
426
428
  - `scripts/debug-sdk-events.mjs`
427
429
  - `scripts/debug-provider-events.mjs`
428
- - `test/helpers/cursor-provider-harness.ts` — controllable native replay pi mock (`createNativeToolDisplayPiForTest`)
430
+ - `shared/` — runtime-safe ESM helpers consumed by provider `src/` and maintainer scripts (`cursor-sensitive-text.mjs`, `cursor-setting-sources.mjs`).
431
+ - `scripts/lib/` — maintainer plumbing (CLI arg parsing, secret-aware `fail()`, child-process shutdown, shell timeout/auth helpers). Re-exports `shared/` helpers so published smoke/debug scripts stay aligned with provider runtime (`test/maintainer-scripts-lib.test.ts`).
432
+ - `test/helpers/pi-harness.ts` — canonical fake pi/extension harness (`createPiHarness`, shared model/context/event helpers)
433
+ - `test/helpers/cursor-provider-harness.ts` — Cursor SDK provider mocks and stream helpers (re-exports pi-harness fixtures; `createNativeToolDisplayPiForTest` for native replay)
@@ -0,0 +1,69 @@
1
+ # Cursor tool surfaces in pi
2
+
3
+ pi-cursor-sdk runs Cursor models through the local `@cursor/sdk` agent runtime. A single pi session can expose **three related but different** tool namespaces. This page is the user-facing guide; maintainer replay details live in [Cursor native tool replay](./cursor-native-tool-replay.md).
4
+
5
+ ## The three surfaces
6
+
7
+ | Surface | Who owns it | Callable by Cursor? | What pi shows |
8
+ | --- | --- | --- | --- |
9
+ | **Cursor SDK host tools** | Cursor local agent | Yes | Native replay cards (`read`, `bash`, …) or neutral Cursor activity. Representative ToolType list: [SDK ToolType replay matrix](./cursor-native-tool-replay.md#sdk-tooltype-replay-matrix). |
10
+ | **Configured Cursor MCP** | Cursor settings / `~/.cursor/mcp.json` | Yes (when loaded) | Neutral **Cursor MCP** activity cards on replay |
11
+ | **Pi bridge (`pi__*`)** | pi-cursor-sdk loopback MCP | Yes, when exposed | Real pi tool names (`cursor_ask_question`, extension tools, …) |
12
+
13
+ **Not callable:** `cursor-replay-*` IDs in JSONL, pi history tool names used only for display, and transcript labels. Cursor must call exposed `pi__*` MCP names for bridged pi tools, not the pi card name.
14
+
15
+ ## Discoverability
16
+
17
+ - **MCP `listTools`** (and pi's MCP catalog when present) lists **MCP servers only** — for example `pi_tools` with `pi__cursor_ask_question`. It does **not** enumerate Cursor SDK host tools such as `Read` or `Shell`.
18
+ - **Bootstrap prompts** include a short **Cursor SDK tool boundary** block plus a compact **callable tool surfaces** manifest by default (disable manifest with `PI_CURSOR_TOOL_MANIFEST=0`). The manifest lists host-tool categories, bridge `pi__*` names for the current run, and a reminder that configured Cursor MCP servers appear at runtime via `listTools`. MCP `listTools` entries for bridged pi tools point back to the bootstrap prompt instead of repeating the full contract.
19
+ - **Incremental prompts** omit the full boundary block but keep a short tail guard (including an explicit shell `cd` hint); the session agent retains prior bootstrap context.
20
+ - **In-session debug:** `/cursor-tools` prints bridge enablement, manifest enablement, effective `PI_CURSOR_SETTING_SOURCES`, and the current callable-surface snapshot.
21
+
22
+ ## Pi bridge vs Cursor native
23
+
24
+ Default behavior:
25
+
26
+ - Cursor host tools handle files, shell, grep, edits, tasks, and Cursor-native MCP/plugins.
27
+ - The pi bridge exposes **active pi tools** as `pi__*` MCP names when `PI_CURSOR_PI_TOOL_BRIDGE` is enabled (default on).
28
+ - Overlapping pi builtins (`read`, `bash`, `write`, `edit`, `grep`, `find`, `ls`) are **hidden** from the bridge unless `PI_CURSOR_EXPOSE_BUILTIN_TOOLS=1`.
29
+
30
+ `pi-cursor-sdk` always registers `cursor_ask_question` for Cursor models when the bridge is on; Cursor sees `pi__cursor_ask_question`.
31
+
32
+ ```bash
33
+ # Disable pi bridge entirely
34
+ PI_CURSOR_PI_TOOL_BRIDGE=0 pi --model cursor/composer-2.5
35
+
36
+ # Expose overlapping pi builtins through the bridge
37
+ PI_CURSOR_EXPOSE_BUILTIN_TOOLS=1 pi --model cursor/composer-2.5
38
+
39
+ # Disable bootstrap tool manifest
40
+ PI_CURSOR_TOOL_MANIFEST=0 pi --model cursor/composer-2.5
41
+ ```
42
+
43
+ ## Cursor settings vs pi toggles
44
+
45
+ Disabling or removing an MCP server **only in pi** does not remove Cursor ambient MCP loaded from Cursor config.
46
+
47
+ | Control | Effect |
48
+ | --- | --- |
49
+ | `PI_CURSOR_SETTING_SOURCES=all` (default) | Loads user/project Cursor MCP, plugins, rules (`~/.cursor/mcp.json`, etc.) |
50
+ | `PI_CURSOR_SETTING_SOURCES=none` | Disables ambient Cursor setting sources for local agents |
51
+ | `PI_CURSOR_SETTING_SOURCES=project,plugins` | Narrows which layers load |
52
+ | Empty or edited `~/.cursor/mcp.json` | Changes which user MCP servers Cursor connects to |
53
+
54
+ To reproduce a **minimal** surface (pi-cursor-sdk + Cursor host only), use extension-only install, empty user MCP config, and `PI_CURSOR_SETTING_SOURCES=none` when you do not need Cursor rules/MCP from disk.
55
+
56
+ ## JSONL ID patterns (debugging)
57
+
58
+ | ID prefix | Meaning |
59
+ | --- | --- |
60
+ | `cursor-replay-*` | Display-only replay of Cursor SDK activity |
61
+ | `cursor-pi-bridge-run-*` | Live pi execution via bridge |
62
+
63
+ Example mistake: treating `cursor-replay-…` as a tool to invoke. Replay never re-runs work.
64
+
65
+ ## Related docs
66
+
67
+ - [README — Cursor provider tool contract](../README.md#cursor-provider-tool-contract)
68
+ - [Cursor native tool replay](./cursor-native-tool-replay.md)
69
+ - [Cursor model UX spec](./cursor-model-ux-spec.md)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cursor-sdk",
3
- "version": "0.1.19",
3
+ "version": "0.1.21",
4
4
  "description": "pi provider extension backed by @cursor/sdk local agents",
5
5
  "author": "Mitch Fultz (https://github.com/fitchmultz)",
6
6
  "license": "MIT",
@@ -22,20 +22,39 @@
22
22
  },
23
23
  "homepage": "https://github.com/fitchmultz/pi-cursor-sdk#readme",
24
24
  "files": [
25
+ "shared",
25
26
  "src",
26
27
  "scripts/refresh-cursor-model-snapshots.mjs",
27
28
  "scripts/steering-rpc-smoke.mjs",
28
29
  "scripts/tmux-live-smoke.sh",
30
+ "scripts/visual-tui-smoke.mjs",
29
31
  "scripts/isolated-cursor-smoke.sh",
32
+ "scripts/fixtures/plan-strip-shim",
30
33
  "scripts/validate-smoke-jsonl.mjs",
34
+ "scripts/probe-mcp-coldstart.mjs",
31
35
  "scripts/debug-sdk-events.mjs",
36
+ "scripts/debug-sdk-events.d.mts",
32
37
  "scripts/debug-provider-events.mjs",
33
- "scripts/lib/cursor-probe-utils.mjs",
38
+ "scripts/debug-provider-events.d.mts",
39
+ "scripts/lib/cursor-cli-args.mjs",
40
+ "scripts/lib/cursor-cli-args.d.mts",
41
+ "scripts/lib/cursor-child-process.mjs",
42
+ "scripts/lib/cursor-child-process.d.mts",
43
+ "scripts/lib/cursor-script-fail.mjs",
44
+ "scripts/lib/cursor-script-fail.d.mts",
45
+ "scripts/lib/cursor-smoke-env.mjs",
46
+ "scripts/lib/cursor-smoke-env.d.mts",
47
+ "scripts/lib/cursor-smoke-shell.sh",
48
+ "scripts/lib/cursor-visual-render.mjs",
49
+ "scripts/lib/cursor-visual-render.d.mts",
34
50
  "scripts/lib/cursor-sdk-output-filter.mjs",
51
+ "scripts/lib/cursor-sdk-output-filter.d.mts",
35
52
  "README.md",
36
53
  "docs/cursor-model-ux-spec.md",
54
+ "docs/cursor-tool-surfaces.md",
37
55
  "docs/cursor-live-smoke-checklist.md",
38
56
  "docs/cursor-testing-lessons.md",
57
+ "docs/cursor-dogfood-checklist.md",
39
58
  "docs/cursor-native-tool-replay.md",
40
59
  "docs/cursor-native-tool-visual-audit.md",
41
60
  "LICENSE",
@@ -46,31 +65,38 @@
46
65
  "node": ">=22.19.0"
47
66
  },
48
67
  "scripts": {
49
- "typecheck": "tsc --noEmit",
68
+ "typecheck": "npm run typecheck:src && npm run typecheck:tests && npm run typecheck:replay-compile",
69
+ "typecheck:src": "tsc --noEmit",
70
+ "typecheck:tests": "tsc -p tsconfig.test.json --noEmit",
71
+ "typecheck:replay-compile": "tsc --noEmit -p test/tsconfig.json",
50
72
  "test": "vitest run",
51
73
  "test:watch": "vitest",
52
74
  "refresh:cursor-snapshots": "node scripts/refresh-cursor-model-snapshots.mjs",
53
75
  "smoke:live": "scripts/tmux-live-smoke.sh",
76
+ "smoke:visual": "node scripts/visual-tui-smoke.mjs",
54
77
  "smoke:isolated": "scripts/isolated-cursor-smoke.sh",
55
78
  "smoke:steering": "node scripts/steering-rpc-smoke.mjs",
56
79
  "smoke:jsonl": "node scripts/validate-smoke-jsonl.mjs",
57
80
  "debug:sdk-events": "node scripts/debug-sdk-events.mjs",
58
- "debug:provider-events": "node scripts/debug-provider-events.mjs"
81
+ "debug:provider-events": "node scripts/debug-provider-events.mjs",
82
+ "debug:mcp-coldstart": "node scripts/probe-mcp-coldstart.mjs"
59
83
  },
60
84
  "dependencies": {
61
- "@cursor/sdk": "^1.0.13",
85
+ "@cursor/sdk": "1.0.14",
62
86
  "@modelcontextprotocol/sdk": "^1.29.0"
63
87
  },
64
88
  "peerDependencies": {
65
- "@earendil-works/pi-ai": "*",
66
- "@earendil-works/pi-coding-agent": "*",
67
- "@earendil-works/pi-tui": "*",
89
+ "@earendil-works/pi-ai": ">=0.76.0",
90
+ "@earendil-works/pi-coding-agent": ">=0.76.0",
91
+ "@earendil-works/pi-tui": ">=0.76.0",
68
92
  "typebox": "*"
69
93
  },
70
94
  "devDependencies": {
71
- "@earendil-works/pi-ai": "^0.75.5",
72
- "@earendil-works/pi-coding-agent": "^0.75.5",
73
- "@earendil-works/pi-tui": "^0.75.5",
95
+ "@earendil-works/pi-ai": "0.76.0",
96
+ "@earendil-works/pi-coding-agent": "0.76.0",
97
+ "@earendil-works/pi-tui": "0.76.0",
98
+ "@xterm/xterm": "^6.0.0",
99
+ "playwright": "^1.60.0",
74
100
  "typebox": "^1.1.38",
75
101
  "typescript": "^6.0.3",
76
102
  "vitest": "^4.1.6"
@@ -0,0 +1,59 @@
1
+ export interface CursorDebugProviderEventsArgs {
2
+ cwd: string;
3
+ model: string;
4
+ prompt?: string;
5
+ promptFile?: string;
6
+ out?: string;
7
+ settingSources?: string[] | undefined;
8
+ sessionDir?: string;
9
+ apiKey?: string;
10
+ help: boolean;
11
+ }
12
+
13
+ export declare function parseDebugProviderEventsArgs(
14
+ argv: string[],
15
+ env?: NodeJS.ProcessEnv,
16
+ ): CursorDebugProviderEventsArgs;
17
+
18
+ export interface CursorPiSessionSnapshotState {
19
+ copied: boolean;
20
+ sessionFile?: string;
21
+ reason?: string;
22
+ recoveredAfterChildExit?: boolean;
23
+ }
24
+
25
+ export type CursorDebugCaptureCounts = Record<string, number | Record<string, number>>;
26
+
27
+ export interface CursorDebugCaptureSummary {
28
+ artifactDir: string;
29
+ sessionFile?: string;
30
+ counts: CursorDebugCaptureCounts;
31
+ piSessionSnapshot?: CursorPiSessionSnapshotState;
32
+ artifacts?: Record<string, string>;
33
+ elapsedMs?: number;
34
+ waitResultRecorded?: boolean;
35
+ }
36
+
37
+ export interface CursorDebugProviderEventsRunSummary {
38
+ artifactDir: string;
39
+ artifacts: Record<string, string>;
40
+ counts: CursorDebugCaptureCounts;
41
+ elapsedMs: number;
42
+ model: string;
43
+ cwd: string;
44
+ sessionDir: string;
45
+ extensionVersion: string;
46
+ sdkVersion: string;
47
+ waitResultRecorded: boolean;
48
+ }
49
+
50
+ export declare function backfillPiSessionSnapshot(
51
+ captureSummary: CursorDebugCaptureSummary | undefined,
52
+ artifactDir: string,
53
+ sessionDir: string,
54
+ ): CursorDebugCaptureSummary | undefined;
55
+
56
+ export declare function runDebugProviderEvents(
57
+ args: CursorDebugProviderEventsArgs,
58
+ env?: NodeJS.ProcessEnv,
59
+ ): Promise<CursorDebugProviderEventsRunSummary>;