sunpeak 0.20.2 → 0.20.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +59 -89
  2. package/bin/commands/inspect.mjs +97 -2
  3. package/bin/commands/new.mjs +33 -9
  4. package/bin/commands/test-init.mjs +25 -23
  5. package/bin/sunpeak.js +16 -15
  6. package/dist/chatgpt/index.cjs +1 -1
  7. package/dist/chatgpt/index.js +1 -1
  8. package/dist/claude/index.cjs +1 -1
  9. package/dist/claude/index.js +1 -1
  10. package/dist/host/chatgpt/index.cjs +1 -1
  11. package/dist/host/chatgpt/index.js +1 -1
  12. package/dist/index.cjs +3 -3
  13. package/dist/index.cjs.map +1 -1
  14. package/dist/index.js +3 -3
  15. package/dist/index.js.map +1 -1
  16. package/dist/inspector/index.cjs +1 -1
  17. package/dist/inspector/index.js +1 -1
  18. package/dist/{inspector-BBDa5yCm.js → inspector-60Na_Zc4.js} +2 -2
  19. package/dist/inspector-60Na_Zc4.js.map +1 -0
  20. package/dist/{inspector-DAA1Wiyh.cjs → inspector-D0qOqYX2.cjs} +2 -2
  21. package/dist/{inspector-BBDa5yCm.js.map → inspector-D0qOqYX2.cjs.map} +1 -1
  22. package/dist/mcp/index.cjs +1 -1
  23. package/dist/mcp/index.cjs.map +1 -1
  24. package/dist/mcp/index.js +1 -1
  25. package/dist/mcp/index.js.map +1 -1
  26. package/dist/{use-app-DPkj5Jp_.cjs → use-app-B33mckz4.cjs} +7 -3
  27. package/dist/use-app-B33mckz4.cjs.map +1 -0
  28. package/dist/{use-app-Cr0auUa1.js → use-app-kv5GQr0G.js} +7 -3
  29. package/dist/use-app-kv5GQr0G.js.map +1 -0
  30. package/package.json +3 -3
  31. package/template/README.md +21 -23
  32. package/template/dist/albums/albums.html +1 -1
  33. package/template/dist/albums/albums.json +1 -1
  34. package/template/dist/carousel/carousel.html +1 -1
  35. package/template/dist/carousel/carousel.json +1 -1
  36. package/template/dist/map/map.html +1 -1
  37. package/template/dist/map/map.json +1 -1
  38. package/template/dist/review/review.html +1 -1
  39. package/template/dist/review/review.json +1 -1
  40. package/template/node_modules/.vite/deps/_metadata.json +3 -3
  41. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps.js +6 -2
  42. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps.js.map +1 -1
  43. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_app-bridge.js +1 -1
  44. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_app-bridge.js.map +1 -1
  45. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_react.js +6 -2
  46. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_react.js.map +1 -1
  47. package/template/node_modules/.vite-mcp/deps/_metadata.json +22 -22
  48. package/template/package.json +2 -1
  49. package/template/tests/e2e/visual.spec.ts +2 -2
  50. package/dist/inspector-DAA1Wiyh.cjs.map +0 -1
  51. package/dist/use-app-Cr0auUa1.js.map +0 -1
  52. package/dist/use-app-DPkj5Jp_.cjs.map +0 -1
package/README.md CHANGED
@@ -16,7 +16,13 @@
16
16
  [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-blue?style=flat&logo=typescript&label=ts&color=FFB800&logoColor=white&labelColor=000035)](https://www.typescriptlang.org/)
17
17
  [![React](https://img.shields.io/badge/React-19-blue?style=flat&logo=react&label=react&color=FFB800&logoColor=white&labelColor=000035)](https://reactjs.org/)
18
18
 
19
- Inspector, testing framework, and runtime framework for MCP servers and MCP Apps.
19
+ MCP App framework, MCP testing framework, and inspector for MCP servers and MCP Apps.
20
+
21
+ Build cross-platform: sunpeak is a ChatGPT App framework, Claude Connector framework, and more.
22
+
23
+ ```bash
24
+ npx sunpeak new
25
+ ```
20
26
 
21
27
  [Demo (Hosted)](https://sunpeak.ai/inspector) ~
22
28
  [Demo (Video)](https://cdn.sunpeak.ai/sunpeak-demo-prod.mp4) ~
@@ -26,126 +32,89 @@ Inspector, testing framework, and runtime framework for MCP servers and MCP Apps
26
32
 
27
33
  ## sunpeak is three things
28
34
 
29
- ### 1. Inspector
35
+ ### 1. App Framework
36
+
37
+ Building an MCP App from scratch means wiring up an MCP server, handling protocol message routing, managing resource HTML bundles, and setting up a dev environment with hot reload. Each host has different capabilities and CSS variables, so you end up writing platform-specific code without a clear structure.
30
38
 
31
- Manually test any MCP server in replicated ChatGPT and Claude runtimes.
39
+ sunpeak gives you a convention-over-configuration framework with the inspector and testing built in.
32
40
 
33
41
  ```bash
34
- sunpeak inspect --server http://localhost:8000/mcp
42
+ npx sunpeak new
35
43
  ```
36
44
 
37
- <div align="center">
38
- <a href="https://sunpeak.ai/docs/mcp-apps-inspector">
39
- <picture>
40
- <img alt="Inspector" src="https://cdn.sunpeak.ai/chatgpt-simulator.png">
41
- </picture>
42
- </a>
43
- </div>
44
-
45
- - Multi-host inspector replicating ChatGPT and Claude runtimes
46
- - Toggle themes, display modes, device types from the sidebar or URL params
47
- - Call real tool handlers or use simulation fixtures for mock data
45
+ This creates a project, starts a dev server with HMR, and opens the inspector at `localhost:3000`:
48
46
 
49
- ### 2. Testing Framework
47
+ ```
48
+ sunpeak-app/
49
+ ├── src/resources/review/review.tsx # UI component (React)
50
+ ├── src/tools/review-diff.ts # Tool handler, schema, resource link
51
+ ├── tests/simulations/review-diff.json # Mock data for the inspector
52
+ └── package.json
53
+ ```
50
54
 
51
- Automatically test any MCP server against replicated ChatGPT and Claude runtimes.
55
+ Tools, resources, and simulations are auto-discovered from the file system. Multi-platform React hooks (`useToolData`, `useAppState`, `useTheme`, `useDisplayMode`) let you write your app logic once and deploy it across ChatGPT, Claude, and future hosts.
52
56
 
53
- ```ts
54
- import { test, expect } from 'sunpeak/test';
57
+ [App framework documentation →](https://sunpeak.ai/docs/mcp-apps-framework)
55
58
 
56
- test('review tool renders title', async ({ inspector }) => {
57
- const result = await inspector.renderTool('review-diff');
58
- const app = result.app();
59
- await expect(app.locator('h1:has-text("Refactor")')).toBeVisible();
60
- });
61
- ```
59
+ ---
62
60
 
63
- - **Works for any MCP server**: `sunpeak test init` scaffolds tests for Python, Go, TS, or any language
64
- - **MCP-native assertions**: `toBeError()`, `toHaveTextContent()`, `toHaveStructuredContent()`
65
- - **Multi-host**: Tests run against ChatGPT and Claude hosts automatically
66
- - **Live tests**: Automated browser tests against real ChatGPT via `sunpeak/test/live`
67
- - **Evals**: Test your tool interface design against multiple LLMs (GPT-4o, Claude, Gemini, etc.) via `sunpeak/eval`
61
+ ### 2. Testing Framework
68
62
 
69
- ### 3. App Framework
63
+ MCP Apps render inside host iframes with host-specific themes, display modes, and capabilities. Standard browser testing can't replicate this because the runtime environment only exists inside ChatGPT and Claude. Each app also has many dimensions of state: tool inputs, tool results, server tool responses, host context, and display configuration. Testing all combinations manually is slow and error-prone.
70
64
 
71
- Next.js for MCP Apps. Convention-over-configuration project structure with the inspector and testing built in.
65
+ sunpeak replicates these host runtimes and provides simulation fixtures (JSON files that define reproducible tool states) so you can test every combination of host, theme, and data in CI without accounts or API credits.
72
66
 
73
67
  ```bash
74
- sunpeak-app/
75
- ├── src/
76
- │ ├── resources/
77
- │ │ └── review/
78
- │ │ └── review.tsx # Review UI component + resource metadata.
79
- │ ├── tools/
80
- │ │ ├── review-diff.ts # Tool with handler, schema, and optional resource link.
81
- │ │ ├── review-post.ts # Multiple tools can share one resource.
82
- │ │ └── review.ts # Backend-only tool (no resource, no UI).
83
- │ └── server.ts # Optional: auth, server config.
84
- ├── tests/simulations/
85
- │ ├── review-diff.json # Mock state for testing (includes serverTools).
86
- │ ├── review-post.json # Mock state for testing (includes serverTools).
87
- │ └── review-purchase.json # Mock state for testing (includes serverTools).
88
- └── package.json
68
+ npx sunpeak test init --server http://localhost:8000/mcp
89
69
  ```
90
70
 
91
- - **Runtime APIs**: Strongly typed React hooks (`useToolData`, `useAppState`, `useHostContext`, etc.)
92
- - **Convention over configuration**: Resources, tools, and simulations are auto-discovered
93
- - **Multi-platform**: Build once, deploy to ChatGPT, Claude, and future hosts
94
-
95
- ## Quickstart
96
-
97
- Requirements: Node (20+), pnpm (10+)
71
+ This scaffolds E2E tests, visual regression, live host tests, and multi-model evals. Then run them:
98
72
 
99
73
  ```bash
100
- pnpm add -g sunpeak
101
- sunpeak new
74
+ npx sunpeak test
102
75
  ```
103
76
 
104
- ## CLI
77
+ Playwright fixtures handle inspector startup, MCP connection, iframe traversal, and host switching. Works with Python, Go, TypeScript, Rust, or any language.
105
78
 
106
- **Testing** (works with any MCP server):
79
+ ```ts
80
+ import { test, expect } from 'sunpeak/test';
107
81
 
108
- | Command | Description |
109
- | ------------------------------------- | ------------------------------------------- |
110
- | `sunpeak inspect --server <url\|cmd>` | Inspect any MCP server in the inspector |
111
- | `sunpeak test` | Run unit + e2e tests |
112
- | `sunpeak test --unit` | Run unit tests only (Vitest) |
113
- | `sunpeak test --e2e` | Run e2e tests only (Playwright) |
114
- | `sunpeak test --visual` | Run e2e tests with visual regression |
115
- | `sunpeak test --visual --update` | Update visual regression baselines |
116
- | `sunpeak test --live` | Run live tests against real hosts |
117
- | `sunpeak test --eval` | Run evals against multiple LLM models |
118
- | `sunpeak test init` | Scaffold test infrastructure into a project |
82
+ test('search tool returns results', async ({ mcp }) => {
83
+ const result = await mcp.callTool('search', { query: 'headphones' });
84
+ expect(result.isError).toBeFalsy();
85
+ });
119
86
 
120
- **App framework** (for sunpeak projects):
87
+ test('album cards render', async ({ inspector }) => {
88
+ const result = await inspector.renderTool('show-albums');
89
+ await expect(result.app().locator('button:has-text("Summer Slice")')).toBeVisible();
90
+ });
91
+ ```
121
92
 
122
- | Command | Description |
123
- | -------------------------------- | ------------------------------------------- |
124
- | `sunpeak new [name] [resources]` | Create a new project |
125
- | `sunpeak dev` | Start dev server + inspector + MCP endpoint |
126
- | `sunpeak build` | Build resources + tools for production |
127
- | `sunpeak start` | Start production MCP server |
128
- | `sunpeak upgrade` | Upgrade sunpeak to latest version |
93
+ [Testing documentation →](https://sunpeak.ai/docs/testing/overview)
129
94
 
130
- ## Coding Agent Skills
95
+ ---
131
96
 
132
- Install the sunpeak skills to give your coding agent (Claude Code, Cursor, etc.) built-in knowledge of sunpeak patterns, hooks, and testing:
97
+ ### 3. Inspector
98
+
99
+ MCP servers are opaque. You can call tools and read the JSON responses, but you can't see how your app actually looks and behaves inside ChatGPT or Claude without deploying to each host, setting up a tunnel, paying for accounts, and manually refreshing through a multi-step cycle on every code change.
100
+
101
+ The sunpeak inspector replicates the ChatGPT and Claude app runtimes locally. Point it at any MCP server and see your tools and resources rendered the same way they appear in production hosts.
133
102
 
134
103
  ```bash
135
- pnpm dlx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server
104
+ npx sunpeak inspect --server http://localhost:8000/mcp
136
105
  ```
137
106
 
138
- ## Troubleshooting
139
-
140
- If your app doesn't render in ChatGPT or Claude:
107
+ <div align="center">
108
+ <a href="https://sunpeak.ai/docs/mcp-apps-inspector">
109
+ <picture>
110
+ <img alt="Inspector" src="https://cdn.sunpeak.ai/chatgpt-simulator.png">
111
+ </picture>
112
+ </a>
113
+ </div>
141
114
 
142
- 1. **Check your tunnel** is running and pointing to the correct port
143
- 2. **Restart `sunpeak dev`** to clear stale connections
144
- 3. **Refresh or re-add the MCP server** in the host's settings (Settings > MCP Servers)
145
- 4. **Hard refresh** the host page (`Cmd+Shift+R` / `Ctrl+Shift+R`)
146
- 5. **Open a new chat** in the host (cached iframes persist per-conversation)
115
+ Toggle between hosts, themes, display modes, and device types from the sidebar. Call real tool handlers or load simulation fixtures for deterministic mock data. Changes reflect instantly via HMR. Works with any MCP server in any language.
147
116
 
148
- Full guide: [sunpeak.ai/docs/app-framework/guides/troubleshooting](https://sunpeak.ai/docs/app-framework/guides/troubleshooting)
117
+ [Inspector documentation →](https://sunpeak.ai/docs/mcp-apps-inspector)
149
118
 
150
119
  ## Resources
151
120
 
@@ -153,3 +122,4 @@ Full guide: [sunpeak.ai/docs/app-framework/guides/troubleshooting](https://sunpe
153
122
  - [MCP Overview](https://sunpeak.ai/docs/mcp-apps/mcp/overview) · [Tools](https://sunpeak.ai/docs/mcp-apps/mcp/tools) · [Resources](https://sunpeak.ai/docs/mcp-apps/mcp/resources)
154
123
  - [MCP Apps SDK](https://github.com/modelcontextprotocol/ext-apps)
155
124
  - [ChatGPT Apps SDK Design Guidelines](https://developers.openai.com/apps-sdk/concepts/design-guidelines)
125
+ - [Troubleshooting](https://sunpeak.ai/docs/app-framework/guides/troubleshooting)
package/bin/commands/inspect.mjs CHANGED
@@ -667,6 +667,45 @@ root.render(
667
667
  * @param {{ callToolDirect?: (name: string, args: Record<string, unknown>) => Promise<object>, simulationsDir?: string | null }} [pluginOpts]
668
668
  */
669
669
  function sunpeakInspectEndpointsPlugin(getClient, setClient, pluginOpts = {}) {
670
+ // Server URL and options for automatic session recovery.
671
+ // Set by inspectServer() after creating the initial connection.
672
+ let _serverUrl = '';
673
+ /** @type {Record<string, unknown>} */
674
+ let _connectionOpts = {};
675
+
676
+ /**
677
+ * Check if an error is a dead-session error (MCP server no longer recognizes
678
+ * the session ID). This happens when the MCP server restarts, the session
679
+ * times out, or the connection is interrupted.
680
+ * @param {Error} err
681
+ */
682
+ function isDeadSession(err) {
683
+ const msg = err?.message ?? '';
684
+ return msg.includes('Unknown session') || msg.includes('404') || msg.includes('fetch failed');
685
+ }
686
+
687
+ /**
688
+ * Attempt to reconnect to the MCP server and replace the current client.
689
+ * Returns true if reconnection succeeded.
690
+ */
691
+ async function tryReconnect() {
692
+ if (!_serverUrl) return false;
693
+ try {
694
+ console.warn(`[inspect] MCP session lost, reconnecting to ${_serverUrl}...`);
695
+ const newConn = await createMcpConnection(_serverUrl, _connectionOpts);
696
+ setClient(newConn.client);
697
+ console.log('[inspect] MCP session re-established');
698
+ return true;
699
+ } catch (err) {
700
+ console.error(`[inspect] MCP reconnection failed: ${err?.message ?? err}`);
701
+ return false;
702
+ }
703
+ }
704
+
705
+ // Initialize reconnection state from plugin options.
706
+ if (pluginOpts.serverUrl) _serverUrl = pluginOpts.serverUrl;
707
+ if (pluginOpts.connectionOpts) _connectionOpts = pluginOpts.connectionOpts;
708
+
670
709
  // In-memory OAuth state keyed by server URL, persisted across reconnects.
671
710
  /** @type {Map<string, { provider: any, getAuthUrl: () => URL | undefined, hasTokens: () => boolean, stateParam: string }>} */
672
711
  const oauthProviders = new Map();
@@ -680,7 +719,7 @@ function sunpeakInspectEndpointsPlugin(getClient, setClient, pluginOpts = {}) {
680
719
  return {
681
720
  name: 'sunpeak-inspect-endpoints',
682
721
  configureServer(server) {
683
- // List tools from connected server
722
+ // List tools from connected server (with automatic session recovery)
684
723
  server.middlewares.use('/__sunpeak/list-tools', async (_req, res) => {
685
724
  try {
686
725
  const client = getClient();
@@ -688,6 +727,15 @@ function sunpeakInspectEndpointsPlugin(getClient, setClient, pluginOpts = {}) {
688
727
  res.writeHead(200, { 'Content-Type': 'application/json' });
689
728
  res.end(JSON.stringify(result));
690
729
  } catch (err) {
730
+ // If the session died (server restarted, timeout, etc.), try to reconnect once.
731
+ if (isDeadSession(err) && await tryReconnect()) {
732
+ try {
733
+ const result = await getClient().listTools();
734
+ res.writeHead(200, { 'Content-Type': 'application/json' });
735
+ res.end(JSON.stringify(result));
736
+ return;
737
+ } catch { /* fall through to error response */ }
738
+ }
691
739
  res.writeHead(500, { 'Content-Type': 'application/json' });
692
740
  res.end(JSON.stringify({ error: err.message }));
693
741
  }
@@ -732,6 +780,16 @@ function sunpeakInspectEndpointsPlugin(getClient, setClient, pluginOpts = {}) {
732
780
  res.writeHead(200, { 'Content-Type': 'application/json' });
733
781
  res.end(JSON.stringify(result));
734
782
  } catch (err) {
783
+ // Try reconnecting on dead session before returning error
784
+ if (isDeadSession(err) && await tryReconnect()) {
785
+ try {
786
+ const { name, arguments: args } = parsed;
787
+ const result = await getClient().callTool({ name, arguments: args });
788
+ res.writeHead(200, { 'Content-Type': 'application/json' });
789
+ res.end(JSON.stringify(result));
790
+ return;
791
+ } catch { /* fall through */ }
792
+ }
735
793
  res.writeHead(200, { 'Content-Type': 'application/json' });
736
794
  res.end(
737
795
  JSON.stringify({
@@ -1171,6 +1229,22 @@ function sunpeakInspectEndpointsPlugin(getClient, setClient, pluginOpts = {}) {
1171
1229
  res.end('');
1172
1230
  }
1173
1231
  } catch (err) {
1232
+ // Try reconnecting on dead session before returning error
1233
+ if (isDeadSession(err) && await tryReconnect()) {
1234
+ try {
1235
+ const retryResult = await getClient().readResource({ uri });
1236
+ const retryContent = retryResult.contents?.[0];
1237
+ if (retryContent) {
1238
+ const mimeType = retryContent.mimeType || 'text/html';
1239
+ res.writeHead(200, {
1240
+ 'Content-Type': `${mimeType}; charset=utf-8`,
1241
+ 'X-Content-Type-Options': 'nosniff',
1242
+ });
1243
+ res.end(typeof retryContent.text === 'string' ? retryContent.text : '');
1244
+ return;
1245
+ }
1246
+ } catch { /* fall through */ }
1247
+ }
1174
1248
  res.writeHead(500, { 'Content-Type': 'text/plain' });
1175
1249
  res.end(`Error reading resource: ${err.message}`);
1176
1250
  }
@@ -1308,6 +1382,23 @@ export async function inspectServer(opts) {
1308
1382
 
1309
1383
  console.log('Connected. Discovering tools and resources...');
1310
1384
 
1385
+ // Monitor transport health. The MCP SDK opens a background SSE stream after
1386
+ // initialization. If this stream drops, the server may purge the session,
1387
+ // causing "Unknown session" errors on subsequent requests. Log lifecycle
1388
+ // events so we can diagnose connection issues when they occur.
1389
+ if (mcpConnection.transport) {
1390
+ const origOnError = mcpConnection.transport.onerror;
1391
+ mcpConnection.transport.onerror = (err) => {
1392
+ console.warn(`[inspect] MCP transport error: ${err?.message ?? err}`);
1393
+ origOnError?.(err);
1394
+ };
1395
+ const origOnClose = mcpConnection.transport.onclose;
1396
+ mcpConnection.transport.onclose = () => {
1397
+ console.warn('[inspect] MCP transport closed (session may be lost)');
1398
+ origOnClose?.();
1399
+ };
1400
+ }
1401
+
1311
1402
  // Extract app name and icon from server info (reported during MCP initialize)
1312
1403
  const serverInfo = mcpConnection.client.getServerVersion();
1313
1404
  const serverAppName = nameOverride ?? serverInfo?.name;
@@ -1387,7 +1478,7 @@ export async function inspectServer(opts) {
1387
1478
  sunpeakInspectEndpointsPlugin(
1388
1479
  () => mcpConnection.client,
1389
1480
  (newClient) => { mcpConnection.client = newClient; },
1390
- { callToolDirect: opts.callToolDirect, simulationsDir }
1481
+ { callToolDirect: opts.callToolDirect, simulationsDir, serverUrl: resolvedServerUrl, connectionOpts }
1391
1482
  ),
1392
1483
  // Serve /dist/{name}/{name}.html from the project directory (for Prod Resources mode).
1393
1484
  // The Inspector polls these paths via HEAD to check if built resources exist.
@@ -1476,6 +1567,10 @@ export async function inspectServer(opts) {
1476
1567
  // Without this, Vite defaults to localhost which may resolve to IPv6-only
1477
1568
  // (::1) on macOS, causing ECONNREFUSED for IPv4 clients.
1478
1569
  host: '0.0.0.0',
1570
+ // Allow any hostname so the inspector works behind tunnels, in containers,
1571
+ // and with custom /etc/hosts entries. Without this, Vite 8's DNS rebinding
1572
+ // protection blocks requests whose Host header isn't localhost/127.0.0.1.
1573
+ allowedHosts: 'all',
1479
1574
  open: open ?? (!process.env.CI && !process.env.SUNPEAK_LIVE_TEST),
1480
1575
  },
1481
1576
  optimizeDeps: {
package/bin/commands/new.mjs CHANGED
@@ -299,6 +299,28 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
299
299
 
300
300
  // Install dependencies with spinner
301
301
  const pm = d.detectPackageManager();
302
+
303
+ // Replace package manager references in README
304
+ if (pm !== 'pnpm') {
305
+ const readmePath = join(targetDir, 'README.md');
306
+ if (d.existsSync(readmePath)) {
307
+ const run = pm === 'npm' ? 'npm run' : pm;
308
+ const dlx = pm === 'npm' ? 'npx' : 'yarn dlx';
309
+ let readme = d.readFileSync(readmePath, 'utf-8');
310
+ readme = readme.replace(/pnpm dev\b/g, `${run} dev`);
311
+ readme = readme.replace(/pnpm build\b/g, `${run} build`);
312
+ readme = readme.replace(/pnpm start\b/g, `${run} start`);
313
+ readme = readme.replace(/pnpm test\b/g, `${run} test`);
314
+ readme = readme.replace(/pnpm test:unit\b/g, `${run} test:unit`);
315
+ readme = readme.replace(/pnpm test:e2e\b/g, `${run} test:e2e`);
316
+ readme = readme.replace(/pnpm test:visual\b/g, `${run} test:visual`);
317
+ readme = readme.replace(/pnpm test:live\b/g, `${run} test:live`);
318
+ readme = readme.replace(/pnpm test:eval\b/g, `${run} test:eval`);
319
+ readme = readme.replace(/pnpm add\b/g, pm === 'npm' ? 'npm install' : `${pm} add`);
320
+ readme = readme.replace(/pnpm dlx\b/g, dlx);
321
+ d.writeFileSync(readmePath, readme);
322
+ }
323
+ }
302
324
  const s = d.spinner();
303
325
  s.start(`Installing dependencies with ${pm}...`);
304
326
 
@@ -366,30 +388,32 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
366
388
  initialValue: true,
367
389
  });
368
390
  if (!clack.isCancel(installSkill) && installSkill) {
391
+ const dlx = pm === 'yarn' ? 'yarn dlx' : pm === 'npm' ? 'npx' : 'pnpm dlx';
369
392
  try {
370
- d.execSync('pnpm dlx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server', {
393
+ d.execSync(`${dlx} skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server`, {
371
394
  cwd: targetDir,
372
395
  stdio: 'inherit',
373
396
  });
374
397
  } catch {
375
- d.console.log('Skill install skipped. You can install later with: pnpm dlx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server');
398
+ d.console.log(`Skill install skipped. You can install later with: ${dlx} skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server`);
376
399
  }
377
400
  }
378
401
  }
379
402
 
403
+ const run = pm === 'npm' ? 'npm run' : pm;
380
404
  d.outro(`Done! To get started:
381
405
 
382
406
  cd ${projectName}
383
- sunpeak dev
407
+ ${run} dev
384
408
 
385
409
  Your project commands:
386
410
 
387
- sunpeak dev # Start dev server + MCP endpoint
388
- sunpeak build # Build for production
389
- sunpeak test # Run unit + e2e tests
390
- sunpeak test --eval # Run LLM evals (configure models in tests/evals/eval.config.ts)
391
- sunpeak test --visual # Run visual regression tests
392
- sunpeak test --live # Run live tests against real AI hosts`);
411
+ ${run} dev # Start dev server + MCP endpoint
412
+ ${run} build # Build for production
413
+ ${run} test # Run unit + e2e tests
414
+ ${run} test:eval # Run LLM evals (configure models in tests/evals/eval.config.ts)
415
+ ${run} test:visual # Run visual regression tests
416
+ ${run} test:live # Run live tests against real AI hosts`);
393
417
  }
394
418
 
395
419
  // Allow running directly
package/bin/commands/test-init.mjs CHANGED
@@ -153,13 +153,15 @@ export async function testInit(args = [], deps = defaultDeps) {
153
153
  initialValue: true,
154
154
  });
155
155
  if (!d.isCancel(installSkill) && installSkill) {
156
+ const pm = d.detectPackageManager();
157
+ const dlx = pm === 'yarn' ? 'yarn dlx' : pm === 'npm' ? 'npx' : 'pnpm dlx';
156
158
  try {
157
- d.execSync('pnpm dlx skills add Sunpeak-AI/sunpeak@test-mcp-server', {
159
+ d.execSync(`${dlx} skills add Sunpeak-AI/sunpeak@test-mcp-server`, {
158
160
  cwd: d.cwd(),
159
161
  stdio: 'inherit',
160
162
  });
161
163
  } catch {
162
- d.log.info('Skill install skipped. Install later: pnpm dlx skills add Sunpeak-AI/sunpeak@test-mcp-server');
164
+ d.log.info(`Skill install skipped. Install later: ${dlx} skills add Sunpeak-AI/sunpeak@test-mcp-server`);
163
165
  }
164
166
  }
165
167
  }
@@ -357,7 +359,7 @@ function scaffoldEvals(evalsDir, { server, isSunpeak, d: deps } = {}) {
357
359
  * 2. Install the AI SDK and provider packages (e.g. pnpm add ai @ai-sdk/openai)
358
360
  * 3. Copy .env.example to .env and add your API keys
359
361
  * 4. Replace this file with evals for your own tools
360
- * 5. Run: sunpeak test --eval
362
+ * 5. Run: npx sunpeak test --eval
361
363
  *
362
364
  * Each case sends a prompt to every configured model and checks
363
365
  * that the model calls the expected tool with the expected arguments.
@@ -403,10 +405,10 @@ function scaffoldVisualTest(filePath, d) {
403
405
  /**
404
406
  * Visual regression tests — compare screenshots against saved baselines.
405
407
  *
406
- * Screenshots only run with: sunpeak test --visual
407
- * Update baselines with: sunpeak test --visual --update
408
+ * Screenshots only run with: npx sunpeak test --visual
409
+ * Update baselines with: npx sunpeak test --visual --update
408
410
  *
409
- * During normal \`sunpeak test\` runs, screenshot() calls are silently
411
+ * During normal \`npx sunpeak test\` runs, screenshot() calls are silently
410
412
  * skipped so these tests still pass without baselines.
411
413
  *
412
414
  * Uncomment the tests below and replace 'your-tool' with your tool name.
@@ -465,7 +467,7 @@ function scaffoldLiveTests(liveDir, { isSunpeak, server, d } = {}) {
465
467
  * Prerequisites:
466
468
  * 1. Your MCP server must be accessible via a public URL (e.g., ngrok tunnel)
467
469
  * 2. The server must be registered as an MCP action in the host
468
- * 3. Run: sunpeak test --live
470
+ * 3. Run: npx sunpeak test --live
469
471
  *
470
472
  * On first run, a browser window opens for you to log in to the host.
471
473
  * The session is saved for subsequent runs (typically lasts a few hours).
@@ -508,9 +510,9 @@ export default defineLiveConfig({${serverOption}
508
510
  * - live.setColorScheme('dark', app) — switch theme while app is visible
509
511
  * - live.page — the underlying Playwright page
510
512
  *
511
- * Run with: sunpeak test --live
513
+ * Run with: npx sunpeak test --live
512
514
  *
513
- * These tests are excluded from normal \`sunpeak test\` runs because
515
+ * These tests are excluded from normal \`npx sunpeak test\` runs because
514
516
  * they require host accounts and cost API credits.
515
517
  */
516
518
 
@@ -553,7 +555,7 @@ function scaffoldUnitTest(filePath, d) {
553
555
  * Import your tool handler directly and test its input/output
554
556
  * without starting the MCP server or inspector.
555
557
  *
556
- * Run with: sunpeak test --unit
558
+ * Run with: npx sunpeak test --unit
557
559
  *
558
560
  * To set up vitest, add it to your devDependencies:
559
561
  * npm install -D vitest
@@ -701,10 +703,10 @@ test('server exposes tools', async ({ mcp }) => {
701
703
  }
702
704
 
703
705
  d.log.step('Ready! Run tests with:');
704
- d.log.message(' sunpeak test # E2E tests');
705
- d.log.message(' sunpeak test --visual # Visual regression (generates baselines on first run)');
706
- d.log.message(' sunpeak test --live # Live tests against real hosts (requires login)');
707
- d.log.message(' sunpeak test --eval # Multi-model evals (configure models in evals/eval.config.ts)');
706
+ d.log.message(' npx sunpeak test # E2E tests');
707
+ d.log.message(' npx sunpeak test --visual # Visual regression (generates baselines on first run)');
708
+ d.log.message(' npx sunpeak test --live # Live tests against real hosts (requires login)');
709
+ d.log.message(' npx sunpeak test --eval # Multi-model evals (configure models in evals/eval.config.ts)');
708
710
  }
709
711
 
710
712
  async function initJsProject(cliServer, d) {
@@ -784,11 +786,11 @@ test('server exposes tools', async ({ mcp }) => {
784
786
  d.log.message(` ${pkgMgr} add -D sunpeak @playwright/test vitest`);
785
787
  d.log.message(` ${pkgMgr} exec playwright install chromium`);
786
788
  d.log.message('');
787
- d.log.message(' sunpeak test # E2E tests');
788
- d.log.message(' sunpeak test --unit # Unit tests (vitest)');
789
- d.log.message(' sunpeak test --visual # Visual regression');
790
- d.log.message(' sunpeak test --live # Live tests against real hosts');
791
- d.log.message(' sunpeak test --eval # Multi-model evals');
789
+ d.log.message(' npx sunpeak test # E2E tests');
790
+ d.log.message(' npx sunpeak test --unit # Unit tests (vitest)');
791
+ d.log.message(' npx sunpeak test --visual # Visual regression');
792
+ d.log.message(' npx sunpeak test --live # Live tests against real hosts');
793
+ d.log.message(' npx sunpeak test --eval # Multi-model evals');
792
794
  }
793
795
 
794
796
  async function initSunpeakProject(d) {
@@ -835,10 +837,10 @@ export default defineConfig();
835
837
  scaffoldUnitTest(join(cwd, 'tests', 'unit', 'example.test.ts'), d);
836
838
 
837
839
  d.log.step('Scaffolded test types:');
838
- d.log.message(' tests/e2e/visual.test.ts — Visual regression (sunpeak test --visual)');
839
- d.log.message(' tests/live/ — Live host tests (sunpeak test --live)');
840
- d.log.message(' tests/evals/ — Multi-model evals (sunpeak test --eval)');
841
- d.log.message(' tests/unit/example.test.ts — Unit tests (sunpeak test --unit)');
840
+ d.log.message(' tests/e2e/visual.test.ts — Visual regression (npx sunpeak test --visual)');
841
+ d.log.message(' tests/live/ — Live host tests (npx sunpeak test --live)');
842
+ d.log.message(' tests/evals/ — Multi-model evals (npx sunpeak test --eval)');
843
+ d.log.message(' tests/unit/example.test.ts — Unit tests (npx sunpeak test --unit)');
842
844
  d.log.message('');
843
845
  d.log.message(' Migrate existing e2e tests:');
844
846
  d.log.message(' Replace: import { test, expect } from "@playwright/test"');
package/bin/sunpeak.js CHANGED
@@ -102,22 +102,11 @@ function getVersion() {
102
102
  {
103
103
  const resources = discoverResources();
104
104
  console.log(`
105
- ☀️ 🏔️ sunpeak - Inspector, testing framework, and app framework for MCP Apps
105
+ ☀️ 🏔️ sunpeak - App framework, testing framework, and inspector for MCP Apps
106
106
 
107
- Install:
108
- pnpm add -g sunpeak
107
+ Usage: npx sunpeak <command>
109
108
 
110
- Testing (works with any MCP server):
111
- sunpeak inspect Inspect any MCP server in the inspector
112
- --server, -s <url|cmd> MCP server URL or stdio command (required)
113
- --simulations <dir> Simulation JSON directory
114
- sunpeak test Run e2e tests against the inspector
115
- init Scaffold test infrastructure into a project
116
- --unit Run unit tests (vitest)
117
- --live Run live tests against real hosts
118
- --eval Run evals against LLM models
119
-
120
- App framework (for sunpeak projects):
109
+ App framework:
121
110
  sunpeak new [name] [resources] Create a new project
122
111
  sunpeak dev Start dev server + inspector + MCP endpoint
123
112
  --no-begging Suppress GitHub star message
@@ -125,8 +114,20 @@ App framework (for sunpeak projects):
125
114
  sunpeak start Start production MCP server
126
115
  --port, -p Server port (default: 8000, or PORT env)
127
116
  sunpeak upgrade Upgrade sunpeak to latest version
128
- sunpeak --version Show version number
129
117
 
118
+ Testing (works with any MCP server):
119
+ sunpeak test Run e2e tests against the inspector
120
+ init Scaffold test infrastructure into a project
121
+ --unit Run unit tests (vitest)
122
+ --live Run live tests against real hosts
123
+ --eval Run evals against LLM models
124
+
125
+ Inspector (works with any MCP server):
126
+ sunpeak inspect Inspect any MCP server in the inspector
127
+ --server, -s <url|cmd> MCP server URL or stdio command (required)
128
+ --simulations <dir> Simulation JSON directory
129
+
130
+ sunpeak --version Show version number
130
131
  Resources: ${resources.join(', ')} (comma/space separated)
131
132
  Example: sunpeak new sunpeak-app "${resources.slice(0, 2).join(',')}"
132
133
  `);
@@ -1,6 +1,6 @@
1
1
  Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
2
2
  const require_chunk = require("../chunk-9hOWP6kD.cjs");
3
- const require_inspector = require("../inspector-DAA1Wiyh.cjs");
3
+ const require_inspector = require("../inspector-D0qOqYX2.cjs");
4
4
  const require_inspector_url = require("../inspector-url-C3LTKgXt.cjs");
5
5
  const require_discovery = require("../discovery-Clu4uHp1.cjs");
6
6
  //#region src/chatgpt/index.ts
@@ -1,5 +1,5 @@
1
1
  import { r as __exportAll } from "../chunk-D6g4UhsZ.js";
2
- import { _ as McpAppHost, d as ThemeProvider, f as useThemeContext, g as extractResourceCSP, h as IframeResource, n as resolveServerToolResult, t as Inspector, v as SCREEN_WIDTHS } from "../inspector-BBDa5yCm.js";
2
+ import { _ as McpAppHost, d as ThemeProvider, f as useThemeContext, g as extractResourceCSP, h as IframeResource, n as resolveServerToolResult, t as Inspector, v as SCREEN_WIDTHS } from "../inspector-60Na_Zc4.js";
3
3
  import { t as createInspectorUrl } from "../inspector-url-CyQcuBI9.js";
4
4
  import { c as toPascalCase, i as findResourceKey, n as extractSimulationKey, r as findResourceDirs, s as getComponentName, t as extractResourceKey } from "../discovery-Cgoegt62.js";
5
5
  //#region src/chatgpt/index.ts
@@ -1,4 +1,4 @@
1
1
  Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
2
2
  require("../chunk-9hOWP6kD.cjs");
3
- const require_inspector = require("../inspector-DAA1Wiyh.cjs");
3
+ const require_inspector = require("../inspector-D0qOqYX2.cjs");
4
4
  exports.Inspector = require_inspector.Inspector;
@@ -1,2 +1,2 @@
1
- import { t as Inspector } from "../inspector-BBDa5yCm.js";
1
+ import { t as Inspector } from "../inspector-60Na_Zc4.js";
2
2
  export { Inspector };
@@ -1,6 +1,6 @@
1
1
  Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
2
2
  require("../../chunk-9hOWP6kD.cjs");
3
- const require_use_app = require("../../use-app-DPkj5Jp_.cjs");
3
+ const require_use_app = require("../../use-app-B33mckz4.cjs");
4
4
  let react = require("react");
5
5
  //#region src/host/chatgpt/openai-types.ts
6
6
  /**
@@ -1,4 +1,4 @@
1
- import { t as useApp } from "../../use-app-Cr0auUa1.js";
1
+ import { t as useApp } from "../../use-app-kv5GQr0G.js";
2
2
  import { useCallback } from "react";
3
3
  //#region src/host/chatgpt/openai-types.ts
4
4
  /**