sunpeak 0.19.2 → 0.19.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/README.md +6 -4
  2. package/bin/commands/dev.mjs +1 -1
  3. package/bin/commands/inspect.mjs +1 -1
  4. package/bin/commands/new.mjs +9 -5
  5. package/bin/commands/start.mjs +3 -1
  6. package/bin/commands/test-init.mjs +478 -76
  7. package/bin/commands/test.mjs +357 -4
  8. package/bin/lib/eval/eval-reporter.mjs +105 -0
  9. package/bin/lib/eval/eval-runner.mjs +310 -0
  10. package/bin/lib/eval/eval-types.d.mts +168 -0
  11. package/bin/lib/eval/eval-vitest-plugin.mjs +158 -0
  12. package/bin/lib/eval/model-registry.mjs +73 -0
  13. package/bin/lib/sandbox-server.mjs +5 -2
  14. package/bin/sunpeak.js +1 -0
  15. package/dist/chatgpt/index.cjs +1 -1
  16. package/dist/chatgpt/index.js +1 -1
  17. package/dist/claude/index.cjs +1 -1
  18. package/dist/claude/index.js +1 -1
  19. package/dist/host/chatgpt/index.cjs +1 -1
  20. package/dist/host/chatgpt/index.js +1 -1
  21. package/dist/index.cjs +134 -124
  22. package/dist/index.cjs.map +1 -1
  23. package/dist/index.d.ts +3 -1
  24. package/dist/index.js +71 -62
  25. package/dist/index.js.map +1 -1
  26. package/dist/inspector/index.cjs +1 -1
  27. package/dist/inspector/index.js +1 -1
  28. package/dist/{inspector-Cdo5BK2D.js → inspector-D5DckQuU.js} +236 -98
  29. package/dist/inspector-D5DckQuU.js.map +1 -0
  30. package/dist/{inspector-8nPV2A-z.cjs → inspector-jY9O18z9.cjs} +237 -99
  31. package/dist/inspector-jY9O18z9.cjs.map +1 -0
  32. package/dist/mcp/index.cjs +237 -140
  33. package/dist/mcp/index.cjs.map +1 -1
  34. package/dist/mcp/index.d.ts +1 -1
  35. package/dist/mcp/index.js +230 -134
  36. package/dist/mcp/index.js.map +1 -1
  37. package/dist/mcp/production-server.d.ts +31 -0
  38. package/dist/{protocol-C7kTcBr_.cjs → protocol-C8pFDmcy.cjs} +8194 -8187
  39. package/dist/protocol-C8pFDmcy.cjs.map +1 -0
  40. package/dist/{protocol-BfAACnv0.js → protocol-CRqiPTLT.js} +8186 -8185
  41. package/dist/protocol-CRqiPTLT.js.map +1 -0
  42. package/dist/{use-app-CfP9VypY.js → use-app-Bfargfa3.js} +194 -94
  43. package/dist/use-app-Bfargfa3.js.map +1 -0
  44. package/dist/{use-app-CzcYw1Kz.cjs → use-app-CbsBEmwv.cjs} +254 -148
  45. package/dist/use-app-CbsBEmwv.cjs.map +1 -0
  46. package/package.json +27 -3
  47. package/template/README.md +17 -7
  48. package/template/_gitignore +2 -0
  49. package/template/dist/albums/albums.html +15 -15
  50. package/template/dist/albums/albums.json +1 -1
  51. package/template/dist/carousel/carousel.html +19 -19
  52. package/template/dist/carousel/carousel.json +1 -1
  53. package/template/dist/map/map.html +14 -14
  54. package/template/dist/map/map.json +1 -1
  55. package/template/dist/review/review.html +11 -11
  56. package/template/dist/review/review.json +1 -1
  57. package/template/node_modules/.bin/vitest +2 -2
  58. package/template/node_modules/.vite/deps/_metadata.json +3 -3
  59. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps.js +192 -91
  60. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps.js.map +1 -1
  61. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_app-bridge.js +231 -92
  62. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_app-bridge.js.map +1 -1
  63. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_react.js +208 -105
  64. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_react.js.map +1 -1
  65. package/template/node_modules/.vite-mcp/deps/_metadata.json +25 -25
  66. package/template/node_modules/.vite-mcp/deps/{protocol-B_qKkui_.js → protocol-BqGB4zBx.js} +45 -45
  67. package/template/node_modules/.vite-mcp/deps/protocol-BqGB4zBx.js.map +1 -0
  68. package/template/node_modules/.vite-mcp/deps/vitest.js +7 -7
  69. package/template/node_modules/.vite-mcp/deps/vitest.js.map +1 -1
  70. package/template/tests/e2e/visual.spec.ts-snapshots/albums-dark-chatgpt-darwin.png +0 -0
  71. package/template/tests/e2e/visual.spec.ts-snapshots/albums-dark-claude-darwin.png +0 -0
  72. package/template/tests/e2e/visual.spec.ts-snapshots/albums-fullscreen-chatgpt-darwin.png +0 -0
  73. package/template/tests/e2e/visual.spec.ts-snapshots/albums-fullscreen-claude-darwin.png +0 -0
  74. package/template/tests/e2e/visual.spec.ts-snapshots/albums-light-chatgpt-darwin.png +0 -0
  75. package/template/tests/e2e/visual.spec.ts-snapshots/albums-light-claude-darwin.png +0 -0
  76. package/template/tests/e2e/visual.spec.ts-snapshots/albums-page-light-chatgpt-darwin.png +0 -0
  77. package/template/tests/e2e/visual.spec.ts-snapshots/albums-page-light-claude-darwin.png +0 -0
  78. package/template/tests/evals/.env.example +5 -0
  79. package/template/tests/evals/albums.eval.ts +28 -0
  80. package/template/tests/evals/carousel.eval.ts +26 -0
  81. package/template/tests/evals/eval.config.ts +26 -0
  82. package/template/tests/evals/map.eval.ts +23 -0
  83. package/template/tests/evals/review.eval.ts +48 -0
  84. package/dist/inspector-8nPV2A-z.cjs.map +0 -1
  85. package/dist/inspector-Cdo5BK2D.js.map +0 -1
  86. package/dist/protocol-BfAACnv0.js.map +0 -1
  87. package/dist/protocol-C7kTcBr_.cjs.map +0 -1
  88. package/dist/use-app-CfP9VypY.js.map +0 -1
  89. package/dist/use-app-CzcYw1Kz.cjs.map +0 -1
  90. package/template/node_modules/.vite-mcp/deps/protocol-B_qKkui_.js.map +0 -1
package/README.md CHANGED
@@ -64,6 +64,7 @@ test('review tool renders title', async ({ mcp }) => {
64
64
  - **MCP-native assertions**: `toBeError()`, `toHaveTextContent()`, `toHaveStructuredContent()`
65
65
  - **Multi-host**: Tests run against ChatGPT and Claude hosts automatically
66
66
  - **Live tests**: Automated browser tests against real ChatGPT via `sunpeak/test/live`
67
+ - **Evals**: Test your tool interface design against multiple LLMs (GPT-4o, Claude, Gemini, etc.) via `sunpeak/eval`
67
68
 
68
69
  ### 3. App Framework
69
70
 
@@ -113,6 +114,7 @@ sunpeak new
113
114
  | `sunpeak test --visual` | Run e2e tests with visual regression |
114
115
  | `sunpeak test --visual --update` | Update visual regression baselines |
115
116
  | `sunpeak test --live` | Run live tests against real hosts |
117
+ | `sunpeak test --eval` | Run evals against multiple LLM models |
116
118
  | `sunpeak test init` | Scaffold test infrastructure into a project |
117
119
 
118
120
  **App framework** (for sunpeak projects):
@@ -125,12 +127,12 @@ sunpeak new
125
127
  | `sunpeak start` | Start production MCP server |
126
128
  | `sunpeak upgrade` | Upgrade sunpeak to latest version |
127
129
 
128
- ## Coding Agent Skill
130
+ ## Coding Agent Skills
129
131
 
130
- Install the `create-sunpeak-app` skill to give your coding agent (Claude Code, Cursor, etc.) built-in knowledge of sunpeak patterns, hooks, simulation files, and testing conventions:
132
+ Install the sunpeak skills to give your coding agent (Claude Code, Cursor, etc.) built-in knowledge of sunpeak patterns, hooks, and testing:
131
133
 
132
134
  ```bash
133
- npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app
135
+ npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server
134
136
  ```
135
137
 
136
138
  ## Troubleshooting
@@ -143,7 +145,7 @@ If your app doesn't render in ChatGPT or Claude:
143
145
  4. **Hard refresh** the host page (`Cmd+Shift+R` / `Ctrl+Shift+R`)
144
146
  5. **Open a new chat** in the host (cached iframes persist per-conversation)
145
147
 
146
- Full guide: [sunpeak.ai/docs/guides/troubleshooting](https://sunpeak.ai/docs/guides/troubleshooting)
148
+ Full guide: [sunpeak.ai/docs/app-framework/guides/troubleshooting](https://sunpeak.ai/docs/app-framework/guides/troubleshooting)
147
149
 
148
150
  ## Resources
149
151
 
package/bin/commands/dev.mjs CHANGED
@@ -363,7 +363,7 @@ export async function dev(projectRoot = process.cwd(), args = []) {
363
363
  const sandbox = await startSandboxServer({ preferredPort: sandboxPort });
364
364
 
365
365
  // Find available ports for the MCP server and HMR WebSocket
366
- const mcpPort = await getPort(8000);
366
+ const mcpPort = await getPort(Number(process.env.SUNPEAK_MCP_PORT || 8000));
367
367
  const hmrPort = await getPort(Number(process.env.SUNPEAK_HMR_PORT || 24679));
368
368
 
369
369
  console.log(`\nStarting MCP server with ${simulations.length} simulation(s) (Vite HMR)...`);
package/bin/commands/inspect.mjs CHANGED
@@ -1145,7 +1145,7 @@ export async function inspectServer(opts) {
1145
1145
  server.bindCLIShortcuts({ print: true });
1146
1146
 
1147
1147
  // Print troubleshooting link (dimmed)
1148
- console.log('\n \x1b[2mApp not loading? \u2192 https://sunpeak.ai/docs/guides/troubleshooting\x1b[0m');
1148
+ console.log('\n \x1b[2mApp not loading? \u2192 https://sunpeak.ai/docs/app-framework/guides/troubleshooting\x1b[0m');
1149
1149
 
1150
1150
  // Print star-begging message unless suppressed
1151
1151
  if (!noBegging) {
package/bin/commands/new.mjs CHANGED
@@ -44,7 +44,7 @@ async function defaultSelectResources(availableResources) {
44
44
  const maxLen = Math.max(...availableResources.map((r) => r.length));
45
45
  return availableResources.map((r) => ({
46
46
  value: r,
47
- label: `${r.padEnd(maxLen)} (https://sunpeak.ai/docs/api-reference/resources/${r})`,
47
+ label: `${r.padEnd(maxLen)} (https://sunpeak.ai/docs/app-framework/resources/${r})`,
48
48
  }));
49
49
  })(),
50
50
  initialValues: availableResources,
@@ -214,6 +214,10 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
214
214
  if (src.includes('/tests/live/') && name === `${resource}.spec.ts`) {
215
215
  return false;
216
216
  }
217
+ // Skip eval files for excluded resources
218
+ if (src.includes('/tests/evals/') && name === `${resource}.eval.ts`) {
219
+ return false;
220
+ }
217
221
  }
218
222
 
219
223
  return true;
@@ -278,20 +282,20 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
278
282
  s.stop(`Install failed. You can try running "${pm} install" manually.`);
279
283
  }
280
284
 
281
- // Offer to install the sunpeak skill (only in interactive mode)
285
+ // Offer to install the sunpeak skills (only in interactive mode)
282
286
  if (resourcesArg === undefined) {
283
287
  const installSkill = await d.confirm({
284
- message: 'Install the sunpeak skill? (helps your coding agent build your app)',
288
+ message: 'Install the sunpeak skills? (helps your coding agent build and test your app)',
285
289
  initialValue: true,
286
290
  });
287
291
  if (!clack.isCancel(installSkill) && installSkill) {
288
292
  try {
289
- d.execSync('npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app', {
293
+ d.execSync('npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server', {
290
294
  cwd: targetDir,
291
295
  stdio: 'inherit',
292
296
  });
293
297
  } catch {
294
- d.console.log('Skill install skipped. You can install later with: npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app');
298
+ d.console.log('Skill install skipped. You can install later with: npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server');
295
299
  }
296
300
  }
297
301
  }
package/bin/commands/start.mjs CHANGED
@@ -45,6 +45,7 @@ export async function start(projectRoot = process.cwd(), args = []) {
45
45
 
46
46
  const jsonLogs = args.includes('--json-logs');
47
47
  const sse = args.includes('--sse');
48
+ const stateless = args.includes('--stateless');
48
49
 
49
50
  // Import production server from sunpeak
50
51
  const isTemplate = projectRoot.endsWith('/template') || projectRoot.endsWith('\\template');
@@ -190,9 +191,10 @@ export async function start(projectRoot = process.cwd(), args = []) {
190
191
  port = await getPort(port);
191
192
 
192
193
  console.log(`\nStarting ${name} v${version} on ${host}:${port}...`);
194
+ if (stateless) console.log('Stateless mode enabled (no session tracking)');
193
195
 
194
196
  startProductionHttpServer(
195
- { name, version, serverInfo: serverConfig, tools, resources, auth, ...(sse ? { enableJsonResponse: false } : {}) },
197
+ { name, version, serverInfo: serverConfig, tools, resources, auth, stateless, ...(sse ? { enableJsonResponse: false } : {}) },
196
198
  { port, host }
197
199
  );
198
200
  }