sunpeak 0.19.4 → 0.19.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +6 -4
  2. package/bin/commands/dev.mjs +1 -1
  3. package/bin/commands/inspect.mjs +1 -1
  4. package/bin/commands/new.mjs +92 -10
  5. package/bin/commands/start.mjs +3 -1
  6. package/bin/commands/test-init.mjs +548 -76
  7. package/bin/commands/test.mjs +401 -4
  8. package/bin/lib/eval/eval-providers.mjs +34 -0
  9. package/bin/lib/eval/eval-reporter.mjs +105 -0
  10. package/bin/lib/eval/eval-runner.mjs +362 -0
  11. package/bin/lib/eval/eval-types.d.mts +168 -0
  12. package/bin/lib/eval/eval-vitest-plugin.mjs +147 -0
  13. package/bin/lib/eval/model-registry.mjs +73 -0
  14. package/bin/lib/inspect/inspect-config.mjs +1 -1
  15. package/bin/lib/sandbox-server.mjs +5 -2
  16. package/bin/lib/test/test-config.mjs +1 -1
  17. package/bin/sunpeak.js +1 -0
  18. package/dist/chatgpt/index.cjs +1 -1
  19. package/dist/chatgpt/index.js +1 -1
  20. package/dist/claude/index.cjs +1 -1
  21. package/dist/claude/index.js +1 -1
  22. package/dist/host/chatgpt/index.cjs +1 -1
  23. package/dist/host/chatgpt/index.js +1 -1
  24. package/dist/index.cjs +2 -2
  25. package/dist/index.js +2 -2
  26. package/dist/inspector/index.cjs +1 -1
  27. package/dist/inspector/index.js +1 -1
  28. package/dist/{inspector-Bp9jrHIu.js → inspector-D5DckQuU.js} +19 -19
  29. package/dist/{inspector-Bp9jrHIu.js.map → inspector-D5DckQuU.js.map} +1 -1
  30. package/dist/{inspector-Cvq3yjNL.cjs → inspector-jY9O18z9.cjs} +19 -19
  31. package/dist/{inspector-Cvq3yjNL.cjs.map → inspector-jY9O18z9.cjs.map} +1 -1
  32. package/dist/mcp/index.cjs +2 -2
  33. package/dist/mcp/index.cjs.map +1 -1
  34. package/dist/mcp/index.js +2 -2
  35. package/dist/mcp/index.js.map +1 -1
  36. package/dist/{use-app-Ck5kR1Sf.js → use-app-Bfargfa3.js} +2 -2
  37. package/dist/{use-app-Ck5kR1Sf.js.map → use-app-Bfargfa3.js.map} +1 -1
  38. package/dist/{use-app-DHYiev3D.cjs → use-app-CbsBEmwv.cjs} +2 -2
  39. package/dist/{use-app-DHYiev3D.cjs.map → use-app-CbsBEmwv.cjs.map} +1 -1
  40. package/package.json +32 -2
  41. package/template/README.md +17 -7
  42. package/template/_gitignore +2 -0
  43. package/template/dist/albums/albums.html +1 -1
  44. package/template/dist/albums/albums.json +1 -1
  45. package/template/dist/carousel/carousel.html +1 -1
  46. package/template/dist/carousel/carousel.json +1 -1
  47. package/template/dist/map/map.html +1 -1
  48. package/template/dist/map/map.json +1 -1
  49. package/template/dist/review/review.html +1 -1
  50. package/template/dist/review/review.json +1 -1
  51. package/template/node_modules/.bin/vitest +2 -2
  52. package/template/node_modules/.vite/deps/_metadata.json +3 -3
  53. package/template/node_modules/.vite-mcp/deps/_metadata.json +20 -20
  54. package/template/node_modules/.vite-mcp/deps/vitest.js +7 -7
  55. package/template/node_modules/.vite-mcp/deps/vitest.js.map +1 -1
  56. package/template/tests/evals/_env.example +5 -0
  57. package/template/tests/evals/albums.eval.ts +31 -0
  58. package/template/tests/evals/carousel.eval.ts +16 -0
  59. package/template/tests/evals/eval.config.ts +26 -0
  60. package/template/tests/evals/map.eval.ts +16 -0
  61. package/template/tests/evals/review.eval.ts +53 -0
package/README.md CHANGED
@@ -64,6 +64,7 @@ test('review tool renders title', async ({ mcp }) => {
64
64
  - **MCP-native assertions**: `toBeError()`, `toHaveTextContent()`, `toHaveStructuredContent()`
65
65
  - **Multi-host**: Tests run against ChatGPT and Claude hosts automatically
66
66
  - **Live tests**: Automated browser tests against real ChatGPT via `sunpeak/test/live`
67
+ - **Evals**: Test your tool interface design against multiple LLMs (GPT-4o, Claude, Gemini, etc.) via `sunpeak/eval`
67
68
 
68
69
  ### 3. App Framework
69
70
 
@@ -113,6 +114,7 @@ sunpeak new
113
114
  | `sunpeak test --visual` | Run e2e tests with visual regression |
114
115
  | `sunpeak test --visual --update` | Update visual regression baselines |
115
116
  | `sunpeak test --live` | Run live tests against real hosts |
117
+ | `sunpeak test --eval` | Run evals against multiple LLM models |
116
118
  | `sunpeak test init` | Scaffold test infrastructure into a project |
117
119
 
118
120
  **App framework** (for sunpeak projects):
@@ -125,12 +127,12 @@ sunpeak new
125
127
  | `sunpeak start` | Start production MCP server |
126
128
  | `sunpeak upgrade` | Upgrade sunpeak to latest version |
127
129
 
128
- ## Coding Agent Skill
130
+ ## Coding Agent Skills
129
131
 
130
- Install the `create-sunpeak-app` skill to give your coding agent (Claude Code, Cursor, etc.) built-in knowledge of sunpeak patterns, hooks, simulation files, and testing conventions:
132
+ Install the sunpeak skills to give your coding agent (Claude Code, Cursor, etc.) built-in knowledge of sunpeak patterns, hooks, and testing:
131
133
 
132
134
  ```bash
133
- npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app
135
+ pnpm dlx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server
134
136
  ```
135
137
 
136
138
  ## Troubleshooting
@@ -143,7 +145,7 @@ If your app doesn't render in ChatGPT or Claude:
143
145
  4. **Hard refresh** the host page (`Cmd+Shift+R` / `Ctrl+Shift+R`)
144
146
  5. **Open a new chat** in the host (cached iframes persist per-conversation)
145
147
 
146
- Full guide: [sunpeak.ai/docs/guides/troubleshooting](https://sunpeak.ai/docs/guides/troubleshooting)
148
+ Full guide: [sunpeak.ai/docs/app-framework/guides/troubleshooting](https://sunpeak.ai/docs/app-framework/guides/troubleshooting)
147
149
 
148
150
  ## Resources
149
151
 
package/bin/commands/dev.mjs CHANGED
@@ -363,7 +363,7 @@ export async function dev(projectRoot = process.cwd(), args = []) {
363
363
  const sandbox = await startSandboxServer({ preferredPort: sandboxPort });
364
364
 
365
365
  // Find available ports for the MCP server and HMR WebSocket
366
- const mcpPort = await getPort(8000);
366
+ const mcpPort = await getPort(Number(process.env.SUNPEAK_MCP_PORT || 8000));
367
367
  const hmrPort = await getPort(Number(process.env.SUNPEAK_HMR_PORT || 24679));
368
368
 
369
369
  console.log(`\nStarting MCP server with ${simulations.length} simulation(s) (Vite HMR)...`);
package/bin/commands/inspect.mjs CHANGED
@@ -1145,7 +1145,7 @@ export async function inspectServer(opts) {
1145
1145
  server.bindCLIShortcuts({ print: true });
1146
1146
 
1147
1147
  // Print troubleshooting link (dimmed)
1148
- console.log('\n \x1b[2mApp not loading? \u2192 https://sunpeak.ai/docs/guides/troubleshooting\x1b[0m');
1148
+ console.log('\n \x1b[2mApp not loading? \u2192 https://sunpeak.ai/docs/app-framework/guides/troubleshooting\x1b[0m');
1149
1149
 
1150
1150
  // Print star-begging message unless suppressed
1151
1151
  if (!noBegging) {
package/bin/commands/new.mjs CHANGED
@@ -9,6 +9,7 @@ const execAsync = promisify(exec);
9
9
  import * as clack from '@clack/prompts';
10
10
  import { discoverResources } from '../lib/patterns.mjs';
11
11
  import { detectPackageManager } from '../utils.mjs';
12
+ import { EVAL_PROVIDERS } from '../lib/eval/eval-providers.mjs';
12
13
 
13
14
  const __dirname = dirname(fileURLToPath(import.meta.url));
14
15
 
@@ -44,7 +45,7 @@ async function defaultSelectResources(availableResources) {
44
45
  const maxLen = Math.max(...availableResources.map((r) => r.length));
45
46
  return availableResources.map((r) => ({
46
47
  value: r,
47
- label: `${r.padEnd(maxLen)} (https://sunpeak.ai/docs/api-reference/resources/${r})`,
48
+ label: `${r.padEnd(maxLen)} (https://sunpeak.ai/docs/app-framework/resources/${r})`,
48
49
  }));
49
50
  })(),
50
51
  initialValues: availableResources,
@@ -57,6 +58,21 @@ async function defaultSelectResources(availableResources) {
57
58
  return selected;
58
59
  }
59
60
 
61
+ /**
62
+ * Default prompt for eval provider selection.
63
+ * @returns {Promise<Array<{ pkg: string, models: string[] }>>}
64
+ */
65
+ async function defaultSelectProviders() {
66
+ const selected = await clack.multiselect({
67
+ message: 'AI providers for evals (space to toggle, enter to skip)',
68
+ options: EVAL_PROVIDERS.map((p) => ({ value: p, label: p.label })),
69
+ initialValues: [],
70
+ required: false,
71
+ });
72
+ if (clack.isCancel(selected)) return [];
73
+ return selected;
74
+ }
75
+
60
76
  /**
61
77
  * Default dependencies (real implementations)
62
78
  */
@@ -73,6 +89,8 @@ export const defaultDeps = {
73
89
  execAsync,
74
90
  promptName: defaultPromptName,
75
91
  selectResources: defaultSelectResources,
92
+ selectProviders: defaultSelectProviders,
93
+ password: clack.password,
76
94
  confirm: clack.confirm,
77
95
  intro: clack.intro,
78
96
  outro: clack.outro,
@@ -214,6 +232,10 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
214
232
  if (src.includes('/tests/live/') && name === `${resource}.spec.ts`) {
215
233
  return false;
216
234
  }
235
+ // Skip eval files for excluded resources
236
+ if (src.includes('/tests/evals/') && name === `${resource}.eval.ts`) {
237
+ return false;
238
+ }
217
239
  }
218
240
 
219
241
  return true;
@@ -229,6 +251,15 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
229
251
  d.renameSync(srcPath, destPath);
230
252
  }
231
253
  }
254
+ // Rename nested dotfiles (underscore convention for npm compatibility)
255
+ const nestedDotfiles = [['tests/evals/_env.example', 'tests/evals/.env.example']];
256
+ for (const [from, to] of nestedDotfiles) {
257
+ const srcPath = join(targetDir, from);
258
+ const destPath = join(targetDir, to);
259
+ if (d.existsSync(srcPath)) {
260
+ d.renameSync(srcPath, destPath);
261
+ }
262
+ }
232
263
 
233
264
  // Read sunpeak version from root package.json
234
265
  const rootPkg = JSON.parse(d.readFileSync(d.rootPkgPath, 'utf-8'));
@@ -278,26 +309,74 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
278
309
  s.stop(`Install failed. You can try running "${pm} install" manually.`);
279
310
  }
280
311
 
281
- // Offer to install the sunpeak skill (only in interactive mode)
312
+ // Offer to configure eval providers (only in interactive mode)
313
+ if (resourcesArg === undefined) {
314
+ const providers = await d.selectProviders();
315
+ if (!clack.isCancel(providers) && providers.length > 0) {
316
+ // Install AI SDK core + selected provider packages
317
+ const pkgsToInstall = ['ai', ...providers.map((p) => p.pkg)];
318
+ try {
319
+ await d.execAsync(`${pm} add -D ${pkgsToInstall.join(' ')}`, { cwd: targetDir });
320
+ } catch {
321
+ d.console.log(`Provider install failed. Install manually: ${pm} add -D ${pkgsToInstall.join(' ')}`);
322
+ }
323
+
324
+ // Uncomment selected models in eval.config.ts
325
+ const evalConfigPath = join(targetDir, 'tests', 'evals', 'eval.config.ts');
326
+ if (d.existsSync(evalConfigPath)) {
327
+ let config = d.readFileSync(evalConfigPath, 'utf-8');
328
+ for (const p of providers) {
329
+ for (const model of p.models) {
330
+ // Uncomment lines matching this model (e.g., " // 'gpt-4o'," → " 'gpt-4o',")
331
+ config = config.replace(
332
+ new RegExp(`^(\\s*)// ('${model.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}',?.*)$`, 'm'),
333
+ '$1$2'
334
+ );
335
+ }
336
+ }
337
+ d.writeFileSync(evalConfigPath, config);
338
+ }
339
+
340
+ // Prompt for API keys and write .env
341
+ const envLines = [];
342
+ const seen = new Set();
343
+ for (const p of providers) {
344
+ if (seen.has(p.envVar)) continue;
345
+ seen.add(p.envVar);
346
+ const key = await d.password({
347
+ message: `${p.envVar} (enter to skip)`,
348
+ mask: '*',
349
+ });
350
+ if (!clack.isCancel(key) && key) {
351
+ envLines.push(`${p.envVar}=${key}`);
352
+ }
353
+ }
354
+ const envPath = join(targetDir, 'tests', 'evals', '.env');
355
+ if (envLines.length > 0) {
356
+ d.writeFileSync(envPath, envLines.join('\n') + '\n');
357
+ clack.log.info(`API keys saved to tests/evals/.env (gitignored)`);
358
+ }
359
+ }
360
+ }
361
+
362
+ // Offer to install the sunpeak skills (only in interactive mode)
282
363
  if (resourcesArg === undefined) {
283
364
  const installSkill = await d.confirm({
284
- message: 'Install the sunpeak skill? (helps your coding agent build your app)',
365
+ message: 'Install the sunpeak skills? (helps your coding agent build and test your app)',
285
366
  initialValue: true,
286
367
  });
287
368
  if (!clack.isCancel(installSkill) && installSkill) {
288
369
  try {
289
- d.execSync('npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app', {
370
+ d.execSync('pnpm dlx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server', {
290
371
  cwd: targetDir,
291
372
  stdio: 'inherit',
292
373
  });
293
374
  } catch {
294
- d.console.log('Skill install skipped. You can install later with: npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app');
375
+ d.console.log('Skill install skipped. You can install later with: pnpm dlx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server');
295
376
  }
296
377
  }
297
378
  }
298
379
 
299
- const runCmd = pm === 'npm' ? 'npm run' : pm;
300
-
301
380
  d.outro(`Done! To get started:
302
381
 
303
382
  cd ${projectName}
@@ -305,9 +384,12 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
305
384
 
306
385
  Your project commands:
307
386
 
308
- sunpeak dev # Start dev server + MCP endpoint
309
- sunpeak build # Build for production
310
- ${runCmd} test # Run tests`);
387
+ sunpeak dev # Start dev server + MCP endpoint
388
+ sunpeak build # Build for production
389
+ sunpeak test # Run unit + e2e tests
390
+ sunpeak test --eval # Run LLM evals (configure models in tests/evals/eval.config.ts)
391
+ sunpeak test --visual # Run visual regression tests
392
+ sunpeak test --live # Run live tests against real AI hosts`);
311
393
  }
312
394
 
313
395
  // Allow running directly
package/bin/commands/start.mjs CHANGED
@@ -45,6 +45,7 @@ export async function start(projectRoot = process.cwd(), args = []) {
45
45
 
46
46
  const jsonLogs = args.includes('--json-logs');
47
47
  const sse = args.includes('--sse');
48
+ const stateless = args.includes('--stateless');
48
49
 
49
50
  // Import production server from sunpeak
50
51
  const isTemplate = projectRoot.endsWith('/template') || projectRoot.endsWith('\\template');
@@ -190,9 +191,10 @@ export async function start(projectRoot = process.cwd(), args = []) {
190
191
  port = await getPort(port);
191
192
 
192
193
  console.log(`\nStarting ${name} v${version} on ${host}:${port}...`);
194
+ if (stateless) console.log('Stateless mode enabled (no session tracking)');
193
195
 
194
196
  startProductionHttpServer(
195
- { name, version, serverInfo: serverConfig, tools, resources, auth, ...(sse ? { enableJsonResponse: false } : {}) },
197
+ { name, version, serverInfo: serverConfig, tools, resources, auth, stateless, ...(sse ? { enableJsonResponse: false } : {}) },
196
198
  { port, host }
197
199
  );
198
200
  }