sunpeak 0.19.2 → 0.19.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/README.md +6 -4
  2. package/bin/commands/dev.mjs +1 -1
  3. package/bin/commands/inspect.mjs +1 -1
  4. package/bin/commands/new.mjs +9 -5
  5. package/bin/commands/start.mjs +3 -1
  6. package/bin/commands/test-init.mjs +478 -76
  7. package/bin/commands/test.mjs +357 -4
  8. package/bin/lib/eval/eval-reporter.mjs +105 -0
  9. package/bin/lib/eval/eval-runner.mjs +310 -0
  10. package/bin/lib/eval/eval-types.d.mts +168 -0
  11. package/bin/lib/eval/eval-vitest-plugin.mjs +158 -0
  12. package/bin/lib/eval/model-registry.mjs +73 -0
  13. package/bin/lib/sandbox-server.mjs +5 -2
  14. package/bin/sunpeak.js +1 -0
  15. package/dist/chatgpt/index.cjs +1 -1
  16. package/dist/chatgpt/index.js +1 -1
  17. package/dist/claude/index.cjs +1 -1
  18. package/dist/claude/index.js +1 -1
  19. package/dist/host/chatgpt/index.cjs +1 -1
  20. package/dist/host/chatgpt/index.js +1 -1
  21. package/dist/index.cjs +134 -124
  22. package/dist/index.cjs.map +1 -1
  23. package/dist/index.d.ts +3 -1
  24. package/dist/index.js +71 -62
  25. package/dist/index.js.map +1 -1
  26. package/dist/inspector/index.cjs +1 -1
  27. package/dist/inspector/index.js +1 -1
  28. package/dist/{inspector-Cdo5BK2D.js → inspector-D5DckQuU.js} +236 -98
  29. package/dist/inspector-D5DckQuU.js.map +1 -0
  30. package/dist/{inspector-8nPV2A-z.cjs → inspector-jY9O18z9.cjs} +237 -99
  31. package/dist/inspector-jY9O18z9.cjs.map +1 -0
  32. package/dist/mcp/index.cjs +237 -140
  33. package/dist/mcp/index.cjs.map +1 -1
  34. package/dist/mcp/index.d.ts +1 -1
  35. package/dist/mcp/index.js +230 -134
  36. package/dist/mcp/index.js.map +1 -1
  37. package/dist/mcp/production-server.d.ts +31 -0
  38. package/dist/{protocol-C7kTcBr_.cjs → protocol-C8pFDmcy.cjs} +8194 -8187
  39. package/dist/protocol-C8pFDmcy.cjs.map +1 -0
  40. package/dist/{protocol-BfAACnv0.js → protocol-CRqiPTLT.js} +8186 -8185
  41. package/dist/protocol-CRqiPTLT.js.map +1 -0
  42. package/dist/{use-app-CfP9VypY.js → use-app-Bfargfa3.js} +194 -94
  43. package/dist/use-app-Bfargfa3.js.map +1 -0
  44. package/dist/{use-app-CzcYw1Kz.cjs → use-app-CbsBEmwv.cjs} +254 -148
  45. package/dist/use-app-CbsBEmwv.cjs.map +1 -0
  46. package/package.json +27 -3
  47. package/template/README.md +17 -7
  48. package/template/_gitignore +2 -0
  49. package/template/dist/albums/albums.html +15 -15
  50. package/template/dist/albums/albums.json +1 -1
  51. package/template/dist/carousel/carousel.html +19 -19
  52. package/template/dist/carousel/carousel.json +1 -1
  53. package/template/dist/map/map.html +14 -14
  54. package/template/dist/map/map.json +1 -1
  55. package/template/dist/review/review.html +11 -11
  56. package/template/dist/review/review.json +1 -1
  57. package/template/node_modules/.bin/vitest +2 -2
  58. package/template/node_modules/.vite/deps/_metadata.json +3 -3
  59. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps.js +192 -91
  60. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps.js.map +1 -1
  61. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_app-bridge.js +231 -92
  62. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_app-bridge.js.map +1 -1
  63. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_react.js +208 -105
  64. package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_react.js.map +1 -1
  65. package/template/node_modules/.vite-mcp/deps/_metadata.json +25 -25
  66. package/template/node_modules/.vite-mcp/deps/{protocol-B_qKkui_.js → protocol-BqGB4zBx.js} +45 -45
  67. package/template/node_modules/.vite-mcp/deps/protocol-BqGB4zBx.js.map +1 -0
  68. package/template/node_modules/.vite-mcp/deps/vitest.js +7 -7
  69. package/template/node_modules/.vite-mcp/deps/vitest.js.map +1 -1
  70. package/template/tests/e2e/visual.spec.ts-snapshots/albums-dark-chatgpt-darwin.png +0 -0
  71. package/template/tests/e2e/visual.spec.ts-snapshots/albums-dark-claude-darwin.png +0 -0
  72. package/template/tests/e2e/visual.spec.ts-snapshots/albums-fullscreen-chatgpt-darwin.png +0 -0
  73. package/template/tests/e2e/visual.spec.ts-snapshots/albums-fullscreen-claude-darwin.png +0 -0
  74. package/template/tests/e2e/visual.spec.ts-snapshots/albums-light-chatgpt-darwin.png +0 -0
  75. package/template/tests/e2e/visual.spec.ts-snapshots/albums-light-claude-darwin.png +0 -0
  76. package/template/tests/e2e/visual.spec.ts-snapshots/albums-page-light-chatgpt-darwin.png +0 -0
  77. package/template/tests/e2e/visual.spec.ts-snapshots/albums-page-light-claude-darwin.png +0 -0
  78. package/template/tests/evals/.env.example +5 -0
  79. package/template/tests/evals/albums.eval.ts +28 -0
  80. package/template/tests/evals/carousel.eval.ts +26 -0
  81. package/template/tests/evals/eval.config.ts +26 -0
  82. package/template/tests/evals/map.eval.ts +23 -0
  83. package/template/tests/evals/review.eval.ts +48 -0
  84. package/dist/inspector-8nPV2A-z.cjs.map +0 -1
  85. package/dist/inspector-Cdo5BK2D.js.map +0 -1
  86. package/dist/protocol-BfAACnv0.js.map +0 -1
  87. package/dist/protocol-C7kTcBr_.cjs.map +0 -1
  88. package/dist/use-app-CfP9VypY.js.map +0 -1
  89. package/dist/use-app-CzcYw1Kz.cjs.map +0 -1
  90. package/template/node_modules/.vite-mcp/deps/protocol-B_qKkui_.js.map +0 -1
@@ -1,7 +1,28 @@
1
1
  import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
2
- import { join } from 'path';
2
+ import { execSync } from 'child_process';
3
+ import { join, dirname } from 'path';
3
4
  import * as p from '@clack/prompts';
4
5
 
6
+ /**
7
+ * Default dependencies (real implementations).
8
+ * Override in tests via the `deps` parameter.
9
+ */
10
+ export const defaultDeps = {
11
+ existsSync,
12
+ readFileSync,
13
+ writeFileSync,
14
+ mkdirSync,
15
+ execSync,
16
+ cwd: () => process.cwd(),
17
+ intro: p.intro,
18
+ outro: p.outro,
19
+ confirm: p.confirm,
20
+ isCancel: p.isCancel,
21
+ select: p.select,
22
+ text: p.text,
23
+ log: p.log,
24
+ };
25
+
5
26
  /**
6
27
  * sunpeak test init — Scaffold test infrastructure for MCP servers.
7
28
  *
@@ -9,9 +30,18 @@ import * as p from '@clack/prompts';
9
30
  * - Non-JS projects: self-contained tests/sunpeak/ directory
10
31
  * - JS/TS projects: root-level config + test files
11
32
  * - sunpeak projects: migrate to defineConfig()
33
+ *
34
+ * Scaffolds all 5 test types:
35
+ * 1. E2E tests — Playwright-based inspector tests (mcp fixture)
36
+ * 2. Visual regression — Screenshot comparison via mcp.screenshot()
37
+ * 3. Live tests — Test against real ChatGPT/Claude hosts
38
+ * 4. Evals — Multi-model tool calling reliability tests
39
+ * 5. Unit tests — Direct tool handler tests (JS/TS projects only)
12
40
  */
13
- export async function testInit(args = []) {
14
- p.intro('Setting up sunpeak tests');
41
+ export async function testInit(args = [], deps = defaultDeps) {
42
+ const d = { ...defaultDeps, ...deps };
43
+
44
+ d.intro('Setting up sunpeak tests');
15
45
 
16
46
  // Parse --server flag from CLI args
17
47
  const serverIdx = args.indexOf('--server');
@@ -20,26 +50,42 @@ export async function testInit(args = []) {
20
50
  ? args[serverIdx + 1]
21
51
  : undefined;
22
52
 
23
- const projectType = detectProjectType();
53
+ const projectType = detectProjectType(d);
24
54
 
25
55
  if (projectType === 'sunpeak') {
26
- await initSunpeakProject();
56
+ await initSunpeakProject(d);
27
57
  } else if (projectType === 'js') {
28
- await initJsProject(cliServer);
58
+ await initJsProject(cliServer, d);
29
59
  } else {
30
- await initExternalProject(cliServer);
60
+ await initExternalProject(cliServer, d);
61
+ }
62
+
63
+ // Offer to install the testing skill
64
+ const installSkill = await d.confirm({
65
+ message: 'Install the test-mcp-server skill? (helps your coding agent write tests)',
66
+ initialValue: true,
67
+ });
68
+ if (!d.isCancel(installSkill) && installSkill) {
69
+ try {
70
+ d.execSync('npx skills add Sunpeak-AI/sunpeak@test-mcp-server', {
71
+ cwd: d.cwd(),
72
+ stdio: 'inherit',
73
+ });
74
+ } catch {
75
+ d.log.info('Skill install skipped. Install later: npx skills add Sunpeak-AI/sunpeak@test-mcp-server');
76
+ }
31
77
  }
32
78
 
33
- p.outro('Done!');
79
+ d.outro('Done!');
34
80
  }
35
81
 
36
- function detectProjectType() {
37
- const cwd = process.cwd();
82
+ function detectProjectType(d) {
83
+ const cwd = d.cwd();
38
84
  const pkgPath = join(cwd, 'package.json');
39
85
 
40
- if (existsSync(pkgPath)) {
86
+ if (d.existsSync(pkgPath)) {
41
87
  try {
42
- const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
88
+ const pkg = JSON.parse(d.readFileSync(pkgPath, 'utf-8'));
43
89
  const deps = { ...pkg.dependencies, ...pkg.devDependencies };
44
90
  if ('sunpeak' in deps) return 'sunpeak';
45
91
  return 'js';
@@ -52,7 +98,7 @@ function detectProjectType() {
52
98
  return 'external';
53
99
  }
54
100
 
55
- async function getServerConfig(cliServer) {
101
+ async function getServerConfig(cliServer, d) {
56
102
  // If provided via --server flag, detect type automatically
57
103
  if (cliServer) {
58
104
  if (cliServer.startsWith('http://') || cliServer.startsWith('https://')) {
@@ -61,7 +107,7 @@ async function getServerConfig(cliServer) {
61
107
  return { type: 'command', value: cliServer };
62
108
  }
63
109
 
64
- const serverType = await p.select({
110
+ const serverType = await d.select({
65
111
  message: 'How does your MCP server start?',
66
112
  options: [
67
113
  { value: 'command', label: 'Command (e.g., python server.py)' },
@@ -70,23 +116,23 @@ async function getServerConfig(cliServer) {
70
116
  ],
71
117
  });
72
118
 
73
- if (p.isCancel(serverType)) process.exit(0);
119
+ if (d.isCancel(serverType)) process.exit(0);
74
120
 
75
121
  if (serverType === 'command') {
76
- const command = await p.text({
122
+ const command = await d.text({
77
123
  message: 'Server start command:',
78
124
  placeholder: 'python src/server.py',
79
125
  });
80
- if (p.isCancel(command)) process.exit(0);
126
+ if (d.isCancel(command)) process.exit(0);
81
127
  return { type: 'command', value: command };
82
128
  }
83
129
 
84
130
  if (serverType === 'url') {
85
- const url = await p.text({
131
+ const url = await d.text({
86
132
  message: 'Server URL:',
87
133
  placeholder: 'http://localhost:8000/mcp',
88
134
  });
89
- if (p.isCancel(url)) process.exit(0);
135
+ if (d.isCancel(url)) process.exit(0);
90
136
  return { type: 'url', value: url };
91
137
  }
92
138
 
@@ -122,21 +168,321 @@ function generateServerConfigBlock(server, relativeTo = '.') {
122
168
  },`;
123
169
  }
124
170
 
125
- async function initExternalProject(cliServer) {
126
- p.log.info('Detected non-JS project. Creating self-contained test directory.');
171
+ /**
172
+ * Scaffold eval boilerplate into a directory.
173
+ * @param {string} evalsDir - Directory to create eval files in
174
+ * @param {{ server?: object, isSunpeak?: boolean, d?: object }} options
175
+ */
176
+ function scaffoldEvals(evalsDir, { server, isSunpeak, d: deps } = {}) {
177
+ const d = deps || defaultDeps;
178
+ if (d.existsSync(join(evalsDir, 'eval.config.ts'))) {
179
+ d.log.info('Eval config already exists. Skipping eval scaffold.');
180
+ return;
181
+ }
182
+
183
+ d.mkdirSync(evalsDir, { recursive: true });
184
+
185
+ // Generate server line for eval config
186
+ let serverLine = ' // server: \'http://localhost:8000/mcp\',';
187
+ if (isSunpeak) {
188
+ serverLine = ' // Omit server for sunpeak projects (auto-detected).\n // server: \'http://localhost:8000/mcp\',';
189
+ } else if (server?.type === 'url') {
190
+ serverLine = ` server: '${server.value}',`;
191
+ } else if (server?.type === 'command') {
192
+ serverLine = ` server: '${server.value}',`;
193
+ }
194
+
195
+ // Build the eval config content
196
+ const configLines = [
197
+ "import { defineEvalConfig } from 'sunpeak/eval';",
198
+ "",
199
+ "// API keys are loaded automatically from .env in this directory (gitignored).",
200
+ "// See .env.example for the format.",
201
+ "",
202
+ "export default defineEvalConfig({",
203
+ " // MCP server to test.",
204
+ serverLine,
205
+ "",
206
+ " models: [",
207
+ " // Uncomment models and install their provider packages:",
208
+ " // 'gpt-4o', // OPENAI_API_KEY",
209
+ " // 'gpt-4o-mini', // OPENAI_API_KEY",
210
+ " // 'o4-mini', // OPENAI_API_KEY",
211
+ " // 'claude-sonnet-4-20250514', // ANTHROPIC_API_KEY",
212
+ " // 'gemini-2.0-flash', // GOOGLE_GENERATIVE_AI_API_KEY",
213
+ " ],",
214
+ "",
215
+ " defaults: {",
216
+ " runs: 10, // Number of times to run each case per model",
217
+ " maxSteps: 1, // Max tool call steps per run",
218
+ " temperature: 0, // 0 for most deterministic results",
219
+ " timeout: 30_000, // Timeout per run in ms",
220
+ " },",
221
+ "});",
222
+ "",
223
+ ];
224
+
225
+ d.writeFileSync(join(evalsDir, 'eval.config.ts'), configLines.join('\n'));
226
+
227
+ // Scaffold .env template
228
+ d.writeFileSync(
229
+ join(evalsDir, '.env.example'),
230
+ `# Copy this file to .env and fill in your API keys.
231
+ # .env is gitignored — never commit API keys.
232
+ # OPENAI_API_KEY=sk-...
233
+ # ANTHROPIC_API_KEY=sk-ant-...
234
+ # GOOGLE_GENERATIVE_AI_API_KEY=...
235
+ `
236
+ );
237
+
238
+ d.writeFileSync(
239
+ join(evalsDir, 'example.eval.ts'),
240
+ `import { defineEval } from 'sunpeak/eval';
241
+
242
+ /**
243
+ * Example eval — tests whether LLMs call your tools correctly.
244
+ *
245
+ * To get started:
246
+ * 1. Configure models in eval.config.ts (uncomment the ones you want)
247
+ * 2. Install the AI SDK and provider packages: pnpm add ai @ai-sdk/openai
248
+ * 3. Copy .env.example to .env and add your API keys
249
+ * 4. Replace this file with evals for your own tools
250
+ * 5. Run: sunpeak test --eval
251
+ *
252
+ * Each case sends a prompt to every configured model and checks
253
+ * that the model calls the expected tool with the expected arguments.
254
+ * Cases run multiple times (configured via \`runs\` in eval.config.ts)
255
+ * to measure reliability across non-deterministic LLM responses.
256
+ */
257
+ export default defineEval({
258
+ // This eval is skipped when no models are configured.
259
+ // Delete this file and create your own evals to get started.
260
+ cases: [
261
+ {
262
+ name: 'example (replace me)',
263
+ prompt: 'Show me a demo',
264
+ // expect which tool gets called and (optionally) its arguments:
265
+ expect: {
266
+ tool: 'your-tool-name',
267
+ // args: { key: 'value' },
268
+ },
269
+ },
270
+ ],
271
+ });
272
+ `
273
+ );
274
+
275
+ d.log.success(`Created ${evalsDir}/ with eval config and example.`);
276
+ }
277
+
278
+ /**
279
+ * Scaffold a visual regression test file.
280
+ * @param {string} filePath - Full path to the visual test file
281
+ * @param {object} d - Dependencies
282
+ */
283
+ function scaffoldVisualTest(filePath, d) {
284
+ if (d.existsSync(filePath)) {
285
+ d.log.info('Visual test already exists. Skipping.');
286
+ return;
287
+ }
288
+
289
+ d.writeFileSync(
290
+ filePath,
291
+ `import { test, expect } from 'sunpeak/test';
292
+
293
+ /**
294
+ * Visual regression tests — compare screenshots against saved baselines.
295
+ *
296
+ * Screenshots only run with: sunpeak test --visual
297
+ * Update baselines with: sunpeak test --visual --update
298
+ *
299
+ * During normal \`sunpeak test\` runs, screenshot() calls are silently
300
+ * skipped so these tests still pass without baselines.
301
+ *
302
+ * Uncomment the tests below and replace 'your-tool' with your tool name.
303
+ */
304
+
305
+ // test('tool renders correctly in light mode', async ({ mcp }) => {
306
+ // const result = await mcp.callTool('your-tool', { key: 'value' }, { theme: 'light' });
307
+ // expect(result).not.toBeError();
308
+ //
309
+ // // Wait for UI to render, then screenshot:
310
+ // // const app = result.app();
311
+ // // await expect(app.getByText('Expected text')).toBeVisible();
312
+ // // await mcp.screenshot('tool-light');
313
+ // });
314
+
315
+ // test('tool renders correctly in dark mode', async ({ mcp }) => {
316
+ // const result = await mcp.callTool('your-tool', { key: 'value' }, { theme: 'dark' });
317
+ // expect(result).not.toBeError();
318
+ //
319
+ // // const app = result.app();
320
+ // // await expect(app.getByText('Expected text')).toBeVisible();
321
+ // // await mcp.screenshot('tool-dark');
322
+ // });
323
+
324
+ // Full-page screenshot (captures the inspector chrome too):
325
+ // test('full page renders correctly', async ({ mcp }) => {
326
+ // const result = await mcp.callTool('your-tool', {}, { theme: 'light' });
327
+ // const app = result.app();
328
+ // await expect(app.getByText('Expected text')).toBeVisible();
329
+ // await mcp.screenshot('tool-page', { target: 'page', maxDiffPixelRatio: 0.02 });
330
+ // });
331
+ `
332
+ );
333
+ d.log.success(`Created ${filePath}`);
334
+ }
335
+
336
+ /**
337
+ * Scaffold live test boilerplate (test against real ChatGPT/Claude).
338
+ * @param {string} liveDir - Directory to create live test files in
339
+ * @param {{ isSunpeak?: boolean, d: object }} options
340
+ */
341
+ function scaffoldLiveTests(liveDir, { isSunpeak, d } = {}) {
342
+ if (d.existsSync(join(liveDir, 'playwright.config.ts'))) {
343
+ d.log.info('Live test config already exists. Skipping live test scaffold.');
344
+ return;
345
+ }
346
+
347
+ d.mkdirSync(liveDir, { recursive: true });
348
+
349
+ // Live test playwright config
350
+ const liveConfigPreamble = `import { defineLiveConfig } from 'sunpeak/test/live/config';
351
+
352
+ /**
353
+ * Live tests run against real AI hosts (ChatGPT, Claude).
354
+ *
355
+ * Prerequisites:
356
+ * 1. Your MCP server must be accessible via a public URL (e.g., ngrok tunnel)
357
+ * 2. The server must be registered as an MCP action in the host
358
+ * 3. Run: sunpeak test --live
359
+ *
360
+ * On first run, a browser window opens for you to log in to the host.
361
+ * The session is saved for subsequent runs (typically lasts a few hours).`;
362
+
363
+ const liveConfigExport = `export default defineLiveConfig({
364
+ // hosts: ['chatgpt'], // Which hosts to test against
365
+ // colorScheme: 'light', // Default color scheme
366
+ // viewport: { width: 1280, height: 720 },
367
+ devOverlay: false,
368
+ });
369
+ `;
370
+
371
+ const configContent = isSunpeak
372
+ ? `${liveConfigPreamble}
373
+ */
374
+ ${liveConfigExport}`
375
+ : `${liveConfigPreamble}
376
+ *
377
+ * NOTE: defineLiveConfig() starts a local sunpeak dev server as its backend.
378
+ * If your MCP server is not a sunpeak project, you may need to customize the
379
+ * webServer option in the Playwright config below to start your own server,
380
+ * or remove webServer entirely if your server is already running.
381
+ */
382
+ ${liveConfigExport}`;
383
+
384
+ d.writeFileSync(join(liveDir, 'playwright.config.ts'), configContent);
385
+
386
+ // Live test example
387
+ d.writeFileSync(
388
+ join(liveDir, 'example.test.ts'),
389
+ `import { test, expect } from 'sunpeak/test/live';
390
+
391
+ /**
392
+ * Live tests invoke tools through real AI hosts (ChatGPT, Claude).
393
+ *
394
+ * The \`live\` fixture provides:
395
+ * - live.invoke(toolName) — invoke a tool and get the app locator
396
+ * - live.setColorScheme('dark', app) — switch theme while app is visible
397
+ * - live.page — the underlying Playwright page
398
+ *
399
+ * Run with: sunpeak test --live
400
+ *
401
+ * These tests are excluded from normal \`sunpeak test\` runs because
402
+ * they require host accounts and cost API credits.
403
+ */
404
+
405
+ // Uncomment and replace 'your-tool' with the tool name as it appears in the host.
406
+ // test('tool renders in the host', async ({ live }) => {
407
+ // const app = await live.invoke('your-tool');
408
+ //
409
+ // await expect(app.getByText('Expected text')).toBeVisible({ timeout: 15_000 });
410
+ //
411
+ // // Test dark mode:
412
+ // await live.setColorScheme('dark', app);
413
+ // await expect(app.getByText('Expected text')).toBeVisible();
414
+ // });
415
+ `
416
+ );
417
+
418
+ d.log.success(`Created ${liveDir}/ with live test config and example.`);
419
+ }
420
+
421
+ /**
422
+ * Scaffold a unit test example for JS/TS projects.
423
+ * @param {string} filePath - Full path to the unit test file
424
+ * @param {object} d - Dependencies
425
+ */
426
+ function scaffoldUnitTest(filePath, d) {
427
+ if (d.existsSync(filePath)) {
428
+ d.log.info('Unit test already exists. Skipping.');
429
+ return;
430
+ }
431
+
432
+ d.mkdirSync(dirname(filePath), { recursive: true });
433
+
434
+ d.writeFileSync(
435
+ filePath,
436
+ `import { describe, it, expect } from 'vitest';
437
+
438
+ /**
439
+ * Unit tests for your MCP tool handlers.
440
+ *
441
+ * Import your tool handler directly and test its input/output
442
+ * without starting the MCP server or inspector.
443
+ *
444
+ * Run with: sunpeak test --unit
445
+ *
446
+ * To set up vitest, add it to your devDependencies:
447
+ * npm install -D vitest
448
+ *
449
+ * Uncomment and customize the tests below for your tools.
450
+ */
451
+
452
+ // import handler, { tool, schema } from '../../src/tools/your-tool';
453
+ // const extra = {} as Parameters<typeof handler>[1];
454
+
455
+ // describe('your tool', () => {
456
+ // it('returns expected output', async () => {
457
+ // const result = await handler({ key: 'value' }, extra);
458
+ // expect(result.structuredContent).toBeDefined();
459
+ // });
460
+ //
461
+ // it('exports correct tool config', () => {
462
+ // expect(tool.title).toBe('Your Tool');
463
+ // expect(tool.annotations?.readOnlyHint).toBe(true);
464
+ // });
465
+ // });
466
+ `
467
+ );
468
+ d.log.success(`Created ${filePath}`);
469
+ }
470
+
471
+ async function initExternalProject(cliServer, d) {
472
+ d.log.info('Detected non-JS project. Creating self-contained test directory.');
127
473
 
128
- const server = await getServerConfig(cliServer);
129
- const testDir = join(process.cwd(), 'tests', 'sunpeak');
474
+ const server = await getServerConfig(cliServer, d);
475
+ const testDir = join(d.cwd(), 'tests', 'sunpeak');
130
476
 
131
- if (existsSync(testDir)) {
132
- p.log.warn('tests/sunpeak/ already exists. Skipping scaffold.');
477
+ if (d.existsSync(testDir)) {
478
+ d.log.warn('tests/sunpeak/ already exists. Skipping scaffold.');
133
479
  return;
134
480
  }
135
481
 
136
- mkdirSync(testDir, { recursive: true });
482
+ d.mkdirSync(testDir, { recursive: true });
137
483
 
138
484
  // package.json
139
- writeFileSync(
485
+ d.writeFileSync(
140
486
  join(testDir, 'package.json'),
141
487
  JSON.stringify(
142
488
  {
@@ -157,7 +503,7 @@ async function initExternalProject(cliServer) {
157
503
 
158
504
  // sunpeak.config.ts (used as playwright config)
159
505
  const serverBlock = generateServerConfigBlock(server, '../..');
160
- writeFileSync(
506
+ d.writeFileSync(
161
507
  join(testDir, 'playwright.config.ts'),
162
508
  `import { defineConfig } from 'sunpeak/test/config';
163
509
 
@@ -168,7 +514,7 @@ ${serverBlock}
168
514
  );
169
515
 
170
516
  // tsconfig.json
171
- writeFileSync(
517
+ d.writeFileSync(
172
518
  join(testDir, 'tsconfig.json'),
173
519
  JSON.stringify(
174
520
  {
@@ -185,8 +531,8 @@ ${serverBlock}
185
531
  ) + '\n'
186
532
  );
187
533
 
188
- // smoke test — runnable out of the box, verifies the server is reachable
189
- writeFileSync(
534
+ // 1. E2E test — smoke test, verifies the server is reachable
535
+ d.writeFileSync(
190
536
  join(testDir, 'smoke.test.ts'),
191
537
  `import { test, expect } from 'sunpeak/test';
192
538
 
@@ -207,27 +553,40 @@ test('server is reachable and inspector loads', async ({ mcp }) => {
207
553
  `
208
554
  );
209
555
 
210
- p.log.success('Created tests/sunpeak/ with config and starter test.');
211
- p.log.step('Next steps:');
212
- p.log.message(' cd tests/sunpeak');
213
- p.log.message(' npm install');
214
- p.log.message(' npx playwright install chromium');
215
- p.log.message(' npx sunpeak test');
556
+ // 2. Visual regression test
557
+ scaffoldVisualTest(join(testDir, 'visual.test.ts'), d);
558
+
559
+ // 3. Live tests
560
+ scaffoldLiveTests(join(testDir, 'live'), { isSunpeak: false, d });
561
+
562
+ // 4. Eval boilerplate
563
+ scaffoldEvals(join(testDir, 'evals'), { server, d });
564
+
565
+ d.log.success('Created tests/sunpeak/ with all test types.');
566
+ d.log.step('Next steps:');
567
+ d.log.message(' cd tests/sunpeak');
568
+ d.log.message(' npm install');
569
+ d.log.message(' npx playwright install chromium');
570
+ d.log.message('');
571
+ d.log.message(' npx sunpeak test # E2E tests');
572
+ d.log.message(' npx sunpeak test --visual # Visual regression (generates baselines on first run)');
573
+ d.log.message(' npx sunpeak test --live # Live tests against real hosts (requires login)');
574
+ d.log.message(' npx sunpeak test --eval # Multi-model evals (configure models in evals/eval.config.ts)');
216
575
  }
217
576
 
218
- async function initJsProject(cliServer) {
219
- p.log.info('Detected JS/TS project. Adding test config at project root.');
577
+ async function initJsProject(cliServer, d) {
578
+ d.log.info('Detected JS/TS project. Adding test config at project root.');
220
579
 
221
- const server = await getServerConfig(cliServer);
222
- const cwd = process.cwd();
580
+ const server = await getServerConfig(cliServer, d);
581
+ const cwd = d.cwd();
223
582
 
224
583
  // Create playwright.config.ts
225
584
  const configPath = join(cwd, 'playwright.config.ts');
226
- if (existsSync(configPath)) {
227
- p.log.warn('playwright.config.ts already exists. Skipping config creation.');
585
+ if (d.existsSync(configPath)) {
586
+ d.log.warn('playwright.config.ts already exists. Skipping config creation.');
228
587
  } else {
229
588
  const serverBlock = generateServerConfigBlock(server);
230
- writeFileSync(
589
+ d.writeFileSync(
231
590
  configPath,
232
591
  `import { defineConfig } from 'sunpeak/test/config';
233
592
 
@@ -236,16 +595,16 @@ ${serverBlock}
236
595
  });
237
596
  `
238
597
  );
239
- p.log.success('Created playwright.config.ts');
598
+ d.log.success('Created playwright.config.ts');
240
599
  }
241
600
 
242
- // Create test directory and smoke test
243
- const testDir = join(cwd, 'tests', 'e2e');
244
- mkdirSync(testDir, { recursive: true });
601
+ // 1. E2E test smoke test
602
+ const e2eDir = join(cwd, 'tests', 'e2e');
603
+ d.mkdirSync(e2eDir, { recursive: true });
245
604
 
246
- const testPath = join(testDir, 'smoke.test.ts');
247
- if (!existsSync(testPath)) {
248
- writeFileSync(
605
+ const testPath = join(e2eDir, 'smoke.test.ts');
606
+ if (!d.existsSync(testPath)) {
607
+ d.writeFileSync(
249
608
  testPath,
250
609
  `import { test, expect } from 'sunpeak/test';
251
610
 
@@ -264,42 +623,85 @@ test('server is reachable and inspector loads', async ({ mcp }) => {
264
623
  // });
265
624
  `
266
625
  );
267
- p.log.success('Created tests/e2e/smoke.test.ts');
626
+ d.log.success('Created tests/e2e/smoke.test.ts');
268
627
  }
269
628
 
270
- p.log.step('Next steps:');
271
- p.log.message(' npm install -D sunpeak @playwright/test');
272
- p.log.message(' npx playwright install chromium');
273
- p.log.message(' npx sunpeak test');
629
+ // 2. Visual regression test
630
+ scaffoldVisualTest(join(e2eDir, 'visual.test.ts'), d);
631
+
632
+ // 3. Live tests
633
+ scaffoldLiveTests(join(cwd, 'tests', 'live'), { isSunpeak: false, d });
634
+
635
+ // 4. Eval boilerplate
636
+ scaffoldEvals(join(cwd, 'tests', 'evals'), { server, d });
637
+
638
+ // 5. Unit test
639
+ scaffoldUnitTest(join(cwd, 'tests', 'unit', 'example.test.ts'), d);
640
+
641
+ d.log.step('Next steps:');
642
+ d.log.message(' npm install -D sunpeak @playwright/test vitest');
643
+ d.log.message(' npx playwright install chromium');
644
+ d.log.message('');
645
+ d.log.message(' npx sunpeak test # E2E tests');
646
+ d.log.message(' npx sunpeak test --unit # Unit tests (vitest)');
647
+ d.log.message(' npx sunpeak test --visual # Visual regression');
648
+ d.log.message(' npx sunpeak test --live # Live tests against real hosts');
649
+ d.log.message(' npx sunpeak test --eval # Multi-model evals');
274
650
  }
275
651
 
276
- async function initSunpeakProject() {
277
- p.log.info('Detected sunpeak project. Updating config to use defineConfig().');
652
+ async function initSunpeakProject(d) {
653
+ d.log.info('Detected sunpeak project. Updating config to use defineConfig().');
278
654
 
279
- const cwd = process.cwd();
655
+ const cwd = d.cwd();
280
656
  const configPath = join(cwd, 'playwright.config.ts');
281
657
 
282
- if (existsSync(configPath)) {
283
- const content = readFileSync(configPath, 'utf-8');
658
+ if (d.existsSync(configPath)) {
659
+ const content = d.readFileSync(configPath, 'utf-8');
284
660
  if (content.includes('sunpeak/test/config')) {
285
- p.log.info('Config already uses sunpeak/test/config. Nothing to do.');
286
- return;
661
+ d.log.info('Config already uses sunpeak/test/config. Nothing to do.');
662
+ } else {
663
+ d.log.warn('playwright.config.ts exists but does not use sunpeak/test/config.');
664
+ d.log.message(' To migrate, replace your config with:');
665
+ d.log.message(" import { defineConfig } from 'sunpeak/test/config';");
666
+ d.log.message(' export default defineConfig();');
287
667
  }
288
- }
289
-
290
- writeFileSync(
291
- configPath,
292
- `import { defineConfig } from 'sunpeak/test/config';
668
+ } else {
669
+ d.writeFileSync(
670
+ configPath,
671
+ `import { defineConfig } from 'sunpeak/test/config';
293
672
 
294
673
  export default defineConfig();
295
674
  `
296
- );
675
+ );
676
+ d.log.success('Updated playwright.config.ts to use defineConfig()');
677
+ }
297
678
 
298
- p.log.success('Updated playwright.config.ts to use defineConfig()');
299
- p.log.step('Migrate test files:');
300
- p.log.message(' Replace: import { test, expect } from "@playwright/test"');
301
- p.log.message(' With: import { test, expect } from "sunpeak/test"');
302
- p.log.message('');
303
- p.log.message(' Use the `mcp` fixture instead of raw page navigation.');
304
- p.log.message(' See sunpeak docs for migration examples.');
679
+ // Scaffold missing test types
680
+
681
+ // 1. Visual regression test
682
+ const e2eDir = join(cwd, 'tests', 'e2e');
683
+ d.mkdirSync(e2eDir, { recursive: true });
684
+ scaffoldVisualTest(join(e2eDir, 'visual.test.ts'), d);
685
+
686
+ // 2. Live tests
687
+ scaffoldLiveTests(join(cwd, 'tests', 'live'), { isSunpeak: true, d });
688
+
689
+ // 3. Eval boilerplate
690
+ scaffoldEvals(join(cwd, 'tests', 'evals'), { isSunpeak: true, d });
691
+
692
+ // 4. Unit test
693
+ scaffoldUnitTest(join(cwd, 'tests', 'unit', 'example.test.ts'), d);
694
+
695
+ d.log.step('Scaffolded test types:');
696
+ d.log.message(' tests/e2e/visual.test.ts — Visual regression (sunpeak test --visual)');
697
+ d.log.message(' tests/live/ — Live host tests (sunpeak test --live)');
698
+ d.log.message(' tests/evals/ — Multi-model evals (sunpeak test --eval)');
699
+ d.log.message(' tests/unit/example.test.ts — Unit tests (sunpeak test --unit)');
700
+ d.log.message('');
701
+ d.log.message(' Migrate existing e2e tests:');
702
+ d.log.message(' Replace: import { test, expect } from "@playwright/test"');
703
+ d.log.message(' With: import { test, expect } from "sunpeak/test"');
704
+ d.log.message('');
705
+ d.log.message(' Use the `mcp` fixture instead of raw page navigation.');
706
+ d.log.message(' See sunpeak docs for migration examples.');
305
707
  }