sunpeak 0.19.2 → 0.19.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/bin/commands/dev.mjs +1 -1
- package/bin/commands/inspect.mjs +1 -1
- package/bin/commands/new.mjs +9 -5
- package/bin/commands/start.mjs +3 -1
- package/bin/commands/test-init.mjs +478 -76
- package/bin/commands/test.mjs +357 -4
- package/bin/lib/eval/eval-reporter.mjs +105 -0
- package/bin/lib/eval/eval-runner.mjs +310 -0
- package/bin/lib/eval/eval-types.d.mts +168 -0
- package/bin/lib/eval/eval-vitest-plugin.mjs +158 -0
- package/bin/lib/eval/model-registry.mjs +73 -0
- package/bin/lib/sandbox-server.mjs +5 -2
- package/bin/sunpeak.js +1 -0
- package/dist/chatgpt/index.cjs +1 -1
- package/dist/chatgpt/index.js +1 -1
- package/dist/claude/index.cjs +1 -1
- package/dist/claude/index.js +1 -1
- package/dist/host/chatgpt/index.cjs +1 -1
- package/dist/host/chatgpt/index.js +1 -1
- package/dist/index.cjs +134 -124
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.js +71 -62
- package/dist/index.js.map +1 -1
- package/dist/inspector/index.cjs +1 -1
- package/dist/inspector/index.js +1 -1
- package/dist/{inspector-Cdo5BK2D.js → inspector-D5DckQuU.js} +236 -98
- package/dist/inspector-D5DckQuU.js.map +1 -0
- package/dist/{inspector-8nPV2A-z.cjs → inspector-jY9O18z9.cjs} +237 -99
- package/dist/inspector-jY9O18z9.cjs.map +1 -0
- package/dist/mcp/index.cjs +237 -140
- package/dist/mcp/index.cjs.map +1 -1
- package/dist/mcp/index.d.ts +1 -1
- package/dist/mcp/index.js +230 -134
- package/dist/mcp/index.js.map +1 -1
- package/dist/mcp/production-server.d.ts +31 -0
- package/dist/{protocol-C7kTcBr_.cjs → protocol-C8pFDmcy.cjs} +8194 -8187
- package/dist/protocol-C8pFDmcy.cjs.map +1 -0
- package/dist/{protocol-BfAACnv0.js → protocol-CRqiPTLT.js} +8186 -8185
- package/dist/protocol-CRqiPTLT.js.map +1 -0
- package/dist/{use-app-CfP9VypY.js → use-app-Bfargfa3.js} +194 -94
- package/dist/use-app-Bfargfa3.js.map +1 -0
- package/dist/{use-app-CzcYw1Kz.cjs → use-app-CbsBEmwv.cjs} +254 -148
- package/dist/use-app-CbsBEmwv.cjs.map +1 -0
- package/package.json +27 -3
- package/template/README.md +17 -7
- package/template/_gitignore +2 -0
- package/template/dist/albums/albums.html +15 -15
- package/template/dist/albums/albums.json +1 -1
- package/template/dist/carousel/carousel.html +19 -19
- package/template/dist/carousel/carousel.json +1 -1
- package/template/dist/map/map.html +14 -14
- package/template/dist/map/map.json +1 -1
- package/template/dist/review/review.html +11 -11
- package/template/dist/review/review.json +1 -1
- package/template/node_modules/.bin/vitest +2 -2
- package/template/node_modules/.vite/deps/_metadata.json +3 -3
- package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps.js +192 -91
- package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps.js.map +1 -1
- package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_app-bridge.js +231 -92
- package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_app-bridge.js.map +1 -1
- package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_react.js +208 -105
- package/template/node_modules/.vite-mcp/deps/@modelcontextprotocol_ext-apps_react.js.map +1 -1
- package/template/node_modules/.vite-mcp/deps/_metadata.json +25 -25
- package/template/node_modules/.vite-mcp/deps/{protocol-B_qKkui_.js → protocol-BqGB4zBx.js} +45 -45
- package/template/node_modules/.vite-mcp/deps/protocol-BqGB4zBx.js.map +1 -0
- package/template/node_modules/.vite-mcp/deps/vitest.js +7 -7
- package/template/node_modules/.vite-mcp/deps/vitest.js.map +1 -1
- package/template/tests/e2e/visual.spec.ts-snapshots/albums-dark-chatgpt-darwin.png +0 -0
- package/template/tests/e2e/visual.spec.ts-snapshots/albums-dark-claude-darwin.png +0 -0
- package/template/tests/e2e/visual.spec.ts-snapshots/albums-fullscreen-chatgpt-darwin.png +0 -0
- package/template/tests/e2e/visual.spec.ts-snapshots/albums-fullscreen-claude-darwin.png +0 -0
- package/template/tests/e2e/visual.spec.ts-snapshots/albums-light-chatgpt-darwin.png +0 -0
- package/template/tests/e2e/visual.spec.ts-snapshots/albums-light-claude-darwin.png +0 -0
- package/template/tests/e2e/visual.spec.ts-snapshots/albums-page-light-chatgpt-darwin.png +0 -0
- package/template/tests/e2e/visual.spec.ts-snapshots/albums-page-light-claude-darwin.png +0 -0
- package/template/tests/evals/.env.example +5 -0
- package/template/tests/evals/albums.eval.ts +28 -0
- package/template/tests/evals/carousel.eval.ts +26 -0
- package/template/tests/evals/eval.config.ts +26 -0
- package/template/tests/evals/map.eval.ts +23 -0
- package/template/tests/evals/review.eval.ts +48 -0
- package/dist/inspector-8nPV2A-z.cjs.map +0 -1
- package/dist/inspector-Cdo5BK2D.js.map +0 -1
- package/dist/protocol-BfAACnv0.js.map +0 -1
- package/dist/protocol-C7kTcBr_.cjs.map +0 -1
- package/dist/use-app-CfP9VypY.js.map +0 -1
- package/dist/use-app-CzcYw1Kz.cjs.map +0 -1
- package/template/node_modules/.vite-mcp/deps/protocol-B_qKkui_.js.map +0 -1
|
@@ -1,7 +1,28 @@
|
|
|
1
1
|
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
|
|
2
|
-
import {
|
|
2
|
+
import { execSync } from 'child_process';
|
|
3
|
+
import { join, dirname } from 'path';
|
|
3
4
|
import * as p from '@clack/prompts';
|
|
4
5
|
|
|
6
|
+
/**
 * Real implementations of every side-effecting dependency this command uses.
 *
 * `testInit` merges a caller-supplied `deps` object over this one, so tests can
 * replace any subset (filesystem, process execution, prompts) with fakes.
 */
export const defaultDeps = {
  // Filesystem access.
  existsSync,
  readFileSync,
  writeFileSync,
  mkdirSync,

  // Child-process execution.
  execSync,

  // Working directory, looked up at call time rather than captured at import.
  cwd: () => process.cwd(),

  // Interactive prompts and logging from @clack/prompts.
  intro: p.intro,
  outro: p.outro,
  confirm: p.confirm,
  isCancel: p.isCancel,
  select: p.select,
  text: p.text,
  log: p.log,
};
|
|
25
|
+
|
|
5
26
|
/**
|
|
6
27
|
* sunpeak test init — Scaffold test infrastructure for MCP servers.
|
|
7
28
|
*
|
|
@@ -9,9 +30,18 @@ import * as p from '@clack/prompts';
|
|
|
9
30
|
* - Non-JS projects: self-contained tests/sunpeak/ directory
|
|
10
31
|
* - JS/TS projects: root-level config + test files
|
|
11
32
|
* - sunpeak projects: migrate to defineConfig()
|
|
33
|
+
*
|
|
34
|
+
* Scaffolds all 5 test types:
|
|
35
|
+
* 1. E2E tests — Playwright-based inspector tests (mcp fixture)
|
|
36
|
+
* 2. Visual regression — Screenshot comparison via mcp.screenshot()
|
|
37
|
+
* 3. Live tests — Test against real ChatGPT/Claude hosts
|
|
38
|
+
* 4. Evals — Multi-model tool calling reliability tests
|
|
39
|
+
* 5. Unit tests — Direct tool handler tests (JS/TS projects only)
|
|
12
40
|
*/
|
|
13
|
-
export async function testInit(args = []) {
|
|
14
|
-
|
|
41
|
+
export async function testInit(args = [], deps = defaultDeps) {
|
|
42
|
+
const d = { ...defaultDeps, ...deps };
|
|
43
|
+
|
|
44
|
+
d.intro('Setting up sunpeak tests');
|
|
15
45
|
|
|
16
46
|
// Parse --server flag from CLI args
|
|
17
47
|
const serverIdx = args.indexOf('--server');
|
|
@@ -20,26 +50,42 @@ export async function testInit(args = []) {
|
|
|
20
50
|
? args[serverIdx + 1]
|
|
21
51
|
: undefined;
|
|
22
52
|
|
|
23
|
-
const projectType = detectProjectType();
|
|
53
|
+
const projectType = detectProjectType(d);
|
|
24
54
|
|
|
25
55
|
if (projectType === 'sunpeak') {
|
|
26
|
-
await initSunpeakProject();
|
|
56
|
+
await initSunpeakProject(d);
|
|
27
57
|
} else if (projectType === 'js') {
|
|
28
|
-
await initJsProject(cliServer);
|
|
58
|
+
await initJsProject(cliServer, d);
|
|
29
59
|
} else {
|
|
30
|
-
await initExternalProject(cliServer);
|
|
60
|
+
await initExternalProject(cliServer, d);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Offer to install the testing skill
|
|
64
|
+
const installSkill = await d.confirm({
|
|
65
|
+
message: 'Install the test-mcp-server skill? (helps your coding agent write tests)',
|
|
66
|
+
initialValue: true,
|
|
67
|
+
});
|
|
68
|
+
if (!d.isCancel(installSkill) && installSkill) {
|
|
69
|
+
try {
|
|
70
|
+
d.execSync('npx skills add Sunpeak-AI/sunpeak@test-mcp-server', {
|
|
71
|
+
cwd: d.cwd(),
|
|
72
|
+
stdio: 'inherit',
|
|
73
|
+
});
|
|
74
|
+
} catch {
|
|
75
|
+
d.log.info('Skill install skipped. Install later: npx skills add Sunpeak-AI/sunpeak@test-mcp-server');
|
|
76
|
+
}
|
|
31
77
|
}
|
|
32
78
|
|
|
33
|
-
|
|
79
|
+
d.outro('Done!');
|
|
34
80
|
}
|
|
35
81
|
|
|
36
|
-
function detectProjectType() {
|
|
37
|
-
const cwd =
|
|
82
|
+
function detectProjectType(d) {
|
|
83
|
+
const cwd = d.cwd();
|
|
38
84
|
const pkgPath = join(cwd, 'package.json');
|
|
39
85
|
|
|
40
|
-
if (existsSync(pkgPath)) {
|
|
86
|
+
if (d.existsSync(pkgPath)) {
|
|
41
87
|
try {
|
|
42
|
-
const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
|
|
88
|
+
const pkg = JSON.parse(d.readFileSync(pkgPath, 'utf-8'));
|
|
43
89
|
const deps = { ...pkg.dependencies, ...pkg.devDependencies };
|
|
44
90
|
if ('sunpeak' in deps) return 'sunpeak';
|
|
45
91
|
return 'js';
|
|
@@ -52,7 +98,7 @@ function detectProjectType() {
|
|
|
52
98
|
return 'external';
|
|
53
99
|
}
|
|
54
100
|
|
|
55
|
-
async function getServerConfig(cliServer) {
|
|
101
|
+
async function getServerConfig(cliServer, d) {
|
|
56
102
|
// If provided via --server flag, detect type automatically
|
|
57
103
|
if (cliServer) {
|
|
58
104
|
if (cliServer.startsWith('http://') || cliServer.startsWith('https://')) {
|
|
@@ -61,7 +107,7 @@ async function getServerConfig(cliServer) {
|
|
|
61
107
|
return { type: 'command', value: cliServer };
|
|
62
108
|
}
|
|
63
109
|
|
|
64
|
-
const serverType = await
|
|
110
|
+
const serverType = await d.select({
|
|
65
111
|
message: 'How does your MCP server start?',
|
|
66
112
|
options: [
|
|
67
113
|
{ value: 'command', label: 'Command (e.g., python server.py)' },
|
|
@@ -70,23 +116,23 @@ async function getServerConfig(cliServer) {
|
|
|
70
116
|
],
|
|
71
117
|
});
|
|
72
118
|
|
|
73
|
-
if (
|
|
119
|
+
if (d.isCancel(serverType)) process.exit(0);
|
|
74
120
|
|
|
75
121
|
if (serverType === 'command') {
|
|
76
|
-
const command = await
|
|
122
|
+
const command = await d.text({
|
|
77
123
|
message: 'Server start command:',
|
|
78
124
|
placeholder: 'python src/server.py',
|
|
79
125
|
});
|
|
80
|
-
if (
|
|
126
|
+
if (d.isCancel(command)) process.exit(0);
|
|
81
127
|
return { type: 'command', value: command };
|
|
82
128
|
}
|
|
83
129
|
|
|
84
130
|
if (serverType === 'url') {
|
|
85
|
-
const url = await
|
|
131
|
+
const url = await d.text({
|
|
86
132
|
message: 'Server URL:',
|
|
87
133
|
placeholder: 'http://localhost:8000/mcp',
|
|
88
134
|
});
|
|
89
|
-
if (
|
|
135
|
+
if (d.isCancel(url)) process.exit(0);
|
|
90
136
|
return { type: 'url', value: url };
|
|
91
137
|
}
|
|
92
138
|
|
|
@@ -122,21 +168,321 @@ function generateServerConfigBlock(server, relativeTo = '.') {
|
|
|
122
168
|
},`;
|
|
123
169
|
}
|
|
124
170
|
|
|
125
|
-
|
|
126
|
-
|
|
171
|
+
/**
 * Scaffold eval boilerplate into a directory.
 *
 * Writes `eval.config.ts`, `.env.example`, and `example.eval.ts` into
 * `evalsDir`, creating the directory first. If `eval.config.ts` is already
 * present, logs an info message and writes nothing.
 *
 * @param {string} evalsDir - Directory to create eval files in
 * @param {{ server?: { type: string, value: string }, isSunpeak?: boolean, d?: object }} [options]
 *   `server` is the resolved server config (`type` is 'url' or 'command');
 *   `isSunpeak` leaves the server line commented out with a hint;
 *   `d` is the dependency bag (falls back to `defaultDeps`).
 * @returns {void}
 */
function scaffoldEvals(evalsDir, { server, isSunpeak, d: deps } = {}) {
  const d = deps || defaultDeps;
  const configPath = join(evalsDir, 'eval.config.ts');

  if (d.existsSync(configPath)) {
    d.log.info('Eval config already exists. Skipping eval scaffold.');
    return;
  }

  d.mkdirSync(evalsDir, { recursive: true });

  // The server value comes from user input (--server flag or prompt) and is
  // embedded in generated TypeScript source. Escape it so commands containing
  // quotes or backslashes (e.g. Windows paths) still yield a valid literal.
  // Values without special characters produce the same output as before.
  const toSingleQuotedLiteral = (value) =>
    `'${String(value)
      .replace(/\\/g, '\\\\')
      .replace(/'/g, "\\'")
      .replace(/\r/g, '\\r')
      .replace(/\n/g, '\\n')}'`;

  // Generate server line for eval config
  let serverLine = " // server: 'http://localhost:8000/mcp',";
  if (isSunpeak) {
    serverLine = " // Omit server for sunpeak projects (auto-detected).\n // server: 'http://localhost:8000/mcp',";
  } else if (server?.type === 'url' || server?.type === 'command') {
    // URL and command servers are written identically.
    serverLine = ` server: ${toSingleQuotedLiteral(server.value)},`;
  }

  // Build the eval config content
  const configLines = [
    "import { defineEvalConfig } from 'sunpeak/eval';",
    "",
    "// API keys are loaded automatically from .env in this directory (gitignored).",
    "// See .env.example for the format.",
    "",
    "export default defineEvalConfig({",
    " // MCP server to test.",
    serverLine,
    "",
    " models: [",
    " // Uncomment models and install their provider packages:",
    " // 'gpt-4o', // OPENAI_API_KEY",
    " // 'gpt-4o-mini', // OPENAI_API_KEY",
    " // 'o4-mini', // OPENAI_API_KEY",
    " // 'claude-sonnet-4-20250514', // ANTHROPIC_API_KEY",
    " // 'gemini-2.0-flash', // GOOGLE_GENERATIVE_AI_API_KEY",
    " ],",
    "",
    " defaults: {",
    " runs: 10, // Number of times to run each case per model",
    " maxSteps: 1, // Max tool call steps per run",
    " temperature: 0, // 0 for most deterministic results",
    " timeout: 30_000, // Timeout per run in ms",
    " },",
    "});",
    "",
  ];

  d.writeFileSync(configPath, configLines.join('\n'));

  // Scaffold .env template
  d.writeFileSync(
    join(evalsDir, '.env.example'),
    `# Copy this file to .env and fill in your API keys.
# .env is gitignored — never commit API keys.
# OPENAI_API_KEY=sk-...
# ANTHROPIC_API_KEY=sk-ant-...
# GOOGLE_GENERATIVE_AI_API_KEY=...
`
  );

  // Scaffold a placeholder eval the user is expected to replace
  d.writeFileSync(
    join(evalsDir, 'example.eval.ts'),
    `import { defineEval } from 'sunpeak/eval';

/**
* Example eval — tests whether LLMs call your tools correctly.
*
* To get started:
* 1. Configure models in eval.config.ts (uncomment the ones you want)
* 2. Install the AI SDK and provider packages: pnpm add ai @ai-sdk/openai
* 3. Copy .env.example to .env and add your API keys
* 4. Replace this file with evals for your own tools
* 5. Run: sunpeak test --eval
*
* Each case sends a prompt to every configured model and checks
* that the model calls the expected tool with the expected arguments.
* Cases run multiple times (configured via \`runs\` in eval.config.ts)
* to measure reliability across non-deterministic LLM responses.
*/
export default defineEval({
// This eval is skipped when no models are configured.
// Delete this file and create your own evals to get started.
cases: [
{
name: 'example (replace me)',
prompt: 'Show me a demo',
// expect which tool gets called and (optionally) its arguments:
expect: {
tool: 'your-tool-name',
// args: { key: 'value' },
},
},
],
});
`
  );

  d.log.success(`Created ${evalsDir}/ with eval config and example.`);
}
|
|
277
|
+
|
|
278
|
+
/**
 * Scaffold a visual regression test file.
 *
 * Writes a commented-out template to `filePath` unless a file already exists
 * there, in which case it logs an info message and leaves the file untouched.
 * The parent directory is created if missing, so the helper does not depend on
 * callers having prepared it (matching `scaffoldUnitTest`).
 *
 * @param {string} filePath - Full path to the visual test file
 * @param {object} d - Dependencies (`existsSync`, `mkdirSync`, `writeFileSync`, `log`)
 * @returns {void}
 */
function scaffoldVisualTest(filePath, d) {
  if (d.existsSync(filePath)) {
    d.log.info('Visual test already exists. Skipping.');
    return;
  }

  // No-op when the directory already exists.
  d.mkdirSync(dirname(filePath), { recursive: true });

  d.writeFileSync(
    filePath,
    `import { test, expect } from 'sunpeak/test';

/**
* Visual regression tests — compare screenshots against saved baselines.
*
* Screenshots only run with: sunpeak test --visual
* Update baselines with: sunpeak test --visual --update
*
* During normal \`sunpeak test\` runs, screenshot() calls are silently
* skipped so these tests still pass without baselines.
*
* Uncomment the tests below and replace 'your-tool' with your tool name.
*/

// test('tool renders correctly in light mode', async ({ mcp }) => {
// const result = await mcp.callTool('your-tool', { key: 'value' }, { theme: 'light' });
// expect(result).not.toBeError();
//
// // Wait for UI to render, then screenshot:
// // const app = result.app();
// // await expect(app.getByText('Expected text')).toBeVisible();
// // await mcp.screenshot('tool-light');
// });

// test('tool renders correctly in dark mode', async ({ mcp }) => {
// const result = await mcp.callTool('your-tool', { key: 'value' }, { theme: 'dark' });
// expect(result).not.toBeError();
//
// // const app = result.app();
// // await expect(app.getByText('Expected text')).toBeVisible();
// // await mcp.screenshot('tool-dark');
// });

// Full-page screenshot (captures the inspector chrome too):
// test('full page renders correctly', async ({ mcp }) => {
// const result = await mcp.callTool('your-tool', {}, { theme: 'light' });
// const app = result.app();
// await expect(app.getByText('Expected text')).toBeVisible();
// await mcp.screenshot('tool-page', { target: 'page', maxDiffPixelRatio: 0.02 });
// });
`
  );
  d.log.success(`Created ${filePath}`);
}
|
|
335
|
+
|
|
336
|
+
/**
 * Scaffold live test boilerplate (test against real ChatGPT/Claude).
 *
 * Writes `playwright.config.ts` and `example.test.ts` into `liveDir`, creating
 * the directory first. If `playwright.config.ts` is already present, logs an
 * info message and writes nothing.
 *
 * @param {string} liveDir - Directory to create live test files in
 * @param {{ isSunpeak?: boolean, d?: object }} [options]
 *   `isSunpeak` omits the note about customizing `webServer` for non-sunpeak
 *   servers; `d` is the dependency bag (falls back to `defaultDeps`, so calling
 *   with no options no longer throws).
 * @returns {void}
 */
function scaffoldLiveTests(liveDir, { isSunpeak, d: deps } = {}) {
  const d = deps || defaultDeps;
  const configPath = join(liveDir, 'playwright.config.ts');

  if (d.existsSync(configPath)) {
    d.log.info('Live test config already exists. Skipping live test scaffold.');
    return;
  }

  d.mkdirSync(liveDir, { recursive: true });

  // Live test playwright config. The header comment is left open here so the
  // non-sunpeak variant can append its note before the comment is closed.
  const liveConfigPreamble = `import { defineLiveConfig } from 'sunpeak/test/live/config';

/**
* Live tests run against real AI hosts (ChatGPT, Claude).
*
* Prerequisites:
* 1. Your MCP server must be accessible via a public URL (e.g., ngrok tunnel)
* 2. The server must be registered as an MCP action in the host
* 3. Run: sunpeak test --live
*
* On first run, a browser window opens for you to log in to the host.
* The session is saved for subsequent runs (typically lasts a few hours).`;

  // Only projects that are not sunpeak projects get the webServer caveat.
  const externalServerNote = isSunpeak
    ? ''
    : `
*
* NOTE: defineLiveConfig() starts a local sunpeak dev server as its backend.
* If your MCP server is not a sunpeak project, you may need to customize the
* webServer option in the Playwright config below to start your own server,
* or remove webServer entirely if your server is already running.`;

  const liveConfigExport = `export default defineLiveConfig({
// hosts: ['chatgpt'], // Which hosts to test against
// colorScheme: 'light', // Default color scheme
// viewport: { width: 1280, height: 720 },
devOverlay: false,
});
`;

  const configContent = `${liveConfigPreamble}${externalServerNote}
*/
${liveConfigExport}`;

  d.writeFileSync(configPath, configContent);

  // Live test example
  d.writeFileSync(
    join(liveDir, 'example.test.ts'),
    `import { test, expect } from 'sunpeak/test/live';

/**
* Live tests invoke tools through real AI hosts (ChatGPT, Claude).
*
* The \`live\` fixture provides:
* - live.invoke(toolName) — invoke a tool and get the app locator
* - live.setColorScheme('dark', app) — switch theme while app is visible
* - live.page — the underlying Playwright page
*
* Run with: sunpeak test --live
*
* These tests are excluded from normal \`sunpeak test\` runs because
* they require host accounts and cost API credits.
*/

// Uncomment and replace 'your-tool' with the tool name as it appears in the host.
// test('tool renders in the host', async ({ live }) => {
// const app = await live.invoke('your-tool');
//
// await expect(app.getByText('Expected text')).toBeVisible({ timeout: 15_000 });
//
// // Test dark mode:
// await live.setColorScheme('dark', app);
// await expect(app.getByText('Expected text')).toBeVisible();
// });
`
  );

  d.log.success(`Created ${liveDir}/ with live test config and example.`);
}
|
|
420
|
+
|
|
421
|
+
/**
 * Write a commented-out vitest example for testing tool handlers directly.
 *
 * Leaves an existing file untouched (logging an info message instead);
 * otherwise creates the parent directory and writes the template.
 *
 * @param {string} filePath - Full path to the unit test file
 * @param {object} d - Dependencies (`existsSync`, `mkdirSync`, `writeFileSync`, `log`)
 */
function scaffoldUnitTest(filePath, d) {
  const alreadyPresent = d.existsSync(filePath);
  if (alreadyPresent) {
    d.log.info('Unit test already exists. Skipping.');
    return;
  }

  const template = `import { describe, it, expect } from 'vitest';

/**
* Unit tests for your MCP tool handlers.
*
* Import your tool handler directly and test its input/output
* without starting the MCP server or inspector.
*
* Run with: sunpeak test --unit
*
* To set up vitest, add it to your devDependencies:
* npm install -D vitest
*
* Uncomment and customize the tests below for your tools.
*/

// import handler, { tool, schema } from '../../src/tools/your-tool';
// const extra = {} as Parameters<typeof handler>[1];

// describe('your tool', () => {
// it('returns expected output', async () => {
// const result = await handler({ key: 'value' }, extra);
// expect(result.structuredContent).toBeDefined();
// });
//
// it('exports correct tool config', () => {
// expect(tool.title).toBe('Your Tool');
// expect(tool.annotations?.readOnlyHint).toBe(true);
// });
// });
`;

  const parentDir = dirname(filePath);
  d.mkdirSync(parentDir, { recursive: true });
  d.writeFileSync(filePath, template);
  d.log.success(`Created ${filePath}`);
}
|
|
470
|
+
|
|
471
|
+
async function initExternalProject(cliServer, d) {
|
|
472
|
+
d.log.info('Detected non-JS project. Creating self-contained test directory.');
|
|
127
473
|
|
|
128
|
-
const server = await getServerConfig(cliServer);
|
|
129
|
-
const testDir = join(
|
|
474
|
+
const server = await getServerConfig(cliServer, d);
|
|
475
|
+
const testDir = join(d.cwd(), 'tests', 'sunpeak');
|
|
130
476
|
|
|
131
|
-
if (existsSync(testDir)) {
|
|
132
|
-
|
|
477
|
+
if (d.existsSync(testDir)) {
|
|
478
|
+
d.log.warn('tests/sunpeak/ already exists. Skipping scaffold.');
|
|
133
479
|
return;
|
|
134
480
|
}
|
|
135
481
|
|
|
136
|
-
mkdirSync(testDir, { recursive: true });
|
|
482
|
+
d.mkdirSync(testDir, { recursive: true });
|
|
137
483
|
|
|
138
484
|
// package.json
|
|
139
|
-
writeFileSync(
|
|
485
|
+
d.writeFileSync(
|
|
140
486
|
join(testDir, 'package.json'),
|
|
141
487
|
JSON.stringify(
|
|
142
488
|
{
|
|
@@ -157,7 +503,7 @@ async function initExternalProject(cliServer) {
|
|
|
157
503
|
|
|
158
504
|
// sunpeak.config.ts (used as playwright config)
|
|
159
505
|
const serverBlock = generateServerConfigBlock(server, '../..');
|
|
160
|
-
writeFileSync(
|
|
506
|
+
d.writeFileSync(
|
|
161
507
|
join(testDir, 'playwright.config.ts'),
|
|
162
508
|
`import { defineConfig } from 'sunpeak/test/config';
|
|
163
509
|
|
|
@@ -168,7 +514,7 @@ ${serverBlock}
|
|
|
168
514
|
);
|
|
169
515
|
|
|
170
516
|
// tsconfig.json
|
|
171
|
-
writeFileSync(
|
|
517
|
+
d.writeFileSync(
|
|
172
518
|
join(testDir, 'tsconfig.json'),
|
|
173
519
|
JSON.stringify(
|
|
174
520
|
{
|
|
@@ -185,8 +531,8 @@ ${serverBlock}
|
|
|
185
531
|
) + '\n'
|
|
186
532
|
);
|
|
187
533
|
|
|
188
|
-
//
|
|
189
|
-
writeFileSync(
|
|
534
|
+
// 1. E2E test — smoke test, verifies the server is reachable
|
|
535
|
+
d.writeFileSync(
|
|
190
536
|
join(testDir, 'smoke.test.ts'),
|
|
191
537
|
`import { test, expect } from 'sunpeak/test';
|
|
192
538
|
|
|
@@ -207,27 +553,40 @@ test('server is reachable and inspector loads', async ({ mcp }) => {
|
|
|
207
553
|
`
|
|
208
554
|
);
|
|
209
555
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
556
|
+
// 2. Visual regression test
|
|
557
|
+
scaffoldVisualTest(join(testDir, 'visual.test.ts'), d);
|
|
558
|
+
|
|
559
|
+
// 3. Live tests
|
|
560
|
+
scaffoldLiveTests(join(testDir, 'live'), { isSunpeak: false, d });
|
|
561
|
+
|
|
562
|
+
// 4. Eval boilerplate
|
|
563
|
+
scaffoldEvals(join(testDir, 'evals'), { server, d });
|
|
564
|
+
|
|
565
|
+
d.log.success('Created tests/sunpeak/ with all test types.');
|
|
566
|
+
d.log.step('Next steps:');
|
|
567
|
+
d.log.message(' cd tests/sunpeak');
|
|
568
|
+
d.log.message(' npm install');
|
|
569
|
+
d.log.message(' npx playwright install chromium');
|
|
570
|
+
d.log.message('');
|
|
571
|
+
d.log.message(' npx sunpeak test # E2E tests');
|
|
572
|
+
d.log.message(' npx sunpeak test --visual # Visual regression (generates baselines on first run)');
|
|
573
|
+
d.log.message(' npx sunpeak test --live # Live tests against real hosts (requires login)');
|
|
574
|
+
d.log.message(' npx sunpeak test --eval # Multi-model evals (configure models in evals/eval.config.ts)');
|
|
216
575
|
}
|
|
217
576
|
|
|
218
|
-
async function initJsProject(cliServer) {
|
|
219
|
-
|
|
577
|
+
async function initJsProject(cliServer, d) {
|
|
578
|
+
d.log.info('Detected JS/TS project. Adding test config at project root.');
|
|
220
579
|
|
|
221
|
-
const server = await getServerConfig(cliServer);
|
|
222
|
-
const cwd =
|
|
580
|
+
const server = await getServerConfig(cliServer, d);
|
|
581
|
+
const cwd = d.cwd();
|
|
223
582
|
|
|
224
583
|
// Create playwright.config.ts
|
|
225
584
|
const configPath = join(cwd, 'playwright.config.ts');
|
|
226
|
-
if (existsSync(configPath)) {
|
|
227
|
-
|
|
585
|
+
if (d.existsSync(configPath)) {
|
|
586
|
+
d.log.warn('playwright.config.ts already exists. Skipping config creation.');
|
|
228
587
|
} else {
|
|
229
588
|
const serverBlock = generateServerConfigBlock(server);
|
|
230
|
-
writeFileSync(
|
|
589
|
+
d.writeFileSync(
|
|
231
590
|
configPath,
|
|
232
591
|
`import { defineConfig } from 'sunpeak/test/config';
|
|
233
592
|
|
|
@@ -236,16 +595,16 @@ ${serverBlock}
|
|
|
236
595
|
});
|
|
237
596
|
`
|
|
238
597
|
);
|
|
239
|
-
|
|
598
|
+
d.log.success('Created playwright.config.ts');
|
|
240
599
|
}
|
|
241
600
|
|
|
242
|
-
//
|
|
243
|
-
const
|
|
244
|
-
mkdirSync(
|
|
601
|
+
// 1. E2E test — smoke test
|
|
602
|
+
const e2eDir = join(cwd, 'tests', 'e2e');
|
|
603
|
+
d.mkdirSync(e2eDir, { recursive: true });
|
|
245
604
|
|
|
246
|
-
const testPath = join(
|
|
247
|
-
if (!existsSync(testPath)) {
|
|
248
|
-
writeFileSync(
|
|
605
|
+
const testPath = join(e2eDir, 'smoke.test.ts');
|
|
606
|
+
if (!d.existsSync(testPath)) {
|
|
607
|
+
d.writeFileSync(
|
|
249
608
|
testPath,
|
|
250
609
|
`import { test, expect } from 'sunpeak/test';
|
|
251
610
|
|
|
@@ -264,42 +623,85 @@ test('server is reachable and inspector loads', async ({ mcp }) => {
|
|
|
264
623
|
// });
|
|
265
624
|
`
|
|
266
625
|
);
|
|
267
|
-
|
|
626
|
+
d.log.success('Created tests/e2e/smoke.test.ts');
|
|
268
627
|
}
|
|
269
628
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
629
|
+
// 2. Visual regression test
|
|
630
|
+
scaffoldVisualTest(join(e2eDir, 'visual.test.ts'), d);
|
|
631
|
+
|
|
632
|
+
// 3. Live tests
|
|
633
|
+
scaffoldLiveTests(join(cwd, 'tests', 'live'), { isSunpeak: false, d });
|
|
634
|
+
|
|
635
|
+
// 4. Eval boilerplate
|
|
636
|
+
scaffoldEvals(join(cwd, 'tests', 'evals'), { server, d });
|
|
637
|
+
|
|
638
|
+
// 5. Unit test
|
|
639
|
+
scaffoldUnitTest(join(cwd, 'tests', 'unit', 'example.test.ts'), d);
|
|
640
|
+
|
|
641
|
+
d.log.step('Next steps:');
|
|
642
|
+
d.log.message(' npm install -D sunpeak @playwright/test vitest');
|
|
643
|
+
d.log.message(' npx playwright install chromium');
|
|
644
|
+
d.log.message('');
|
|
645
|
+
d.log.message(' npx sunpeak test # E2E tests');
|
|
646
|
+
d.log.message(' npx sunpeak test --unit # Unit tests (vitest)');
|
|
647
|
+
d.log.message(' npx sunpeak test --visual # Visual regression');
|
|
648
|
+
d.log.message(' npx sunpeak test --live # Live tests against real hosts');
|
|
649
|
+
d.log.message(' npx sunpeak test --eval # Multi-model evals');
|
|
274
650
|
}
|
|
275
651
|
|
|
276
|
-
async function initSunpeakProject() {
|
|
277
|
-
|
|
652
|
+
async function initSunpeakProject(d) {
|
|
653
|
+
d.log.info('Detected sunpeak project. Updating config to use defineConfig().');
|
|
278
654
|
|
|
279
|
-
const cwd =
|
|
655
|
+
const cwd = d.cwd();
|
|
280
656
|
const configPath = join(cwd, 'playwright.config.ts');
|
|
281
657
|
|
|
282
|
-
if (existsSync(configPath)) {
|
|
283
|
-
const content = readFileSync(configPath, 'utf-8');
|
|
658
|
+
if (d.existsSync(configPath)) {
|
|
659
|
+
const content = d.readFileSync(configPath, 'utf-8');
|
|
284
660
|
if (content.includes('sunpeak/test/config')) {
|
|
285
|
-
|
|
286
|
-
|
|
661
|
+
d.log.info('Config already uses sunpeak/test/config. Nothing to do.');
|
|
662
|
+
} else {
|
|
663
|
+
d.log.warn('playwright.config.ts exists but does not use sunpeak/test/config.');
|
|
664
|
+
d.log.message(' To migrate, replace your config with:');
|
|
665
|
+
d.log.message(" import { defineConfig } from 'sunpeak/test/config';");
|
|
666
|
+
d.log.message(' export default defineConfig();');
|
|
287
667
|
}
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
`import { defineConfig } from 'sunpeak/test/config';
|
|
668
|
+
} else {
|
|
669
|
+
d.writeFileSync(
|
|
670
|
+
configPath,
|
|
671
|
+
`import { defineConfig } from 'sunpeak/test/config';
|
|
293
672
|
|
|
294
673
|
export default defineConfig();
|
|
295
674
|
`
|
|
296
|
-
|
|
675
|
+
);
|
|
676
|
+
d.log.success('Updated playwright.config.ts to use defineConfig()');
|
|
677
|
+
}
|
|
297
678
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
679
|
+
// Scaffold missing test types
|
|
680
|
+
|
|
681
|
+
// 1. Visual regression test
|
|
682
|
+
const e2eDir = join(cwd, 'tests', 'e2e');
|
|
683
|
+
d.mkdirSync(e2eDir, { recursive: true });
|
|
684
|
+
scaffoldVisualTest(join(e2eDir, 'visual.test.ts'), d);
|
|
685
|
+
|
|
686
|
+
// 2. Live tests
|
|
687
|
+
scaffoldLiveTests(join(cwd, 'tests', 'live'), { isSunpeak: true, d });
|
|
688
|
+
|
|
689
|
+
// 3. Eval boilerplate
|
|
690
|
+
scaffoldEvals(join(cwd, 'tests', 'evals'), { isSunpeak: true, d });
|
|
691
|
+
|
|
692
|
+
// 4. Unit test
|
|
693
|
+
scaffoldUnitTest(join(cwd, 'tests', 'unit', 'example.test.ts'), d);
|
|
694
|
+
|
|
695
|
+
d.log.step('Scaffolded test types:');
|
|
696
|
+
d.log.message(' tests/e2e/visual.test.ts — Visual regression (sunpeak test --visual)');
|
|
697
|
+
d.log.message(' tests/live/ — Live host tests (sunpeak test --live)');
|
|
698
|
+
d.log.message(' tests/evals/ — Multi-model evals (sunpeak test --eval)');
|
|
699
|
+
d.log.message(' tests/unit/example.test.ts — Unit tests (sunpeak test --unit)');
|
|
700
|
+
d.log.message('');
|
|
701
|
+
d.log.message(' Migrate existing e2e tests:');
|
|
702
|
+
d.log.message(' Replace: import { test, expect } from "@playwright/test"');
|
|
703
|
+
d.log.message(' With: import { test, expect } from "sunpeak/test"');
|
|
704
|
+
d.log.message('');
|
|
705
|
+
d.log.message(' Use the `mcp` fixture instead of raw page navigation.');
|
|
706
|
+
d.log.message(' See sunpeak docs for migration examples.');
|
|
305
707
|
}
|