sunpeak 0.19.4 → 0.19.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/bin/commands/dev.mjs +1 -1
- package/bin/commands/inspect.mjs +1 -1
- package/bin/commands/new.mjs +92 -10
- package/bin/commands/start.mjs +3 -1
- package/bin/commands/test-init.mjs +548 -76
- package/bin/commands/test.mjs +401 -4
- package/bin/lib/eval/eval-providers.mjs +34 -0
- package/bin/lib/eval/eval-reporter.mjs +105 -0
- package/bin/lib/eval/eval-runner.mjs +362 -0
- package/bin/lib/eval/eval-types.d.mts +168 -0
- package/bin/lib/eval/eval-vitest-plugin.mjs +147 -0
- package/bin/lib/eval/model-registry.mjs +73 -0
- package/bin/lib/inspect/inspect-config.mjs +1 -1
- package/bin/lib/sandbox-server.mjs +5 -2
- package/bin/lib/test/test-config.mjs +1 -1
- package/bin/sunpeak.js +1 -0
- package/dist/chatgpt/index.cjs +1 -1
- package/dist/chatgpt/index.js +1 -1
- package/dist/claude/index.cjs +1 -1
- package/dist/claude/index.js +1 -1
- package/dist/host/chatgpt/index.cjs +1 -1
- package/dist/host/chatgpt/index.js +1 -1
- package/dist/index.cjs +2 -2
- package/dist/index.js +2 -2
- package/dist/inspector/index.cjs +1 -1
- package/dist/inspector/index.js +1 -1
- package/dist/{inspector-Bp9jrHIu.js → inspector-D5DckQuU.js} +19 -19
- package/dist/{inspector-Bp9jrHIu.js.map → inspector-D5DckQuU.js.map} +1 -1
- package/dist/{inspector-Cvq3yjNL.cjs → inspector-jY9O18z9.cjs} +19 -19
- package/dist/{inspector-Cvq3yjNL.cjs.map → inspector-jY9O18z9.cjs.map} +1 -1
- package/dist/mcp/index.cjs +2 -2
- package/dist/mcp/index.cjs.map +1 -1
- package/dist/mcp/index.js +2 -2
- package/dist/mcp/index.js.map +1 -1
- package/dist/{use-app-Ck5kR1Sf.js → use-app-Bfargfa3.js} +2 -2
- package/dist/{use-app-Ck5kR1Sf.js.map → use-app-Bfargfa3.js.map} +1 -1
- package/dist/{use-app-DHYiev3D.cjs → use-app-CbsBEmwv.cjs} +2 -2
- package/dist/{use-app-DHYiev3D.cjs.map → use-app-CbsBEmwv.cjs.map} +1 -1
- package/package.json +32 -2
- package/template/README.md +17 -7
- package/template/_gitignore +2 -0
- package/template/dist/albums/albums.html +1 -1
- package/template/dist/albums/albums.json +1 -1
- package/template/dist/carousel/carousel.html +1 -1
- package/template/dist/carousel/carousel.json +1 -1
- package/template/dist/map/map.html +1 -1
- package/template/dist/map/map.json +1 -1
- package/template/dist/review/review.html +1 -1
- package/template/dist/review/review.json +1 -1
- package/template/node_modules/.bin/vitest +2 -2
- package/template/node_modules/.vite/deps/_metadata.json +3 -3
- package/template/node_modules/.vite-mcp/deps/_metadata.json +20 -20
- package/template/node_modules/.vite-mcp/deps/vitest.js +7 -7
- package/template/node_modules/.vite-mcp/deps/vitest.js.map +1 -1
- package/template/tests/evals/_env.example +5 -0
- package/template/tests/evals/albums.eval.ts +31 -0
- package/template/tests/evals/carousel.eval.ts +16 -0
- package/template/tests/evals/eval.config.ts +26 -0
- package/template/tests/evals/map.eval.ts +16 -0
- package/template/tests/evals/review.eval.ts +53 -0
|
@@ -1,6 +1,43 @@
|
|
|
1
1
|
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
|
|
2
|
-
import {
|
|
2
|
+
import { execSync } from 'child_process';
|
|
3
|
+
import { join, dirname } from 'path';
|
|
3
4
|
import * as p from '@clack/prompts';
|
|
5
|
+
import { EVAL_PROVIDERS, generateModelLines } from '../lib/eval/eval-providers.mjs';
|
|
6
|
+
import { detectPackageManager } from '../utils.mjs';
|
|
7
|
+
|
|
8
|
+
/**
 * Interactive prompt asking which AI providers should be configured for evals.
 *
 * Every entry of EVAL_PROVIDERS is offered in a multi-select with nothing
 * pre-selected, and submitting an empty selection is allowed.
 *
 * @returns {Promise<Array>} The chosen provider entries, or an empty array
 *   when the prompt was cancelled.
 */
async function defaultSelectProviders() {
  // The provider entry itself is the option value, so callers get the full
  // entry back rather than just its label.
  const options = EVAL_PROVIDERS.map((provider) => ({
    value: provider,
    label: provider.label,
  }));

  const answer = await p.multiselect({
    message: 'AI providers for evals (space to toggle, enter to skip)',
    options,
    initialValues: [],
    required: false,
  });

  return p.isCancel(answer) ? [] : answer;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Default dependencies (real implementations) used by `testInit` and the
 * scaffold helpers. Override any of them in tests via the `deps` parameter.
 */
export const defaultDeps = {
  // File system and process access.
  existsSync,
  readFileSync,
  writeFileSync,
  mkdirSync,
  execSync,
  cwd: () => process.cwd(),

  // Prompt and logging primitives from @clack/prompts.
  intro: p.intro,
  outro: p.outro,
  confirm: p.confirm,
  isCancel: p.isCancel,
  select: p.select,
  text: p.text,
  log: p.log,
  password: p.password,

  // Higher-level helpers.
  selectProviders: defaultSelectProviders,
  detectPackageManager,
};
|
|
4
41
|
|
|
5
42
|
/**
|
|
6
43
|
* sunpeak test init — Scaffold test infrastructure for MCP servers.
|
|
@@ -9,9 +46,18 @@ import * as p from '@clack/prompts';
|
|
|
9
46
|
* - Non-JS projects: self-contained tests/sunpeak/ directory
|
|
10
47
|
* - JS/TS projects: root-level config + test files
|
|
11
48
|
* - sunpeak projects: migrate to defineConfig()
|
|
49
|
+
*
|
|
50
|
+
* Scaffolds all 5 test types:
|
|
51
|
+
* 1. E2E tests — Playwright-based inspector tests (mcp fixture)
|
|
52
|
+
* 2. Visual regression — Screenshot comparison via mcp.screenshot()
|
|
53
|
+
* 3. Live tests — Test against real ChatGPT/Claude hosts
|
|
54
|
+
* 4. Evals — Multi-model tool calling reliability tests
|
|
55
|
+
* 5. Unit tests — Direct tool handler tests (JS/TS projects only)
|
|
12
56
|
*/
|
|
13
|
-
export async function testInit(args = []) {
|
|
14
|
-
|
|
57
|
+
export async function testInit(args = [], deps = defaultDeps) {
|
|
58
|
+
const d = { ...defaultDeps, ...deps };
|
|
59
|
+
|
|
60
|
+
d.intro('Setting up sunpeak tests');
|
|
15
61
|
|
|
16
62
|
// Parse --server flag from CLI args
|
|
17
63
|
const serverIdx = args.indexOf('--server');
|
|
@@ -20,26 +66,97 @@ export async function testInit(args = []) {
|
|
|
20
66
|
? args[serverIdx + 1]
|
|
21
67
|
: undefined;
|
|
22
68
|
|
|
23
|
-
const projectType = detectProjectType();
|
|
69
|
+
const projectType = detectProjectType(d);
|
|
24
70
|
|
|
25
71
|
if (projectType === 'sunpeak') {
|
|
26
|
-
await initSunpeakProject();
|
|
72
|
+
await initSunpeakProject(d);
|
|
27
73
|
} else if (projectType === 'js') {
|
|
28
|
-
await initJsProject(cliServer);
|
|
74
|
+
await initJsProject(cliServer, d);
|
|
29
75
|
} else {
|
|
30
|
-
await initExternalProject(cliServer);
|
|
76
|
+
await initExternalProject(cliServer, d);
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// Offer to configure eval providers
|
|
80
|
+
const providers = await d.selectProviders();
|
|
81
|
+
if (!d.isCancel(providers) && providers.length > 0) {
|
|
82
|
+
const pm = d.detectPackageManager();
|
|
83
|
+
const pkgsToInstall = ['ai', ...providers.map((p) => p.pkg)];
|
|
84
|
+
const installCmd = `${pm} add -D ${pkgsToInstall.join(' ')}`;
|
|
85
|
+
try {
|
|
86
|
+
d.execSync(installCmd, { cwd: d.cwd(), stdio: 'inherit' });
|
|
87
|
+
} catch {
|
|
88
|
+
d.log.info(`Provider install failed. Install manually: ${installCmd}`);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Uncomment selected models in eval.config.ts
|
|
92
|
+
const evalDir = d.existsSync(join(d.cwd(), 'tests', 'evals'))
|
|
93
|
+
? join(d.cwd(), 'tests', 'evals')
|
|
94
|
+
: d.existsSync(join(d.cwd(), 'tests', 'sunpeak', 'evals'))
|
|
95
|
+
? join(d.cwd(), 'tests', 'sunpeak', 'evals')
|
|
96
|
+
: null;
|
|
97
|
+
if (evalDir) {
|
|
98
|
+
const configPath = join(evalDir, 'eval.config.ts');
|
|
99
|
+
if (d.existsSync(configPath)) {
|
|
100
|
+
let config = d.readFileSync(configPath, 'utf-8');
|
|
101
|
+
for (const prov of providers) {
|
|
102
|
+
for (const model of prov.models) {
|
|
103
|
+
config = config.replace(
|
|
104
|
+
new RegExp(`^(\\s*)// ('${model.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}',?.*)$`, 'm'),
|
|
105
|
+
'$1$2'
|
|
106
|
+
);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
d.writeFileSync(configPath, config);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Prompt for API keys and write .env
|
|
113
|
+
const envLines = [];
|
|
114
|
+
const seen = new Set();
|
|
115
|
+
for (const prov of providers) {
|
|
116
|
+
if (seen.has(prov.envVar)) continue;
|
|
117
|
+
seen.add(prov.envVar);
|
|
118
|
+
const key = await d.password({
|
|
119
|
+
message: `${prov.envVar} (enter to skip)`,
|
|
120
|
+
mask: '*',
|
|
121
|
+
});
|
|
122
|
+
if (!d.isCancel(key) && key) {
|
|
123
|
+
envLines.push(`${prov.envVar}=${key}`);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
if (envLines.length > 0 && evalDir) {
|
|
127
|
+
const relEnvPath = evalDir.startsWith(d.cwd()) ? evalDir.slice(d.cwd().length + 1) : evalDir;
|
|
128
|
+
d.writeFileSync(join(evalDir, '.env'), envLines.join('\n') + '\n');
|
|
129
|
+
d.log.info(`API keys saved to ${relEnvPath}/.env (gitignored)`);
|
|
130
|
+
}
|
|
131
|
+
}
|
|
31
132
|
}
|
|
32
133
|
|
|
33
|
-
|
|
134
|
+
// Offer to install the testing skill
|
|
135
|
+
const installSkill = await d.confirm({
|
|
136
|
+
message: 'Install the test-mcp-server skill? (helps your coding agent write tests)',
|
|
137
|
+
initialValue: true,
|
|
138
|
+
});
|
|
139
|
+
if (!d.isCancel(installSkill) && installSkill) {
|
|
140
|
+
try {
|
|
141
|
+
d.execSync('pnpm dlx skills add Sunpeak-AI/sunpeak@test-mcp-server', {
|
|
142
|
+
cwd: d.cwd(),
|
|
143
|
+
stdio: 'inherit',
|
|
144
|
+
});
|
|
145
|
+
} catch {
|
|
146
|
+
d.log.info('Skill install skipped. Install later: pnpm dlx skills add Sunpeak-AI/sunpeak@test-mcp-server');
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
d.outro('Done!');
|
|
34
151
|
}
|
|
35
152
|
|
|
36
|
-
function detectProjectType() {
|
|
37
|
-
const cwd =
|
|
153
|
+
function detectProjectType(d) {
|
|
154
|
+
const cwd = d.cwd();
|
|
38
155
|
const pkgPath = join(cwd, 'package.json');
|
|
39
156
|
|
|
40
|
-
if (existsSync(pkgPath)) {
|
|
157
|
+
if (d.existsSync(pkgPath)) {
|
|
41
158
|
try {
|
|
42
|
-
const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
|
|
159
|
+
const pkg = JSON.parse(d.readFileSync(pkgPath, 'utf-8'));
|
|
43
160
|
const deps = { ...pkg.dependencies, ...pkg.devDependencies };
|
|
44
161
|
if ('sunpeak' in deps) return 'sunpeak';
|
|
45
162
|
return 'js';
|
|
@@ -52,7 +169,7 @@ function detectProjectType() {
|
|
|
52
169
|
return 'external';
|
|
53
170
|
}
|
|
54
171
|
|
|
55
|
-
async function getServerConfig(cliServer) {
|
|
172
|
+
async function getServerConfig(cliServer, d) {
|
|
56
173
|
// If provided via --server flag, detect type automatically
|
|
57
174
|
if (cliServer) {
|
|
58
175
|
if (cliServer.startsWith('http://') || cliServer.startsWith('https://')) {
|
|
@@ -61,7 +178,7 @@ async function getServerConfig(cliServer) {
|
|
|
61
178
|
return { type: 'command', value: cliServer };
|
|
62
179
|
}
|
|
63
180
|
|
|
64
|
-
const serverType = await
|
|
181
|
+
const serverType = await d.select({
|
|
65
182
|
message: 'How does your MCP server start?',
|
|
66
183
|
options: [
|
|
67
184
|
{ value: 'command', label: 'Command (e.g., python server.py)' },
|
|
@@ -70,23 +187,23 @@ async function getServerConfig(cliServer) {
|
|
|
70
187
|
],
|
|
71
188
|
});
|
|
72
189
|
|
|
73
|
-
if (
|
|
190
|
+
if (d.isCancel(serverType)) process.exit(0);
|
|
74
191
|
|
|
75
192
|
if (serverType === 'command') {
|
|
76
|
-
const command = await
|
|
193
|
+
const command = await d.text({
|
|
77
194
|
message: 'Server start command:',
|
|
78
195
|
placeholder: 'python src/server.py',
|
|
79
196
|
});
|
|
80
|
-
if (
|
|
197
|
+
if (d.isCancel(command)) process.exit(0);
|
|
81
198
|
return { type: 'command', value: command };
|
|
82
199
|
}
|
|
83
200
|
|
|
84
201
|
if (serverType === 'url') {
|
|
85
|
-
const url = await
|
|
202
|
+
const url = await d.text({
|
|
86
203
|
message: 'Server URL:',
|
|
87
204
|
placeholder: 'http://localhost:8000/mcp',
|
|
88
205
|
});
|
|
89
|
-
if (
|
|
206
|
+
if (d.isCancel(url)) process.exit(0);
|
|
90
207
|
return { type: 'url', value: url };
|
|
91
208
|
}
|
|
92
209
|
|
|
@@ -122,27 +239,324 @@ function generateServerConfigBlock(server, relativeTo = '.') {
|
|
|
122
239
|
},`;
|
|
123
240
|
}
|
|
124
241
|
|
|
125
|
-
|
|
126
|
-
|
|
242
|
+
/**
 * Render a value as a single-quoted TypeScript string literal.
 *
 * Backslashes, single quotes and line breaks are escaped so that a server
 * command or URL containing them cannot terminate the literal early and
 * leave the generated eval config unparseable.
 *
 * @param {unknown} value - Value to embed (coerced with String()).
 * @returns {string} The literal, including the surrounding quotes.
 */
function quoteEvalConfigString(value) {
  const escaped = String(value)
    .replace(/\\/g, '\\\\')
    .replace(/'/g, "\\'")
    .replace(/\r/g, '\\r')
    .replace(/\n/g, '\\n');
  return `'${escaped}'`;
}

/**
 * Scaffold eval boilerplate into a directory.
 *
 * Writes `eval.config.ts`, `.env.example` and `example.eval.ts`. Does nothing
 * (besides logging) when `eval.config.ts` already exists in `evalsDir`.
 *
 * @param {string} evalsDir - Directory to create eval files in
 * @param {{ server?: object, isSunpeak?: boolean, d?: object }} options
 *   `server` is a `{ type, value }` pair; `d` overrides the default dependencies.
 * @returns {void}
 */
function scaffoldEvals(evalsDir, { server, isSunpeak, d: deps } = {}) {
  const d = deps || defaultDeps;
  const configPath = join(evalsDir, 'eval.config.ts');

  // Never clobber an existing eval setup.
  if (d.existsSync(configPath)) {
    d.log.info('Eval config already exists. Skipping eval scaffold.');
    return;
  }

  d.mkdirSync(evalsDir, { recursive: true });

  // Generate server line for eval config. Without a known server the line
  // stays commented out as a placeholder.
  let serverLine = "  // server: 'http://localhost:8000/mcp',";
  if (isSunpeak) {
    serverLine =
      "  // Omit server for sunpeak projects (auto-detected).\n  // server: 'http://localhost:8000/mcp',";
  } else if (server?.type === 'url' || server?.type === 'command') {
    // URL and command servers are both written as a plain string value.
    serverLine = `  server: ${quoteEvalConfigString(server.value)},`;
  }

  // Build the eval config content
  const configLines = [
    "import { defineEvalConfig } from 'sunpeak/eval';",
    '',
    '// API keys are loaded automatically from .env in this directory (gitignored).',
    '// See .env.example for the format.',
    '',
    'export default defineEvalConfig({',
    '  // MCP server to test.',
    serverLine,
    '',
    '  models: [',
    '    // Uncomment models and install their provider packages:',
    ...generateModelLines(),
    '  ],',
    '',
    '  defaults: {',
    '    runs: 5, // Number of times to run each case per model',
    '    maxSteps: 1, // Max tool call steps per run',
    '    temperature: 0, // 0 for most deterministic results',
    '    timeout: 30_000, // Timeout per run in ms',
    '  },',
    '});',
    '',
  ];

  d.writeFileSync(configPath, configLines.join('\n'));

  // Scaffold .env template
  d.writeFileSync(
    join(evalsDir, '.env.example'),
    `# Copy this file to .env and fill in your API keys.
# .env is gitignored — never commit API keys.
# OPENAI_API_KEY=sk-...
# ANTHROPIC_API_KEY=sk-ant-...
# GOOGLE_GENERATIVE_AI_API_KEY=...
`
  );

  d.writeFileSync(
    join(evalsDir, 'example.eval.ts'),
    `import { defineEval } from 'sunpeak/eval';

/**
 * Example eval — tests whether LLMs call your tools correctly.
 *
 * To get started:
 * 1. Configure models in eval.config.ts (uncomment the ones you want)
 * 2. Install the AI SDK and provider packages (e.g. pnpm add ai @ai-sdk/openai)
 * 3. Copy .env.example to .env and add your API keys
 * 4. Replace this file with evals for your own tools
 * 5. Run: sunpeak test --eval
 *
 * Each case sends a prompt to every configured model and checks
 * that the model calls the expected tool with the expected arguments.
 * Cases run multiple times (configured via \`runs\` in eval.config.ts)
 * to measure reliability across non-deterministic LLM responses.
 */
export default defineEval({
  // This eval is skipped when no models are configured.
  // Delete this file and create your own evals to get started.
  cases: [
    {
      name: 'example (replace me)',
      prompt: 'Show me a demo',
      // expect which tool gets called and (optionally) its arguments:
      expect: {
        tool: 'your-tool-name',
        // args: { key: 'value' },
      },
    },
  ],
});
`
  );

  d.log.success(`Created ${evalsDir}/ with eval config and example.`);
}
|
|
344
|
+
|
|
345
|
+
/**
 * Scaffold a visual regression test file.
 *
 * Skips (with an info log) when the file already exists. Otherwise the parent
 * directory is created if needed and a template with commented-out example
 * tests is written.
 *
 * @param {string} filePath - Full path to the visual test file
 * @param {object} d - Dependencies
 * @returns {void}
 */
function scaffoldVisualTest(filePath, d) {
  if (d.existsSync(filePath)) {
    d.log.info('Visual test already exists. Skipping.');
    return;
  }

  // Create the target directory here rather than relying on every caller to
  // have done so; `recursive` makes this a no-op when it already exists.
  d.mkdirSync(dirname(filePath), { recursive: true });

  d.writeFileSync(
    filePath,
    `import { test, expect } from 'sunpeak/test';

/**
 * Visual regression tests — compare screenshots against saved baselines.
 *
 * Screenshots only run with: sunpeak test --visual
 * Update baselines with: sunpeak test --visual --update
 *
 * During normal \`sunpeak test\` runs, screenshot() calls are silently
 * skipped so these tests still pass without baselines.
 *
 * Uncomment the tests below and replace 'your-tool' with your tool name.
 */

// test('tool renders correctly in light mode', async ({ mcp }) => {
//   const result = await mcp.callTool('your-tool', { key: 'value' }, { theme: 'light' });
//   expect(result).not.toBeError();
//
//   // Wait for UI to render, then screenshot:
//   // const app = result.app();
//   // await expect(app.getByText('Expected text')).toBeVisible();
//   // await mcp.screenshot('tool-light');
// });

// test('tool renders correctly in dark mode', async ({ mcp }) => {
//   const result = await mcp.callTool('your-tool', { key: 'value' }, { theme: 'dark' });
//   expect(result).not.toBeError();
//
//   // const app = result.app();
//   // await expect(app.getByText('Expected text')).toBeVisible();
//   // await mcp.screenshot('tool-dark');
// });

// Full-page screenshot (captures the inspector chrome too):
// test('full page renders correctly', async ({ mcp }) => {
//   const result = await mcp.callTool('your-tool', {}, { theme: 'light' });
//   const app = result.app();
//   await expect(app.getByText('Expected text')).toBeVisible();
//   await mcp.screenshot('tool-page', { target: 'page', maxDiffPixelRatio: 0.02 });
// });
`
  );
  d.log.success(`Created ${filePath}`);
}
|
|
402
|
+
|
|
403
|
+
/**
 * Scaffold live test boilerplate (test against real ChatGPT/Claude).
 *
 * Writes `playwright.config.ts` and `example.test.ts` into `liveDir`. Does
 * nothing (besides logging) when the config file already exists there.
 *
 * @param {string} liveDir - Directory to create live test files in
 * @param {{ isSunpeak?: boolean, d?: object }} options
 *   `isSunpeak` omits the note about customizing `webServer`; `d` overrides
 *   the default dependencies.
 * @returns {void}
 */
function scaffoldLiveTests(liveDir, { isSunpeak, d: deps } = {}) {
  // Fall back to the real dependencies when none are injected, so calling
  // without options does not fail on an undefined `d`.
  const d = deps || defaultDeps;
  const configPath = join(liveDir, 'playwright.config.ts');

  if (d.existsSync(configPath)) {
    d.log.info('Live test config already exists. Skipping live test scaffold.');
    return;
  }

  d.mkdirSync(liveDir, { recursive: true });

  // Live test playwright config: header comment first.
  const docLines = [
    '/**',
    ' * Live tests run against real AI hosts (ChatGPT, Claude).',
    ' *',
    ' * Prerequisites:',
    ' * 1. Your MCP server must be accessible via a public URL (e.g., ngrok tunnel)',
    ' * 2. The server must be registered as an MCP action in the host',
    ' * 3. Run: sunpeak test --live',
    ' *',
    ' * On first run, a browser window opens for you to log in to the host.',
    ' * The session is saved for subsequent runs (typically lasts a few hours).',
  ];
  if (!isSunpeak) {
    // Non-sunpeak servers get an extra hint about the webServer option.
    docLines.push(
      ' *',
      ' * NOTE: defineLiveConfig() starts a local sunpeak dev server as its backend.',
      ' * If your MCP server is not a sunpeak project, you may need to customize the',
      ' * webServer option in the Playwright config below to start your own server,',
      ' * or remove webServer entirely if your server is already running.'
    );
  }
  docLines.push(' */');

  const configContent = [
    "import { defineLiveConfig } from 'sunpeak/test/live/config';",
    '',
    ...docLines,
    'export default defineLiveConfig({',
    "  // hosts: ['chatgpt'], // Which hosts to test against",
    "  // colorScheme: 'light', // Default color scheme",
    '  // viewport: { width: 1280, height: 720 },',
    '  devOverlay: false,',
    '});',
    '',
  ].join('\n');

  d.writeFileSync(configPath, configContent);

  // Live test example
  d.writeFileSync(
    join(liveDir, 'example.test.ts'),
    `import { test, expect } from 'sunpeak/test/live';

/**
 * Live tests invoke tools through real AI hosts (ChatGPT, Claude).
 *
 * The \`live\` fixture provides:
 * - live.invoke(toolName) — invoke a tool and get the app locator
 * - live.setColorScheme('dark', app) — switch theme while app is visible
 * - live.page — the underlying Playwright page
 *
 * Run with: sunpeak test --live
 *
 * These tests are excluded from normal \`sunpeak test\` runs because
 * they require host accounts and cost API credits.
 */

// Uncomment and replace 'your-tool' with the tool name as it appears in the host.
// test('tool renders in the host', async ({ live }) => {
//   const app = await live.invoke('your-tool');
//
//   await expect(app.getByText('Expected text')).toBeVisible({ timeout: 15_000 });
//
//   // Test dark mode:
//   await live.setColorScheme('dark', app);
//   await expect(app.getByText('Expected text')).toBeVisible();
// });
`
  );

  d.log.success(`Created ${liveDir}/ with live test config and example.`);
}
|
|
487
|
+
|
|
488
|
+
/**
 * Scaffold a unit test example for JS/TS projects.
 *
 * Skips (with an info log) when the file already exists. Otherwise the parent
 * directory is created and a vitest template with commented-out example tests
 * is written.
 *
 * @param {string} filePath - Full path to the unit test file
 * @param {object} d - Dependencies
 * @returns {void}
 */
function scaffoldUnitTest(filePath, d) {
  if (d.existsSync(filePath)) {
    d.log.info('Unit test already exists. Skipping.');
    return;
  }

  // Use the project's package manager in the install hint, matching the
  // `<pm> add -D ...` form used elsewhere in this file. Falls back to npm
  // when the injected deps do not provide a detector.
  const pm = typeof d.detectPackageManager === 'function' ? d.detectPackageManager() : 'npm';

  d.mkdirSync(dirname(filePath), { recursive: true });

  d.writeFileSync(
    filePath,
    `import { describe, it, expect } from 'vitest';

/**
 * Unit tests for your MCP tool handlers.
 *
 * Import your tool handler directly and test its input/output
 * without starting the MCP server or inspector.
 *
 * Run with: sunpeak test --unit
 *
 * To set up vitest, add it to your devDependencies:
 *   ${pm} add -D vitest
 *
 * Uncomment and customize the tests below for your tools.
 */

// import handler, { tool, schema } from '../../src/tools/your-tool';
// const extra = {} as Parameters<typeof handler>[1];

// describe('your tool', () => {
//   it('returns expected output', async () => {
//     const result = await handler({ key: 'value' }, extra);
//     expect(result.structuredContent).toBeDefined();
//   });
//
//   it('exports correct tool config', () => {
//     expect(tool.title).toBe('Your Tool');
//     expect(tool.annotations?.readOnlyHint).toBe(true);
//   });
// });
`
  );
  d.log.success(`Created ${filePath}`);
}
|
|
537
|
+
|
|
538
|
+
async function initExternalProject(cliServer, d) {
|
|
539
|
+
d.log.info('Detected non-JS project. Creating self-contained test directory.');
|
|
540
|
+
|
|
541
|
+
const server = await getServerConfig(cliServer, d);
|
|
542
|
+
const testDir = join(d.cwd(), 'tests', 'sunpeak');
|
|
543
|
+
|
|
544
|
+
if (d.existsSync(testDir)) {
|
|
545
|
+
d.log.warn('tests/sunpeak/ already exists. Skipping scaffold.');
|
|
546
|
+
return;
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
d.mkdirSync(testDir, { recursive: true });
|
|
137
550
|
|
|
138
551
|
// package.json
|
|
139
|
-
writeFileSync(
|
|
552
|
+
d.writeFileSync(
|
|
140
553
|
join(testDir, 'package.json'),
|
|
141
554
|
JSON.stringify(
|
|
142
555
|
{
|
|
143
556
|
private: true,
|
|
144
557
|
type: 'module',
|
|
145
558
|
devDependencies: {
|
|
559
|
+
'@types/node': 'latest',
|
|
146
560
|
sunpeak: 'latest',
|
|
147
561
|
'@playwright/test': 'latest',
|
|
148
562
|
},
|
|
@@ -157,7 +571,7 @@ async function initExternalProject(cliServer) {
|
|
|
157
571
|
|
|
158
572
|
// sunpeak.config.ts (used as playwright config)
|
|
159
573
|
const serverBlock = generateServerConfigBlock(server, '../..');
|
|
160
|
-
writeFileSync(
|
|
574
|
+
d.writeFileSync(
|
|
161
575
|
join(testDir, 'playwright.config.ts'),
|
|
162
576
|
`import { defineConfig } from 'sunpeak/test/config';
|
|
163
577
|
|
|
@@ -168,7 +582,7 @@ ${serverBlock}
|
|
|
168
582
|
);
|
|
169
583
|
|
|
170
584
|
// tsconfig.json
|
|
171
|
-
writeFileSync(
|
|
585
|
+
d.writeFileSync(
|
|
172
586
|
join(testDir, 'tsconfig.json'),
|
|
173
587
|
JSON.stringify(
|
|
174
588
|
{
|
|
@@ -185,8 +599,8 @@ ${serverBlock}
|
|
|
185
599
|
) + '\n'
|
|
186
600
|
);
|
|
187
601
|
|
|
188
|
-
//
|
|
189
|
-
writeFileSync(
|
|
602
|
+
// 1. E2E test — smoke test, verifies the server is reachable
|
|
603
|
+
d.writeFileSync(
|
|
190
604
|
join(testDir, 'smoke.test.ts'),
|
|
191
605
|
`import { test, expect } from 'sunpeak/test';
|
|
192
606
|
|
|
@@ -207,27 +621,41 @@ test('server is reachable and inspector loads', async ({ mcp }) => {
|
|
|
207
621
|
`
|
|
208
622
|
);
|
|
209
623
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
624
|
+
// 2. Visual regression test
|
|
625
|
+
scaffoldVisualTest(join(testDir, 'visual.test.ts'), d);
|
|
626
|
+
|
|
627
|
+
// 3. Live tests
|
|
628
|
+
scaffoldLiveTests(join(testDir, 'live'), { isSunpeak: false, d });
|
|
629
|
+
|
|
630
|
+
// 4. Eval boilerplate
|
|
631
|
+
scaffoldEvals(join(testDir, 'evals'), { server, d });
|
|
632
|
+
|
|
633
|
+
d.log.success('Created tests/sunpeak/ with all test types.');
|
|
634
|
+
d.log.step('Next steps:');
|
|
635
|
+
const pm = d.detectPackageManager();
|
|
636
|
+
d.log.message(' cd tests/sunpeak');
|
|
637
|
+
d.log.message(` ${pm} install`);
|
|
638
|
+
d.log.message(` ${pm} exec playwright install chromium`);
|
|
639
|
+
d.log.message('');
|
|
640
|
+
d.log.message(' sunpeak test # E2E tests');
|
|
641
|
+
d.log.message(' sunpeak test --visual # Visual regression (generates baselines on first run)');
|
|
642
|
+
d.log.message(' sunpeak test --live # Live tests against real hosts (requires login)');
|
|
643
|
+
d.log.message(' sunpeak test --eval # Multi-model evals (configure models in evals/eval.config.ts)');
|
|
216
644
|
}
|
|
217
645
|
|
|
218
|
-
async function initJsProject(cliServer) {
|
|
219
|
-
|
|
646
|
+
async function initJsProject(cliServer, d) {
|
|
647
|
+
d.log.info('Detected JS/TS project. Adding test config at project root.');
|
|
220
648
|
|
|
221
|
-
const server = await getServerConfig(cliServer);
|
|
222
|
-
const cwd =
|
|
649
|
+
const server = await getServerConfig(cliServer, d);
|
|
650
|
+
const cwd = d.cwd();
|
|
223
651
|
|
|
224
652
|
// Create playwright.config.ts
|
|
225
653
|
const configPath = join(cwd, 'playwright.config.ts');
|
|
226
|
-
if (existsSync(configPath)) {
|
|
227
|
-
|
|
654
|
+
if (d.existsSync(configPath)) {
|
|
655
|
+
d.log.warn('playwright.config.ts already exists. Skipping config creation.');
|
|
228
656
|
} else {
|
|
229
657
|
const serverBlock = generateServerConfigBlock(server);
|
|
230
|
-
writeFileSync(
|
|
658
|
+
d.writeFileSync(
|
|
231
659
|
configPath,
|
|
232
660
|
`import { defineConfig } from 'sunpeak/test/config';
|
|
233
661
|
|
|
@@ -236,16 +664,16 @@ ${serverBlock}
|
|
|
236
664
|
});
|
|
237
665
|
`
|
|
238
666
|
);
|
|
239
|
-
|
|
667
|
+
d.log.success('Created playwright.config.ts');
|
|
240
668
|
}
|
|
241
669
|
|
|
242
|
-
//
|
|
243
|
-
const
|
|
244
|
-
mkdirSync(
|
|
670
|
+
// 1. E2E test — smoke test
|
|
671
|
+
const e2eDir = join(cwd, 'tests', 'e2e');
|
|
672
|
+
d.mkdirSync(e2eDir, { recursive: true });
|
|
245
673
|
|
|
246
|
-
const testPath = join(
|
|
247
|
-
if (!existsSync(testPath)) {
|
|
248
|
-
writeFileSync(
|
|
674
|
+
const testPath = join(e2eDir, 'smoke.test.ts');
|
|
675
|
+
if (!d.existsSync(testPath)) {
|
|
676
|
+
d.writeFileSync(
|
|
249
677
|
testPath,
|
|
250
678
|
`import { test, expect } from 'sunpeak/test';
|
|
251
679
|
|
|
@@ -264,42 +692,86 @@ test('server is reachable and inspector loads', async ({ mcp }) => {
|
|
|
264
692
|
// });
|
|
265
693
|
`
|
|
266
694
|
);
|
|
267
|
-
|
|
695
|
+
d.log.success('Created tests/e2e/smoke.test.ts');
|
|
268
696
|
}
|
|
269
697
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
698
|
+
// 2. Visual regression test
|
|
699
|
+
scaffoldVisualTest(join(e2eDir, 'visual.test.ts'), d);
|
|
700
|
+
|
|
701
|
+
// 3. Live tests
|
|
702
|
+
scaffoldLiveTests(join(cwd, 'tests', 'live'), { isSunpeak: false, d });
|
|
703
|
+
|
|
704
|
+
// 4. Eval boilerplate
|
|
705
|
+
scaffoldEvals(join(cwd, 'tests', 'evals'), { server, d });
|
|
706
|
+
|
|
707
|
+
// 5. Unit test
|
|
708
|
+
scaffoldUnitTest(join(cwd, 'tests', 'unit', 'example.test.ts'), d);
|
|
709
|
+
|
|
710
|
+
const pkgMgr = d.detectPackageManager();
|
|
711
|
+
d.log.step('Next steps:');
|
|
712
|
+
d.log.message(` ${pkgMgr} add -D sunpeak @playwright/test vitest`);
|
|
713
|
+
d.log.message(` ${pkgMgr} exec playwright install chromium`);
|
|
714
|
+
d.log.message('');
|
|
715
|
+
d.log.message(' sunpeak test # E2E tests');
|
|
716
|
+
d.log.message(' sunpeak test --unit # Unit tests (vitest)');
|
|
717
|
+
d.log.message(' sunpeak test --visual # Visual regression');
|
|
718
|
+
d.log.message(' sunpeak test --live # Live tests against real hosts');
|
|
719
|
+
d.log.message(' sunpeak test --eval # Multi-model evals');
|
|
274
720
|
}
|
|
275
721
|
|
|
276
|
-
async function initSunpeakProject() {
|
|
277
|
-
|
|
722
|
+
/**
 * Set up tests for a project that already depends on sunpeak.
 *
 * Ensures a `playwright.config.ts` based on `sunpeak/test/config` exists
 * (creating it when missing, printing migration hints when an unrelated config
 * is present), then scaffolds the visual, live, eval and unit test boilerplate
 * under `tests/` and prints a summary.
 *
 * @param {object} d - Dependencies
 * @returns {Promise<void>}
 */
async function initSunpeakProject(d) {
  d.log.info('Detected sunpeak project. Updating config to use defineConfig().');

  const root = d.cwd();
  const configPath = join(root, 'playwright.config.ts');

  if (!d.existsSync(configPath)) {
    d.writeFileSync(
      configPath,
      `import { defineConfig } from 'sunpeak/test/config';

export default defineConfig();
`
    );
    // The file did not exist before, so report a creation rather than an update.
    d.log.success('Created playwright.config.ts using defineConfig()');
  } else if (d.readFileSync(configPath, 'utf-8').includes('sunpeak/test/config')) {
    d.log.info('Config already uses sunpeak/test/config. Nothing to do.');
  } else {
    // An unrelated config is left untouched; only explain how to migrate.
    d.log.warn('playwright.config.ts exists but does not use sunpeak/test/config.');
    d.log.message(' To migrate, replace your config with:');
    d.log.message(" import { defineConfig } from 'sunpeak/test/config';");
    d.log.message(' export default defineConfig();');
  }

  // Scaffold missing test types

  // 1. Visual regression test
  const e2eDir = join(root, 'tests', 'e2e');
  d.mkdirSync(e2eDir, { recursive: true });
  scaffoldVisualTest(join(e2eDir, 'visual.test.ts'), d);

  // 2. Live tests
  scaffoldLiveTests(join(root, 'tests', 'live'), { isSunpeak: true, d });

  // 3. Eval boilerplate
  scaffoldEvals(join(root, 'tests', 'evals'), { isSunpeak: true, d });

  // 4. Unit test
  scaffoldUnitTest(join(root, 'tests', 'unit', 'example.test.ts'), d);

  d.log.step('Scaffolded test types:');
  const summary = [
    ' tests/e2e/visual.test.ts — Visual regression (sunpeak test --visual)',
    ' tests/live/ — Live host tests (sunpeak test --live)',
    ' tests/evals/ — Multi-model evals (sunpeak test --eval)',
    ' tests/unit/example.test.ts — Unit tests (sunpeak test --unit)',
    '',
    ' Migrate existing e2e tests:',
    ' Replace: import { test, expect } from "@playwright/test"',
    ' With: import { test, expect } from "sunpeak/test"',
    '',
    ' Use the `mcp` fixture instead of raw page navigation.',
    ' See sunpeak docs for migration examples.',
  ];
  for (const line of summary) {
    d.log.message(line);
  }
}
|