sunpeak 0.19.4 → 0.19.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/bin/commands/dev.mjs +1 -1
- package/bin/commands/inspect.mjs +1 -1
- package/bin/commands/new.mjs +92 -10
- package/bin/commands/start.mjs +3 -1
- package/bin/commands/test-init.mjs +548 -76
- package/bin/commands/test.mjs +401 -4
- package/bin/lib/eval/eval-providers.mjs +34 -0
- package/bin/lib/eval/eval-reporter.mjs +105 -0
- package/bin/lib/eval/eval-runner.mjs +362 -0
- package/bin/lib/eval/eval-types.d.mts +168 -0
- package/bin/lib/eval/eval-vitest-plugin.mjs +147 -0
- package/bin/lib/eval/model-registry.mjs +73 -0
- package/bin/lib/inspect/inspect-config.mjs +1 -1
- package/bin/lib/sandbox-server.mjs +5 -2
- package/bin/lib/test/test-config.mjs +1 -1
- package/bin/sunpeak.js +1 -0
- package/dist/chatgpt/index.cjs +1 -1
- package/dist/chatgpt/index.js +1 -1
- package/dist/claude/index.cjs +1 -1
- package/dist/claude/index.js +1 -1
- package/dist/host/chatgpt/index.cjs +1 -1
- package/dist/host/chatgpt/index.js +1 -1
- package/dist/index.cjs +2 -2
- package/dist/index.js +2 -2
- package/dist/inspector/index.cjs +1 -1
- package/dist/inspector/index.js +1 -1
- package/dist/{inspector-Bp9jrHIu.js → inspector-D5DckQuU.js} +19 -19
- package/dist/{inspector-Bp9jrHIu.js.map → inspector-D5DckQuU.js.map} +1 -1
- package/dist/{inspector-Cvq3yjNL.cjs → inspector-jY9O18z9.cjs} +19 -19
- package/dist/{inspector-Cvq3yjNL.cjs.map → inspector-jY9O18z9.cjs.map} +1 -1
- package/dist/mcp/index.cjs +2 -2
- package/dist/mcp/index.cjs.map +1 -1
- package/dist/mcp/index.js +2 -2
- package/dist/mcp/index.js.map +1 -1
- package/dist/{use-app-Ck5kR1Sf.js → use-app-Bfargfa3.js} +2 -2
- package/dist/{use-app-Ck5kR1Sf.js.map → use-app-Bfargfa3.js.map} +1 -1
- package/dist/{use-app-DHYiev3D.cjs → use-app-CbsBEmwv.cjs} +2 -2
- package/dist/{use-app-DHYiev3D.cjs.map → use-app-CbsBEmwv.cjs.map} +1 -1
- package/package.json +32 -2
- package/template/README.md +17 -7
- package/template/_gitignore +2 -0
- package/template/dist/albums/albums.html +1 -1
- package/template/dist/albums/albums.json +1 -1
- package/template/dist/carousel/carousel.html +1 -1
- package/template/dist/carousel/carousel.json +1 -1
- package/template/dist/map/map.html +1 -1
- package/template/dist/map/map.json +1 -1
- package/template/dist/review/review.html +1 -1
- package/template/dist/review/review.json +1 -1
- package/template/node_modules/.bin/vitest +2 -2
- package/template/node_modules/.vite/deps/_metadata.json +3 -3
- package/template/node_modules/.vite-mcp/deps/_metadata.json +20 -20
- package/template/node_modules/.vite-mcp/deps/vitest.js +7 -7
- package/template/node_modules/.vite-mcp/deps/vitest.js.map +1 -1
- package/template/tests/evals/_env.example +5 -0
- package/template/tests/evals/albums.eval.ts +31 -0
- package/template/tests/evals/carousel.eval.ts +16 -0
- package/template/tests/evals/eval.config.ts +26 -0
- package/template/tests/evals/map.eval.ts +16 -0
- package/template/tests/evals/review.eval.ts +53 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { defineEval } from 'sunpeak/eval';
|
|
2
|
+
|
|
3
|
+
export default defineEval({
|
|
4
|
+
cases: [
|
|
5
|
+
{
|
|
6
|
+
name: 'asks for photo albums',
|
|
7
|
+
prompt: 'Show me my photo albums',
|
|
8
|
+
expect: { tool: 'show-albums' },
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
name: 'asks for food photos',
|
|
12
|
+
prompt: 'Show me photos from my Austin pizza tour',
|
|
13
|
+
expect: { tool: 'show-albums' },
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
name: 'asks for a specific category',
|
|
17
|
+
prompt: 'Show me my travel photos',
|
|
18
|
+
expect: { tool: 'show-albums' },
|
|
19
|
+
},
|
|
20
|
+
// To also check argument extraction, add args expectations
|
|
21
|
+
// (import { expect } from 'vitest' to use matchers):
|
|
22
|
+
// {
|
|
23
|
+
// name: 'passes search term',
|
|
24
|
+
// prompt: 'Show me photos from my Austin pizza tour',
|
|
25
|
+
// expect: {
|
|
26
|
+
// tool: 'show-albums',
|
|
27
|
+
// args: { search: expect.stringMatching(/pizza|austin/i) },
|
|
28
|
+
// },
|
|
29
|
+
// },
|
|
30
|
+
],
|
|
31
|
+
});
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { defineEval } from 'sunpeak/eval';
|
|
2
|
+
|
|
3
|
+
export default defineEval({
|
|
4
|
+
cases: [
|
|
5
|
+
{
|
|
6
|
+
name: 'asks for popular places',
|
|
7
|
+
prompt: 'Show me popular places to visit in Tokyo',
|
|
8
|
+
expect: { tool: 'show-carousel' },
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
name: 'asks for places to eat',
|
|
12
|
+
prompt: 'Where should I eat in Paris?',
|
|
13
|
+
expect: { tool: 'show-carousel' },
|
|
14
|
+
},
|
|
15
|
+
],
|
|
16
|
+
});
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { defineEvalConfig } from 'sunpeak/eval';
|
|
2
|
+
|
|
3
|
+
// API keys are loaded automatically from tests/evals/.env (gitignored).
|
|
4
|
+
// See .env.example for the format.
|
|
5
|
+
|
|
6
|
+
export default defineEvalConfig({
|
|
7
|
+
// Server is auto-detected for sunpeak projects.
|
|
8
|
+
// For non-sunpeak projects, uncomment:
|
|
9
|
+
// server: 'http://localhost:8000/mcp',
|
|
10
|
+
|
|
11
|
+
models: [
|
|
12
|
+
// Uncomment models and install their provider packages:
|
|
13
|
+
// 'gpt-4o', // OPENAI_API_KEY
|
|
14
|
+
// 'gpt-4o-mini', // OPENAI_API_KEY
|
|
15
|
+
// 'o4-mini', // OPENAI_API_KEY
|
|
16
|
+
// 'claude-sonnet-4-20250514', // ANTHROPIC_API_KEY
|
|
17
|
+
// 'gemini-2.0-flash', // GOOGLE_GENERATIVE_AI_API_KEY
|
|
18
|
+
],
|
|
19
|
+
|
|
20
|
+
defaults: {
|
|
21
|
+
runs: 5, // Number of times to run each case per model
|
|
22
|
+
maxSteps: 1, // Max tool call steps per run
|
|
23
|
+
temperature: 0, // 0 for most deterministic results
|
|
24
|
+
timeout: 30_000, // Timeout per run in ms
|
|
25
|
+
},
|
|
26
|
+
});
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { defineEval } from 'sunpeak/eval';
|
|
2
|
+
|
|
3
|
+
export default defineEval({
|
|
4
|
+
cases: [
|
|
5
|
+
{
|
|
6
|
+
name: 'asks for a map',
|
|
7
|
+
prompt: 'Show me a map of coffee shops near downtown Austin',
|
|
8
|
+
expect: { tool: 'show-map' },
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
name: 'asks for nearby places',
|
|
12
|
+
prompt: 'Find me some parks nearby',
|
|
13
|
+
expect: { tool: 'show-map' },
|
|
14
|
+
},
|
|
15
|
+
],
|
|
16
|
+
});
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { defineEval } from 'sunpeak/eval';
|
|
2
|
+
|
|
3
|
+
export default defineEval({
|
|
4
|
+
cases: [
|
|
5
|
+
{
|
|
6
|
+
name: 'asks to review a code diff',
|
|
7
|
+
prompt: 'Review my code changes to the auth module',
|
|
8
|
+
expect: { tool: 'review-diff' },
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
name: 'asks to draft a social post',
|
|
12
|
+
prompt: 'Write a launch announcement for X and LinkedIn',
|
|
13
|
+
expect: { tool: 'review-post' },
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
name: 'asks to review a purchase',
|
|
17
|
+
prompt: 'Review my order for the Pro plan upgrade',
|
|
18
|
+
expect: { tool: 'review-purchase' },
|
|
19
|
+
},
|
|
20
|
+
|
|
21
|
+
// To also check argument extraction, add args expectations
|
|
22
|
+
// (import { expect } from 'vitest' to use matchers):
|
|
23
|
+
// {
|
|
24
|
+
// name: 'passes platforms',
|
|
25
|
+
// prompt: 'Write a launch announcement for X and LinkedIn',
|
|
26
|
+
// expect: {
|
|
27
|
+
// tool: 'review-post',
|
|
28
|
+
// args: { platforms: expect.arrayContaining([expect.stringMatching(/x|twitter/i)]) },
|
|
29
|
+
// },
|
|
30
|
+
// },
|
|
31
|
+
//
|
|
32
|
+
// Multi-step (ordered tool-call sequence; set maxSteps high enough to cover every expected call):
|
|
33
|
+
// {
|
|
34
|
+
// name: 'multi-step flow',
|
|
35
|
+
// prompt: 'Draft a post and then review it',
|
|
36
|
+
// maxSteps: 3,
|
|
37
|
+
// expect: [
|
|
38
|
+
// { tool: 'review-post' },
|
|
39
|
+
// { tool: 'publish-post' },
|
|
40
|
+
// ],
|
|
41
|
+
// },
|
|
42
|
+
//
|
|
43
|
+
// Custom assertion (full access to result; also requires import { expect } from 'vitest'):
|
|
44
|
+
// {
|
|
45
|
+
// name: 'custom check',
|
|
46
|
+
// prompt: 'Show me my recent reviews',
|
|
47
|
+
// assert: (result) => {
|
|
48
|
+
// expect(result.toolCalls).toHaveLength(1);
|
|
49
|
+
// expect(result.toolCalls[0].name).toBe('review-diff');
|
|
50
|
+
// },
|
|
51
|
+
// },
|
|
52
|
+
],
|
|
53
|
+
});
|