sunpeak 0.19.4 → 0.19.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +6 -4
  2. package/bin/commands/dev.mjs +1 -1
  3. package/bin/commands/inspect.mjs +1 -1
  4. package/bin/commands/new.mjs +92 -10
  5. package/bin/commands/start.mjs +3 -1
  6. package/bin/commands/test-init.mjs +548 -76
  7. package/bin/commands/test.mjs +401 -4
  8. package/bin/lib/eval/eval-providers.mjs +34 -0
  9. package/bin/lib/eval/eval-reporter.mjs +105 -0
  10. package/bin/lib/eval/eval-runner.mjs +362 -0
  11. package/bin/lib/eval/eval-types.d.mts +168 -0
  12. package/bin/lib/eval/eval-vitest-plugin.mjs +147 -0
  13. package/bin/lib/eval/model-registry.mjs +73 -0
  14. package/bin/lib/inspect/inspect-config.mjs +1 -1
  15. package/bin/lib/sandbox-server.mjs +5 -2
  16. package/bin/lib/test/test-config.mjs +1 -1
  17. package/bin/sunpeak.js +1 -0
  18. package/dist/chatgpt/index.cjs +1 -1
  19. package/dist/chatgpt/index.js +1 -1
  20. package/dist/claude/index.cjs +1 -1
  21. package/dist/claude/index.js +1 -1
  22. package/dist/host/chatgpt/index.cjs +1 -1
  23. package/dist/host/chatgpt/index.js +1 -1
  24. package/dist/index.cjs +2 -2
  25. package/dist/index.js +2 -2
  26. package/dist/inspector/index.cjs +1 -1
  27. package/dist/inspector/index.js +1 -1
  28. package/dist/{inspector-Bp9jrHIu.js → inspector-D5DckQuU.js} +19 -19
  29. package/dist/{inspector-Bp9jrHIu.js.map → inspector-D5DckQuU.js.map} +1 -1
  30. package/dist/{inspector-Cvq3yjNL.cjs → inspector-jY9O18z9.cjs} +19 -19
  31. package/dist/{inspector-Cvq3yjNL.cjs.map → inspector-jY9O18z9.cjs.map} +1 -1
  32. package/dist/mcp/index.cjs +2 -2
  33. package/dist/mcp/index.cjs.map +1 -1
  34. package/dist/mcp/index.js +2 -2
  35. package/dist/mcp/index.js.map +1 -1
  36. package/dist/{use-app-Ck5kR1Sf.js → use-app-Bfargfa3.js} +2 -2
  37. package/dist/{use-app-Ck5kR1Sf.js.map → use-app-Bfargfa3.js.map} +1 -1
  38. package/dist/{use-app-DHYiev3D.cjs → use-app-CbsBEmwv.cjs} +2 -2
  39. package/dist/{use-app-DHYiev3D.cjs.map → use-app-CbsBEmwv.cjs.map} +1 -1
  40. package/package.json +32 -2
  41. package/template/README.md +17 -7
  42. package/template/_gitignore +2 -0
  43. package/template/dist/albums/albums.html +1 -1
  44. package/template/dist/albums/albums.json +1 -1
  45. package/template/dist/carousel/carousel.html +1 -1
  46. package/template/dist/carousel/carousel.json +1 -1
  47. package/template/dist/map/map.html +1 -1
  48. package/template/dist/map/map.json +1 -1
  49. package/template/dist/review/review.html +1 -1
  50. package/template/dist/review/review.json +1 -1
  51. package/template/node_modules/.bin/vitest +2 -2
  52. package/template/node_modules/.vite/deps/_metadata.json +3 -3
  53. package/template/node_modules/.vite-mcp/deps/_metadata.json +20 -20
  54. package/template/node_modules/.vite-mcp/deps/vitest.js +7 -7
  55. package/template/node_modules/.vite-mcp/deps/vitest.js.map +1 -1
  56. package/template/tests/evals/_env.example +5 -0
  57. package/template/tests/evals/albums.eval.ts +31 -0
  58. package/template/tests/evals/carousel.eval.ts +16 -0
  59. package/template/tests/evals/eval.config.ts +26 -0
  60. package/template/tests/evals/map.eval.ts +16 -0
  61. package/template/tests/evals/review.eval.ts +53 -0
@@ -0,0 +1,5 @@
1
+ # Copy this file to .env and fill in your API keys.
2
+ # .env is gitignored — never commit API keys.
3
+ # OPENAI_API_KEY=sk-...
4
+ # ANTHROPIC_API_KEY=sk-ant-...
5
+ # GOOGLE_GENERATIVE_AI_API_KEY=...
@@ -0,0 +1,31 @@
1
+ import { defineEval } from 'sunpeak/eval';
2
+
3
+ export default defineEval({
4
+ cases: [
5
+ {
6
+ name: 'asks for photo albums',
7
+ prompt: 'Show me my photo albums',
8
+ expect: { tool: 'show-albums' },
9
+ },
10
+ {
11
+ name: 'asks for food photos',
12
+ prompt: 'Show me photos from my Austin pizza tour',
13
+ expect: { tool: 'show-albums' },
14
+ },
15
+ {
16
+ name: 'asks for a specific category',
17
+ prompt: 'Show me my travel photos',
18
+ expect: { tool: 'show-albums' },
19
+ },
20
+ // To also check argument extraction, add args expectations
21
+ // (import { expect } from 'vitest' to use matchers):
22
+ // {
23
+ // name: 'passes search term',
24
+ // prompt: 'Show me photos from my Austin pizza tour',
25
+ // expect: {
26
+ // tool: 'show-albums',
27
+ // args: { search: expect.stringMatching(/pizza|austin/i) },
28
+ // },
29
+ // },
30
+ ],
31
+ });
@@ -0,0 +1,16 @@
1
+ import { defineEval } from 'sunpeak/eval';
2
+
3
+ export default defineEval({
4
+ cases: [
5
+ {
6
+ name: 'asks for popular places',
7
+ prompt: 'Show me popular places to visit in Tokyo',
8
+ expect: { tool: 'show-carousel' },
9
+ },
10
+ {
11
+ name: 'asks for places to eat',
12
+ prompt: 'Where should I eat in Paris?',
13
+ expect: { tool: 'show-carousel' },
14
+ },
15
+ ],
16
+ });
@@ -0,0 +1,26 @@
1
+ import { defineEvalConfig } from 'sunpeak/eval';
2
+
3
+ // API keys are loaded automatically from tests/evals/.env (gitignored).
4
+ // See .env.example for the format.
5
+
6
+ export default defineEvalConfig({
7
+ // Server is auto-detected for sunpeak projects.
8
+ // For non-sunpeak projects, uncomment:
9
+ // server: 'http://localhost:8000/mcp',
10
+
11
+ models: [
12
+ // Uncomment models and install their provider packages:
13
+ // 'gpt-4o', // OPENAI_API_KEY
14
+ // 'gpt-4o-mini', // OPENAI_API_KEY
15
+ // 'o4-mini', // OPENAI_API_KEY
16
+ // 'claude-sonnet-4-20250514', // ANTHROPIC_API_KEY
17
+ // 'gemini-2.0-flash', // GOOGLE_GENERATIVE_AI_API_KEY
18
+ ],
19
+
20
+ defaults: {
21
+ runs: 5, // Number of times to run each case per model
22
+ maxSteps: 1, // Max tool call steps per run
23
+ temperature: 0, // 0 for most deterministic results
24
+ timeout: 30_000, // Timeout per run in ms
25
+ },
26
+ });
@@ -0,0 +1,16 @@
1
+ import { defineEval } from 'sunpeak/eval';
2
+
3
+ export default defineEval({
4
+ cases: [
5
+ {
6
+ name: 'asks for a map',
7
+ prompt: 'Show me a map of coffee shops near downtown Austin',
8
+ expect: { tool: 'show-map' },
9
+ },
10
+ {
11
+ name: 'asks for nearby places',
12
+ prompt: 'Find me some parks nearby',
13
+ expect: { tool: 'show-map' },
14
+ },
15
+ ],
16
+ });
@@ -0,0 +1,53 @@
1
+ import { defineEval } from 'sunpeak/eval';
2
+
3
+ export default defineEval({
4
+ cases: [
5
+ {
6
+ name: 'asks to review a code diff',
7
+ prompt: 'Review my code changes to the auth module',
8
+ expect: { tool: 'review-diff' },
9
+ },
10
+ {
11
+ name: 'asks to draft a social post',
12
+ prompt: 'Write a launch announcement for X and LinkedIn',
13
+ expect: { tool: 'review-post' },
14
+ },
15
+ {
16
+ name: 'asks to review a purchase',
17
+ prompt: 'Review my order for the Pro plan upgrade',
18
+ expect: { tool: 'review-purchase' },
19
+ },
20
+
21
+ // To also check argument extraction, add args expectations
22
+ // (import { expect } from 'vitest' to use matchers):
23
+ // {
24
+ // name: 'passes platforms',
25
+ // prompt: 'Write a launch announcement for X and LinkedIn',
26
+ // expect: {
27
+ // tool: 'review-post',
28
+ // args: { platforms: expect.arrayContaining([expect.stringMatching(/x|twitter/i)]) },
29
+ // },
30
+ // },
31
+ //
32
+ // Multi-step (ordered tool call sequence):
33
+ // {
34
+ // name: 'multi-step flow',
35
+ // prompt: 'Draft a post and then publish it',
36
+ // maxSteps: 3,
37
+ // expect: [
38
+ // { tool: 'review-post' },
39
+ // { tool: 'publish-post' },
40
+ // ],
41
+ // },
42
+ //
43
+ // Custom assertion (full access to result):
44
+ // {
45
+ // name: 'custom check',
46
+ // prompt: 'Show me my recent reviews',
47
+ // assert: (result) => {
48
+ // expect(result.toolCalls).toHaveLength(1);
49
+ // expect(result.toolCalls[0].name).toBe('review-diff');
50
+ // },
51
+ // },
52
+ ],
53
+ });