npm - sunpeak - Versions diffs - 0.19.4 → 0.19.12 - Mend

sunpeak 0.19.4 → 0.19.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)

package/README.md +6 -4
package/bin/commands/dev.mjs +1 -1
package/bin/commands/inspect.mjs +1 -1
package/bin/commands/new.mjs +92 -10
package/bin/commands/start.mjs +3 -1
package/bin/commands/test-init.mjs +548 -76
package/bin/commands/test.mjs +401 -4
package/bin/lib/eval/eval-providers.mjs +34 -0
package/bin/lib/eval/eval-reporter.mjs +105 -0
package/bin/lib/eval/eval-runner.mjs +362 -0
package/bin/lib/eval/eval-types.d.mts +168 -0
package/bin/lib/eval/eval-vitest-plugin.mjs +147 -0
package/bin/lib/eval/model-registry.mjs +73 -0
package/bin/lib/inspect/inspect-config.mjs +1 -1
package/bin/lib/sandbox-server.mjs +5 -2
package/bin/lib/test/test-config.mjs +1 -1
package/bin/sunpeak.js +1 -0
package/dist/chatgpt/index.cjs +1 -1
package/dist/chatgpt/index.js +1 -1
package/dist/claude/index.cjs +1 -1
package/dist/claude/index.js +1 -1
package/dist/host/chatgpt/index.cjs +1 -1
package/dist/host/chatgpt/index.js +1 -1
package/dist/index.cjs +2 -2
package/dist/index.js +2 -2
package/dist/inspector/index.cjs +1 -1
package/dist/inspector/index.js +1 -1
package/dist/{inspector-Bp9jrHIu.js → inspector-D5DckQuU.js} +19 -19
package/dist/{inspector-Bp9jrHIu.js.map → inspector-D5DckQuU.js.map} +1 -1
package/dist/{inspector-Cvq3yjNL.cjs → inspector-jY9O18z9.cjs} +19 -19
package/dist/{inspector-Cvq3yjNL.cjs.map → inspector-jY9O18z9.cjs.map} +1 -1
package/dist/mcp/index.cjs +2 -2
package/dist/mcp/index.cjs.map +1 -1
package/dist/mcp/index.js +2 -2
package/dist/mcp/index.js.map +1 -1
package/dist/{use-app-Ck5kR1Sf.js → use-app-Bfargfa3.js} +2 -2
package/dist/{use-app-Ck5kR1Sf.js.map → use-app-Bfargfa3.js.map} +1 -1
package/dist/{use-app-DHYiev3D.cjs → use-app-CbsBEmwv.cjs} +2 -2
package/dist/{use-app-DHYiev3D.cjs.map → use-app-CbsBEmwv.cjs.map} +1 -1
package/package.json +32 -2
package/template/README.md +17 -7
package/template/_gitignore +2 -0
package/template/dist/albums/albums.html +1 -1
package/template/dist/albums/albums.json +1 -1
package/template/dist/carousel/carousel.html +1 -1
package/template/dist/carousel/carousel.json +1 -1
package/template/dist/map/map.html +1 -1
package/template/dist/map/map.json +1 -1
package/template/dist/review/review.html +1 -1
package/template/dist/review/review.json +1 -1
package/template/node_modules/.bin/vitest +2 -2
package/template/node_modules/.vite/deps/_metadata.json +3 -3
package/template/node_modules/.vite-mcp/deps/_metadata.json +20 -20
package/template/node_modules/.vite-mcp/deps/vitest.js +7 -7
package/template/node_modules/.vite-mcp/deps/vitest.js.map +1 -1
package/template/tests/evals/_env.example +5 -0
package/template/tests/evals/albums.eval.ts +31 -0
package/template/tests/evals/carousel.eval.ts +16 -0
package/template/tests/evals/eval.config.ts +26 -0
package/template/tests/evals/map.eval.ts +16 -0
package/template/tests/evals/review.eval.ts +53 -0

package/README.md CHANGED Viewed

@@ -64,6 +64,7 @@ test('review tool renders title', async ({ mcp }) => {
 - **MCP-native assertions**: `toBeError()`, `toHaveTextContent()`, `toHaveStructuredContent()`
 - **Multi-host**: Tests run against ChatGPT and Claude hosts automatically
 - **Live tests**: Automated browser tests against real ChatGPT via `sunpeak/test/live`
+- **Evals**: Test your tool interface design against multiple LLMs (GPT-4o, Claude, Gemini, etc.) via `sunpeak/eval`
 ### 3. App Framework
@@ -113,6 +114,7 @@ sunpeak new
 | `sunpeak test --visual`               | Run e2e tests with visual regression        |
 | `sunpeak test --visual --update`      | Update visual regression baselines          |
 | `sunpeak test --live`                 | Run live tests against real hosts           |
+| `sunpeak test --eval`                 | Run evals against multiple LLM models       |
 | `sunpeak test init`                   | Scaffold test infrastructure into a project |
 **App framework** (for sunpeak projects):
@@ -125,12 +127,12 @@ sunpeak new
 | `sunpeak start`                  | Start production MCP server                 |
 | `sunpeak upgrade`                | Upgrade sunpeak to latest version           |
-## Coding Agent Skill
+## Coding Agent Skills
-Install the `create-sunpeak-app` skill to give your coding agent (Claude Code, Cursor, etc.) built-in knowledge of sunpeak patterns, hooks, simulation files, and testing conventions:
+Install the sunpeak skills to give your coding agent (Claude Code, Cursor, etc.) built-in knowledge of sunpeak patterns, hooks, and testing:
 ```bash
-npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app
+pnpm dlx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server
 ```
 ## Troubleshooting
@@ -143,7 +145,7 @@ If your app doesn't render in ChatGPT or Claude:
 4. **Hard refresh** the host page (`Cmd+Shift+R` / `Ctrl+Shift+R`)
 5. **Open a new chat** in the host (cached iframes persist per-conversation)
-Full guide: [sunpeak.ai/docs/guides/troubleshooting](https://sunpeak.ai/docs/guides/troubleshooting)
+Full guide: [sunpeak.ai/docs/app-framework/guides/troubleshooting](https://sunpeak.ai/docs/app-framework/guides/troubleshooting)
 ## Resources

package/bin/commands/dev.mjs CHANGED Viewed

@@ -363,7 +363,7 @@ export async function dev(projectRoot = process.cwd(), args = []) {
   const sandbox = await startSandboxServer({ preferredPort: sandboxPort });
   // Find available ports for the MCP server and HMR WebSocket
-  const mcpPort = await getPort(8000);
+  const mcpPort = await getPort(Number(process.env.SUNPEAK_MCP_PORT || 8000));
   const hmrPort = await getPort(Number(process.env.SUNPEAK_HMR_PORT || 24679));
   console.log(`\nStarting MCP server with ${simulations.length} simulation(s) (Vite HMR)...`);

package/bin/commands/inspect.mjs CHANGED Viewed

@@ -1145,7 +1145,7 @@ export async function inspectServer(opts) {
   server.bindCLIShortcuts({ print: true });
   // Print troubleshooting link (dimmed)
-  console.log('\n  \x1b[2mApp not loading? \u2192 https://sunpeak.ai/docs/guides/troubleshooting\x1b[0m');
+  console.log('\n  \x1b[2mApp not loading? \u2192 https://sunpeak.ai/docs/app-framework/guides/troubleshooting\x1b[0m');
   // Print star-begging message unless suppressed
   if (!noBegging) {

package/bin/commands/new.mjs CHANGED Viewed

@@ -9,6 +9,7 @@ const execAsync = promisify(exec);
 import * as clack from '@clack/prompts';
 import { discoverResources } from '../lib/patterns.mjs';
 import { detectPackageManager } from '../utils.mjs';
+import { EVAL_PROVIDERS } from '../lib/eval/eval-providers.mjs';
 const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -44,7 +45,7 @@ async function defaultSelectResources(availableResources) {
       const maxLen = Math.max(...availableResources.map((r) => r.length));
       return availableResources.map((r) => ({
         value: r,
-        label: `${r.padEnd(maxLen)}  (https://sunpeak.ai/docs/api-reference/resources/${r})`,
+        label: `${r.padEnd(maxLen)}  (https://sunpeak.ai/docs/app-framework/resources/${r})`,
       }));
     })(),
     initialValues: availableResources,
@@ -57,6 +58,21 @@ async function defaultSelectResources(availableResources) {
   return selected;
 }
+/**
+ * Default prompt for eval provider selection.
+ * @returns {Promise<Array<{ pkg: string, models: string[] }>>}
+ */
+async function defaultSelectProviders() {
+  const selected = await clack.multiselect({
+    message: 'AI providers for evals (space to toggle, enter to skip)',
+    options: EVAL_PROVIDERS.map((p) => ({ value: p, label: p.label })),
+    initialValues: [],
+    required: false,
+  });
+  if (clack.isCancel(selected)) return [];
+  return selected;
+}
 /**
  * Default dependencies (real implementations)
  */
@@ -73,6 +89,8 @@ export const defaultDeps = {
   execAsync,
   promptName: defaultPromptName,
   selectResources: defaultSelectResources,
+  selectProviders: defaultSelectProviders,
+  password: clack.password,
   confirm: clack.confirm,
   intro: clack.intro,
   outro: clack.outro,
@@ -214,6 +232,10 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
         if (src.includes('/tests/live/') && name === `${resource}.spec.ts`) {
           return false;
         }
+        // Skip eval files for excluded resources
+        if (src.includes('/tests/evals/') && name === `${resource}.eval.ts`) {
+          return false;
+        }
       }
       return true;
@@ -229,6 +251,15 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
       d.renameSync(srcPath, destPath);
     }
   }
+  // Rename nested dotfiles (underscore convention for npm compatibility)
+  const nestedDotfiles = [['tests/evals/_env.example', 'tests/evals/.env.example']];
+  for (const [from, to] of nestedDotfiles) {
+    const srcPath = join(targetDir, from);
+    const destPath = join(targetDir, to);
+    if (d.existsSync(srcPath)) {
+      d.renameSync(srcPath, destPath);
+    }
+  }
   // Read sunpeak version from root package.json
   const rootPkg = JSON.parse(d.readFileSync(d.rootPkgPath, 'utf-8'));
@@ -278,26 +309,74 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
     s.stop(`Install failed. You can try running "${pm} install" manually.`);
   }
-  // Offer to install the sunpeak skill (only in interactive mode)
+  // Offer to configure eval providers (only in interactive mode)
+  if (resourcesArg === undefined) {
+    const providers = await d.selectProviders();
+    if (!clack.isCancel(providers) && providers.length > 0) {
+      // Install AI SDK core + selected provider packages
+      const pkgsToInstall = ['ai', ...providers.map((p) => p.pkg)];
+      try {
+        await d.execAsync(`${pm} add -D ${pkgsToInstall.join(' ')}`, { cwd: targetDir });
+      } catch {
+        d.console.log(`Provider install failed. Install manually: ${pm} add -D ${pkgsToInstall.join(' ')}`);
+      }
+      // Uncomment selected models in eval.config.ts
+      const evalConfigPath = join(targetDir, 'tests', 'evals', 'eval.config.ts');
+      if (d.existsSync(evalConfigPath)) {
+        let config = d.readFileSync(evalConfigPath, 'utf-8');
+        for (const p of providers) {
+          for (const model of p.models) {
+            // Uncomment lines matching this model (e.g., "    // 'gpt-4o'," → "    'gpt-4o',")
+            config = config.replace(
+              new RegExp(`^(\\s*)// ('${model.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}',?.*)$`, 'm'),
+              '$1$2'
+            );
+          }
+        }
+        d.writeFileSync(evalConfigPath, config);
+      }
+      // Prompt for API keys and write .env
+      const envLines = [];
+      const seen = new Set();
+      for (const p of providers) {
+        if (seen.has(p.envVar)) continue;
+        seen.add(p.envVar);
+        const key = await d.password({
+          message: `${p.envVar} (enter to skip)`,
+          mask: '*',
+        });
+        if (!clack.isCancel(key) && key) {
+          envLines.push(`${p.envVar}=${key}`);
+        }
+      }
+      const envPath = join(targetDir, 'tests', 'evals', '.env');
+      if (envLines.length > 0) {
+        d.writeFileSync(envPath, envLines.join('\n') + '\n');
+        clack.log.info(`API keys saved to tests/evals/.env (gitignored)`);
+      }
+    }
+  }
+  // Offer to install the sunpeak skills (only in interactive mode)
   if (resourcesArg === undefined) {
     const installSkill = await d.confirm({
-      message: 'Install the sunpeak skill? (helps your coding agent build your app)',
+      message: 'Install the sunpeak skills? (helps your coding agent build and test your app)',
       initialValue: true,
     });
     if (!clack.isCancel(installSkill) && installSkill) {
       try {
-        d.execSync('npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app', {
+        d.execSync('pnpm dlx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server', {
           cwd: targetDir,
           stdio: 'inherit',
         });
       } catch {
-        d.console.log('Skill install skipped. You can install later with: npx skills add Sunpeak-AI/sunpeak@create-sunpeak-app');
+        d.console.log('Skill install skipped. You can install later with: pnpm dlx skills add Sunpeak-AI/sunpeak@create-sunpeak-app Sunpeak-AI/sunpeak@test-mcp-server');
       }
     }
   }
-  const runCmd = pm === 'npm' ? 'npm run' : pm;
   d.outro(`Done! To get started:
   cd ${projectName}
@@ -305,9 +384,12 @@ export async function init(projectName, resourcesArg, deps = defaultDeps) {
 Your project commands:
-  sunpeak dev       # Start dev server + MCP endpoint
-  sunpeak build     # Build for production
-  ${runCmd} test         # Run tests`);
+  sunpeak dev                # Start dev server + MCP endpoint
+  sunpeak build              # Build for production
+  sunpeak test               # Run unit + e2e tests
+  sunpeak test --eval        # Run LLM evals (configure models in tests/evals/eval.config.ts)
+  sunpeak test --visual      # Run visual regression tests
+  sunpeak test --live        # Run live tests against real AI hosts`);
 }
 // Allow running directly

package/bin/commands/start.mjs CHANGED Viewed

@@ -45,6 +45,7 @@ export async function start(projectRoot = process.cwd(), args = []) {
   const jsonLogs = args.includes('--json-logs');
   const sse = args.includes('--sse');
+  const stateless = args.includes('--stateless');
   // Import production server from sunpeak
   const isTemplate = projectRoot.endsWith('/template') || projectRoot.endsWith('\\template');
@@ -190,9 +191,10 @@ export async function start(projectRoot = process.cwd(), args = []) {
   port = await getPort(port);
   console.log(`\nStarting ${name} v${version} on ${host}:${port}...`);
+  if (stateless) console.log('Stateless mode enabled (no session tracking)');
   startProductionHttpServer(
-    { name, version, serverInfo: serverConfig, tools, resources, auth, ...(sse ? { enableJsonResponse: false } : {}) },
+    { name, version, serverInfo: serverConfig, tools, resources, auth, stateless, ...(sse ? { enableJsonResponse: false } : {}) },
     { port, host }
   );
 }