npm - vskill - Versions diffs - 0.5.104 → 0.5.106 - Mend

vskill 0.5.104 → 0.5.106

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86)

package/README.md +123 -2
package/agents.json +1 -1
package/dist/agents/agents-registry.d.ts +0 -1
package/dist/agents/agents-registry.js +10 -30
package/dist/agents/agents-registry.js.map +1 -1
package/dist/api/client.d.ts +21 -0
package/dist/api/client.js +39 -5
package/dist/api/client.js.map +1 -1
package/dist/commands/diff.d.ts +2 -5
package/dist/commands/diff.js +82 -117
package/dist/commands/diff.js.map +1 -1
package/dist/commands/eval/serve.js +4 -0
package/dist/commands/eval/serve.js.map +1 -1
package/dist/commands/keys.d.ts +14 -0
package/dist/commands/keys.js +166 -0
package/dist/commands/keys.js.map +1 -0
package/dist/commands/masked-stdin.d.ts +14 -0
package/dist/commands/masked-stdin.js +79 -0
package/dist/commands/masked-stdin.js.map +1 -0
package/dist/commands/skill.d.ts +3 -29
package/dist/commands/skill.js +6 -32
package/dist/commands/skill.js.map +1 -1
package/dist/eval/anthropic-catalog.d.ts +49 -0
package/dist/eval/anthropic-catalog.js +238 -0
package/dist/eval/anthropic-catalog.js.map +1 -0
package/dist/eval/llm.d.ts +1 -1
package/dist/eval/llm.js +68 -10
package/dist/eval/llm.js.map +1 -1
package/dist/eval/mcp-detector.js +24 -3
package/dist/eval/mcp-detector.js.map +1 -1
package/dist/eval/model-resolver.d.ts +39 -0
package/dist/eval/model-resolver.js +94 -0
package/dist/eval/model-resolver.js.map +1 -0
package/dist/eval/pricing.js +42 -16
package/dist/eval/pricing.js.map +1 -1
package/dist/eval-server/api-routes.d.ts +9 -0
package/dist/eval-server/api-routes.js +241 -73
package/dist/eval-server/api-routes.js.map +1 -1
package/dist/eval-server/authoring-routes.js +21 -1
package/dist/eval-server/authoring-routes.js.map +1 -1
package/dist/eval-server/boot-preflight.js +8 -10
package/dist/eval-server/boot-preflight.js.map +1 -1
package/dist/eval-server/darwin-migrator.d.ts +57 -0
package/dist/eval-server/darwin-migrator.js +169 -0
package/dist/eval-server/darwin-migrator.js.map +1 -0
package/dist/eval-server/eval-server.d.ts +1 -0
package/dist/eval-server/eval-server.js +21 -0
package/dist/eval-server/eval-server.js.map +1 -1
package/dist/eval-server/integration-routes.js +7 -0
package/dist/eval-server/integration-routes.js.map +1 -1
package/dist/eval-server/platform-proxy.d.ts +18 -0
package/dist/eval-server/platform-proxy.js +153 -0
package/dist/eval-server/platform-proxy.js.map +1 -0
package/dist/eval-server/providers.d.ts +7 -12
package/dist/eval-server/providers.js +13 -15
package/dist/eval-server/providers.js.map +1 -1
package/dist/eval-server/settings-store.d.ts +31 -26
package/dist/eval-server/settings-store.js +246 -143
package/dist/eval-server/settings-store.js.map +1 -1
package/dist/eval-server/skill-create-routes.js +18 -0
package/dist/eval-server/skill-create-routes.js.map +1 -1
package/dist/eval-server/skill-name-resolver.d.ts +35 -0
package/dist/eval-server/skill-name-resolver.js +146 -0
package/dist/eval-server/skill-name-resolver.js.map +1 -0
package/dist/eval-ui/assets/{CommandPalette-DiPALzlG.js → CommandPalette-COqdrmRl.js} +1 -1
package/dist/eval-ui/assets/CreateSkillPage-C3IjO8es.js +12 -0
package/dist/eval-ui/assets/UpdateDropdown-DnKKMBBN.js +1 -0
package/dist/eval-ui/assets/index-Dmja1p3A.css +1 -0
package/dist/eval-ui/assets/index-KIcQ5e5a.js +102 -0
package/dist/eval-ui/index.html +2 -2
package/dist/first-run-onboarding.d.ts +19 -0
package/dist/first-run-onboarding.js +104 -0
package/dist/first-run-onboarding.js.map +1 -0
package/dist/index.js +18 -0
package/dist/index.js.map +1 -1
package/dist/installer/canonical.js +12 -13
package/dist/installer/canonical.js.map +1 -1
package/dist/utils/resolve-binary.js +7 -8
package/dist/utils/resolve-binary.js.map +1 -1
package/package.json +5 -2
package/dist/eval-ui/assets/CreateSkillPage-BMrFELep.js +0 -12
package/dist/eval-ui/assets/UpdateDropdown-Bj8kZzuR.js +0 -1
package/dist/eval-ui/assets/_shimNode-D3bBqrAh.js +0 -1
package/dist/eval-ui/assets/index-BSPDkfZG.js +0 -102
package/dist/eval-ui/assets/index-C0Gc_4KC.css +0 -1
package/dist/eval-ui/assets/resolve-binary-DIxhrZ6O.js +0 -2

package/dist/eval-server/api-routes.js CHANGED

@@ -1,15 +1,16 @@
 // ---------------------------------------------------------------------------
 // api-routes.ts -- REST API route handlers for the eval UI
 // ---------------------------------------------------------------------------
-import { readFileSync, writeFileSync, mkdirSync, existsSync, rmSync, readdirSync, statSync } from "node:fs";
+import { readFileSync, writeFileSync, mkdirSync, existsSync, readdirSync, statSync } from "node:fs";
 import { execSync } from "node:child_process";
 import { join, resolve, dirname } from "node:path";
+import { homedir } from "node:os";
 import { sendJson, readBody } from "./router.js";
 import { initSSE, sendSSE, sendSSEDone, withHeartbeat, startDynamicHeartbeat } from "./sse-helpers.js";
 import { dataEventBus, emitDataEvent } from "./data-events.js";
 import { classifyError } from "./error-classifier.js";
 import { readLockfile } from "../lockfile/lockfile.js";
-import { parseSource } from "../resolvers/source-resolver.js";
+import { resolveSkillApiName as resolveSkillApiNameImpl } from "./skill-name-resolver.js";
 import { runBenchmarkSSE, runSingleCaseSSE } from "./benchmark-runner.js";
 import { getSkillSemaphore } from "./concurrency.js";
 import { resolveSkillDir } from "./skill-resolver.js";
@@ -17,6 +18,7 @@ import { classifyOrigin, scanSkillsTriScope } from "../eval/skill-scanner.js";
 import { scanInstalledPluginSkills, scanAuthoredPluginSkills, } from "../eval/plugin-scanner.js";
 import { resolveGlobalSkillsDir } from "../eval/path-utils.js";
 import { loadAndValidateEvals, EvalValidationError } from "../eval/schema.js";
+import { ANTHROPIC_CATALOG_SNAPSHOT, findAnthropicModel } from "../eval/anthropic-catalog.js";
 import { readBenchmark } from "../eval/benchmark.js";
 import { writeHistoryEntry, listHistory, readHistoryEntry, computeRegressions, deleteHistoryEntry, getCaseHistory, computeStats } from "../eval/benchmark-history.js";
 import { judgeAssertion } from "../eval/judge.js";
@@ -32,6 +34,8 @@ import { writeActivationRun, listActivationRuns, getActivationRun } from "../eva
 import { AGENTS_REGISTRY, detectInstalledAgents } from "../agents/agents-registry.js";
 import { resolveOllamaBaseUrl } from "../eval/env.js";
 import * as settingsStore from "./settings-store.js";
+import { isProviderId, getProviderById } from "./providers.js";
+import { DarwinKeychainMigrator } from "./darwin-migrator.js";
 import { loadStudioSelection, saveStudioSelection } from "./studio-json.js";
 /**
  * Build the response for GET /api/agents/installed.
@@ -474,18 +478,41 @@ function computeBenchmarkStatus(benchmark, evalIds, hasEvals) {
     // Use overall_pass_rate as single source of truth
     return (benchmark.overall_pass_rate ?? 0) >= 1 ? "pass" : "fail";
 }
-const PROVIDER_MODELS = {
+// 0711 — Anthropic models + pricing now derive from the dated catalog
+// snapshot at `src/eval/anthropic-catalog.ts`. Manual maintenance of this
+// list led to stale prices (Opus 4.7 shown at $15/$75 instead of $5/$25)
+// because three different files held copies of the same fact. The catalog
+// file is the single source of truth; CI fails if its snapshotDate is
+// older than 6 months.
+function buildAnthropicProviderModels() {
+    return ANTHROPIC_CATALOG_SNAPSHOT.models
+        .filter((m) => m.status === "active")
+        .map((m) => ({
+        id: m.id,
+        label: `${m.displayName} (API)`,
+        pricing: {
+            prompt: m.pricing.promptUsdPer1M,
+            completion: m.pricing.completionUsdPer1M,
+        },
+    }));
+}
+function aliasInfo(alias, fallbackLabel) {
+    const entry = findAnthropicModel(alias);
+    if (!entry)
+        return { label: fallbackLabel };
+    return { label: entry.displayName, resolvedId: entry.id };
+}
+export const PROVIDER_MODELS = {
+    // Opus first so it is the default when no override is set
+    // (getEffectiveRawModel returns models[0]). Labels come from the catalog so
+    // the picker shows the exact dated version (e.g. "Claude Opus 4.7"), not the
+    // bare family name — keeps the Studio truthful when a model is bumped.
     "claude-cli": [
-        { id: "sonnet", label: "Claude Sonnet" },
-        { id: "opus", label: "Claude Opus" },
-        { id: "haiku", label: "Claude Haiku" },
-    ],
-    "anthropic": [
-        { id: "claude-sonnet-4-6", label: "Claude Sonnet 4.6 (API)" },
-        { id: "claude-opus-4-7", label: "Claude Opus 4.7 (API)" },
-        { id: "claude-opus-4-6", label: "Claude Opus 4.6 (API)" },
-        { id: "claude-haiku-4-5-20251001", label: "Claude Haiku 4.5 (API)" },
+        { id: "opus", ...aliasInfo("opus", "Claude Opus") },
+        { id: "sonnet", ...aliasInfo("sonnet", "Claude Sonnet") },
+        { id: "haiku", ...aliasInfo("haiku", "Claude Haiku") },
     ],
+    "anthropic": buildAnthropicProviderModels(),
     "ollama": [
         { id: "llama3.1:8b", label: "Llama 3.1 8B" },
         { id: "qwen2.5:32b", label: "Qwen 2.5 32B" },
@@ -500,6 +527,13 @@ const PROVIDER_MODELS = {
         { id: "o3", label: "OpenAI o3" },
         { id: "o4-mini", label: "OpenAI o4-mini" },
     ],
+    "openai": [
+        { id: "gpt-4o-mini", label: "GPT-4o mini (API)", pricing: { prompt: 0.15, completion: 0.60 } },
+        { id: "gpt-4o", label: "GPT-4o (API)", pricing: { prompt: 2.50, completion: 10 } },
+        { id: "gpt-4.1", label: "GPT-4.1 (API)", pricing: { prompt: 2, completion: 8 } },
+        { id: "gpt-4.1-mini", label: "GPT-4.1 mini (API)", pricing: { prompt: 0.40, completion: 1.60 } },
+        { id: "o4-mini", label: "o4-mini (API)", pricing: { prompt: 1.10, completion: 4.40 } },
+    ],
     "openrouter": [
         // Anthropic via OpenRouter
         { id: "anthropic/claude-opus-4", label: "Claude Opus 4 (via OpenRouter)" },
@@ -647,6 +681,25 @@ function isBinaryOnPath(name) {
         return false;
     }
 }
+// 0701 — Read the active Claude Code model from ~/.claude/settings.json so the
+// Studio picker can surface "routing to claude-opus-4-7[1m]" under the generic
+// Claude Code rows. Returns null on any read/parse failure — callers fall back
+// to generic aliases. Re-read on every call (no caching) so toggling /model in
+// Claude Code is reflected on the next picker open.
+export function resolveClaudeCodeModel() {
+    try {
+        const path = join(homedir(), ".claude", "settings.json");
+        const raw = readFileSync(path, "utf8");
+        const parsed = JSON.parse(raw);
+        if (!parsed || typeof parsed !== "object")
+            return null;
+        const model = parsed.model;
+        return typeof model === "string" && model.length > 0 ? model : null;
+    }
+    catch {
+        return null;
+    }
+}
 export async function detectAvailableProviders() {
     const providers = [];
     // Claude CLI — delegates to the `claude` binary; the CLI owns session auth.
@@ -656,9 +709,10 @@ export async function detectAvailableProviders() {
         label: "Use current Claude Code session",
         available: true,
         models: PROVIDER_MODELS["claude-cli"],
+        resolvedModel: resolveClaudeCodeModel(),
     });
     // Anthropic API — available if ANTHROPIC_API_KEY is set OR a key is in the
-    // settings-store (browser tier or Darwin keychain).
+    // settings-store. After 0702 the tier concept is gone; storage is file-only.
     providers.push({
         id: "anthropic",
         label: "Anthropic API",
@@ -666,6 +720,14 @@ export async function detectAvailableProviders() {
             settingsStore.hasKeySync("anthropic"),
         models: PROVIDER_MODELS["anthropic"],
     });
+    // OpenAI API — available if OPENAI_API_KEY is set OR a key is stored (0702 T-023).
+    providers.push({
+        id: "openai",
+        label: "OpenAI API",
+        available: !!process.env.OPENAI_API_KEY ||
+            settingsStore.hasKeySync("openai"),
+        models: PROVIDER_MODELS["openai"],
+    });
     // OpenRouter — available if OPENROUTER_API_KEY is set OR a key is stored.
     providers.push({
         id: "openrouter",
@@ -788,9 +850,12 @@ export function registerRoutes(router, root, projectName) {
                 id: m.id,
                 name: m.name || m.id,
                 contextWindow: typeof m.context_length === "number" ? m.context_length : undefined,
+                // 0710 — OpenRouter publishes USD per token; canonicalize to USD per 1M
+                // tokens so the wire contract matches PROVIDER_MODELS["anthropic"]
+                // (3, 15, 75, …) and every consumer can assume one unit.
                 pricing: {
-                    prompt: parseFloat(m.pricing?.prompt || "0"),
-                    completion: parseFloat(m.pricing?.completion || "0"),
+                    prompt: parseFloat(m.pricing?.prompt || "0") * 1_000_000,
+                    completion: parseFloat(m.pricing?.completion || "0") * 1_000_000,
                 },
             }));
             OPENROUTER_CACHE.set(cacheKey, { value: models, fetchedAt: now });
@@ -813,6 +878,11 @@ export function registerRoutes(router, root, projectName) {
     router.get("/api/settings/keys", async (_req, res) => {
         sendJson(res, settingsStore.listKeys());
     });
+    // 0702 T-024: expose the absolute keys.env path for the Settings footer +
+    // "Copy path" button. Key contents are NEVER returned — path only.
+    router.get("/api/settings/storage-path", async (_req, res) => {
+        sendJson(res, { path: settingsStore.getKeysFilePath() });
+    });
     router.post("/api/settings/keys", async (req, res) => {
         // Reject any request that smuggles the key in a query-string — JSON body only.
         const url = req.url || "";
@@ -825,24 +895,22 @@ export function registerRoutes(router, root, projectName) {
             sendJson(res, { error: "key must be non-empty string" }, 400);
             return;
         }
-        if (body.provider !== "anthropic" && body.provider !== "openrouter") {
+        if (typeof body.provider !== "string" || !isProviderId(body.provider)) {
             sendJson(res, { error: `unknown provider: ${String(body.provider)}` }, 400);
             return;
         }
+        const providerId = body.provider;
         try {
-            const saved = await settingsStore.saveKey(body.provider, body.key.trim(), body.tier ?? "browser");
-            // Prefix hint — non-blocking, purely informational
-            let warning;
-            if (body.provider === "anthropic" && !body.key.startsWith("sk-ant-")) {
-                warning = "key doesn't match typical Anthropic prefix sk-ant-";
-            }
-            else if (body.provider === "openrouter" && !body.key.startsWith("sk-or-")) {
-                warning = "key doesn't match typical OpenRouter prefix sk-or-";
-            }
+            const saved = await settingsStore.saveKey(providerId, body.key.trim());
+            // Prefix hint — non-blocking, purely informational. Sourced from PROVIDERS
+            // so adding a provider later automatically extends the warning.
+            const descriptor = getProviderById(providerId);
+            const warning = descriptor.keyPrefix && !body.key.startsWith(descriptor.keyPrefix)
+                ? `key doesn't match typical ${descriptor.label} prefix ${descriptor.keyPrefix}`
+                : undefined;
             sendJson(res, {
                 ok: true,
                 updatedAt: saved.updatedAt,
-                tier: saved.tier,
                 available: true,
                 ...(warning ? { warning } : {}),
             });
@@ -851,15 +919,51 @@ export function registerRoutes(router, root, projectName) {
             sendJson(res, { error: err.message }, 500);
         }
     });
-    router.delete("/api/settings/keys/:provider", async (req, res) => {
-        const provider = req.params?.provider;
-        if (provider !== "anthropic" && provider !== "openrouter") {
+    router.delete("/api/settings/keys/:provider", async (_req, res, params) => {
+        const provider = params.provider;
+        if (typeof provider !== "string" || !isProviderId(provider)) {
             sendJson(res, { error: `unknown provider: ${String(provider)}` }, 400);
             return;
         }
         await settingsStore.removeKey(provider);
         sendJson(res, { ok: true });
     });
+    // Migration — one-shot copy from pre-0702 macOS Keychain into the file store.
+    // Non-Darwin platforms short-circuit inside the migrator (no spawn).
+    router.get("/api/settings/migration-status", async (_req, res) => {
+        try {
+            const migrator = new DarwinKeychainMigrator();
+            const availability = await migrator.available();
+            sendJson(res, {
+                hasLegacyKeys: availability.hasLegacyKeys,
+                providers: availability.providers,
+                ackStatus: availability.ackStatus ?? null,
+            });
+        }
+        catch (err) {
+            sendJson(res, { error: err.message }, 500);
+        }
+    });
+    router.post("/api/settings/migration/perform", async (_req, res) => {
+        try {
+            const migrator = new DarwinKeychainMigrator();
+            const result = await migrator.migrate();
+            sendJson(res, { migrated: result.migrated });
+        }
+        catch (err) {
+            sendJson(res, { error: err.message }, 500);
+        }
+    });
+    router.post("/api/settings/migration/acknowledge", async (_req, res) => {
+        try {
+            const migrator = new DarwinKeychainMigrator();
+            await migrator.acknowledge();
+            sendJson(res, { ok: true });
+        }
+        catch (err) {
+            sendJson(res, { error: err.message }, 500);
+        }
+    });
     // Config — expose current provider/model + available providers + project
     // IMPORTANT: Return raw model IDs (e.g. "sonnet"), NOT display models
     // (e.g. "claude-sonnet"). The frontend round-trips config.model back to
@@ -1060,49 +1164,67 @@ export function registerRoutes(router, root, projectName) {
     // MUST be registered BEFORE the /:plugin/:skill catch-all below.
     // -------------------------------------------------------------------------
     const PLATFORM_BASE = "https://verified-skill.com";
-    /** Resolve plugin/skill to full hierarchical API name using lockfile source. */
+    /**
+     * Resolve plugin/skill to full hierarchical API name. Lockfile path first
+     * (installed skills); falls back to authored-skill discovery on disk +
+     * git remote parse for skills authored in this repo. See skill-name-resolver.ts.
+     */
     function resolveSkillApiName(skill) {
-        const lock = readLockfile();
-        if (!lock)
-            return skill;
-        const entry = lock.skills[skill];
-        if (!entry?.source)
-            return skill;
-        const parsed = parseSource(entry.source);
-        if (parsed.type === "github" || parsed.type === "github-plugin" || parsed.type === "marketplace") {
-            return `${parsed.owner}/${parsed.repo}/${skill}`;
-        }
-        return skill;
+        return resolveSkillApiNameImpl(skill, root);
     }
-    // T-009: Versions proxy route
+    // T-009 (proxy) + 0707 T-021 (harden): Versions endpoint
+    //
+    // Envelope:  { versions: VersionEntry[], count: number, source: "platform" | "none" }
+    //   - source:"platform" → remote Verified-Skill platform responded
+    //   - source:"none"     → skill has no VCS surface (local fixture, platform
+    //                         unreachable, or platform returned non-OK). In this
+    //                         case the `X-Skill-VCS: unavailable` response header
+    //                         is also emitted so the UI can badge the skill as
+    //                         "no version history" without treating it as an error.
+    //
+    // Never returns 5xx for the "no VCS surface" case — that is normal empty state.
     router.get("/api/skills/:plugin/:skill/versions", async (req, res, params) => {
-        const fullName = resolveSkillApiName(params.skill);
+        const fullName = await resolveSkillApiName(params.skill);
         const parts = fullName.split("/");
         const apiPath = parts.length === 3
             ? `/api/v1/skills/${parts.map(encodeURIComponent).join("/")}/versions`
             : `/api/v1/skills/${encodeURIComponent(fullName)}/versions`;
+        const emptyEnvelope = () => {
+            res.setHeader("X-Skill-VCS", "unavailable");
+            sendJson(res, { versions: [], count: 0, source: "none" }, 200, req);
+        };
+        let fetchResp;
         try {
-            const resp = await fetch(`${PLATFORM_BASE}${apiPath}`, {
+            fetchResp = await fetch(`${PLATFORM_BASE}${apiPath}`, {
                 signal: AbortSignal.timeout(10_000),
             });
-            if (!resp.ok) {
-                sendJson(res, { error: "Platform API unavailable" }, 502, req);
-                return;
-            }
-            const data = (await resp.json());
-            const versions = Array.isArray(data.versions) ? data.versions : [];
-            // Enrich with isInstalled from lockfile
-            const lock = readLockfile();
-            const installedVersion = lock?.skills[params.skill]?.version;
-            const enriched = versions.map((v) => ({
-                ...v,
-                isInstalled: installedVersion ? v.version === installedVersion : undefined,
-            }));
-            sendJson(res, enriched, 200, req);
         }
         catch {
-            sendJson(res, { error: "Platform API unavailable" }, 502, req);
+            // Network failure / timeout / no VCS surface → empty envelope, not 502.
+            emptyEnvelope();
+            return;
+        }
+        if (!fetchResp.ok) {
+            emptyEnvelope();
+            return;
+        }
+        let data;
+        try {
+            data = (await fetchResp.json());
+        }
+        catch {
+            emptyEnvelope();
+            return;
         }
+        const versions = Array.isArray(data.versions) ? data.versions : [];
+        // Enrich with isInstalled from lockfile
+        const lock = readLockfile();
+        const installedVersion = lock?.skills[params.skill]?.version;
+        const enriched = versions.map((v) => ({
+            ...v,
+            isInstalled: installedVersion ? v.version === installedVersion : undefined,
+        }));
+        sendJson(res, { versions: enriched, count: enriched.length, source: "platform" }, 200, req);
     });
     // T-010: Diff proxy route
     router.get("/api/skills/:plugin/:skill/versions/diff", async (req, res, params) => {
@@ -1113,11 +1235,11 @@ export function registerRoutes(router, root, projectName) {
             sendJson(res, { error: "Missing required query params: from and to" }, 400, req);
             return;
         }
-        const fullName = resolveSkillApiName(params.skill);
+        const fullName = await resolveSkillApiName(params.skill);
         const parts = fullName.split("/");
         const basePath = parts.length === 3
-            ? `/api/v1/skills/${parts.map(encodeURIComponent).join("/")}/versions`
-            : `/api/v1/skills/${encodeURIComponent(fullName)}/versions`;
+            ? `/api/v1/skills/${parts.map(encodeURIComponent).join("/")}/versions/diff`
+            : `/api/v1/skills/${encodeURIComponent(fullName)}/versions/diff`;
         try {
             const resp = await fetch(`${PLATFORM_BASE}${basePath}?from=${encodeURIComponent(from)}&to=${encodeURIComponent(to)}`, { signal: AbortSignal.timeout(10_000) });
             if (!resp.ok) {
@@ -1357,7 +1479,10 @@ export function registerRoutes(router, root, projectName) {
             return;
         }
         try {
-            rmSync(skillDir, { recursive: true, force: true });
+            // 0722: route to OS trash (Trash / Recycle Bin / XDG) instead of hard delete
+            // so accidental deletes are recoverable from the user's native Trash app.
+            const trash = (await import("trash")).default;
+            await trash([skillDir]);
             sendJson(res, { ok: true, deleted: `${params.plugin}/${params.skill}` }, 200, req);
         }
         catch (err) {
@@ -1378,20 +1503,35 @@ export function registerRoutes(router, root, projectName) {
         sendJson(res, { description, rawContent: skillContent }, 200, req);
     });
     // Get evals.json
+    //
+    // Envelope (0707 T-023 / T-025):
+    //   200 { exists: false, evals: [] }      when evals.json is missing
+    //   200 { exists: true,  ...EvalsFile }   when valid (evals[], skill_name, …)
+    //   422 { error, errors[] }               when malformed / fails schema
+    //   500 { error }                         only for unexpected I/O failures
+    //
+    // "missing" must be distinguishable from "malformed" so the UI can render
+    // the former as an empty-state CTA ("Create evals") and the latter as a
+    // validation error panel. The earlier 400 status conflated malformed with
+    // generic client errors — 422 Unprocessable Entity is the correct semantic
+    // for well-formed requests whose payload fails validation.
     router.get("/api/skills/:plugin/:skill/evals", async (req, res, params) => {
         const skillDir = resolveSkillDir(root, params.plugin, params.skill);
         const evalsPath = join(skillDir, "evals", "evals.json");
         if (!existsSync(evalsPath)) {
-            sendJson(res, { error: "No evals.json found" }, 404, req);
+            // 0704: 200 empty-state sentinel (was 404) — "no evals.json yet" is
+            // authoring empty state, not an error the client has to filter.
+            sendJson(res, { exists: false, evals: [] }, 200, req);
             return;
         }
         try {
             const evals = loadAndValidateEvals(skillDir);
-            sendJson(res, evals, 200, req);
+            sendJson(res, { exists: true, ...evals }, 200, req);
         }
         catch (err) {
             if (err instanceof EvalValidationError) {
-                sendJson(res, { error: err.message, errors: err.errors }, 400, req);
+                // 0707 T-023: malformed → 422 (was 400).
+                sendJson(res, { error: err.message, errors: err.errors }, 422, req);
             }
             else {
                 sendJson(res, { error: String(err.message) }, 500, req);
@@ -2022,14 +2162,19 @@ export function registerRoutes(router, root, projectName) {
         sendJson(res, stats, 200, req);
     });
     // Get latest benchmark
+    //
+    // Envelope (0707 T-022 / T-025):
+    //   200 null                 when no benchmark has been persisted
+    //   200 <BenchmarkResult>    when a benchmark exists
+    //
+    // Always 200 — a missing benchmark is normal empty state, not an error.
+    // Works for any plugin slug (including dashes like `google-workspace`)
+    // because routing uses `[^/]+` groups (see router.ts T-020).
     router.get("/api/skills/:plugin/:skill/benchmark/latest", async (req, res, params) => {
+        // 0704: always 200; body null = no benchmark persisted yet.
         const skillDir = resolveSkillDir(root, params.plugin, params.skill);
         const benchmark = await readBenchmark(skillDir);
-        if (!benchmark) {
-            sendJson(res, { error: "No benchmark found" }, 404, req);
-            return;
-        }
-        sendJson(res, benchmark, 200, req);
+        sendJson(res, benchmark ?? null, 200, req);
     });
     // Run activation test (SSE)
     router.post("/api/skills/:plugin/:skill/activation-test", async (req, res, params) => {
@@ -2148,10 +2293,33 @@ Return ONLY the JSON lines, no other text.`;
         }
     });
     // List activation test history (summaries only)
+    //
+    // Envelope (0707 T-024 / T-025):
+    //   200 { runs: [],     count: 0 }          when the history log doesn't
+    //                                           exist / the skill has never been
+    //                                           activation-tested. listActivation-
+    //                                           Runs() catches ENOENT internally
+    //                                           and returns [] — see activation-
+    //                                           history.ts readHistoryFile().
+    //   200 { runs: [...],  count: <N> }        when entries exist
+    //   500 { error }                           only for unexpected I/O failures
+    //                                           (ENOENT is explicitly not one)
     router.get("/api/skills/:plugin/:skill/activation-history", async (req, res, params) => {
         const skillDir = resolveSkillDir(root, params.plugin, params.skill);
-        const runs = await listActivationRuns(skillDir);
-        sendJson(res, { runs }, 200, req);
+        try {
+            const runs = await listActivationRuns(skillDir);
+            sendJson(res, { runs, count: runs.length }, 200, req);
+        }
+        catch (err) {
+            const code = err?.code;
+            if (code === "ENOENT") {
+                // Defensive — listActivationRuns already swallows ENOENT, but in case
+                // a future refactor propagates it we still return the empty envelope.
+                sendJson(res, { runs: [], count: 0 }, 200, req);
+                return;
+            }
+            sendJson(res, { error: err.message }, 500, req);
+        }
     });
     // Get full activation test run by ID
     router.get("/api/skills/:plugin/:skill/activation-history/:runId", async (req, res, params) => {