vskill 0.5.105 → 0.5.107
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents.json +1 -1
- package/dist/eval-server/api-routes.d.ts +8 -0
- package/dist/eval-server/api-routes.js +56 -25
- package/dist/eval-server/api-routes.js.map +1 -1
- package/dist/eval-server/eval-server.js +11 -0
- package/dist/eval-server/eval-server.js.map +1 -1
- package/dist/eval-server/integration-routes.js +7 -0
- package/dist/eval-server/integration-routes.js.map +1 -1
- package/dist/eval-server/platform-proxy.d.ts +18 -0
- package/dist/eval-server/platform-proxy.js +153 -0
- package/dist/eval-server/platform-proxy.js.map +1 -0
- package/dist/eval-server/skill-name-resolver.d.ts +35 -0
- package/dist/eval-server/skill-name-resolver.js +146 -0
- package/dist/eval-server/skill-name-resolver.js.map +1 -0
- package/dist/eval-ui/assets/{CommandPalette-DAWyTgXL.js → CommandPalette-sazEtk0a.js} +1 -1
- package/dist/eval-ui/assets/CreateSkillPage-B5qENwBj.js +12 -0
- package/dist/eval-ui/assets/{UpdateDropdown-6bfwsX0-.js → UpdateDropdown-Ce9LXOxb.js} +1 -1
- package/dist/eval-ui/assets/index-BBoVqs6V.css +1 -0
- package/dist/eval-ui/assets/index-vn5bFfrb.js +102 -0
- package/dist/eval-ui/index.html +2 -2
- package/package.json +4 -2
- package/dist/eval-ui/assets/CreateSkillPage-D5xmvnzV.js +0 -12
- package/dist/eval-ui/assets/index-C0Gc_4KC.css +0 -1
- package/dist/eval-ui/assets/index-C_-YXE6O.js +0 -102
package/agents.json
CHANGED
|
@@ -99,6 +99,13 @@ export declare function deriveSourceAgent(skillDir: string, root: string, origin
|
|
|
99
99
|
* response never fails because of a single bad skill.
|
|
100
100
|
*/
|
|
101
101
|
export declare function buildSkillMetadata(skillDir: string, origin: "source" | "installed", root: string): SkillMetadataFields;
|
|
102
|
+
/**
|
|
103
|
+
* 0682 F-001 — Test helper. Resets `currentOverrides` to the default and
|
|
104
|
+
* clears the `studioLoaded` flag so subsequent /api/config calls re-attempt
|
|
105
|
+
* loadStudioSelection. Production code never needs this; it exists solely so
|
|
106
|
+
* vitest can simulate a fresh server boot per-case.
|
|
107
|
+
*/
|
|
108
|
+
export declare function resetStudioRestoreState(): void;
|
|
102
109
|
interface ModelOption {
|
|
103
110
|
id: string;
|
|
104
111
|
label: string;
|
|
@@ -106,6 +113,7 @@ interface ModelOption {
|
|
|
106
113
|
prompt: number;
|
|
107
114
|
completion: number;
|
|
108
115
|
};
|
|
116
|
+
resolvedId?: string;
|
|
109
117
|
}
|
|
110
118
|
export declare const PROVIDER_MODELS: Record<ProviderName, ModelOption[]>;
|
|
111
119
|
type OpenRouterCacheEntry = {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// ---------------------------------------------------------------------------
|
|
2
2
|
// api-routes.ts -- REST API route handlers for the eval UI
|
|
3
3
|
// ---------------------------------------------------------------------------
|
|
4
|
-
import { readFileSync, writeFileSync, mkdirSync, existsSync,
|
|
4
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync, readdirSync, statSync } from "node:fs";
|
|
5
5
|
import { execSync } from "node:child_process";
|
|
6
6
|
import { join, resolve, dirname } from "node:path";
|
|
7
7
|
import { homedir } from "node:os";
|
|
@@ -10,7 +10,7 @@ import { initSSE, sendSSE, sendSSEDone, withHeartbeat, startDynamicHeartbeat } f
|
|
|
10
10
|
import { dataEventBus, emitDataEvent } from "./data-events.js";
|
|
11
11
|
import { classifyError } from "./error-classifier.js";
|
|
12
12
|
import { readLockfile } from "../lockfile/lockfile.js";
|
|
13
|
-
import {
|
|
13
|
+
import { resolveSkillApiName as resolveSkillApiNameImpl } from "./skill-name-resolver.js";
|
|
14
14
|
import { runBenchmarkSSE, runSingleCaseSSE } from "./benchmark-runner.js";
|
|
15
15
|
import { getSkillSemaphore } from "./concurrency.js";
|
|
16
16
|
import { resolveSkillDir } from "./skill-resolver.js";
|
|
@@ -18,7 +18,7 @@ import { classifyOrigin, scanSkillsTriScope } from "../eval/skill-scanner.js";
|
|
|
18
18
|
import { scanInstalledPluginSkills, scanAuthoredPluginSkills, } from "../eval/plugin-scanner.js";
|
|
19
19
|
import { resolveGlobalSkillsDir } from "../eval/path-utils.js";
|
|
20
20
|
import { loadAndValidateEvals, EvalValidationError } from "../eval/schema.js";
|
|
21
|
-
import { ANTHROPIC_CATALOG_SNAPSHOT } from "../eval/anthropic-catalog.js";
|
|
21
|
+
import { ANTHROPIC_CATALOG_SNAPSHOT, findAnthropicModel } from "../eval/anthropic-catalog.js";
|
|
22
22
|
import { readBenchmark } from "../eval/benchmark.js";
|
|
23
23
|
import { writeHistoryEntry, listHistory, readHistoryEntry, computeRegressions, deleteHistoryEntry, getCaseHistory, computeStats } from "../eval/benchmark-history.js";
|
|
24
24
|
import { judgeAssertion } from "../eval/judge.js";
|
|
@@ -455,6 +455,25 @@ export function buildSkillMetadata(skillDir, origin, root) {
|
|
|
455
455
|
// (which only matters for the `vskill eval run` CLI command).
|
|
456
456
|
// ---------------------------------------------------------------------------
|
|
457
457
|
let currentOverrides = { provider: "claude-cli" };
|
|
458
|
+
// 0682 F-001 — Tracks whether the persistent studio.json selection has been
|
|
459
|
+
// loaded for this process lifetime. Initialized to false; set to true once
|
|
460
|
+
// loadStudioSelection(root) has been attempted (regardless of whether a file
|
|
461
|
+
// existed). Without this flag, the prior gate `if (!currentOverrides.provider)`
|
|
462
|
+
// was permanently false (the default `{ provider: "claude-cli" }` already
|
|
463
|
+
// populates `provider`), so the persisted selection at .vskill/studio.json
|
|
464
|
+
// was silently discarded on every server boot — a CRITICAL regression of
|
|
465
|
+
// AC-US1-03 / FR-005.
|
|
466
|
+
let studioLoaded = false;
|
|
467
|
+
/**
|
|
468
|
+
* 0682 F-001 — Test helper. Resets `currentOverrides` to the default and
|
|
469
|
+
* clears the `studioLoaded` flag so subsequent /api/config calls re-attempt
|
|
470
|
+
* loadStudioSelection. Production code never needs this; it exists solely so
|
|
471
|
+
* vitest can simulate a fresh server boot per-case.
|
|
472
|
+
*/
|
|
473
|
+
export function resetStudioRestoreState() {
|
|
474
|
+
currentOverrides = { provider: "claude-cli" };
|
|
475
|
+
studioLoaded = false;
|
|
476
|
+
}
|
|
458
477
|
/** Return the effective raw model ID (suitable for round-tripping via the API). */
|
|
459
478
|
function getEffectiveRawModel() {
|
|
460
479
|
if (currentOverrides.model)
|
|
@@ -496,11 +515,21 @@ function buildAnthropicProviderModels() {
|
|
|
496
515
|
},
|
|
497
516
|
}));
|
|
498
517
|
}
|
|
518
|
+
function aliasInfo(alias, fallbackLabel) {
|
|
519
|
+
const entry = findAnthropicModel(alias);
|
|
520
|
+
if (!entry)
|
|
521
|
+
return { label: fallbackLabel };
|
|
522
|
+
return { label: entry.displayName, resolvedId: entry.id };
|
|
523
|
+
}
|
|
499
524
|
export const PROVIDER_MODELS = {
|
|
525
|
+
// Opus first so it is the default when no override is set
|
|
526
|
+
// (getEffectiveRawModel returns models[0]). Labels come from the catalog so
|
|
527
|
+
// the picker shows the exact dated version (e.g. "Claude Opus 4.7"), not the
|
|
528
|
+
// bare family name — keeps the Studio truthful when a model is bumped.
|
|
500
529
|
"claude-cli": [
|
|
501
|
-
{ id: "
|
|
502
|
-
{ id: "
|
|
503
|
-
{ id: "haiku",
|
|
530
|
+
{ id: "opus", ...aliasInfo("opus", "Claude Opus") },
|
|
531
|
+
{ id: "sonnet", ...aliasInfo("sonnet", "Claude Sonnet") },
|
|
532
|
+
{ id: "haiku", ...aliasInfo("haiku", "Claude Haiku") },
|
|
504
533
|
],
|
|
505
534
|
"anthropic": buildAnthropicProviderModels(),
|
|
506
535
|
"ollama": [
|
|
@@ -959,8 +988,13 @@ export function registerRoutes(router, root, projectName) {
|
|
|
959
988
|
// (e.g. "claude-sonnet"). The frontend round-trips config.model back to
|
|
960
989
|
// generate-evals and other endpoints, so it must be a valid CLI model ID.
|
|
961
990
|
router.get("/api/config", async (_req, res) => {
|
|
962
|
-
//
|
|
963
|
-
|
|
991
|
+
// 0682 F-001 — Boot-time restoration of .vskill/studio.json selection.
|
|
992
|
+
// Use a dedicated `studioLoaded` flag rather than checking
|
|
993
|
+
// `!currentOverrides.provider` because the module default
|
|
994
|
+
// `{ provider: "claude-cli" }` would make the prior guard permanently
|
|
995
|
+
// false, silently discarding the persisted selection on every boot.
|
|
996
|
+
if (!studioLoaded) {
|
|
997
|
+
studioLoaded = true;
|
|
964
998
|
const stored = loadStudioSelection(root);
|
|
965
999
|
if (stored) {
|
|
966
1000
|
currentOverrides.provider = stored.activeAgent;
|
|
@@ -1154,19 +1188,13 @@ export function registerRoutes(router, root, projectName) {
|
|
|
1154
1188
|
// MUST be registered BEFORE the /:plugin/:skill catch-all below.
|
|
1155
1189
|
// -------------------------------------------------------------------------
|
|
1156
1190
|
const PLATFORM_BASE = "https://verified-skill.com";
|
|
1157
|
-
/**
|
|
1191
|
+
/**
|
|
1192
|
+
* Resolve plugin/skill to full hierarchical API name. Lockfile path first
|
|
1193
|
+
* (installed skills); falls back to authored-skill discovery on disk +
|
|
1194
|
+
* git remote parse for skills authored in this repo. See skill-name-resolver.ts.
|
|
1195
|
+
*/
|
|
1158
1196
|
function resolveSkillApiName(skill) {
|
|
1159
|
-
|
|
1160
|
-
if (!lock)
|
|
1161
|
-
return skill;
|
|
1162
|
-
const entry = lock.skills[skill];
|
|
1163
|
-
if (!entry?.source)
|
|
1164
|
-
return skill;
|
|
1165
|
-
const parsed = parseSource(entry.source);
|
|
1166
|
-
if (parsed.type === "github" || parsed.type === "github-plugin" || parsed.type === "marketplace") {
|
|
1167
|
-
return `${parsed.owner}/${parsed.repo}/${skill}`;
|
|
1168
|
-
}
|
|
1169
|
-
return skill;
|
|
1197
|
+
return resolveSkillApiNameImpl(skill, root);
|
|
1170
1198
|
}
|
|
1171
1199
|
// T-009 (proxy) + 0707 T-021 (harden): Versions endpoint
|
|
1172
1200
|
//
|
|
@@ -1180,7 +1208,7 @@ export function registerRoutes(router, root, projectName) {
|
|
|
1180
1208
|
//
|
|
1181
1209
|
// Never returns 5xx for the "no VCS surface" case — that is normal empty state.
|
|
1182
1210
|
router.get("/api/skills/:plugin/:skill/versions", async (req, res, params) => {
|
|
1183
|
-
const fullName = resolveSkillApiName(params.skill);
|
|
1211
|
+
const fullName = await resolveSkillApiName(params.skill);
|
|
1184
1212
|
const parts = fullName.split("/");
|
|
1185
1213
|
const apiPath = parts.length === 3
|
|
1186
1214
|
? `/api/v1/skills/${parts.map(encodeURIComponent).join("/")}/versions`
|
|
@@ -1231,11 +1259,11 @@ export function registerRoutes(router, root, projectName) {
|
|
|
1231
1259
|
sendJson(res, { error: "Missing required query params: from and to" }, 400, req);
|
|
1232
1260
|
return;
|
|
1233
1261
|
}
|
|
1234
|
-
const fullName = resolveSkillApiName(params.skill);
|
|
1262
|
+
const fullName = await resolveSkillApiName(params.skill);
|
|
1235
1263
|
const parts = fullName.split("/");
|
|
1236
1264
|
const basePath = parts.length === 3
|
|
1237
|
-
? `/api/v1/skills/${parts.map(encodeURIComponent).join("/")}/versions`
|
|
1238
|
-
: `/api/v1/skills/${encodeURIComponent(fullName)}/versions`;
|
|
1265
|
+
? `/api/v1/skills/${parts.map(encodeURIComponent).join("/")}/versions/diff`
|
|
1266
|
+
: `/api/v1/skills/${encodeURIComponent(fullName)}/versions/diff`;
|
|
1239
1267
|
try {
|
|
1240
1268
|
const resp = await fetch(`${PLATFORM_BASE}${basePath}?from=${encodeURIComponent(from)}&to=${encodeURIComponent(to)}`, { signal: AbortSignal.timeout(10_000) });
|
|
1241
1269
|
if (!resp.ok) {
|
|
@@ -1475,7 +1503,10 @@ export function registerRoutes(router, root, projectName) {
|
|
|
1475
1503
|
return;
|
|
1476
1504
|
}
|
|
1477
1505
|
try {
|
|
1478
|
-
|
|
1506
|
+
// 0722: route to OS trash (Trash / Recycle Bin / XDG) instead of hard delete
|
|
1507
|
+
// so accidental deletes are recoverable from the user's native Trash app.
|
|
1508
|
+
const trash = (await import("trash")).default;
|
|
1509
|
+
await trash([skillDir]);
|
|
1479
1510
|
sendJson(res, { ok: true, deleted: `${params.plugin}/${params.skill}` }, 200, req);
|
|
1480
1511
|
}
|
|
1481
1512
|
catch (err) {
|