vskill 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents.json +1 -1
- package/dist/eval-server/api-routes.js +18 -7
- package/dist/eval-server/api-routes.js.map +1 -1
- package/dist/eval-ui/assets/{CreateSkillPage-8zJnqwjr.js → CreateSkillPage-DN-DGWb_.js} +5 -5
- package/dist/eval-ui/assets/{FindSkillsPalette-C_GiEl_q.js → FindSkillsPalette-_QIpTcPE.js} +2 -2
- package/dist/eval-ui/assets/{SearchPaletteCore-D1atnd7R.js → SearchPaletteCore-CThylxw2.js} +2 -2
- package/dist/eval-ui/assets/{SkillDetailPanel-Cg8-E-fm.js → SkillDetailPanel-CYt2aARB.js} +1 -1
- package/dist/eval-ui/assets/{UpdateDropdown-DvdC8JlL.js → UpdateDropdown-CO0ZyyC_.js} +1 -1
- package/dist/eval-ui/assets/{index-Dr23ls2i.js → index-vzRnBcdU.js} +41 -41
- package/dist/eval-ui/index.html +1 -1
- package/package.json +1 -1
package/dist/eval-server/api-routes.js
CHANGED
|
@@ -951,6 +951,17 @@ function getEffectiveRawModel() {
|
|
|
951
951
|
function getClient() {
|
|
952
952
|
return createLlmClient(currentOverrides);
|
|
953
953
|
}
|
|
954
|
+
// Per-request client: prefer body-supplied provider/model, then session globals.
|
|
955
|
+
// Lets the frontend send the user's selected model with each request so historical
|
|
956
|
+
// runs reliably reflect the picker value rather than a stale session default.
|
|
957
|
+
function clientFromBody(body) {
|
|
958
|
+
const reqProvider = typeof body?.provider === "string" ? body.provider : undefined;
|
|
959
|
+
const reqModel = typeof body?.model === "string" ? body.model : undefined;
|
|
960
|
+
const provider = (reqProvider || currentOverrides.provider || "claude-cli");
|
|
961
|
+
const model = reqModel || currentOverrides.model;
|
|
962
|
+
const client = createLlmClient({ provider, model });
|
|
963
|
+
return { client, provider };
|
|
964
|
+
}
|
|
954
965
|
/** Derive sidebar badge status from benchmark + current eval IDs. */
|
|
955
966
|
function computeBenchmarkStatus(benchmark, evalIds, hasEvals) {
|
|
956
967
|
if (!benchmark)
|
|
@@ -2549,7 +2560,7 @@ export function registerRoutes(router, root, projectName) {
|
|
|
2549
2560
|
const evals = loadAndValidateEvals(skillDir);
|
|
2550
2561
|
const skillMdPath = join(skillDir, "SKILL.md");
|
|
2551
2562
|
const skillContent = existsSync(skillMdPath) ? readFileSync(skillMdPath, "utf-8") : "";
|
|
2552
|
-
const client =
|
|
2563
|
+
const { client, provider: effectiveProvider } = clientFromBody(body);
|
|
2553
2564
|
const systemPrompt = buildEvalSystemPrompt(skillContent);
|
|
2554
2565
|
// Create separate judge client if judgeModel is specified
|
|
2555
2566
|
let judgeClient;
|
|
@@ -2570,7 +2581,7 @@ export function registerRoutes(router, root, projectName) {
|
|
|
2570
2581
|
}
|
|
2571
2582
|
await runBenchmarkSSE({
|
|
2572
2583
|
res, skillDir, skillName: evals.skill_name, systemPrompt,
|
|
2573
|
-
runType: "benchmark", provider:
|
|
2584
|
+
runType: "benchmark", provider: effectiveProvider,
|
|
2574
2585
|
evalCases: evals.evals, filterIds, client, judgeClient, judgeCache,
|
|
2575
2586
|
isAborted: () => aborted, concurrency,
|
|
2576
2587
|
});
|
|
@@ -2592,11 +2603,11 @@ export function registerRoutes(router, root, projectName) {
|
|
|
2592
2603
|
initSSE(res, req);
|
|
2593
2604
|
try {
|
|
2594
2605
|
const evals = loadAndValidateEvals(skillDir);
|
|
2595
|
-
const client =
|
|
2606
|
+
const { client, provider: effectiveProvider } = clientFromBody(body);
|
|
2596
2607
|
await runBenchmarkSSE({
|
|
2597
2608
|
res, skillDir, skillName: evals.skill_name,
|
|
2598
2609
|
systemPrompt: "You are a helpful AI assistant.",
|
|
2599
|
-
runType: "baseline", provider:
|
|
2610
|
+
runType: "baseline", provider: effectiveProvider,
|
|
2600
2611
|
evalCases: evals.evals, filterIds, client, isAborted: () => aborted,
|
|
2601
2612
|
});
|
|
2602
2613
|
}
|
|
@@ -2636,14 +2647,14 @@ export function registerRoutes(router, root, projectName) {
|
|
|
2636
2647
|
}
|
|
2637
2648
|
const skillMdPath = join(skillDir, "SKILL.md");
|
|
2638
2649
|
const skillContent = existsSync(skillMdPath) ? readFileSync(skillMdPath, "utf-8") : "";
|
|
2639
|
-
const client =
|
|
2650
|
+
const { client, provider: effectiveProvider } = clientFromBody(body);
|
|
2640
2651
|
const systemPrompt = isBaseline
|
|
2641
2652
|
? buildBaselineSystemPrompt()
|
|
2642
2653
|
: buildEvalSystemPrompt(skillContent);
|
|
2643
2654
|
await sem.acquire();
|
|
2644
2655
|
const benchCase = await runSingleCaseSSE({
|
|
2645
2656
|
res, evalCase, systemPrompt, client, isAborted: () => aborted,
|
|
2646
|
-
provider:
|
|
2657
|
+
provider: effectiveProvider,
|
|
2647
2658
|
});
|
|
2648
2659
|
if (!released) {
|
|
2649
2660
|
released = true;
|
|
@@ -2659,7 +2670,7 @@ export function registerRoutes(router, root, projectName) {
|
|
|
2659
2670
|
cases: [benchCase],
|
|
2660
2671
|
overall_pass_rate: benchCase.pass_rate,
|
|
2661
2672
|
type: isBaseline ? "baseline" : "benchmark",
|
|
2662
|
-
provider:
|
|
2673
|
+
provider: effectiveProvider,
|
|
2663
2674
|
totalDurationMs: benchCase.durationMs ?? 0,
|
|
2664
2675
|
totalInputTokens: benchCase.inputTokens ?? null,
|
|
2665
2676
|
totalOutputTokens: benchCase.outputTokens ?? null,
|