vskill 0.5.141 → 0.5.143
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents.json +1 -1
- package/dist/eval/activation-tester.d.ts +5 -0
- package/dist/eval/activation-tester.js +25 -5
- package/dist/eval/activation-tester.js.map +1 -1
- package/dist/eval/test-case-parser.d.ts +8 -0
- package/dist/eval/test-case-parser.js +74 -0
- package/dist/eval/test-case-parser.js.map +1 -0
- package/dist/eval-server/api-routes.d.ts +18 -0
- package/dist/eval-server/api-routes.js +127 -0
- package/dist/eval-server/api-routes.js.map +1 -1
- package/dist/eval-ui/assets/{CommandPalette-COMOl8Vg.js → CommandPalette-CEp1wNr0.js} +1 -1
- package/dist/eval-ui/assets/{CreateSkillPage-DqUUj-0q.js → CreateSkillPage-DZSf85Db.js} +1 -1
- package/dist/eval-ui/assets/{FindSkillsPalette-Cy98Ygh7.js → FindSkillsPalette-CvN0J9-i.js} +2 -2
- package/dist/eval-ui/assets/{SearchPaletteCore-CV6YIjYd.js → SearchPaletteCore-BLRubqQS.js} +1 -1
- package/dist/eval-ui/assets/{SkillDetailPanel-ctAUsQxo.js → SkillDetailPanel-Bb0NxXCs.js} +1 -1
- package/dist/eval-ui/assets/UpdateDropdown-Bl3VyBRb.js +1 -0
- package/dist/eval-ui/assets/index-5lXKRaDO.js +122 -0
- package/dist/eval-ui/assets/{index-ByzTygib.css → index-CycZyHaL.css} +1 -1
- package/dist/eval-ui/index.html +2 -2
- package/package.json +3 -2
- package/dist/eval-ui/assets/UpdateDropdown-WsXxpeur.js +0 -1
- package/dist/eval-ui/assets/index-B1nvsGfw.js +0 -110
package/agents.json
CHANGED
|
@@ -7,6 +7,7 @@ export interface ActivationPrompt {
|
|
|
7
7
|
prompt: string;
|
|
8
8
|
expected: "should_activate" | "should_not_activate" | "auto";
|
|
9
9
|
}
|
|
10
|
+
export type Verdict = "ok" | "scope_warning" | "drift_warning";
|
|
10
11
|
export interface ActivationResult {
|
|
11
12
|
prompt: string;
|
|
12
13
|
expected: "should_activate" | "should_not_activate";
|
|
@@ -15,6 +16,7 @@ export interface ActivationResult {
|
|
|
15
16
|
reasoning: string;
|
|
16
17
|
classification: "TP" | "TN" | "FP" | "FN";
|
|
17
18
|
autoClassified?: boolean;
|
|
19
|
+
verdict: Verdict;
|
|
18
20
|
}
|
|
19
21
|
export interface ActivationSummary {
|
|
20
22
|
results: ActivationResult[];
|
|
@@ -26,6 +28,9 @@ export interface ActivationSummary {
|
|
|
26
28
|
tn: number;
|
|
27
29
|
fp: number;
|
|
28
30
|
fn: number;
|
|
31
|
+
scopeWarnings: number;
|
|
32
|
+
driftWarnings: number;
|
|
29
33
|
autoClassifiedCount: number;
|
|
30
34
|
}
|
|
31
35
|
export declare function testActivation(skillDescription: string, prompts: ActivationPrompt[], client: LlmClient, onResult?: (result: ActivationResult) => void, meta?: SkillMeta, onProgress?: (phase: "classifying", index: number, total: number) => void): Promise<ActivationSummary>;
|
|
36
|
+
export declare function computeVerdict(autoClassified: boolean | undefined, expected: "should_activate" | "should_not_activate", actual: boolean): Verdict;
|
|
@@ -79,6 +79,7 @@ Would this user prompt trigger this skill?`;
|
|
|
79
79
|
? json.confidence
|
|
80
80
|
: "low";
|
|
81
81
|
const classification = classifyResult(p.expected, activate);
|
|
82
|
+
const verdict = computeVerdict(p.autoClassified, p.expected, activate);
|
|
82
83
|
const result = {
|
|
83
84
|
prompt: p.prompt,
|
|
84
85
|
expected: p.expected,
|
|
@@ -87,6 +88,7 @@ Would this user prompt trigger this skill?`;
|
|
|
87
88
|
reasoning: String(json.reasoning || ""),
|
|
88
89
|
classification,
|
|
89
90
|
autoClassified: p.autoClassified,
|
|
91
|
+
verdict,
|
|
90
92
|
};
|
|
91
93
|
results.push(result);
|
|
92
94
|
onResult?.(result);
|
|
@@ -100,6 +102,7 @@ Would this user prompt trigger this skill?`;
|
|
|
100
102
|
reasoning: `Error: ${err instanceof Error ? err.message : String(err)}`,
|
|
101
103
|
classification: p.expected === "should_activate" ? "FN" : "TN",
|
|
102
104
|
autoClassified: p.autoClassified,
|
|
105
|
+
verdict: "ok",
|
|
103
106
|
};
|
|
104
107
|
results.push(result);
|
|
105
108
|
onResult?.(result);
|
|
@@ -116,22 +119,39 @@ function classifyResult(expected, actual) {
|
|
|
116
119
|
return "TN";
|
|
117
120
|
return "FP";
|
|
118
121
|
}
|
|
122
|
+
/**
 * Decide whether a result is a hard outcome ("ok") or a soft warning.
 *
 * Only auto-classified expectations can be downgraded to a warning: a
 * disagreement with an auto label is not treated as a real FP/FN. Manual
 * labels carry user authority, so their disagreements stay strict and always
 * yield "ok" here. See increment 0775.
 *
 * @param autoClassified truthy when the expectation was assigned automatically
 * @param expected       "should_activate" or "should_not_activate"
 * @param actual         truthy when the skill activated
 * @returns "scope_warning" when an auto-labelled "should_not_activate" prompt
 *          activated, "drift_warning" when an auto-labelled "should_activate"
 *          prompt did not, otherwise "ok"
 */
export function computeVerdict(autoClassified, expected, actual) {
    if (autoClassified) {
        switch (expected) {
            case "should_not_activate":
                if (actual)
                    return "scope_warning";
                break;
            case "should_activate":
                if (!actual)
                    return "drift_warning";
                break;
        }
    }
    return "ok";
}
|
|
119
133
|
/**
 * Aggregate per-prompt results into the activation summary.
 *
 * Results whose verdict is a warning are counted in `scopeWarnings` /
 * `driftWarnings` and are excluded from the TP/TN/FP/FN tallies, so
 * precision, recall and reliability are computed over "ok" results only.
 * `total` still counts every result.
 *
 * @param results list of activation results (each with `classification`,
 *                `verdict` and optional `autoClassified`)
 * @returns the summary object; ratios are 0 when their denominator is 0
 */
function computeSummary(results) {
    let tp = 0;
    let tn = 0;
    let fp = 0;
    let fn = 0;
    let scopeWarnings = 0;
    let driftWarnings = 0;
    let autoClassifiedCount = 0;
    for (const entry of results) {
        if (entry.verdict === "ok") {
            // Only hard outcomes contribute to the confusion matrix.
            switch (entry.classification) {
                case "TP":
                    tp += 1;
                    break;
                case "TN":
                    tn += 1;
                    break;
                case "FP":
                    fp += 1;
                    break;
                case "FN":
                    fn += 1;
                    break;
            }
        }
        else if (entry.verdict === "scope_warning") {
            scopeWarnings += 1;
        }
        else if (entry.verdict === "drift_warning") {
            driftWarnings += 1;
        }
        if (entry.autoClassified) {
            autoClassifiedCount += 1;
        }
    }
    const predictedPositive = tp + fp;
    const actualPositive = tp + fn;
    const scoredTotal = tp + tn + fp + fn;
    return {
        results,
        precision: predictedPositive > 0 ? tp / predictedPositive : 0,
        recall: actualPositive > 0 ? tp / actualPositive : 0,
        reliability: scoredTotal > 0 ? (tp + tn) / scoredTotal : 0,
        total: results.length,
        tp,
        tn,
        fp,
        fn,
        scopeWarnings,
        driftWarnings,
        autoClassifiedCount,
    };
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"activation-tester.js","sourceRoot":"","sources":["../../src/eval/activation-tester.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;
|
|
1
|
+
{"version":3,"file":"activation-tester.js","sourceRoot":"","sources":["../../src/eval/activation-tester.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;AA0C9E,MAAM,wBAAwB,GAAG;;;;;;;;;;;;EAY/B,CAAC;AAEH,MAAM,sBAAsB,GAAG;;;wBAGP,CAAC;AAEzB,8EAA8E;AAC9E,kEAAkE;AAClE,8EAA8E;AAE9E,KAAK,UAAU,mBAAmB,CAChC,IAAe,EACf,MAAc,EACd,MAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,UAAU,IAAI,CAAC,IAAI,WAAW,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,oBAAoB,MAAM,EAAE,CAAC;QAClG,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,sBAAsB,EAAE,UAAU,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAC7E,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9C,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,qBAAqB,CAAC;IAClE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,iBAAiB,CAAC;IAC3B,CAAC;AACH,CAAC;AAYD,KAAK,UAAU,cAAc,CAC3B,OAA2B,EAC3B,MAAiB,EACjB,IAAgB,EAChB,UAAyE;IAEzE,MAAM,QAAQ,GAAqB,EAAE,CAAC;IACtC,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IACjF,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;YAC1B,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,QAAQ,GAAG,MAAM,mBAAmB,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;gBACnE,UAAU,EAAE,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;gBACpD,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,iBAAiB,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;YACzF,CAAC;QACH,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;QACnF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,8EAA8E;AAC9E,yDAAyD;AACzD,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,gBAAwB,EACxB,OAA2B,EAC3B,MAAiB,EACjB,QAA6C,EAC7C,IAAgB,EAChB,UAAyE;IAEzE,qCA
AqC;IACrC,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;IAEzE,oDAAoD;IACpD,MAAM,OAAO,GAAuB,EAAE,CAAC;IAEvC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG;EACrB,gBAAgB;;;EAGhB,CAAC,CAAC,MAAM;;2CAEiC,CAAC;QAExC,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,wBAAwB,EAAE,UAAU,CAAC,CAAC;YACvF,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,8BAA8B,CAAC,IAAI,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;YACrF,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC,CAAC;YAE9C,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;YACjC,MAAM,UAAU,GAAG,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC;gBACpE,CAAC,CAAE,IAAI,CAAC,UAAwC;gBAChD,CAAC,CAAC,KAAK,CAAC;YAEV,MAAM,cAAc,GAAG,cAAc,CAAC,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAC5D,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,cAAc,EAAE,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAEvE,MAAM,MAAM,GAAqB;gBAC/B,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,QAAQ;gBACR,UAAU;gBACV,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,CAAC;gBACvC,cAAc;gBACd,cAAc,EAAE,CAAC,CAAC,cAAc;gBAChC,OAAO;aACR,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC;QACrB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,MAAM,GAAqB;gBAC/B,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,QAAQ,EAAE,KAAK;gBACf,UAAU,EAAE,KAAK;gBACjB,SAAS,EAAE,UAAU,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;gBACvE,cAAc,EAAE,CAAC,CAAC,QAAQ,KAAK,iBAAiB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI;gBAC9D,cAAc,EAAE,CAAC,CAAC,cAAc;gBAChC,OAAO,EAAE,IAAI;aACd,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,cAAc,CAAC,OAAO,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,cAAc,CACrB,QAAmD,EACnD,MAAe;IAEf,IAAI,QAAQ,KAAK,iBAAiB,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAC1D,IAAI,QAAQ,KAAK,iBAAiB,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAC3D,IAAI,QAAQ,KAAK,qBAAqB,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAC/D,OAAO,IAAI,CAAC;AACd,CAAC;AAED
,gFAAgF;AAChF,gFAAgF;AAChF,MAAM,UAAU,cAAc,CAC5B,cAAmC,EACnC,QAAmD,EACnD,MAAe;IAEf,IAAI,CAAC,cAAc;QAAE,OAAO,IAAI,CAAC;IACjC,IAAI,QAAQ,KAAK,qBAAqB,IAAI,MAAM;QAAE,OAAO,eAAe,CAAC;IACzE,IAAI,QAAQ,KAAK,iBAAiB,IAAI,CAAC,MAAM;QAAE,OAAO,eAAe,CAAC;IACtE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,OAA2B;IACjD,MAAM,EAAE,GAAG,CAAC,CAAmB,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC;IACvD,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,eAAe,CAAC,CAAC,MAAM,CAAC;IAClF,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,eAAe,CAAC,CAAC,MAAM,CAAC;IAClF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IAC7B,MAAM,WAAW,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAEtC,OAAO;QACL,OAAO;QACP,SAAS,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxC,WAAW,EAAE,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1D,KAAK;QACL,EAAE;QACF,EAAE;QACF,EAAE;QACF,EAAE;QACF,aAAa;QACb,aAAa;QACb,mBAAmB,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,MAAM;KACpE,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export type TestCaseExpected = "should_activate" | "should_not_activate" | "auto";
|
|
2
|
+
export interface ParsedTestCase {
|
|
3
|
+
prompt: string;
|
|
4
|
+
expected: TestCaseExpected;
|
|
5
|
+
}
|
|
6
|
+
export declare function parseTestCases(content: string): ParsedTestCase[];
|
|
7
|
+
export declare function serializeTestCases(prompts: ParsedTestCase[]): string;
|
|
8
|
+
export declare function upsertTestCasesIntoSkillMd(content: string, prompts: ParsedTestCase[]): string;
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// test-case-parser.ts — author-anchored activation-test fixtures in SKILL.md
|
|
3
|
+
//
|
|
4
|
+
// Ports the `## Test Cases` parser from vskill-platform's
|
|
5
|
+
// src/lib/eval/prompt-generator.ts:22-48 (parseAuthorTestCases) and adds a
|
|
6
|
+
// matching writer + upsert helper. The shape is intentionally identical to the
|
|
7
|
+
// platform's so a single SKILL.md can be consumed by both systems.
|
|
8
|
+
//
|
|
9
|
+
// See increment 0776 for the why.
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Matches the `## Test Cases` heading (case-insensitive) and captures its body
// up to the next `## ` heading, a `---` line, or the end of the input.
const SECTION_RE = /## Test Cases\s*\n([\s\S]*?)(?=\n## |\n---|\n$|$)/i;
// Matches one `- Prompt: "…"` / `Expected: "…"` pair inside the section body.
const PAIR_RE = /-\s*Prompt:\s*"([^"]+)"\s*\n\s*Expected:\s*"([^"]+)"/gi;
/**
 * Parse the `## Test Cases` section of a SKILL.md document.
 *
 * @param content full SKILL.md text (may be empty)
 * @returns one `{ prompt, expected }` entry per Prompt/Expected pair, in
 *          document order; an empty array when there is no content or no
 *          `## Test Cases` section
 */
export function parseTestCases(content) {
    if (!content)
        return [];
    const sectionMatch = content.match(SECTION_RE);
    if (!sectionMatch)
        return [];
    // A fresh global regex per call keeps this function free of shared
    // `lastIndex` state.
    const pair = new RegExp(PAIR_RE.source, "gi");
    return Array.from(sectionMatch[1].matchAll(pair), (m) => ({
        prompt: m[1],
        expected: textToExpected(m[2]),
    }));
}
/**
 * Render test cases as a `## Test Cases` markdown section.
 *
 * @param prompts test cases to write
 * @returns the section text ending in a newline, or "" when `prompts` is empty
 */
export function serializeTestCases(prompts) {
    if (prompts.length === 0)
        return "";
    // The `Expected` line is indented two spaces under its bullet.
    const lines = prompts.map((p) => `- Prompt: "${p.prompt}"\n  Expected: "${expectedToText(p.expected)}"`);
    return `## Test Cases\n\n${lines.join("\n")}\n`;
}
/**
 * Replace-or-append the `## Test Cases` block in a SKILL.md document.
 *
 * - Empty `prompts` and no existing section: `content` is returned unchanged.
 * - Empty `prompts` and an existing section: the section is removed.
 * - Otherwise the existing section is replaced in place, or a new section is
 *   appended after a blank line.
 *
 * Except for the unchanged case, the result ends with exactly one newline.
 *
 * @param content current SKILL.md text
 * @param prompts test cases to store
 * @returns the updated SKILL.md text
 */
export function upsertTestCasesIntoSkillMd(content, prompts) {
    const trimmed = content.replace(/\s+$/, "");
    const hasSection = SECTION_RE.test(trimmed);
    if (prompts.length === 0) {
        if (!hasSection)
            return content;
        return removeSection(trimmed) + "\n";
    }
    const block = serializeTestCases(prompts).trimEnd();
    if (hasSection) {
        // A replacer function is required here: with a string replacement,
        // `$&`, `$1`, `$$` etc. inside a prompt would be expanded as replacement
        // patterns. The whitespace the lazy match swallowed before the next
        // section boundary is re-appended so the following section keeps its
        // original spacing.
        const replaced = trimmed.replace(SECTION_RE, (matched) => block + matched.slice(matched.trimEnd().length));
        return replaced + "\n";
    }
    return trimmed + "\n\n" + block + "\n";
}
/**
 * Remove the `## Test Cases` section (heading, body and the newlines that
 * precede the heading), then collapse runs of three or more newlines anywhere
 * in the document to a single blank line and strip trailing whitespace.
 */
function removeSection(content) {
    return content
        .replace(/\n*## Test Cases\s*\n[\s\S]*?(?=\n## |\n---|\n$|$)/i, "")
        .replace(/\n{3,}/g, "\n\n")
        .replace(/\s+$/, "");
}
/**
 * Map the human-readable `Expected` text to its enum value. Matching ignores
 * case and surrounding whitespace; anything unrecognised becomes "auto".
 */
function textToExpected(raw) {
    const norm = raw.trim().toLowerCase();
    if (norm === "should activate")
        return "should_activate";
    if (norm === "should not activate")
        return "should_not_activate";
    return "auto";
}
/** Map an expectation enum value to the text written into SKILL.md. */
function expectedToText(expected) {
    if (expected === "should_activate")
        return "should activate";
    if (expected === "should_not_activate")
        return "should not activate";
    return "auto";
}
|
|
74
|
+
//# sourceMappingURL=test-case-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-case-parser.js","sourceRoot":"","sources":["../../src/eval/test-case-parser.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6EAA6E;AAC7E,EAAE;AACF,0DAA0D;AAC1D,2EAA2E;AAC3E,+EAA+E;AAC/E,mEAAmE;AACnE,EAAE;AACF,kCAAkC;AAClC,8EAA8E;AAS9E,MAAM,UAAU,GAAG,oDAAoD,CAAC;AACxE,MAAM,OAAO,GAAG,wDAAwD,CAAC;AAEzE,MAAM,UAAU,cAAc,CAAC,OAAe;IAC5C,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IACxB,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAC/C,IAAI,CAAC,YAAY;QAAE,OAAO,EAAE,CAAC;IAC7B,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;IAEhC,MAAM,KAAK,GAAqB,EAAE,CAAC;IACnC,yEAAyE;IACzE,MAAM,IAAI,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAC9C,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,OAAyB;IAC1D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CACvB,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,MAAM,mBAAmB,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAC9E,CAAC;IACF,OAAO,oBAAoB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;AAClD,CAAC;AAED,0EAA0E;AAC1E,2EAA2E;AAC3E,MAAM,UAAU,0BAA0B,CACxC,OAAe,EACf,OAAyB;IAEzB,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC5C,MAAM,UAAU,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAE5C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU;YAAE,OAAO,OAAO,CAAC;QAChC,OAAO,aAAa,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC;IACvC,CAAC;IAED,MAAM,KAAK,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,CAAC;IACpD,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,IAAI,CAAC;IACnD,CAAC;IACD,OAAO,OAAO,GAAG,MAAM,GAAG,KAAK,GAAG,IAAI,CAAC;AACzC,CAAC;AAED,SAAS,aAAa,CAAC,OAAe;IACpC,wEAAwE;IACxE,0DAA0D;IAC1D,OAAO,OAAO;SACX,OAAO,CAAC,qDAAqD,EAAE,EAAE,CAAC;SAClE,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,cAAc,CAAC,GAAW;IACjC
,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACtC,IAAI,IAAI,KAAK,iBAAiB;QAAE,OAAO,iBAAiB,CAAC;IACzD,IAAI,IAAI,KAAK,qBAAqB;QAAE,OAAO,qBAAqB,CAAC;IACjE,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,cAAc,CAAC,QAA0B;IAChD,IAAI,QAAQ,KAAK,iBAAiB;QAAE,OAAO,iBAAiB,CAAC;IAC7D,IAAI,QAAQ,KAAK,qBAAqB;QAAE,OAAO,qBAAqB,CAAC;IACrE,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -43,6 +43,24 @@ export interface AgentsResponse {
|
|
|
43
43
|
consumers: string[];
|
|
44
44
|
}>;
|
|
45
45
|
}
|
|
46
|
+
export interface PlatformHealth {
|
|
47
|
+
degraded: boolean;
|
|
48
|
+
reason: string | null;
|
|
49
|
+
statsAgeMs: number;
|
|
50
|
+
oldestActiveAgeMs: number;
|
|
51
|
+
}
|
|
52
|
+
/** Test hook — clear the 60 s cache so the next computePlatformHealth re-fetches. */
|
|
53
|
+
export declare function resetPlatformHealthCache(): void;
|
|
54
|
+
/**
|
|
55
|
+
* 0778 — Compute platform health by probing two upstream verified-skill.com
|
|
56
|
+
* endpoints. Bounded by a 1500 ms timeout. Errors of any kind return the
|
|
57
|
+
* safe fallback so the studio never amber-flashes on user wifi blips.
|
|
58
|
+
*/
|
|
59
|
+
export declare function computePlatformHealth(opts?: {
|
|
60
|
+
fetchImpl?: typeof fetch;
|
|
61
|
+
/** Test-only: bypass the in-memory cache. */
|
|
62
|
+
skipCache?: boolean;
|
|
63
|
+
}): Promise<PlatformHealth>;
|
|
46
64
|
/** Test hook — clear the 30 s cache so the next buildAgentsResponse() re-scans. */
|
|
47
65
|
export declare function resetAgentPresenceCache(): void;
|
|
48
66
|
interface BuildAgentsOptions {
|
|
@@ -32,6 +32,7 @@ import { computeVerdict } from "../eval/verdict.js";
|
|
|
32
32
|
import { generateActionItems } from "../eval/action-items.js";
|
|
33
33
|
import { buildEvalInitPrompt, parseGeneratedEvals, buildIntegrationEvalPrompt, parseGeneratedIntegrationEvals, detectBrowserRequirements, detectPlatformTargets } from "../eval/prompt-builder.js";
|
|
34
34
|
import { testActivation } from "../eval/activation-tester.js";
|
|
35
|
+
import { parseTestCases, upsertTestCasesIntoSkillMd, } from "../eval/test-case-parser.js";
|
|
35
36
|
import { detectMcpDependencies, detectSkillDependencies } from "../eval/mcp-detector.js";
|
|
36
37
|
import { writeActivationRun, listActivationRuns, getActivationRun } from "../eval/activation-history.js";
|
|
37
38
|
import { AGENTS_REGISTRY, detectInstalledAgents } from "../agents/agents-registry.js";
|
|
@@ -65,6 +66,81 @@ export function buildInstalledAgentsResponse(detectedAgents) {
|
|
|
65
66
|
}
|
|
66
67
|
let agentPresenceCache = null;
|
|
67
68
|
const AGENT_PRESENCE_CACHE_TTL = 30_000;
|
|
69
|
+
/** How long a computed platform-health result is served from memory (ms). */
const PLATFORM_HEALTH_CACHE_TTL = 60 * 1000;
/** Abort deadline applied to the upstream health probes (ms). */
const PLATFORM_HEALTH_TIMEOUT_MS = 1_500;
/** Heartbeat age above which the platform is reported as degraded (30 min). */
const PLATFORM_HEARTBEAT_STALE_MS = 30 * 60_000;
/** Oldest-active-submission age above which the platform is reported as degraded (24 h). */
const PLATFORM_OLDEST_ACTIVE_STALE_MS = 24 * 3_600_000;
/** Last computed health result as `{ data, ts }`, or null when nothing is cached. */
let platformHealthCache = null;
/** Test hook — drop the cached result so the next computePlatformHealth re-fetches. */
export function resetPlatformHealthCache() {
    platformHealthCache = null;
}
|
|
78
|
+
/**
 * Render a millisecond duration as a short label: "42s", "17m", "3h 5m",
 * "3h" or "2d".
 *
 * The value is rounded BEFORE the unit is chosen, so a duration just below a
 * unit boundary carries into the next unit instead of producing an
 * out-of-range field such as "60s", "60m" or "1h 60m".
 *
 * @param ms duration in milliseconds
 * @returns the formatted label
 */
function formatDuration(ms) {
    const seconds = Math.round(ms / 1000);
    if (seconds < 60)
        return `${seconds}s`;
    const minutes = Math.round(ms / 60_000);
    if (minutes < 60)
        return `${minutes}m`;
    if (minutes < 24 * 60) {
        const h = Math.floor(minutes / 60);
        const m = minutes % 60;
        // Whole hours omit the minutes part.
        return m > 0 ? `${h}h ${m}m` : `${h}h`;
    }
    return `${Math.round(ms / 86_400_000)}d`;
}
|
|
90
|
+
/**
 * Result returned whenever the upstream probes cannot be completed. It is
 * deliberately NOT marked degraded; the `reason` field records that the
 * platform could not be reached.
 */
const SAFE_FALLBACK = {
    degraded: false,
    reason: "platform-unreachable",
    statsAgeMs: 0,
    oldestActiveAgeMs: 0,
};
/**
 * 0778 — Compute platform health by probing two upstream verified-skill.com
 * endpoints (submission stats and queue health) in parallel, bounded by a
 * shared PLATFORM_HEALTH_TIMEOUT_MS abort signal.
 *
 * The platform is reported as degraded when the stats payload says so, when
 * the heartbeat age exceeds PLATFORM_HEARTBEAT_STALE_MS, or when the oldest
 * active submission age exceeds PLATFORM_OLDEST_ACTIVE_STALE_MS; all matching
 * reasons are joined with "; ".
 *
 * Errors of any kind (network failure, timeout, non-2xx, malformed JSON)
 * yield a copy of SAFE_FALLBACK so the studio never amber-flashes on user
 * wifi blips. Every outcome, including the fallback, is cached for
 * PLATFORM_HEALTH_CACHE_TTL.
 *
 * @param opts.fetchImpl fetch implementation to use (defaults to global fetch)
 * @param opts.skipCache test-only: ignore a fresh cached result
 * @returns the platform health snapshot; never rejects
 */
export async function computePlatformHealth(opts = {}) {
    const f = opts.fetchImpl ?? fetch;
    const now = Date.now();
    if (!opts.skipCache &&
        platformHealthCache &&
        now - platformHealthCache.ts < PLATFORM_HEALTH_CACHE_TTL) {
        return platformHealthCache.data;
    }
    // Upstream ages are untrusted JSON: anything that does not coerce to a
    // finite number is treated as 0 so NaN never reaches the cache or the JSON
    // response (where it would serialize as null).
    const toAgeMs = (value) => {
        const n = Number(value ?? 0);
        return Number.isFinite(n) ? n : 0;
    };
    let result;
    try {
        const signal = AbortSignal.timeout(PLATFORM_HEALTH_TIMEOUT_MS);
        const [statsRes, queueRes] = await Promise.all([
            f("https://verified-skill.com/api/v1/submissions/stats", { signal }),
            f("https://verified-skill.com/api/v1/queue/health", { signal }),
        ]);
        if (!statsRes.ok || !queueRes.ok)
            throw new Error("upstream non-2xx");
        const stats = (await statsRes.json());
        const queue = (await queueRes.json());
        const statsAgeMs = toAgeMs(queue.statsAge?.ageMs);
        const oldestActiveAgeMs = toAgeMs(queue.oldestActive?.ageMs);
        const reasons = [];
        if (stats.degraded === true)
            reasons.push("platform reports degraded");
        if (statsAgeMs > PLATFORM_HEARTBEAT_STALE_MS) {
            reasons.push(`heartbeat stale ${formatDuration(statsAgeMs)}`);
        }
        if (oldestActiveAgeMs > PLATFORM_OLDEST_ACTIVE_STALE_MS) {
            reasons.push(`oldest active submission ${formatDuration(oldestActiveAgeMs)}`);
        }
        result = {
            degraded: reasons.length > 0,
            reason: reasons.length > 0 ? reasons.join("; ") : null,
            statsAgeMs,
            oldestActiveAgeMs,
        };
    }
    catch {
        // Best-effort by design: any failure maps to the non-degraded fallback.
        result = { ...SAFE_FALLBACK };
    }
    // The timestamp is taken before the probes, so the TTL is measured from
    // the start of this call.
    platformHealthCache = { data: result, ts: now };
    return result;
}
|
|
68
144
|
/** Test hook — clear the 30 s cache so the next buildAgentsResponse() re-scans. */
|
|
69
145
|
export function resetAgentPresenceCache() {
|
|
70
146
|
agentPresenceCache = null;
|
|
@@ -1653,6 +1729,17 @@ export function registerRoutes(router, root, projectName) {
|
|
|
1653
1729
|
// `localSkill` so the Studio's bell dropdown can render tooltips and route
|
|
1654
1730
|
// smart clicks via `revealSkill` instead of guessing local fs identifiers
|
|
1655
1731
|
// from the canonical platform name.
|
|
1732
|
+
// 0778 — Platform health proxy. Serves the small JSON shape the bell uses to
// surface upstream-degraded state. Always answers 200: if anything in the
// handler throws, the safe fallback payload is sent instead.
const servePlatformHealthRoute = async (req, res) => {
    try {
        const health = await computePlatformHealth();
        sendJson(res, health, 200, req);
    }
    catch {
        sendJson(res, { ...SAFE_FALLBACK }, 200, req);
    }
};
router.get("/api/platform/health", servePlatformHealthRoute);
|
|
1656
1743
|
router.get("/api/skills/updates", async (req, res) => {
|
|
1657
1744
|
try {
|
|
1658
1745
|
const { getOutdatedJson } = await import("../commands/outdated.js");
|
|
@@ -2959,6 +3046,46 @@ export function registerRoutes(router, root, projectName) {
|
|
|
2959
3046
|
sendSSEDone(res, { error: err instanceof Error ? err.message : String(err) });
|
|
2960
3047
|
}
|
|
2961
3048
|
});
|
|
3049
|
+
// GET — return the test cases parsed from the `## Test Cases` block of the
// skill's SKILL.md (increment 0776). A missing file is treated as empty
// content; `source` is "skill-md" when at least one case was parsed, else null.
router.get("/api/skills/:plugin/:skill/test-cases", (req, res, params) => {
    const skillMdPath = join(resolveSkillDir(root, params.plugin, params.skill), "SKILL.md");
    let content = "";
    if (existsSync(skillMdPath)) {
        content = readFileSync(skillMdPath, "utf-8");
    }
    const prompts = parseTestCases(content);
    const source = prompts.length > 0 ? "skill-md" : null;
    sendJson(res, { prompts, source }, 200, req);
});
|
|
3057
|
+
// PUT — upsert the `## Test Cases` block in SKILL.md (increment 0776).
// Empty prompts array removes the section. The body must be
// `{ prompts: [{ prompt, expected }] }`; invalid input is answered with 400
// and nothing is written. SKILL.md (and its directory) is created when missing.
router.put("/api/skills/:plugin/:skill/test-cases", async (req, res, params) => {
    const skillDir = resolveSkillDir(root, params.plugin, params.skill);
    const skillMdPath = join(skillDir, "SKILL.md");
    const body = (await readBody(req));
    if (!body || !Array.isArray(body.prompts)) {
        sendJson(res, { ok: false, error: "Body must be { prompts: ParsedTestCase[] }" }, 400, req);
        return;
    }
    const allowed = ["should_activate", "should_not_activate", "auto"];
    for (const p of body.prompts) {
        if (!p || typeof p.prompt !== "string" || p.prompt.length === 0) {
            sendJson(res, { ok: false, error: "Each prompt must have a non-empty string prompt" }, 400, req);
            return;
        }
        // The on-disk format wraps prompts in double quotes with no escaping.
        if (p.prompt.includes('"')) {
            sendJson(res, { ok: false, error: 'Prompt strings may not contain double quotes (got: ' + p.prompt + ")" }, 400, req);
            return;
        }
        // A line starting with "## " or "---" ends the `## Test Cases` section
        // for the parser. Writing such a prompt would make the stored cases
        // unreadable and leave orphaned text behind on the next upsert.
        if (/\n(?:## |---)/.test(p.prompt)) {
            sendJson(res, { ok: false, error: "Prompt strings may not contain a line starting with '## ' or '---'" }, 400, req);
            return;
        }
        if (!allowed.includes(p.expected)) {
            sendJson(res, { ok: false, error: "expected must be one of: " + allowed.join(", ") }, 400, req);
            return;
        }
    }
    const existing = existsSync(skillMdPath) ? readFileSync(skillMdPath, "utf-8") : "";
    const updated = upsertTestCasesIntoSkillMd(existing, body.prompts);
    mkdirSync(dirname(skillMdPath), { recursive: true });
    writeFileSync(skillMdPath, updated, "utf-8");
    sendJson(res, { ok: true, count: body.prompts.length }, 200, req);
});
|
|
2962
3089
|
// AI-generate activation test prompts (SSE)
|
|
2963
3090
|
router.post("/api/skills/:plugin/:skill/activation-prompts", async (req, res, params) => {
|
|
2964
3091
|
const skillDir = resolveSkillDir(root, params.plugin, params.skill);
|