vskill 0.5.141 → 0.5.142
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents.json +1 -1
- package/dist/eval/activation-tester.d.ts +5 -0
- package/dist/eval/activation-tester.js +25 -5
- package/dist/eval/activation-tester.js.map +1 -1
- package/dist/eval/test-case-parser.d.ts +8 -0
- package/dist/eval/test-case-parser.js +74 -0
- package/dist/eval/test-case-parser.js.map +1 -0
- package/dist/eval-server/api-routes.js +41 -0
- package/dist/eval-server/api-routes.js.map +1 -1
- package/dist/eval-ui/assets/{CommandPalette-COMOl8Vg.js → CommandPalette-D42KauW9.js} +1 -1
- package/dist/eval-ui/assets/{CreateSkillPage-DqUUj-0q.js → CreateSkillPage-BWoyQQ0n.js} +1 -1
- package/dist/eval-ui/assets/{FindSkillsPalette-Cy98Ygh7.js → FindSkillsPalette-BtyebylE.js} +2 -2
- package/dist/eval-ui/assets/{SearchPaletteCore-CV6YIjYd.js → SearchPaletteCore-9ObzwMtM.js} +1 -1
- package/dist/eval-ui/assets/{SkillDetailPanel-ctAUsQxo.js → SkillDetailPanel-8XS9hdR_.js} +1 -1
- package/dist/eval-ui/assets/{UpdateDropdown-WsXxpeur.js → UpdateDropdown-D0dHy9y-.js} +1 -1
- package/dist/eval-ui/assets/{index-ByzTygib.css → index-CycZyHaL.css} +1 -1
- package/dist/eval-ui/assets/{index-B1nvsGfw.js → index-DUUmBJUa.js} +45 -35
- package/dist/eval-ui/index.html +2 -2
- package/package.json +3 -2
package/agents.json
CHANGED
|
@@ -7,6 +7,7 @@ export interface ActivationPrompt {
|
|
|
7
7
|
prompt: string;
|
|
8
8
|
expected: "should_activate" | "should_not_activate" | "auto";
|
|
9
9
|
}
|
|
10
|
+
/**
 * Outcome label attached to every activation result.
 *
 * `computeVerdict` yields `"scope_warning"` or `"drift_warning"` only for
 * auto-classified prompts whose expectation disagrees with the observed
 * activation; every other combination is `"ok"`.
 */
export type Verdict = "ok" | "scope_warning" | "drift_warning";
|
|
10
11
|
export interface ActivationResult {
|
|
11
12
|
prompt: string;
|
|
12
13
|
expected: "should_activate" | "should_not_activate";
|
|
@@ -15,6 +16,7 @@ export interface ActivationResult {
|
|
|
15
16
|
reasoning: string;
|
|
16
17
|
classification: "TP" | "TN" | "FP" | "FN";
|
|
17
18
|
autoClassified?: boolean;
|
|
19
|
+
verdict: Verdict;
|
|
18
20
|
}
|
|
19
21
|
export interface ActivationSummary {
|
|
20
22
|
results: ActivationResult[];
|
|
@@ -26,6 +28,9 @@ export interface ActivationSummary {
|
|
|
26
28
|
tn: number;
|
|
27
29
|
fp: number;
|
|
28
30
|
fn: number;
|
|
31
|
+
scopeWarnings: number;
|
|
32
|
+
driftWarnings: number;
|
|
29
33
|
autoClassifiedCount: number;
|
|
30
34
|
}
|
|
31
35
|
/**
 * Runs the activation test for a skill description against a list of prompts.
 *
 * @param skillDescription - Description text of the skill under test.
 * @param prompts - Prompts to evaluate, each carrying its expected outcome.
 * @param client - LLM client used for the evaluation.
 * @param onResult - Optional callback that receives an `ActivationResult`.
 *   NOTE(review): presumably invoked once per prompt as results are produced — confirm in the implementation.
 * @param meta - Optional skill metadata.
 * @param onProgress - Optional progress callback for the `"classifying"` phase, given an index and a total.
 * @returns A promise resolving to the aggregated `ActivationSummary`.
 */
export declare function testActivation(skillDescription: string, prompts: ActivationPrompt[], client: LlmClient, onResult?: (result: ActivationResult) => void, meta?: SkillMeta, onProgress?: (phase: "classifying", index: number, total: number) => void): Promise<ActivationSummary>;
|
|
36
|
+
/**
 * Derives the verdict for a single activation result.
 *
 * @param autoClassified - Whether the expectation was auto-classified; falsy values always give `"ok"`.
 * @param expected - The expected outcome for the prompt.
 * @param actual - Whether the skill actually activated.
 * @returns `"scope_warning"` when an auto-classified `should_not_activate` prompt activated,
 *   `"drift_warning"` when an auto-classified `should_activate` prompt did not, otherwise `"ok"`.
 */
export declare function computeVerdict(autoClassified: boolean | undefined, expected: "should_activate" | "should_not_activate", actual: boolean): Verdict;
|
|
@@ -79,6 +79,7 @@ Would this user prompt trigger this skill?`;
|
|
|
79
79
|
? json.confidence
|
|
80
80
|
: "low";
|
|
81
81
|
const classification = classifyResult(p.expected, activate);
|
|
82
|
+
const verdict = computeVerdict(p.autoClassified, p.expected, activate);
|
|
82
83
|
const result = {
|
|
83
84
|
prompt: p.prompt,
|
|
84
85
|
expected: p.expected,
|
|
@@ -87,6 +88,7 @@ Would this user prompt trigger this skill?`;
|
|
|
87
88
|
reasoning: String(json.reasoning || ""),
|
|
88
89
|
classification,
|
|
89
90
|
autoClassified: p.autoClassified,
|
|
91
|
+
verdict,
|
|
90
92
|
};
|
|
91
93
|
results.push(result);
|
|
92
94
|
onResult?.(result);
|
|
@@ -100,6 +102,7 @@ Would this user prompt trigger this skill?`;
|
|
|
100
102
|
reasoning: `Error: ${err instanceof Error ? err.message : String(err)}`,
|
|
101
103
|
classification: p.expected === "should_activate" ? "FN" : "TN",
|
|
102
104
|
autoClassified: p.autoClassified,
|
|
105
|
+
verdict: "ok",
|
|
103
106
|
};
|
|
104
107
|
results.push(result);
|
|
105
108
|
onResult?.(result);
|
|
@@ -116,22 +119,39 @@ function classifyResult(expected, actual) {
|
|
|
116
119
|
return "TN";
|
|
117
120
|
return "FP";
|
|
118
121
|
}
|
|
122
|
+
/**
 * Decide how a single result should be reported.
 *
 * A disagreement on an auto-classified expectation is downgraded to a soft
 * warning instead of a hard FP/FN. Manually labelled expectations carry the
 * author's authority and always stay "ok" here, so their disagreements remain
 * strict. See increment 0775.
 *
 * @param autoClassified truthy when the expectation came from auto-classification
 * @param expected "should_activate" or "should_not_activate"
 * @param actual whether the skill activated for the prompt
 * @returns "scope_warning", "drift_warning" or "ok"
 */
export function computeVerdict(autoClassified, expected, actual) {
    const softened = Boolean(autoClassified);
    // Activated although the auto label said it should stay quiet.
    const overreached = expected === "should_not_activate" && actual;
    // Stayed quiet although the auto label said it should activate.
    const missed = expected === "should_activate" && !actual;
    if (softened && overreached) {
        return "scope_warning";
    }
    if (softened && missed) {
        return "drift_warning";
    }
    return "ok";
}
|
|
119
133
|
/**
 * Aggregate per-prompt results into the activation summary.
 *
 * Only results whose verdict is "ok" feed the TP/TN/FP/FN counters (and hence
 * precision, recall and reliability); results carrying a warning verdict are
 * tallied separately as scope/drift warnings. `total` still counts every
 * result.
 *
 * @param results list of activation results (classification, verdict, autoClassified)
 * @returns summary object with metrics and counters
 */
function computeSummary(results) {
    let tp = 0;
    let tn = 0;
    let fp = 0;
    let fn = 0;
    let scopeWarnings = 0;
    let driftWarnings = 0;
    let autoClassifiedCount = 0;
    for (const entry of results) {
        if (entry.verdict === "ok") {
            switch (entry.classification) {
                case "TP":
                    tp += 1;
                    break;
                case "TN":
                    tn += 1;
                    break;
                case "FP":
                    fp += 1;
                    break;
                case "FN":
                    fn += 1;
                    break;
                default:
                    break;
            }
        }
        else if (entry.verdict === "scope_warning") {
            scopeWarnings += 1;
        }
        else if (entry.verdict === "drift_warning") {
            driftWarnings += 1;
        }
        if (entry.autoClassified) {
            autoClassifiedCount += 1;
        }
    }
    const total = results.length;
    // Warnings are excluded from the denominator of reliability.
    const scoredTotal = tp + tn + fp + fn;
    const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
    const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
    const reliability = scoredTotal > 0 ? (tp + tn) / scoredTotal : 0;
    return {
        results,
        precision,
        recall,
        reliability,
        total,
        tp,
        tn,
        fp,
        fn,
        scopeWarnings,
        driftWarnings,
        autoClassifiedCount,
    };
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"activation-tester.js","sourceRoot":"","sources":["../../src/eval/activation-tester.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;
|
|
1
|
+
{"version":3,"file":"activation-tester.js","sourceRoot":"","sources":["../../src/eval/activation-tester.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;AA0C9E,MAAM,wBAAwB,GAAG;;;;;;;;;;;;EAY/B,CAAC;AAEH,MAAM,sBAAsB,GAAG;;;wBAGP,CAAC;AAEzB,8EAA8E;AAC9E,kEAAkE;AAClE,8EAA8E;AAE9E,KAAK,UAAU,mBAAmB,CAChC,IAAe,EACf,MAAc,EACd,MAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,UAAU,IAAI,CAAC,IAAI,WAAW,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,oBAAoB,MAAM,EAAE,CAAC;QAClG,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,sBAAsB,EAAE,UAAU,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAC7E,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9C,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,qBAAqB,CAAC;IAClE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,iBAAiB,CAAC;IAC3B,CAAC;AACH,CAAC;AAYD,KAAK,UAAU,cAAc,CAC3B,OAA2B,EAC3B,MAAiB,EACjB,IAAgB,EAChB,UAAyE;IAEzE,MAAM,QAAQ,GAAqB,EAAE,CAAC;IACtC,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IACjF,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;YAC1B,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,QAAQ,GAAG,MAAM,mBAAmB,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;gBACnE,UAAU,EAAE,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;gBACpD,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,iBAAiB,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;YACzF,CAAC;QACH,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;QACnF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,8EAA8E;AAC9E,yDAAyD;AACzD,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,gBAAwB,EACxB,OAA2B,EAC3B,MAAiB,EACjB,QAA6C,EAC7C,IAAgB,EAChB,UAAyE;IAEzE,qCA
AqC;IACrC,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;IAEzE,oDAAoD;IACpD,MAAM,OAAO,GAAuB,EAAE,CAAC;IAEvC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG;EACrB,gBAAgB;;;EAGhB,CAAC,CAAC,MAAM;;2CAEiC,CAAC;QAExC,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,wBAAwB,EAAE,UAAU,CAAC,CAAC;YACvF,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,8BAA8B,CAAC,IAAI,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;YACrF,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC,CAAC;YAE9C,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;YACjC,MAAM,UAAU,GAAG,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC;gBACpE,CAAC,CAAE,IAAI,CAAC,UAAwC;gBAChD,CAAC,CAAC,KAAK,CAAC;YAEV,MAAM,cAAc,GAAG,cAAc,CAAC,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAC5D,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,cAAc,EAAE,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAEvE,MAAM,MAAM,GAAqB;gBAC/B,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,QAAQ;gBACR,UAAU;gBACV,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,CAAC;gBACvC,cAAc;gBACd,cAAc,EAAE,CAAC,CAAC,cAAc;gBAChC,OAAO;aACR,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC;QACrB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,MAAM,GAAqB;gBAC/B,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,QAAQ,EAAE,KAAK;gBACf,UAAU,EAAE,KAAK;gBACjB,SAAS,EAAE,UAAU,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;gBACvE,cAAc,EAAE,CAAC,CAAC,QAAQ,KAAK,iBAAiB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI;gBAC9D,cAAc,EAAE,CAAC,CAAC,cAAc;gBAChC,OAAO,EAAE,IAAI;aACd,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,cAAc,CAAC,OAAO,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,cAAc,CACrB,QAAmD,EACnD,MAAe;IAEf,IAAI,QAAQ,KAAK,iBAAiB,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAC1D,IAAI,QAAQ,KAAK,iBAAiB,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAC3D,IAAI,QAAQ,KAAK,qBAAqB,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAC/D,OAAO,IAAI,CAAC;AACd,CAAC;AAED
,gFAAgF;AAChF,gFAAgF;AAChF,MAAM,UAAU,cAAc,CAC5B,cAAmC,EACnC,QAAmD,EACnD,MAAe;IAEf,IAAI,CAAC,cAAc;QAAE,OAAO,IAAI,CAAC;IACjC,IAAI,QAAQ,KAAK,qBAAqB,IAAI,MAAM;QAAE,OAAO,eAAe,CAAC;IACzE,IAAI,QAAQ,KAAK,iBAAiB,IAAI,CAAC,MAAM;QAAE,OAAO,eAAe,CAAC;IACtE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,OAA2B;IACjD,MAAM,EAAE,GAAG,CAAC,CAAmB,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC;IACvD,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,eAAe,CAAC,CAAC,MAAM,CAAC;IAClF,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,eAAe,CAAC,CAAC,MAAM,CAAC;IAClF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IAC7B,MAAM,WAAW,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAEtC,OAAO;QACL,OAAO;QACP,SAAS,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxC,WAAW,EAAE,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1D,KAAK;QACL,EAAE;QACF,EAAE;QACF,EAAE;QACF,EAAE;QACF,aAAa;QACb,aAAa;QACb,mBAAmB,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,MAAM;KACpE,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
 * Expectation label of an author-written test case. The parser maps the text
 * "should activate" / "should not activate" (case-insensitive, trimmed) to the
 * first two values and any other text to `"auto"`.
 */
export type TestCaseExpected = "should_activate" | "should_not_activate" | "auto";
|
|
2
|
+
/** One prompt/expectation pair read from, or written to, the `## Test Cases` section of a SKILL.md. */
export interface ParsedTestCase {
|
|
3
|
+
prompt: string;
|
|
4
|
+
expected: TestCaseExpected;
|
|
5
|
+
}
|
|
6
|
+
/**
 * Extracts the prompt/expected pairs from the `## Test Cases` section of a SKILL.md string.
 *
 * @param content - Full SKILL.md text.
 * @returns The parsed cases in document order; an empty array when `content` is empty or has no such section.
 */
export declare function parseTestCases(content: string): ParsedTestCase[];
|
|
7
|
+
/**
 * Renders test cases as a `## Test Cases` markdown block.
 *
 * @param prompts - Cases to render.
 * @returns The markdown block, or an empty string when `prompts` is empty.
 */
export declare function serializeTestCases(prompts: ParsedTestCase[]): string;
|
|
8
|
+
/**
 * Replaces the `## Test Cases` section of a SKILL.md string, or appends one when absent.
 *
 * @param content - Existing SKILL.md text.
 * @param prompts - Cases to store; an empty array removes the section (and returns `content`
 *   unchanged when there is no section to remove).
 * @returns The updated SKILL.md text.
 */
export declare function upsertTestCasesIntoSkillMd(content: string, prompts: ParsedTestCase[]): string;
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
// test-case-parser.ts — author-anchored activation-test fixtures in SKILL.md
//
// Ports the `## Test Cases` parser from vskill-platform's
// src/lib/eval/prompt-generator.ts:22-48 (parseAuthorTestCases) and adds a
// matching writer + upsert helper. The shape is intentionally identical to the
// platform's so a single SKILL.md can be consumed by both systems.
//
// See increment 0776 for the why.
// ---------------------------------------------------------------------------

// Heading `## Test Cases` plus its body (capture group 1). The body ends at the
// next `## ` heading, a line starting with `---`, or the end of the string.
const SECTION_RE = /## Test Cases\s*\n([\s\S]*?)(?=\n## |\n---|\n$|$)/i;

// One fixture: `- Prompt: "..."` followed on the next line by `Expected: "..."`.
// Neither quoted value may itself contain a double quote.
const PAIR_RE = /-\s*Prompt:\s*"([^"]+)"\s*\n\s*Expected:\s*"([^"]+)"/gi;

// Same section as SECTION_RE, but also swallowing the newlines in front of the
// heading. Derived from SECTION_RE so the two can never drift apart.
const SECTION_WITH_LEADING_GAP_RE = new RegExp("\\n*" + SECTION_RE.source, "i");

/**
 * Parse the `## Test Cases` section of a SKILL.md document.
 *
 * @param content full SKILL.md text (may be empty)
 * @returns the `{ prompt, expected }` pairs in document order; `[]` when the
 *          content is empty or contains no `## Test Cases` section
 */
export function parseTestCases(content) {
    if (!content)
        return [];
    const sectionMatch = SECTION_RE.exec(content);
    if (!sectionMatch)
        return [];
    // matchAll iterates over an internal copy of the regex, so the shared
    // global PAIR_RE never has its lastIndex mutated and this stays pure.
    return Array.from(sectionMatch[1].matchAll(PAIR_RE), (m) => ({
        prompt: m[1],
        expected: textToExpected(m[2]),
    }));
}

/**
 * Render fixtures as a `## Test Cases` markdown block.
 *
 * Prompts are written verbatim between double quotes, with no escaping, so
 * callers must reject prompts that contain `"` — they would not parse back.
 *
 * @param prompts fixtures to render
 * @returns the markdown block ending in a newline, or `""` for an empty list
 */
export function serializeTestCases(prompts) {
    if (prompts.length === 0)
        return "";
    const lines = prompts.map((p) => `- Prompt: "${p.prompt}"\n Expected: "${expectedToText(p.expected)}"`);
    return `## Test Cases\n\n${lines.join("\n")}\n`;
}

/**
 * Replace-or-append the `## Test Cases` block. Empty prompts → remove the
 * section entirely (keeps SKILL.md clean when the author clears fixtures).
 *
 * @param content existing SKILL.md text
 * @param prompts fixtures to store
 * @returns the updated document, terminated by a single newline; `content`
 *          itself, untouched, when there is nothing to remove
 */
export function upsertTestCasesIntoSkillMd(content, prompts) {
    const trimmed = content.replace(/\s+$/, "");
    const hasSection = SECTION_RE.test(trimmed);
    if (prompts.length === 0) {
        if (!hasSection)
            return content;
        return removeSection(trimmed) + "\n";
    }
    const block = serializeTestCases(prompts).trimEnd();
    if (hasSection) {
        // A function replacer is required here: with a string replacement,
        // `$1`, `$&`, "$`" and `$'` inside a prompt would be expanded as
        // substitution patterns and corrupt the file. The whitespace that
        // ended the old section is kept so the gap before the following
        // section survives the rewrite.
        const replaced = trimmed.replace(SECTION_RE, (matched) => block + matched.slice(matched.trimEnd().length));
        return replaced + "\n";
    }
    // Nothing to append to: avoid starting the file with blank lines.
    if (trimmed === "")
        return block + "\n";
    return trimmed + "\n\n" + block + "\n";
}

/**
 * Cut the `## Test Cases` section out of `content` and join what surrounds it
 * with exactly one blank line. Text outside the section is left untouched,
 * apart from trailing whitespace being trimmed from the result.
 */
function removeSection(content) {
    const match = SECTION_WITH_LEADING_GAP_RE.exec(content);
    if (match === null)
        return content.replace(/\s+$/, "");
    const before = content.slice(0, match.index);
    // The section match stops just before the newline that introduces the
    // next boundary; drop that newline so the gap is rebuilt uniformly.
    const after = content.slice(match.index + match[0].length).replace(/^\n+/, "");
    const joined = before !== "" && after !== "" ? before + "\n\n" + after : before + after;
    return joined.replace(/\s+$/, "");
}

/** Map the human-readable expectation text to its enum value; unknown text → "auto". */
function textToExpected(raw) {
    const norm = raw.trim().toLowerCase();
    if (norm === "should activate")
        return "should_activate";
    if (norm === "should not activate")
        return "should_not_activate";
    return "auto";
}

/** Inverse of textToExpected: enum value → text written into SKILL.md. */
function expectedToText(expected) {
    if (expected === "should_activate")
        return "should activate";
    if (expected === "should_not_activate")
        return "should not activate";
    return "auto";
}
//# sourceMappingURL=test-case-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-case-parser.js","sourceRoot":"","sources":["../../src/eval/test-case-parser.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6EAA6E;AAC7E,EAAE;AACF,0DAA0D;AAC1D,2EAA2E;AAC3E,+EAA+E;AAC/E,mEAAmE;AACnE,EAAE;AACF,kCAAkC;AAClC,8EAA8E;AAS9E,MAAM,UAAU,GAAG,oDAAoD,CAAC;AACxE,MAAM,OAAO,GAAG,wDAAwD,CAAC;AAEzE,MAAM,UAAU,cAAc,CAAC,OAAe;IAC5C,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IACxB,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAC/C,IAAI,CAAC,YAAY;QAAE,OAAO,EAAE,CAAC;IAC7B,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;IAEhC,MAAM,KAAK,GAAqB,EAAE,CAAC;IACnC,yEAAyE;IACzE,MAAM,IAAI,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAC9C,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,OAAyB;IAC1D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CACvB,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,MAAM,mBAAmB,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAC9E,CAAC;IACF,OAAO,oBAAoB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;AAClD,CAAC;AAED,0EAA0E;AAC1E,2EAA2E;AAC3E,MAAM,UAAU,0BAA0B,CACxC,OAAe,EACf,OAAyB;IAEzB,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC5C,MAAM,UAAU,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAE5C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU;YAAE,OAAO,OAAO,CAAC;QAChC,OAAO,aAAa,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC;IACvC,CAAC;IAED,MAAM,KAAK,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,CAAC;IACpD,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,IAAI,CAAC;IACnD,CAAC;IACD,OAAO,OAAO,GAAG,MAAM,GAAG,KAAK,GAAG,IAAI,CAAC;AACzC,CAAC;AAED,SAAS,aAAa,CAAC,OAAe;IACpC,wEAAwE;IACxE,0DAA0D;IAC1D,OAAO,OAAO;SACX,OAAO,CAAC,qDAAqD,EAAE,EAAE,CAAC;SAClE,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,cAAc,CAAC,GAAW;IACjC
,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACtC,IAAI,IAAI,KAAK,iBAAiB;QAAE,OAAO,iBAAiB,CAAC;IACzD,IAAI,IAAI,KAAK,qBAAqB;QAAE,OAAO,qBAAqB,CAAC;IACjE,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,cAAc,CAAC,QAA0B;IAChD,IAAI,QAAQ,KAAK,iBAAiB;QAAE,OAAO,iBAAiB,CAAC;IAC7D,IAAI,QAAQ,KAAK,qBAAqB;QAAE,OAAO,qBAAqB,CAAC;IACrE,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -32,6 +32,7 @@ import { computeVerdict } from "../eval/verdict.js";
|
|
|
32
32
|
import { generateActionItems } from "../eval/action-items.js";
|
|
33
33
|
import { buildEvalInitPrompt, parseGeneratedEvals, buildIntegrationEvalPrompt, parseGeneratedIntegrationEvals, detectBrowserRequirements, detectPlatformTargets } from "../eval/prompt-builder.js";
|
|
34
34
|
import { testActivation } from "../eval/activation-tester.js";
|
|
35
|
+
import { parseTestCases, upsertTestCasesIntoSkillMd, } from "../eval/test-case-parser.js";
|
|
35
36
|
import { detectMcpDependencies, detectSkillDependencies } from "../eval/mcp-detector.js";
|
|
36
37
|
import { writeActivationRun, listActivationRuns, getActivationRun } from "../eval/activation-history.js";
|
|
37
38
|
import { AGENTS_REGISTRY, detectInstalledAgents } from "../agents/agents-registry.js";
|
|
@@ -2959,6 +2960,46 @@ export function registerRoutes(router, root, projectName) {
|
|
|
2959
2960
|
sendSSEDone(res, { error: err instanceof Error ? err.message : String(err) });
|
|
2960
2961
|
}
|
|
2961
2962
|
});
|
|
2963
|
+
// GET — return the fixtures parsed from the `## Test Cases` block of the
// skill's SKILL.md (increment 0776). A missing file is treated as empty
// content, which yields an empty list and a null `source`.
router.get("/api/skills/:plugin/:skill/test-cases", (req, res, params) => {
    const mdFile = join(resolveSkillDir(root, params.plugin, params.skill), "SKILL.md");
    let markdown = "";
    if (existsSync(mdFile)) {
        markdown = readFileSync(mdFile, "utf-8");
    }
    const prompts = parseTestCases(markdown);
    // `source` tells the client whether anything was actually found in SKILL.md.
    const source = prompts.length > 0 ? "skill-md" : null;
    sendJson(res, { prompts, source }, 200, req);
});
|
|
2971
|
+
// PUT — write the submitted fixtures into the `## Test Cases` block of the
// skill's SKILL.md (increment 0776). An empty prompts array removes the
// section. The body is validated first; the first invalid entry produces a
// 400 response and nothing is written.
router.put("/api/skills/:plugin/:skill/test-cases", async (req, res, params) => {
    const mdFile = join(resolveSkillDir(root, params.plugin, params.skill), "SKILL.md");
    const body = (await readBody(req));
    const reject = (error) => {
        sendJson(res, { ok: false, error }, 400, req);
    };
    if (!body || !Array.isArray(body.prompts)) {
        reject("Body must be { prompts: ParsedTestCase[] }");
        return;
    }
    const allowed = ["should_activate", "should_not_activate", "auto"];
    // Returns the validation message for one entry, or null when it is acceptable.
    const problemWith = (p) => {
        if (!p || typeof p.prompt !== "string" || p.prompt.length === 0) {
            return "Each prompt must have a non-empty string prompt";
        }
        // Prompts are stored between double quotes, so a quote inside would not parse back.
        if (p.prompt.includes('"')) {
            return 'Prompt strings may not contain double quotes (got: ' + p.prompt + ")";
        }
        if (!allowed.includes(p.expected)) {
            return "expected must be one of: " + allowed.join(", ");
        }
        return null;
    };
    for (const candidate of body.prompts) {
        const problem = problemWith(candidate);
        if (problem !== null) {
            reject(problem);
            return;
        }
    }
    let existing = "";
    if (existsSync(mdFile)) {
        existing = readFileSync(mdFile, "utf-8");
    }
    const updated = upsertTestCasesIntoSkillMd(existing, body.prompts);
    // Make sure the skill directory exists before writing the file.
    mkdirSync(dirname(mdFile), { recursive: true });
    writeFileSync(mdFile, updated, "utf-8");
    sendJson(res, { ok: true, count: body.prompts.length }, 200, req);
});
|
|
2962
3003
|
// AI-generate activation test prompts (SSE)
|
|
2963
3004
|
router.post("/api/skills/:plugin/:skill/activation-prompts", async (req, res, params) => {
|
|
2964
3005
|
const skillDir = resolveSkillDir(root, params.plugin, params.skill);
|