vskill 0.5.140 → 0.5.142
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents.json +1 -1
- package/dist/eval/activation-tester.d.ts +5 -0
- package/dist/eval/activation-tester.js +25 -5
- package/dist/eval/activation-tester.js.map +1 -1
- package/dist/eval/test-case-parser.d.ts +8 -0
- package/dist/eval/test-case-parser.js +74 -0
- package/dist/eval/test-case-parser.js.map +1 -0
- package/dist/eval-server/api-routes.d.ts +35 -0
- package/dist/eval-server/api-routes.js +170 -6
- package/dist/eval-server/api-routes.js.map +1 -1
- package/dist/eval-server/skill-create-routes.d.ts +20 -0
- package/dist/eval-server/skill-create-routes.js +50 -2
- package/dist/eval-server/skill-create-routes.js.map +1 -1
- package/dist/eval-server/skill-dir-registry.d.ts +2 -14
- package/dist/eval-server/skill-dir-registry.js +18 -0
- package/dist/eval-server/skill-dir-registry.js.map +1 -1
- package/dist/eval-ui/assets/{CommandPalette-DeEo1aM2.js → CommandPalette-D42KauW9.js} +1 -1
- package/dist/eval-ui/assets/{CreateSkillPage-BGi_wZ1y.js → CreateSkillPage-BWoyQQ0n.js} +1 -1
- package/dist/eval-ui/assets/{FindSkillsPalette-BbJJSef2.js → FindSkillsPalette-BtyebylE.js} +2 -2
- package/dist/eval-ui/assets/{SearchPaletteCore-BK0cFJb6.js → SearchPaletteCore-9ObzwMtM.js} +1 -1
- package/dist/eval-ui/assets/{SkillDetailPanel-DxidXsNP.js → SkillDetailPanel-8XS9hdR_.js} +1 -1
- package/dist/eval-ui/assets/{UpdateDropdown-CcaDVKoz.js → UpdateDropdown-D0dHy9y-.js} +1 -1
- package/dist/eval-ui/assets/{index-ByzTygib.css → index-CycZyHaL.css} +1 -1
- package/dist/eval-ui/assets/index-DUUmBJUa.js +120 -0
- package/dist/eval-ui/index.html +2 -2
- package/dist/first-run-onboarding.d.ts +10 -0
- package/dist/first-run-onboarding.js +50 -3
- package/dist/first-run-onboarding.js.map +1 -1
- package/package.json +3 -2
- package/dist/eval-ui/assets/index-C12VKBb6.js +0 -110
package/agents.json
CHANGED
|
@@ -7,6 +7,7 @@ export interface ActivationPrompt {
|
|
|
7
7
|
prompt: string;
|
|
8
8
|
expected: "should_activate" | "should_not_activate" | "auto";
|
|
9
9
|
}
|
|
10
|
+
export type Verdict = "ok" | "scope_warning" | "drift_warning";
|
|
10
11
|
export interface ActivationResult {
|
|
11
12
|
prompt: string;
|
|
12
13
|
expected: "should_activate" | "should_not_activate";
|
|
@@ -15,6 +16,7 @@ export interface ActivationResult {
|
|
|
15
16
|
reasoning: string;
|
|
16
17
|
classification: "TP" | "TN" | "FP" | "FN";
|
|
17
18
|
autoClassified?: boolean;
|
|
19
|
+
verdict: Verdict;
|
|
18
20
|
}
|
|
19
21
|
export interface ActivationSummary {
|
|
20
22
|
results: ActivationResult[];
|
|
@@ -26,6 +28,9 @@ export interface ActivationSummary {
|
|
|
26
28
|
tn: number;
|
|
27
29
|
fp: number;
|
|
28
30
|
fn: number;
|
|
31
|
+
scopeWarnings: number;
|
|
32
|
+
driftWarnings: number;
|
|
29
33
|
autoClassifiedCount: number;
|
|
30
34
|
}
|
|
31
35
|
export declare function testActivation(skillDescription: string, prompts: ActivationPrompt[], client: LlmClient, onResult?: (result: ActivationResult) => void, meta?: SkillMeta, onProgress?: (phase: "classifying", index: number, total: number) => void): Promise<ActivationSummary>;
|
|
36
|
+
export declare function computeVerdict(autoClassified: boolean | undefined, expected: "should_activate" | "should_not_activate", actual: boolean): Verdict;
|
|
@@ -79,6 +79,7 @@ Would this user prompt trigger this skill?`;
|
|
|
79
79
|
? json.confidence
|
|
80
80
|
: "low";
|
|
81
81
|
const classification = classifyResult(p.expected, activate);
|
|
82
|
+
const verdict = computeVerdict(p.autoClassified, p.expected, activate);
|
|
82
83
|
const result = {
|
|
83
84
|
prompt: p.prompt,
|
|
84
85
|
expected: p.expected,
|
|
@@ -87,6 +88,7 @@ Would this user prompt trigger this skill?`;
|
|
|
87
88
|
reasoning: String(json.reasoning || ""),
|
|
88
89
|
classification,
|
|
89
90
|
autoClassified: p.autoClassified,
|
|
91
|
+
verdict,
|
|
90
92
|
};
|
|
91
93
|
results.push(result);
|
|
92
94
|
onResult?.(result);
|
|
@@ -100,6 +102,7 @@ Would this user prompt trigger this skill?`;
|
|
|
100
102
|
reasoning: `Error: ${err instanceof Error ? err.message : String(err)}`,
|
|
101
103
|
classification: p.expected === "should_activate" ? "FN" : "TN",
|
|
102
104
|
autoClassified: p.autoClassified,
|
|
105
|
+
verdict: "ok",
|
|
103
106
|
};
|
|
104
107
|
results.push(result);
|
|
105
108
|
onResult?.(result);
|
|
@@ -116,22 +119,39 @@ function classifyResult(expected, actual) {
|
|
|
116
119
|
return "TN";
|
|
117
120
|
return "FP";
|
|
118
121
|
}
|
|
122
|
+
// Auto-classified disagreement → soft warning (not a real FP/FN). Manual labels
|
|
123
|
+
// signal user authority; their disagreements remain strict. See increment 0775.
|
|
124
|
+
export function computeVerdict(autoClassified, expected, actual) {
|
|
125
|
+
if (!autoClassified)
|
|
126
|
+
return "ok";
|
|
127
|
+
if (expected === "should_not_activate" && actual)
|
|
128
|
+
return "scope_warning";
|
|
129
|
+
if (expected === "should_activate" && !actual)
|
|
130
|
+
return "drift_warning";
|
|
131
|
+
return "ok";
|
|
132
|
+
}
|
|
119
133
|
function computeSummary(results) {
|
|
120
|
-
const
|
|
121
|
-
const
|
|
122
|
-
const
|
|
123
|
-
const
|
|
134
|
+
const ok = (r) => r.verdict === "ok";
|
|
135
|
+
const tp = results.filter((r) => r.classification === "TP" && ok(r)).length;
|
|
136
|
+
const tn = results.filter((r) => r.classification === "TN" && ok(r)).length;
|
|
137
|
+
const fp = results.filter((r) => r.classification === "FP" && ok(r)).length;
|
|
138
|
+
const fn = results.filter((r) => r.classification === "FN" && ok(r)).length;
|
|
139
|
+
const scopeWarnings = results.filter((r) => r.verdict === "scope_warning").length;
|
|
140
|
+
const driftWarnings = results.filter((r) => r.verdict === "drift_warning").length;
|
|
124
141
|
const total = results.length;
|
|
142
|
+
const scoredTotal = tp + tn + fp + fn;
|
|
125
143
|
return {
|
|
126
144
|
results,
|
|
127
145
|
precision: tp + fp > 0 ? tp / (tp + fp) : 0,
|
|
128
146
|
recall: tp + fn > 0 ? tp / (tp + fn) : 0,
|
|
129
|
-
reliability:
|
|
147
|
+
reliability: scoredTotal > 0 ? (tp + tn) / scoredTotal : 0,
|
|
130
148
|
total,
|
|
131
149
|
tp,
|
|
132
150
|
tn,
|
|
133
151
|
fp,
|
|
134
152
|
fn,
|
|
153
|
+
scopeWarnings,
|
|
154
|
+
driftWarnings,
|
|
135
155
|
autoClassifiedCount: results.filter((r) => r.autoClassified).length,
|
|
136
156
|
};
|
|
137
157
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"activation-tester.js","sourceRoot":"","sources":["../../src/eval/activation-tester.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;
|
|
1
|
+
{"version":3,"file":"activation-tester.js","sourceRoot":"","sources":["../../src/eval/activation-tester.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;AA0C9E,MAAM,wBAAwB,GAAG;;;;;;;;;;;;EAY/B,CAAC;AAEH,MAAM,sBAAsB,GAAG;;;wBAGP,CAAC;AAEzB,8EAA8E;AAC9E,kEAAkE;AAClE,8EAA8E;AAE9E,KAAK,UAAU,mBAAmB,CAChC,IAAe,EACf,MAAc,EACd,MAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,UAAU,IAAI,CAAC,IAAI,WAAW,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,oBAAoB,MAAM,EAAE,CAAC;QAClG,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,sBAAsB,EAAE,UAAU,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAC7E,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9C,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,qBAAqB,CAAC;IAClE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,iBAAiB,CAAC;IAC3B,CAAC;AACH,CAAC;AAYD,KAAK,UAAU,cAAc,CAC3B,OAA2B,EAC3B,MAAiB,EACjB,IAAgB,EAChB,UAAyE;IAEzE,MAAM,QAAQ,GAAqB,EAAE,CAAC;IACtC,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IACjF,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;YAC1B,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,QAAQ,GAAG,MAAM,mBAAmB,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;gBACnE,UAAU,EAAE,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;gBACpD,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,iBAAiB,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;YACzF,CAAC;QACH,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;QACnF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,8EAA8E;AAC9E,yDAAyD;AACzD,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,gBAAwB,EACxB,OAA2B,EAC3B,MAAiB,EACjB,QAA6C,EAC7C,IAAgB,EAChB,UAAyE;IAEzE,qCA
AqC;IACrC,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;IAEzE,oDAAoD;IACpD,MAAM,OAAO,GAAuB,EAAE,CAAC;IAEvC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG;EACrB,gBAAgB;;;EAGhB,CAAC,CAAC,MAAM;;2CAEiC,CAAC;QAExC,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,wBAAwB,EAAE,UAAU,CAAC,CAAC;YACvF,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,8BAA8B,CAAC,IAAI,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;YACrF,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC,CAAC;YAE9C,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;YACjC,MAAM,UAAU,GAAG,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC;gBACpE,CAAC,CAAE,IAAI,CAAC,UAAwC;gBAChD,CAAC,CAAC,KAAK,CAAC;YAEV,MAAM,cAAc,GAAG,cAAc,CAAC,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAC5D,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,cAAc,EAAE,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAEvE,MAAM,MAAM,GAAqB;gBAC/B,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,QAAQ;gBACR,UAAU;gBACV,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,CAAC;gBACvC,cAAc;gBACd,cAAc,EAAE,CAAC,CAAC,cAAc;gBAChC,OAAO;aACR,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC;QACrB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,MAAM,GAAqB;gBAC/B,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,QAAQ,EAAE,KAAK;gBACf,UAAU,EAAE,KAAK;gBACjB,SAAS,EAAE,UAAU,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;gBACvE,cAAc,EAAE,CAAC,CAAC,QAAQ,KAAK,iBAAiB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI;gBAC9D,cAAc,EAAE,CAAC,CAAC,cAAc;gBAChC,OAAO,EAAE,IAAI;aACd,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,cAAc,CAAC,OAAO,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,cAAc,CACrB,QAAmD,EACnD,MAAe;IAEf,IAAI,QAAQ,KAAK,iBAAiB,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAC1D,IAAI,QAAQ,KAAK,iBAAiB,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAC3D,IAAI,QAAQ,KAAK,qBAAqB,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAC/D,OAAO,IAAI,CAAC;AACd,CAAC;AAED
,gFAAgF;AAChF,gFAAgF;AAChF,MAAM,UAAU,cAAc,CAC5B,cAAmC,EACnC,QAAmD,EACnD,MAAe;IAEf,IAAI,CAAC,cAAc;QAAE,OAAO,IAAI,CAAC;IACjC,IAAI,QAAQ,KAAK,qBAAqB,IAAI,MAAM;QAAE,OAAO,eAAe,CAAC;IACzE,IAAI,QAAQ,KAAK,iBAAiB,IAAI,CAAC,MAAM;QAAE,OAAO,eAAe,CAAC;IACtE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,OAA2B;IACjD,MAAM,EAAE,GAAG,CAAC,CAAmB,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC;IACvD,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC5E,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,eAAe,CAAC,CAAC,MAAM,CAAC;IAClF,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,eAAe,CAAC,CAAC,MAAM,CAAC;IAClF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IAC7B,MAAM,WAAW,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAEtC,OAAO;QACL,OAAO;QACP,SAAS,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxC,WAAW,EAAE,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1D,KAAK;QACL,EAAE;QACF,EAAE;QACF,EAAE;QACF,EAAE;QACF,aAAa;QACb,aAAa;QACb,mBAAmB,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,MAAM;KACpE,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export type TestCaseExpected = "should_activate" | "should_not_activate" | "auto";
|
|
2
|
+
export interface ParsedTestCase {
|
|
3
|
+
prompt: string;
|
|
4
|
+
expected: TestCaseExpected;
|
|
5
|
+
}
|
|
6
|
+
export declare function parseTestCases(content: string): ParsedTestCase[];
|
|
7
|
+
export declare function serializeTestCases(prompts: ParsedTestCase[]): string;
|
|
8
|
+
export declare function upsertTestCasesIntoSkillMd(content: string, prompts: ParsedTestCase[]): string;
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// test-case-parser.ts — author-anchored activation-test fixtures in SKILL.md
|
|
3
|
+
//
|
|
4
|
+
// Ports the `## Test Cases` parser from vskill-platform's
|
|
5
|
+
// src/lib/eval/prompt-generator.ts:22-48 (parseAuthorTestCases) and adds a
|
|
6
|
+
// matching writer + upsert helper. The shape is intentionally identical to the
|
|
7
|
+
// platform's so a single SKILL.md can be consumed by both systems.
|
|
8
|
+
//
|
|
9
|
+
// See increment 0776 for the why.
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Matches the `## Test Cases` heading and captures the section body: everything
// after the heading up to (not including) the next `\n## ` heading, a `\n---`
// rule, or the end of the input.
const SECTION_RE = /## Test Cases\s*\n([\s\S]*?)(?=\n## |\n---|\n$|$)/i;
// One fixture: a `- Prompt: "..."` line followed by an `Expected: "..."` line.
const PAIR_RE = /-\s*Prompt:\s*"([^"]+)"\s*\n\s*Expected:\s*"([^"]+)"/gi;
/**
 * Parse the `## Test Cases` section of a SKILL.md document.
 *
 * @param content Full SKILL.md text (may be empty).
 * @returns One `{ prompt, expected }` entry per Prompt/Expected pair, in
 *   document order; an empty array when there is no content or no section.
 */
export function parseTestCases(content) {
    if (!content)
        return [];
    const sectionMatch = content.match(SECTION_RE);
    if (!sectionMatch)
        return [];
    // matchAll iterates over a private clone of the regex, so the shared
    // PAIR_RE never has its lastIndex mutated and this function stays pure.
    return Array.from(sectionMatch[1].matchAll(PAIR_RE), (m) => ({
        prompt: m[1],
        expected: textToExpected(m[2]),
    }));
}
/**
 * Render fixtures as a `## Test Cases` markdown block.
 *
 * @param prompts Fixtures to write.
 * @returns The block (heading, blank line, one pair per fixture, trailing
 *   newline), or an empty string when `prompts` is empty.
 */
export function serializeTestCases(prompts) {
    if (prompts.length === 0)
        return "";
    const lines = prompts.map((p) => `- Prompt: "${p.prompt}"\n Expected: "${expectedToText(p.expected)}"`);
    return `## Test Cases\n\n${lines.join("\n")}\n`;
}
/**
 * Replace-or-append the `## Test Cases` block. Empty prompts → remove the
 * section entirely (keeps SKILL.md clean when the author clears fixtures).
 *
 * @param content Current SKILL.md text.
 * @param prompts Fixtures that should end up in the section.
 * @returns The updated document, terminated by a single newline. When there
 *   is nothing to remove, `content` is returned unchanged.
 */
export function upsertTestCasesIntoSkillMd(content, prompts) {
    const trimmed = content.replace(/\s+$/, "");
    const hasSection = SECTION_RE.test(trimmed);
    if (prompts.length === 0) {
        if (!hasSection)
            return content;
        return removeSection(trimmed) + "\n";
    }
    const block = serializeTestCases(prompts).trimEnd();
    if (hasSection) {
        // A function replacer is required: with a replacement *string*,
        // `$&`, `$1`, `$$` etc. inside a prompt would be expanded as
        // replacement patterns and corrupt the written fixtures.
        // When the old section body ended with a newline (i.e. a blank line
        // separated it from the next heading), keep one so the following
        // section stays separated from the rewritten block.
        return trimmed.replace(SECTION_RE, (matched) => (matched.endsWith("\n") ? block + "\n" : block)) + "\n";
    }
    return trimmed + "\n\n" + block + "\n";
}
/**
 * Strip the `## Test Cases` section (heading, body and the newlines directly
 * before the heading), collapse any run of 3+ newlines to one blank line, and
 * drop trailing whitespace.
 */
function removeSection(content) {
    return content
        .replace(/\n*## Test Cases\s*\n[\s\S]*?(?=\n## |\n---|\n$|$)/i, "")
        .replace(/\n{3,}/g, "\n\n")
        .replace(/\s+$/, "");
}
/** Map the human-readable label from SKILL.md to the internal enum; anything unrecognized becomes "auto". */
function textToExpected(raw) {
    const norm = raw.trim().toLowerCase();
    if (norm === "should activate")
        return "should_activate";
    if (norm === "should not activate")
        return "should_not_activate";
    return "auto";
}
/** Inverse of textToExpected: internal enum → label written to SKILL.md. */
function expectedToText(expected) {
    if (expected === "should_activate")
        return "should activate";
    if (expected === "should_not_activate")
        return "should not activate";
    return "auto";
}
|
|
74
|
+
//# sourceMappingURL=test-case-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-case-parser.js","sourceRoot":"","sources":["../../src/eval/test-case-parser.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6EAA6E;AAC7E,EAAE;AACF,0DAA0D;AAC1D,2EAA2E;AAC3E,+EAA+E;AAC/E,mEAAmE;AACnE,EAAE;AACF,kCAAkC;AAClC,8EAA8E;AAS9E,MAAM,UAAU,GAAG,oDAAoD,CAAC;AACxE,MAAM,OAAO,GAAG,wDAAwD,CAAC;AAEzE,MAAM,UAAU,cAAc,CAAC,OAAe;IAC5C,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IACxB,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAC/C,IAAI,CAAC,YAAY;QAAE,OAAO,EAAE,CAAC;IAC7B,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;IAEhC,MAAM,KAAK,GAAqB,EAAE,CAAC;IACnC,yEAAyE;IACzE,MAAM,IAAI,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAC9C,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,OAAyB;IAC1D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CACvB,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,MAAM,mBAAmB,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAC9E,CAAC;IACF,OAAO,oBAAoB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;AAClD,CAAC;AAED,0EAA0E;AAC1E,2EAA2E;AAC3E,MAAM,UAAU,0BAA0B,CACxC,OAAe,EACf,OAAyB;IAEzB,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC5C,MAAM,UAAU,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAE5C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU;YAAE,OAAO,OAAO,CAAC;QAChC,OAAO,aAAa,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC;IACvC,CAAC;IAED,MAAM,KAAK,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,CAAC;IACpD,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,IAAI,CAAC;IACnD,CAAC;IACD,OAAO,OAAO,GAAG,MAAM,GAAG,KAAK,GAAG,IAAI,CAAC;AACzC,CAAC;AAED,SAAS,aAAa,CAAC,OAAe;IACpC,wEAAwE;IACxE,0DAA0D;IAC1D,OAAO,OAAO;SACX,OAAO,CAAC,qDAAqD,EAAE,EAAE,CAAC;SAClE,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,cAAc,CAAC,GAAW;IACjC
,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACtC,IAAI,IAAI,KAAK,iBAAiB;QAAE,OAAO,iBAAiB,CAAC;IACzD,IAAI,IAAI,KAAK,qBAAqB;QAAE,OAAO,qBAAqB,CAAC;IACjE,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,cAAc,CAAC,QAA0B;IAChD,IAAI,QAAQ,KAAK,iBAAiB;QAAE,OAAO,iBAAiB,CAAC;IAC7D,IAAI,QAAQ,KAAK,qBAAqB;QAAE,OAAO,qBAAqB,CAAC;IACrE,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -28,6 +28,7 @@ export interface AgentScopeEntry {
|
|
|
28
28
|
isDefault: boolean;
|
|
29
29
|
localSkillCount: number;
|
|
30
30
|
globalSkillCount: number;
|
|
31
|
+
pluginSkillCount: number;
|
|
31
32
|
resolvedLocalDir: string;
|
|
32
33
|
resolvedGlobalDir: string;
|
|
33
34
|
lastSync: string | null;
|
|
@@ -111,6 +112,40 @@ export declare function parseSkillFrontmatter(content: string): Record<string, s
|
|
|
111
112
|
* for `origin="source"` or if no registry entry matches.
|
|
112
113
|
*/
|
|
113
114
|
export declare function deriveSourceAgent(skillDir: string, root: string, origin: "source" | "installed"): string | null;
|
|
115
|
+
/**
|
|
116
|
+
* 0770 — Pure regex parser. Normalizes any github.com origin remote
|
|
117
|
+
* (SSH, HTTPS, ssh://) to its canonical `https://github.com/owner/repo`
|
|
118
|
+
* form (no `.git` suffix, no trailing path). Returns null for non-github
|
|
119
|
+
* hosts, malformed input, empty/whitespace strings.
|
|
120
|
+
*/
|
|
121
|
+
export declare function parseGithubRemote(remote: string | null | undefined): string | null;
|
|
122
|
+
/**
|
|
123
|
+
* 0770 — Walk parent directories from `startDir` looking for a `.git` entry
|
|
124
|
+
* (directory OR file — git worktrees use a `.git` file). Bails at the
|
|
125
|
+
* filesystem root or after `maxLevels` iterations. Returns the absolute
|
|
126
|
+
* path of the discovered git root, or null.
|
|
127
|
+
*/
|
|
128
|
+
export declare function walkUpForGitRoot(startDir: string, maxLevels?: number): string | null;
|
|
129
|
+
/**
|
|
130
|
+
* 0770 — Test-only helper to clear the module-level memoization cache so
|
|
131
|
+
* tests can isolate detection runs across `beforeEach`.
|
|
132
|
+
*/
|
|
133
|
+
export declare function resetAuthoredSourceLinkCache(): void;
|
|
134
|
+
/**
|
|
135
|
+
* 0770 — Detect source-repo provenance for a locally-authored skill (no
|
|
136
|
+
* lockfile entry). Walks for `.git`, reads `origin` remote, normalizes via
|
|
137
|
+
* `parseGithubRemote`, and computes `skillPath` from `git ls-files` (with a
|
|
138
|
+
* filesystem fallback for untracked SKILL.md files). Memoized per absolute
|
|
139
|
+
* skill dir for the eval-server process lifetime.
|
|
140
|
+
*
|
|
141
|
+
* All git invocations use `execFileSync` with explicit argv (no shell), a
|
|
142
|
+
* 1500ms hard timeout, and silenced stderr. Any error converts to
|
|
143
|
+
* `{null, null}` — `buildSkillMetadata` never throws because of git.
|
|
144
|
+
*/
|
|
145
|
+
export declare function detectAuthoredSourceLink(skillDir: string): {
|
|
146
|
+
repoUrl: string | null;
|
|
147
|
+
skillPath: string | null;
|
|
148
|
+
};
|
|
114
149
|
/**
|
|
115
150
|
* Build the T-025 metadata payload for a single skill. Reads SKILL.md from
|
|
116
151
|
* disk if present; returns EMPTY_METADATA on any error so the /api/skills
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
// ---------------------------------------------------------------------------
|
|
4
4
|
import { readFileSync, writeFileSync, mkdirSync, existsSync, readdirSync, statSync } from "node:fs";
|
|
5
5
|
import { execSync, execFileSync } from "node:child_process";
|
|
6
|
-
import { join, resolve, dirname, basename } from "node:path";
|
|
6
|
+
import { join, resolve, dirname, basename, relative } from "node:path";
|
|
7
7
|
import { homedir } from "node:os";
|
|
8
8
|
import { sendJson, readBody } from "./router.js";
|
|
9
9
|
import { initSSE, sendSSE, sendSSEDone, withHeartbeat, startDynamicHeartbeat } from "./sse-helpers.js";
|
|
@@ -32,6 +32,7 @@ import { computeVerdict } from "../eval/verdict.js";
|
|
|
32
32
|
import { generateActionItems } from "../eval/action-items.js";
|
|
33
33
|
import { buildEvalInitPrompt, parseGeneratedEvals, buildIntegrationEvalPrompt, parseGeneratedIntegrationEvals, detectBrowserRequirements, detectPlatformTargets } from "../eval/prompt-builder.js";
|
|
34
34
|
import { testActivation } from "../eval/activation-tester.js";
|
|
35
|
+
import { parseTestCases, upsertTestCasesIntoSkillMd, } from "../eval/test-case-parser.js";
|
|
35
36
|
import { detectMcpDependencies, detectSkillDependencies } from "../eval/mcp-detector.js";
|
|
36
37
|
import { writeActivationRun, listActivationRuns, getActivationRun } from "../eval/activation-history.js";
|
|
37
38
|
import { AGENTS_REGISTRY, detectInstalledAgents } from "../agents/agents-registry.js";
|
|
@@ -122,6 +123,14 @@ export async function buildAgentsResponse(opts) {
|
|
|
122
123
|
agentPresenceCache.binariesKey === cacheKey.binariesKey) {
|
|
123
124
|
return agentPresenceCache.data;
|
|
124
125
|
}
|
|
126
|
+
// 0772 US-002: count plugin skills once for claude-code. The plugin scanner
|
|
127
|
+
// walks ~/.claude/plugins/cache/<marketplace>/<plugin>/<version>/skills/, so
|
|
128
|
+
// the result is independent of agent identity (plugins are CC-only by
|
|
129
|
+
// current registry design). Pass `home` so tests can override the homedir.
|
|
130
|
+
const claudePluginCount = scanInstalledPluginSkills({
|
|
131
|
+
agentId: "claude-code",
|
|
132
|
+
home,
|
|
133
|
+
}).length;
|
|
125
134
|
// Map each agent → resolved local + global dir. For tests, `home` overrides
|
|
126
135
|
// the homedir-derived global path. In production, resolveGlobalSkillsDir()
|
|
127
136
|
// handles cross-platform resolution (darwin / linux / win32).
|
|
@@ -167,6 +176,7 @@ export async function buildAgentsResponse(opts) {
|
|
|
167
176
|
isDefault,
|
|
168
177
|
localSkillCount,
|
|
169
178
|
globalSkillCount,
|
|
179
|
+
pluginSkillCount: agent.id === "claude-code" ? claudePluginCount : 0,
|
|
170
180
|
resolvedLocalDir,
|
|
171
181
|
resolvedGlobalDir,
|
|
172
182
|
lastSync,
|
|
@@ -597,6 +607,116 @@ export function deriveSourceAgent(skillDir, root, origin) {
|
|
|
597
607
|
}
|
|
598
608
|
return null;
|
|
599
609
|
}
|
|
610
|
+
/**
 * 0770 — Pure regex parser. Normalizes any github.com origin remote
 * (scp-style SSH, ssh://, http(s)://) to its canonical
 * `https://github.com/owner/repo` form (no `.git` suffix, no trailing path,
 * no credentials, no port). Returns null for non-github hosts, malformed
 * input, empty/whitespace strings.
 *
 * The host is matched case-insensitively, URL forms may carry userinfo
 * (e.g. CI remotes such as `https://x-access-token:TOKEN@github.com/o/r.git`)
 * and an explicit port, and SSH forms tolerate a trailing slash. Userinfo is
 * never copied into the result.
 *
 * @param remote Raw remote URL as printed by `git config --get remote.origin.url`;
 *   null/undefined are treated as empty.
 * @returns Canonical repo URL, or null when the remote is not a github.com repo.
 */
export function parseGithubRemote(remote) {
    const trimmed = (remote ?? "").trim();
    if (!trimmed)
        return null;
    // Each pattern captures [1] host, [2] owner, [3] repo. The host is captured
    // rather than hard-coded so it can be compared case-insensitively without
    // making the `.git` suffix match case-insensitive too.
    const patterns = [
        // scp-style SSH: git@github.com:owner/repo[.git][/]
        /^git@([^:/\s]+):([^/\s]+)\/([^/\s]+?)(?:\.git)?\/?$/,
        // ssh://[user@]github.com[:port]/owner/repo[.git][/]
        /^ssh:\/\/(?:[^@/\s]+@)?([^:/\s]+)(?::\d+)?\/([^/\s]+)\/([^/\s]+?)(?:\.git)?\/?$/,
        // http(s)://[userinfo@]github.com[:port]/owner/repo[.git][/...]
        /^https?:\/\/(?:[^@/\s]+@)?([^:/\s]+)(?::\d+)?\/([^/\s]+)\/([^/\s?#]+?)(?:\.git)?(?:[/?#].*)?$/,
    ];
    for (const pattern of patterns) {
        const m = pattern.exec(trimmed);
        if (m && m[1].toLowerCase() === "github.com") {
            return `https://github.com/${m[2]}/${m[3]}`;
        }
    }
    return null;
}
|
|
634
|
+
/**
|
|
635
|
+
* 0770 — Walk parent directories from `startDir` looking for a `.git` entry
|
|
636
|
+
* (directory OR file — git worktrees use a `.git` file). Bails at the
|
|
637
|
+
* filesystem root or after `maxLevels` iterations. Returns the absolute
|
|
638
|
+
* path of the discovered git root, or null.
|
|
639
|
+
*/
|
|
640
|
+
export function walkUpForGitRoot(startDir, maxLevels = 12) {
|
|
641
|
+
let current = resolve(startDir);
|
|
642
|
+
for (let i = 0; i < maxLevels; i++) {
|
|
643
|
+
if (existsSync(join(current, ".git")))
|
|
644
|
+
return current;
|
|
645
|
+
const parent = dirname(current);
|
|
646
|
+
if (parent === current)
|
|
647
|
+
return null;
|
|
648
|
+
current = parent;
|
|
649
|
+
}
|
|
650
|
+
return null;
|
|
651
|
+
}
|
|
652
|
+
const authoredSourceLinkCache = new Map();
|
|
653
|
+
/**
|
|
654
|
+
* 0770 — Test-only helper to clear the module-level memoization cache so
|
|
655
|
+
* tests can isolate detection runs across `beforeEach`.
|
|
656
|
+
*/
|
|
657
|
+
export function resetAuthoredSourceLinkCache() {
|
|
658
|
+
authoredSourceLinkCache.clear();
|
|
659
|
+
}
|
|
660
|
+
/**
|
|
661
|
+
* 0770 — Detect source-repo provenance for a locally-authored skill (no
|
|
662
|
+
* lockfile entry). Walks for `.git`, reads `origin` remote, normalizes via
|
|
663
|
+
* `parseGithubRemote`, and computes `skillPath` from `git ls-files` (with a
|
|
664
|
+
* filesystem fallback for untracked SKILL.md files). Memoized per absolute
|
|
665
|
+
* skill dir for the eval-server process lifetime.
|
|
666
|
+
*
|
|
667
|
+
* All git invocations use `execFileSync` with explicit argv (no shell), a
|
|
668
|
+
* 1500ms hard timeout, and silenced stderr. Any error converts to
|
|
669
|
+
* `{null, null}` — `buildSkillMetadata` never throws because of git.
|
|
670
|
+
*/
|
|
671
|
+
export function detectAuthoredSourceLink(skillDir) {
|
|
672
|
+
const absDir = resolve(skillDir);
|
|
673
|
+
const cached = authoredSourceLinkCache.get(absDir);
|
|
674
|
+
if (cached)
|
|
675
|
+
return cached;
|
|
676
|
+
const compute = () => {
|
|
677
|
+
const gitRoot = walkUpForGitRoot(absDir);
|
|
678
|
+
if (!gitRoot)
|
|
679
|
+
return { repoUrl: null, skillPath: null };
|
|
680
|
+
let remote = "";
|
|
681
|
+
try {
|
|
682
|
+
remote = execFileSync("git", ["config", "--get", "remote.origin.url"], {
|
|
683
|
+
cwd: gitRoot,
|
|
684
|
+
timeout: 1500,
|
|
685
|
+
stdio: ["ignore", "pipe", "ignore"],
|
|
686
|
+
encoding: "utf-8",
|
|
687
|
+
}).trim();
|
|
688
|
+
}
|
|
689
|
+
catch {
|
|
690
|
+
return { repoUrl: null, skillPath: null };
|
|
691
|
+
}
|
|
692
|
+
const repoUrl = parseGithubRemote(remote);
|
|
693
|
+
if (!repoUrl)
|
|
694
|
+
return { repoUrl: null, skillPath: null };
|
|
695
|
+
let skillPath = null;
|
|
696
|
+
try {
|
|
697
|
+
const tracked = execFileSync("git", ["ls-files", "--full-name", "SKILL.md"], {
|
|
698
|
+
cwd: absDir,
|
|
699
|
+
timeout: 1500,
|
|
700
|
+
stdio: ["ignore", "pipe", "ignore"],
|
|
701
|
+
encoding: "utf-8",
|
|
702
|
+
}).trim();
|
|
703
|
+
if (tracked)
|
|
704
|
+
skillPath = tracked;
|
|
705
|
+
}
|
|
706
|
+
catch {
|
|
707
|
+
// fall through to filesystem fallback
|
|
708
|
+
}
|
|
709
|
+
if (!skillPath) {
|
|
710
|
+
// Filesystem fallback for untracked SKILL.md — same path the file will
|
|
711
|
+
// have on github.com once committed and pushed.
|
|
712
|
+
skillPath = relative(gitRoot, join(absDir, "SKILL.md")).replace(/\\/g, "/");
|
|
713
|
+
}
|
|
714
|
+
return { repoUrl, skillPath };
|
|
715
|
+
};
|
|
716
|
+
const result = compute();
|
|
717
|
+
authoredSourceLinkCache.set(absDir, result);
|
|
718
|
+
return result;
|
|
719
|
+
}
|
|
600
720
|
/**
|
|
601
721
|
* 0737 — Resolve the source-repo provenance (repoUrl + skillPath) for a
|
|
602
722
|
* skill by looking up its lockfile entry. Two precedences:
|
|
@@ -608,19 +728,20 @@ export function deriveSourceAgent(skillDir, root, origin) {
|
|
|
608
728
|
* skill dir basename and the lockfile key differ — fall back to the parent
|
|
609
729
|
* directory's basename when no exact match exists.
|
|
610
730
|
*
|
|
611
|
-
*
|
|
612
|
-
* `
|
|
613
|
-
*
|
|
731
|
+
* 0770 — When no lockfile entry resolves provenance, fall through to
|
|
732
|
+
* `detectAuthoredSourceLink` which inspects the parent git repo's origin
|
|
733
|
+
* remote. Lockfile-derived values still take precedence to preserve
|
|
734
|
+
* install-time provenance when the workspace itself is a git repo.
|
|
614
735
|
*/
|
|
615
736
|
function resolveSourceLink(skillDir, root) {
|
|
616
737
|
const lock = readLockfile(root);
|
|
617
738
|
if (!lock)
|
|
618
|
-
return
|
|
739
|
+
return detectAuthoredSourceLink(skillDir);
|
|
619
740
|
const skillName = basename(skillDir);
|
|
620
741
|
const parentName = basename(dirname(skillDir));
|
|
621
742
|
const entry = lock.skills[skillName] ?? lock.skills[parentName];
|
|
622
743
|
if (!entry)
|
|
623
|
-
return
|
|
744
|
+
return detectAuthoredSourceLink(skillDir);
|
|
624
745
|
if (entry.sourceRepoUrl) {
|
|
625
746
|
return {
|
|
626
747
|
repoUrl: entry.sourceRepoUrl,
|
|
@@ -636,6 +757,9 @@ function resolveSourceLink(skillDir, root) {
|
|
|
636
757
|
// copy-chip (local path); a fresh `vskill add` writes the explicit
|
|
637
758
|
// `sourceSkillPath` and restores the working anchor via the branch above.
|
|
638
759
|
const m = /^github:([^/]+)\/([^/#]+)/.exec(entry.source ?? "");
|
|
760
|
+
// 0770: do NOT fall through here — an installed skill with a non-github
|
|
761
|
+
// `source` (e.g. `marketplace:...`) is still installed, not authored. Local
|
|
762
|
+
// git detection would leak the workspace remote (umbrella, etc.).
|
|
639
763
|
if (!m)
|
|
640
764
|
return { repoUrl: null, skillPath: null };
|
|
641
765
|
return {
|
|
@@ -2836,6 +2960,46 @@ export function registerRoutes(router, root, projectName) {
|
|
|
2836
2960
|
sendSSEDone(res, { error: err instanceof Error ? err.message : String(err) });
|
|
2837
2961
|
}
|
|
2838
2962
|
});
|
|
2963
|
+
// GET parsed `## Test Cases` block from SKILL.md (increment 0776)
|
|
2964
|
+
router.get("/api/skills/:plugin/:skill/test-cases", (req, res, params) => {
|
|
2965
|
+
const skillDir = resolveSkillDir(root, params.plugin, params.skill);
|
|
2966
|
+
const skillMdPath = join(skillDir, "SKILL.md");
|
|
2967
|
+
const content = existsSync(skillMdPath) ? readFileSync(skillMdPath, "utf-8") : "";
|
|
2968
|
+
const prompts = parseTestCases(content);
|
|
2969
|
+
sendJson(res, { prompts, source: prompts.length > 0 ? "skill-md" : null }, 200, req);
|
|
2970
|
+
});
|
|
2971
|
+
// PUT — upsert the `## Test Cases` block in SKILL.md (increment 0776).
// Empty prompts array removes the section. Frontmatter and other body
// sections are preserved verbatim.
//
// Request body: { prompts: Array<{ prompt: string, expected: "should_activate" | "should_not_activate" | "auto" }> }
// Responds 400 with { ok: false, error } when the body or any entry is
// invalid; otherwise writes SKILL.md and responds 200 with { ok: true, count }.
router.put("/api/skills/:plugin/:skill/test-cases", async (req, res, params) => {
    const skillDir = resolveSkillDir(root, params.plugin, params.skill);
    const skillMdPath = join(skillDir, "SKILL.md");
    const body = (await readBody(req));
    if (!body || !Array.isArray(body.prompts)) {
        sendJson(res, { ok: false, error: "Body must be { prompts: ParsedTestCase[] }" }, 400, req);
        return;
    }
    const allowed = ["should_activate", "should_not_activate", "auto"];
    // Validate every entry before touching the file so a bad entry never
    // results in a partial write.
    for (const p of body.prompts) {
        if (!p || typeof p.prompt !== "string" || p.prompt.length === 0) {
            sendJson(res, { ok: false, error: "Each prompt must have a non-empty string prompt" }, 400, req);
            return;
        }
        // Prompts are written between double quotes with no escaping, so a
        // quote inside the prompt would end the captured value early.
        if (p.prompt.includes('"')) {
            sendJson(res, { ok: false, error: 'Prompt strings may not contain double quotes (got: ' + p.prompt + ")" }, 400, req);
            return;
        }
        // A line starting with `## ` or `---` is a section terminator for the
        // test-case parser: the fixture would be cut off on the next read and
        // the remainder would be left behind as stray text in SKILL.md.
        if (/\n(?:## |---)/.test(p.prompt)) {
            sendJson(res, { ok: false, error: "Prompt strings may not contain a line starting with '## ' or '---'" }, 400, req);
            return;
        }
        if (!allowed.includes(p.expected)) {
            sendJson(res, { ok: false, error: "expected must be one of: " + allowed.join(", ") }, 400, req);
            return;
        }
    }
    // A missing SKILL.md is treated as empty content; the file (and its
    // directory) is created on write.
    const existing = existsSync(skillMdPath) ? readFileSync(skillMdPath, "utf-8") : "";
    const updated = upsertTestCasesIntoSkillMd(existing, body.prompts);
    mkdirSync(dirname(skillMdPath), { recursive: true });
    writeFileSync(skillMdPath, updated, "utf-8");
    sendJson(res, { ok: true, count: body.prompts.length }, 200, req);
});
|
|
2839
3003
|
// AI-generate activation test prompts (SSE)
|
|
2840
3004
|
router.post("/api/skills/:plugin/:skill/activation-prompts", async (req, res, params) => {
|
|
2841
3005
|
const skillDir = resolveSkillDir(root, params.plugin, params.skill);
|