skilltest 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +103 -0
- package/LICENSE +21 -0
- package/README.md +326 -0
- package/dist/index.js +1626 -0
- package/dist/index.js.map +1 -0
- package/package.json +51 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,1626 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/index.ts
|
|
4
|
+
import fs5 from "node:fs";
|
|
5
|
+
import path5 from "node:path";
|
|
6
|
+
import { fileURLToPath } from "node:url";
|
|
7
|
+
import { Command } from "commander";
|
|
8
|
+
|
|
9
|
+
// src/core/skill-parser.ts
|
|
10
|
+
import fs from "node:fs/promises";
|
|
11
|
+
import path from "node:path";
|
|
12
|
+
import matter from "gray-matter";
|
|
13
|
+
import yaml from "js-yaml";
|
|
14
|
+
import { z } from "zod";
|
|
15
|
+
/**
 * Zod schema applied to parsed SKILL.md frontmatter.
 *
 * `name` and `description` must be strings and `license` is an optional
 * string. `.passthrough()` lets keys that are not listed here survive
 * validation instead of being stripped.
 */
var frontmatterSchema = z
  .object({
    name: z.string(),
    description: z.string(),
    license: z.string().optional(),
  })
  .passthrough();
|
|
20
|
+
// Matches a `---` fenced block at the very start of the text (LF or CRLF line
// endings) and captures everything between the two fences in group 1.
var FRONTMATTER_BLOCK_REGEX = /^---\r?\n([\s\S]*?)\r?\n---\r?\n?/;
|
|
21
|
+
/**
 * Resolve a user-supplied path to a skill directory and its SKILL.md file.
 *
 * Accepts either a directory that contains SKILL.md or a path to a file that
 * is itself named SKILL.md.
 *
 * @param {string} inputPath - Path as typed by the user (relative or absolute).
 * @returns {Promise<{skillRoot: string, skillFile: string}>} Absolute paths.
 * @throws {Error} When the path does not exist, is neither file nor directory,
 *   is a directory without a SKILL.md file, or is a file with another name.
 */
async function resolveSkillPath(inputPath) {
  const target = path.resolve(inputPath);

  // A failed stat is reported as "not found", whatever the underlying cause.
  const targetInfo = await fs.stat(target).catch(() => null);
  if (targetInfo === null) {
    throw new Error(`Path not found: ${inputPath}`);
  }

  if (targetInfo.isDirectory()) {
    const candidate = path.join(target, "SKILL.md");
    const candidateInfo = await fs.stat(candidate).catch(() => null);
    // Missing entry and "exists but is not a regular file" share one message.
    if (candidateInfo === null || !candidateInfo.isFile()) {
      throw new Error(`No SKILL.md found in directory: ${inputPath}`);
    }
    return { skillRoot: target, skillFile: candidate };
  }

  if (!targetInfo.isFile()) {
    throw new Error(`Path is not a file or directory: ${inputPath}`);
  }

  const isSkillFile = path.basename(target) === "SKILL.md";
  if (!isSkillFile) {
    throw new Error(`Expected SKILL.md or a directory containing SKILL.md. Received: ${inputPath}`);
  }
  return { skillRoot: path.dirname(target), skillFile: target };
}
|
|
49
|
+
/**
 * Locate SKILL.md for the given input path and read it as UTF-8 text.
 *
 * @param {string} inputPath - Skill directory or SKILL.md path.
 * @returns {Promise<{skillRoot: string, skillFile: string, raw: string, lineCount: number}>}
 *   The resolved paths, the file text, and the number of segments obtained by
 *   splitting the text on LF/CRLF (0 for an empty file).
 */
async function loadSkillFile(inputPath) {
  const location = await resolveSkillPath(inputPath);
  const raw = await fs.readFile(location.skillFile, "utf8");

  let lineCount = 0;
  if (raw !== "") {
    lineCount = raw.split(/\r?\n/).length;
  }

  return {
    skillRoot: location.skillRoot,
    skillFile: location.skillFile,
    raw,
    lineCount,
  };
}
|
|
55
|
+
/**
 * Split SKILL.md text into YAML frontmatter and markdown body.
 *
 * The frontmatter is first loaded with js-yaml to confirm it is a mapping;
 * only then is gray-matter used to produce the returned `data` and `content`.
 *
 * @param {string} rawSkill - Full SKILL.md text.
 * @returns {{hasFrontmatter: boolean, rawFrontmatter: (string|null), data: (object|null), content: string, error: (string|null)}}
 *   `hasFrontmatter` is false when no non-empty leading `---` block is found;
 *   `error` is set when the block does not parse into a YAML object.
 */
function parseFrontmatter(rawSkill) {
  const fenceMatch = rawSkill.match(FRONTMATTER_BLOCK_REGEX);
  const rawFrontmatter = fenceMatch?.[1] ?? null;

  // An absent block and an empty block are both reported as "no frontmatter".
  if (!rawFrontmatter) {
    return {
      hasFrontmatter: false,
      rawFrontmatter: null,
      data: null,
      content: rawSkill,
      error: null,
    };
  }

  // Shared shape for every "frontmatter present but unusable" outcome.
  const rejected = (error) => ({
    hasFrontmatter: true,
    rawFrontmatter,
    data: null,
    content: rawSkill.replace(FRONTMATTER_BLOCK_REGEX, ""),
    error,
  });

  try {
    const yamlValue = yaml.load(rawFrontmatter);
    const isMapping = yamlValue !== null && typeof yamlValue === "object" && !Array.isArray(yamlValue);
    if (!isMapping) {
      return rejected("Frontmatter must parse into a YAML object.");
    }
    const parsed = matter(rawSkill);
    return {
      hasFrontmatter: true,
      rawFrontmatter,
      data: parsed.data,
      content: parsed.content,
      error: null,
    };
  } catch (error) {
    return rejected(error instanceof Error ? error.message : "Unknown frontmatter parse error");
  }
}
|
|
97
|
+
/**
 * Load a skill and fail fast if its frontmatter is missing or invalid.
 *
 * @param {string} inputPath - Skill directory or SKILL.md path.
 * @returns {Promise<{skillRoot: string, skillFile: string, raw: string, content: string, frontmatterRaw: string, frontmatter: object}>}
 *   Paths, raw text, markdown body, raw frontmatter text and the
 *   schema-validated frontmatter object.
 * @throws {Error} When frontmatter is absent, does not parse, or fails schema
 *   validation (only the first schema issue is reported).
 */
async function parseSkillStrict(inputPath) {
  const { skillRoot, skillFile, raw } = await loadSkillFile(inputPath);
  const parsed = parseFrontmatter(raw);

  if (!parsed.hasFrontmatter) {
    throw new Error("SKILL.md is missing YAML frontmatter.");
  }
  if (parsed.error) {
    throw new Error(`Invalid frontmatter: ${parsed.error}`);
  }

  const validation = frontmatterSchema.safeParse(parsed.data ?? {});
  if (!validation.success) {
    const firstIssue = validation.error.issues[0];
    const fieldPath = firstIssue.path.join(".");
    throw new Error(`Invalid frontmatter field '${fieldPath}': ${firstIssue.message}`);
  }

  return {
    skillRoot,
    skillFile,
    raw,
    content: parsed.content,
    frontmatterRaw: parsed.rawFrontmatter,
    frontmatter: validation.data,
  };
}
|
|
120
|
+
// Prefixes that mark a candidate string as a relative file path outright
// (consulted by isLikelyRelativePath before its generic "a/b" pattern).
var RELATIVE_LINK_PREFIXES = ["./", "../", "scripts/", "references/", "assets/"];
|
|
121
|
+
/**
 * Collect the distinct relative file paths mentioned in a markdown document.
 *
 * Three sources are scanned, in this order:
 *   1. markdown link targets: `[text](target)`
 *   2. inline code spans that look like a relative path
 *   3. bare `scripts/…`, `references/…` and `assets/…` mentions in running text
 *
 * @param {string} markdown - Markdown text to scan.
 * @returns {string[]} Unique cleaned reference strings in first-seen order.
 */
function extractRelativeFileReferences(markdown) {
  const references = /* @__PURE__ */ new Set();

  // Never store null: cleanReferenceTarget returns null for unusable targets.
  const addCleaned = (target) => {
    const cleaned = cleanReferenceTarget(target);
    if (cleaned) {
      references.add(cleaned);
    }
  };

  const markdownLinkRegex = /\[[^\]]+\]\(([^)]+)\)/g;
  for (const match of markdown.matchAll(markdownLinkRegex)) {
    const cleaned = cleanReferenceTarget((match[1] ?? "").trim());
    if (cleaned && isLikelyRelativePath(cleaned)) {
      references.add(cleaned);
    }
  }

  const inlineCodeRegex = /`([^`]+)`/g;
  for (const match of markdown.matchAll(inlineCodeRegex)) {
    const candidate = (match[1] ?? "").trim();
    if (isLikelyRelativePath(candidate)) {
      addCleaned(candidate);
    }
  }

  // The lookbehind rejects matches that are merely the tail of a longer path
  // or URL (`src/assets/x`, `https://host/assets/x`, `my-scripts/x`), which
  // would otherwise be resolved against the skill root and reported as
  // missing. A leading `./` is tolerated and dropped, so `./scripts/x` in
  // prose still yields `scripts/x`.
  const barePathRegex = /(?<![A-Za-z0-9._\-/])(?:\.\/)?((?:scripts|references|assets)\/[A-Za-z0-9._\-/]+)/g;
  for (const match of markdown.matchAll(barePathRegex)) {
    // Trailing dots are sentence punctuation ("Run scripts/setup.sh."), not
    // part of the file name.
    addCleaned(match[1].replace(/\.+$/, ""));
  }

  return Array.from(references);
}
|
|
147
|
+
/**
 * Normalize a raw link target or path candidate into a plain file path.
 *
 * Steps: trim, drop an optional quoted markdown link title, unwrap
 * `<...>` angle brackets, reject pure anchors and http(s)/mailto/tel URLs,
 * then strip a trailing `#fragment`.
 *
 * @param {string|null|undefined} target - Raw text taken from the markdown.
 * @returns {string|null} The cleaned path, or null when nothing usable remains.
 */
function cleanReferenceTarget(target) {
  if (!target) {
    return null;
  }
  let cleaned = target.trim();

  // Markdown allows `[text](path "Title")`; without this the title would be
  // treated as part of the path and the file reported as missing.
  cleaned = cleaned.replace(/\s+(?:"[^"]*"|'[^']*')$/, "").trim();

  if (cleaned.startsWith("<") && cleaned.endsWith(">")) {
    cleaned = cleaned.slice(1, -1).trim();
  }
  if (cleaned === "" || cleaned.startsWith("#")) {
    return null;
  }
  if (/^(https?:|mailto:|tel:)/i.test(cleaned)) {
    return null;
  }
  const hashIndex = cleaned.indexOf("#");
  if (hashIndex >= 0) {
    cleaned = cleaned.slice(0, hashIndex).trim();
  }
  return cleaned || null;
}
|
|
167
|
+
/**
 * Heuristic: does this string look like a relative file path?
 *
 * @param {string} candidate - Text taken from a link target or code span.
 * @returns {boolean} False for empty values, POSIX-absolute paths, Windows
 *   drive paths (`C:\`), and http(s)/mailto/tel URLs. True when the string
 *   starts with a known relative prefix or consists of at least two
 *   `/`-separated segments made only of letters, digits, `.`, `_` and `-`.
 */
function isLikelyRelativePath(candidate) {
  if (!candidate) {
    return false;
  }

  const absoluteOrRemote =
    candidate.startsWith("/") ||
    /^[A-Za-z]:\\/.test(candidate) ||
    /^(https?:|mailto:|tel:)/i.test(candidate);
  if (absoluteOrRemote) {
    return false;
  }

  for (const prefix of RELATIVE_LINK_PREFIXES) {
    if (candidate.startsWith(prefix)) {
      return true;
    }
  }

  const multiSegmentPath = /^[A-Za-z0-9._-]+(?:\/[A-Za-z0-9._-]+)+$/;
  return multiSegmentPath.test(candidate);
}
|
|
185
|
+
|
|
186
|
+
// src/core/linter/compat.ts
|
|
187
|
+
/**
 * Cross-agent compatibility heuristics for a skill.
 *
 * Always returns three issues, in this order: `compat.allowed-tools`,
 * `compat.provider-phrasing`, `compat.summary`.
 *
 * @param {{frontmatter: {data: (object|null), content: string}}} context -
 *   Parsed frontmatter data and markdown body.
 * @returns {Array<{id: string, title: string, status: string, message: string, suggestion?: string}>}
 */
function runCompatibilityChecks(context) {
  const frontmatterData = context.frontmatter.data ?? {};
  const body = context.frontmatter.content;

  const usesAllowedTools = Object.prototype.hasOwnProperty.call(frontmatterData, "allowed-tools");
  const claudeSpecific = /\bclaude code\b/i.test(body);
  // "codex" only counts as provider-specific when "openai" is not mentioned too.
  const codexSpecific = /\bcodex\b/i.test(body) && !/\bopenai\b/i.test(body);
  const providerSpecific = claudeSpecific || codexSpecific;
  const hasPlatformAssumptions = usesAllowedTools || providerSpecific;

  const allowedToolsIssue = usesAllowedTools
    ? {
        id: "compat.allowed-tools",
        title: "Platform-Specific Frontmatter",
        status: "warn",
        message: "Frontmatter includes allowed-tools, which is typically Claude-specific.",
        suggestion: "Document fallback behavior for platforms that ignore allowed-tools.",
      }
    : {
        id: "compat.allowed-tools",
        title: "Platform-Specific Frontmatter",
        status: "pass",
        message: "No known provider-specific frontmatter keys detected.",
      };

  let phrasingIssue;
  if (providerSpecific) {
    // Claude wins the label when both providers are detected.
    const platform = claudeSpecific ? "Claude" : "Codex";
    phrasingIssue = {
      id: "compat.provider-phrasing",
      title: "Provider-Specific Language",
      status: "warn",
      message: `Skill body appears tuned to ${platform}-specific behavior.`,
      suggestion: "Add neutral instructions or an explicit compatibility note for other agents.",
    };
  } else {
    phrasingIssue = {
      id: "compat.provider-phrasing",
      title: "Provider-Specific Language",
      status: "pass",
      message: "Skill body appears provider-neutral.",
    };
  }

  const summaryIssue = {
    id: "compat.summary",
    title: "Compatibility Hint",
    status: hasPlatformAssumptions ? "warn" : "pass",
    message: hasPlatformAssumptions
      ? "Likely compatible with some agents, but includes platform-specific assumptions."
      : "Likely broadly compatible across Anthropic, OpenAI/Codex-style, and other markdown skill runners.",
  };

  return [allowedToolsIssue, phrasingIssue, summaryIssue];
}
|
|
236
|
+
|
|
237
|
+
// src/core/linter/content.ts
|
|
238
|
+
// Case-insensitive, whole-phrase matchers for wording that leaves the agent
// without a concrete instruction. Each phrase is wrapped in `\b…\b`.
var VAGUE_PATTERNS = [
  "do something appropriate",
  "handle as needed",
  "use best judgment",
  "if possible",
  "when relevant",
  "do what seems right",
].map((phrase) => new RegExp(`\\b${phrase}\\b`, "i"));
|
|
246
|
+
// Labelled regexes used to flag credentials pasted into a skill file.
// None of the regexes is global, so `.test()` carries no state between calls.
var SECRET_PATTERNS = [
  { label: "OpenAI key", regex: /\bsk-[A-Za-z0-9]{20,}\b/ },
  { label: "AWS access key", regex: /\bAKIA[0-9A-Z]{16}\b/ },
  // GitHub issues several prefixes besides classic personal tokens (ghp_):
  // gho_ (OAuth), ghu_ / ghs_ (GitHub App), ghr_ (refresh) and
  // github_pat_ (fine-grained personal access tokens).
  { label: "GitHub token", regex: /\b(?:gh[pousr]_[A-Za-z0-9]{20,}|github_pat_[A-Za-z0-9_]{20,})\b/ },
  { label: "Slack token", regex: /\bxox[baprs]-[A-Za-z0-9-]{20,}\b/ },
  // Accept any upper-case algorithm/qualifier words before "PRIVATE KEY"
  // (RSA, EC, DSA, OPENSSH, ENCRYPTED, PGP …) and the PGP " BLOCK" suffix.
  { label: "Generic private key header", regex: /-----BEGIN (?:[A-Z0-9]+ )*PRIVATE KEY(?: BLOCK)?-----/ }
];
|
|
253
|
+
/**
 * Content-quality heuristics for a skill's body and frontmatter.
 *
 * Emits one issue per check, in this order: headers, examples, vagueness,
 * frontmatter angle brackets, secrets, body length, and — only when a
 * description string is present — description length.
 *
 * @param {{skill: {raw: string}, frontmatter: {content: string, data: (object|null), rawFrontmatter: (string|null)}}} context
 * @returns {Array<{id: string, title: string, status: string, message: string, suggestion?: string}>}
 */
function runContentChecks(context) {
  const issues = [];
  const body = context.frontmatter.content;
  const bodyLines = body.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
  const description = typeof context.frontmatter.data?.description === "string" ? context.frontmatter.data.description : "";

  // Angle brackets are looked for in the parsed frontmatter (keys and values),
  // not in the raw YAML text: YAML's folded block-scalar indicator
  // (`description: >-`) is syntax, not content, and must not trigger a warning.
  // The raw text is used only when no parsed data is available.
  const frontmatterHasAngleBrackets = () => {
    const { rawFrontmatter, data } = context.frontmatter;
    if (!rawFrontmatter) {
      return false;
    }
    if (data === null || data === undefined) {
      return /[<>]/.test(rawFrontmatter);
    }
    try {
      return /[<>]/.test(JSON.stringify(data) ?? "");
    } catch {
      // Data that cannot be serialized (e.g. cyclic YAML aliases): scan the text.
      return /[<>]/.test(rawFrontmatter);
    }
  };

  if (!/^#{1,6}\s+\S+/m.test(body)) {
    issues.push({
      id: "content.headers",
      title: "Section Headers",
      status: "warn",
      message: "No markdown headers found in SKILL.md body.",
      suggestion: "Add section headers to improve scannability and maintenance."
    });
  } else {
    issues.push({
      id: "content.headers",
      title: "Section Headers",
      status: "pass",
      message: "SKILL.md contains markdown section headers."
    });
  }

  // Either the word "example" or any fenced code block counts as an example.
  const hasExamples = /example/i.test(body) || /```[\s\S]*?```/.test(body);
  if (!hasExamples) {
    issues.push({
      id: "content.examples",
      title: "Examples",
      status: "warn",
      message: "No examples detected in SKILL.md body.",
      suggestion: "Add at least one concrete example to guide usage."
    });
  } else {
    issues.push({
      id: "content.examples",
      title: "Examples",
      status: "pass",
      message: "Examples were detected in SKILL.md."
    });
  }

  if (VAGUE_PATTERNS.some((pattern) => pattern.test(body))) {
    issues.push({
      id: "content.vagueness",
      title: "Instruction Specificity",
      status: "warn",
      message: "Potentially vague instruction phrases detected.",
      suggestion: "Replace vague guidance with explicit decision rules or step-by-step instructions."
    });
  } else {
    issues.push({
      id: "content.vagueness",
      title: "Instruction Specificity",
      status: "pass",
      message: "No obvious vague placeholder phrasing found."
    });
  }

  if (frontmatterHasAngleBrackets()) {
    issues.push({
      id: "content.frontmatter-angle-brackets",
      title: "Frontmatter Angle Brackets",
      status: "warn",
      message: "Frontmatter contains angle bracket characters (< or >), which can be misinterpreted in some agents.",
      suggestion: "Remove XML-like tags from frontmatter values when possible."
    });
  } else {
    issues.push({
      id: "content.frontmatter-angle-brackets",
      title: "Frontmatter Angle Brackets",
      status: "pass",
      message: "No angle bracket tokens detected in frontmatter."
    });
  }

  // Secrets are searched in the whole file (frontmatter included), not only the body.
  const secretHits = /* @__PURE__ */ new Set();
  for (const pattern of SECRET_PATTERNS) {
    if (pattern.regex.test(context.skill.raw)) {
      secretHits.add(pattern.label);
    }
  }
  if (secretHits.size > 0) {
    issues.push({
      id: "content.secrets",
      title: "Hardcoded Secrets",
      status: "fail",
      message: `Potential secrets detected (${Array.from(secretHits).join(", ")}).`,
      suggestion: "Remove secrets from skill files and use environment variables or secret managers."
    });
  } else {
    issues.push({
      id: "content.secrets",
      title: "Hardcoded Secrets",
      status: "pass",
      message: "No obvious API keys or secrets patterns were detected."
    });
  }

  if (bodyLines.length < 10) {
    issues.push({
      id: "content.body-length",
      title: "Body Completeness",
      status: "warn",
      message: `SKILL.md body has only ${bodyLines.length} non-empty lines.`,
      suggestion: "Add more detailed instructions; short bodies are often incomplete."
    });
  } else {
    issues.push({
      id: "content.body-length",
      title: "Body Completeness",
      status: "pass",
      message: `SKILL.md body has ${bodyLines.length} non-empty lines.`
    });
  }

  // No description issue is emitted at all when the description is absent or empty.
  if (description && description.length < 50) {
    issues.push({
      id: "content.description-length",
      title: "Description Specificity",
      status: "warn",
      message: `Description length is ${description.length} characters, which may be too vague for reliable triggering.`,
      suggestion: "Expand description with concrete scope and activation conditions."
    });
  } else if (description) {
    issues.push({
      id: "content.description-length",
      title: "Description Specificity",
      status: "pass",
      message: "Description length is sufficient for triggerability heuristics."
    });
  }
  return issues;
}
|
|
380
|
+
|
|
381
|
+
// src/core/linter/disclosure.ts
|
|
382
|
+
import fs3 from "node:fs/promises";
|
|
383
|
+
import path3 from "node:path";
|
|
384
|
+
|
|
385
|
+
// src/utils/fs.ts
|
|
386
|
+
import fs2 from "node:fs/promises";
|
|
387
|
+
import path2 from "node:path";
|
|
388
|
+
/**
 * Report whether `fs.access` succeeds for the given path.
 *
 * @param {string} targetPath - Path to probe.
 * @returns {Promise<boolean>} True when access succeeds; false on any error.
 */
async function pathExists(targetPath) {
  let reachable = true;
  try {
    await fs2.access(targetPath);
  } catch {
    // Every failure (missing entry, permission problem, …) reads as "absent".
    reachable = false;
  }
  return reachable;
}
|
|
396
|
+
/**
 * List every regular file beneath a directory, depth first.
 *
 * Entries that are neither directories nor regular files (per the Dirent
 * type reported by `readdir`) are ignored.
 *
 * @param {string} directory - Directory to walk.
 * @returns {Promise<string[]>} File paths built by joining onto `directory`.
 */
async function listFilesRecursive(directory) {
  const collected = [];
  const children = await fs2.readdir(directory, { withFileTypes: true });

  for (const child of children) {
    const childPath = path2.join(directory, child.name);
    if (child.isDirectory()) {
      const nested = await listFilesRecursive(childPath);
      collected.push(...nested);
    } else if (child.isFile()) {
      collected.push(childPath);
    }
  }

  return collected;
}
|
|
411
|
+
/**
 * Read a UTF-8 file and parse its contents as JSON.
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<*>} The parsed value.
 * @throws Propagates read errors and `SyntaxError` from `JSON.parse`.
 */
async function readJsonFile(filePath) {
  return JSON.parse(await fs2.readFile(filePath, "utf8"));
}
|
|
415
|
+
/**
 * Write `data` as 2-space-indented JSON followed by a newline.
 *
 * Missing parent directories are created first.
 *
 * @param {string} filePath - Destination (resolved against the current directory).
 * @param {*} data - Value passed to `JSON.stringify`.
 * @returns {Promise<void>}
 */
async function writeJsonFile(filePath, data) {
  const destination = path2.resolve(filePath);
  const parentDirectory = path2.dirname(destination);
  await fs2.mkdir(parentDirectory, { recursive: true });

  // Serialize only after the directory exists, keeping the original ordering
  // of side effects if serialization throws.
  const serialized = JSON.stringify(data, null, 2);
  await fs2.writeFile(destination, `${serialized}\n`, "utf8");
}
|
|
421
|
+
/**
 * Replace every platform path separator in a path with `/`.
 *
 * @param {string} inputPath - Path using the current platform's separator.
 * @returns {string} The same path with `/` separators.
 */
function toPosixPath(inputPath) {
  return inputPath.replaceAll(path2.sep, "/");
}
|
|
424
|
+
|
|
425
|
+
// src/core/linter/disclosure.ts
|
|
426
|
+
/**
 * Check whether `candidate` is `root` itself or lies beneath it.
 *
 * @param {string} root - Directory that must contain the candidate.
 * @param {string} candidate - Path to test.
 * @returns {boolean} True when the candidate does not escape `root`.
 */
function isPathInsideRoot(root, candidate) {
  const relative = path3.relative(root, candidate);
  // An absolute result means no relative route exists (e.g. another drive).
  if (path3.isAbsolute(relative)) {
    return false;
  }
  // Only a leading `..` *segment* escapes the root. A plain prefix test would
  // also reject in-root entries whose name merely begins with two dots,
  // such as `..notes/a.md`.
  return relative !== ".." && !relative.startsWith(`..${path3.sep}`);
}
|
|
430
|
+
/**
 * Measure how deep file references chain, starting from the skill's text.
 *
 * Depth 0: no readable referenced file contains further references.
 * Depth 1: a file referenced from the root text itself contains references.
 * Depth 2: one of those second-level files also contains references
 *          (2 is the largest value this function reports).
 *
 * @param {string} skillRoot - Directory that root-level references resolve against.
 * @param {string} rootContent - Text of the top-level skill file.
 * @returns {Promise<number>} 0, 1 or 2.
 * @throws Propagates read errors other than "missing" or "is a directory".
 */
async function detectNestedReferenceChain(skillRoot, rootContent) {
  // References may point at something that is missing or at a directory
  // (e.g. `references/`). Neither can carry nested references, so both are
  // skipped instead of letting readFile's EISDIR abort the whole lint run.
  const readReferencedText = async (filePath) => {
    try {
      return await fs3.readFile(filePath, "utf8");
    } catch (error) {
      if (["ENOENT", "ENOTDIR", "EISDIR"].includes(error?.code)) {
        return null;
      }
      throw error;
    }
  };

  let maxDepth = 0;
  for (const reference of extractRelativeFileReferences(rootContent)) {
    const firstLevelPath = path3.resolve(skillRoot, reference);
    const firstLevelRaw = await readReferencedText(firstLevelPath);
    if (firstLevelRaw === null) {
      continue;
    }
    const secondLevelRefs = extractRelativeFileReferences(firstLevelRaw);
    if (secondLevelRefs.length > 0) {
      maxDepth = Math.max(maxDepth, 1);
    }
    for (const secondLevelReference of secondLevelRefs) {
      // Nested references resolve relative to the file that mentions them.
      const secondLevelPath = path3.resolve(path3.dirname(firstLevelPath), secondLevelReference);
      const secondLevelRaw = await readReferencedText(secondLevelPath);
      if (secondLevelRaw === null) {
        continue;
      }
      if (extractRelativeFileReferences(secondLevelRaw).length > 0) {
        // 2 is the ceiling, so there is nothing more to learn from other files.
        return 2;
      }
    }
  }
  return maxDepth;
}
|
|
457
|
+
/**
 * Progressive-disclosure checks: file size vs. references/ usage, reference
 * path scope, reference chain depth and parent-directory traversal.
 *
 * Always returns four issues, in this order: `disclosure.skill-split`,
 * `disclosure.relative-path-root`, `disclosure.reference-depth`,
 * `disclosure.parent-traversal`.
 *
 * @param {{skill: {raw: string, skillRoot: string, lineCount: number}}} context
 * @returns {Promise<Array<{id: string, title: string, status: string, message: string, suggestion?: string}>>}
 */
async function runDisclosureChecks(context) {
  const { skill } = context;
  const findings = [];
  // `suggestion` is attached only to non-passing findings.
  const record = (id, title, status, message, suggestion) => {
    findings.push(
      suggestion === undefined
        ? { id, title, status, message }
        : { id, title, status, message, suggestion }
    );
  };

  const references = extractRelativeFileReferences(skill.raw);

  // --- long top-level file without a references/ directory ---
  const referencesDir = path3.join(skill.skillRoot, "references");
  // The directory is probed only for files longer than 200 lines.
  const needsSplit = skill.lineCount > 200 && !(await pathExists(referencesDir));
  if (needsSplit) {
    record(
      "disclosure.skill-split",
      "Progressive Disclosure",
      "warn",
      "SKILL.md exceeds 200 lines and no references/ directory is present.",
      "Move detailed material into references/ files and keep SKILL.md focused."
    );
  } else {
    record(
      "disclosure.skill-split",
      "Progressive Disclosure",
      "pass",
      "Top-level file length and references/ usage look reasonable."
    );
  }

  // --- references that are absolute, home-relative, or leave the root ---
  const outOfScope = [];
  for (const rawReference of references) {
    const target = cleanReferenceTarget(rawReference);
    if (!target) {
      continue;
    }
    const absoluteLike =
      path3.isAbsolute(target) || /^[A-Za-z]:\\/.test(target) || target.startsWith("~");
    const violatesScope =
      absoluteLike ||
      !isLikelyRelativePath(target) ||
      !isPathInsideRoot(skill.skillRoot, path3.resolve(skill.skillRoot, target));
    if (violatesScope) {
      outOfScope.push(target);
    }
  }
  if (outOfScope.length > 0) {
    record(
      "disclosure.relative-path-root",
      "Reference Path Scope",
      "fail",
      `Found non-relative or out-of-root references: ${outOfScope.join(", ")}`,
      "Use relative paths that stay within the skill root directory."
    );
  } else {
    record(
      "disclosure.relative-path-root",
      "Reference Path Scope",
      "pass",
      "All detected file references are relative and scoped to skill root."
    );
  }

  // --- references that themselves lead to further references ---
  const chainDepth = await detectNestedReferenceChain(skill.skillRoot, skill.raw);
  if (chainDepth > 1) {
    record(
      "disclosure.reference-depth",
      "Reference Chain Depth",
      "warn",
      "Deep reference chains detected (>1 level).",
      "Avoid linking from references to more nested references where possible."
    );
  } else {
    record(
      "disclosure.reference-depth",
      "Reference Chain Depth",
      "pass",
      "Reference depth is shallow and easy to navigate."
    );
  }

  // --- any `../` segment, whether or not it ends up outside the root ---
  const usesParentTraversal = references.some((item) => toPosixPath(item).includes("../"));
  if (usesParentTraversal) {
    record(
      "disclosure.parent-traversal",
      "Parent Traversal",
      "warn",
      "References include parent-directory traversal (../).",
      "Prefer references rooted within the skill directory for portability."
    );
  } else {
    record(
      "disclosure.parent-traversal",
      "Parent Traversal",
      "pass",
      "No parent-directory traversal references detected."
    );
  }

  return findings;
}
|
|
548
|
+
|
|
549
|
+
// src/core/linter/frontmatter.ts
|
|
550
|
+
// Lowercase letters/digits in groups separated by single hyphens, with no
// leading, trailing or doubled hyphen (e.g. "api-tester").
var SKILL_NAME_REGEX = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
|
|
551
|
+
/**
 * Read `data[key]` and return it only when it is a string.
 *
 * @param {object|null|undefined} data - Object to read from.
 * @param {string} key - Property name.
 * @returns {string|null} The string value (possibly empty), or null when
 *   `data` is falsy or the value is not a string.
 */
function getStringField(data, key) {
  const value = data ? data[key] : null;
  return typeof value === "string" ? value : null;
}
|
|
561
|
+
/**
 * Heuristic: does a description say both what the skill does and when to use it?
 *
 * @param {string} description - Frontmatter description text.
 * @returns {boolean} True when the text contains at least one capability verb
 *   (create, build, validate, …) and at least one usage cue (when, if, for, …).
 */
function descriptionLooksActionable(description) {
  const capabilityVerb = /\b(create|build|generate|analyze|test|validate|review|refactor|debug|audit|compose|transform|summari[sz]e|plan)\b/i;
  const usageCue = /\b(when|if|for|whenever|use this|ideal for|best for|should use)\b/i;
  return [capabilityVerb, usageCue].every((pattern) => pattern.test(description));
}
|
|
566
|
+
/**
 * Validate the SKILL.md frontmatter: presence, YAML validity, `name`,
 * `description`, `license`, and whether the description is actionable.
 *
 * Returns a single failing issue when the frontmatter is missing or does not
 * parse. Otherwise returns a passing YAML issue followed by one issue each
 * for name, description and license, plus a triggerability issue when the
 * description has non-whitespace content.
 *
 * @param {{frontmatter: {hasFrontmatter: boolean, error: (string|null), data: (object|null)}}} context
 * @returns {Array<{id: string, title: string, status: string, message: string, suggestion?: string}>}
 */
function runFrontmatterChecks(context) {
  const issues = [];
  if (!context.frontmatter.hasFrontmatter) {
    issues.push({
      id: "frontmatter.exists",
      title: "Frontmatter Presence",
      status: "fail",
      message: "SKILL.md is missing YAML frontmatter delimited by --- blocks.",
      suggestion: "Add YAML frontmatter at the top with at least name and description."
    });
    return issues;
  }
  if (context.frontmatter.error) {
    issues.push({
      id: "frontmatter.valid-yaml",
      title: "Frontmatter YAML",
      status: "fail",
      message: `Frontmatter is not valid YAML: ${context.frontmatter.error}`,
      suggestion: "Fix YAML syntax so the frontmatter parses as an object."
    });
    return issues;
  }
  issues.push({
    id: "frontmatter.valid-yaml",
    title: "Frontmatter YAML",
    status: "pass",
    message: "Frontmatter exists and parses correctly."
  });

  const data = context.frontmatter.data ?? {};

  // name: required, at most 64 characters, lowercase words joined by single hyphens.
  const name = getStringField(data, "name");
  if (!name) {
    issues.push({
      id: "frontmatter.name.required",
      title: "Frontmatter Name",
      status: "fail",
      message: "Missing required frontmatter field: name.",
      suggestion: "Set name to lowercase words separated by single hyphens."
    });
  } else if (name.length > 64) {
    issues.push({
      id: "frontmatter.name.length",
      title: "Frontmatter Name Length",
      status: "fail",
      message: `name is too long (${name.length} chars, max 64).`,
      suggestion: "Shorten the skill name to 64 characters or fewer."
    });
  } else if (!SKILL_NAME_REGEX.test(name)) {
    issues.push({
      id: "frontmatter.name.format",
      title: "Frontmatter Name Format",
      status: "fail",
      message: "name must be lowercase alphanumeric with single hyphen separators only.",
      suggestion: "Use format like 'api-tester' or 'code-review'."
    });
  } else {
    issues.push({
      id: "frontmatter.name.valid",
      title: "Frontmatter Name",
      status: "pass",
      message: "name is present and follows naming conventions."
    });
  }

  // description: required (whitespace-only counts as missing), at most 1024 characters.
  const description = getStringField(data, "description");
  const hasDescription = Boolean(description) && description.trim() !== "";
  if (!hasDescription) {
    issues.push({
      id: "frontmatter.description.required",
      title: "Frontmatter Description",
      status: "fail",
      message: "Missing required frontmatter field: description.",
      suggestion: "Add a clear description of what the skill does and when to use it."
    });
  } else if (description.length > 1024) {
    issues.push({
      id: "frontmatter.description.length",
      title: "Frontmatter Description Length",
      status: "fail",
      message: `description is too long (${description.length} chars, max 1024).`,
      suggestion: "Keep description concise while still specific."
    });
  } else {
    issues.push({
      id: "frontmatter.description.valid",
      title: "Frontmatter Description",
      status: "pass",
      message: "description is present and within allowed length."
    });
  }

  // license: recommended, never a failure.
  const license = getStringField(data, "license");
  if (!license || license.trim() === "") {
    issues.push({
      id: "frontmatter.license.recommended",
      title: "Frontmatter License",
      status: "warn",
      message: "No license field found in frontmatter.",
      suggestion: "Add a license (for example: MIT) to clarify reuse terms."
    });
  } else {
    issues.push({
      id: "frontmatter.license.present",
      title: "Frontmatter License",
      status: "pass",
      message: "license field is present."
    });
  }

  // Triggerability is judged only for descriptions with real content. A
  // whitespace-only description has already been reported as missing and must
  // not also receive a contradictory "pass" here.
  if (hasDescription) {
    if (!descriptionLooksActionable(description)) {
      issues.push({
        id: "frontmatter.description.triggerability",
        title: "Description Trigger Clarity",
        status: "warn",
        message: "Description should explain both what the skill does and when it should be used.",
        suggestion: "Include explicit 'use when...' language plus concrete capability wording."
      });
    } else {
      issues.push({
        id: "frontmatter.description.triggerability",
        title: "Description Trigger Clarity",
        status: "pass",
        message: "Description appears to cover both capability and usage context."
      });
    }
  }
  return issues;
}
|
|
688
|
+
|
|
689
|
+
// src/core/linter/structure.ts
|
|
690
|
+
import fs4 from "node:fs/promises";
|
|
691
|
+
import path4 from "node:path";
|
|
692
|
+
/**
 * Detect a table of contents in markdown text.
 *
 * @param {string} content - Markdown text.
 * @returns {boolean} True when the text has a "Table of Contents" heading
 *   (any level, case-insensitive) or a `-`/`*` list item whose link points at
 *   an in-page `#anchor`.
 */
function hasTableOfContents(content) {
  const tocHeading = /^#{1,6}\s+table of contents\b/im;
  const anchorListItem = /^\s*[-*]\s+\[[^\]]+\]\(#[^)]+\)/im;
  return tocHeading.test(content) || anchorListItem.test(content);
}
|
|
698
|
+
/**
 * Bucket a referenced path by its first directory segment.
 *
 * @param {string} relativePath - Reference as written (a leading `./` is ignored).
 * @returns {"scripts"|"references"|"assets"|"other"} The matching bucket.
 */
function classifyReferencePath(relativePath) {
  const normalized = toPosixPath(relativePath).replace(/^\.\//, "");
  const knownDirectories = ["scripts", "references", "assets"];
  const kind = knownDirectories.find((directory) => normalized.startsWith(`${directory}/`));
  return kind ?? "other";
}
|
|
711
|
+
/**
 * Runs the structural lint checks for a skill:
 *   1. SKILL.md length (warns above 500 lines),
 *   2. long files under references/ (over 300 lines) that lack a table of contents,
 *   3. relative file references in SKILL.md that do not exist on disk,
 *      reported per bucket (scripts / references / assets) and then for everything else.
 *
 * @param {object} context - Lint context; reads `context.skill.raw`,
 *   `context.skill.lineCount` and `context.skill.skillRoot`.
 * @returns {Promise<object[]>} One result per check with `id`, `title`,
 *   `status` ("pass" | "warn" | "fail"), `message` and, for non-passing results, `suggestion`.
 */
async function runStructureChecks(context) {
  const { skill } = context;
  const issues = [];
  const references = extractRelativeFileReferences(skill.raw);

  // 1. SKILL.md size.
  const sizeCheck = { id: "structure.skill-size", title: "SKILL.md Size" };
  issues.push(
    skill.lineCount > 500
      ? {
          ...sizeCheck,
          status: "warn",
          message: `SKILL.md is ${skill.lineCount} lines (recommended max is 500).`,
          suggestion: "Split detailed guidance into references/ files."
        }
      : {
          ...sizeCheck,
          status: "pass",
          message: `SKILL.md length is ${skill.lineCount} lines.`
        }
  );

  // 2. Navigation aids in long reference files.
  const navigationTitle = "Reference File Navigation";
  const referencesDir = path4.join(skill.skillRoot, "references");
  if (!(await pathExists(referencesDir))) {
    issues.push({
      id: "structure.references.toc",
      title: navigationTitle,
      status: "pass",
      message: "No references/ directory found, so no long reference files to validate."
    });
  } else {
    const files = await listFilesRecursive(referencesDir);
    let flagged = 0;
    for (const file of files) {
      const raw = await fs4.readFile(file, "utf8");
      const lineCount = raw === "" ? 0 : raw.split(/\r?\n/).length;
      if (lineCount <= 300 || hasTableOfContents(raw)) {
        continue;
      }
      flagged += 1;
      const displayPath = toPosixPath(path4.relative(skill.skillRoot, file));
      issues.push({
        id: `structure.references.toc.${displayPath}`,
        title: navigationTitle,
        status: "warn",
        message: `${displayPath} is ${lineCount} lines and has no table of contents.`,
        suggestion: "Add a table of contents for long reference files."
      });
    }
    if (flagged === 0) {
      issues.push({
        id: "structure.references.toc",
        title: navigationTitle,
        status: "pass",
        message: "No oversized reference files missing a table of contents."
      });
    }
  }

  // 3. Existence of every referenced relative path, grouped by bucket.
  const missingByType = { scripts: [], references: [], assets: [], other: [] };
  for (const reference of references) {
    const target = path4.resolve(skill.skillRoot, reference);
    if (await pathExists(target)) {
      continue;
    }
    missingByType[classifyReferencePath(reference)].push(reference);
  }

  const buckets = [
    ["scripts", "Script References"],
    ["references", "Reference File Links"],
    ["assets", "Asset References"]
  ];
  for (const [key, title] of buckets) {
    const missing = missingByType[key];
    const id = `structure.${key}.exists`;
    if (missing.length === 0) {
      issues.push({
        id,
        title,
        status: "pass",
        message: `All referenced ${key} files exist.`
      });
      continue;
    }
    issues.push({
      id,
      title,
      status: "fail",
      message: `Missing referenced ${key} file(s): ${missing.join(", ")}`,
      suggestion: "Create the files or fix the paths in SKILL.md."
    });
  }

  // Anything outside the three known buckets is reported as a generic broken link.
  const brokenGeneric = missingByType.other;
  if (brokenGeneric.length === 0) {
    issues.push({
      id: "structure.relative-links.broken",
      title: "Relative Links",
      status: "pass",
      message: "No broken generic relative file references were found."
    });
  } else {
    issues.push({
      id: "structure.relative-links.broken",
      title: "Relative Links",
      status: "fail",
      message: `Broken relative path reference(s): ${brokenGeneric.join(", ")}`,
      suggestion: "Fix or remove broken file links."
    });
  }
  return issues;
}
|
|
820
|
+
|
|
821
|
+
// src/core/linter/index.ts
|
|
822
|
+
/**
 * Tallies check results by status.
 *
 * @param {Array<{status: string}>} issues - Check results.
 * @returns {{total: number, passed: number, warnings: number, failures: number}}
 *   Counts; any status other than "pass" or "warn" is counted as a failure.
 */
function summarizeIssues(issues) {
  const summary = { total: issues.length, passed: 0, warnings: 0, failures: 0 };
  for (const { status } of issues) {
    switch (status) {
      case "pass":
        summary.passed += 1;
        break;
      case "warn":
        summary.warnings += 1;
        break;
      default:
        summary.failures += 1;
    }
  }
  return summary;
}
|
|
842
|
+
/**
 * Loads a skill and runs every lint check group against it, in a fixed order:
 * frontmatter, structure, content, disclosure, compatibility.
 *
 * @param {string} inputPath - Path passed by the caller; echoed back as `target`.
 * @returns {Promise<{target: string, issues: object[], summary: object}>}
 *   All check results plus their tally.
 */
async function runLinter(inputPath) {
  const skill = await loadSkillFile(inputPath);
  const context = { skill, frontmatter: parseFrontmatter(skill.raw) };
  // Elements are evaluated left to right, so the check groups still run sequentially.
  const issues = [
    ...runFrontmatterChecks(context),
    ...(await runStructureChecks(context)),
    ...runContentChecks(context),
    ...(await runDisclosureChecks(context)),
    ...runCompatibilityChecks(context)
  ];
  return { target: inputPath, issues, summary: summarizeIssues(issues) };
}
|
|
861
|
+
|
|
862
|
+
// src/reporters/terminal.ts
|
|
863
|
+
import { Chalk } from "chalk";
|
|
864
|
+
/**
 * Builds a Chalk instance with colour either on (level 1) or off (level 0).
 *
 * @param {boolean} enableColor - Whether output should be coloured.
 * @returns {Chalk} A dedicated Chalk instance.
 */
function getChalkInstance(enableColor) {
  const level = enableColor ? 1 : 0;
  return new Chalk({ level });
}
|
|
867
|
+
/**
 * Formats one check result for terminal output: a coloured status label and the
 * title, then the message, then the suggestion when one is present.
 *
 * @param {{status: string, title: string, message: string, suggestion?: string}} issue - Check result.
 * @param {object} c - Colouriser exposing `green`, `yellow` and `red` functions.
 * @returns {string} Multi-line text for the result.
 */
function renderIssueLine(issue, c) {
  let label;
  if (issue.status === "pass") {
    label = c.green("PASS");
  } else if (issue.status === "warn") {
    label = c.yellow("WARN");
  } else {
    label = c.red("FAIL");
  }
  let detail = "";
  if (issue.suggestion) {
    detail = `
suggestion: ${issue.suggestion}`;
  }
  return ` ${label} ${issue.title}
${issue.message}${detail}`;
}
|
|
874
|
+
function renderLintReport(report, enableColor) {
|
|
875
|
+
const c = getChalkInstance(enableColor);
|
|
876
|
+
const { passed, warnings, failures, total } = report.summary;
|
|
877
|
+
const headerLines = [
|
|
878
|
+
`\u250C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510`,
|
|
879
|
+
`\u2502 skilltest lint \u2502`,
|
|
880
|
+
`\u251C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524`,
|
|
881
|
+
`\u2502 target: ${report.target}`,
|
|
882
|
+
`\u2502 summary: ${passed}/${total} checks passed, ${warnings} warnings, ${failures} failures`,
|
|
883
|
+
`\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518`
|
|
884
|
+
];
|
|
885
|
+
const renderedIssues = report.issues.map((issue) => renderIssueLine(issue, c)).join("\n");
|
|
886
|
+
return `${headerLines.join("\n")}
|
|
887
|
+
${renderedIssues}`;
|
|
888
|
+
}
|
|
889
|
+
/**
 * Formats a ratio as a percentage with one decimal place.
 *
 * @param {number} value - Ratio, e.g. 0.5 for fifty percent.
 * @returns {string} Text such as "50.0%".
 */
function formatPercent(value) {
  const percentage = value * 100;
  return `${percentage.toFixed(1)}%`;
}
|
|
892
|
+
function renderTriggerReport(result, enableColor, verbose) {
|
|
893
|
+
const c = getChalkInstance(enableColor);
|
|
894
|
+
const lines = [];
|
|
895
|
+
lines.push("\u250C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510");
|
|
896
|
+
lines.push("\u2502 skilltest trigger \u2502");
|
|
897
|
+
lines.push("\u251C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524");
|
|
898
|
+
lines.push(`\u2502 skill: ${result.skillName}`);
|
|
899
|
+
lines.push(`\u2502 provider/model: ${result.provider}/${result.model}`);
|
|
900
|
+
lines.push(
|
|
901
|
+
`\u2502 precision: ${formatPercent(result.metrics.precision)} recall: ${formatPercent(result.metrics.recall)} f1: ${formatPercent(result.metrics.f1)}`
|
|
902
|
+
);
|
|
903
|
+
lines.push(
|
|
904
|
+
`\u2502 TP ${result.metrics.truePositives} TN ${result.metrics.trueNegatives} FP ${result.metrics.falsePositives} FN ${result.metrics.falseNegatives}`
|
|
905
|
+
);
|
|
906
|
+
lines.push("\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518");
|
|
907
|
+
for (const [index, testCase] of result.cases.entries()) {
|
|
908
|
+
const status = testCase.matched ? c.green("PASS") : c.red("FAIL");
|
|
909
|
+
lines.push(`${index + 1}. ${status} query: ${testCase.query}`);
|
|
910
|
+
lines.push(` expected: ${testCase.expected} | actual: ${testCase.actual}`);
|
|
911
|
+
if (verbose && testCase.rawModelResponse) {
|
|
912
|
+
lines.push(` model: ${testCase.rawModelResponse.replace(/\s+/g, " ").trim()}`);
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
lines.push("Suggestions:");
|
|
916
|
+
for (const suggestion of result.suggestions) {
|
|
917
|
+
lines.push(`- ${suggestion}`);
|
|
918
|
+
}
|
|
919
|
+
return lines.join("\n");
|
|
920
|
+
}
|
|
921
|
+
function renderEvalReport(result, enableColor, verbose) {
|
|
922
|
+
const c = getChalkInstance(enableColor);
|
|
923
|
+
const lines = [];
|
|
924
|
+
lines.push("\u250C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510");
|
|
925
|
+
lines.push("\u2502 skilltest eval \u2502");
|
|
926
|
+
lines.push("\u251C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524");
|
|
927
|
+
lines.push(`\u2502 skill: ${result.skillName}`);
|
|
928
|
+
lines.push(`\u2502 provider/model: ${result.provider}/${result.model}`);
|
|
929
|
+
lines.push(`\u2502 grader model: ${result.graderModel}`);
|
|
930
|
+
lines.push(`\u2502 assertions passed: ${result.summary.passedAssertions}/${result.summary.totalAssertions}`);
|
|
931
|
+
lines.push("\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518");
|
|
932
|
+
for (const [index, promptResult] of result.results.entries()) {
|
|
933
|
+
lines.push(`${index + 1}. prompt: ${promptResult.prompt}`);
|
|
934
|
+
lines.push(` response summary: ${promptResult.responseSummary.replace(/\s+/g, " ").trim()}`);
|
|
935
|
+
for (const assertion of promptResult.assertions) {
|
|
936
|
+
const status = assertion.passed ? c.green("PASS") : c.red("FAIL");
|
|
937
|
+
lines.push(` ${status} ${assertion.assertion}`);
|
|
938
|
+
lines.push(` evidence: ${assertion.evidence}`);
|
|
939
|
+
}
|
|
940
|
+
if (verbose) {
|
|
941
|
+
lines.push(` full response: ${promptResult.response}`);
|
|
942
|
+
}
|
|
943
|
+
}
|
|
944
|
+
return lines.join("\n");
|
|
945
|
+
}
|
|
946
|
+
|
|
947
|
+
// src/reporters/json.ts
|
|
948
|
+
/**
 * Serialises a value as human-readable JSON.
 *
 * @param {unknown} value - Value to serialise.
 * @returns {string} JSON text indented by two spaces.
 */
function renderJson(value) {
  const indent = 2;
  return JSON.stringify(value, null, indent);
}
|
|
951
|
+
|
|
952
|
+
// src/commands/common.ts
|
|
953
|
+
/**
 * Extracts the CLI-wide flags from a command.
 *
 * @param {{optsWithGlobals: () => object}} command - Command whose merged options are read.
 * @returns {{json: boolean, color: boolean}} `json` is true only when the flag is
 *   truthy; `color` stays true unless it was explicitly set to false.
 */
function getGlobalCliOptions(command) {
  const { json, color } = command.optsWithGlobals();
  return {
    json: Boolean(json),
    color: color !== false
  };
}
|
|
960
|
+
/**
 * Writes a command result to stdout followed by a newline.
 *
 * @param {unknown} value - Result to print.
 * @param {boolean} asJson - When true the value is serialised as JSON; otherwise it is stringified.
 * @returns {void}
 */
function writeResult(value, asJson) {
  const text = asJson ? renderJson(value) : String(value);
  process.stdout.write(`${text}\n`);
}
|
|
969
|
+
/**
 * Reports a failure to the user.
 *
 * In JSON mode an `{ "error": message }` document goes to stdout; otherwise an
 * "Error: ..." line goes to stderr.
 *
 * @param {unknown} error - Thrown value; non-Error values are stringified.
 * @param {boolean} asJson - Whether JSON output mode is active.
 * @returns {void}
 */
function writeError(error, asJson) {
  const message = error instanceof Error ? error.message : String(error);
  if (!asJson) {
    process.stderr.write(`Error: ${message}\n`);
    return;
  }
  process.stdout.write(`${renderJson({ error: message })}\n`);
}
|
|
979
|
+
|
|
980
|
+
// src/commands/lint.ts
|
|
981
|
+
/**
 * Registers the `lint` subcommand on the CLI program.
 *
 * Exit codes set by the handler: 1 when the report contains failures,
 * 2 when linting itself throws.
 *
 * @param {object} program - Commander program to extend.
 * @returns {void}
 */
function registerLintCommand(program) {
  const runLint = async (targetPath, _commandOptions, command) => {
    const { json, color } = getGlobalCliOptions(command);
    try {
      const report = await runLinter(targetPath);
      const output = json ? report : renderLintReport(report, color);
      writeResult(output, json);
      if (report.summary.failures > 0) {
        process.exitCode = 1;
      }
    } catch (error) {
      writeError(error, json);
      process.exitCode = 2;
    }
  };
  program
    .command("lint")
    .description("Run static lint checks against a SKILL.md file or skill directory.")
    .argument("<path-to-skill>", "Path to SKILL.md or skill directory")
    .action(runLint);
}
|
|
1000
|
+
|
|
1001
|
+
// src/commands/trigger.ts
|
|
1002
|
+
import ora from "ora";
|
|
1003
|
+
import { z as z3 } from "zod";
|
|
1004
|
+
|
|
1005
|
+
// src/core/trigger-tester.ts
|
|
1006
|
+
import { z as z2 } from "zod";
|
|
1007
|
+
// One trigger test case: a non-empty user query plus whether the skill under test
// is expected to be selected for it.
var triggerQuerySchema = z2.object({
  query: z2.string().min(1),
  should_trigger: z2.boolean()
});

// A whole trigger suite is simply a list of test cases.
var triggerQueryArraySchema = z2.array(triggerQuerySchema);
|
|
1012
|
+
// Decoy skills shown alongside the skill under test so the model has realistic
// alternatives to choose from. Every description uses the same third-person
// phrasing so that wording alone does not make any one decoy stand out.
var FAKE_SKILLS = [
  { name: "code-review", description: "Reviews code changes for bugs, regressions, and maintainability issues." },
  { name: "api-tester", description: "Designs and runs REST API tests, validating status codes and response shapes." },
  { name: "db-migrator", description: "Plans and generates safe database migration scripts with rollback guidance." },
  { name: "bug-repro", description: "Reproduces reported bugs by building deterministic minimal test cases." },
  { name: "release-notes", description: "Drafts release notes from commits and PR metadata for stakeholders." },
  { name: "log-analyzer", description: "Analyzes service logs to identify error clusters and likely root causes." },
  { name: "performance-audit", description: "Finds hotspots in runtime and suggests profiling-driven optimizations." },
  { name: "security-audit", description: "Checks code and config for common security vulnerabilities and risky defaults." },
  { name: "refactor-planner", description: "Breaks large refactors into safe incremental steps with validation plans." },
  { name: "schema-designer", description: "Designs JSON schemas and validates data contracts for integrations." },
  { name: "docs-writer", description: "Writes developer documentation, tutorials, and API usage examples." },
  { name: "cli-scaffolder", description: "Creates CLI project skeletons with argument parsing and help text." },
  { name: "incident-triage", description: "Triages production incidents with severity tagging and next-action checklists." },
  { name: "test-generator", description: "Generates unit and integration test cases from feature requirements." },
  { name: "prompt-tuner", description: "Improves prompts for reliability, formatting, and failure handling." }
];
|
|
1029
|
+
/**
 * Returns a randomly ordered copy of the given values (Fisher-Yates shuffle
 * driven by Math.random). The input is left untouched.
 *
 * @param {Iterable<T>} values - Items to shuffle.
 * @returns {T[]} New array holding the same items in random order.
 * @template T
 */
function shuffle(values) {
  const result = [...values];
  let position = result.length;
  // Walk from the last slot down to index 1, swapping each slot with a random
  // slot at or before it.
  while (position > 1) {
    position -= 1;
    const pick = Math.floor(Math.random() * (position + 1));
    const held = result[position];
    result[position] = result[pick];
    result[pick] = held;
  }
  return result;
}
|
|
1037
|
+
/**
 * Picks up to `count` random items without replacement.
 *
 * @param {T[]} values - Pool to draw from.
 * @param {number} count - Desired number of items; clamped to the range 0..values.length.
 * @returns {T[]} The drawn items in random order.
 * @template T
 */
function sample(values, count) {
  const take = Math.max(0, Math.min(count, values.length));
  return shuffle(values).slice(0, take);
}
|
|
1040
|
+
/**
 * Extracts a JSON array from raw model output.
 *
 * The text between the first "[" and the last "]" is parsed, so surrounding
 * chatter or markdown code fences around the array are tolerated.
 *
 * @param {string} raw - Raw text returned by the model.
 * @returns {unknown[]} The parsed array.
 * @throws {Error} When no bracketed span exists, or when the span is not valid
 *   JSON (the underlying parse error is attached as `cause`).
 */
function parseJsonArrayFromModelOutput(raw) {
  const trimmed = raw.trim();
  const start = trimmed.indexOf("[");
  const end = trimmed.lastIndexOf("]");
  if (start < 0 || end <= start) {
    throw new Error("Model did not return a JSON array.");
  }
  const candidate = trimmed.slice(start, end + 1);
  try {
    return JSON.parse(candidate);
  } catch (error) {
    // Surface a message that names the real problem instead of a bare SyntaxError.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Model returned a malformed JSON array: ${reason}`, { cause: error });
  }
}
|
|
1053
|
+
/**
 * Asks the model to invent a trigger test suite for a skill and validates the reply.
 *
 * Half of the queries (rounded down) must be ones that should trigger the skill;
 * the remainder must be ones that should not.
 *
 * @param {object} skill - Parsed skill; reads `skill.frontmatter.name` and `.description`.
 * @param {{sendMessage: Function}} provider - Model provider used for generation.
 * @param {string} model - Model identifier passed to the provider.
 * @param {number} numQueries - Total number of queries to generate.
 * @returns {Promise<Array<{query: string, should_trigger: boolean}>>} The validated suite.
 * @throws {Error} When the reply is not a valid query array or its size/split does not match the request.
 */
async function generateQueriesWithModel(skill, provider, model, numQueries) {
  const shouldTriggerCount = Math.floor(numQueries / 2);
  const shouldNotTriggerCount = numQueries - shouldTriggerCount;
  const { name, description } = skill.frontmatter;

  const systemPrompt =
    "You generate realistic user prompts to test whether a specific agent skill triggers." +
    " " +
    "Return JSON only. No markdown, no comments." +
    " " +
    'Each entry must be an object: {"query": string, "should_trigger": boolean}.' +
    " " +
    "Create substantive prompts, not toy one-liners.";
  const userPrompt = [
    `Skill name: ${name}`,
    `Skill description: ${description}`,
    `Generate ${numQueries} prompts total.`,
    `Exactly ${shouldTriggerCount} should have should_trigger=true.`,
    `Exactly ${shouldNotTriggerCount} should have should_trigger=false.`,
    "Prompts should look like real user requests with enough context to drive a trigger decision."
  ].join("\n");

  const reply = await provider.sendMessage(systemPrompt, userPrompt, { model });
  const validation = triggerQueryArraySchema.safeParse(parseJsonArrayFromModelOutput(reply));
  if (!validation.success) {
    throw new Error(`Failed to parse generated queries: ${validation.error.issues[0]?.message ?? "invalid format"}`);
  }

  const generated = validation.data;
  const trueCount = generated.filter((item) => item.should_trigger).length;
  const falseCount = generated.length - trueCount;
  const splitMatches =
    generated.length === numQueries && trueCount === shouldTriggerCount && falseCount === shouldNotTriggerCount;
  if (!splitMatches) {
    throw new Error(
      `Generated query split mismatch. Expected ${numQueries} (${shouldTriggerCount}/${shouldNotTriggerCount}), got ${generated.length} (${trueCount}/${falseCount}).`
    );
  }
  return generated;
}
|
|
1084
|
+
/**
 * Interprets the model's skill-selection reply.
 *
 * Resolution order:
 *   1. the reply, ignoring case and wrapping quotes/backticks/asterisks/trailing
 *      periods, is exactly one of the skill names;
 *   2. the reply starts with "none";
 *   3. a skill name appears as a whole word anywhere in the reply, trying longer
 *      names first so "code-review-pro" is not mistaken for "code-review".
 *
 * @param {string} rawResponse - Raw model reply.
 * @param {string[]} skillNames - Names that were offered to the model.
 * @returns {string} The matched skill name (as spelled in `skillNames`), "none",
 *   or "unrecognized" when nothing matches.
 */
function parseDecision(rawResponse, skillNames) {
  const normalized = rawResponse.trim().toLowerCase();
  // Models often wrap the answer in quotes, backticks or markdown emphasis.
  const bare = normalized.replace(/^["'`*\s]+|["'`*.\s]+$/g, "");

  // An exact answer wins outright; this also protects names that begin with "none".
  const exact = skillNames.find((name) => name.toLowerCase() === bare);
  if (exact !== undefined) {
    return exact;
  }
  if (bare.startsWith("none")) {
    return "none";
  }

  // "-" is a word boundary, so a short name can match inside a longer hyphenated
  // one; checking longer names first picks the most specific candidate.
  const longestFirst = [...skillNames].sort((a, b) => b.length - a.length);
  for (const skillName of longestFirst) {
    const escaped = skillName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const pattern = new RegExp(`\\b${escaped}\\b`, "i");
    if (pattern.test(rawResponse)) {
      return skillName;
    }
  }
  return "unrecognized";
}
|
|
1098
|
+
/**
 * Computes confusion-matrix counts and precision/recall/F1 for a trigger run.
 *
 * A case counts as "selected" only when its `actual` equals `skillName`.
 * Ratios with a zero denominator are reported as 0.
 *
 * @param {string} skillName - Name of the skill under test.
 * @param {Array<{shouldTrigger: boolean, actual: string}>} cases - Evaluated test cases.
 * @returns {{truePositives: number, trueNegatives: number, falsePositives: number,
 *   falseNegatives: number, precision: number, recall: number, f1: number}} Metrics.
 */
function calculateMetrics(skillName, cases) {
  const tally = { truePositives: 0, trueNegatives: 0, falsePositives: 0, falseNegatives: 0 };
  for (const { shouldTrigger, actual } of cases) {
    const selected = actual === skillName;
    if (shouldTrigger) {
      tally[selected ? "truePositives" : "falseNegatives"] += 1;
    } else {
      tally[selected ? "falsePositives" : "trueNegatives"] += 1;
    }
  }

  const ratio = (numerator, denominator) => (denominator === 0 ? 0 : numerator / denominator);
  const precision = ratio(tally.truePositives, tally.truePositives + tally.falsePositives);
  const recall = ratio(tally.truePositives, tally.truePositives + tally.falseNegatives);
  const f1 = precision + recall === 0 ? 0 : 2 * precision * recall / (precision + recall);

  return { ...tally, precision, recall, f1 };
}
|
|
1134
|
+
/**
 * Turns trigger metrics into human-readable advice.
 *
 * @param {{falseNegatives: number, falsePositives: number}} metrics - Trigger metrics.
 * @returns {string[]} One hint per kind of error seen, or a single all-clear note.
 */
function buildSuggestions(metrics) {
  const hints = [
    [
      metrics.falseNegatives > 0,
      "False negatives found: clarify capability keywords and add explicit 'use when ...' phrasing in description."
    ],
    [
      metrics.falsePositives > 0,
      "False positives found: narrow scope boundaries and add explicit non-goals in description."
    ]
  ];
  const suggestions = hints.filter(([applies]) => applies).map(([, text]) => text);
  return suggestions.length > 0
    ? suggestions
    : ["Trigger behavior looks clean on this sample. Keep monitoring with domain-specific custom queries."];
}
|
|
1149
|
+
/**
 * Runs a trigger evaluation: for every query the model is shown the skill under
 * test mixed in with 5-8 random decoy skills and asked to pick one (or "none").
 *
 * @param {object} skill - Parsed skill; reads `skill.frontmatter.name` and `.description`.
 * @param {object} options - Run options.
 * @param {{name: string, sendMessage: Function}} options.provider - Model provider.
 * @param {string} options.model - Model identifier.
 * @param {Array<{query: string, should_trigger: boolean}>} [options.queries] - Custom
 *   suite; when missing or empty, queries are generated with the model.
 * @param {number} options.numQueries - Number of queries to generate when none are supplied.
 * @param {boolean} [options.verbose] - When true, raw model replies are kept on each case.
 * @returns {Promise<object>} Result with `skillName`, `model`, `provider`, `queries`,
 *   `cases`, `metrics` and `suggestions`.
 */
async function runTriggerTest(skill, options) {
  const queries = options.queries && options.queries.length > 0 ? triggerQueryArraySchema.parse(options.queries) : await generateQueriesWithModel(skill, options.provider, options.model, options.numQueries);
  const results = [];
  const skillName = skill.frontmatter.name;
  // A decoy that shares the target's name would put two identically named entries in
  // the list and make a decoy pick indistinguishable from a correct pick, so drop it.
  // The comparison ignores case because reply parsing is case-insensitive as well.
  const targetKey = String(skillName).toLowerCase();
  const decoyPool = FAKE_SKILLS.filter((entry) => entry.name.toLowerCase() !== targetKey);
  for (const testQuery of queries) {
    const fakeCount = 5 + Math.floor(Math.random() * 4);
    const fakeSkills = sample(decoyPool, fakeCount);
    // Shuffle so the target's position in the list carries no signal.
    const allSkills = shuffle([
      ...fakeSkills,
      {
        name: skill.frontmatter.name,
        description: skill.frontmatter.description
      }
    ]);
    const skillListText = allSkills.map((entry) => `- ${entry.name}: ${entry.description}`).join("\n");
    const systemPrompt = [
      "You are selecting one skill to activate for a user query.",
      "Choose the single best matching skill name from the provided list, or 'none' if no skill is a good fit.",
      "Respond with only the skill name or 'none'."
    ].join(" ");
    const userPrompt = [`Available skills:`, skillListText, "", `User query: ${testQuery.query}`].join("\n");
    const rawResponse = await options.provider.sendMessage(systemPrompt, userPrompt, { model: options.model });
    const decision = parseDecision(
      rawResponse,
      allSkills.map((entry) => entry.name)
    );
    const expected = testQuery.should_trigger ? skillName : "none";
    // For a should-not-trigger query any outcome other than the target skill passes.
    const matched = testQuery.should_trigger ? decision === skillName : decision !== skillName;
    results.push({
      query: testQuery.query,
      shouldTrigger: testQuery.should_trigger,
      expected,
      actual: decision,
      matched,
      rawModelResponse: options.verbose ? rawResponse : void 0
    });
  }
  const metrics = calculateMetrics(skillName, results);
  return {
    skillName,
    model: options.model,
    provider: options.provider.name,
    queries,
    cases: results,
    metrics,
    suggestions: buildSuggestions(metrics)
  };
}
|
|
1197
|
+
|
|
1198
|
+
// src/utils/config.ts
|
|
1199
|
+
/**
 * Determines which API key to use for a provider.
 *
 * A non-blank override wins; otherwise the provider's environment variable is
 * read (ANTHROPIC_API_KEY for "anthropic", OPENAI_API_KEY for anything else).
 *
 * @param {string} provider - Provider name.
 * @param {string} [override] - Key supplied explicitly by the caller.
 * @returns {string} The trimmed API key.
 * @throws {Error} When neither the override nor the environment provides a key.
 */
function resolveApiKey(provider, override) {
  if (override && override.trim() !== "") {
    return override.trim();
  }
  const isAnthropic = provider === "anthropic";
  const fromEnv = (isAnthropic ? process.env.ANTHROPIC_API_KEY : process.env.OPENAI_API_KEY)?.trim();
  if (fromEnv) {
    return fromEnv;
  }
  throw new Error(
    isAnthropic
      ? "No Anthropic API key found. Set ANTHROPIC_API_KEY environment variable or pass --api-key flag."
      : "No OpenAI API key found. Set OPENAI_API_KEY environment variable or pass --api-key flag."
  );
}
|
|
1218
|
+
|
|
1219
|
+
// src/providers/anthropic.ts
|
|
1220
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
1221
|
+
/**
 * Resolves after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that settles once the timer fires.
 */
function wait(ms) {
  return new Promise((done) => setTimeout(done, ms));
}
|
|
1226
|
+
/**
 * Decides whether a thrown value looks like a rate-limit failure.
 *
 * @param {unknown} error - Value caught from an API call.
 * @returns {boolean} True for objects whose `status` is 429 or whose string
 *   `message` mentions "rate limit" (case-insensitive); false otherwise.
 */
function isRateLimitError(error) {
  const isObject = Boolean(error) && typeof error === "object";
  if (!isObject) {
    return false;
  }
  if (error.status === 429) {
    return true;
  }
  const text = error.message;
  return typeof text === "string" && /rate limit/i.test(text);
}
|
|
1240
|
+
/**
 * Model provider backed by the Anthropic Messages API.
 */
var AnthropicProvider = class {
  name = "anthropic";
  client;
  /**
   * @param {string} apiKey - Anthropic API key used to construct the SDK client.
   */
  constructor(apiKey) {
    this.client = new Anthropic({ apiKey });
  }
  /**
   * Sends a single-turn request and returns the reply text.
   *
   * Rate-limit failures are retried with exponential backoff plus jitter, for at
   * most three attempts in total; any other failure aborts immediately.
   *
   * @param {string} systemPrompt - System prompt.
   * @param {string} userMessage - Content of the single user message.
   * @param {{model: string}} options - Request options.
   * @returns {Promise<string>} Text blocks of the reply joined by newlines and trimmed.
   * @throws {Error} When the call fails or the reply contains no text; the
   *   underlying error is attached as `cause`.
   */
  async sendMessage(systemPrompt, userMessage, options) {
    let lastError;
    for (let attempt = 0; attempt < 3; attempt += 1) {
      try {
        const response = await this.client.messages.create({
          model: options.model,
          max_tokens: 2048,
          system: systemPrompt,
          messages: [
            {
              role: "user",
              content: userMessage
            }
          ]
        });
        const textBlocks = response.content.filter((block) => block.type === "text");
        const text = textBlocks.map((block) => block.text).join("\n").trim();
        if (text.length === 0) {
          throw new Error("Model returned an empty response.");
        }
        return text;
      } catch (error) {
        lastError = error;
        // Only rate limits are worth retrying, and never after the final attempt.
        if (!isRateLimitError(error) || attempt === 2) {
          break;
        }
        // Backoff of 500ms, then 1000ms (capped at 4s), with up to 250ms of jitter.
        const delay = Math.min(4e3, 500 * 2 ** attempt) + Math.floor(Math.random() * 250);
        await wait(delay);
      }
    }
    // Keep the original error reachable (status code, stack) for callers that inspect it.
    if (lastError instanceof Error) {
      throw new Error(`Anthropic API call failed: ${lastError.message}`, { cause: lastError });
    }
    throw new Error("Anthropic API call failed with an unknown error.", { cause: lastError });
  }
};
|
|
1282
|
+
|
|
1283
|
+
// src/providers/openai.ts
|
|
1284
|
+
/**
 * Placeholder provider for OpenAI. It stores the API key but cannot send
 * messages yet: every call to `sendMessage` rejects.
 */
var OpenAIProvider = class {
  name = "openai";
  _apiKey;
  /**
   * @param {string} apiKey - OpenAI API key; stored but not used yet.
   */
  constructor(apiKey) {
    this._apiKey = apiKey;
  }
  /**
   * Not implemented.
   *
   * @returns {Promise<never>} Always rejects with "OpenAI provider coming soon."
   */
  async sendMessage(_systemPrompt, _userMessage, _options) {
    // Touch the stored key so it is not flagged as unused until the provider is implemented.
    void this._apiKey;
    throw new Error("OpenAI provider coming soon.");
  }
};
|
|
1295
|
+
|
|
1296
|
+
// src/providers/index.ts
|
|
1297
|
+
/**
 * Builds the model provider for the given name.
 *
 * @param {string} providerName - "anthropic" selects the Anthropic provider; any
 *   other value selects the OpenAI provider.
 * @param {string} [apiKeyOverride] - Explicit API key; resolved via `resolveApiKey` when absent.
 * @returns {object} A provider instance constructed with the resolved key.
 */
function createProvider(providerName, apiKeyOverride) {
  const apiKey = resolveApiKey(providerName, apiKeyOverride);
  const ProviderClass = providerName === "anthropic" ? AnthropicProvider : OpenAIProvider;
  return new ProviderClass(apiKey);
}
|
|
1304
|
+
|
|
1305
|
+
// src/commands/trigger.ts
|
|
1306
|
+
// Shape of the options accepted by the `trigger` command.
var triggerOptionsSchema = z3.object({
  // Model identifier and the provider that serves it.
  model: z3.string(),
  provider: z3.enum(["anthropic", "openai"]),
  // Optional path to a custom queries JSON file.
  queries: z3.string().optional(),
  // Number of queries to auto-generate; must be an integer of at least 2.
  numQueries: z3.number().int().min(2),
  // Optional path where the queries used for the run are saved.
  saveQueries: z3.string().optional(),
  verbose: z3.boolean().optional(),
  apiKey: z3.string().optional()
});
|
|
1315
|
+
/**
 * Registers the `trigger` subcommand on the CLI program.
 *
 * The handler validates options, loads the skill, optionally loads a custom query
 * suite, runs the trigger evaluation, optionally saves the queries used, and
 * prints the result. Any failure is reported and sets exit code 2.
 *
 * @param {object} program - Commander program to extend.
 * @returns {void}
 */
function registerTriggerCommand(program) {
  program
    .command("trigger")
    .description("Evaluate whether a skill description triggers correctly.")
    .argument("<path-to-skill>", "Path to SKILL.md or skill directory")
    .option("--model <model>", "Model to use", "claude-sonnet-4-5-20250929")
    .option("--provider <provider>", "LLM provider: anthropic|openai", "anthropic")
    .option("--queries <path>", "Path to custom test queries JSON")
    .option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10), 20)
    .option("--save-queries <path>", "Save generated queries to a JSON file")
    .option("--api-key <key>", "API key override")
    .option("--verbose", "Show full model decisions")
    .action(async (targetPath, commandOptions, command) => {
      const globalOptions = getGlobalCliOptions(command);
      const parsedOptions = triggerOptionsSchema.safeParse(commandOptions);
      if (!parsedOptions.success) {
        writeError(new Error(parsedOptions.error.issues[0]?.message ?? "Invalid trigger options."), globalOptions.json);
        process.exitCode = 2;
        return;
      }
      const options = parsedOptions.data;
      // No spinner in JSON mode or when stdout is not a terminal, so output stays clean.
      const spinner = globalOptions.json || !process.stdout.isTTY ? null : ora("Preparing trigger evaluation...").start();
      const setStatus = (text) => {
        if (spinner) {
          spinner.text = text;
        }
      };
      try {
        // The even split only matters when queries are generated; a custom suite
        // supplied via --queries does not use --num-queries at all.
        if (!options.queries && options.numQueries % 2 !== 0) {
          throw new Error("--num-queries must be an even number so the suite can split should/should-not trigger cases.");
        }
        setStatus("Parsing skill...");
        const skill = await parseSkillStrict(targetPath);
        setStatus("Initializing model provider...");
        const provider = createProvider(options.provider, options.apiKey);
        let queries = void 0;
        if (options.queries) {
          setStatus("Loading custom trigger queries...");
          const loaded = await readJsonFile(options.queries);
          const parsedQueries = triggerQueryArraySchema.safeParse(loaded);
          if (!parsedQueries.success) {
            throw new Error(`Invalid --queries JSON: ${parsedQueries.error.issues[0]?.message ?? "unknown format issue"}`);
          }
          queries = parsedQueries.data;
        }
        setStatus("Running trigger simulations...");
        const result = await runTriggerTest(skill, {
          model: options.model,
          provider,
          queries,
          numQueries: options.numQueries,
          verbose: Boolean(options.verbose)
        });
        if (options.saveQueries) {
          await writeJsonFile(options.saveQueries, result.queries);
        }
        // Stop the spinner before printing so it does not interleave with the report.
        spinner?.stop();
        if (globalOptions.json) {
          writeResult(result, true);
        } else {
          writeResult(renderTriggerReport(result, globalOptions.color, Boolean(options.verbose)), false);
        }
      } catch (error) {
        spinner?.stop();
        writeError(error, globalOptions.json);
        process.exitCode = 2;
      }
    });
}
|
|
1376
|
+
|
|
1377
|
+
// src/commands/eval.ts
|
|
1378
|
+
import ora2 from "ora";
|
|
1379
|
+
import { z as z6 } from "zod";
|
|
1380
|
+
|
|
1381
|
+
// src/core/eval-runner.ts
|
|
1382
|
+
import { z as z5 } from "zod";
|
|
1383
|
+
|
|
1384
|
+
// src/core/grader.ts
|
|
1385
|
+
import { z as z4 } from "zod";
|
|
1386
|
+
// One graded assertion as it appears in the grader's JSON reply: the assertion
// text, a pass/fail flag, and an evidence string.
var gradedAssertionSchema = z4.object({
  assertion: z4.string(),
  passed: z4.boolean(),
  evidence: z4.string()
});

// The full grader reply: an object whose "assertions" key holds the graded list.
var graderOutputSchema = z4.object({
  assertions: z4.array(gradedAssertionSchema)
});
|
|
1394
|
+
/**
 * Pull the JSON object out of a raw grader reply.
 *
 * The reply may be bare JSON or JSON surrounded by other text (prose, Markdown
 * fences). Everything from the first "{" through the last "}" of the trimmed
 * reply is handed to JSON.parse.
 *
 * @param {string} raw - Text returned by the grader model.
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} If `raw` is not a string, contains no "{...}" span, or the
 *   span is not valid JSON (the original parse error is attached as `cause`).
 */
function extractJsonObject(raw) {
  // A non-string reply would otherwise surface as an opaque TypeError from
  // .trim(); report it the same way as any other reply without JSON.
  if (typeof raw !== "string") {
    throw new Error("Grader did not return a JSON object.");
  }
  const trimmed = raw.trim();
  const start = trimmed.indexOf("{");
  const end = trimmed.lastIndexOf("}");
  if (start < 0 || end <= start) {
    throw new Error("Grader did not return a JSON object.");
  }
  try {
    // When the reply is bare JSON this slice is the whole trimmed string, so
    // no separate fast path is needed.
    return JSON.parse(trimmed.slice(start, end + 1));
  } catch (error) {
    // Add context: a bare "Unexpected token ..." tells the user nothing
    // about where the bad JSON came from.
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(`Grader returned malformed JSON: ${detail}`, { cause: error });
  }
}
|
|
1406
|
+
/**
 * Have the grader model judge a response against a list of assertions.
 *
 * Reads `provider`, `model`, `skillName`, `skillBody`, `userPrompt`,
 * `modelResponse` and `assertions` from `options`. When `assertions` is
 * missing or empty, three generic quality assertions are used instead.
 *
 * @param {object} options - Grading inputs (see above).
 * @returns {Promise<Array<{assertion: string, passed: boolean, evidence: string}>>}
 *   The graded assertions taken from the grader's JSON reply.
 * @throws {Error} If the reply has no JSON object or fails schema validation.
 */
async function gradeResponse(options) {
  const { provider, model, skillName, skillBody, modelResponse } = options;

  // Fall back to generic checks when the caller supplied no assertions.
  let assertionList = options.assertions;
  if (!(assertionList?.length > 0)) {
    assertionList = [
      "The response follows the skill instructions faithfully.",
      "The response is well-structured and actionable.",
      "The response addresses the user prompt directly."
    ];
  }

  const systemLines = [
    "You are a strict evaluator for agent skill outputs.",
    "Assess each assertion and return JSON only.",
    'Required output format: {"assertions":[{"assertion":"...","passed":true|false,"evidence":"..."}]}'
  ];
  const numberedAssertions = assertionList.map((text, position) => `${position + 1}. ${text}`);
  // Array#join renders undefined entries as empty strings, so the optional
  // fields are kept as array elements rather than interpolated.
  const userLines = [
    `Skill: ${skillName}`,
    "Skill instructions:",
    skillBody,
    "",
    `User prompt: ${options.userPrompt}`,
    "",
    "Model response:",
    modelResponse,
    "",
    "Assertions to evaluate:",
    numberedAssertions.join("\n")
  ];

  const reply = await provider.sendMessage(systemLines.join(" "), userLines.join("\n"), { model });
  const validation = graderOutputSchema.safeParse(extractJsonObject(reply));
  if (validation.success) {
    return validation.data.assertions;
  }
  const reason = validation.error.issues[0]?.message ?? "invalid grader JSON";
  throw new Error(`Failed to parse grader output: ${reason}`);
}
|
|
1437
|
+
|
|
1438
|
+
// src/core/eval-runner.ts
|
|
1439
|
+
// One eval case: a non-empty prompt plus an optional list of non-empty
// assertion strings.
var evalPromptSchema = z5.object({
  prompt: z5.string().min(1),
  assertions: z5.array(z5.string().min(1)).optional()
});

// An eval suite is an array of such cases.
var evalPromptArraySchema = z5.array(evalPromptSchema);
|
|
1444
|
+
/**
 * Pull the JSON array out of a raw model reply.
 *
 * The reply may be bare JSON or JSON surrounded by other text (prose, Markdown
 * fences). Everything from the first "[" through the last "]" of the trimmed
 * reply is handed to JSON.parse.
 *
 * @param {string} raw - Text returned by the model.
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} If `raw` is not a string, contains no "[...]" span, or the
 *   span is not valid JSON (the original parse error is attached as `cause`).
 */
function extractJsonArray(raw) {
  // A non-string reply would otherwise surface as an opaque TypeError from
  // .trim(); report it the same way as any other reply without JSON.
  if (typeof raw !== "string") {
    throw new Error("Model did not return a JSON array.");
  }
  const trimmed = raw.trim();
  const start = trimmed.indexOf("[");
  const end = trimmed.lastIndexOf("]");
  if (start < 0 || end <= start) {
    throw new Error("Model did not return a JSON array.");
  }
  try {
    // When the reply is bare JSON this slice is the whole trimmed string, so
    // no separate fast path is needed.
    return JSON.parse(trimmed.slice(start, end + 1));
  } catch (error) {
    // Add context: a bare "Unexpected token ..." tells the user nothing
    // about where the bad JSON came from.
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(`Model returned malformed JSON: ${detail}`, { cause: error });
  }
}
|
|
1456
|
+
/**
 * Ask the model to write evaluation prompts (each with assertions) for a skill.
 *
 * @param {object} skill - Parsed skill; `frontmatter.name`,
 *   `frontmatter.description` and `content` are read.
 * @param {object} provider - Object exposing `sendMessage(system, user, { model })`.
 * @param {string} model - Model identifier forwarded to the provider.
 * @param {number} count - Exact number of prompts the model must return.
 * @returns {Promise<Array<{prompt: string, assertions?: string[]}>>} The validated prompts.
 * @throws {Error} If the reply has no JSON array, fails schema validation, or
 *   does not contain exactly `count` prompts.
 */
async function generatePrompts(skill, provider, model, count) {
  const { name, description } = skill.frontmatter;

  const systemLines = [
    "You generate realistic evaluation prompts for an agent skill.",
    "Return JSON only.",
    'Format: [{"prompt":"...","assertions":["...", "..."]}]',
    "Assertions should be concrete and checkable."
  ];
  const requestLines = [
    `Skill name: ${name}`,
    `Skill description: ${description}`,
    "Skill instructions:",
    skill.content,
    "",
    `Generate ${count} prompts that stress the main capabilities and likely edge cases.`,
    "Each prompt should include 2-4 assertions."
  ];

  const reply = await provider.sendMessage(systemLines.join(" "), requestLines.join("\n"), { model });
  const validation = evalPromptArraySchema.safeParse(extractJsonArray(reply));
  if (!validation.success) {
    const reason = validation.error.issues[0]?.message ?? "invalid prompt JSON";
    throw new Error(`Failed to parse generated eval prompts: ${reason}`);
  }

  const generated = validation.data;
  // The model is asked for an exact count; anything else is treated as a failure.
  if (generated.length !== count) {
    throw new Error(`Expected ${count} prompts, got ${generated.length}.`);
  }
  return generated;
}
|
|
1482
|
+
/**
 * Run every eval prompt against the skill and grade each response.
 *
 * Uses `options.prompts` when it is a non-empty list (re-validated against the
 * prompt schema); otherwise asks the model to generate 5 prompts. Prompts are
 * processed one at a time: the model answers with the raw skill text as its
 * system prompt, then the grader model scores the answer.
 *
 * @param {object} skill - Parsed skill; `raw`, `content` and `frontmatter.name` are read.
 * @param {object} options - `provider`, `model`, `graderModel`, optional `prompts`.
 * @returns {Promise<object>} Report with `skillName`, `model`, `graderModel`,
 *   `provider` (the provider's `name`), `prompts`, per-prompt `results`, and a
 *   `summary` of prompt and assertion totals.
 */
async function runEval(skill, options) {
  const { provider, model, graderModel } = options;

  let prompts;
  if (options.prompts?.length > 0) {
    prompts = evalPromptArraySchema.parse(options.prompts);
  } else {
    prompts = await generatePrompts(skill, provider, model, 5);
  }

  const results = [];
  let totalAssertions = 0;
  let passedAssertions = 0;

  for (const { prompt, assertions } of prompts) {
    const systemPrompt = [
      "You are an AI assistant with an activated skill.",
      "Follow this SKILL.md content exactly where applicable.",
      "",
      skill.raw
    ].join("\n");

    const response = await provider.sendMessage(systemPrompt, prompt, { model });
    const graded = await gradeResponse({
      provider,
      model: graderModel,
      skillName: skill.frontmatter.name,
      skillBody: skill.content,
      userPrompt: prompt,
      modelResponse: response,
      assertions
    });
    const passedCount = graded.filter((entry) => entry.passed).length;

    results.push({
      prompt,
      assertions: graded,
      // First 200 characters of the response; the full text is kept alongside.
      responseSummary: response.slice(0, 200),
      response,
      passedAssertions: passedCount,
      totalAssertions: graded.length
    });

    totalAssertions += graded.length;
    passedAssertions += passedCount;
  }

  return {
    skillName: skill.frontmatter.name,
    model,
    graderModel,
    provider: provider.name,
    prompts,
    results,
    summary: {
      totalPrompts: results.length,
      totalAssertions,
      passedAssertions
    }
  };
}
|
|
1528
|
+
|
|
1529
|
+
// src/commands/eval.ts
|
|
1530
|
+
// Validates the options object that commander passes to the `eval` action.
// `model` and `provider` are required; every other option may be absent.
var evalOptionsSchema = z6.object({
  prompts: z6.string().optional(),
  model: z6.string(),
  graderModel: z6.string().optional(),
  provider: z6.enum(["anthropic", "openai"]),
  saveResults: z6.string().optional(),
  verbose: z6.boolean().optional(),
  apiKey: z6.string().optional()
});
|
|
1539
|
+
/**
 * Attach the `eval` subcommand to the CLI program.
 *
 * The action validates its options, parses the skill, optionally loads prompts
 * from `--prompts`, runs the evaluation, optionally saves the result to
 * `--save-results`, and prints either JSON or a rendered report. Invalid
 * options and any error thrown during the run set `process.exitCode` to 2.
 *
 * @param {object} program - Commander program (or parent command) to extend.
 */
function registerEvalCommand(program) {
  const evalCommand = program
    .command("eval")
    .description("Run end-to-end skill execution and quality evaluation.")
    .argument("<path-to-skill>", "Path to SKILL.md or skill directory")
    .option("--prompts <path>", "Path to eval prompts JSON")
    .option("--model <model>", "Model to execute prompts", "claude-sonnet-4-5-20250929")
    .option("--grader-model <model>", "Model used for grading (defaults to --model)")
    .option("--provider <provider>", "LLM provider: anthropic|openai", "anthropic")
    .option("--save-results <path>", "Save full evaluation results to JSON")
    .option("--api-key <key>", "API key override")
    .option("--verbose", "Show full model responses");

  evalCommand.action(async (targetPath, commandOptions, command) => {
    const globalOptions = getGlobalCliOptions(command);

    const validation = evalOptionsSchema.safeParse(commandOptions);
    if (!validation.success) {
      const reason = validation.error.issues[0]?.message ?? "Invalid eval options.";
      writeError(new Error(reason), globalOptions.json);
      process.exitCode = 2;
      return;
    }
    const options = validation.data;

    // A spinner is only shown for interactive, non-JSON runs.
    const interactive = !globalOptions.json && process.stdout.isTTY;
    const spinner = interactive ? ora2("Preparing evaluation...").start() : null;
    const setStatus = (text) => {
      if (spinner) {
        spinner.text = text;
      }
    };

    try {
      setStatus("Parsing skill...");
      const skill = await parseSkillStrict(targetPath);

      setStatus("Initializing model provider...");
      const provider = createProvider(options.provider, options.apiKey);

      let prompts = void 0;
      if (options.prompts) {
        setStatus("Loading test prompts...");
        const loaded = await readJsonFile(options.prompts);
        const parsedPrompts = evalPromptArraySchema.safeParse(loaded);
        if (!parsedPrompts.success) {
          const reason = parsedPrompts.error.issues[0]?.message ?? "unknown format issue";
          throw new Error(`Invalid --prompts JSON: ${reason}`);
        }
        prompts = parsedPrompts.data;
      }

      setStatus("Running eval prompts and grading responses...");
      const result = await runEval(skill, {
        provider,
        model: options.model,
        // The grader defaults to the execution model when not given.
        graderModel: options.graderModel ?? options.model,
        prompts
      });

      if (options.saveResults) {
        await writeJsonFile(options.saveResults, result);
      }

      // Stop the spinner before printing anything.
      spinner?.stop();
      if (globalOptions.json) {
        writeResult(result, true);
      } else {
        const report = renderEvalReport(result, globalOptions.color, Boolean(options.verbose));
        writeResult(report, false);
      }
    } catch (error) {
      spinner?.stop();
      writeError(error, globalOptions.json);
      process.exitCode = 2;
    }
  });
}
|
|
1596
|
+
|
|
1597
|
+
// src/index.ts
|
|
1598
|
+
/**
 * Read the package version from the package.json one directory above this file.
 *
 * Best-effort: any failure (missing file, unreadable file, invalid JSON) or a
 * missing `version` field yields "0.0.0".
 *
 * @returns {string} The `version` value from package.json, or "0.0.0".
 */
function resolveVersion() {
  const fallbackVersion = "0.0.0";
  try {
    const moduleDir = path5.dirname(fileURLToPath(import.meta.url));
    const manifestPath = path5.resolve(moduleDir, "..", "package.json");
    const manifest = JSON.parse(fs5.readFileSync(manifestPath, "utf8"));
    return manifest.version ?? fallbackVersion;
  } catch {
    // Deliberately swallowed: the version string is informational only.
    return fallbackVersion;
  }
}
|
|
1609
|
+
/**
 * Build the `skilltest` CLI, register its subcommands, and parse `argv`.
 *
 * @param {string[]} argv - Argument vector in `process.argv` form.
 * @returns {Promise<void>} Settles once commander has finished parsing and
 *   the selected command's action has completed.
 */
async function run(argv) {
  const program = new Command();

  program
    .name("skilltest")
    .description("The testing framework for Agent Skills.")
    .version(resolveVersion())
    .option("--json", "Output results as JSON")
    .option("--no-color", "Disable colored output")
    .showHelpAfterError();

  // Registration order is kept: lint, trigger, eval.
  const registrars = [registerLintCommand, registerTriggerCommand, registerEvalCommand];
  for (const register of registrars) {
    register(program);
  }

  await program.parseAsync(argv);
}
|
|
1617
|
+
// Entry point: run the CLI and report any uncaught failure on stderr with exit code 2.
run(process.argv).catch((failure) => {
  let message;
  if (failure instanceof Error) {
    message = failure.message;
  } else {
    message = String(failure);
  }
  process.stderr.write(`Error: ${message}\n`);
  process.exitCode = 2;
});
|
|
1623
|
+
export {
|
|
1624
|
+
run
|
|
1625
|
+
};
|
|
1626
|
+
//# sourceMappingURL=index.js.map
|