skilltest 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +5 -3
- package/README.md +78 -1
- package/dist/index.js +1475 -257
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/index.ts
|
|
4
|
-
import
|
|
4
|
+
import fs11 from "node:fs";
|
|
5
5
|
import path6 from "node:path";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
7
|
import { Command } from "commander";
|
|
8
8
|
|
|
9
|
+
// src/commands/lint.ts
|
|
10
|
+
import fs6 from "node:fs/promises";
|
|
11
|
+
import { z as z6 } from "zod";
|
|
12
|
+
|
|
9
13
|
// src/core/skill-parser.ts
|
|
10
14
|
import fs from "node:fs/promises";
|
|
11
15
|
import path from "node:path";
|
|
@@ -239,6 +243,171 @@ function runCompatibilityChecks(context) {
|
|
|
239
243
|
return issues;
|
|
240
244
|
}
|
|
241
245
|
|
|
246
|
+
// src/core/linter/markdown-zones.ts
|
|
247
|
+
/**
 * Break raw text into an array of lines.
 *
 * Both LF and CRLF line endings are accepted as separators.
 *
 * @param {string} raw - Text to split.
 * @returns {string[]} The individual lines, without their line terminators.
 */
function splitLines(raw) {
  const lineBreak = /\r?\n/;
  return raw.split(lineBreak);
}
|
|
250
|
+
/**
 * Separate a leading `---` ... `---` frontmatter block from the document body.
 *
 * The frontmatter is only recognised when the very first line is exactly
 * `---` and a later line is exactly `---`; otherwise the whole input is
 * returned as the body.
 *
 * @param {string} raw - Full document text.
 * @returns {{ bodyLines: string[], bodyStartLine: number }} The body lines and
 *   the 1-based line number (in the original text) of the first body line.
 */
function stripTopFrontmatter(raw) {
  const lines = splitLines(raw);
  // Search for the closing marker only when an opening marker is present.
  const closingIndex = lines[0] === "---" ? lines.indexOf("---", 1) : -1;
  if (closingIndex === -1) {
    return { bodyLines: lines, bodyStartLine: 1 };
  }
  return {
    bodyLines: lines.slice(closingIndex + 1),
    // closingIndex is 0-based; the body starts on the line after the marker.
    bodyStartLine: closingIndex + 2
  };
}
|
|
271
|
+
/**
 * Detect whether a line opens a fenced code block.
 *
 * A fence is a run of three or more backticks or tildes, optionally preceded
 * by whitespace and followed by an info string. Following CommonMark, a
 * backtick fence whose info string itself contains a backtick is NOT a fence
 * opener (it is an inline code span such as "```code``` more text"); treating
 * it as one would swallow the rest of the document into a code zone.
 *
 * @param {string} line - A single line of markdown.
 * @returns {string|null} The fence delimiter (e.g. "```" or "~~~~"), or null
 *   when the line does not open a fence.
 */
function matchCodeFenceOpener(line) {
  const match = line.match(/^\s*(`{3,}|~{3,})(.*)$/);
  if (!match) {
    return null;
  }
  const [, delimiter, info] = match;
  // Tilde fences may carry backticks in their info string; backtick fences may not.
  if (delimiter[0] === "`" && info.includes("`")) {
    return null;
  }
  return delimiter;
}
|
|
275
|
+
/**
 * Decide whether a line closes the currently open fenced code block.
 *
 * Following CommonMark, a closing fence consists solely of the opener's fence
 * character (ignoring surrounding whitespace) and is at least as long as the
 * opening delimiter. A line identical to the delimiter therefore still
 * matches, and a longer run of the same character closes the block as well.
 *
 * @param {string} line - The line being examined.
 * @param {string} delimiter - The delimiter that opened the fence.
 * @returns {boolean} True when the line closes the fence.
 */
function isExactCodeFenceCloser(line, delimiter) {
  const candidate = line.trim();
  if (candidate.length < delimiter.length) {
    return false;
  }
  const fenceChar = delimiter[0];
  for (const ch of candidate) {
    // Any other character (info string, mixed fence chars) disqualifies the line.
    if (ch !== fenceChar) {
      return false;
    }
  }
  return true;
}
|
|
278
|
+
/**
 * Add a piece of content to the zone list, merging it into the last zone
 * when that zone has the same type and is on the same or the directly
 * preceding line.
 *
 * Empty content is ignored. When merging across a line boundary a newline is
 * inserted between the existing and the new content.
 *
 * @param {Array<object>} zones - Zone list, mutated in place.
 * @param {string} type - Zone type of the new content.
 * @param {string} content - Text to add.
 * @param {number} startLine - First line covered by the content.
 * @param {number} endLine - Last line covered by the content.
 * @returns {void}
 */
function appendZone(zones, type, content, startLine, endLine) {
  if (content === "") {
    return;
  }
  const tail = zones[zones.length - 1];
  const mergeable = Boolean(tail) && tail.type === type && startLine <= tail.endLine + 1;
  if (!mergeable) {
    zones.push({ type, content, startLine, endLine });
    return;
  }
  // Only a move to the next line needs an explicit line break.
  const joiner = startLine > tail.endLine ? "\n" : "";
  tail.content += `${joiner}${content}`;
  tail.endLine = endLine;
}
|
|
296
|
+
/**
 * Extend a zone that is still open (an unterminated code fence or comment)
 * with the content found on the given line.
 *
 * A newline is inserted whenever the line number advances past the zone's
 * current end line, so blank lines inside the zone are preserved. Empty
 * content on a line the zone already covers is a no-op.
 *
 * @param {{ content: string, endLine: number }} zone - Zone to extend in place.
 * @param {string} content - Text to append (may be empty).
 * @param {number} lineNumber - Line the content came from.
 * @returns {void}
 */
function appendToOpenZone(zone, content, lineNumber) {
  const startsNewLine = lineNumber > zone.endLine;
  if (content === "" && !startsNewLine) {
    return;
  }
  zone.content += `${startsNewLine ? "\n" : ""}${content}`;
  zone.endLine = lineNumber;
}
|
|
308
|
+
/**
 * Add a run of single-line text to the zone list, carving out inline code
 * spans delimited by a pair of backticks.
 *
 * Text outside backtick pairs is added with `baseType`; each backtick pair
 * (delimiters included) is added as an "inline-code" zone. A backtick with
 * no closing partner is treated as ordinary text of `baseType`.
 *
 * @param {Array<object>} zones - Zone list, mutated through appendZone.
 * @param {string} text - Text taken from a single line.
 * @param {number} lineNumber - Line number the text belongs to.
 * @param {string} baseType - Zone type for text outside inline code.
 * @returns {void}
 */
function addInlineAwareText(zones, text, lineNumber, baseType) {
  const emit = (type, from, to) => appendZone(zones, type, text.slice(from, to), lineNumber, lineNumber);
  let position = 0;
  while (position < text.length) {
    const openTick = text.indexOf("`", position);
    if (openTick === -1) {
      // No more backticks: the remainder is plain text.
      emit(baseType, position);
      return;
    }
    if (openTick > position) {
      emit(baseType, position, openTick);
    }
    const closeTick = text.indexOf("`", openTick + 1);
    if (closeTick === -1) {
      // Unbalanced backtick: keep it, and everything after it, as plain text.
      emit(baseType, openTick);
      return;
    }
    emit("inline-code", openTick, closeTick + 1);
    position = closeTick + 1;
  }
}
|
|
331
|
+
/**
 * Split a markdown document into typed zones so later checks can treat prose
 * differently from code examples and comments.
 *
 * Leading frontmatter is removed via stripTopFrontmatter; reported line
 * numbers are offset by the returned `bodyStartLine`. Zone types produced
 * here are "code-fence" and "html-comment"; the remaining text is handed to
 * addInlineAwareText with a base type of "blockquote" or "prose".
 *
 * @param {string} raw - Full document text.
 * @returns {Array<{ type: string, content: string, startLine: number, endLine: number }>}
 *   The zones found in the document body.
 */
function parseZones(raw) {
  const { bodyLines, bodyStartLine } = stripTopFrontmatter(raw);
  const zones = [];
  // State carried across lines: at most one of these is non-null at a time.
  let openCodeFence = null;
  let openComment = null;
  for (const [index, line] of bodyLines.entries()) {
    const lineNumber = bodyStartLine + index;
    if (openCodeFence) {
      // Inside a fence every line belongs to the fence, including the closer.
      appendToOpenZone(openCodeFence.zone, line, lineNumber);
      if (isExactCodeFenceCloser(line, openCodeFence.delimiter)) {
        zones.push(openCodeFence.zone);
        openCodeFence = null;
      }
      continue;
    }
    // A fence can only open when we are not in the middle of an HTML comment.
    if (!openComment) {
      const fenceDelimiter = matchCodeFenceOpener(line);
      if (fenceDelimiter) {
        openCodeFence = {
          delimiter: fenceDelimiter,
          zone: {
            type: "code-fence",
            content: line,
            startLine: lineNumber,
            endLine: lineNumber
          }
        };
        continue;
      }
    }
    // Lines starting with ">" (after optional whitespace) count as blockquote text.
    const baseType = /^\s*>/.test(line) ? "blockquote" : "prose";
    let cursor = 0;
    // The `|| openComment` keeps the loop running for empty lines inside a
    // multi-line comment so they are still recorded in the comment zone.
    while (cursor < line.length || openComment) {
      if (openComment) {
        const closeIndex = line.indexOf("-->", cursor);
        if (closeIndex === -1) {
          // Comment continues past this line: absorb the rest of the line.
          appendToOpenZone(openComment, line.slice(cursor), lineNumber);
          cursor = line.length;
          break;
        }
        // Comment ends on this line: absorb up to and including "-->".
        appendToOpenZone(openComment, line.slice(cursor, closeIndex + 3), lineNumber);
        zones.push(openComment);
        openComment = null;
        cursor = closeIndex + 3;
        continue;
      }
      if (cursor >= line.length) {
        break;
      }
      const commentStart = line.indexOf("<!--", cursor);
      const textEnd = commentStart === -1 ? line.length : commentStart;
      if (textEnd > cursor) {
        // Text before the next comment (or to end of line) is prose/blockquote.
        addInlineAwareText(zones, line.slice(cursor, textEnd), lineNumber, baseType);
      }
      if (commentStart === -1) {
        break;
      }
      // Search for the terminator after the 4-character "<!--" opener.
      const commentEnd = line.indexOf("-->", commentStart + 4);
      if (commentEnd === -1) {
        // Comment opens here and continues on following lines.
        openComment = {
          type: "html-comment",
          content: line.slice(commentStart),
          startLine: lineNumber,
          endLine: lineNumber
        };
        break;
      }
      // Comment opens and closes on the same line.
      appendZone(zones, "html-comment", line.slice(commentStart, commentEnd + 3), lineNumber, lineNumber);
      cursor = commentEnd + 3;
    }
  }
  // Flush zones that were never terminated before the end of the document.
  if (openComment) {
    zones.push(openComment);
  }
  if (openCodeFence) {
    zones.push(openCodeFence.zone);
  }
  return zones;
}
|
|
410
|
+
|
|
242
411
|
// src/core/linter/content.ts
|
|
243
412
|
var VAGUE_PATTERNS = [
|
|
244
413
|
/\bdo something appropriate\b/i,
|
|
@@ -255,6 +424,102 @@ var SECRET_PATTERNS = [
|
|
|
255
424
|
{ label: "Slack token", regex: /\bxox[baprs]-[A-Za-z0-9-]{20,}\b/ },
|
|
256
425
|
{ label: "Generic private key header", regex: /-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----/ }
|
|
257
426
|
];
|
|
427
|
+
/**
 * Compute the overall line span covered by a list of matches.
 *
 * @param {Array<{ startLine: number, endLine: number }>} matches - Matches to summarise.
 * @returns {{ startLine?: number, endLine?: number }} The smallest start line
 *   and largest end line, or an empty object when there are no matches.
 */
function summarizeLineRange(matches) {
  if (matches.length === 0) {
    return {};
  }
  let startLine = Infinity;
  let endLine = -Infinity;
  for (const match of matches) {
    startLine = Math.min(startLine, match.startLine);
    endLine = Math.max(endLine, match.endLine);
  }
  return { startLine, endLine };
}
|
|
436
|
+
/**
 * Collect the distinct labels of the given matches, keeping the order in
 * which each label first appears.
 *
 * @param {Array<{ label: string }>} matches - Matches to read labels from.
 * @returns {string[]} De-duplicated labels in first-seen order.
 */
function uniqueLabels(matches) {
  // A Set iterates in insertion order, which preserves first-seen ordering.
  return [...new Set(matches.map((match) => match.label))];
}
|
|
448
|
+
/**
 * Test every zone against every entry of SECRET_PATTERNS and sort the hits
 * by where they occurred.
 *
 * @param {Array<{ type: string, content: string, startLine: number, endLine: number }>} zones
 *   Zones to scan.
 * @returns {{ prose: Array<object>, nonProse: Array<object> }} Hits found in
 *   "prose" zones and hits found in any other zone type. Each hit records the
 *   pattern label, the zone type and the zone's line range.
 */
function collectSecretMatches(zones) {
  const buckets = { prose: [], nonProse: [] };
  for (const zone of zones) {
    const bucket = zone.type === "prose" ? buckets.prose : buckets.nonProse;
    for (const pattern of SECRET_PATTERNS) {
      if (pattern.regex.test(zone.content)) {
        bucket.push({
          label: pattern.label,
          zoneType: zone.type,
          startLine: zone.startLine,
          endLine: zone.endLine
        });
      }
    }
  }
  return buckets;
}
|
|
471
|
+
/**
 * Convert matches into the `skippedPatterns` payload attached to an issue.
 *
 * @param {Array<{ label: string, zoneType: string, startLine: number, endLine: number }>} matches
 *   Matches that were found but not flagged.
 * @returns {Array<object>|undefined} Fresh objects carrying only label,
 *   zoneType, startLine and endLine; undefined when there are no matches.
 */
function buildSkippedPatterns(matches) {
  return matches.length === 0
    ? void 0
    : matches.map(({ label, zoneType, startLine, endLine }) => ({ label, zoneType, startLine, endLine }));
}
|
|
482
|
+
/**
 * Build the "content:secrets" lint result for a skill file.
 *
 * Outcome by where secret patterns were found:
 * - in prose zones: status "fail";
 * - only outside prose (code fences, comments, ...): status "warn";
 * - nowhere: status "pass".
 *
 * @param {object} context - Lint context; reads `suppressedCheckIds` and `skill.raw`.
 * @returns {object|null} The issue object, or null when the check is suppressed.
 */
function buildSecretsIssue(context) {
  if (context.suppressedCheckIds.has("content:secrets")) {
    return null;
  }
  const zones = parseZones(context.skill.raw);
  const { prose, nonProse } = collectSecretMatches(zones);
  const proseLabels = uniqueLabels(prose);
  const nonProseLabels = uniqueLabels(nonProse);
  const skippedPatterns = buildSkippedPatterns(nonProse);
  // Fields shared by every outcome of this check.
  const identity = {
    id: "content.secrets",
    checkId: "content:secrets",
    title: "Hardcoded Secrets"
  };

  if (proseLabels.length > 0) {
    return {
      ...identity,
      status: "fail",
      message: `Potential secrets detected (${proseLabels.join(", ")}).`,
      suggestion: "Remove secrets from skill files and use environment variables or secret managers.",
      ...summarizeLineRange(prose),
      skippedPatterns
    };
  }

  if (nonProseLabels.length === 0) {
    return {
      ...identity,
      status: "pass",
      message: "No obvious API keys or secrets patterns were detected."
    };
  }

  // Hits exist, but none in prose: downgrade to a warning.
  const codeFenceOnly = nonProse.every((match) => match.zoneType === "code-fence");
  let message;
  if (codeFenceOnly) {
    message = `Possible secret in code example \u2014 verify this is a placeholder, not a real key (${nonProseLabels.join(", ")}).`;
  } else {
    message = `Possible secrets found outside prose instructions (${nonProseLabels.join(", ")}). Verify these are placeholders, not real credentials.`;
  }
  return {
    ...identity,
    status: "warn",
    message,
    suggestion: "Replace real-looking credentials in examples with explicit placeholders such as YOUR_API_KEY.",
    ...summarizeLineRange(nonProse),
    skippedPatterns
  };
}
|
|
258
523
|
function runContentChecks(context) {
|
|
259
524
|
const issues = [];
|
|
260
525
|
const body = context.frontmatter.content;
|
|
@@ -334,29 +599,9 @@ function runContentChecks(context) {
|
|
|
334
599
|
message: "No angle bracket tokens detected in frontmatter."
|
|
335
600
|
});
|
|
336
601
|
}
|
|
337
|
-
const
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
secretHits.add(pattern.label);
|
|
341
|
-
}
|
|
342
|
-
}
|
|
343
|
-
if (secretHits.size > 0) {
|
|
344
|
-
issues.push({
|
|
345
|
-
id: "content.secrets",
|
|
346
|
-
checkId: "content:secrets",
|
|
347
|
-
title: "Hardcoded Secrets",
|
|
348
|
-
status: "fail",
|
|
349
|
-
message: `Potential secrets detected (${Array.from(secretHits).join(", ")}).`,
|
|
350
|
-
suggestion: "Remove secrets from skill files and use environment variables or secret managers."
|
|
351
|
-
});
|
|
352
|
-
} else {
|
|
353
|
-
issues.push({
|
|
354
|
-
id: "content.secrets",
|
|
355
|
-
checkId: "content:secrets",
|
|
356
|
-
title: "Hardcoded Secrets",
|
|
357
|
-
status: "pass",
|
|
358
|
-
message: "No obvious API keys or secrets patterns were detected."
|
|
359
|
-
});
|
|
602
|
+
const secretsIssue = buildSecretsIssue(context);
|
|
603
|
+
if (secretsIssue) {
|
|
604
|
+
issues.push(secretsIssue);
|
|
360
605
|
}
|
|
361
606
|
if (bodyLines.length < 10) {
|
|
362
607
|
issues.push({
|
|
@@ -776,93 +1021,159 @@ var SHELL_ACTIVITY_PATTERNS = [
|
|
|
776
1021
|
/\b(?:npm|pnpm|yarn|pip|git|docker|kubectl)\s+[A-Za-z0-9-]/i
|
|
777
1022
|
];
|
|
778
1023
|
var SAFETY_GUARDRAIL_PATTERN = /\b(?:ask before|confirm|approval|dry[- ]run|sandbox|least privilege|redact|never expose|do not reveal)\b/i;
|
|
779
|
-
function
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
1024
|
+
/**
 * Describe one pattern hit inside one zone.
 *
 * @param {{ type: string, startLine: number, endLine: number }} zone - Zone that matched.
 * @param {{ label: string }} pattern - Pattern that matched.
 * @returns {{ label: string, zoneType: string, startLine: number, endLine: number }}
 *   The pattern label together with the zone's type and line range.
 */
function buildOccurrence(zone, pattern) {
  const { type: zoneType, startLine, endLine } = zone;
  const { label } = pattern;
  return { label, zoneType, startLine, endLine };
}
|
|
1032
|
+
/**
 * Run a list of patterns over every zone and split the hits by zone type.
 *
 * @param {Array<{ type: string, content: string }>} zones - Zones to scan.
 * @param {Array<{ label: string, regex: RegExp }>} patterns - Patterns to test.
 * @returns {{ flagged: Array<object>, skipped: Array<object> }} Hits located
 *   in "prose" zones (`flagged`) and hits located in any other zone (`skipped`).
 */
function collectZoneAwareMatches(zones, patterns) {
  const result = { flagged: [], skipped: [] };
  for (const zone of zones) {
    const bucket = zone.type === "prose" ? result.flagged : result.skipped;
    for (const pattern of patterns) {
      if (pattern.regex.test(zone.content)) {
        bucket.push(buildOccurrence(zone, pattern));
      }
    }
  }
  return result;
}
|
|
1050
|
+
/**
 * Return each distinct match label once, ordered by first appearance.
 *
 * @param {Array<{ label: string }>} matches - Matches to read labels from.
 * @returns {string[]} De-duplicated labels.
 */
function uniqueLabels2(matches) {
  const distinct = new Set();
  for (const { label } of matches) {
    // Adding an existing value is a no-op, so first-seen order is retained.
    distinct.add(label);
  }
  return Array.from(distinct);
}
|
|
1062
|
+
/**
 * Reduce a list of matches to the line span that encloses all of them.
 *
 * @param {Array<{ startLine: number, endLine: number }>} matches - Matches to summarise.
 * @returns {{ startLine?: number, endLine?: number }} Minimum start line and
 *   maximum end line, or an empty object for an empty list.
 */
function summarizeLineRange2(matches) {
  if (matches.length === 0) {
    return {};
  }
  const startLine = matches.reduce((lowest, match) => Math.min(lowest, match.startLine), Infinity);
  const endLine = matches.reduce((highest, match) => Math.max(highest, match.endLine), -Infinity);
  return { startLine, endLine };
}
|
|
1071
|
+
/**
 * Produce the `skippedPatterns` list reported alongside a security issue.
 *
 * @param {Array<{ label: string, zoneType: string, startLine: number, endLine: number }>} matches
 *   Hits that were found outside prose.
 * @returns {Array<object>|undefined} Copies restricted to label, zoneType,
 *   startLine and endLine; undefined when the list is empty.
 */
function buildSkippedPatterns2(matches) {
  if (matches.length === 0) {
    return void 0;
  }
  const skipped = [];
  for (const { label, zoneType, startLine, endLine } of matches) {
    skipped.push({ label, zoneType, startLine, endLine });
  }
  return skipped;
}
|
|
1082
|
+
/**
 * Tell whether a check has been suppressed for the current lint run.
 *
 * @param {{ suppressedCheckIds: Set<string> }} context - Lint context.
 * @param {string} checkId - Identifier of the check.
 * @returns {boolean} True when `checkId` is in the suppressed set.
 */
function isSuppressed(context, checkId) {
  const { suppressedCheckIds } = context;
  return suppressedCheckIds.has(checkId);
}
|
|
1085
|
+
/**
 * Run one pattern-based security check over pre-parsed zones.
 *
 * Only hits in prose zones affect the status; hits elsewhere are reported in
 * `skippedPatterns` on both the matching and the passing result.
 *
 * @param {object} context - Lint context (used for suppression lookup).
 * @param {Array<object>} zones - Zones to scan.
 * @param {object} options - Check definition; reads `id`, `checkId`, `title`,
 *   `patterns`, `statusOnMatch`, `matchMessagePrefix`, `passMessage` and
 *   `suggestion`.
 * @returns {object|null} The issue object, or null when the check is suppressed.
 */
function runZoneAwareSecurityCheck(context, zones, options) {
  if (isSuppressed(context, options.checkId)) {
    return null;
  }
  const { flagged, skipped } = collectZoneAwareMatches(zones, options.patterns);
  const labels = uniqueLabels2(flagged);
  const skippedPatterns = buildSkippedPatterns2(skipped);
  const header = {
    id: options.id,
    checkId: options.checkId,
    title: options.title
  };
  if (labels.length === 0) {
    return {
      ...header,
      status: "pass",
      message: options.passMessage,
      skippedPatterns
    };
  }
  return {
    ...header,
    status: options.statusOnMatch,
    message: `${options.matchMessagePrefix}: ${labels.join(", ")}.`,
    suggestion: options.suggestion,
    ...summarizeLineRange2(flagged),
    skippedPatterns
  };
}
|
|
788
1113
|
function runSecurityChecks(context) {
|
|
789
1114
|
const issues = [];
|
|
790
1115
|
const skillText = context.skill.raw;
|
|
791
|
-
const
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
}
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
issues.push({
|
|
832
|
-
id: "security.privilege-escalation",
|
|
833
|
-
checkId: "security:privilege-escalation",
|
|
834
|
-
title: "Privilege Escalation Language",
|
|
835
|
-
status: "warn",
|
|
836
|
-
message: `Potentially risky privilege/execution language detected: ${escalationHits.join(", ")}.`,
|
|
837
|
-
suggestion: "Prefer least-privilege execution and explicit approval steps for elevated commands."
|
|
838
|
-
});
|
|
839
|
-
} else {
|
|
840
|
-
issues.push({
|
|
841
|
-
id: "security.privilege-escalation",
|
|
842
|
-
checkId: "security:privilege-escalation",
|
|
843
|
-
title: "Privilege Escalation Language",
|
|
844
|
-
status: "pass",
|
|
845
|
-
message: "No obvious privilege-escalation language detected."
|
|
846
|
-
});
|
|
1116
|
+
const needsZoneParsing = !isSuppressed(context, "security:dangerous-commands") || !isSuppressed(context, "security:exfiltration") || !isSuppressed(context, "security:privilege-escalation");
|
|
1117
|
+
const zones = needsZoneParsing ? parseZones(skillText) : [];
|
|
1118
|
+
const dangerousCommandsIssue = runZoneAwareSecurityCheck(context, zones, {
|
|
1119
|
+
id: "security.dangerous-command-patterns",
|
|
1120
|
+
checkId: "security:dangerous-commands",
|
|
1121
|
+
title: "Dangerous Command Patterns",
|
|
1122
|
+
statusOnMatch: "fail",
|
|
1123
|
+
patterns: DANGEROUS_COMMAND_PATTERNS,
|
|
1124
|
+
matchMessagePrefix: "Potentially dangerous command instruction patterns found",
|
|
1125
|
+
passMessage: "No high-risk destructive or direct pipe-to-shell patterns detected.",
|
|
1126
|
+
suggestion: "Remove destructive/pipe-exec command examples or wrap them with explicit safety constraints."
|
|
1127
|
+
});
|
|
1128
|
+
if (dangerousCommandsIssue) {
|
|
1129
|
+
issues.push(dangerousCommandsIssue);
|
|
1130
|
+
}
|
|
1131
|
+
const exfiltrationIssue = runZoneAwareSecurityCheck(context, zones, {
|
|
1132
|
+
id: "security.exfiltration-patterns",
|
|
1133
|
+
checkId: "security:exfiltration",
|
|
1134
|
+
title: "Sensitive Data Exfiltration",
|
|
1135
|
+
statusOnMatch: "fail",
|
|
1136
|
+
patterns: EXFILTRATION_PATTERNS,
|
|
1137
|
+
matchMessagePrefix: "Possible sensitive data exfiltration patterns found",
|
|
1138
|
+
passMessage: "No obvious credential access/exfiltration instructions detected.",
|
|
1139
|
+
suggestion: "Remove instructions that access or transmit secrets/credential files."
|
|
1140
|
+
});
|
|
1141
|
+
if (exfiltrationIssue) {
|
|
1142
|
+
issues.push(exfiltrationIssue);
|
|
1143
|
+
}
|
|
1144
|
+
const privilegeEscalationIssue = runZoneAwareSecurityCheck(context, zones, {
|
|
1145
|
+
id: "security.privilege-escalation",
|
|
1146
|
+
checkId: "security:privilege-escalation",
|
|
1147
|
+
title: "Privilege Escalation Language",
|
|
1148
|
+
statusOnMatch: "warn",
|
|
1149
|
+
patterns: PRIVILEGE_ESCALATION_PATTERNS,
|
|
1150
|
+
matchMessagePrefix: "Potentially risky privilege/execution language detected",
|
|
1151
|
+
passMessage: "No obvious privilege-escalation language detected.",
|
|
1152
|
+
suggestion: "Prefer least-privilege execution and explicit approval steps for elevated commands."
|
|
1153
|
+
});
|
|
1154
|
+
if (privilegeEscalationIssue) {
|
|
1155
|
+
issues.push(privilegeEscalationIssue);
|
|
847
1156
|
}
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
1157
|
+
if (!isSuppressed(context, "security:missing-guardrails")) {
|
|
1158
|
+
const hasShellActivity = SHELL_ACTIVITY_PATTERNS.some((pattern) => pattern.test(skillText));
|
|
1159
|
+
if (hasShellActivity && !SAFETY_GUARDRAIL_PATTERN.test(skillText)) {
|
|
1160
|
+
issues.push({
|
|
1161
|
+
id: "security.safety-guardrails",
|
|
1162
|
+
checkId: "security:missing-guardrails",
|
|
1163
|
+
title: "Execution Safety Guardrails",
|
|
1164
|
+
status: "warn",
|
|
1165
|
+
message: "Shell/tool execution is present, but no explicit safety guardrails were detected.",
|
|
1166
|
+
suggestion: "Add guidance such as approval requirements, dry-run mode, scope checks, and redaction rules."
|
|
1167
|
+
});
|
|
1168
|
+
} else {
|
|
1169
|
+
issues.push({
|
|
1170
|
+
id: "security.safety-guardrails",
|
|
1171
|
+
checkId: "security:missing-guardrails",
|
|
1172
|
+
title: "Execution Safety Guardrails",
|
|
1173
|
+
status: "pass",
|
|
1174
|
+
message: hasShellActivity ? "Shell/tool execution instructions include at least one safety guardrail." : "No shell/tool execution instructions detected."
|
|
1175
|
+
});
|
|
1176
|
+
}
|
|
866
1177
|
}
|
|
867
1178
|
return issues;
|
|
868
1179
|
}
|
|
@@ -1038,9 +1349,11 @@ function lintFails(report, failOn) {
|
|
|
1038
1349
|
async function runLinter(inputPath, options = {}) {
|
|
1039
1350
|
const skill = await loadSkillFile(inputPath);
|
|
1040
1351
|
const frontmatter = parseFrontmatter(skill.raw);
|
|
1352
|
+
const suppressedCheckIds = new Set(options.suppress ?? []);
|
|
1041
1353
|
const context = {
|
|
1042
1354
|
skill,
|
|
1043
|
-
frontmatter
|
|
1355
|
+
frontmatter,
|
|
1356
|
+
suppressedCheckIds
|
|
1044
1357
|
};
|
|
1045
1358
|
const issues = [];
|
|
1046
1359
|
issues.push(...runFrontmatterChecks(context));
|
|
@@ -1049,8 +1362,7 @@ async function runLinter(inputPath, options = {}) {
|
|
|
1049
1362
|
issues.push(...runSecurityChecks(context));
|
|
1050
1363
|
issues.push(...await runDisclosureChecks(context));
|
|
1051
1364
|
issues.push(...runCompatibilityChecks(context));
|
|
1052
|
-
const
|
|
1053
|
-
const filteredIssues = issues.filter((issue) => !suppress.has(issue.checkId));
|
|
1365
|
+
const filteredIssues = issues.filter((issue) => !suppressedCheckIds.has(issue.checkId));
|
|
1054
1366
|
return {
|
|
1055
1367
|
target: inputPath,
|
|
1056
1368
|
issues: filteredIssues,
|
|
@@ -1058,6 +1370,739 @@ async function runLinter(inputPath, options = {}) {
|
|
|
1058
1370
|
};
|
|
1059
1371
|
}
|
|
1060
1372
|
|
|
1373
|
+
// src/reporters/html.ts
|
|
1374
|
+
/**
 * Escape a value for safe inclusion in HTML text or a quoted attribute.
 *
 * The value is converted with String(); null and undefined become the empty
 * string. The five characters & < > " ' are replaced by HTML entities.
 *
 * @param {unknown} value - Value to render.
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  return String(value ?? "")
    // "&" must be handled first so the entities added below are not re-escaped.
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
|
1377
|
+
/**
 * Format a ratio as a percentage with one decimal place.
 *
 * @param {number} value - Ratio, where 1 corresponds to 100%.
 * @returns {string} For example "12.3%".
 */
function formatPercent(value) {
  const percent = value * 100;
  return `${percent.toFixed(1)}%`;
}
|
|
1380
|
+
/**
 * Describe a line range in human-readable form.
 *
 * @param {number|undefined} startLine - First line, if known.
 * @param {number|undefined} endLine - Last line, if known.
 * @returns {string|null} "line N" for a single line, "lines N-M" for a span,
 *   or null when no start line is available.
 */
function formatLineRange(startLine, endLine) {
  if (startLine === void 0) {
    return null;
  }
  const isSingleLine = endLine === void 0 || endLine === startLine;
  return isSingleLine ? `line ${startLine}` : `lines ${startLine}-${endLine}`;
}
|
|
1389
|
+
/**
 * Map a status value to the text shown on its badge.
 *
 * @param {string} status - "pass", "warn", "fail", or anything else.
 * @returns {string} "PASS", "WARN" or "FAIL"; every other status yields "SKIP".
 */
function badgeLabel(status) {
  switch (status) {
    case "pass":
      return "PASS";
    case "warn":
      return "WARN";
    case "fail":
      return "FAIL";
    default:
      return "SKIP";
  }
}
|
|
1401
|
+
/**
 * Render a status badge.
 *
 * The status is used verbatim as a CSS class and is not escaped.
 *
 * @param {string} status - Status value (also used as a CSS class name).
 * @returns {string} A `<span class="badge …">` element containing the badge label.
 */
function renderBadge(status) {
  const label = badgeLabel(status);
  return `<span class="badge ${status}">${label}</span>`;
}
|
|
1404
|
+
/**
 * Render summary statistics as a grid of cards.
 *
 * Each item's `label`, `value` and optional `note` are escaped with
 * escapeHtml. The optional `status` is appended to the card's class list as
 * `status-<status>` without escaping.
 *
 * @param {Array<object>} stats - Items read via `label`, `value`, `status`, `note`.
 * @returns {string} HTML for the `stats-grid` container.
 */
function renderStatCards(stats) {
  return `<div class="stats-grid">${stats.map(
    (stat) => `
<div class="stat-card${stat.status ? ` status-${stat.status}` : ""}">
<div class="stat-label">${escapeHtml(stat.label)}</div>
<div class="stat-value">${escapeHtml(stat.value)}</div>
${stat.note ? `<div class="stat-note">${escapeHtml(stat.note)}</div>` : ""}
</div>
`
  ).join("")}</div>`;
}
|
|
1415
|
+
/**
 * Render label/value metadata pairs as a grid.
 *
 * @param {Array<{ label: unknown, value: unknown }>} items - Pairs to show;
 *   both fields are escaped with escapeHtml.
 * @returns {string} HTML for the `meta-grid` container, or an empty string
 *   when there are no items.
 */
function renderMetaItems(items) {
  // Emit nothing at all rather than an empty grid container.
  if (items.length === 0) {
    return "";
  }
  return `<div class="meta-grid">${items.map(
    (item) => `
<div class="meta-item">
<span class="meta-label">${escapeHtml(item.label)}</span>
<span class="meta-value">${escapeHtml(item.value)}</span>
</div>
`
  ).join("")}</div>`;
}
|
|
1428
|
+
/**
 * Render the report header: command name, heading, target, metadata and stats.
 *
 * `commandName`, `heading` and `target` are escaped; `metaItems` and `stats`
 * are rendered through renderMetaItems and renderStatCards.
 *
 * @param {string} commandName - Shown after "skilltest" in the eyebrow line.
 * @param {string} heading - Text of the `<h1>`.
 * @param {string} target - Value shown after "target:".
 * @param {Array<object>} stats - Passed to renderStatCards.
 * @param {Array<object>} metaItems - Passed to renderMetaItems.
 * @returns {string} HTML for the header card section.
 */
function renderHeaderCard(commandName, heading, target, stats, metaItems) {
  return `
<section class="card header-card">
<div class="eyebrow">skilltest ${escapeHtml(commandName)}</div>
<h1>${escapeHtml(heading)}</h1>
<div class="target-line">target: ${escapeHtml(target)}</div>
${renderMetaItems(metaItems)}
${renderStatCards(stats)}
</section>
`;
}
|
|
1439
|
+
/**
 * Wrap pre-rendered HTML in a titled card section.
 *
 * @param {string} title - Section heading; escaped with escapeHtml.
 * @param {string} body - HTML inserted as-is (NOT escaped here), so it must
 *   already be safe markup.
 * @returns {string} HTML for the card section.
 */
function renderSectionCard(title, body) {
  return `
<section class="card">
<h2>${escapeHtml(title)}</h2>
${body}
</section>
`;
}
|
|
1447
|
+
/**
 * Render a row with a title, a status badge, a message and optional details.
 *
 * @param {string} status - Passed to renderBadge.
 * @param {string} title - Row title; escaped.
 * @param {string} message - Row body text; escaped.
 * @param {string} [details] - Optional HTML appended as-is (NOT escaped here);
 *   null/undefined renders as an empty string.
 * @returns {string} HTML for the row.
 */
function renderMessageRow(status, title, message, details) {
  return `
<div class="row">
<div class="row-header">
<div class="row-title">${escapeHtml(title)}</div>
${renderBadge(status)}
</div>
<div class="row-body">${escapeHtml(message)}</div>
${details ?? ""}
</div>
`;
}
|
|
1459
|
+
/**
 * Render a collapsible `<details>` block.
 *
 * @param {string} summary - Text of the `<summary>`; escaped with escapeHtml.
 * @param {string} content - HTML inserted as-is (NOT escaped here), so it
 *   must already be safe markup.
 * @returns {string} HTML for the details block.
 */
function renderDetails(summary, content) {
  return `
<details class="detail-block">
<summary>${escapeHtml(summary)}</summary>
<div class="detail-content">${content}</div>
</details>
`;
}
|
|
1467
|
+
/**
 * Render text inside a `<pre>` element.
 *
 * @param {unknown} content - Text to show; escaped with escapeHtml.
 * @returns {string} The `<pre>` element.
 */
function renderPreBlock(content) {
  const escaped = escapeHtml(content);
  return `<pre>${escaped}</pre>`;
}
|
|
1470
|
+
/**
 * Render label/value pairs as a definition list.
 *
 * Unlike renderMetaItems, an empty list still produces the (empty) container.
 *
 * @param {Array<{ label: unknown, value: unknown }>} items - Pairs to show;
 *   both fields are escaped with escapeHtml.
 * @returns {string} HTML for the `definition-list` container.
 */
function renderDefinitionList(items) {
  return `<div class="definition-list">${items.map(
    (item) => `
<div class="definition-item">
<div class="definition-label">${escapeHtml(item.label)}</div>
<div class="definition-value">${escapeHtml(item.value)}</div>
</div>
`
  ).join("")}</div>`;
}
|
|
1480
|
+
/**
 * Count all skipped pattern entries across a list of issues.
 *
 * @param {Array<{ skippedPatterns?: Array<unknown> }>} issues - Lint issues.
 * @returns {number} Sum of `skippedPatterns.length`; issues without the
 *   field contribute zero.
 */
function countSkippedSecurityPatterns(issues) {
  let total = 0;
  for (const issue of issues) {
    total += issue.skippedPatterns?.length ?? 0;
  }
  return total;
}
|
|
1483
|
+
/**
 * Render one lint issue as a row: title, check id, status badge, message,
 * a location / check-id definition list and optional collapsible details.
 *
 * @param {object} issue - Lint issue; reads `title`, `checkId`, `status`,
 *   `message`, and the optional `startLine`, `endLine`, `suggestion` and
 *   `skippedPatterns` fields.
 * @returns {string} HTML for the row.
 */
function renderLintIssueRow(issue) {
  // null when the issue carries no start line; the "Location" entry is then omitted.
  const lineRange = formatLineRange(issue.startLine, issue.endLine);
  const detailBlocks = [];
  if (issue.suggestion) {
    detailBlocks.push(renderDetails("Suggestion", `<p>${escapeHtml(issue.suggestion)}</p>`));
  }
  if (issue.skippedPatterns && issue.skippedPatterns.length > 0) {
    // One definition item per skipped pattern: label plus "<zoneType> lines A-B".
    const patternItems = issue.skippedPatterns.map(
      (pattern) => `
<div class="definition-item">
<div class="definition-label">${escapeHtml(pattern.label)}</div>
<div class="definition-value">${escapeHtml(
        `${pattern.zoneType} lines ${pattern.startLine}-${pattern.endLine}`
      )}</div>
</div>
`
    ).join("");
    detailBlocks.push(renderDetails("Skipped security patterns", `<div class="definition-list">${patternItems}</div>`));
  }
  return `
<div class="row">
<div class="row-header">
<div>
<div class="row-title">${escapeHtml(issue.title)}</div>
<div class="row-subtitle">${escapeHtml(issue.checkId)}</div>
</div>
${renderBadge(issue.status)}
</div>
<div class="row-body">${escapeHtml(issue.message)}</div>
${renderDefinitionList(
    [
      lineRange ? { label: "Location", value: lineRange } : null,
      { label: "Check ID", value: issue.checkId }
    ].filter((item) => item !== null)
  )}
${detailBlocks.join("")}
</div>
`;
}
|
|
1522
|
+
/**
 * Render all issues of a lint report as a list of rows, followed by a note
 * with the number of skipped security patterns when that number is above zero.
 *
 * @param {{ issues: Array<object> }} report - Lint report.
 * @returns {string} HTML for the row list and the optional info line.
 */
function renderLintIssueList(report) {
  const skippedCount = countSkippedSecurityPatterns(report.issues);
  const rows = report.issues.map((issue) => renderLintIssueRow(issue)).join("");
  let info = "";
  if (skippedCount > 0) {
    info = `<p class="info-line">Skipped security patterns in examples/comments: ${escapeHtml(skippedCount)}</p>`;
  }
  return `<div class="row-list">${rows}</div>${info}`;
}
|
|
1528
|
+
/**
 * Render one trigger test case as a row.
 *
 * The badge is "pass" when `testCase.matched` is truthy and "fail" otherwise.
 * The raw model response, when present, is shown in a collapsible block.
 *
 * @param {object} testCase - Reads `query`, `expected`, `actual`,
 *   `shouldTrigger`, `matched` and the optional `rawModelResponse`.
 * @returns {string} HTML for the row.
 */
function renderTriggerCaseRow(testCase) {
  const details = testCase.rawModelResponse ? renderDetails("Model response", renderPreBlock(testCase.rawModelResponse)) : "";
  return `
<div class="row">
<div class="row-header">
<div>
<div class="row-title">${escapeHtml(testCase.query)}</div>
<div class="row-subtitle">${escapeHtml(
    `expected=${testCase.expected} actual=${testCase.actual} should_trigger=${String(testCase.shouldTrigger)}`
  )}</div>
</div>
${renderBadge(testCase.matched ? "pass" : "fail")}
</div>
${renderDefinitionList([
    { label: "Expected", value: testCase.expected },
    { label: "Actual", value: testCase.actual }
  ])}
${details}
</div>
`;
}
|
|
1549
|
+
/**
 * Derive a status from a prompt's assertion counts.
 *
 * @param {{ passedAssertions: number, totalAssertions: number }} promptResult
 *   Evaluation result for one prompt.
 * @returns {string} "skip" when there are no assertions, "pass" when all
 *   passed, "fail" when none passed, otherwise "warn".
 */
function promptStatus(promptResult) {
  const { passedAssertions: passed, totalAssertions: total } = promptResult;
  if (total === 0) {
    return "skip";
  }
  if (passed === total) {
    return "pass";
  }
  return passed === 0 ? "fail" : "warn";
}
|
|
1561
|
+
/**
 * Render a single assertion as a collapsible block whose summary shows the
 * PASS/FAIL label followed by the assertion text, and whose content is the
 * evidence in a `<pre>` element.
 *
 * @param {{ passed: boolean, assertion: string, evidence: string }} assertion
 *   Assertion result.
 * @returns {string} HTML for the details block.
 */
function renderAssertionRow(assertion) {
  const verdict = badgeLabel(assertion.passed ? "pass" : "fail");
  const summary = `${verdict} ${assertion.assertion}`;
  const evidence = renderPreBlock(assertion.evidence);
  return renderDetails(summary, evidence);
}
|
|
1567
|
+
/**
 * Render the evaluation result for one prompt as a row: prompt text,
 * assertion tally, status badge, response summary, per-assertion evidence
 * and the full model response.
 *
 * @param {object} promptResult - Reads `prompt`, `passedAssertions`,
 *   `totalAssertions`, `responseSummary`, `assertions` and `response`.
 * @returns {string} HTML for the row.
 */
function renderEvalPromptRow(promptResult) {
  const assertionDetails = promptResult.assertions.map((assertion) => renderAssertionRow(assertion)).join("");
  const responseDetails = renderDetails("Full model response", renderPreBlock(promptResult.response));
  return `
<div class="row">
<div class="row-header">
<div>
<div class="row-title">${escapeHtml(promptResult.prompt)}</div>
<div class="row-subtitle">${escapeHtml(
    `${promptResult.passedAssertions}/${promptResult.totalAssertions} assertions passed`
  )}</div>
</div>
${renderBadge(promptStatus(promptResult))}
</div>
<div class="row-body">${escapeHtml(promptResult.responseSummary)}</div>
${renderDefinitionList([
    { label: "Passed assertions", value: String(promptResult.passedAssertions) },
    { label: "Total assertions", value: String(promptResult.totalAssertions) }
  ])}
${renderDetails("Assertion evidence", assertionDetails || `<p>No assertions.</p>`)}
${responseDetails}
</div>
`;
}
|
|
1591
|
+
/**
 * Converts a tri-state gate value into a badge status.
 *
 * @param {boolean | null} value `null` means the gate was not evaluated.
 * @returns {"skip" | "pass" | "fail"} "skip" for `null`, otherwise "pass"
 *   for truthy values and "fail" for falsy ones.
 */
function gateStatus(value) {
  if (value !== null) {
    return value ? "pass" : "fail";
  }
  return "skip";
}
|
|
1597
|
+
/**
 * Renders a quality-gate card: an escaped title next to a status badge, with
 * an escaped message underneath.
 *
 * @param {string} title Card heading (HTML-escaped here).
 * @param {string} status Status handed to `renderBadge`.
 * @param {string} message Explanatory text (HTML-escaped here).
 * @returns {string} HTML fragment wrapped in `<div class="gate-card">`.
 */
function renderGateCard(title, status, message) {
  const titleHtml = escapeHtml(title);
  const badgeHtml = renderBadge(status);
  const messageHtml = escapeHtml(message);
  // Leading/trailing empty entries reproduce the surrounding newlines.
  return [
    "",
    '<div class="gate-card">',
    '<div class="row-header">',
    `<div class="row-title">${titleHtml}</div>`,
    `${badgeHtml}`,
    "</div>",
    `<div class="row-body">${messageHtml}</div>`,
    "</div>",
    ""
  ].join("\n");
}
|
|
1608
|
+
/**
 * Renders an initially-open `<details>` section card.
 *
 * @param {string} title Section heading (HTML-escaped here).
 * @param {string} summary Short summary shown beside the badge (HTML-escaped here).
 * @param {string} body Pre-rendered HTML inserted as-is into the section body.
 * @param {string} status Status handed to `renderBadge`.
 * @returns {string} HTML fragment for the collapsible section.
 */
function renderCollapsibleSection(title, summary, body, status) {
  const titleHtml = escapeHtml(title);
  const badgeHtml = renderBadge(status);
  const summaryHtml = escapeHtml(summary);
  // Leading/trailing empty entries reproduce the surrounding newlines.
  return [
    "",
    '<details class="section-card" open>',
    "<summary>",
    `<span class="section-title">${titleHtml}</span>`,
    `<span class="section-summary">${badgeHtml} ${summaryHtml}</span>`,
    "</summary>",
    `<div class="section-body">${body}</div>`,
    "</details>",
    ""
  ].join("\n");
}
|
|
1619
|
+
/**
 * Returns `result.target` when it is set, otherwise the supplied fallback.
 *
 * @param {{ target?: string | null }} result Object that may carry a target.
 * @param {string} fallback Value used when `target` is `null` or `undefined`.
 * @returns {string}
 */
function resolveOptionalTarget(result, fallback) {
  const { target } = result;
  if (target === null || target === undefined) {
    return fallback;
  }
  return target;
}
|
|
1622
|
+
/**
 * Wraps a pre-rendered HTML body in a complete standalone document with the
 * report stylesheet inlined.
 *
 * @param {string} title Document title (HTML-escaped here).
 * @param {string} body Pre-rendered HTML placed inside `<main class="container">`;
 *   inserted as-is, so callers are responsible for escaping it.
 * @returns {string} The full HTML document, starting at `<!DOCTYPE html>`.
 */
function renderHtmlDocument(title, body) {
  const safeTitle = escapeHtml(title);
  // The stylesheet is static; it is kept in one literal so the emitted
  // document text does not change.
  const stylesheet = `:root {
color-scheme: light;
--bg: #f5f5f5;
--surface: #ffffff;
--surface-muted: #fafafa;
--border: #d4d4d8;
--text: #111827;
--muted: #6b7280;
--pass: #22c55e;
--warn: #eab308;
--fail: #ef4444;
--skip: #6b7280;
--shadow: 0 10px 30px rgba(15, 23, 42, 0.08);
}

* {
box-sizing: border-box;
}

body {
margin: 0;
background: linear-gradient(180deg, #fafafa 0%, #f4f4f5 100%);
color: var(--text);
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
line-height: 1.5;
}

.container {
max-width: 1120px;
margin: 0 auto;
padding: 24px 16px 40px;
}

.card,
.section-card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: 16px;
box-shadow: var(--shadow);
margin-bottom: 16px;
}

.card {
padding: 20px;
}

.header-card h1,
.card h2 {
margin: 0 0 10px;
font-size: 1.25rem;
}

.eyebrow {
margin-bottom: 10px;
color: var(--muted);
font-size: 0.78rem;
letter-spacing: 0.08em;
text-transform: uppercase;
}

.target-line,
.info-line {
color: var(--muted);
overflow-wrap: anywhere;
}

.meta-grid,
.stats-grid,
.gate-grid,
.definition-list {
display: grid;
gap: 12px;
}

.meta-grid,
.gate-grid,
.definition-list {
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
}

.stats-grid {
grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
margin-top: 16px;
}

.meta-grid {
margin-top: 14px;
}

.meta-item,
.definition-item,
.stat-card,
.gate-card {
background: var(--surface-muted);
border: 1px solid var(--border);
border-radius: 12px;
padding: 12px;
}

.meta-item,
.definition-item {
display: flex;
justify-content: space-between;
gap: 12px;
}

.meta-label,
.definition-label,
.stat-label {
color: var(--muted);
font-size: 0.82rem;
}

.meta-value,
.definition-value {
text-align: right;
overflow-wrap: anywhere;
}

.stat-value {
margin-top: 4px;
font-size: 1.3rem;
font-weight: 700;
}

.stat-note {
margin-top: 6px;
color: var(--muted);
font-size: 0.82rem;
}

.status-pass {
border-color: rgba(34, 197, 94, 0.35);
}

.status-warn {
border-color: rgba(234, 179, 8, 0.35);
}

.status-fail {
border-color: rgba(239, 68, 68, 0.35);
}

.status-skip {
border-color: rgba(107, 114, 128, 0.35);
}

.row-list {
display: grid;
gap: 12px;
}

.row {
border: 1px solid var(--border);
border-radius: 12px;
padding: 14px;
background: var(--surface-muted);
}

.row-header {
display: flex;
justify-content: space-between;
align-items: flex-start;
gap: 12px;
}

.row-title {
font-weight: 700;
overflow-wrap: anywhere;
}

.row-subtitle {
margin-top: 4px;
color: var(--muted);
font-size: 0.84rem;
overflow-wrap: anywhere;
}

.row-body {
margin-top: 10px;
overflow-wrap: anywhere;
}

.badge {
display: inline-flex;
align-items: center;
justify-content: center;
min-width: 58px;
padding: 3px 10px;
border-radius: 999px;
border: 1px solid currentColor;
font-size: 0.76rem;
font-weight: 700;
letter-spacing: 0.04em;
white-space: nowrap;
}

.badge.pass {
color: #15803d;
background: rgba(34, 197, 94, 0.14);
}

.badge.warn {
color: #a16207;
background: rgba(234, 179, 8, 0.18);
}

.badge.fail {
color: #b91c1c;
background: rgba(239, 68, 68, 0.14);
}

.badge.skip {
color: #4b5563;
background: rgba(107, 114, 128, 0.14);
}

details {
margin-top: 10px;
}

details summary {
cursor: pointer;
color: var(--muted);
}

.detail-block {
border-top: 1px dashed var(--border);
padding-top: 10px;
}

.detail-content p {
margin: 0;
}

.section-card summary {
display: flex;
justify-content: space-between;
align-items: center;
gap: 12px;
padding: 18px 20px;
list-style: none;
}

.section-card summary::-webkit-details-marker {
display: none;
}

.section-title {
font-size: 1rem;
font-weight: 700;
color: var(--text);
}

.section-summary {
display: inline-flex;
align-items: center;
gap: 8px;
color: var(--muted);
text-align: right;
}

.section-body {
padding: 0 20px 20px;
}

.gate-grid {
margin-top: 12px;
}

pre {
margin: 0;
padding: 12px;
background: #f8fafc;
border: 1px solid var(--border);
border-radius: 10px;
white-space: pre-wrap;
word-break: break-word;
overflow-wrap: anywhere;
}

ul {
margin: 0;
padding-left: 20px;
}

@media (max-width: 720px) {
.container {
padding: 16px 12px 28px;
}

.row-header,
.section-card summary,
.meta-item,
.definition-item {
flex-direction: column;
align-items: flex-start;
}

.meta-value,
.definition-value,
.section-summary {
text-align: left;
}
}`;

  return [
    "<!DOCTYPE html>",
    '<html lang="en">',
    "<head>",
    '<meta charset="utf-8">',
    '<meta name="viewport" content="width=device-width, initial-scale=1">',
    `<title>${safeTitle}</title>`,
    "<style>",
    stylesheet,
    "</style>",
    "</head>",
    "<body>",
    '<main class="container">',
    `${body}`,
    "</main>",
    "</body>",
    "</html>"
  ].join("\n");
}
|
|
1944
|
+
/**
 * Builds the standalone HTML report for a lint run.
 *
 * @param {object} report Lint report; reads `target` and
 *   `summary.{passed,warnings,failures,total}`, and is passed whole to
 *   `renderLintIssueList`.
 * @returns {string} Complete HTML document.
 */
function renderLintHtml(report) {
  const { summary } = report;
  // Guard the division so an empty report shows 0% rather than NaN.
  const passRate = summary.total === 0 ? 0 : summary.passed / summary.total;

  const stats = [
    { label: "Pass rate", value: formatPercent(passRate), note: `${summary.passed}/${summary.total} passed` },
    { label: "Warnings", value: String(summary.warnings), status: summary.warnings > 0 ? "warn" : "pass" },
    { label: "Failures", value: String(summary.failures), status: summary.failures > 0 ? "fail" : "pass" },
    { label: "Checks", value: String(summary.total) }
  ];
  const meta = [{ label: "Target", value: report.target }];

  const headerCard = renderHeaderCard("lint", "Static Analysis Report", report.target, stats, meta);
  const issuesCard = renderSectionCard("Lint Issues", renderLintIssueList(report));

  return renderHtmlDocument(`skilltest lint - ${report.target}`, [headerCard, issuesCard].join(""));
}
|
|
1963
|
+
/**
 * Builds the standalone HTML report for a trigger test run.
 *
 * @param {object} result Trigger result; reads `skillName`, `cases`,
 *   `metrics.{precision,recall,f1}`, `provider`, `model`, `seed`, `queries`,
 *   `suggestions` and the optional `target`.
 * @returns {string} Complete HTML document.
 */
function renderTriggerHtml(result) {
  const { skillName, cases, metrics } = result;
  // `target` is optional on the result; the skill name stands in when absent.
  const target = resolveOptionalTarget(result, skillName);
  const matchedCount = cases.reduce((count, testCase) => (testCase.matched ? count + 1 : count), 0);
  const matchRate = cases.length === 0 ? 0 : matchedCount / cases.length;

  const stats = [
    { label: "Match rate", value: formatPercent(matchRate), note: `${matchedCount}/${cases.length} matched` },
    { label: "Precision", value: formatPercent(metrics.precision) },
    { label: "Recall", value: formatPercent(metrics.recall) },
    { label: "F1", value: formatPercent(metrics.f1), status: metrics.f1 >= 0.8 ? "pass" : "warn" }
  ];
  const meta = [
    { label: "Provider", value: result.provider },
    { label: "Model", value: result.model },
    { label: "Seed", value: result.seed === undefined ? "none" : String(result.seed) },
    { label: "Queries", value: String(result.queries.length) }
  ];
  const headerCard = renderHeaderCard("trigger", skillName, target, stats, meta);

  const caseRows = cases.map((testCase) => renderTriggerCaseRow(testCase)).join("");
  const casesCard = renderSectionCard("Trigger Cases", `<div class="row-list">${caseRows}</div>`);

  const suggestionItems = result.suggestions.map((suggestion) => `<li>${escapeHtml(suggestion)}</li>`).join("");
  const suggestionsCard = renderSectionCard("Suggestions", `<ul>${suggestionItems}</ul>`);

  return renderHtmlDocument(
    `skilltest trigger - ${skillName}`,
    [headerCard, casesCard, suggestionsCard].join("")
  );
}
|
|
1994
|
+
/**
 * Builds the standalone HTML report for an eval run.
 *
 * @param {object} result Eval result; reads `skillName`, `summary`,
 *   `model`, `graderModel`, `provider`, `prompts`, `results` and the
 *   optional `target`.
 * @returns {string} Complete HTML document.
 */
function renderEvalHtml(result) {
  const { skillName, summary } = result;
  // `target` is optional on the result; the skill name stands in when absent.
  const target = resolveOptionalTarget(result, skillName);
  // Guard the division so a run without assertions shows 0% rather than NaN.
  const passRate = summary.totalAssertions === 0 ? 0 : summary.passedAssertions / summary.totalAssertions;

  const stats = [
    {
      label: "Assertion pass rate",
      value: formatPercent(passRate),
      note: `${summary.passedAssertions}/${summary.totalAssertions} passed`
    },
    { label: "Prompts", value: String(summary.totalPrompts) },
    { label: "Model", value: result.model },
    { label: "Grader", value: result.graderModel }
  ];
  const meta = [
    { label: "Provider", value: result.provider },
    { label: "Execution model", value: result.model },
    { label: "Grader model", value: result.graderModel },
    { label: "Prompts", value: String(result.prompts.length) }
  ];
  const headerCard = renderHeaderCard("eval", skillName, target, stats, meta);

  const promptRows = result.results.map((promptResult) => renderEvalPromptRow(promptResult)).join("");
  const promptsCard = renderSectionCard("Eval Prompts", `<div class="row-list">${promptRows}</div>`);

  return renderHtmlDocument(`skilltest eval - ${skillName}`, [headerCard, promptsCard].join(""));
}
|
|
2024
|
+
/**
 * Builds the standalone HTML report for the combined `check` run: a header
 * card, collapsible Lint / Trigger / Eval sections and a quality-gate grid.
 *
 * Trigger and eval are optional; when `result.trigger` / `result.eval` is
 * absent the matching section shows the skip reason (or "Skipped.").
 *
 * @param {object} result Check result; reads `target`, `lint`, `trigger`,
 *   `eval`, `gates`, `thresholds`, `provider`, `model`, `graderModel`,
 *   `triggerSkippedReason` and `evalSkippedReason`.
 * @returns {string} Complete HTML document.
 */
function renderCheckHtml(result) {
  const { gates, thresholds, lint, trigger } = result;
  const evalRun = result.eval;
  const skillName = trigger?.skillName ?? evalRun?.skillName ?? result.target;

  let triggerBody;
  if (trigger) {
    const caseRows = trigger.cases.map((testCase) => renderTriggerCaseRow(testCase)).join("");
    const suggestionItems = trigger.suggestions.map((suggestion) => `<li>${escapeHtml(suggestion)}</li>`).join("");
    triggerBody = [
      `<div class="row-list">${caseRows}</div>`,
      '<div class="card" style="margin-top: 16px;">',
      "<h2>Trigger Suggestions</h2>",
      `<ul>${suggestionItems}</ul>`,
      "</div>"
    ].join("\n");
  } else {
    triggerBody = renderMessageRow("skip", "Trigger skipped", result.triggerSkippedReason ?? "Skipped.");
  }

  let evalBody;
  if (evalRun) {
    const promptRows = evalRun.results.map((promptResult) => renderEvalPromptRow(promptResult)).join("");
    evalBody = `<div class="row-list">${promptRows}</div>`;
  } else {
    evalBody = renderMessageRow("skip", "Eval skipped", result.evalSkippedReason ?? "Skipped.");
  }

  const lintStatus = gates.lintPassed ? "pass" : "fail";
  const triggerStatus = gateStatus(gates.triggerPassed);
  const evalStatus = gateStatus(gates.evalPassed);
  const overallStatus = gates.overallPassed ? "pass" : "fail";

  const stats = [
    { label: "Overall gate", value: badgeLabel(overallStatus), status: overallStatus },
    {
      label: "Trigger F1",
      value: gates.triggerF1 !== null ? formatPercent(gates.triggerF1) : "skipped",
      status: triggerStatus
    },
    {
      label: "Eval pass rate",
      value: gates.evalAssertPassRate !== null ? formatPercent(gates.evalAssertPassRate) : "skipped",
      status: evalStatus
    },
    {
      label: "Lint result",
      value: `${lint.summary.failures} fail / ${lint.summary.warnings} warn`,
      status: lintStatus
    }
  ];
  const meta = [
    { label: "Provider", value: result.provider },
    { label: "Model", value: result.model },
    { label: "Grader model", value: result.graderModel },
    {
      label: "Thresholds",
      value: `min-f1=${thresholds.minF1.toFixed(2)} min-assert-pass-rate=${thresholds.minAssertPassRate.toFixed(2)}`
    }
  ];
  const header = renderHeaderCard("check", skillName, result.target, stats, meta);

  const lintSummary = `${lint.summary.passed}/${lint.summary.total} passed, ${lint.summary.warnings} warnings, ${lint.summary.failures} failures`;
  const lintSection = renderCollapsibleSection("Lint", lintSummary, renderLintIssueList(lint), lintStatus);

  let triggerSummary;
  if (trigger) {
    triggerSummary = `f1=${formatPercent(trigger.metrics.f1)} precision=${formatPercent(trigger.metrics.precision)} recall=${formatPercent(trigger.metrics.recall)}`;
  } else {
    triggerSummary = result.triggerSkippedReason ?? "Skipped.";
  }
  const triggerSection = renderCollapsibleSection("Trigger", triggerSummary, triggerBody, triggerStatus);

  let evalSummary;
  if (evalRun) {
    evalSummary = `assertion pass rate=${formatPercent(gates.evalAssertPassRate ?? 0)} (${evalRun.summary.passedAssertions}/${evalRun.summary.totalAssertions})`;
  } else {
    evalSummary = result.evalSkippedReason ?? "Skipped.";
  }
  const evalSection = renderCollapsibleSection("Eval", evalSummary, evalBody, evalStatus);

  const lintGateCard = renderGateCard("Lint gate", lintStatus, gates.lintPassed ? "Lint passed." : "Lint failed.");
  // A `null` gate means the stage did not run, so show why instead of numbers.
  const triggerGateMessage = gates.triggerPassed === null
    ? result.triggerSkippedReason ?? "Skipped."
    : `required ${thresholds.minF1.toFixed(2)}, actual ${gates.triggerF1?.toFixed(2) ?? "n/a"}`;
  const triggerGateCard = renderGateCard("Trigger gate", triggerStatus, triggerGateMessage);
  const evalGateMessage = gates.evalPassed === null
    ? result.evalSkippedReason ?? "Skipped."
    : `required ${thresholds.minAssertPassRate.toFixed(2)}, actual ${gates.evalAssertPassRate?.toFixed(2) ?? "n/a"}`;
  const evalGateCard = renderGateCard("Eval gate", evalStatus, evalGateMessage);
  const overallGateCard = renderGateCard(
    "Overall",
    overallStatus,
    gates.overallPassed ? "All quality gates passed." : "One or more gates failed."
  );
  const qualityGate = renderSectionCard(
    "Quality Gate",
    [
      '<div class="gate-grid">',
      `${lintGateCard}`,
      `${triggerGateCard}`,
      `${evalGateCard}`,
      `${overallGateCard}`,
      "</div>"
    ].join("\n")
  );

  return renderHtmlDocument(
    `skilltest check - ${skillName}`,
    [header, lintSection, triggerSection, evalSection, qualityGate].join("")
  );
}
|
|
2105
|
+
|
|
1061
2106
|
// src/reporters/terminal.ts
|
|
1062
2107
|
import { Chalk } from "chalk";
|
|
1063
2108
|
function getChalkInstance(enableColor) {
|
|
@@ -1070,6 +2115,14 @@ function renderIssueLine(issue, c) {
|
|
|
1070
2115
|
return ` ${label} ${issue.title}
|
|
1071
2116
|
${issue.message}${detail}`;
|
|
1072
2117
|
}
|
|
2118
|
+
/**
 * Sums the number of skipped patterns across all security lint issues.
 *
 * Only issues whose `checkId` starts with "security:" are counted; an issue
 * without `skippedPatterns` contributes zero.
 *
 * @param {Array<{ checkId: string, skippedPatterns?: unknown[] }>} issues
 * @returns {number} Total skipped pattern count.
 */
function countSkippedSecurityPatterns2(issues) {
  let skipped = 0;
  issues.forEach((issue) => {
    if (issue.checkId.startsWith("security:")) {
      skipped += issue.skippedPatterns?.length ?? 0;
    }
  });
  return skipped;
}
|
|
1073
2126
|
function renderLintReport(report, enableColor) {
|
|
1074
2127
|
const c = getChalkInstance(enableColor);
|
|
1075
2128
|
const { passed, warnings, failures, total } = report.summary;
|
|
@@ -1082,10 +2135,13 @@ function renderLintReport(report, enableColor) {
|
|
|
1082
2135
|
`\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518`
|
|
1083
2136
|
];
|
|
1084
2137
|
const renderedIssues = report.issues.map((issue) => renderIssueLine(issue, c)).join("\n");
|
|
2138
|
+
const skippedSecurityPatterns = countSkippedSecurityPatterns2(report.issues);
|
|
2139
|
+
const infoLine = skippedSecurityPatterns > 0 ? `
|
|
2140
|
+
${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)` : "";
|
|
1085
2141
|
return `${headerLines.join("\n")}
|
|
1086
|
-
${renderedIssues}`;
|
|
2142
|
+
${renderedIssues}${infoLine}`;
|
|
1087
2143
|
}
|
|
1088
|
-
function
|
|
2144
|
+
/**
 * Formats a ratio as a percentage with one decimal place.
 *
 * @param {number} value Ratio to format, e.g. 0.5.
 * @returns {string} Percentage text, e.g. "50.0%".
 */
function formatPercent2(value) {
  const scaled = value * 100;
  return scaled.toFixed(1) + "%";
}
|
|
1091
2147
|
function renderTriggerReport(result, enableColor, verbose) {
|
|
@@ -1097,7 +2153,7 @@ function renderTriggerReport(result, enableColor, verbose) {
|
|
|
1097
2153
|
lines.push(`\u2502 skill: ${result.skillName}`);
|
|
1098
2154
|
lines.push(`\u2502 provider/model: ${result.provider}/${result.model}`);
|
|
1099
2155
|
lines.push(
|
|
1100
|
-
`\u2502 precision: ${
|
|
2156
|
+
`\u2502 precision: ${formatPercent2(result.metrics.precision)} recall: ${formatPercent2(result.metrics.recall)} f1: ${formatPercent2(result.metrics.f1)}`
|
|
1101
2157
|
);
|
|
1102
2158
|
lines.push(
|
|
1103
2159
|
`\u2502 TP ${result.metrics.truePositives} TN ${result.metrics.trueNegatives} FP ${result.metrics.falsePositives} FN ${result.metrics.falseNegatives}`
|
|
@@ -1171,11 +2227,15 @@ function renderCheckReport(result, enableColor, verbose) {
|
|
|
1171
2227
|
for (const issue of lintIssues) {
|
|
1172
2228
|
lines.push(renderIssueLine(issue, c));
|
|
1173
2229
|
}
|
|
2230
|
+
const skippedSecurityPatterns = countSkippedSecurityPatterns2(result.lint.issues);
|
|
2231
|
+
if (skippedSecurityPatterns > 0) {
|
|
2232
|
+
lines.push(` ${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)`);
|
|
2233
|
+
}
|
|
1174
2234
|
lines.push("");
|
|
1175
2235
|
lines.push("Trigger");
|
|
1176
2236
|
if (result.trigger) {
|
|
1177
2237
|
lines.push(
|
|
1178
|
-
`- ${triggerGate} f1=${
|
|
2238
|
+
`- ${triggerGate} f1=${formatPercent2(result.trigger.metrics.f1)} (precision=${formatPercent2(result.trigger.metrics.precision)} recall=${formatPercent2(result.trigger.metrics.recall)})`
|
|
1179
2239
|
);
|
|
1180
2240
|
lines.push(
|
|
1181
2241
|
` TP ${result.trigger.metrics.truePositives} TN ${result.trigger.metrics.trueNegatives} FP ${result.trigger.metrics.falsePositives} FN ${result.trigger.metrics.falseNegatives}`
|
|
@@ -1194,7 +2254,7 @@ function renderCheckReport(result, enableColor, verbose) {
|
|
|
1194
2254
|
if (result.eval) {
|
|
1195
2255
|
const passRate = result.gates.evalAssertPassRate ?? 0;
|
|
1196
2256
|
lines.push(
|
|
1197
|
-
`- ${evalGate} assertion pass rate=${
|
|
2257
|
+
`- ${evalGate} assertion pass rate=${formatPercent2(passRate)} (${result.eval.summary.passedAssertions}/${result.eval.summary.totalAssertions})`
|
|
1198
2258
|
);
|
|
1199
2259
|
for (const promptResult of result.eval.results) {
|
|
1200
2260
|
const failedAssertions = promptResult.assertions.filter((assertion) => !assertion.passed);
|
|
@@ -1286,6 +2346,58 @@ async function gradeResponse(options) {
|
|
|
1286
2346
|
return parsed.data.assertions;
|
|
1287
2347
|
}
|
|
1288
2348
|
|
|
2349
|
+
// src/utils/concurrency.ts
|
|
2350
|
+
/**
 * Maps `items` through an async function with a cap on how many calls are in
 * flight at once, preserving input order in the output.
 *
 * Fails fast: the returned promise rejects with the first error, and no
 * further items are started after that (calls already in flight are left to
 * finish and their results are discarded).
 *
 * @param {Array} items Inputs to process.
 * @param {(item: any, index: number) => any} fn Mapper; may return a value
 *   or a promise. Synchronous throws are treated as rejections.
 * @param {number} concurrency Maximum number of concurrent calls; must be an
 *   integer >= 1.
 * @returns {Promise<Array>} Results, where `results[i]` corresponds to `items[i]`.
 * @throws {Error} (as a rejection) when `concurrency` is not a positive integer.
 */
async function pMap(items, fn, concurrency) {
  if (!Number.isInteger(concurrency) || concurrency < 1) {
    throw new Error("pMap concurrency must be an integer greater than or equal to 1.");
  }
  if (items.length === 0) {
    return [];
  }

  const output = new Array(items.length);
  let cursor = 0;
  let failed = false;

  // Each worker pulls the next unclaimed index until the list is exhausted
  // or some call has failed.
  const runWorker = async () => {
    while (!failed && cursor < items.length) {
      const slot = cursor;
      cursor += 1;
      try {
        // Start fn from a microtask so it never runs synchronously inside
        // the caller and a synchronous throw becomes a rejection.
        output[slot] = await Promise.resolve().then(() => fn(items[slot], slot));
      } catch (error) {
        failed = true;
        throw error;
      }
    }
  };

  const workerCount = Math.min(concurrency, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return output;
}
|
|
2400
|
+
|
|
1289
2401
|
// src/core/eval-runner.ts
|
|
1290
2402
|
var evalPromptSchema = z3.object({
|
|
1291
2403
|
prompt: z3.string().min(1),
|
|
@@ -1332,34 +2444,37 @@ async function generatePrompts(skill, provider, model, count) {
|
|
|
1332
2444
|
}
|
|
1333
2445
|
async function runEval(skill, options) {
|
|
1334
2446
|
const prompts = options.prompts && options.prompts.length > 0 ? evalPromptArraySchema.parse(options.prompts) : await generatePrompts(skill, options.provider, options.model, options.numRuns);
|
|
1335
|
-
const
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
2447
|
+
const systemPrompt = [
|
|
2448
|
+
"You are an AI assistant with an activated skill.",
|
|
2449
|
+
"Follow this SKILL.md content exactly where applicable.",
|
|
2450
|
+
"",
|
|
2451
|
+
skill.raw
|
|
2452
|
+
].join("\n");
|
|
2453
|
+
const results = await pMap(
|
|
2454
|
+
prompts,
|
|
2455
|
+
async (evalPrompt) => {
|
|
2456
|
+
const response = await options.provider.sendMessage(systemPrompt, evalPrompt.prompt, { model: options.model });
|
|
2457
|
+
const gradedAssertions = await gradeResponse({
|
|
2458
|
+
provider: options.provider,
|
|
2459
|
+
model: options.graderModel,
|
|
2460
|
+
skillName: skill.frontmatter.name,
|
|
2461
|
+
skillBody: skill.content,
|
|
2462
|
+
userPrompt: evalPrompt.prompt,
|
|
2463
|
+
modelResponse: response,
|
|
2464
|
+
assertions: evalPrompt.assertions
|
|
2465
|
+
});
|
|
2466
|
+
const passedAssertions2 = gradedAssertions.filter((assertion) => assertion.passed).length;
|
|
2467
|
+
return {
|
|
2468
|
+
prompt: evalPrompt.prompt,
|
|
2469
|
+
assertions: gradedAssertions,
|
|
2470
|
+
responseSummary: response.slice(0, 200),
|
|
2471
|
+
response,
|
|
2472
|
+
passedAssertions: passedAssertions2,
|
|
2473
|
+
totalAssertions: gradedAssertions.length
|
|
2474
|
+
};
|
|
2475
|
+
},
|
|
2476
|
+
options.concurrency ?? 5
|
|
2477
|
+
);
|
|
1363
2478
|
const totalAssertions = results.reduce((total, result) => total + result.totalAssertions, 0);
|
|
1364
2479
|
const passedAssertions = results.reduce((total, result) => total + result.passedAssertions, 0);
|
|
1365
2480
|
return {
|
|
@@ -1401,23 +2516,28 @@ var FAKE_SKILLS = [
|
|
|
1401
2516
|
{ name: "test-generator", description: "Generates unit and integration test cases from feature requirements." },
|
|
1402
2517
|
{ name: "prompt-tuner", description: "Improves prompts for reliability, formatting, and failure handling." }
|
|
1403
2518
|
];
|
|
1404
|
-
function
|
|
1405
|
-
let state = seed >>> 0;
|
|
2519
|
+
/**
 * Creates a seeded pseudo-random number generator.
 *
 * The same seed always yields the same sequence. Not suitable for anything
 * security-sensitive.
 *
 * @param {number} seed Initial state; coerced to a 32-bit integer.
 * @returns {() => number} Generator returning values in [0, 1).
 */
function mulberry32(seed) {
  // Internal state is kept as a signed 32-bit integer.
  let state = seed | 0;
  return () => {
    state = (state + 1831565813) | 0;
    let mixed = Math.imul(state ^ (state >>> 15), 1 | state);
    mixed = (mixed + Math.imul(mixed ^ (mixed >>> 7), 61 | mixed)) ^ mixed;
    // Reinterpret as unsigned and scale by 2^32 to land in [0, 1).
    return ((mixed ^ (mixed >>> 14)) >>> 0) / 4294967296;
  };
}
|
|
1411
|
-
function
|
|
2528
|
+
/**
 * Picks the random source: a seeded `mulberry32` generator when a seed is
 * given, otherwise `Math.random`.
 *
 * @param {number} [seed] Optional seed; only `undefined` selects `Math.random`.
 * @returns {() => number} Function returning the next random number.
 */
function createRng(seed) {
  if (seed === undefined) {
    return Math.random;
  }
  return mulberry32(seed);
}
|
|
2531
|
+
/**
 * Returns a shuffled copy of `values` using the supplied random source; the
 * input array is not modified.
 *
 * Walks from the last position down to the second, swapping each element
 * with one at a random position at or before it.
 *
 * @param {Array} values Items to shuffle.
 * @param {() => number} rng Random source returning values in [0, 1).
 * @returns {Array} New array containing the same items in shuffled order.
 */
function shuffle(values, rng) {
  const shuffled = values.slice();
  let position = shuffled.length - 1;
  while (position > 0) {
    const pick = Math.floor(rng() * (position + 1));
    const held = shuffled[position];
    shuffled[position] = shuffled[pick];
    shuffled[pick] = held;
    position -= 1;
  }
  return shuffled;
}
|
|
1419
|
-
function sample(values, count,
|
|
1420
|
-
return shuffle(values,
|
|
2539
|
+
/**
 * Picks up to `count` items from `values` in random order, without
 * modifying the input.
 *
 * @param {Array} values Pool to draw from.
 * @param {number} count Requested number of items; clamped to the range
 *   [0, values.length].
 * @param {() => number} rng Random source passed through to `shuffle`.
 * @returns {Array} The first `count` items of a shuffled copy.
 */
function sample(values, count, rng) {
  const shuffled = shuffle(values, rng);
  const take = Math.max(0, Math.min(count, values.length));
  return shuffled.slice(0, take);
}
|
|
1422
2542
|
function parseJsonArrayFromModelOutput(raw) {
|
|
1423
2543
|
const trimmed = raw.trim();
|
|
@@ -1529,48 +2649,61 @@ function buildSuggestions(metrics) {
|
|
|
1529
2649
|
return suggestions;
|
|
1530
2650
|
}
|
|
1531
2651
|
async function runTriggerTest(skill, options) {
|
|
1532
|
-
const
|
|
2652
|
+
const rng = createRng(options.seed);
|
|
1533
2653
|
const queries = options.queries && options.queries.length > 0 ? triggerQueryArraySchema.parse(options.queries) : await generateQueriesWithModel(skill, options.provider, options.model, options.numQueries);
|
|
1534
|
-
const results = [];
|
|
1535
2654
|
const skillName = skill.frontmatter.name;
|
|
1536
|
-
|
|
1537
|
-
const fakeCount = 5 + Math.floor(
|
|
1538
|
-
const fakeSkills = sample(FAKE_SKILLS, fakeCount,
|
|
2655
|
+
const preparedQueries = queries.map((testQuery) => {
|
|
2656
|
+
const fakeCount = 5 + Math.floor(rng() * 5);
|
|
2657
|
+
const fakeSkills = sample(FAKE_SKILLS, fakeCount, rng);
|
|
1539
2658
|
const allSkills = shuffle([
|
|
1540
2659
|
...fakeSkills,
|
|
1541
2660
|
{
|
|
1542
2661
|
name: skill.frontmatter.name,
|
|
1543
2662
|
description: skill.frontmatter.description
|
|
1544
2663
|
}
|
|
1545
|
-
],
|
|
2664
|
+
], rng);
|
|
1546
2665
|
const skillListText = allSkills.map((entry) => `- ${entry.name}: ${entry.description}`).join("\n");
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
2666
|
+
return {
|
|
2667
|
+
testQuery,
|
|
2668
|
+
fakeCount,
|
|
2669
|
+
fakeSkills,
|
|
2670
|
+
allSkills,
|
|
2671
|
+
skillListText
|
|
2672
|
+
};
|
|
2673
|
+
});
|
|
2674
|
+
const systemPrompt = [
|
|
2675
|
+
"You are selecting one skill to activate for a user query.",
|
|
2676
|
+
"Choose the single best matching skill name from the provided list, or 'none' if no skill is a good fit.",
|
|
2677
|
+
"Respond with only the skill name or 'none'."
|
|
2678
|
+
].join(" ");
|
|
2679
|
+
const results = await pMap(
|
|
2680
|
+
preparedQueries,
|
|
2681
|
+
async ({ testQuery, allSkills, skillListText }) => {
|
|
2682
|
+
const userPrompt = [`Available skills:`, skillListText, "", `User query: ${testQuery.query}`].join("\n");
|
|
2683
|
+
const rawResponse = await options.provider.sendMessage(systemPrompt, userPrompt, { model: options.model });
|
|
2684
|
+
const decision = parseDecision(
|
|
2685
|
+
rawResponse,
|
|
2686
|
+
allSkills.map((entry) => entry.name)
|
|
2687
|
+
);
|
|
2688
|
+
const expected = testQuery.should_trigger ? skillName : "none";
|
|
2689
|
+
const matched = testQuery.should_trigger ? decision === skillName : decision !== skillName;
|
|
2690
|
+
return {
|
|
2691
|
+
query: testQuery.query,
|
|
2692
|
+
shouldTrigger: testQuery.should_trigger,
|
|
2693
|
+
expected,
|
|
2694
|
+
actual: decision,
|
|
2695
|
+
matched,
|
|
2696
|
+
rawModelResponse: options.verbose ? rawResponse : void 0
|
|
2697
|
+
};
|
|
2698
|
+
},
|
|
2699
|
+
options.concurrency ?? 5
|
|
2700
|
+
);
|
|
1569
2701
|
const metrics = calculateMetrics(skillName, results);
|
|
1570
2702
|
return {
|
|
1571
2703
|
skillName,
|
|
1572
2704
|
model: options.model,
|
|
1573
2705
|
provider: options.provider.name,
|
|
2706
|
+
seed: options.seed,
|
|
1574
2707
|
queries,
|
|
1575
2708
|
cases: results,
|
|
1576
2709
|
metrics,
|
|
@@ -1730,6 +2863,9 @@ function writeError(error, asJson) {
|
|
|
1730
2863
|
}
|
|
1731
2864
|
|
|
1732
2865
|
// src/commands/lint.ts
|
|
2866
|
+
/**
 * Schema for the command-specific flags of `skilltest lint`.
 *
 * `html` is the optional output path supplied through `--html <path>`; when
 * present the lint command writes an HTML report to that file.
 */
var lintCliSchema = z6.object({ html: z6.string().optional() });
|
|
1733
2869
|
async function handleLintCommand(targetPath, options) {
|
|
1734
2870
|
try {
|
|
1735
2871
|
const report = await runLinter(targetPath, { suppress: options.suppress });
|
|
@@ -1738,6 +2874,9 @@ async function handleLintCommand(targetPath, options) {
|
|
|
1738
2874
|
} else {
|
|
1739
2875
|
writeResult(renderLintReport(report, options.color), false);
|
|
1740
2876
|
}
|
|
2877
|
+
if (options.html) {
|
|
2878
|
+
await fs6.writeFile(options.html, renderLintHtml(report), "utf8");
|
|
2879
|
+
}
|
|
1741
2880
|
if (lintFails(report, options.failOn)) {
|
|
1742
2881
|
process.exitCode = 1;
|
|
1743
2882
|
}
|
|
@@ -1747,74 +2886,85 @@ async function handleLintCommand(targetPath, options) {
|
|
|
1747
2886
|
}
|
|
1748
2887
|
}
|
|
1749
2888
|
function registerLintCommand(program) {
|
|
1750
|
-
program.command("lint").description("Run static lint checks against a SKILL.md file or skill directory.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").action(async (targetPath, _commandOptions, command) => {
|
|
2889
|
+
program.command("lint").description("Run static lint checks against a SKILL.md file or skill directory.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--html <path>", "Write an HTML report to the given file path").action(async (targetPath, _commandOptions, command) => {
|
|
1751
2890
|
const globalOptions = getGlobalCliOptions(command);
|
|
1752
2891
|
const config = getResolvedConfig(command);
|
|
2892
|
+
const parsedCli = lintCliSchema.safeParse(command.opts());
|
|
2893
|
+
if (!parsedCli.success) {
|
|
2894
|
+
writeError(new Error(parsedCli.error.issues[0]?.message ?? "Invalid lint options."), globalOptions.json);
|
|
2895
|
+
process.exitCode = 2;
|
|
2896
|
+
return;
|
|
2897
|
+
}
|
|
1753
2898
|
await handleLintCommand(targetPath, {
|
|
1754
2899
|
...globalOptions,
|
|
1755
2900
|
failOn: config.lint.failOn,
|
|
1756
|
-
suppress: config.lint.suppress
|
|
2901
|
+
suppress: config.lint.suppress,
|
|
2902
|
+
html: parsedCli.data.html
|
|
1757
2903
|
});
|
|
1758
2904
|
});
|
|
1759
2905
|
}
|
|
1760
2906
|
|
|
1761
2907
|
// src/commands/trigger.ts
|
|
2908
|
+
import fs8 from "node:fs/promises";
|
|
1762
2909
|
import ora from "ora";
|
|
1763
|
-
import { z as
|
|
2910
|
+
import { z as z8 } from "zod";
|
|
1764
2911
|
|
|
1765
2912
|
// src/utils/config.ts
|
|
1766
|
-
import
|
|
2913
|
+
import fs7 from "node:fs/promises";
|
|
1767
2914
|
import path5 from "node:path";
|
|
1768
|
-
import { z as
|
|
1769
|
-
var providerNameSchema =
|
|
1770
|
-
var lintFailOnSchema =
|
|
1771
|
-
var lintConfigSchema =
|
|
2915
|
+
import { z as z7 } from "zod";
|
|
2916
|
+
var providerNameSchema = z7.enum(["anthropic", "openai"]);
|
|
2917
|
+
var lintFailOnSchema = z7.enum(["error", "warn"]);
|
|
2918
|
+
var lintConfigSchema = z7.object({
|
|
1772
2919
|
failOn: lintFailOnSchema.optional(),
|
|
1773
|
-
suppress:
|
|
2920
|
+
suppress: z7.array(z7.string().min(1)).optional()
|
|
1774
2921
|
}).strict();
|
|
1775
|
-
var triggerConfigSchema =
|
|
1776
|
-
numQueries:
|
|
1777
|
-
threshold:
|
|
1778
|
-
seed:
|
|
2922
|
+
var triggerConfigSchema = z7.object({
|
|
2923
|
+
numQueries: z7.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
|
|
2924
|
+
threshold: z7.number().min(0).max(1).optional(),
|
|
2925
|
+
seed: z7.number().int().optional()
|
|
1779
2926
|
}).strict().partial();
|
|
1780
|
-
var evalConfigSchema =
|
|
1781
|
-
numRuns:
|
|
1782
|
-
threshold:
|
|
1783
|
-
promptFile:
|
|
1784
|
-
assertionsFile:
|
|
2927
|
+
var evalConfigSchema = z7.object({
|
|
2928
|
+
numRuns: z7.number().int().min(1).optional(),
|
|
2929
|
+
threshold: z7.number().min(0).max(1).optional(),
|
|
2930
|
+
promptFile: z7.string().min(1).optional(),
|
|
2931
|
+
assertionsFile: z7.string().min(1).optional()
|
|
1785
2932
|
}).strict().partial();
|
|
1786
|
-
var skilltestConfigSchema =
|
|
2933
|
+
var skilltestConfigSchema = z7.object({
|
|
1787
2934
|
provider: providerNameSchema.optional(),
|
|
1788
|
-
model:
|
|
1789
|
-
json:
|
|
2935
|
+
model: z7.string().min(1).optional(),
|
|
2936
|
+
json: z7.boolean().optional(),
|
|
2937
|
+
concurrency: z7.number().int().min(1).optional(),
|
|
1790
2938
|
lint: lintConfigSchema.optional(),
|
|
1791
2939
|
trigger: triggerConfigSchema.optional(),
|
|
1792
2940
|
eval: evalConfigSchema.optional()
|
|
1793
2941
|
}).strict();
|
|
1794
|
-
var resolvedSkilltestConfigSchema =
|
|
2942
|
+
var resolvedSkilltestConfigSchema = z7.object({
|
|
1795
2943
|
provider: providerNameSchema,
|
|
1796
|
-
model:
|
|
1797
|
-
json:
|
|
1798
|
-
|
|
2944
|
+
model: z7.string().min(1),
|
|
2945
|
+
json: z7.boolean(),
|
|
2946
|
+
concurrency: z7.number().int().min(1),
|
|
2947
|
+
lint: z7.object({
|
|
1799
2948
|
failOn: lintFailOnSchema,
|
|
1800
|
-
suppress:
|
|
2949
|
+
suppress: z7.array(z7.string().min(1))
|
|
1801
2950
|
}),
|
|
1802
|
-
trigger:
|
|
1803
|
-
numQueries:
|
|
1804
|
-
threshold:
|
|
1805
|
-
seed:
|
|
2951
|
+
trigger: z7.object({
|
|
2952
|
+
numQueries: z7.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
|
|
2953
|
+
threshold: z7.number().min(0).max(1),
|
|
2954
|
+
seed: z7.number().int().optional()
|
|
1806
2955
|
}),
|
|
1807
|
-
eval:
|
|
1808
|
-
numRuns:
|
|
1809
|
-
threshold:
|
|
1810
|
-
promptFile:
|
|
1811
|
-
assertionsFile:
|
|
2956
|
+
eval: z7.object({
|
|
2957
|
+
numRuns: z7.number().int().min(1),
|
|
2958
|
+
threshold: z7.number().min(0).max(1),
|
|
2959
|
+
promptFile: z7.string().min(1).optional(),
|
|
2960
|
+
assertionsFile: z7.string().min(1).optional()
|
|
1812
2961
|
})
|
|
1813
2962
|
});
|
|
1814
2963
|
var DEFAULT_SKILLTEST_CONFIG = {
|
|
1815
2964
|
provider: "anthropic",
|
|
1816
2965
|
model: "claude-sonnet-4-5-20250929",
|
|
1817
2966
|
json: false,
|
|
2967
|
+
concurrency: 5,
|
|
1818
2968
|
lint: {
|
|
1819
2969
|
failOn: "error",
|
|
1820
2970
|
suppress: []
|
|
@@ -1843,7 +2993,7 @@ function buildConfigValidationError(error, sourceLabel) {
|
|
|
1843
2993
|
async function readJsonObject(filePath, label) {
|
|
1844
2994
|
let raw;
|
|
1845
2995
|
try {
|
|
1846
|
-
raw = await
|
|
2996
|
+
raw = await fs7.readFile(filePath, "utf8");
|
|
1847
2997
|
} catch (error) {
|
|
1848
2998
|
const message = error instanceof Error ? error.message : String(error);
|
|
1849
2999
|
throw new Error(`Failed to read ${label}: ${message}`);
|
|
@@ -1876,7 +3026,7 @@ async function loadConfigFromNearestPackageJson(startDirectory) {
|
|
|
1876
3026
|
const packageJsonPath = path5.join(currentDirectory, "package.json");
|
|
1877
3027
|
if (await pathExists(packageJsonPath)) {
|
|
1878
3028
|
const raw = await readJsonObject(packageJsonPath, packageJsonPath);
|
|
1879
|
-
const packageJsonSchema =
|
|
3029
|
+
const packageJsonSchema = z7.object({
|
|
1880
3030
|
skilltestrc: skilltestConfigSchema.optional()
|
|
1881
3031
|
}).passthrough();
|
|
1882
3032
|
const parsed = packageJsonSchema.safeParse(raw);
|
|
@@ -1921,6 +3071,7 @@ function mergeConfigLayers(configFile = {}, cliFlags = {}, baseDirectory = proce
|
|
|
1921
3071
|
provider: cliFlags.provider ?? configFile.provider ?? DEFAULT_SKILLTEST_CONFIG.provider,
|
|
1922
3072
|
model: cliFlags.model ?? configFile.model ?? DEFAULT_SKILLTEST_CONFIG.model,
|
|
1923
3073
|
json: cliFlags.json ?? configFile.json ?? DEFAULT_SKILLTEST_CONFIG.json,
|
|
3074
|
+
concurrency: cliFlags.concurrency ?? configFile.concurrency ?? DEFAULT_SKILLTEST_CONFIG.concurrency,
|
|
1924
3075
|
lint: {
|
|
1925
3076
|
failOn: cliFlags.lint?.failOn ?? configFile.lint?.failOn ?? DEFAULT_SKILLTEST_CONFIG.lint.failOn,
|
|
1926
3077
|
suppress: cliFlags.lint?.suppress ?? configFile.lint?.suppress ?? DEFAULT_SKILLTEST_CONFIG.lint.suppress
|
|
@@ -1964,6 +3115,9 @@ function extractCliConfigOverrides(command) {
|
|
|
1964
3115
|
if (command.getOptionValueSource("model") === "cli") {
|
|
1965
3116
|
overrides.model = getTypedOptionValue(command, "model");
|
|
1966
3117
|
}
|
|
3118
|
+
if ((command.name() === "trigger" || command.name() === "eval" || command.name() === "check") && command.getOptionValueSource("concurrency") === "cli") {
|
|
3119
|
+
overrides.concurrency = getTypedOptionValue(command, "concurrency");
|
|
3120
|
+
}
|
|
1967
3121
|
if ((command.name() === "trigger" || command.name() === "check") && command.getOptionValueSource("numQueries") === "cli") {
|
|
1968
3122
|
overrides.trigger = {
|
|
1969
3123
|
...overrides.trigger,
|
|
@@ -1993,7 +3147,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
|
|
|
1993
3147
|
const skillDirectoryConfig = await resolveSkillDirectoryConfig(targetPath);
|
|
1994
3148
|
if (skillDirectoryConfig) {
|
|
1995
3149
|
return {
|
|
1996
|
-
configFile: skillDirectoryConfig.configFile,
|
|
1997
3150
|
...skillDirectoryConfig,
|
|
1998
3151
|
config: mergeConfigLayers(skillDirectoryConfig.configFile, cliFlags, skillDirectoryConfig.sourceDirectory)
|
|
1999
3152
|
};
|
|
@@ -2002,7 +3155,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
|
|
|
2002
3155
|
const cwdConfig = await loadConfigFromJsonFile(cwdConfigPath);
|
|
2003
3156
|
if (cwdConfig) {
|
|
2004
3157
|
return {
|
|
2005
|
-
configFile: cwdConfig.configFile,
|
|
2006
3158
|
...cwdConfig,
|
|
2007
3159
|
config: mergeConfigLayers(cwdConfig.configFile, cliFlags, cwdConfig.sourceDirectory)
|
|
2008
3160
|
};
|
|
@@ -2010,7 +3162,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
|
|
|
2010
3162
|
const packageJsonConfig = await loadConfigFromNearestPackageJson(cwd);
|
|
2011
3163
|
if (packageJsonConfig) {
|
|
2012
3164
|
return {
|
|
2013
|
-
configFile: packageJsonConfig.configFile,
|
|
2014
3165
|
...packageJsonConfig,
|
|
2015
3166
|
config: mergeConfigLayers(packageJsonConfig.configFile, cliFlags, packageJsonConfig.sourceDirectory)
|
|
2016
3167
|
};
|
|
@@ -2218,11 +3369,14 @@ function createProvider(providerName, apiKeyOverride) {
|
|
|
2218
3369
|
}
|
|
2219
3370
|
|
|
2220
3371
|
// src/commands/trigger.ts
|
|
2221
|
-
var triggerCliSchema =
|
|
2222
|
-
queries:
|
|
2223
|
-
saveQueries:
|
|
2224
|
-
|
|
2225
|
-
|
|
3372
|
+
/**
 * Schema for the command-specific flags of `skilltest trigger`.
 *
 * Every key is optional:
 *  - `queries`, `saveQueries`, `html`, `apiKey`: strings
 *  - `seed`: integer
 *  - `concurrency`: integer >= 1
 *  - `verbose`: boolean
 */
var triggerCliSchema = (() => {
  // Small factories for the value kinds that repeat across the flags.
  const optionalText = () => z8.string().optional();
  const optionalFlag = () => z8.boolean().optional();

  return z8.object({
    queries: optionalText(),
    saveQueries: optionalText(),
    seed: z8.number().int().optional(),
    concurrency: z8.number().int().min(1).optional(),
    html: optionalText(),
    verbose: optionalFlag(),
    apiKey: optionalText()
  });
})();
|
|
2227
3381
|
var DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-5-20250929";
|
|
2228
3382
|
var DEFAULT_OPENAI_MODEL = "gpt-4.1-mini";
|
|
@@ -2232,6 +3386,13 @@ function resolveModel(provider, model) {
|
|
|
2232
3386
|
}
|
|
2233
3387
|
return model;
|
|
2234
3388
|
}
|
|
3389
|
+
/**
 * Appends a trailing `Seed: <seed>` line to the rendered trigger report.
 *
 * @param {string} output - Rendered trigger report text.
 * @param {number | null | undefined} seed - Seed used for the run, if any.
 * @returns {string} `output` unchanged when there is no seed, otherwise
 *   `output` followed by a newline and the seed line.
 */
function renderTriggerOutputWithSeed(output, seed) {
  // `== null` matches both undefined and null, so a missing seed never
  // renders as "Seed: null". A seed of 0 is still printed.
  if (seed == null) {
    return output;
  }
  return `${output}\nSeed: ${seed}`;
}
|
|
2235
3396
|
async function handleTriggerCommand(targetPath, options) {
|
|
2236
3397
|
const spinner = options.json || !process.stdout.isTTY ? null : ora("Preparing trigger evaluation...").start();
|
|
2237
3398
|
try {
|
|
@@ -2260,6 +3421,7 @@ async function handleTriggerCommand(targetPath, options) {
|
|
|
2260
3421
|
queries,
|
|
2261
3422
|
numQueries: options.numQueries,
|
|
2262
3423
|
seed: options.seed,
|
|
3424
|
+
concurrency: options.concurrency,
|
|
2263
3425
|
verbose: options.verbose
|
|
2264
3426
|
});
|
|
2265
3427
|
if (options.saveQueries) {
|
|
@@ -2269,7 +3431,14 @@ async function handleTriggerCommand(targetPath, options) {
|
|
|
2269
3431
|
if (options.json) {
|
|
2270
3432
|
writeResult(result, true);
|
|
2271
3433
|
} else {
|
|
2272
|
-
writeResult(renderTriggerReport(result, options.color, options.verbose), false);
|
|
3434
|
+
writeResult(renderTriggerOutputWithSeed(renderTriggerReport(result, options.color, options.verbose), result.seed), false);
|
|
3435
|
+
}
|
|
3436
|
+
if (options.html) {
|
|
3437
|
+
const htmlResult = {
|
|
3438
|
+
...result,
|
|
3439
|
+
target: targetPath
|
|
3440
|
+
};
|
|
3441
|
+
await fs8.writeFile(options.html, renderTriggerHtml(htmlResult), "utf8");
|
|
2273
3442
|
}
|
|
2274
3443
|
} catch (error) {
|
|
2275
3444
|
spinner?.stop();
|
|
@@ -2278,7 +3447,7 @@ async function handleTriggerCommand(targetPath, options) {
|
|
|
2278
3447
|
}
|
|
2279
3448
|
}
|
|
2280
3449
|
function registerTriggerCommand(program) {
|
|
2281
|
-
program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
|
|
3450
|
+
program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--concurrency <n>", "Maximum in-flight trigger requests", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
|
|
2282
3451
|
const globalOptions = getGlobalCliOptions(command);
|
|
2283
3452
|
const config = getResolvedConfig(command);
|
|
2284
3453
|
const parsedCli = triggerCliSchema.safeParse(command.opts());
|
|
@@ -2294,7 +3463,9 @@ function registerTriggerCommand(program) {
|
|
|
2294
3463
|
queries: parsedCli.data.queries,
|
|
2295
3464
|
numQueries: config.trigger.numQueries,
|
|
2296
3465
|
saveQueries: parsedCli.data.saveQueries,
|
|
2297
|
-
seed: config.trigger.seed,
|
|
3466
|
+
seed: parsedCli.data.seed ?? config.trigger.seed,
|
|
3467
|
+
concurrency: config.concurrency,
|
|
3468
|
+
html: parsedCli.data.html,
|
|
2298
3469
|
verbose: Boolean(parsedCli.data.verbose),
|
|
2299
3470
|
apiKey: parsedCli.data.apiKey
|
|
2300
3471
|
});
|
|
@@ -2302,14 +3473,17 @@ function registerTriggerCommand(program) {
|
|
|
2302
3473
|
}
|
|
2303
3474
|
|
|
2304
3475
|
// src/commands/eval.ts
|
|
3476
|
+
import fs9 from "node:fs/promises";
|
|
2305
3477
|
import ora2 from "ora";
|
|
2306
|
-
import { z as
|
|
2307
|
-
var evalCliSchema =
|
|
2308
|
-
prompts:
|
|
2309
|
-
graderModel:
|
|
2310
|
-
saveResults:
|
|
2311
|
-
|
|
2312
|
-
|
|
3478
|
+
import { z as z9 } from "zod";
|
|
3479
|
+
/**
 * Schema for the command-specific flags of `skilltest eval`.
 *
 * Every key is optional:
 *  - `prompts`, `graderModel`, `saveResults`, `html`, `apiKey`: strings
 *  - `concurrency`: integer >= 1
 *  - `verbose`: boolean
 */
var evalCliSchema = (() => {
  // Small factories for the value kinds that repeat across the flags.
  const optionalText = () => z9.string().optional();
  const optionalFlag = () => z9.boolean().optional();

  return z9.object({
    prompts: optionalText(),
    graderModel: optionalText(),
    saveResults: optionalText(),
    concurrency: z9.number().int().min(1).optional(),
    html: optionalText(),
    verbose: optionalFlag(),
    apiKey: optionalText()
  });
})();
|
|
2314
3488
|
var DEFAULT_ANTHROPIC_MODEL2 = "claude-sonnet-4-5-20250929";
|
|
2315
3489
|
var DEFAULT_OPENAI_MODEL2 = "gpt-4.1-mini";
|
|
@@ -2349,6 +3523,7 @@ async function handleEvalCommand(targetPath, options, command) {
|
|
|
2349
3523
|
model,
|
|
2350
3524
|
graderModel,
|
|
2351
3525
|
numRuns: options.numRuns,
|
|
3526
|
+
concurrency: options.concurrency,
|
|
2352
3527
|
prompts
|
|
2353
3528
|
});
|
|
2354
3529
|
if (options.saveResults) {
|
|
@@ -2360,6 +3535,13 @@ async function handleEvalCommand(targetPath, options, command) {
|
|
|
2360
3535
|
} else {
|
|
2361
3536
|
writeResult(renderEvalReport(result, options.color, options.verbose), false);
|
|
2362
3537
|
}
|
|
3538
|
+
if (options.html) {
|
|
3539
|
+
const htmlResult = {
|
|
3540
|
+
...result,
|
|
3541
|
+
target: targetPath
|
|
3542
|
+
};
|
|
3543
|
+
await fs9.writeFile(options.html, renderEvalHtml(htmlResult), "utf8");
|
|
3544
|
+
}
|
|
2363
3545
|
} catch (error) {
|
|
2364
3546
|
spinner?.stop();
|
|
2365
3547
|
writeError(error, options.json);
|
|
@@ -2367,7 +3549,7 @@ async function handleEvalCommand(targetPath, options, command) {
|
|
|
2367
3549
|
}
|
|
2368
3550
|
}
|
|
2369
3551
|
function registerEvalCommand(program) {
|
|
2370
|
-
program.command("eval").description("Run end-to-end skill execution and quality evaluation.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--prompts <path>", "Path to eval prompts JSON").option("--model <model>", "Model to execute prompts").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--provider <provider>", "LLM provider: anthropic|openai").option("--save-results <path>", "Save full evaluation results to JSON").option("--api-key <key>", "API key override").option("--verbose", "Show full model responses").action(async (targetPath, _commandOptions, command) => {
|
|
3552
|
+
program.command("eval").description("Run end-to-end skill execution and quality evaluation.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--prompts <path>", "Path to eval prompts JSON").option("--model <model>", "Model to execute prompts").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--provider <provider>", "LLM provider: anthropic|openai").option("--concurrency <n>", "Maximum in-flight eval prompt runs", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--save-results <path>", "Save full evaluation results to JSON").option("--api-key <key>", "API key override").option("--verbose", "Show full model responses").action(async (targetPath, _commandOptions, command) => {
|
|
2371
3553
|
const globalOptions = getGlobalCliOptions(command);
|
|
2372
3554
|
const config = getResolvedConfig(command);
|
|
2373
3555
|
const parsedCli = evalCliSchema.safeParse(command.opts());
|
|
@@ -2385,9 +3567,11 @@ function registerEvalCommand(program) {
|
|
|
2385
3567
|
graderModel: parsedCli.data.graderModel,
|
|
2386
3568
|
provider: config.provider,
|
|
2387
3569
|
saveResults: parsedCli.data.saveResults,
|
|
3570
|
+
html: parsedCli.data.html,
|
|
2388
3571
|
verbose: Boolean(parsedCli.data.verbose),
|
|
2389
3572
|
apiKey: parsedCli.data.apiKey,
|
|
2390
|
-
numRuns: config.eval.numRuns
|
|
3573
|
+
numRuns: config.eval.numRuns,
|
|
3574
|
+
concurrency: config.concurrency
|
|
2391
3575
|
},
|
|
2392
3576
|
command
|
|
2393
3577
|
);
|
|
@@ -2395,8 +3579,9 @@ function registerEvalCommand(program) {
|
|
|
2395
3579
|
}
|
|
2396
3580
|
|
|
2397
3581
|
// src/commands/check.ts
|
|
3582
|
+
import fs10 from "node:fs/promises";
|
|
2398
3583
|
import ora3 from "ora";
|
|
2399
|
-
import { z as
|
|
3584
|
+
import { z as z10 } from "zod";
|
|
2400
3585
|
|
|
2401
3586
|
// src/core/check-runner.ts
|
|
2402
3587
|
function calculateEvalAssertPassRate(result) {
|
|
@@ -2427,23 +3612,33 @@ async function runCheck(inputPath, options) {
|
|
|
2427
3612
|
evalSkippedReason = `Skipped: skill could not be parsed strictly (${message}).`;
|
|
2428
3613
|
}
|
|
2429
3614
|
if (parsedSkill) {
|
|
2430
|
-
|
|
2431
|
-
trigger = await runTriggerTest(parsedSkill, {
|
|
3615
|
+
const triggerOptions = {
|
|
2432
3616
|
provider: options.provider,
|
|
2433
3617
|
model: options.model,
|
|
2434
3618
|
queries: options.queries,
|
|
2435
3619
|
numQueries: options.numQueries,
|
|
2436
3620
|
seed: options.triggerSeed,
|
|
3621
|
+
concurrency: options.concurrency,
|
|
2437
3622
|
verbose: options.verbose
|
|
2438
|
-
}
|
|
2439
|
-
|
|
2440
|
-
evalResult = await runEval(parsedSkill, {
|
|
3623
|
+
};
|
|
3624
|
+
const evalOptions = {
|
|
2441
3625
|
provider: options.provider,
|
|
2442
3626
|
model: options.model,
|
|
2443
3627
|
graderModel: options.graderModel,
|
|
2444
3628
|
numRuns: options.evalNumRuns,
|
|
2445
|
-
prompts: options.prompts
|
|
2446
|
-
|
|
3629
|
+
prompts: options.prompts,
|
|
3630
|
+
concurrency: options.concurrency
|
|
3631
|
+
};
|
|
3632
|
+
if ((options.concurrency ?? 5) === 1) {
|
|
3633
|
+
options.onStage?.("trigger");
|
|
3634
|
+
trigger = await runTriggerTest(parsedSkill, triggerOptions);
|
|
3635
|
+
options.onStage?.("eval");
|
|
3636
|
+
evalResult = await runEval(parsedSkill, evalOptions);
|
|
3637
|
+
} else {
|
|
3638
|
+
options.onStage?.("trigger");
|
|
3639
|
+
options.onStage?.("eval");
|
|
3640
|
+
[trigger, evalResult] = await Promise.all([runTriggerTest(parsedSkill, triggerOptions), runEval(parsedSkill, evalOptions)]);
|
|
3641
|
+
}
|
|
2447
3642
|
}
|
|
2448
3643
|
}
|
|
2449
3644
|
const triggerF1 = trigger ? trigger.metrics.f1 : null;
|
|
@@ -2478,14 +3673,17 @@ async function runCheck(inputPath, options) {
|
|
|
2478
3673
|
}
|
|
2479
3674
|
|
|
2480
3675
|
// src/commands/check.ts
|
|
2481
|
-
var checkCliSchema =
|
|
2482
|
-
graderModel:
|
|
2483
|
-
apiKey:
|
|
2484
|
-
queries:
|
|
2485
|
-
|
|
2486
|
-
|
|
2487
|
-
|
|
2488
|
-
|
|
3676
|
+
/**
 * Schema for the command-specific flags of `skilltest check`.
 *
 * Every key is optional:
 *  - `graderModel`, `apiKey`, `queries`, `prompts`, `html`, `saveResults`: strings
 *  - `seed`: integer
 *  - `concurrency`: integer >= 1
 *  - `continueOnLintFail`, `verbose`: booleans
 */
var checkCliSchema = (() => {
  // Small factories for the value kinds that repeat across the flags.
  const optionalText = () => z10.string().optional();
  const optionalFlag = () => z10.boolean().optional();

  return z10.object({
    graderModel: optionalText(),
    apiKey: optionalText(),
    queries: optionalText(),
    seed: z10.number().int().optional(),
    prompts: optionalText(),
    concurrency: z10.number().int().min(1).optional(),
    html: optionalText(),
    saveResults: optionalText(),
    continueOnLintFail: optionalFlag(),
    verbose: optionalFlag()
  });
})();
|
|
2490
3688
|
var DEFAULT_ANTHROPIC_MODEL3 = "claude-sonnet-4-5-20250929";
|
|
2491
3689
|
var DEFAULT_OPENAI_MODEL3 = "gpt-4.1-mini";
|
|
@@ -2495,6 +3693,19 @@ function resolveModel3(provider, model) {
|
|
|
2495
3693
|
}
|
|
2496
3694
|
return model;
|
|
2497
3695
|
}
|
|
3696
|
+
/**
 * Adds a `Seed: <seed>` line to the plain-text check report.
 *
 * The line is inserted directly beneath the `Trigger` heading when that
 * heading can be located; otherwise it is appended to the end of the report.
 *
 * @param {string} output - Rendered check report text.
 * @param {number | null | undefined} seed - Seed of the trigger run, if any.
 * @returns {string} The report, unchanged when no seed is available.
 */
function renderCheckOutputWithSeed(output, seed) {
  // `== null` matches both undefined and null, so a missing seed never
  // renders as "Seed: null". A seed of 0 is still printed.
  if (seed == null) {
    return output;
  }
  const seedLine = `Seed: ${seed}`;
  const lines = output.split("\n");
  // Prefer the exact heading. If it is absent, retry while ignoring ANSI SGR
  // colour sequences and trailing whitespace (such as "\r"), so a styled
  // heading is still recognised instead of pushing the seed to the very end.
  let triggerIndex = lines.indexOf("Trigger");
  if (triggerIndex === -1) {
    triggerIndex = lines.findIndex(
      (line) => line.replace(/\u001B\[[0-9;]*m/g, "").trimEnd() === "Trigger"
    );
  }
  if (triggerIndex === -1) {
    return `${output}\n${seedLine}`;
  }
  // Insert immediately after the heading line.
  lines.splice(triggerIndex + 1, 0, seedLine);
  return lines.join("\n");
}
|
|
2498
3709
|
async function handleCheckCommand(targetPath, options, command) {
|
|
2499
3710
|
const spinner = options.json || !process.stdout.isTTY ? null : ora3("Preparing check run...").start();
|
|
2500
3711
|
try {
|
|
@@ -2531,6 +3742,7 @@ async function handleCheckCommand(targetPath, options, command) {
|
|
|
2531
3742
|
triggerSeed: options.triggerSeed,
|
|
2532
3743
|
prompts,
|
|
2533
3744
|
evalNumRuns: options.numRuns,
|
|
3745
|
+
concurrency: options.concurrency,
|
|
2534
3746
|
minF1: options.minF1,
|
|
2535
3747
|
minAssertPassRate: options.minAssertPassRate,
|
|
2536
3748
|
continueOnLintFail: options.continueOnLintFail,
|
|
@@ -2543,10 +3755,8 @@ async function handleCheckCommand(targetPath, options, command) {
|
|
|
2543
3755
|
spinner.text = "Running lint checks...";
|
|
2544
3756
|
} else if (stage === "parse") {
|
|
2545
3757
|
spinner.text = "Parsing skill for model evaluations...";
|
|
2546
|
-
} else if (stage === "trigger") {
|
|
2547
|
-
spinner.text = "Running trigger
|
|
2548
|
-
} else if (stage === "eval") {
|
|
2549
|
-
spinner.text = "Running end-to-end eval suite...";
|
|
3758
|
+
} else if (stage === "trigger" || stage === "eval") {
|
|
3759
|
+
spinner.text = "Running trigger and eval suites...";
|
|
2550
3760
|
}
|
|
2551
3761
|
}
|
|
2552
3762
|
});
|
|
@@ -2557,7 +3767,13 @@ async function handleCheckCommand(targetPath, options, command) {
|
|
|
2557
3767
|
if (options.json) {
|
|
2558
3768
|
writeResult(result, true);
|
|
2559
3769
|
} else {
|
|
2560
|
-
writeResult(
|
|
3770
|
+
writeResult(
|
|
3771
|
+
renderCheckOutputWithSeed(renderCheckReport(result, options.color, options.verbose), result.trigger?.seed),
|
|
3772
|
+
false
|
|
3773
|
+
);
|
|
3774
|
+
}
|
|
3775
|
+
if (options.html) {
|
|
3776
|
+
await fs10.writeFile(options.html, renderCheckHtml(result), "utf8");
|
|
2561
3777
|
}
|
|
2562
3778
|
process.exitCode = result.gates.overallPassed ? 0 : 1;
|
|
2563
3779
|
} catch (error) {
|
|
@@ -2567,7 +3783,7 @@ async function handleCheckCommand(targetPath, options, command) {
|
|
|
2567
3783
|
}
|
|
2568
3784
|
}
|
|
2569
3785
|
function registerCheckCommand(program) {
|
|
2570
|
-
program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
|
|
3786
|
+
program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--concurrency <n>", "Maximum in-flight trigger/eval tasks", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
|
|
2571
3787
|
const globalOptions = getGlobalCliOptions(command);
|
|
2572
3788
|
const config = getResolvedConfig(command);
|
|
2573
3789
|
const parsedCli = checkCliSchema.safeParse(command.opts());
|
|
@@ -2590,9 +3806,11 @@ function registerCheckCommand(program) {
|
|
|
2590
3806
|
minF1: config.trigger.threshold,
|
|
2591
3807
|
minAssertPassRate: config.eval.threshold,
|
|
2592
3808
|
numRuns: config.eval.numRuns,
|
|
3809
|
+
concurrency: config.concurrency,
|
|
3810
|
+
html: parsedCli.data.html,
|
|
2593
3811
|
lintFailOn: config.lint.failOn,
|
|
2594
3812
|
lintSuppress: config.lint.suppress,
|
|
2595
|
-
triggerSeed: config.trigger.seed,
|
|
3813
|
+
triggerSeed: parsedCli.data.seed ?? config.trigger.seed,
|
|
2596
3814
|
saveResults: parsedCli.data.saveResults,
|
|
2597
3815
|
continueOnLintFail: Boolean(parsedCli.data.continueOnLintFail),
|
|
2598
3816
|
verbose: Boolean(parsedCli.data.verbose)
|
|
@@ -2607,7 +3825,7 @@ function resolveVersion() {
|
|
|
2607
3825
|
try {
|
|
2608
3826
|
const currentFilePath = fileURLToPath(import.meta.url);
|
|
2609
3827
|
const packageJsonPath = path6.resolve(path6.dirname(currentFilePath), "..", "package.json");
|
|
2610
|
-
const raw =
|
|
3828
|
+
const raw = fs11.readFileSync(packageJsonPath, "utf8");
|
|
2611
3829
|
const parsed = JSON.parse(raw);
|
|
2612
3830
|
return parsed.version ?? "0.0.0";
|
|
2613
3831
|
} catch {
|