skilltest 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,11 +1,15 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import fs7 from "node:fs";
4
+ import fs11 from "node:fs";
5
5
  import path6 from "node:path";
6
6
  import { fileURLToPath } from "node:url";
7
7
  import { Command } from "commander";
8
8
 
9
+ // src/commands/lint.ts
10
+ import fs6 from "node:fs/promises";
11
+ import { z as z6 } from "zod";
12
+
9
13
  // src/core/skill-parser.ts
10
14
  import fs from "node:fs/promises";
11
15
  import path from "node:path";
@@ -239,6 +243,171 @@ function runCompatibilityChecks(context) {
239
243
  return issues;
240
244
  }
241
245
 
246
+ // src/core/linter/markdown-zones.ts
247
/**
 * Splits raw text into lines, accepting both LF and CRLF line endings.
 *
 * @param {string} raw - Text to split.
 * @returns {string[]} The lines without their terminators.
 */
function splitLines(raw) {
  return raw.split(/\r?\n/);
}

/**
 * Separates a leading `---` ... `---` frontmatter block from the body.
 *
 * A leading UTF-8 byte-order mark and trailing whitespace on the delimiter
 * lines are tolerated, so files saved by editors that add either are still
 * recognized as having frontmatter instead of having it scanned as body text.
 *
 * @param {string} raw - Full file content.
 * @returns {{ bodyLines: string[], bodyStartLine: number }} The body lines and
 *   the 1-based line number of the first body line in the original file. When
 *   no complete frontmatter block is found, every line is returned starting
 *   at line 1.
 */
function stripTopFrontmatter(raw) {
  const lines = splitLines(raw);
  const wholeFile = { bodyLines: lines, bodyStartLine: 1 };
  const isDelimiter = (line) => line.trimEnd() === "---";

  // Only the very first line may carry a BOM; ignore it for the delimiter test.
  const firstLine = lines[0].startsWith("\uFEFF") ? lines[0].slice(1) : lines[0];
  if (!isDelimiter(firstLine)) {
    return wholeFile;
  }

  const closingIndex = lines.findIndex((line, index) => index > 0 && isDelimiter(line));
  if (closingIndex === -1) {
    // Unterminated frontmatter: treat the whole file as body.
    return wholeFile;
  }

  return {
    bodyLines: lines.slice(closingIndex + 1),
    // +1 to step past the closing delimiter, +1 to convert to 1-based numbering.
    bodyStartLine: closingIndex + 2
  };
}
271
/**
 * Detects whether a line opens a fenced code block.
 *
 * A backtick fence whose trailing text contains another backtick is not an
 * opener (CommonMark forbids backticks in a backtick fence's info string);
 * such a line is inline code, e.g. "``` foo ``` bar".
 *
 * @param {string} line - A single line of markdown.
 * @returns {string|null} The fence delimiter (three or more backticks or
 *   tildes) or null when the line does not open a fence.
 */
function matchCodeFenceOpener(line) {
  const match = /^\s*(`{3,}|~{3,})(.*)$/.exec(line);
  if (!match) {
    return null;
  }
  const [, delimiter, info] = match;
  if (delimiter.startsWith("`") && info.includes("`")) {
    return null;
  }
  return delimiter;
}
275
/**
 * Tests whether a line closes the code fence opened with `delimiter`.
 *
 * Per CommonMark, a closing fence uses the same character as the opener, is
 * at least as long as the opener, and carries no other text. Accepting longer
 * closers prevents a fence from staying open to the end of the file.
 *
 * @param {string} line - Candidate closing line.
 * @param {string} delimiter - The delimiter returned for the opening line.
 * @returns {boolean} True when the line closes the fence.
 */
function isExactCodeFenceCloser(line, delimiter) {
  const candidate = line.trim();
  if (candidate.length < delimiter.length) {
    return false;
  }
  const fenceChar = delimiter[0];
  for (const ch of candidate) {
    if (ch !== fenceChar) {
      return false;
    }
  }
  return true;
}
278
/**
 * Adds a zone to the list, folding it into the last zone when that zone has
 * the same type and ends on the same or the immediately preceding line.
 * Empty content is ignored.
 *
 * @param {Array<object>} zones - Zone list, mutated in place.
 * @param {string} type - Zone type.
 * @param {string} content - Text of the zone.
 * @param {number} startLine - First line covered by the content.
 * @param {number} endLine - Last line covered by the content.
 */
function appendZone(zones, type, content, startLine, endLine) {
  if (content === "") {
    return;
  }
  const last = zones[zones.length - 1];
  const canMerge = Boolean(last) && last.type === type && startLine <= last.endLine + 1;
  if (!canMerge) {
    zones.push({ type, content, startLine, endLine });
    return;
  }
  // Content that begins on a later line is joined with a newline.
  if (startLine > last.endLine) {
    last.content += "\n";
  }
  last.content += content;
  last.endLine = endLine;
}
296
/**
 * Extends a still-open zone with text from the given line.
 *
 * Text from a later line than the zone's current end is joined with a
 * newline. An empty string on the zone's current end line changes nothing.
 *
 * @param {object} zone - Open zone, mutated in place.
 * @param {string} content - Text to append (may be empty).
 * @param {number} lineNumber - Line the text came from.
 */
function appendToOpenZone(zone, content, lineNumber) {
  const startsNewLine = lineNumber > zone.endLine;
  if (content === "" && !startsNewLine) {
    return;
  }
  const joiner = startsNewLine ? "\n" : "";
  zone.content += joiner + content;
  zone.endLine = lineNumber;
}
308
// Splits a piece of one line into zones: text between a pair of backticks
// becomes an "inline-code" zone (backticks included), everything else is
// appended with the caller-supplied baseType. All zones produced here start
// and end on lineNumber.
+ function addInlineAwareText(zones, text, lineNumber, baseType) {
309
+ if (text === "") {
310
+ return;
311
+ }
312
+ let cursor = 0;
313
+ while (cursor < text.length) {
314
+ const inlineStart = text.indexOf("`", cursor);
315
+ if (inlineStart === -1) {
316
+ appendZone(zones, baseType, text.slice(cursor), lineNumber, lineNumber);
317
+ return;
318
+ }
319
+ if (inlineStart > cursor) {
320
+ appendZone(zones, baseType, text.slice(cursor, inlineStart), lineNumber, lineNumber);
321
+ }
322
// An opening backtick with no closing backtick on this text is kept as baseType text.
+ const inlineEnd = text.indexOf("`", inlineStart + 1);
323
+ if (inlineEnd === -1) {
324
+ appendZone(zones, baseType, text.slice(inlineStart), lineNumber, lineNumber);
325
+ return;
326
+ }
327
+ appendZone(zones, "inline-code", text.slice(inlineStart, inlineEnd + 1), lineNumber, lineNumber);
328
+ cursor = inlineEnd + 1;
329
+ }
330
+ }
331
// Partitions the body of a markdown file (frontmatter removed) into zones of
// type "code-fence", "html-comment", "inline-code", "blockquote" or "prose".
// Each zone carries its content and its startLine/endLine; line numbers are
// offset by bodyStartLine from stripTopFrontmatter. Returns the zone array.
+ function parseZones(raw) {
332
+ const { bodyLines, bodyStartLine } = stripTopFrontmatter(raw);
333
+ const zones = [];
334
+ let openCodeFence = null;
335
+ let openComment = null;
336
+ for (const [index, line] of bodyLines.entries()) {
337
+ const lineNumber = bodyStartLine + index;
338
// Inside a fence every line belongs to the fence zone until the closer is seen.
+ if (openCodeFence) {
339
+ appendToOpenZone(openCodeFence.zone, line, lineNumber);
340
+ if (isExactCodeFenceCloser(line, openCodeFence.delimiter)) {
341
+ zones.push(openCodeFence.zone);
342
+ openCodeFence = null;
343
+ }
344
+ continue;
345
+ }
346
// Fence openers are only recognized while no multi-line HTML comment is open.
+ if (!openComment) {
347
+ const fenceDelimiter = matchCodeFenceOpener(line);
348
+ if (fenceDelimiter) {
349
+ openCodeFence = {
350
+ delimiter: fenceDelimiter,
351
+ zone: {
352
+ type: "code-fence",
353
+ content: line,
354
+ startLine: lineNumber,
355
+ endLine: lineNumber
356
+ }
357
+ };
358
+ continue;
359
+ }
360
+ }
361
// A line whose first non-whitespace character is ">" is classified as blockquote.
+ const baseType = /^\s*>/.test(line) ? "blockquote" : "prose";
362
+ let cursor = 0;
363
// The loop also runs for an empty line while a comment is open, so the
// comment zone records the line break.
+ while (cursor < line.length || openComment) {
364
+ if (openComment) {
365
+ const closeIndex = line.indexOf("-->", cursor);
366
+ if (closeIndex === -1) {
367
+ appendToOpenZone(openComment, line.slice(cursor), lineNumber);
368
+ cursor = line.length;
369
+ break;
370
+ }
371
+ appendToOpenZone(openComment, line.slice(cursor, closeIndex + 3), lineNumber);
372
+ zones.push(openComment);
373
+ openComment = null;
374
+ cursor = closeIndex + 3;
375
+ continue;
376
+ }
377
+ if (cursor >= line.length) {
378
+ break;
379
+ }
380
+ const commentStart = line.indexOf("<!--", cursor);
381
+ const textEnd = commentStart === -1 ? line.length : commentStart;
382
+ if (textEnd > cursor) {
383
+ addInlineAwareText(zones, line.slice(cursor, textEnd), lineNumber, baseType);
384
+ }
385
+ if (commentStart === -1) {
386
+ break;
387
+ }
388
+ const commentEnd = line.indexOf("-->", commentStart + 4);
389
// Comment not closed on this line: keep it open and continue on the next line.
+ if (commentEnd === -1) {
390
+ openComment = {
391
+ type: "html-comment",
392
+ content: line.slice(commentStart),
393
+ startLine: lineNumber,
394
+ endLine: lineNumber
395
+ };
396
+ break;
397
+ }
398
+ appendZone(zones, "html-comment", line.slice(commentStart, commentEnd + 3), lineNumber, lineNumber);
399
+ cursor = commentEnd + 3;
400
+ }
401
+ }
402
// Unterminated comments and fences are still emitted as zones.
+ if (openComment) {
403
+ zones.push(openComment);
404
+ }
405
+ if (openCodeFence) {
406
+ zones.push(openCodeFence.zone);
407
+ }
408
+ return zones;
409
+ }
410
+
242
411
  // src/core/linter/content.ts
243
412
  var VAGUE_PATTERNS = [
244
413
  /\bdo something appropriate\b/i,
@@ -255,6 +424,102 @@ var SECRET_PATTERNS = [
255
424
  { label: "Slack token", regex: /\bxox[baprs]-[A-Za-z0-9-]{20,}\b/ },
256
425
  { label: "Generic private key header", regex: /-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----/ }
257
426
  ];
427
/**
 * Computes the overall line span covered by a list of matches.
 *
 * Uses a single pass instead of spreading the whole list into
 * Math.min/Math.max, which throws a RangeError once the list is larger than
 * the engine's argument limit.
 *
 * @param {Array<{ startLine: number, endLine: number }>} matches
 * @returns {{ startLine?: number, endLine?: number }} Empty object when there
 *   are no matches; otherwise the smallest startLine and largest endLine.
 */
function summarizeLineRange(matches) {
  if (matches.length === 0) {
    return {};
  }
  let startLine = Infinity;
  let endLine = -Infinity;
  for (const match of matches) {
    startLine = Math.min(startLine, match.startLine);
    endLine = Math.max(endLine, match.endLine);
  }
  return { startLine, endLine };
}
436
/**
 * Lists the distinct labels found in the matches, in first-seen order.
 *
 * @param {Array<{ label: string }>} matches
 * @returns {string[]} Labels without duplicates.
 */
function uniqueLabels(matches) {
  // A Set keeps insertion order, so the first occurrence of each label wins.
  return [...new Set(matches.map((match) => match.label))];
}
448
/**
 * Tests every zone against every entry of SECRET_PATTERNS and groups the hits
 * by whether the zone is prose.
 *
 * @param {Array<{ type: string, content: string, startLine: number, endLine: number }>} zones
 * @returns {{ prose: object[], nonProse: object[] }} Occurrences (label,
 *   zoneType, startLine, endLine) found in "prose" zones and in all other zones.
 */
function collectSecretMatches(zones) {
  const buckets = { prose: [], nonProse: [] };
  zones.forEach(({ type, content, startLine, endLine }) => {
    const bucket = type === "prose" ? buckets.prose : buckets.nonProse;
    SECRET_PATTERNS.filter(({ regex }) => regex.test(content)).forEach(({ label }) => {
      bucket.push({ label, zoneType: type, startLine, endLine });
    });
  });
  return buckets;
}
471
/**
 * Converts skipped matches into the plain records stored on an issue.
 *
 * @param {Array<{ label: string, zoneType: string, startLine: number, endLine: number }>} matches
 * @returns {object[]|undefined} A fresh array of records, or undefined when
 *   nothing was skipped.
 */
function buildSkippedPatterns(matches) {
  return matches.length > 0
    ? matches.map(({ label, zoneType, startLine, endLine }) => ({ label, zoneType, startLine, endLine }))
    : undefined;
}
482
// Builds the "content:secrets" lint issue for a skill file.
// Returns null when the check is suppressed; otherwise an issue whose status
// is "fail" when a secret pattern matches in a prose zone, "warn" when
// matches exist only in non-prose zones, and "pass" when nothing matches.
+ function buildSecretsIssue(context) {
483
+ if (context.suppressedCheckIds.has("content:secrets")) {
484
+ return null;
485
+ }
486
+ const { prose, nonProse } = collectSecretMatches(parseZones(context.skill.raw));
487
+ const proseLabels = uniqueLabels(prose);
488
+ const nonProseLabels = uniqueLabels(nonProse);
489
+ const skippedPatterns = buildSkippedPatterns(nonProse);
490
// Prose hits take precedence; non-prose hits are still reported via skippedPatterns.
+ if (proseLabels.length > 0) {
491
+ return {
492
+ id: "content.secrets",
493
+ checkId: "content:secrets",
494
+ title: "Hardcoded Secrets",
495
+ status: "fail",
496
+ message: `Potential secrets detected (${proseLabels.join(", ")}).`,
497
+ suggestion: "Remove secrets from skill files and use environment variables or secret managers.",
498
+ ...summarizeLineRange(prose),
499
+ skippedPatterns
500
+ };
501
+ }
502
+ if (nonProseLabels.length > 0) {
503
// The message wording differs when every hit sits inside a fenced code block.
+ const codeFenceOnly = nonProse.every((match) => match.zoneType === "code-fence");
504
+ return {
505
+ id: "content.secrets",
506
+ checkId: "content:secrets",
507
+ title: "Hardcoded Secrets",
508
+ status: "warn",
509
+ message: codeFenceOnly ? `Possible secret in code example \u2014 verify this is a placeholder, not a real key (${nonProseLabels.join(", ")}).` : `Possible secrets found outside prose instructions (${nonProseLabels.join(", ")}). Verify these are placeholders, not real credentials.`,
510
+ suggestion: "Replace real-looking credentials in examples with explicit placeholders such as YOUR_API_KEY.",
511
+ ...summarizeLineRange(nonProse),
512
+ skippedPatterns
513
+ };
514
+ }
515
+ return {
516
+ id: "content.secrets",
517
+ checkId: "content:secrets",
518
+ title: "Hardcoded Secrets",
519
+ status: "pass",
520
+ message: "No obvious API keys or secrets patterns were detected."
521
+ };
522
+ }
258
523
  function runContentChecks(context) {
259
524
  const issues = [];
260
525
  const body = context.frontmatter.content;
@@ -334,29 +599,9 @@ function runContentChecks(context) {
334
599
  message: "No angle bracket tokens detected in frontmatter."
335
600
  });
336
601
  }
337
- const secretHits = /* @__PURE__ */ new Set();
338
- for (const pattern of SECRET_PATTERNS) {
339
- if (pattern.regex.test(context.skill.raw)) {
340
- secretHits.add(pattern.label);
341
- }
342
- }
343
- if (secretHits.size > 0) {
344
- issues.push({
345
- id: "content.secrets",
346
- checkId: "content:secrets",
347
- title: "Hardcoded Secrets",
348
- status: "fail",
349
- message: `Potential secrets detected (${Array.from(secretHits).join(", ")}).`,
350
- suggestion: "Remove secrets from skill files and use environment variables or secret managers."
351
- });
352
- } else {
353
- issues.push({
354
- id: "content.secrets",
355
- checkId: "content:secrets",
356
- title: "Hardcoded Secrets",
357
- status: "pass",
358
- message: "No obvious API keys or secrets patterns were detected."
359
- });
602
+ const secretsIssue = buildSecretsIssue(context);
603
+ if (secretsIssue) {
604
+ issues.push(secretsIssue);
360
605
  }
361
606
  if (bodyLines.length < 10) {
362
607
  issues.push({
@@ -776,93 +1021,159 @@ var SHELL_ACTIVITY_PATTERNS = [
776
1021
  /\b(?:npm|pnpm|yarn|pip|git|docker|kubectl)\s+[A-Za-z0-9-]/i
777
1022
  ];
778
1023
  var SAFETY_GUARDRAIL_PATTERN = /\b(?:ask before|confirm|approval|dry[- ]run|sandbox|least privilege|redact|never expose|do not reveal)\b/i;
779
- function collectMatches(content, patterns) {
780
- const matches = [];
781
- for (const pattern of patterns) {
782
- if (pattern.regex.test(content)) {
783
- matches.push(pattern.label);
1024
/**
 * Describes one pattern hit inside one zone.
 *
 * @param {{ type: string, startLine: number, endLine: number }} zone
 * @param {{ label: string }} pattern
 * @returns {{ label: string, zoneType: string, startLine: number, endLine: number }}
 */
function buildOccurrence(zone, pattern) {
  return {
    label: pattern.label,
    zoneType: zone.type,
    startLine: zone.startLine,
    endLine: zone.endLine
  };
}

/**
 * Tests every zone against every pattern and splits the hits into those found
 * in prose zones (flagged) and those found in any other zone (skipped).
 *
 * @param {Array<{ type: string, content: string, startLine: number, endLine: number }>} zones
 * @param {Array<{ label: string, regex: RegExp }>} patterns
 * @returns {{ flagged: object[], skipped: object[] }}
 */
function collectZoneAwareMatches(zones, patterns) {
  const flagged = [];
  const skipped = [];
  for (const zone of zones) {
    for (const pattern of patterns) {
      // The same RegExp object is reused for every zone. With a global or
      // sticky flag, test() resumes from lastIndex and can miss a match in
      // the next zone, so always start the search from the beginning.
      pattern.regex.lastIndex = 0;
      if (!pattern.regex.test(zone.content)) {
        continue;
      }
      const occurrence = buildOccurrence(zone, pattern);
      if (zone.type === "prose") {
        flagged.push(occurrence);
      } else {
        skipped.push(occurrence);
      }
    }
  }
  return { flagged, skipped };
}
1050
/**
 * Returns each distinct match label once, ordered by first appearance.
 *
 * @param {Array<{ label: string }>} matches
 * @returns {string[]}
 */
function uniqueLabels2(matches) {
  const distinct = new Set();
  matches.forEach(({ label }) => distinct.add(label));
  return Array.from(distinct);
}
1062
/**
 * Computes the smallest startLine and largest endLine over the matches.
 *
 * The bounds are folded in one loop rather than spread into
 * Math.min/Math.max, so arbitrarily long match lists cannot exceed the
 * engine's argument limit and throw a RangeError.
 *
 * @param {Array<{ startLine: number, endLine: number }>} matches
 * @returns {{ startLine?: number, endLine?: number }} Empty object for an
 *   empty list.
 */
function summarizeLineRange2(matches) {
  if (matches.length === 0) {
    return {};
  }
  let startLine = Infinity;
  let endLine = -Infinity;
  for (const match of matches) {
    startLine = Math.min(startLine, match.startLine);
    endLine = Math.max(endLine, match.endLine);
  }
  return { startLine, endLine };
}
1071
/**
 * Copies skipped occurrences into plain records for reporting.
 *
 * @param {Array<{ label: string, zoneType: string, startLine: number, endLine: number }>} matches
 * @returns {object[]|undefined} undefined when the list is empty, otherwise a
 *   new array holding only label, zoneType, startLine and endLine.
 */
function buildSkippedPatterns2(matches) {
  if (matches.length === 0) {
    return undefined;
  }
  const records = [];
  for (const { label, zoneType, startLine, endLine } of matches) {
    records.push({ label, zoneType, startLine, endLine });
  }
  return records;
}
1082
/**
 * Reports whether a check id is in the context's suppression set.
 *
 * @param {{ suppressedCheckIds: Set<string> }} context
 * @param {string} checkId
 * @returns {boolean}
 */
function isSuppressed(context, checkId) {
  const { suppressedCheckIds } = context;
  return suppressedCheckIds.has(checkId);
}
1085
// Runs one pattern-based security check over pre-parsed zones.
// Returns null when options.checkId is suppressed. Otherwise returns an issue
// with status options.statusOnMatch when any pattern matched in a prose zone,
// or "pass" when none did. Matches in non-prose zones never change the status;
// they are only listed under skippedPatterns.
+ function runZoneAwareSecurityCheck(context, zones, options) {
1086
+ if (isSuppressed(context, options.checkId)) {
1087
+ return null;
1088
+ }
1089
+ const matches = collectZoneAwareMatches(zones, options.patterns);
1090
+ const labels = uniqueLabels2(matches.flagged);
1091
+ const skippedPatterns = buildSkippedPatterns2(matches.skipped);
1092
+ if (labels.length > 0) {
1093
+ return {
1094
+ id: options.id,
1095
+ checkId: options.checkId,
1096
+ title: options.title,
1097
+ status: options.statusOnMatch,
1098
+ message: `${options.matchMessagePrefix}: ${labels.join(", ")}.`,
1099
+ suggestion: options.suggestion,
1100
+ ...summarizeLineRange2(matches.flagged),
1101
+ skippedPatterns
1102
+ };
1103
+ }
1104
+ return {
1105
+ id: options.id,
1106
+ checkId: options.checkId,
1107
+ title: options.title,
1108
+ status: "pass",
1109
+ message: options.passMessage,
1110
+ skippedPatterns
1111
+ };
787
1112
  }
788
1113
  function runSecurityChecks(context) {
789
1114
  const issues = [];
790
1115
  const skillText = context.skill.raw;
791
- const dangerousCommandHits = collectMatches(skillText, DANGEROUS_COMMAND_PATTERNS);
792
- if (dangerousCommandHits.length > 0) {
793
- issues.push({
794
- id: "security.dangerous-command-patterns",
795
- checkId: "security:dangerous-commands",
796
- title: "Dangerous Command Patterns",
797
- status: "fail",
798
- message: `Potentially dangerous command instruction patterns found: ${dangerousCommandHits.join(", ")}.`,
799
- suggestion: "Remove destructive/pipe-exec command examples or wrap them with explicit safety constraints."
800
- });
801
- } else {
802
- issues.push({
803
- id: "security.dangerous-command-patterns",
804
- checkId: "security:dangerous-commands",
805
- title: "Dangerous Command Patterns",
806
- status: "pass",
807
- message: "No high-risk destructive or direct pipe-to-shell patterns detected."
808
- });
809
- }
810
- const exfiltrationHits = collectMatches(skillText, EXFILTRATION_PATTERNS);
811
- if (exfiltrationHits.length > 0) {
812
- issues.push({
813
- id: "security.exfiltration-patterns",
814
- checkId: "security:exfiltration",
815
- title: "Sensitive Data Exfiltration",
816
- status: "fail",
817
- message: `Possible sensitive data exfiltration patterns found: ${exfiltrationHits.join(", ")}.`,
818
- suggestion: "Remove instructions that access or transmit secrets/credential files."
819
- });
820
- } else {
821
- issues.push({
822
- id: "security.exfiltration-patterns",
823
- checkId: "security:exfiltration",
824
- title: "Sensitive Data Exfiltration",
825
- status: "pass",
826
- message: "No obvious credential access/exfiltration instructions detected."
827
- });
828
- }
829
- const escalationHits = collectMatches(skillText, PRIVILEGE_ESCALATION_PATTERNS);
830
- if (escalationHits.length > 0) {
831
- issues.push({
832
- id: "security.privilege-escalation",
833
- checkId: "security:privilege-escalation",
834
- title: "Privilege Escalation Language",
835
- status: "warn",
836
- message: `Potentially risky privilege/execution language detected: ${escalationHits.join(", ")}.`,
837
- suggestion: "Prefer least-privilege execution and explicit approval steps for elevated commands."
838
- });
839
- } else {
840
- issues.push({
841
- id: "security.privilege-escalation",
842
- checkId: "security:privilege-escalation",
843
- title: "Privilege Escalation Language",
844
- status: "pass",
845
- message: "No obvious privilege-escalation language detected."
846
- });
1116
+ const needsZoneParsing = !isSuppressed(context, "security:dangerous-commands") || !isSuppressed(context, "security:exfiltration") || !isSuppressed(context, "security:privilege-escalation");
1117
+ const zones = needsZoneParsing ? parseZones(skillText) : [];
1118
+ const dangerousCommandsIssue = runZoneAwareSecurityCheck(context, zones, {
1119
+ id: "security.dangerous-command-patterns",
1120
+ checkId: "security:dangerous-commands",
1121
+ title: "Dangerous Command Patterns",
1122
+ statusOnMatch: "fail",
1123
+ patterns: DANGEROUS_COMMAND_PATTERNS,
1124
+ matchMessagePrefix: "Potentially dangerous command instruction patterns found",
1125
+ passMessage: "No high-risk destructive or direct pipe-to-shell patterns detected.",
1126
+ suggestion: "Remove destructive/pipe-exec command examples or wrap them with explicit safety constraints."
1127
+ });
1128
+ if (dangerousCommandsIssue) {
1129
+ issues.push(dangerousCommandsIssue);
1130
+ }
1131
+ const exfiltrationIssue = runZoneAwareSecurityCheck(context, zones, {
1132
+ id: "security.exfiltration-patterns",
1133
+ checkId: "security:exfiltration",
1134
+ title: "Sensitive Data Exfiltration",
1135
+ statusOnMatch: "fail",
1136
+ patterns: EXFILTRATION_PATTERNS,
1137
+ matchMessagePrefix: "Possible sensitive data exfiltration patterns found",
1138
+ passMessage: "No obvious credential access/exfiltration instructions detected.",
1139
+ suggestion: "Remove instructions that access or transmit secrets/credential files."
1140
+ });
1141
+ if (exfiltrationIssue) {
1142
+ issues.push(exfiltrationIssue);
1143
+ }
1144
+ const privilegeEscalationIssue = runZoneAwareSecurityCheck(context, zones, {
1145
+ id: "security.privilege-escalation",
1146
+ checkId: "security:privilege-escalation",
1147
+ title: "Privilege Escalation Language",
1148
+ statusOnMatch: "warn",
1149
+ patterns: PRIVILEGE_ESCALATION_PATTERNS,
1150
+ matchMessagePrefix: "Potentially risky privilege/execution language detected",
1151
+ passMessage: "No obvious privilege-escalation language detected.",
1152
+ suggestion: "Prefer least-privilege execution and explicit approval steps for elevated commands."
1153
+ });
1154
+ if (privilegeEscalationIssue) {
1155
+ issues.push(privilegeEscalationIssue);
847
1156
  }
848
- const hasShellActivity = SHELL_ACTIVITY_PATTERNS.some((pattern) => pattern.test(skillText));
849
- if (hasShellActivity && !SAFETY_GUARDRAIL_PATTERN.test(skillText)) {
850
- issues.push({
851
- id: "security.safety-guardrails",
852
- checkId: "security:missing-guardrails",
853
- title: "Execution Safety Guardrails",
854
- status: "warn",
855
- message: "Shell/tool execution is present, but no explicit safety guardrails were detected.",
856
- suggestion: "Add guidance such as approval requirements, dry-run mode, scope checks, and redaction rules."
857
- });
858
- } else {
859
- issues.push({
860
- id: "security.safety-guardrails",
861
- checkId: "security:missing-guardrails",
862
- title: "Execution Safety Guardrails",
863
- status: "pass",
864
- message: hasShellActivity ? "Shell/tool execution instructions include at least one safety guardrail." : "No shell/tool execution instructions detected."
865
- });
1157
+ if (!isSuppressed(context, "security:missing-guardrails")) {
1158
+ const hasShellActivity = SHELL_ACTIVITY_PATTERNS.some((pattern) => pattern.test(skillText));
1159
+ if (hasShellActivity && !SAFETY_GUARDRAIL_PATTERN.test(skillText)) {
1160
+ issues.push({
1161
+ id: "security.safety-guardrails",
1162
+ checkId: "security:missing-guardrails",
1163
+ title: "Execution Safety Guardrails",
1164
+ status: "warn",
1165
+ message: "Shell/tool execution is present, but no explicit safety guardrails were detected.",
1166
+ suggestion: "Add guidance such as approval requirements, dry-run mode, scope checks, and redaction rules."
1167
+ });
1168
+ } else {
1169
+ issues.push({
1170
+ id: "security.safety-guardrails",
1171
+ checkId: "security:missing-guardrails",
1172
+ title: "Execution Safety Guardrails",
1173
+ status: "pass",
1174
+ message: hasShellActivity ? "Shell/tool execution instructions include at least one safety guardrail." : "No shell/tool execution instructions detected."
1175
+ });
1176
+ }
866
1177
  }
867
1178
  return issues;
868
1179
  }
@@ -1038,9 +1349,11 @@ function lintFails(report, failOn) {
1038
1349
  async function runLinter(inputPath, options = {}) {
1039
1350
  const skill = await loadSkillFile(inputPath);
1040
1351
  const frontmatter = parseFrontmatter(skill.raw);
1352
+ const suppressedCheckIds = new Set(options.suppress ?? []);
1041
1353
  const context = {
1042
1354
  skill,
1043
- frontmatter
1355
+ frontmatter,
1356
+ suppressedCheckIds
1044
1357
  };
1045
1358
  const issues = [];
1046
1359
  issues.push(...runFrontmatterChecks(context));
@@ -1049,8 +1362,7 @@ async function runLinter(inputPath, options = {}) {
1049
1362
  issues.push(...runSecurityChecks(context));
1050
1363
  issues.push(...await runDisclosureChecks(context));
1051
1364
  issues.push(...runCompatibilityChecks(context));
1052
- const suppress = new Set(options.suppress ?? []);
1053
- const filteredIssues = issues.filter((issue) => !suppress.has(issue.checkId));
1365
+ const filteredIssues = issues.filter((issue) => !suppressedCheckIds.has(issue.checkId));
1054
1366
  return {
1055
1367
  target: inputPath,
1056
1368
  issues: filteredIssues,
@@ -1058,6 +1370,739 @@ async function runLinter(inputPath, options = {}) {
1058
1370
  };
1059
1371
  }
1060
1372
 
1373
+ // src/reporters/html.ts
1374
/**
 * Escapes the five HTML-significant characters (& < > " ') in a value.
 *
 * @param {*} value - Any value; null and undefined become the empty string,
 *   everything else is converted with String().
 * @returns {string} Text that is safe to place in HTML content or a quoted
 *   attribute value.
 */
function escapeHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };
  return String(value ?? "").replace(/[&<>"']/g, (ch) => entities[ch]);
}
1377
/**
 * Formats a ratio as a percentage with one decimal place.
 *
 * @param {number} value - Ratio, where 1 means 100%.
 * @returns {string} For example "12.3%".
 */
function formatPercent(value) {
  const percent = (value * 100).toFixed(1);
  return percent + "%";
}
1380
/**
 * Produces a human-readable location label.
 *
 * @param {number|undefined} startLine
 * @param {number|undefined} endLine
 * @returns {string|null} null without a start line; "line N" when the end is
 *   missing or equal to the start; otherwise "lines N-M".
 */
function formatLineRange(startLine, endLine) {
  if (startLine === undefined) {
    return null;
  }
  const isSingleLine = endLine === undefined || endLine === startLine;
  return isSingleLine ? `line ${startLine}` : `lines ${startLine}-${endLine}`;
}
1389
/**
 * Maps a status to the upper-case text shown on its badge.
 *
 * @param {string} status - "pass", "warn" or "fail"; any other value is
 *   shown as "SKIP".
 * @returns {string}
 */
function badgeLabel(status) {
  switch (status) {
    case "pass":
      return "PASS";
    case "warn":
      return "WARN";
    case "fail":
      return "FAIL";
    default:
      return "SKIP";
  }
}
1401
/**
 * Renders a status badge.
 *
 * The status is escaped before being placed in the class attribute, like
 * every other interpolated value in this reporter, so an unexpected status
 * value cannot break out of the attribute.
 *
 * @param {string} status - Status used for the CSS class and the label.
 * @returns {string} HTML for the badge.
 */
function renderBadge(status) {
  return `<span class="badge ${escapeHtml(status)}">${badgeLabel(status)}</span>`;
}
1404
// Renders the stats grid: one card per entry with its label, value and an
// optional note. A truthy stat.status adds a "status-<status>" CSS class.
// NOTE(review): stat.status is interpolated into the class attribute without
// escapeHtml, unlike label/value/note — confirm callers only pass fixed values.
+ function renderStatCards(stats) {
1405
+ return `<div class="stats-grid">${stats.map(
1406
+ (stat) => `
1407
+ <div class="stat-card${stat.status ? ` status-${stat.status}` : ""}">
1408
+ <div class="stat-label">${escapeHtml(stat.label)}</div>
1409
+ <div class="stat-value">${escapeHtml(stat.value)}</div>
1410
+ ${stat.note ? `<div class="stat-note">${escapeHtml(stat.note)}</div>` : ""}
1411
+ </div>
1412
+ `
1413
+ ).join("")}</div>`;
1414
+ }
1415
// Renders label/value pairs as a metadata grid, escaping both fields.
// Returns an empty string when there are no items.
+ function renderMetaItems(items) {
1416
+ if (items.length === 0) {
1417
+ return "";
1418
+ }
1419
+ return `<div class="meta-grid">${items.map(
1420
+ (item) => `
1421
+ <div class="meta-item">
1422
+ <span class="meta-label">${escapeHtml(item.label)}</span>
1423
+ <span class="meta-value">${escapeHtml(item.value)}</span>
1424
+ </div>
1425
+ `
1426
+ ).join("")}</div>`;
1427
+ }
1428
// Renders the report header card: command name, heading and target (all
// escaped), followed by the metadata grid and the stat cards.
+ function renderHeaderCard(commandName, heading, target, stats, metaItems) {
1429
+ return `
1430
+ <section class="card header-card">
1431
+ <div class="eyebrow">skilltest ${escapeHtml(commandName)}</div>
1432
+ <h1>${escapeHtml(heading)}</h1>
1433
+ <div class="target-line">target: ${escapeHtml(target)}</div>
1434
+ ${renderMetaItems(metaItems)}
1435
+ ${renderStatCards(stats)}
1436
+ </section>
1437
+ `;
1438
+ }
1439
// Wraps body in a card section with an escaped title.
// body is inserted as-is, so it must already be HTML.
+ function renderSectionCard(title, body) {
1440
+ return `
1441
+ <section class="card">
1442
+ <h2>${escapeHtml(title)}</h2>
1443
+ ${body}
1444
+ </section>
1445
+ `;
1446
+ }
1447
// Renders a row with an escaped title, a status badge and an escaped message.
// details, when provided, is inserted as-is (pre-rendered HTML); null or
// undefined renders nothing.
+ function renderMessageRow(status, title, message, details) {
1448
+ return `
1449
+ <div class="row">
1450
+ <div class="row-header">
1451
+ <div class="row-title">${escapeHtml(title)}</div>
1452
+ ${renderBadge(status)}
1453
+ </div>
1454
+ <div class="row-body">${escapeHtml(message)}</div>
1455
+ ${details ?? ""}
1456
+ </div>
1457
+ `;
1458
+ }
1459
// Renders a <details> disclosure block. The summary text is escaped;
// content is inserted as-is, so callers must pass already-escaped HTML.
+ function renderDetails(summary, content) {
1460
+ return `
1461
+ <details class="detail-block">
1462
+ <summary>${escapeHtml(summary)}</summary>
1463
+ <div class="detail-content">${content}</div>
1464
+ </details>
1465
+ `;
1466
+ }
1467
/**
 * Wraps text in a <pre> element after escaping it.
 *
 * @param {*} content - Value to show verbatim.
 * @returns {string} HTML for the preformatted block.
 */
function renderPreBlock(content) {
  const escaped = escapeHtml(content);
  return "<pre>" + escaped + "</pre>";
}
1470
// Renders label/value pairs as a definition list, escaping both fields.
+ function renderDefinitionList(items) {
1471
+ return `<div class="definition-list">${items.map(
1472
+ (item) => `
1473
+ <div class="definition-item">
1474
+ <div class="definition-label">${escapeHtml(item.label)}</div>
1475
+ <div class="definition-value">${escapeHtml(item.value)}</div>
1476
+ </div>
1477
+ `
1478
+ ).join("")}</div>`;
1479
+ }
1480
/**
 * Counts the skippedPatterns entries across all issues.
 *
 * @param {Array<{ skippedPatterns?: object[] }>} issues
 * @returns {number} Total number of skipped patterns; issues without the
 *   field contribute zero.
 */
function countSkippedSecurityPatterns(issues) {
  let total = 0;
  for (const issue of issues) {
    total += issue.skippedPatterns?.length ?? 0;
  }
  return total;
}
1483
// Renders one lint issue: title, check id, status badge, message, an optional
// location entry, and collapsible blocks for the suggestion and for any
// skipped patterns recorded on the issue.
+ function renderLintIssueRow(issue) {
1484
+ const lineRange = formatLineRange(issue.startLine, issue.endLine);
1485
+ const detailBlocks = [];
1486
+ if (issue.suggestion) {
1487
+ detailBlocks.push(renderDetails("Suggestion", `<p>${escapeHtml(issue.suggestion)}</p>`));
1488
+ }
1489
// NOTE(review): skipped patterns are always printed as "lines N-M", even when
// N equals M, whereas the issue location above goes through formatLineRange —
// consider reusing it here for consistent output.
+ if (issue.skippedPatterns && issue.skippedPatterns.length > 0) {
1490
+ const patternItems = issue.skippedPatterns.map(
1491
+ (pattern) => `
1492
+ <div class="definition-item">
1493
+ <div class="definition-label">${escapeHtml(pattern.label)}</div>
1494
+ <div class="definition-value">${escapeHtml(
1495
+ `${pattern.zoneType} lines ${pattern.startLine}-${pattern.endLine}`
1496
+ )}</div>
1497
+ </div>
1498
+ `
1499
+ ).join("");
1500
+ detailBlocks.push(renderDetails("Skipped security patterns", `<div class="definition-list">${patternItems}</div>`));
1501
+ }
1502
+ return `
1503
+ <div class="row">
1504
+ <div class="row-header">
1505
+ <div>
1506
+ <div class="row-title">${escapeHtml(issue.title)}</div>
1507
+ <div class="row-subtitle">${escapeHtml(issue.checkId)}</div>
1508
+ </div>
1509
+ ${renderBadge(issue.status)}
1510
+ </div>
1511
+ <div class="row-body">${escapeHtml(issue.message)}</div>
1512
+ ${renderDefinitionList(
1513
+ [
1514
+ lineRange ? { label: "Location", value: lineRange } : null,
1515
+ { label: "Check ID", value: issue.checkId }
1516
+ ].filter((item) => item !== null)
1517
+ )}
1518
+ ${detailBlocks.join("")}
1519
+ </div>
1520
+ `;
1521
+ }
1522
/**
 * Renders every issue of a lint report as a row list, followed by an info
 * line with the total number of skipped patterns when that total is above zero.
 *
 * @param {{ issues: object[] }} report
 * @returns {string} HTML for the list.
 */
function renderLintIssueList(report) {
  const { issues } = report;
  const skippedTotal = countSkippedSecurityPatterns(issues);
  let html = '<div class="row-list">';
  for (const issue of issues) {
    html += renderLintIssueRow(issue);
  }
  html += "</div>";
  if (skippedTotal > 0) {
    html += `<p class="info-line">Skipped security patterns in examples/comments: ${escapeHtml(skippedTotal)}</p>`;
  }
  return html;
}
1528
// Renders one trigger test case: the query, a subtitle with expected/actual/
// should_trigger, a pass/fail badge driven by testCase.matched, and a
// collapsible raw model response when one is present.
+ function renderTriggerCaseRow(testCase) {
1529
+ const details = testCase.rawModelResponse ? renderDetails("Model response", renderPreBlock(testCase.rawModelResponse)) : "";
1530
+ return `
1531
+ <div class="row">
1532
+ <div class="row-header">
1533
+ <div>
1534
+ <div class="row-title">${escapeHtml(testCase.query)}</div>
1535
+ <div class="row-subtitle">${escapeHtml(
1536
+ `expected=${testCase.expected} actual=${testCase.actual} should_trigger=${String(testCase.shouldTrigger)}`
1537
+ )}</div>
1538
+ </div>
1539
+ ${renderBadge(testCase.matched ? "pass" : "fail")}
1540
+ </div>
1541
+ ${renderDefinitionList([
1542
+ { label: "Expected", value: testCase.expected },
1543
+ { label: "Actual", value: testCase.actual }
1544
+ ])}
1545
+ ${details}
1546
+ </div>
1547
+ `;
1548
+ }
1549
/**
 * Derives a badge status from a prompt's assertion counts.
 *
 * @param {{ passedAssertions: number, totalAssertions: number }} promptResult
 * @returns {"skip"|"pass"|"fail"|"warn"} "skip" with no assertions, "pass"
 *   when all passed, "fail" when none passed, "warn" otherwise.
 */
function promptStatus(promptResult) {
  const { passedAssertions, totalAssertions } = promptResult;
  if (totalAssertions === 0) {
    return "skip";
  }
  if (passedAssertions === totalAssertions) {
    return "pass";
  }
  return passedAssertions === 0 ? "fail" : "warn";
}
1561
/**
 * Renders one assertion as a collapsible block: the summary shows the
 * PASS/FAIL label followed by the assertion text, the body shows its evidence.
 *
 * @param {{ passed: boolean, assertion: string, evidence: string }} assertion
 * @returns {string} HTML for the block.
 */
function renderAssertionRow(assertion) {
  const verdict = assertion.passed ? "pass" : "fail";
  const summary = `${badgeLabel(verdict)} ${assertion.assertion}`;
  const evidence = renderPreBlock(assertion.evidence);
  return renderDetails(summary, evidence);
}
1567
// Renders one eval prompt result: the prompt, the passed/total assertion
// counts, a status badge from promptStatus, the response summary, and
// collapsible blocks for per-assertion evidence and the full model response.
+ function renderEvalPromptRow(promptResult) {
1568
+ const assertionDetails = promptResult.assertions.map((assertion) => renderAssertionRow(assertion)).join("");
1569
+ const responseDetails = renderDetails("Full model response", renderPreBlock(promptResult.response));
1570
+ return `
1571
+ <div class="row">
1572
+ <div class="row-header">
1573
+ <div>
1574
+ <div class="row-title">${escapeHtml(promptResult.prompt)}</div>
1575
+ <div class="row-subtitle">${escapeHtml(
1576
+ `${promptResult.passedAssertions}/${promptResult.totalAssertions} assertions passed`
1577
+ )}</div>
1578
+ </div>
1579
+ ${renderBadge(promptStatus(promptResult))}
1580
+ </div>
1581
+ <div class="row-body">${escapeHtml(promptResult.responseSummary)}</div>
1582
+ ${renderDefinitionList([
1583
+ { label: "Passed assertions", value: String(promptResult.passedAssertions) },
1584
+ { label: "Total assertions", value: String(promptResult.totalAssertions) }
1585
+ ])}
1586
+ ${renderDetails("Assertion evidence", assertionDetails || `<p>No assertions.</p>`)}
1587
+ ${responseDetails}
1588
+ </div>
1589
+ `;
1590
+ }
1591
/**
 * Converts a gate result into a badge status.
 *
 * @param {boolean|null} value - null means the gate was not evaluated.
 * @returns {"skip"|"pass"|"fail"} "skip" for null, "pass" for a truthy value,
 *   "fail" for any other value.
 */
function gateStatus(value) {
  if (value === null) {
    return "skip";
  }
  if (value) {
    return "pass";
  }
  return "fail";
}
1597
// Renders a gate card: escaped title, status badge and escaped message.
+ function renderGateCard(title, status, message) {
1598
+ return `
1599
+ <div class="gate-card">
1600
+ <div class="row-header">
1601
+ <div class="row-title">${escapeHtml(title)}</div>
1602
+ ${renderBadge(status)}
1603
+ </div>
1604
+ <div class="row-body">${escapeHtml(message)}</div>
1605
+ </div>
1606
+ `;
1607
+ }
1608
// Renders a <details> section that is open by default. The title and summary
// text are escaped; body is inserted as-is, so it must already be HTML.
+ function renderCollapsibleSection(title, summary, body, status) {
1609
+ return `
1610
+ <details class="section-card" open>
1611
+ <summary>
1612
+ <span class="section-title">${escapeHtml(title)}</span>
1613
+ <span class="section-summary">${renderBadge(status)} ${escapeHtml(summary)}</span>
1614
+ </summary>
1615
+ <div class="section-body">${body}</div>
1616
+ </details>
1617
+ `;
1618
+ }
1619
/**
 * Picks the target to display for a result.
 *
 * @param {{ target?: *}} result
 * @param {*} fallback - Used when result.target is null or undefined.
 * @returns {*} result.target, or the fallback.
 */
function resolveOptionalTarget(result, fallback) {
  const { target } = result;
  return target ?? fallback;
}
1622
/**
 * Wraps a rendered report body in a complete standalone HTML page with the inline stylesheet.
 *
 * @param {string} title - Page title; HTML-escaped before being placed in `<title>`.
 * @param {string} body - Pre-rendered markup placed verbatim inside `<main>`; it is NOT escaped here.
 * @returns {string} The full HTML document.
 */
function renderHtmlDocument(title, body) {
  const pageTitle = escapeHtml(title);
  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>${pageTitle}</title>
  <style>
    :root {
      color-scheme: light;
      --bg: #f5f5f5;
      --surface: #ffffff;
      --surface-muted: #fafafa;
      --border: #d4d4d8;
      --text: #111827;
      --muted: #6b7280;
      --pass: #22c55e;
      --warn: #eab308;
      --fail: #ef4444;
      --skip: #6b7280;
      --shadow: 0 10px 30px rgba(15, 23, 42, 0.08);
    }

    * {
      box-sizing: border-box;
    }

    body {
      margin: 0;
      background: linear-gradient(180deg, #fafafa 0%, #f4f4f5 100%);
      color: var(--text);
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
      line-height: 1.5;
    }

    .container {
      max-width: 1120px;
      margin: 0 auto;
      padding: 24px 16px 40px;
    }

    .card,
    .section-card {
      background: var(--surface);
      border: 1px solid var(--border);
      border-radius: 16px;
      box-shadow: var(--shadow);
      margin-bottom: 16px;
    }

    .card {
      padding: 20px;
    }

    .header-card h1,
    .card h2 {
      margin: 0 0 10px;
      font-size: 1.25rem;
    }

    .eyebrow {
      margin-bottom: 10px;
      color: var(--muted);
      font-size: 0.78rem;
      letter-spacing: 0.08em;
      text-transform: uppercase;
    }

    .target-line,
    .info-line {
      color: var(--muted);
      overflow-wrap: anywhere;
    }

    .meta-grid,
    .stats-grid,
    .gate-grid,
    .definition-list {
      display: grid;
      gap: 12px;
    }

    .meta-grid,
    .gate-grid,
    .definition-list {
      grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
    }

    .stats-grid {
      grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
      margin-top: 16px;
    }

    .meta-grid {
      margin-top: 14px;
    }

    .meta-item,
    .definition-item,
    .stat-card,
    .gate-card {
      background: var(--surface-muted);
      border: 1px solid var(--border);
      border-radius: 12px;
      padding: 12px;
    }

    .meta-item,
    .definition-item {
      display: flex;
      justify-content: space-between;
      gap: 12px;
    }

    .meta-label,
    .definition-label,
    .stat-label {
      color: var(--muted);
      font-size: 0.82rem;
    }

    .meta-value,
    .definition-value {
      text-align: right;
      overflow-wrap: anywhere;
    }

    .stat-value {
      margin-top: 4px;
      font-size: 1.3rem;
      font-weight: 700;
    }

    .stat-note {
      margin-top: 6px;
      color: var(--muted);
      font-size: 0.82rem;
    }

    .status-pass {
      border-color: rgba(34, 197, 94, 0.35);
    }

    .status-warn {
      border-color: rgba(234, 179, 8, 0.35);
    }

    .status-fail {
      border-color: rgba(239, 68, 68, 0.35);
    }

    .status-skip {
      border-color: rgba(107, 114, 128, 0.35);
    }

    .row-list {
      display: grid;
      gap: 12px;
    }

    .row {
      border: 1px solid var(--border);
      border-radius: 12px;
      padding: 14px;
      background: var(--surface-muted);
    }

    .row-header {
      display: flex;
      justify-content: space-between;
      align-items: flex-start;
      gap: 12px;
    }

    .row-title {
      font-weight: 700;
      overflow-wrap: anywhere;
    }

    .row-subtitle {
      margin-top: 4px;
      color: var(--muted);
      font-size: 0.84rem;
      overflow-wrap: anywhere;
    }

    .row-body {
      margin-top: 10px;
      overflow-wrap: anywhere;
    }

    .badge {
      display: inline-flex;
      align-items: center;
      justify-content: center;
      min-width: 58px;
      padding: 3px 10px;
      border-radius: 999px;
      border: 1px solid currentColor;
      font-size: 0.76rem;
      font-weight: 700;
      letter-spacing: 0.04em;
      white-space: nowrap;
    }

    .badge.pass {
      color: #15803d;
      background: rgba(34, 197, 94, 0.14);
    }

    .badge.warn {
      color: #a16207;
      background: rgba(234, 179, 8, 0.18);
    }

    .badge.fail {
      color: #b91c1c;
      background: rgba(239, 68, 68, 0.14);
    }

    .badge.skip {
      color: #4b5563;
      background: rgba(107, 114, 128, 0.14);
    }

    details {
      margin-top: 10px;
    }

    details summary {
      cursor: pointer;
      color: var(--muted);
    }

    .detail-block {
      border-top: 1px dashed var(--border);
      padding-top: 10px;
    }

    .detail-content p {
      margin: 0;
    }

    .section-card summary {
      display: flex;
      justify-content: space-between;
      align-items: center;
      gap: 12px;
      padding: 18px 20px;
      list-style: none;
    }

    .section-card summary::-webkit-details-marker {
      display: none;
    }

    .section-title {
      font-size: 1rem;
      font-weight: 700;
      color: var(--text);
    }

    .section-summary {
      display: inline-flex;
      align-items: center;
      gap: 8px;
      color: var(--muted);
      text-align: right;
    }

    .section-body {
      padding: 0 20px 20px;
    }

    .gate-grid {
      margin-top: 12px;
    }

    pre {
      margin: 0;
      padding: 12px;
      background: #f8fafc;
      border: 1px solid var(--border);
      border-radius: 10px;
      white-space: pre-wrap;
      word-break: break-word;
      overflow-wrap: anywhere;
    }

    ul {
      margin: 0;
      padding-left: 20px;
    }

    @media (max-width: 720px) {
      .container {
        padding: 16px 12px 28px;
      }

      .row-header,
      .section-card summary,
      .meta-item,
      .definition-item {
        flex-direction: column;
        align-items: flex-start;
      }

      .meta-value,
      .definition-value,
      .section-summary {
        text-align: left;
      }
    }
  </style>
</head>
<body>
  <main class="container">
    ${body}
  </main>
</body>
</html>`;
}
1944
/**
 * Builds the standalone HTML page for a lint report: a header card with summary
 * statistics followed by the list of lint issues.
 *
 * @param {object} report - Lint report; reads `target` and `summary.{passed,warnings,failures,total}`,
 *   and is passed whole to `renderLintIssueList`.
 * @returns {string} Complete HTML document.
 */
function renderLintHtml(report) {
  const { target, summary } = report;
  // An empty run reports 0% rather than dividing by zero.
  const passRate = summary.total === 0 ? 0 : summary.passed / summary.total;
  const stats = [
    { label: "Pass rate", value: formatPercent(passRate), note: `${summary.passed}/${summary.total} passed` },
    { label: "Warnings", value: String(summary.warnings), status: summary.warnings > 0 ? "warn" : "pass" },
    { label: "Failures", value: String(summary.failures), status: summary.failures > 0 ? "fail" : "pass" },
    { label: "Checks", value: String(summary.total) }
  ];
  const meta = [{ label: "Target", value: target }];
  const headerCard = renderHeaderCard("lint", "Static Analysis Report", target, stats, meta);
  const issuesCard = renderSectionCard("Lint Issues", renderLintIssueList(report));
  return renderHtmlDocument(`skilltest lint - ${target}`, [headerCard, issuesCard].join(""));
}
1963
/**
 * Builds the standalone HTML page for a trigger-test result: header statistics,
 * one row per test case, and the list of suggestions.
 *
 * @param {object} result - Trigger result; reads `skillName`, optional `target`, `cases`,
 *   `metrics.{precision,recall,f1}`, `provider`, `model`, optional `seed`, `queries`, `suggestions`.
 * @returns {string} Complete HTML document.
 */
function renderTriggerHtml(result) {
  const { skillName, cases, metrics } = result;
  const target = resolveOptionalTarget(result, skillName);
  const matchedCount = cases.filter((testCase) => testCase.matched).length;
  // No cases means a 0% match rate rather than a division by zero.
  const matchRate = cases.length === 0 ? 0 : matchedCount / cases.length;
  const stats = [
    { label: "Match rate", value: formatPercent(matchRate), note: `${matchedCount}/${cases.length} matched` },
    { label: "Precision", value: formatPercent(metrics.precision) },
    { label: "Recall", value: formatPercent(metrics.recall) },
    { label: "F1", value: formatPercent(metrics.f1), status: metrics.f1 >= 0.8 ? "pass" : "warn" }
  ];
  const meta = [
    { label: "Provider", value: result.provider },
    { label: "Model", value: result.model },
    { label: "Seed", value: result.seed === undefined ? "none" : String(result.seed) },
    { label: "Queries", value: String(result.queries.length) }
  ];
  const headerCard = renderHeaderCard("trigger", skillName, target, stats, meta);
  const caseRows = cases.map((testCase) => renderTriggerCaseRow(testCase)).join("");
  const casesCard = renderSectionCard("Trigger Cases", `<div class="row-list">${caseRows}</div>`);
  const suggestionItems = result.suggestions.map((suggestion) => `<li>${escapeHtml(suggestion)}</li>`).join("");
  const suggestionsCard = renderSectionCard("Suggestions", `<ul>${suggestionItems}</ul>`);
  return renderHtmlDocument(
    `skilltest trigger - ${skillName}`,
    [headerCard, casesCard, suggestionsCard].join("")
  );
}
1994
/**
 * Builds the standalone HTML page for an eval result: header statistics followed by
 * one row per evaluated prompt.
 *
 * @param {object} result - Eval result; reads `skillName`, optional `target`,
 *   `summary.{passedAssertions,totalAssertions,totalPrompts}`, `model`, `graderModel`,
 *   `provider`, `prompts`, `results`.
 * @returns {string} Complete HTML document.
 */
function renderEvalHtml(result) {
  const { skillName, summary } = result;
  const target = resolveOptionalTarget(result, skillName);
  // With no assertions at all the pass rate is reported as 0%.
  const passRate = summary.totalAssertions === 0 ? 0 : summary.passedAssertions / summary.totalAssertions;
  const stats = [
    {
      label: "Assertion pass rate",
      value: formatPercent(passRate),
      note: `${summary.passedAssertions}/${summary.totalAssertions} passed`
    },
    { label: "Prompts", value: String(summary.totalPrompts) },
    { label: "Model", value: result.model },
    { label: "Grader", value: result.graderModel }
  ];
  const meta = [
    { label: "Provider", value: result.provider },
    { label: "Execution model", value: result.model },
    { label: "Grader model", value: result.graderModel },
    { label: "Prompts", value: String(result.prompts.length) }
  ];
  const headerCard = renderHeaderCard("eval", skillName, target, stats, meta);
  const promptRows = result.results.map((promptResult) => renderEvalPromptRow(promptResult)).join("");
  const promptsCard = renderSectionCard("Eval Prompts", `<div class="row-list">${promptRows}</div>`);
  return renderHtmlDocument(`skilltest eval - ${skillName}`, [headerCard, promptsCard].join(""));
}
2024
/**
 * Builds the standalone HTML page for a full `check` run: a header card, collapsible
 * Lint / Trigger / Eval sections, and a final quality-gate summary.
 *
 * @param {object} result - Check result; reads `target`, `lint`, optional `trigger` and `eval`,
 *   `triggerSkippedReason`, `evalSkippedReason`, `gates`, `thresholds`, `provider`, `model`,
 *   `graderModel`.
 * @returns {string} Complete HTML document.
 */
function renderCheckHtml(result) {
  const { gates, thresholds, trigger, target } = result;
  const evalResult = result.eval;
  // The page is titled after the skill when a trigger or eval run names it, else after the target.
  const skillName = trigger?.skillName ?? evalResult?.skillName ?? target;
  const triggerSkipped = result.triggerSkippedReason ?? "Skipped.";
  const evalSkipped = result.evalSkippedReason ?? "Skipped.";

  let triggerBody;
  if (trigger) {
    const caseRows = trigger.cases.map((testCase) => renderTriggerCaseRow(testCase)).join("");
    const suggestionItems = trigger.suggestions.map((suggestion) => `<li>${escapeHtml(suggestion)}</li>`).join("");
    triggerBody = `<div class="row-list">${caseRows}</div>
      <div class="card" style="margin-top: 16px;">
        <h2>Trigger Suggestions</h2>
        <ul>${suggestionItems}</ul>
      </div>`;
  } else {
    triggerBody = renderMessageRow("skip", "Trigger skipped", triggerSkipped);
  }

  let evalBody;
  if (evalResult) {
    const promptRows = evalResult.results.map((promptResult) => renderEvalPromptRow(promptResult)).join("");
    evalBody = `<div class="row-list">${promptRows}</div>`;
  } else {
    evalBody = renderMessageRow("skip", "Eval skipped", evalSkipped);
  }

  const lintStatus = gates.lintPassed ? "pass" : "fail";
  const triggerStatus = gateStatus(gates.triggerPassed);
  const evalStatus = gateStatus(gates.evalPassed);
  const overallStatus = gates.overallPassed ? "pass" : "fail";
  const lintSummary = result.lint.summary;
  const requiredF1 = thresholds.minF1.toFixed(2);
  const requiredPassRate = thresholds.minAssertPassRate.toFixed(2);

  const stats = [
    { label: "Overall gate", value: badgeLabel(overallStatus), status: overallStatus },
    {
      label: "Trigger F1",
      value: gates.triggerF1 !== null ? formatPercent(gates.triggerF1) : "skipped",
      status: triggerStatus
    },
    {
      label: "Eval pass rate",
      value: gates.evalAssertPassRate !== null ? formatPercent(gates.evalAssertPassRate) : "skipped",
      status: evalStatus
    },
    {
      label: "Lint result",
      value: `${lintSummary.failures} fail / ${lintSummary.warnings} warn`,
      status: lintStatus
    }
  ];
  const meta = [
    { label: "Provider", value: result.provider },
    { label: "Model", value: result.model },
    { label: "Grader model", value: result.graderModel },
    {
      label: "Thresholds",
      value: `min-f1=${requiredF1} min-assert-pass-rate=${requiredPassRate}`
    }
  ];
  const header = renderHeaderCard("check", skillName, target, stats, meta);

  const lintSection = renderCollapsibleSection(
    "Lint",
    `${lintSummary.passed}/${lintSummary.total} passed, ${lintSummary.warnings} warnings, ${lintSummary.failures} failures`,
    renderLintIssueList(result.lint),
    lintStatus
  );

  const triggerSummary = trigger
    ? `f1=${formatPercent(trigger.metrics.f1)} precision=${formatPercent(trigger.metrics.precision)} recall=${formatPercent(trigger.metrics.recall)}`
    : triggerSkipped;
  const triggerSection = renderCollapsibleSection("Trigger", triggerSummary, triggerBody, triggerStatus);

  const evalSummary = evalResult
    ? `assertion pass rate=${formatPercent(gates.evalAssertPassRate ?? 0)} (${evalResult.summary.passedAssertions}/${evalResult.summary.totalAssertions})`
    : evalSkipped;
  const evalSection = renderCollapsibleSection("Eval", evalSummary, evalBody, evalStatus);

  // A gate whose pass flag is null did not run; show why instead of a required/actual comparison.
  const triggerGateMessage = gates.triggerPassed === null
    ? triggerSkipped
    : `required ${requiredF1}, actual ${gates.triggerF1?.toFixed(2) ?? "n/a"}`;
  const evalGateMessage = gates.evalPassed === null
    ? evalSkipped
    : `required ${requiredPassRate}, actual ${gates.evalAssertPassRate?.toFixed(2) ?? "n/a"}`;
  const lintGateMessage = gates.lintPassed ? "Lint passed." : "Lint failed.";
  const overallMessage = gates.overallPassed ? "All quality gates passed." : "One or more gates failed.";

  const qualityGate = renderSectionCard(
    "Quality Gate",
    `<div class="gate-grid">
      ${renderGateCard("Lint gate", lintStatus, lintGateMessage)}
      ${renderGateCard("Trigger gate", triggerStatus, triggerGateMessage)}
      ${renderGateCard("Eval gate", evalStatus, evalGateMessage)}
      ${renderGateCard("Overall", overallStatus, overallMessage)}
    </div>`
  );

  const sections = [header, lintSection, triggerSection, evalSection, qualityGate];
  return renderHtmlDocument(`skilltest check - ${skillName}`, sections.join(""));
}
2105
+
1061
2106
  // src/reporters/terminal.ts
1062
2107
  import { Chalk } from "chalk";
1063
2108
  function getChalkInstance(enableColor) {
@@ -1070,6 +2115,14 @@ function renderIssueLine(issue, c) {
1070
2115
  return ` ${label} ${issue.title}
1071
2116
  ${issue.message}${detail}`;
1072
2117
  }
2118
/**
 * Totals the `skippedPatterns` entries across all issues whose `checkId` starts with "security:".
 *
 * @param {Array<{ checkId: string, skippedPatterns?: unknown[] }>} issues - Lint issues to scan.
 * @returns {number} Sum of `skippedPatterns.length` over security issues; issues without the
 *   list contribute 0.
 */
function countSkippedSecurityPatterns2(issues) {
  let skipped = 0;
  for (const issue of issues) {
    if (issue.checkId.startsWith("security:")) {
      skipped += issue.skippedPatterns?.length ?? 0;
    }
  }
  return skipped;
}
1073
2126
  function renderLintReport(report, enableColor) {
1074
2127
  const c = getChalkInstance(enableColor);
1075
2128
  const { passed, warnings, failures, total } = report.summary;
@@ -1082,10 +2135,13 @@ function renderLintReport(report, enableColor) {
1082
2135
  `\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518`
1083
2136
  ];
1084
2137
  const renderedIssues = report.issues.map((issue) => renderIssueLine(issue, c)).join("\n");
2138
+ const skippedSecurityPatterns = countSkippedSecurityPatterns2(report.issues);
2139
+ const infoLine = skippedSecurityPatterns > 0 ? `
2140
+ ${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)` : "";
1085
2141
  return `${headerLines.join("\n")}
1086
- ${renderedIssues}`;
2142
+ ${renderedIssues}${infoLine}`;
1087
2143
  }
1088
- function formatPercent(value) {
2144
/**
 * Formats a ratio as a percentage string with one decimal place.
 *
 * @param {number} value - Ratio, where 1 corresponds to 100%.
 * @returns {string} For example "12.3%".
 */
function formatPercent2(value) {
  const percentage = value * 100;
  return `${percentage.toFixed(1)}%`;
}
1091
2147
  function renderTriggerReport(result, enableColor, verbose) {
@@ -1097,7 +2153,7 @@ function renderTriggerReport(result, enableColor, verbose) {
1097
2153
  lines.push(`\u2502 skill: ${result.skillName}`);
1098
2154
  lines.push(`\u2502 provider/model: ${result.provider}/${result.model}`);
1099
2155
  lines.push(
1100
- `\u2502 precision: ${formatPercent(result.metrics.precision)} recall: ${formatPercent(result.metrics.recall)} f1: ${formatPercent(result.metrics.f1)}`
2156
+ `\u2502 precision: ${formatPercent2(result.metrics.precision)} recall: ${formatPercent2(result.metrics.recall)} f1: ${formatPercent2(result.metrics.f1)}`
1101
2157
  );
1102
2158
  lines.push(
1103
2159
  `\u2502 TP ${result.metrics.truePositives} TN ${result.metrics.trueNegatives} FP ${result.metrics.falsePositives} FN ${result.metrics.falseNegatives}`
@@ -1171,11 +2227,15 @@ function renderCheckReport(result, enableColor, verbose) {
1171
2227
  for (const issue of lintIssues) {
1172
2228
  lines.push(renderIssueLine(issue, c));
1173
2229
  }
2230
+ const skippedSecurityPatterns = countSkippedSecurityPatterns2(result.lint.issues);
2231
+ if (skippedSecurityPatterns > 0) {
2232
+ lines.push(` ${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)`);
2233
+ }
1174
2234
  lines.push("");
1175
2235
  lines.push("Trigger");
1176
2236
  if (result.trigger) {
1177
2237
  lines.push(
1178
- `- ${triggerGate} f1=${formatPercent(result.trigger.metrics.f1)} (precision=${formatPercent(result.trigger.metrics.precision)} recall=${formatPercent(result.trigger.metrics.recall)})`
2238
+ `- ${triggerGate} f1=${formatPercent2(result.trigger.metrics.f1)} (precision=${formatPercent2(result.trigger.metrics.precision)} recall=${formatPercent2(result.trigger.metrics.recall)})`
1179
2239
  );
1180
2240
  lines.push(
1181
2241
  ` TP ${result.trigger.metrics.truePositives} TN ${result.trigger.metrics.trueNegatives} FP ${result.trigger.metrics.falsePositives} FN ${result.trigger.metrics.falseNegatives}`
@@ -1194,7 +2254,7 @@ function renderCheckReport(result, enableColor, verbose) {
1194
2254
  if (result.eval) {
1195
2255
  const passRate = result.gates.evalAssertPassRate ?? 0;
1196
2256
  lines.push(
1197
- `- ${evalGate} assertion pass rate=${formatPercent(passRate)} (${result.eval.summary.passedAssertions}/${result.eval.summary.totalAssertions})`
2257
+ `- ${evalGate} assertion pass rate=${formatPercent2(passRate)} (${result.eval.summary.passedAssertions}/${result.eval.summary.totalAssertions})`
1198
2258
  );
1199
2259
  for (const promptResult of result.eval.results) {
1200
2260
  const failedAssertions = promptResult.assertions.filter((assertion) => !assertion.passed);
@@ -1286,6 +2346,58 @@ async function gradeResponse(options) {
1286
2346
  return parsed.data.assertions;
1287
2347
  }
1288
2348
 
2349
+ // src/utils/concurrency.ts
2350
/**
 * Maps `items` through an async function with at most `concurrency` calls in flight.
 *
 * Results are returned in input order regardless of completion order. The first rejection
 * rejects the returned promise; tasks already running are left to finish, but no further
 * items are started.
 *
 * @param {Array} items - Values to process.
 * @param {(item: any, index: number) => any} fn - Mapper; may return a value or a promise.
 * @param {number} concurrency - Maximum simultaneous calls; must be an integer >= 1.
 * @returns {Promise<Array>} Mapped results, index-aligned with `items`.
 * @throws {Error} (as a rejection) when `concurrency` is not an integer >= 1.
 */
async function pMap(items, fn, concurrency) {
  if (!Number.isInteger(concurrency) || concurrency < 1) {
    throw new Error("pMap concurrency must be an integer greater than or equal to 1.");
  }
  if (items.length === 0) {
    return [];
  }
  const results = new Array(items.length);
  let cursor = 0;
  let failed = false;
  // Each worker repeatedly claims the next unclaimed index until the input is exhausted
  // or some task has failed.
  const runWorker = async () => {
    while (!failed && cursor < items.length) {
      const position = cursor;
      cursor += 1;
      try {
        // Invoke fn from a microtask so it never runs synchronously inside pMap and a
        // synchronous throw becomes a rejection.
        const value = await Promise.resolve().then(() => fn(items[position], position));
        if (failed) {
          return;
        }
        results[position] = value;
      } catch (error) {
        failed = true;
        throw error;
      }
    }
  };
  const workerCount = Math.min(concurrency, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return results;
}
2400
+
1289
2401
  // src/core/eval-runner.ts
1290
2402
  var evalPromptSchema = z3.object({
1291
2403
  prompt: z3.string().min(1),
@@ -1332,34 +2444,37 @@ async function generatePrompts(skill, provider, model, count) {
1332
2444
  }
1333
2445
  async function runEval(skill, options) {
1334
2446
  const prompts = options.prompts && options.prompts.length > 0 ? evalPromptArraySchema.parse(options.prompts) : await generatePrompts(skill, options.provider, options.model, options.numRuns);
1335
- const results = [];
1336
- for (const evalPrompt of prompts) {
1337
- const systemPrompt = [
1338
- "You are an AI assistant with an activated skill.",
1339
- "Follow this SKILL.md content exactly where applicable.",
1340
- "",
1341
- skill.raw
1342
- ].join("\n");
1343
- const response = await options.provider.sendMessage(systemPrompt, evalPrompt.prompt, { model: options.model });
1344
- const gradedAssertions = await gradeResponse({
1345
- provider: options.provider,
1346
- model: options.graderModel,
1347
- skillName: skill.frontmatter.name,
1348
- skillBody: skill.content,
1349
- userPrompt: evalPrompt.prompt,
1350
- modelResponse: response,
1351
- assertions: evalPrompt.assertions
1352
- });
1353
- const passedAssertions2 = gradedAssertions.filter((assertion) => assertion.passed).length;
1354
- results.push({
1355
- prompt: evalPrompt.prompt,
1356
- assertions: gradedAssertions,
1357
- responseSummary: response.slice(0, 200),
1358
- response,
1359
- passedAssertions: passedAssertions2,
1360
- totalAssertions: gradedAssertions.length
1361
- });
1362
- }
2447
+ const systemPrompt = [
2448
+ "You are an AI assistant with an activated skill.",
2449
+ "Follow this SKILL.md content exactly where applicable.",
2450
+ "",
2451
+ skill.raw
2452
+ ].join("\n");
2453
+ const results = await pMap(
2454
+ prompts,
2455
+ async (evalPrompt) => {
2456
+ const response = await options.provider.sendMessage(systemPrompt, evalPrompt.prompt, { model: options.model });
2457
+ const gradedAssertions = await gradeResponse({
2458
+ provider: options.provider,
2459
+ model: options.graderModel,
2460
+ skillName: skill.frontmatter.name,
2461
+ skillBody: skill.content,
2462
+ userPrompt: evalPrompt.prompt,
2463
+ modelResponse: response,
2464
+ assertions: evalPrompt.assertions
2465
+ });
2466
+ const passedAssertions2 = gradedAssertions.filter((assertion) => assertion.passed).length;
2467
+ return {
2468
+ prompt: evalPrompt.prompt,
2469
+ assertions: gradedAssertions,
2470
+ responseSummary: response.slice(0, 200),
2471
+ response,
2472
+ passedAssertions: passedAssertions2,
2473
+ totalAssertions: gradedAssertions.length
2474
+ };
2475
+ },
2476
+ options.concurrency ?? 5
2477
+ );
1363
2478
  const totalAssertions = results.reduce((total, result) => total + result.totalAssertions, 0);
1364
2479
  const passedAssertions = results.reduce((total, result) => total + result.passedAssertions, 0);
1365
2480
  return {
@@ -1401,23 +2516,28 @@ var FAKE_SKILLS = [
1401
2516
  { name: "test-generator", description: "Generates unit and integration test cases from feature requirements." },
1402
2517
  { name: "prompt-tuner", description: "Improves prompts for reliability, formatting, and failure handling." }
1403
2518
  ];
1404
- function createSeededRandom(seed) {
1405
- let state = seed >>> 0;
2519
/**
 * Creates a deterministic pseudo-random generator from a 32-bit seed.
 *
 * @param {number} seed - Initial state; coerced to a 32-bit integer on first use.
 * @returns {() => number} Function returning the next value in [0, 1) on each call.
 */
function mulberry32(seed) {
  let state = seed;
  return () => {
    // Advance the 32-bit state by a fixed odd increment (wraps on overflow).
    state = ((state | 0) + 1831565813) | 0;
    const mixed = Math.imul(state ^ (state >>> 15), 1 | state);
    const scrambled = (mixed + Math.imul(mixed ^ (mixed >>> 7), 61 | mixed)) ^ mixed;
    // Reinterpret as unsigned and scale by 2^32 so the result lands in [0, 1).
    return ((scrambled ^ (scrambled >>> 14)) >>> 0) / 4294967296;
  };
}
1411
- function shuffle(values, random = Math.random) {
2528
/**
 * Chooses the random source: a seeded `mulberry32` generator when a seed is given,
 * otherwise `Math.random`.
 *
 * @param {number} [seed] - Optional seed.
 * @returns {() => number} Random number function.
 */
function createRng(seed) {
  if (seed === undefined) {
    return Math.random;
  }
  return mulberry32(seed);
}
2531
/**
 * Returns a shuffled copy of `values`; the input is left untouched.
 *
 * Walks from the last position down to index 1, swapping each position with an index
 * chosen from the positions at or before it.
 *
 * @param {Iterable} values - Items to shuffle.
 * @param {() => number} rng - Random source returning numbers in [0, 1).
 * @returns {Array} New array holding the same items in shuffled order.
 */
function shuffle(values, rng) {
  const shuffled = [...values];
  let position = shuffled.length;
  while (position > 1) {
    position -= 1;
    const pick = Math.floor(rng() * (position + 1));
    const held = shuffled[position];
    shuffled[position] = shuffled[pick];
    shuffled[pick] = held;
  }
  return shuffled;
}
1419
- function sample(values, count, random = Math.random) {
1420
- return shuffle(values, random).slice(0, Math.max(0, Math.min(count, values.length)));
2539
/**
 * Picks up to `count` items from `values` by shuffling and taking a prefix.
 *
 * @param {Array} values - Pool to draw from.
 * @param {number} count - Requested number of items; clamped to the range 0..values.length.
 * @param {() => number} rng - Random source forwarded to `shuffle`.
 * @returns {Array} New array with the selected items.
 */
function sample(values, count, rng) {
  const limit = Math.max(0, Math.min(count, values.length));
  return shuffle(values, rng).slice(0, limit);
}
1422
2542
  function parseJsonArrayFromModelOutput(raw) {
1423
2543
  const trimmed = raw.trim();
@@ -1529,48 +2649,61 @@ function buildSuggestions(metrics) {
1529
2649
  return suggestions;
1530
2650
  }
1531
2651
  async function runTriggerTest(skill, options) {
1532
- const random = options.seed === void 0 ? Math.random : createSeededRandom(options.seed);
2652
+ const rng = createRng(options.seed);
1533
2653
  const queries = options.queries && options.queries.length > 0 ? triggerQueryArraySchema.parse(options.queries) : await generateQueriesWithModel(skill, options.provider, options.model, options.numQueries);
1534
- const results = [];
1535
2654
  const skillName = skill.frontmatter.name;
1536
- for (const testQuery of queries) {
1537
- const fakeCount = 5 + Math.floor(random() * 4);
1538
- const fakeSkills = sample(FAKE_SKILLS, fakeCount, random);
2655
+ const preparedQueries = queries.map((testQuery) => {
2656
+ const fakeCount = 5 + Math.floor(rng() * 5);
2657
+ const fakeSkills = sample(FAKE_SKILLS, fakeCount, rng);
1539
2658
  const allSkills = shuffle([
1540
2659
  ...fakeSkills,
1541
2660
  {
1542
2661
  name: skill.frontmatter.name,
1543
2662
  description: skill.frontmatter.description
1544
2663
  }
1545
- ], random);
2664
+ ], rng);
1546
2665
  const skillListText = allSkills.map((entry) => `- ${entry.name}: ${entry.description}`).join("\n");
1547
- const systemPrompt = [
1548
- "You are selecting one skill to activate for a user query.",
1549
- "Choose the single best matching skill name from the provided list, or 'none' if no skill is a good fit.",
1550
- "Respond with only the skill name or 'none'."
1551
- ].join(" ");
1552
- const userPrompt = [`Available skills:`, skillListText, "", `User query: ${testQuery.query}`].join("\n");
1553
- const rawResponse = await options.provider.sendMessage(systemPrompt, userPrompt, { model: options.model });
1554
- const decision = parseDecision(
1555
- rawResponse,
1556
- allSkills.map((entry) => entry.name)
1557
- );
1558
- const expected = testQuery.should_trigger ? skillName : "none";
1559
- const matched = testQuery.should_trigger ? decision === skillName : decision !== skillName;
1560
- results.push({
1561
- query: testQuery.query,
1562
- shouldTrigger: testQuery.should_trigger,
1563
- expected,
1564
- actual: decision,
1565
- matched,
1566
- rawModelResponse: options.verbose ? rawResponse : void 0
1567
- });
1568
- }
2666
+ return {
2667
+ testQuery,
2668
+ fakeCount,
2669
+ fakeSkills,
2670
+ allSkills,
2671
+ skillListText
2672
+ };
2673
+ });
2674
+ const systemPrompt = [
2675
+ "You are selecting one skill to activate for a user query.",
2676
+ "Choose the single best matching skill name from the provided list, or 'none' if no skill is a good fit.",
2677
+ "Respond with only the skill name or 'none'."
2678
+ ].join(" ");
2679
+ const results = await pMap(
2680
+ preparedQueries,
2681
+ async ({ testQuery, allSkills, skillListText }) => {
2682
+ const userPrompt = [`Available skills:`, skillListText, "", `User query: ${testQuery.query}`].join("\n");
2683
+ const rawResponse = await options.provider.sendMessage(systemPrompt, userPrompt, { model: options.model });
2684
+ const decision = parseDecision(
2685
+ rawResponse,
2686
+ allSkills.map((entry) => entry.name)
2687
+ );
2688
+ const expected = testQuery.should_trigger ? skillName : "none";
2689
+ const matched = testQuery.should_trigger ? decision === skillName : decision !== skillName;
2690
+ return {
2691
+ query: testQuery.query,
2692
+ shouldTrigger: testQuery.should_trigger,
2693
+ expected,
2694
+ actual: decision,
2695
+ matched,
2696
+ rawModelResponse: options.verbose ? rawResponse : void 0
2697
+ };
2698
+ },
2699
+ options.concurrency ?? 5
2700
+ );
1569
2701
  const metrics = calculateMetrics(skillName, results);
1570
2702
  return {
1571
2703
  skillName,
1572
2704
  model: options.model,
1573
2705
  provider: options.provider.name,
2706
+ seed: options.seed,
1574
2707
  queries,
1575
2708
  cases: results,
1576
2709
  metrics,
@@ -1730,6 +2863,9 @@ function writeError(error, asJson) {
1730
2863
  }
1731
2864
 
1732
2865
  // src/commands/lint.ts
2866
// Schema for the lint command's own CLI flags: `html` is an optional string.
+ var lintCliSchema = z6.object({
2867
+ html: z6.string().optional()
2868
+ });
1733
2869
  async function handleLintCommand(targetPath, options) {
1734
2870
  try {
1735
2871
  const report = await runLinter(targetPath, { suppress: options.suppress });
@@ -1738,6 +2874,9 @@ async function handleLintCommand(targetPath, options) {
1738
2874
  } else {
1739
2875
  writeResult(renderLintReport(report, options.color), false);
1740
2876
  }
2877
+ if (options.html) {
2878
+ await fs6.writeFile(options.html, renderLintHtml(report), "utf8");
2879
+ }
1741
2880
  if (lintFails(report, options.failOn)) {
1742
2881
  process.exitCode = 1;
1743
2882
  }
@@ -1747,74 +2886,85 @@ async function handleLintCommand(targetPath, options) {
1747
2886
  }
1748
2887
  }
1749
2888
  function registerLintCommand(program) {
1750
- program.command("lint").description("Run static lint checks against a SKILL.md file or skill directory.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").action(async (targetPath, _commandOptions, command) => {
2889
+ program.command("lint").description("Run static lint checks against a SKILL.md file or skill directory.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--html <path>", "Write an HTML report to the given file path").action(async (targetPath, _commandOptions, command) => {
1751
2890
  const globalOptions = getGlobalCliOptions(command);
1752
2891
  const config = getResolvedConfig(command);
2892
+ const parsedCli = lintCliSchema.safeParse(command.opts());
2893
+ if (!parsedCli.success) {
2894
+ writeError(new Error(parsedCli.error.issues[0]?.message ?? "Invalid lint options."), globalOptions.json);
2895
+ process.exitCode = 2;
2896
+ return;
2897
+ }
1753
2898
  await handleLintCommand(targetPath, {
1754
2899
  ...globalOptions,
1755
2900
  failOn: config.lint.failOn,
1756
- suppress: config.lint.suppress
2901
+ suppress: config.lint.suppress,
2902
+ html: parsedCli.data.html
1757
2903
  });
1758
2904
  });
1759
2905
  }
1760
2906
 
1761
2907
  // src/commands/trigger.ts
2908
+ import fs8 from "node:fs/promises";
1762
2909
  import ora from "ora";
1763
- import { z as z7 } from "zod";
2910
+ import { z as z8 } from "zod";
1764
2911
 
1765
2912
  // src/utils/config.ts
1766
- import fs6 from "node:fs/promises";
2913
+ import fs7 from "node:fs/promises";
1767
2914
  import path5 from "node:path";
1768
- import { z as z6 } from "zod";
1769
- var providerNameSchema = z6.enum(["anthropic", "openai"]);
1770
- var lintFailOnSchema = z6.enum(["error", "warn"]);
1771
- var lintConfigSchema = z6.object({
2915
+ import { z as z7 } from "zod";
2916
+ var providerNameSchema = z7.enum(["anthropic", "openai"]);
2917
+ var lintFailOnSchema = z7.enum(["error", "warn"]);
2918
+ var lintConfigSchema = z7.object({
1772
2919
  failOn: lintFailOnSchema.optional(),
1773
- suppress: z6.array(z6.string().min(1)).optional()
2920
+ suppress: z7.array(z7.string().min(1)).optional()
1774
2921
  }).strict();
1775
- var triggerConfigSchema = z6.object({
1776
- numQueries: z6.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
1777
- threshold: z6.number().min(0).max(1).optional(),
1778
- seed: z6.number().int().optional()
2922
+ var triggerConfigSchema = z7.object({
2923
+ numQueries: z7.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
2924
+ threshold: z7.number().min(0).max(1).optional(),
2925
+ seed: z7.number().int().optional()
1779
2926
  }).strict().partial();
1780
- var evalConfigSchema = z6.object({
1781
- numRuns: z6.number().int().min(1).optional(),
1782
- threshold: z6.number().min(0).max(1).optional(),
1783
- promptFile: z6.string().min(1).optional(),
1784
- assertionsFile: z6.string().min(1).optional()
2927
+ var evalConfigSchema = z7.object({
2928
+ numRuns: z7.number().int().min(1).optional(),
2929
+ threshold: z7.number().min(0).max(1).optional(),
2930
+ promptFile: z7.string().min(1).optional(),
2931
+ assertionsFile: z7.string().min(1).optional()
1785
2932
  }).strict().partial();
1786
- var skilltestConfigSchema = z6.object({
2933
+ var skilltestConfigSchema = z7.object({
1787
2934
  provider: providerNameSchema.optional(),
1788
- model: z6.string().min(1).optional(),
1789
- json: z6.boolean().optional(),
2935
+ model: z7.string().min(1).optional(),
2936
+ json: z7.boolean().optional(),
2937
+ concurrency: z7.number().int().min(1).optional(),
1790
2938
  lint: lintConfigSchema.optional(),
1791
2939
  trigger: triggerConfigSchema.optional(),
1792
2940
  eval: evalConfigSchema.optional()
1793
2941
  }).strict();
1794
- var resolvedSkilltestConfigSchema = z6.object({
2942
+ var resolvedSkilltestConfigSchema = z7.object({
1795
2943
  provider: providerNameSchema,
1796
- model: z6.string().min(1),
1797
- json: z6.boolean(),
1798
- lint: z6.object({
2944
+ model: z7.string().min(1),
2945
+ json: z7.boolean(),
2946
+ concurrency: z7.number().int().min(1),
2947
+ lint: z7.object({
1799
2948
  failOn: lintFailOnSchema,
1800
- suppress: z6.array(z6.string().min(1))
2949
+ suppress: z7.array(z7.string().min(1))
1801
2950
  }),
1802
- trigger: z6.object({
1803
- numQueries: z6.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
1804
- threshold: z6.number().min(0).max(1),
1805
- seed: z6.number().int().optional()
2951
+ trigger: z7.object({
2952
+ numQueries: z7.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
2953
+ threshold: z7.number().min(0).max(1),
2954
+ seed: z7.number().int().optional()
1806
2955
  }),
1807
- eval: z6.object({
1808
- numRuns: z6.number().int().min(1),
1809
- threshold: z6.number().min(0).max(1),
1810
- promptFile: z6.string().min(1).optional(),
1811
- assertionsFile: z6.string().min(1).optional()
2956
+ eval: z7.object({
2957
+ numRuns: z7.number().int().min(1),
2958
+ threshold: z7.number().min(0).max(1),
2959
+ promptFile: z7.string().min(1).optional(),
2960
+ assertionsFile: z7.string().min(1).optional()
1812
2961
  })
1813
2962
  });
1814
2963
  var DEFAULT_SKILLTEST_CONFIG = {
1815
2964
  provider: "anthropic",
1816
2965
  model: "claude-sonnet-4-5-20250929",
1817
2966
  json: false,
2967
+ concurrency: 5,
1818
2968
  lint: {
1819
2969
  failOn: "error",
1820
2970
  suppress: []
@@ -1843,7 +2993,7 @@ function buildConfigValidationError(error, sourceLabel) {
1843
2993
  async function readJsonObject(filePath, label) {
1844
2994
  let raw;
1845
2995
  try {
1846
- raw = await fs6.readFile(filePath, "utf8");
2996
+ raw = await fs7.readFile(filePath, "utf8");
1847
2997
  } catch (error) {
1848
2998
  const message = error instanceof Error ? error.message : String(error);
1849
2999
  throw new Error(`Failed to read ${label}: ${message}`);
@@ -1876,7 +3026,7 @@ async function loadConfigFromNearestPackageJson(startDirectory) {
1876
3026
  const packageJsonPath = path5.join(currentDirectory, "package.json");
1877
3027
  if (await pathExists(packageJsonPath)) {
1878
3028
  const raw = await readJsonObject(packageJsonPath, packageJsonPath);
1879
- const packageJsonSchema = z6.object({
3029
+ const packageJsonSchema = z7.object({
1880
3030
  skilltestrc: skilltestConfigSchema.optional()
1881
3031
  }).passthrough();
1882
3032
  const parsed = packageJsonSchema.safeParse(raw);
@@ -1921,6 +3071,7 @@ function mergeConfigLayers(configFile = {}, cliFlags = {}, baseDirectory = proce
1921
3071
  provider: cliFlags.provider ?? configFile.provider ?? DEFAULT_SKILLTEST_CONFIG.provider,
1922
3072
  model: cliFlags.model ?? configFile.model ?? DEFAULT_SKILLTEST_CONFIG.model,
1923
3073
  json: cliFlags.json ?? configFile.json ?? DEFAULT_SKILLTEST_CONFIG.json,
3074
+ concurrency: cliFlags.concurrency ?? configFile.concurrency ?? DEFAULT_SKILLTEST_CONFIG.concurrency,
1924
3075
  lint: {
1925
3076
  failOn: cliFlags.lint?.failOn ?? configFile.lint?.failOn ?? DEFAULT_SKILLTEST_CONFIG.lint.failOn,
1926
3077
  suppress: cliFlags.lint?.suppress ?? configFile.lint?.suppress ?? DEFAULT_SKILLTEST_CONFIG.lint.suppress
@@ -1964,6 +3115,9 @@ function extractCliConfigOverrides(command) {
1964
3115
  if (command.getOptionValueSource("model") === "cli") {
1965
3116
  overrides.model = getTypedOptionValue(command, "model");
1966
3117
  }
3118
+ if ((command.name() === "trigger" || command.name() === "eval" || command.name() === "check") && command.getOptionValueSource("concurrency") === "cli") {
3119
+ overrides.concurrency = getTypedOptionValue(command, "concurrency");
3120
+ }
1967
3121
  if ((command.name() === "trigger" || command.name() === "check") && command.getOptionValueSource("numQueries") === "cli") {
1968
3122
  overrides.trigger = {
1969
3123
  ...overrides.trigger,
@@ -1993,7 +3147,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
1993
3147
  const skillDirectoryConfig = await resolveSkillDirectoryConfig(targetPath);
1994
3148
  if (skillDirectoryConfig) {
1995
3149
  return {
1996
- configFile: skillDirectoryConfig.configFile,
1997
3150
  ...skillDirectoryConfig,
1998
3151
  config: mergeConfigLayers(skillDirectoryConfig.configFile, cliFlags, skillDirectoryConfig.sourceDirectory)
1999
3152
  };
@@ -2002,7 +3155,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
2002
3155
  const cwdConfig = await loadConfigFromJsonFile(cwdConfigPath);
2003
3156
  if (cwdConfig) {
2004
3157
  return {
2005
- configFile: cwdConfig.configFile,
2006
3158
  ...cwdConfig,
2007
3159
  config: mergeConfigLayers(cwdConfig.configFile, cliFlags, cwdConfig.sourceDirectory)
2008
3160
  };
@@ -2010,7 +3162,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
2010
3162
  const packageJsonConfig = await loadConfigFromNearestPackageJson(cwd);
2011
3163
  if (packageJsonConfig) {
2012
3164
  return {
2013
- configFile: packageJsonConfig.configFile,
2014
3165
  ...packageJsonConfig,
2015
3166
  config: mergeConfigLayers(packageJsonConfig.configFile, cliFlags, packageJsonConfig.sourceDirectory)
2016
3167
  };
@@ -2218,11 +3369,14 @@ function createProvider(providerName, apiKeyOverride) {
2218
3369
  }
2219
3370
 
2220
3371
  // src/commands/trigger.ts
2221
- var triggerCliSchema = z7.object({
2222
- queries: z7.string().optional(),
2223
- saveQueries: z7.string().optional(),
2224
- verbose: z7.boolean().optional(),
2225
- apiKey: z7.string().optional()
3372
+ var triggerCliSchema = z8.object({
3373
+ queries: z8.string().optional(),
3374
+ saveQueries: z8.string().optional(),
3375
+ seed: z8.number().int().optional(),
3376
+ concurrency: z8.number().int().min(1).optional(),
3377
+ html: z8.string().optional(),
3378
+ verbose: z8.boolean().optional(),
3379
+ apiKey: z8.string().optional()
2226
3380
  });
2227
3381
  var DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-5-20250929";
2228
3382
  var DEFAULT_OPENAI_MODEL = "gpt-4.1-mini";
@@ -2232,6 +3386,13 @@ function resolveModel(provider, model) {
2232
3386
  }
2233
3387
  return model;
2234
3388
  }
3389
+ function renderTriggerOutputWithSeed(output, seed) {
3390
+ if (seed === void 0) {
3391
+ return output;
3392
+ }
3393
+ return `${output}
3394
+ Seed: ${seed}`;
3395
+ }
2235
3396
  async function handleTriggerCommand(targetPath, options) {
2236
3397
  const spinner = options.json || !process.stdout.isTTY ? null : ora("Preparing trigger evaluation...").start();
2237
3398
  try {
@@ -2260,6 +3421,7 @@ async function handleTriggerCommand(targetPath, options) {
2260
3421
  queries,
2261
3422
  numQueries: options.numQueries,
2262
3423
  seed: options.seed,
3424
+ concurrency: options.concurrency,
2263
3425
  verbose: options.verbose
2264
3426
  });
2265
3427
  if (options.saveQueries) {
@@ -2269,7 +3431,14 @@ async function handleTriggerCommand(targetPath, options) {
2269
3431
  if (options.json) {
2270
3432
  writeResult(result, true);
2271
3433
  } else {
2272
- writeResult(renderTriggerReport(result, options.color, options.verbose), false);
3434
+ writeResult(renderTriggerOutputWithSeed(renderTriggerReport(result, options.color, options.verbose), result.seed), false);
3435
+ }
3436
+ if (options.html) {
3437
+ const htmlResult = {
3438
+ ...result,
3439
+ target: targetPath
3440
+ };
3441
+ await fs8.writeFile(options.html, renderTriggerHtml(htmlResult), "utf8");
2273
3442
  }
2274
3443
  } catch (error) {
2275
3444
  spinner?.stop();
@@ -2278,7 +3447,7 @@ async function handleTriggerCommand(targetPath, options) {
2278
3447
  }
2279
3448
  }
2280
3449
  function registerTriggerCommand(program) {
2281
- program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
3450
+ program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--concurrency <n>", "Maximum in-flight trigger requests", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
2282
3451
  const globalOptions = getGlobalCliOptions(command);
2283
3452
  const config = getResolvedConfig(command);
2284
3453
  const parsedCli = triggerCliSchema.safeParse(command.opts());
@@ -2294,7 +3463,9 @@ function registerTriggerCommand(program) {
2294
3463
  queries: parsedCli.data.queries,
2295
3464
  numQueries: config.trigger.numQueries,
2296
3465
  saveQueries: parsedCli.data.saveQueries,
2297
- seed: config.trigger.seed,
3466
+ seed: parsedCli.data.seed ?? config.trigger.seed,
3467
+ concurrency: config.concurrency,
3468
+ html: parsedCli.data.html,
2298
3469
  verbose: Boolean(parsedCli.data.verbose),
2299
3470
  apiKey: parsedCli.data.apiKey
2300
3471
  });
@@ -2302,14 +3473,17 @@ function registerTriggerCommand(program) {
2302
3473
  }
2303
3474
 
2304
3475
  // src/commands/eval.ts
3476
+ import fs9 from "node:fs/promises";
2305
3477
  import ora2 from "ora";
2306
- import { z as z8 } from "zod";
2307
- var evalCliSchema = z8.object({
2308
- prompts: z8.string().optional(),
2309
- graderModel: z8.string().optional(),
2310
- saveResults: z8.string().optional(),
2311
- verbose: z8.boolean().optional(),
2312
- apiKey: z8.string().optional()
3478
+ import { z as z9 } from "zod";
3479
+ var evalCliSchema = z9.object({
3480
+ prompts: z9.string().optional(),
3481
+ graderModel: z9.string().optional(),
3482
+ saveResults: z9.string().optional(),
3483
+ concurrency: z9.number().int().min(1).optional(),
3484
+ html: z9.string().optional(),
3485
+ verbose: z9.boolean().optional(),
3486
+ apiKey: z9.string().optional()
2313
3487
  });
2314
3488
  var DEFAULT_ANTHROPIC_MODEL2 = "claude-sonnet-4-5-20250929";
2315
3489
  var DEFAULT_OPENAI_MODEL2 = "gpt-4.1-mini";
@@ -2349,6 +3523,7 @@ async function handleEvalCommand(targetPath, options, command) {
2349
3523
  model,
2350
3524
  graderModel,
2351
3525
  numRuns: options.numRuns,
3526
+ concurrency: options.concurrency,
2352
3527
  prompts
2353
3528
  });
2354
3529
  if (options.saveResults) {
@@ -2360,6 +3535,13 @@ async function handleEvalCommand(targetPath, options, command) {
2360
3535
  } else {
2361
3536
  writeResult(renderEvalReport(result, options.color, options.verbose), false);
2362
3537
  }
3538
+ if (options.html) {
3539
+ const htmlResult = {
3540
+ ...result,
3541
+ target: targetPath
3542
+ };
3543
+ await fs9.writeFile(options.html, renderEvalHtml(htmlResult), "utf8");
3544
+ }
2363
3545
  } catch (error) {
2364
3546
  spinner?.stop();
2365
3547
  writeError(error, options.json);
@@ -2367,7 +3549,7 @@ async function handleEvalCommand(targetPath, options, command) {
2367
3549
  }
2368
3550
  }
2369
3551
  function registerEvalCommand(program) {
2370
- program.command("eval").description("Run end-to-end skill execution and quality evaluation.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--prompts <path>", "Path to eval prompts JSON").option("--model <model>", "Model to execute prompts").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--provider <provider>", "LLM provider: anthropic|openai").option("--save-results <path>", "Save full evaluation results to JSON").option("--api-key <key>", "API key override").option("--verbose", "Show full model responses").action(async (targetPath, _commandOptions, command) => {
3552
+ program.command("eval").description("Run end-to-end skill execution and quality evaluation.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--prompts <path>", "Path to eval prompts JSON").option("--model <model>", "Model to execute prompts").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--provider <provider>", "LLM provider: anthropic|openai").option("--concurrency <n>", "Maximum in-flight eval prompt runs", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--save-results <path>", "Save full evaluation results to JSON").option("--api-key <key>", "API key override").option("--verbose", "Show full model responses").action(async (targetPath, _commandOptions, command) => {
2371
3553
  const globalOptions = getGlobalCliOptions(command);
2372
3554
  const config = getResolvedConfig(command);
2373
3555
  const parsedCli = evalCliSchema.safeParse(command.opts());
@@ -2385,9 +3567,11 @@ function registerEvalCommand(program) {
2385
3567
  graderModel: parsedCli.data.graderModel,
2386
3568
  provider: config.provider,
2387
3569
  saveResults: parsedCli.data.saveResults,
3570
+ html: parsedCli.data.html,
2388
3571
  verbose: Boolean(parsedCli.data.verbose),
2389
3572
  apiKey: parsedCli.data.apiKey,
2390
- numRuns: config.eval.numRuns
3573
+ numRuns: config.eval.numRuns,
3574
+ concurrency: config.concurrency
2391
3575
  },
2392
3576
  command
2393
3577
  );
@@ -2395,8 +3579,9 @@ function registerEvalCommand(program) {
2395
3579
  }
2396
3580
 
2397
3581
  // src/commands/check.ts
3582
+ import fs10 from "node:fs/promises";
2398
3583
  import ora3 from "ora";
2399
- import { z as z9 } from "zod";
3584
+ import { z as z10 } from "zod";
2400
3585
 
2401
3586
  // src/core/check-runner.ts
2402
3587
  function calculateEvalAssertPassRate(result) {
@@ -2427,23 +3612,33 @@ async function runCheck(inputPath, options) {
2427
3612
  evalSkippedReason = `Skipped: skill could not be parsed strictly (${message}).`;
2428
3613
  }
2429
3614
  if (parsedSkill) {
2430
- options.onStage?.("trigger");
2431
- trigger = await runTriggerTest(parsedSkill, {
3615
+ const triggerOptions = {
2432
3616
  provider: options.provider,
2433
3617
  model: options.model,
2434
3618
  queries: options.queries,
2435
3619
  numQueries: options.numQueries,
2436
3620
  seed: options.triggerSeed,
3621
+ concurrency: options.concurrency,
2437
3622
  verbose: options.verbose
2438
- });
2439
- options.onStage?.("eval");
2440
- evalResult = await runEval(parsedSkill, {
3623
+ };
3624
+ const evalOptions = {
2441
3625
  provider: options.provider,
2442
3626
  model: options.model,
2443
3627
  graderModel: options.graderModel,
2444
3628
  numRuns: options.evalNumRuns,
2445
- prompts: options.prompts
2446
- });
3629
+ prompts: options.prompts,
3630
+ concurrency: options.concurrency
3631
+ };
3632
+ if ((options.concurrency ?? 5) === 1) {
3633
+ options.onStage?.("trigger");
3634
+ trigger = await runTriggerTest(parsedSkill, triggerOptions);
3635
+ options.onStage?.("eval");
3636
+ evalResult = await runEval(parsedSkill, evalOptions);
3637
+ } else {
3638
+ options.onStage?.("trigger");
3639
+ options.onStage?.("eval");
3640
+ [trigger, evalResult] = await Promise.all([runTriggerTest(parsedSkill, triggerOptions), runEval(parsedSkill, evalOptions)]);
3641
+ }
2447
3642
  }
2448
3643
  }
2449
3644
  const triggerF1 = trigger ? trigger.metrics.f1 : null;
@@ -2478,14 +3673,17 @@ async function runCheck(inputPath, options) {
2478
3673
  }
2479
3674
 
2480
3675
  // src/commands/check.ts
2481
- var checkCliSchema = z9.object({
2482
- graderModel: z9.string().optional(),
2483
- apiKey: z9.string().optional(),
2484
- queries: z9.string().optional(),
2485
- prompts: z9.string().optional(),
2486
- saveResults: z9.string().optional(),
2487
- continueOnLintFail: z9.boolean().optional(),
2488
- verbose: z9.boolean().optional()
3676
+ var checkCliSchema = z10.object({
3677
+ graderModel: z10.string().optional(),
3678
+ apiKey: z10.string().optional(),
3679
+ queries: z10.string().optional(),
3680
+ seed: z10.number().int().optional(),
3681
+ prompts: z10.string().optional(),
3682
+ concurrency: z10.number().int().min(1).optional(),
3683
+ html: z10.string().optional(),
3684
+ saveResults: z10.string().optional(),
3685
+ continueOnLintFail: z10.boolean().optional(),
3686
+ verbose: z10.boolean().optional()
2489
3687
  });
2490
3688
  var DEFAULT_ANTHROPIC_MODEL3 = "claude-sonnet-4-5-20250929";
2491
3689
  var DEFAULT_OPENAI_MODEL3 = "gpt-4.1-mini";
@@ -2495,6 +3693,19 @@ function resolveModel3(provider, model) {
2495
3693
  }
2496
3694
  return model;
2497
3695
  }
3696
+ function renderCheckOutputWithSeed(output, seed) {
3697
+ if (seed === void 0) {
3698
+ return output;
3699
+ }
3700
+ const lines = output.split("\n");
3701
+ const triggerIndex = lines.indexOf("Trigger");
3702
+ if (triggerIndex === -1) {
3703
+ return `${output}
3704
+ Seed: ${seed}`;
3705
+ }
3706
+ lines.splice(triggerIndex + 1, 0, `Seed: ${seed}`);
3707
+ return lines.join("\n");
3708
+ }
2498
3709
  async function handleCheckCommand(targetPath, options, command) {
2499
3710
  const spinner = options.json || !process.stdout.isTTY ? null : ora3("Preparing check run...").start();
2500
3711
  try {
@@ -2531,6 +3742,7 @@ async function handleCheckCommand(targetPath, options, command) {
2531
3742
  triggerSeed: options.triggerSeed,
2532
3743
  prompts,
2533
3744
  evalNumRuns: options.numRuns,
3745
+ concurrency: options.concurrency,
2534
3746
  minF1: options.minF1,
2535
3747
  minAssertPassRate: options.minAssertPassRate,
2536
3748
  continueOnLintFail: options.continueOnLintFail,
@@ -2543,10 +3755,8 @@ async function handleCheckCommand(targetPath, options, command) {
2543
3755
  spinner.text = "Running lint checks...";
2544
3756
  } else if (stage === "parse") {
2545
3757
  spinner.text = "Parsing skill for model evaluations...";
2546
- } else if (stage === "trigger") {
2547
- spinner.text = "Running trigger test suite...";
2548
- } else if (stage === "eval") {
2549
- spinner.text = "Running end-to-end eval suite...";
3758
+ } else if (stage === "trigger" || stage === "eval") {
3759
+ spinner.text = "Running trigger and eval suites...";
2550
3760
  }
2551
3761
  }
2552
3762
  });
@@ -2557,7 +3767,13 @@ async function handleCheckCommand(targetPath, options, command) {
2557
3767
  if (options.json) {
2558
3768
  writeResult(result, true);
2559
3769
  } else {
2560
- writeResult(renderCheckReport(result, options.color, options.verbose), false);
3770
+ writeResult(
3771
+ renderCheckOutputWithSeed(renderCheckReport(result, options.color, options.verbose), result.trigger?.seed),
3772
+ false
3773
+ );
3774
+ }
3775
+ if (options.html) {
3776
+ await fs10.writeFile(options.html, renderCheckHtml(result), "utf8");
2561
3777
  }
2562
3778
  process.exitCode = result.gates.overallPassed ? 0 : 1;
2563
3779
  } catch (error) {
@@ -2567,7 +3783,7 @@ async function handleCheckCommand(targetPath, options, command) {
2567
3783
  }
2568
3784
  }
2569
3785
  function registerCheckCommand(program) {
2570
- program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
3786
+ program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--concurrency <n>", "Maximum in-flight trigger/eval tasks", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
2571
3787
  const globalOptions = getGlobalCliOptions(command);
2572
3788
  const config = getResolvedConfig(command);
2573
3789
  const parsedCli = checkCliSchema.safeParse(command.opts());
@@ -2590,9 +3806,11 @@ function registerCheckCommand(program) {
2590
3806
  minF1: config.trigger.threshold,
2591
3807
  minAssertPassRate: config.eval.threshold,
2592
3808
  numRuns: config.eval.numRuns,
3809
+ concurrency: config.concurrency,
3810
+ html: parsedCli.data.html,
2593
3811
  lintFailOn: config.lint.failOn,
2594
3812
  lintSuppress: config.lint.suppress,
2595
- triggerSeed: config.trigger.seed,
3813
+ triggerSeed: parsedCli.data.seed ?? config.trigger.seed,
2596
3814
  saveResults: parsedCli.data.saveResults,
2597
3815
  continueOnLintFail: Boolean(parsedCli.data.continueOnLintFail),
2598
3816
  verbose: Boolean(parsedCli.data.verbose)
@@ -2607,7 +3825,7 @@ function resolveVersion() {
2607
3825
  try {
2608
3826
  const currentFilePath = fileURLToPath(import.meta.url);
2609
3827
  const packageJsonPath = path6.resolve(path6.dirname(currentFilePath), "..", "package.json");
2610
- const raw = fs7.readFileSync(packageJsonPath, "utf8");
3828
+ const raw = fs11.readFileSync(packageJsonPath, "utf8");
2611
3829
  const parsed = JSON.parse(raw);
2612
3830
  return parsed.version ?? "0.0.0";
2613
3831
  } catch {