skilltest 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -239,6 +239,171 @@ function runCompatibilityChecks(context) {
239
239
  return issues;
240
240
  }
241
241
 
242
+ // src/core/linter/markdown-zones.ts
243
/**
 * Breaks text into lines, accepting both LF and CRLF line endings.
 *
 * @param {string} raw - Text to split.
 * @returns {string[]} The lines without their terminators.
 */
function splitLines(raw) {
  const lineBreak = /\r?\n/;
  return raw.split(lineBreak);
}
246
/**
 * Separates a leading `---` ... `---` frontmatter block from the rest of a document.
 *
 * @param {string} raw - Full document text.
 * @returns {{ bodyLines: string[], bodyStartLine: number }} The lines after the
 *   closing delimiter and the 1-based line number of the first of them. When the
 *   first line is not exactly `---`, or no later line is exactly `---`, every
 *   line is returned and `bodyStartLine` is 1.
 */
function stripTopFrontmatter(raw) {
  const lines = raw.split(/\r?\n/);
  // Only look for a closing delimiter when the document actually opens with one.
  const closingIndex = lines[0] === "---" ? lines.indexOf("---", 1) : -1;
  if (closingIndex === -1) {
    return { bodyLines: lines, bodyStartLine: 1 };
  }
  return {
    bodyLines: lines.slice(closingIndex + 1),
    // +1 to step past the delimiter, +1 to convert the index to a 1-based line number.
    bodyStartLine: closingIndex + 2
  };
}
267
/**
 * Detects whether a line opens a fenced code block.
 *
 * A fence is a run of three or more backticks or tildes, optionally preceded by
 * whitespace and followed by an info string. A backtick run whose trailing text
 * contains another backtick (for example "```inline```") is NOT an opener: that
 * is an inline code span, and treating it as a fence would swallow every
 * following line into a code-fence zone.
 *
 * @param {string} line - A single line of markdown.
 * @returns {string | null} The fence delimiter (e.g. "```" or "~~~~"), or null
 *   when the line does not open a fence.
 */
function matchCodeFenceOpener(line) {
  const match = /^\s*(`{3,}|~{3,})(.*)$/.exec(line);
  if (!match) {
    return null;
  }
  const [, fence, infoString] = match;
  // Only backtick fences restrict their info string; tilde fences may contain backticks.
  if (fence[0] === "`" && infoString.includes("`")) {
    return null;
  }
  return fence;
}
271
/**
 * Decides whether a line closes the fenced code block opened by `delimiter`.
 *
 * After trimming surrounding whitespace, the line must consist solely of the
 * opener's fence character and be at least as long as the opener. Accepting a
 * longer closing run prevents a fence such as "```" ... "````" from staying
 * open until the end of the document.
 *
 * @param {string} line - Candidate closing line.
 * @param {string} delimiter - The delimiter returned when the fence was opened.
 * @returns {boolean} True when the line closes the fence.
 */
function isExactCodeFenceCloser(line, delimiter) {
  const candidate = line.trim();
  if (candidate.length < delimiter.length) {
    return false;
  }
  const fenceChar = delimiter[0];
  for (const char of candidate) {
    if (char !== fenceChar) {
      return false;
    }
  }
  return true;
}
274
/**
 * Records a piece of content in `zones`, extending the last zone when possible.
 *
 * The last zone is extended when it has the same type and the new content
 * starts no later than the line after the zone's current end; a newline is
 * inserted only when the content begins on a later line. Otherwise a new zone
 * is pushed. Empty content is ignored.
 *
 * @param {Array<{type: string, content: string, startLine: number, endLine: number}>} zones
 *   Zone list, mutated in place.
 * @param {string} type - Zone type of the content.
 * @param {string} content - Text to record.
 * @param {number} startLine - Line on which the content starts.
 * @param {number} endLine - Line on which the content ends.
 * @returns {void}
 */
function appendZone(zones, type, content, startLine, endLine) {
  if (content === "") {
    return;
  }
  const last = zones[zones.length - 1];
  const extendsLast = last && last.type === type && startLine <= last.endLine + 1;
  if (!extendsLast) {
    zones.push({ type, content, startLine, endLine });
    return;
  }
  if (startLine > last.endLine) {
    last.content += "\n";
  }
  last.content += content;
  last.endLine = endLine;
}
292
/**
 * Adds text from `lineNumber` to a zone that is still open.
 *
 * A newline is inserted when the text comes from a line after the zone's
 * current end. Empty text on a line the zone already covers is a no-op; empty
 * text on a later line still records the line break.
 *
 * @param {{content: string, endLine: number}} zone - Open zone, mutated in place.
 * @param {string} content - Text to append (may be empty).
 * @param {number} lineNumber - Line the text came from.
 * @returns {void}
 */
function appendToOpenZone(zone, content, lineNumber) {
  const startsNewLine = lineNumber > zone.endLine;
  if (content === "" && !startsNewLine) {
    return;
  }
  if (startsNewLine) {
    zone.content += "\n";
  }
  zone.content += content;
  zone.endLine = lineNumber;
}
304
/**
 * Splits one line's text into base-type segments and backtick-delimited
 * "inline-code" segments, recording each through `appendZone`.
 *
 * A backtick with no later closing backtick is not treated as code: the text
 * from that backtick to the end is recorded as `baseType`.
 *
 * @param {Array<object>} zones - Zone list, mutated in place.
 * @param {string} text - Text taken from a single line.
 * @param {number} lineNumber - Line the text came from.
 * @param {string} baseType - Zone type for text outside inline code.
 * @returns {void}
 */
function addInlineAwareText(zones, text, lineNumber, baseType) {
  const emit = (type, from, to) => {
    appendZone(zones, type, text.slice(from, to), lineNumber, lineNumber);
  };
  let position = 0;
  while (position < text.length) {
    const open = text.indexOf("`", position);
    if (open === -1) {
      emit(baseType, position, text.length);
      break;
    }
    if (open > position) {
      emit(baseType, position, open);
    }
    const close = text.indexOf("`", open + 1);
    if (close === -1) {
      // Unpaired backtick: keep it, and everything after it, as ordinary text.
      emit(baseType, open, text.length);
      break;
    }
    emit("inline-code", open, close + 1);
    position = close + 1;
  }
}
327
/**
 * Partitions a markdown document (minus its top frontmatter) into typed zones.
 *
 * Lines are scanned in order with two pieces of state: an open code fence and
 * an open multi-line HTML comment. While a fence is open every line belongs to
 * it. Fence openers are not looked for while a comment is open. Remaining text
 * is classified as "blockquote" (line starts with optional whitespace then `>`)
 * or "prose", with `<!-- ... -->` spans split out as "html-comment" and the rest
 * passed to `addInlineAwareText`. A fence or comment still open at the end of
 * the input is emitted as-is.
 *
 * @param {string} raw - Full document text.
 * @returns {Array<{type: string, content: string, startLine: number, endLine: number}>}
 *   Zones whose line numbers are derived from `bodyStartLine` as reported by
 *   `stripTopFrontmatter`.
 */
function parseZones(raw) {
  const { bodyLines, bodyStartLine } = stripTopFrontmatter(raw);
  const zones = [];
  let fence = null; // { delimiter, zone } while inside a fenced code block
  let comment = null; // zone object while inside an unterminated HTML comment

  for (let offset = 0; offset < bodyLines.length; offset += 1) {
    const line = bodyLines[offset];
    const lineNumber = bodyStartLine + offset;

    if (fence) {
      // The closing fence line itself is part of the code-fence zone.
      appendToOpenZone(fence.zone, line, lineNumber);
      if (isExactCodeFenceCloser(line, fence.delimiter)) {
        zones.push(fence.zone);
        fence = null;
      }
      continue;
    }

    const delimiter = comment ? null : matchCodeFenceOpener(line);
    if (delimiter) {
      fence = {
        delimiter,
        zone: {
          type: "code-fence",
          content: line,
          startLine: lineNumber,
          endLine: lineNumber
        }
      };
      continue;
    }

    const baseType = /^\s*>/.test(line) ? "blockquote" : "prose";
    let cursor = 0;
    while (comment || cursor < line.length) {
      if (comment) {
        const closeAt = line.indexOf("-->", cursor);
        const sliceEnd = closeAt === -1 ? line.length : closeAt + 3;
        appendToOpenZone(comment, line.slice(cursor, sliceEnd), lineNumber);
        if (closeAt === -1) {
          // Comment continues on the next line.
          break;
        }
        zones.push(comment);
        comment = null;
        cursor = sliceEnd;
        continue;
      }

      const commentStart = line.indexOf("<!--", cursor);
      const textEnd = commentStart === -1 ? line.length : commentStart;
      if (textEnd > cursor) {
        addInlineAwareText(zones, line.slice(cursor, textEnd), lineNumber, baseType);
      }
      if (commentStart === -1) {
        break;
      }

      const commentEnd = line.indexOf("-->", commentStart + 4);
      if (commentEnd === -1) {
        comment = {
          type: "html-comment",
          content: line.slice(commentStart),
          startLine: lineNumber,
          endLine: lineNumber
        };
        break;
      }
      appendZone(zones, "html-comment", line.slice(commentStart, commentEnd + 3), lineNumber, lineNumber);
      cursor = commentEnd + 3;
    }
  }

  if (comment) {
    zones.push(comment);
  }
  if (fence) {
    zones.push(fence.zone);
  }
  return zones;
}
406
+
242
407
  // src/core/linter/content.ts
243
408
  var VAGUE_PATTERNS = [
244
409
  /\bdo something appropriate\b/i,
@@ -255,6 +420,102 @@ var SECRET_PATTERNS = [
255
420
  { label: "Slack token", regex: /\bxox[baprs]-[A-Za-z0-9-]{20,}\b/ },
256
421
  { label: "Generic private key header", regex: /-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----/ }
257
422
  ];
423
/**
 * Computes the overall line span covered by a list of matches.
 *
 * @param {Array<{startLine: number, endLine: number}>} matches
 * @returns {{startLine?: number, endLine?: number}} The smallest `startLine` and
 *   largest `endLine`, or an empty object when there are no matches (so that
 *   spreading the result adds no keys).
 */
function summarizeLineRange(matches) {
  if (matches.length === 0) {
    return {};
  }
  let startLine = Infinity;
  let endLine = -Infinity;
  for (const match of matches) {
    startLine = Math.min(startLine, match.startLine);
    endLine = Math.max(endLine, match.endLine);
  }
  return { startLine, endLine };
}
432
/**
 * Lists the distinct labels of the given matches in first-seen order.
 *
 * @param {Array<{label: string}>} matches
 * @returns {string[]} Each label once, ordered by first occurrence.
 */
function uniqueLabels(matches) {
  // A Set iterates in insertion order, so first occurrences keep their position.
  const distinct = new Set(matches.map((match) => match.label));
  return [...distinct];
}
444
/**
 * Tests every zone against every entry of `SECRET_PATTERNS` and groups the hits
 * by whether the zone is prose.
 *
 * @param {Array<{type: string, content: string, startLine: number, endLine: number}>} zones
 * @returns {{prose: object[], nonProse: object[]}} One occurrence
 *   (`label`, `zoneType`, `startLine`, `endLine`) per matching zone/pattern pair;
 *   zones of type "prose" go to `prose`, all other types to `nonProse`.
 */
function collectSecretMatches(zones) {
  const buckets = { prose: [], nonProse: [] };
  for (const zone of zones) {
    const bucket = zone.type === "prose" ? buckets.prose : buckets.nonProse;
    for (const pattern of SECRET_PATTERNS) {
      if (pattern.regex.test(zone.content)) {
        bucket.push({
          label: pattern.label,
          zoneType: zone.type,
          startLine: zone.startLine,
          endLine: zone.endLine
        });
      }
    }
  }
  return buckets;
}
467
/**
 * Converts occurrences into the `skippedPatterns` payload attached to an issue.
 *
 * @param {Array<{label: string, zoneType: string, startLine: number, endLine: number}>} matches
 * @returns {Array<{label: string, zoneType: string, startLine: number, endLine: number}> | undefined}
 *   A copy of each match limited to those four fields, or `undefined` when the
 *   list is empty.
 */
function buildSkippedPatterns(matches) {
  if (matches.length === 0) {
    return undefined;
  }
  return matches.map(({ label, zoneType, startLine, endLine }) => ({
    label,
    zoneType,
    startLine,
    endLine
  }));
}
478
/**
 * Produces the "content:secrets" lint issue for a skill.
 *
 * Outcome, in priority order:
 *  - `null` when the check id is in `context.suppressedCheckIds`;
 *  - "fail" when any secret pattern matches a prose zone;
 *  - "warn" when patterns match only non-prose zones (the message differs when
 *    every such match is in a code fence);
 *  - "pass" otherwise.
 * Fail and warn results carry the matched line range and `skippedPatterns`
 * (the non-prose occurrences).
 *
 * @param {{suppressedCheckIds: Set<string>, skill: {raw: string}}} context
 * @returns {object | null} The issue, or null when suppressed.
 */
function buildSecretsIssue(context) {
  if (context.suppressedCheckIds.has("content:secrets")) {
    return null;
  }
  const zones = parseZones(context.skill.raw);
  const { prose, nonProse } = collectSecretMatches(zones);
  const proseLabels = uniqueLabels(prose);
  const nonProseLabels = uniqueLabels(nonProse);
  const skippedPatterns = buildSkippedPatterns(nonProse);
  const identity = {
    id: "content.secrets",
    checkId: "content:secrets",
    title: "Hardcoded Secrets"
  };

  if (proseLabels.length > 0) {
    return {
      ...identity,
      status: "fail",
      message: `Potential secrets detected (${proseLabels.join(", ")}).`,
      suggestion: "Remove secrets from skill files and use environment variables or secret managers.",
      ...summarizeLineRange(prose),
      skippedPatterns
    };
  }

  if (nonProseLabels.length === 0) {
    return {
      ...identity,
      status: "pass",
      message: "No obvious API keys or secrets patterns were detected."
    };
  }

  const joinedLabels = nonProseLabels.join(", ");
  const codeFenceOnly = nonProse.every((match) => match.zoneType === "code-fence");
  let message;
  if (codeFenceOnly) {
    message = `Possible secret in code example \u2014 verify this is a placeholder, not a real key (${joinedLabels}).`;
  } else {
    message = `Possible secrets found outside prose instructions (${joinedLabels}). Verify these are placeholders, not real credentials.`;
  }
  return {
    ...identity,
    status: "warn",
    message,
    suggestion: "Replace real-looking credentials in examples with explicit placeholders such as YOUR_API_KEY.",
    ...summarizeLineRange(nonProse),
    skippedPatterns
  };
}
258
519
  function runContentChecks(context) {
259
520
  const issues = [];
260
521
  const body = context.frontmatter.content;
@@ -334,29 +595,9 @@ function runContentChecks(context) {
334
595
  message: "No angle bracket tokens detected in frontmatter."
335
596
  });
336
597
  }
337
- const secretHits = /* @__PURE__ */ new Set();
338
- for (const pattern of SECRET_PATTERNS) {
339
- if (pattern.regex.test(context.skill.raw)) {
340
- secretHits.add(pattern.label);
341
- }
342
- }
343
- if (secretHits.size > 0) {
344
- issues.push({
345
- id: "content.secrets",
346
- checkId: "content:secrets",
347
- title: "Hardcoded Secrets",
348
- status: "fail",
349
- message: `Potential secrets detected (${Array.from(secretHits).join(", ")}).`,
350
- suggestion: "Remove secrets from skill files and use environment variables or secret managers."
351
- });
352
- } else {
353
- issues.push({
354
- id: "content.secrets",
355
- checkId: "content:secrets",
356
- title: "Hardcoded Secrets",
357
- status: "pass",
358
- message: "No obvious API keys or secrets patterns were detected."
359
- });
598
+ const secretsIssue = buildSecretsIssue(context);
599
+ if (secretsIssue) {
600
+ issues.push(secretsIssue);
360
601
  }
361
602
  if (bodyLines.length < 10) {
362
603
  issues.push({
@@ -776,93 +1017,159 @@ var SHELL_ACTIVITY_PATTERNS = [
776
1017
  /\b(?:npm|pnpm|yarn|pip|git|docker|kubectl)\s+[A-Za-z0-9-]/i
777
1018
  ];
778
1019
  var SAFETY_GUARDRAIL_PATTERN = /\b(?:ask before|confirm|approval|dry[- ]run|sandbox|least privilege|redact|never expose|do not reveal)\b/i;
779
- function collectMatches(content, patterns) {
780
- const matches = [];
781
- for (const pattern of patterns) {
782
- if (pattern.regex.test(content)) {
783
- matches.push(pattern.label);
1020
/**
 * Describes one pattern hit inside one zone.
 *
 * @param {{type: string, startLine: number, endLine: number}} zone - Zone that matched.
 * @param {{label: string}} pattern - Pattern that matched.
 * @returns {{label: string, zoneType: string, startLine: number, endLine: number}}
 */
function buildOccurrence(zone, pattern) {
  const { type: zoneType, startLine, endLine } = zone;
  const { label } = pattern;
  return { label, zoneType, startLine, endLine };
}
1028
+ function collectZoneAwareMatches(zones, patterns) {
1029
+ const flagged = [];
1030
+ const skipped = [];
1031
+ for (const zone of zones) {
1032
+ for (const pattern of patterns) {
1033
+ if (!pattern.regex.test(zone.content)) {
1034
+ continue;
1035
+ }
1036
+ const occurrence = buildOccurrence(zone, pattern);
1037
+ if (zone.type === "prose") {
1038
+ flagged.push(occurrence);
1039
+ } else {
1040
+ skipped.push(occurrence);
1041
+ }
784
1042
  }
785
1043
  }
786
- return matches;
1044
+ return { flagged, skipped };
1045
+ }
1046
/**
 * Returns each distinct `label` among the matches, in order of first appearance.
 *
 * @param {Array<{label: string}>} matches
 * @returns {string[]}
 */
function uniqueLabels2(matches) {
  const seen = new Set();
  for (const { label } of matches) {
    // Adding an existing value is a no-op and leaves the original position intact.
    seen.add(label);
  }
  return Array.from(seen);
}
1058
/**
 * Reduces a list of matches to the line span that encloses all of them.
 *
 * @param {Array<{startLine: number, endLine: number}>} matches
 * @returns {{startLine?: number, endLine?: number}} Minimum `startLine` and
 *   maximum `endLine`; an empty object when `matches` is empty.
 */
function summarizeLineRange2(matches) {
  if (matches.length === 0) {
    return {};
  }
  return matches.reduce(
    (range, match) => ({
      startLine: Math.min(range.startLine, match.startLine),
      endLine: Math.max(range.endLine, match.endLine)
    }),
    { startLine: Infinity, endLine: -Infinity }
  );
}
1067
/**
 * Builds the `skippedPatterns` list reported alongside a security issue.
 *
 * @param {Array<{label: string, zoneType: string, startLine: number, endLine: number}>} matches
 * @returns {Array<object> | undefined} One `{label, zoneType, startLine, endLine}`
 *   entry per match, or `undefined` when nothing was skipped.
 */
function buildSkippedPatterns2(matches) {
  if (matches.length === 0) {
    return undefined;
  }
  const skipped = [];
  for (const match of matches) {
    skipped.push({
      label: match.label,
      zoneType: match.zoneType,
      startLine: match.startLine,
      endLine: match.endLine
    });
  }
  return skipped;
}
1078
/**
 * Reports whether a check id is in the context's suppression set.
 *
 * @param {{suppressedCheckIds: Set<string>}} context
 * @param {string} checkId
 * @returns {boolean}
 */
function isSuppressed(context, checkId) {
  const { suppressedCheckIds } = context;
  return suppressedCheckIds.has(checkId);
}
1081
/**
 * Runs one security check over pre-parsed zones.
 *
 * @param {{suppressedCheckIds: Set<string>}} context
 * @param {Array<object>} zones - Zones to scan.
 * @param {object} options - Check definition: `id`, `checkId`, `title`,
 *   `statusOnMatch`, `patterns`, `matchMessagePrefix`, `passMessage`, `suggestion`.
 * @returns {object | null} `null` when the check is suppressed. Otherwise an
 *   issue whose status is `options.statusOnMatch` when at least one flagged
 *   match exists (with its line range and suggestion) and "pass" when none
 *   does. Both variants include `skippedPatterns`.
 */
function runZoneAwareSecurityCheck(context, zones, options) {
  const { id, checkId, title } = options;
  if (isSuppressed(context, checkId)) {
    return null;
  }
  const { flagged, skipped } = collectZoneAwareMatches(zones, options.patterns);
  const flaggedLabels = uniqueLabels2(flagged);
  const skippedPatterns = buildSkippedPatterns2(skipped);
  const identity = { id, checkId, title };

  if (flaggedLabels.length === 0) {
    return {
      ...identity,
      status: "pass",
      message: options.passMessage,
      skippedPatterns
    };
  }
  return {
    ...identity,
    status: options.statusOnMatch,
    message: `${options.matchMessagePrefix}: ${flaggedLabels.join(", ")}.`,
    suggestion: options.suggestion,
    ...summarizeLineRange2(flagged),
    skippedPatterns
  };
}
788
1109
  function runSecurityChecks(context) {
789
1110
  const issues = [];
790
1111
  const skillText = context.skill.raw;
791
- const dangerousCommandHits = collectMatches(skillText, DANGEROUS_COMMAND_PATTERNS);
792
- if (dangerousCommandHits.length > 0) {
793
- issues.push({
794
- id: "security.dangerous-command-patterns",
795
- checkId: "security:dangerous-commands",
796
- title: "Dangerous Command Patterns",
797
- status: "fail",
798
- message: `Potentially dangerous command instruction patterns found: ${dangerousCommandHits.join(", ")}.`,
799
- suggestion: "Remove destructive/pipe-exec command examples or wrap them with explicit safety constraints."
800
- });
801
- } else {
802
- issues.push({
803
- id: "security.dangerous-command-patterns",
804
- checkId: "security:dangerous-commands",
805
- title: "Dangerous Command Patterns",
806
- status: "pass",
807
- message: "No high-risk destructive or direct pipe-to-shell patterns detected."
808
- });
809
- }
810
- const exfiltrationHits = collectMatches(skillText, EXFILTRATION_PATTERNS);
811
- if (exfiltrationHits.length > 0) {
812
- issues.push({
813
- id: "security.exfiltration-patterns",
814
- checkId: "security:exfiltration",
815
- title: "Sensitive Data Exfiltration",
816
- status: "fail",
817
- message: `Possible sensitive data exfiltration patterns found: ${exfiltrationHits.join(", ")}.`,
818
- suggestion: "Remove instructions that access or transmit secrets/credential files."
819
- });
820
- } else {
821
- issues.push({
822
- id: "security.exfiltration-patterns",
823
- checkId: "security:exfiltration",
824
- title: "Sensitive Data Exfiltration",
825
- status: "pass",
826
- message: "No obvious credential access/exfiltration instructions detected."
827
- });
828
- }
829
- const escalationHits = collectMatches(skillText, PRIVILEGE_ESCALATION_PATTERNS);
830
- if (escalationHits.length > 0) {
831
- issues.push({
832
- id: "security.privilege-escalation",
833
- checkId: "security:privilege-escalation",
834
- title: "Privilege Escalation Language",
835
- status: "warn",
836
- message: `Potentially risky privilege/execution language detected: ${escalationHits.join(", ")}.`,
837
- suggestion: "Prefer least-privilege execution and explicit approval steps for elevated commands."
838
- });
839
- } else {
840
- issues.push({
841
- id: "security.privilege-escalation",
842
- checkId: "security:privilege-escalation",
843
- title: "Privilege Escalation Language",
844
- status: "pass",
845
- message: "No obvious privilege-escalation language detected."
846
- });
1112
+ const needsZoneParsing = !isSuppressed(context, "security:dangerous-commands") || !isSuppressed(context, "security:exfiltration") || !isSuppressed(context, "security:privilege-escalation");
1113
+ const zones = needsZoneParsing ? parseZones(skillText) : [];
1114
+ const dangerousCommandsIssue = runZoneAwareSecurityCheck(context, zones, {
1115
+ id: "security.dangerous-command-patterns",
1116
+ checkId: "security:dangerous-commands",
1117
+ title: "Dangerous Command Patterns",
1118
+ statusOnMatch: "fail",
1119
+ patterns: DANGEROUS_COMMAND_PATTERNS,
1120
+ matchMessagePrefix: "Potentially dangerous command instruction patterns found",
1121
+ passMessage: "No high-risk destructive or direct pipe-to-shell patterns detected.",
1122
+ suggestion: "Remove destructive/pipe-exec command examples or wrap them with explicit safety constraints."
1123
+ });
1124
+ if (dangerousCommandsIssue) {
1125
+ issues.push(dangerousCommandsIssue);
1126
+ }
1127
+ const exfiltrationIssue = runZoneAwareSecurityCheck(context, zones, {
1128
+ id: "security.exfiltration-patterns",
1129
+ checkId: "security:exfiltration",
1130
+ title: "Sensitive Data Exfiltration",
1131
+ statusOnMatch: "fail",
1132
+ patterns: EXFILTRATION_PATTERNS,
1133
+ matchMessagePrefix: "Possible sensitive data exfiltration patterns found",
1134
+ passMessage: "No obvious credential access/exfiltration instructions detected.",
1135
+ suggestion: "Remove instructions that access or transmit secrets/credential files."
1136
+ });
1137
+ if (exfiltrationIssue) {
1138
+ issues.push(exfiltrationIssue);
1139
+ }
1140
+ const privilegeEscalationIssue = runZoneAwareSecurityCheck(context, zones, {
1141
+ id: "security.privilege-escalation",
1142
+ checkId: "security:privilege-escalation",
1143
+ title: "Privilege Escalation Language",
1144
+ statusOnMatch: "warn",
1145
+ patterns: PRIVILEGE_ESCALATION_PATTERNS,
1146
+ matchMessagePrefix: "Potentially risky privilege/execution language detected",
1147
+ passMessage: "No obvious privilege-escalation language detected.",
1148
+ suggestion: "Prefer least-privilege execution and explicit approval steps for elevated commands."
1149
+ });
1150
+ if (privilegeEscalationIssue) {
1151
+ issues.push(privilegeEscalationIssue);
847
1152
  }
848
- const hasShellActivity = SHELL_ACTIVITY_PATTERNS.some((pattern) => pattern.test(skillText));
849
- if (hasShellActivity && !SAFETY_GUARDRAIL_PATTERN.test(skillText)) {
850
- issues.push({
851
- id: "security.safety-guardrails",
852
- checkId: "security:missing-guardrails",
853
- title: "Execution Safety Guardrails",
854
- status: "warn",
855
- message: "Shell/tool execution is present, but no explicit safety guardrails were detected.",
856
- suggestion: "Add guidance such as approval requirements, dry-run mode, scope checks, and redaction rules."
857
- });
858
- } else {
859
- issues.push({
860
- id: "security.safety-guardrails",
861
- checkId: "security:missing-guardrails",
862
- title: "Execution Safety Guardrails",
863
- status: "pass",
864
- message: hasShellActivity ? "Shell/tool execution instructions include at least one safety guardrail." : "No shell/tool execution instructions detected."
865
- });
1153
+ if (!isSuppressed(context, "security:missing-guardrails")) {
1154
+ const hasShellActivity = SHELL_ACTIVITY_PATTERNS.some((pattern) => pattern.test(skillText));
1155
+ if (hasShellActivity && !SAFETY_GUARDRAIL_PATTERN.test(skillText)) {
1156
+ issues.push({
1157
+ id: "security.safety-guardrails",
1158
+ checkId: "security:missing-guardrails",
1159
+ title: "Execution Safety Guardrails",
1160
+ status: "warn",
1161
+ message: "Shell/tool execution is present, but no explicit safety guardrails were detected.",
1162
+ suggestion: "Add guidance such as approval requirements, dry-run mode, scope checks, and redaction rules."
1163
+ });
1164
+ } else {
1165
+ issues.push({
1166
+ id: "security.safety-guardrails",
1167
+ checkId: "security:missing-guardrails",
1168
+ title: "Execution Safety Guardrails",
1169
+ status: "pass",
1170
+ message: hasShellActivity ? "Shell/tool execution instructions include at least one safety guardrail." : "No shell/tool execution instructions detected."
1171
+ });
1172
+ }
866
1173
  }
867
1174
  return issues;
868
1175
  }
@@ -1038,9 +1345,11 @@ function lintFails(report, failOn) {
1038
1345
  async function runLinter(inputPath, options = {}) {
1039
1346
  const skill = await loadSkillFile(inputPath);
1040
1347
  const frontmatter = parseFrontmatter(skill.raw);
1348
+ const suppressedCheckIds = new Set(options.suppress ?? []);
1041
1349
  const context = {
1042
1350
  skill,
1043
- frontmatter
1351
+ frontmatter,
1352
+ suppressedCheckIds
1044
1353
  };
1045
1354
  const issues = [];
1046
1355
  issues.push(...runFrontmatterChecks(context));
@@ -1049,8 +1358,7 @@ async function runLinter(inputPath, options = {}) {
1049
1358
  issues.push(...runSecurityChecks(context));
1050
1359
  issues.push(...await runDisclosureChecks(context));
1051
1360
  issues.push(...runCompatibilityChecks(context));
1052
- const suppress = new Set(options.suppress ?? []);
1053
- const filteredIssues = issues.filter((issue) => !suppress.has(issue.checkId));
1361
+ const filteredIssues = issues.filter((issue) => !suppressedCheckIds.has(issue.checkId));
1054
1362
  return {
1055
1363
  target: inputPath,
1056
1364
  issues: filteredIssues,
@@ -1070,6 +1378,14 @@ function renderIssueLine(issue, c) {
1070
1378
  return ` ${label} ${issue.title}
1071
1379
  ${issue.message}${detail}`;
1072
1380
  }
1381
/**
 * Totals the `skippedPatterns` entries across issues whose `checkId` starts
 * with "security:".
 *
 * @param {Array<{checkId: string, skippedPatterns?: Array<object>}>} issues
 * @returns {number} Sum of the list lengths; an issue without the list counts as 0.
 */
function countSkippedSecurityPatterns(issues) {
  let total = 0;
  for (const { checkId, skippedPatterns } of issues) {
    if (checkId.startsWith("security:")) {
      total += skippedPatterns?.length ?? 0;
    }
  }
  return total;
}
1073
1389
  function renderLintReport(report, enableColor) {
1074
1390
  const c = getChalkInstance(enableColor);
1075
1391
  const { passed, warnings, failures, total } = report.summary;
@@ -1082,8 +1398,11 @@ function renderLintReport(report, enableColor) {
1082
1398
  `\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518`
1083
1399
  ];
1084
1400
  const renderedIssues = report.issues.map((issue) => renderIssueLine(issue, c)).join("\n");
1401
+ const skippedSecurityPatterns = countSkippedSecurityPatterns(report.issues);
1402
+ const infoLine = skippedSecurityPatterns > 0 ? `
1403
+ ${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)` : "";
1085
1404
  return `${headerLines.join("\n")}
1086
- ${renderedIssues}`;
1405
+ ${renderedIssues}${infoLine}`;
1087
1406
  }
1088
1407
  function formatPercent(value) {
1089
1408
  return `${(value * 100).toFixed(1)}%`;
@@ -1171,6 +1490,10 @@ function renderCheckReport(result, enableColor, verbose) {
1171
1490
  for (const issue of lintIssues) {
1172
1491
  lines.push(renderIssueLine(issue, c));
1173
1492
  }
1493
+ const skippedSecurityPatterns = countSkippedSecurityPatterns(result.lint.issues);
1494
+ if (skippedSecurityPatterns > 0) {
1495
+ lines.push(` ${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)`);
1496
+ }
1174
1497
  lines.push("");
1175
1498
  lines.push("Trigger");
1176
1499
  if (result.trigger) {
@@ -1401,23 +1724,28 @@ var FAKE_SKILLS = [
1401
1724
  { name: "test-generator", description: "Generates unit and integration test cases from feature requirements." },
1402
1725
  { name: "prompt-tuner", description: "Improves prompts for reliability, formatting, and failure handling." }
1403
1726
  ];
1404
- function createSeededRandom(seed) {
1405
- let state = seed >>> 0;
1727
+ function mulberry32(seed) {
1406
1728
  return () => {
1407
- state = state * 1664525 + 1013904223 >>> 0;
1408
- return state / 4294967296;
1729
+ seed |= 0;
1730
+ seed = seed + 1831565813 | 0;
1731
+ let t = Math.imul(seed ^ seed >>> 15, 1 | seed);
1732
+ t = t + Math.imul(t ^ t >>> 7, 61 | t) ^ t;
1733
+ return ((t ^ t >>> 14) >>> 0) / 4294967296;
1409
1734
  };
1410
1735
  }
1411
- function shuffle(values, random = Math.random) {
1736
/**
 * Chooses the random-number source for a run.
 *
 * @param {number} [seed] - Optional seed.
 * @returns {() => number} `Math.random` when `seed` is undefined, otherwise the
 *   generator returned by `mulberry32(seed)`.
 */
function createRng(seed) {
  if (seed === undefined) {
    return Math.random;
  }
  return mulberry32(seed);
}
1739
+ function shuffle(values, rng) {
1412
1740
  const copy = [...values];
1413
1741
  for (let index = copy.length - 1; index > 0; index -= 1) {
1414
- const swapIndex = Math.floor(random() * (index + 1));
1742
+ const swapIndex = Math.floor(rng() * (index + 1));
1415
1743
  [copy[index], copy[swapIndex]] = [copy[swapIndex], copy[index]];
1416
1744
  }
1417
1745
  return copy;
1418
1746
  }
1419
- function sample(values, count, random = Math.random) {
1420
- return shuffle(values, random).slice(0, Math.max(0, Math.min(count, values.length)));
1747
+ function sample(values, count, rng) {
1748
+ return shuffle(values, rng).slice(0, Math.max(0, Math.min(count, values.length)));
1421
1749
  }
1422
1750
  function parseJsonArrayFromModelOutput(raw) {
1423
1751
  const trimmed = raw.trim();
@@ -1529,20 +1857,20 @@ function buildSuggestions(metrics) {
1529
1857
  return suggestions;
1530
1858
  }
1531
1859
  async function runTriggerTest(skill, options) {
1532
- const random = options.seed === void 0 ? Math.random : createSeededRandom(options.seed);
1860
+ const rng = createRng(options.seed);
1533
1861
  const queries = options.queries && options.queries.length > 0 ? triggerQueryArraySchema.parse(options.queries) : await generateQueriesWithModel(skill, options.provider, options.model, options.numQueries);
1534
1862
  const results = [];
1535
1863
  const skillName = skill.frontmatter.name;
1536
1864
  for (const testQuery of queries) {
1537
- const fakeCount = 5 + Math.floor(random() * 4);
1538
- const fakeSkills = sample(FAKE_SKILLS, fakeCount, random);
1865
+ const fakeCount = 5 + Math.floor(rng() * 5);
1866
+ const fakeSkills = sample(FAKE_SKILLS, fakeCount, rng);
1539
1867
  const allSkills = shuffle([
1540
1868
  ...fakeSkills,
1541
1869
  {
1542
1870
  name: skill.frontmatter.name,
1543
1871
  description: skill.frontmatter.description
1544
1872
  }
1545
- ], random);
1873
+ ], rng);
1546
1874
  const skillListText = allSkills.map((entry) => `- ${entry.name}: ${entry.description}`).join("\n");
1547
1875
  const systemPrompt = [
1548
1876
  "You are selecting one skill to activate for a user query.",
@@ -1571,6 +1899,7 @@ async function runTriggerTest(skill, options) {
1571
1899
  skillName,
1572
1900
  model: options.model,
1573
1901
  provider: options.provider.name,
1902
+ seed: options.seed,
1574
1903
  queries,
1575
1904
  cases: results,
1576
1905
  metrics,
@@ -2221,6 +2550,7 @@ function createProvider(providerName, apiKeyOverride) {
2221
2550
  var triggerCliSchema = z7.object({
2222
2551
  queries: z7.string().optional(),
2223
2552
  saveQueries: z7.string().optional(),
2553
+ seed: z7.number().int().optional(),
2224
2554
  verbose: z7.boolean().optional(),
2225
2555
  apiKey: z7.string().optional()
2226
2556
  });
@@ -2232,6 +2562,13 @@ function resolveModel(provider, model) {
2232
2562
  }
2233
2563
  return model;
2234
2564
  }
2565
/**
 * Appends a "Seed: <n>" line to rendered trigger output when a seed is known.
 *
 * @param {string} output - Already rendered report text.
 * @param {number} [seed] - Seed to display.
 * @returns {string} `output` unchanged when `seed` is undefined, otherwise
 *   `output` followed by a newline and the seed line.
 */
function renderTriggerOutputWithSeed(output, seed) {
  const hasSeed = seed !== undefined;
  return hasSeed ? `${output}\nSeed: ${seed}` : output;
}
2235
2572
  async function handleTriggerCommand(targetPath, options) {
2236
2573
  const spinner = options.json || !process.stdout.isTTY ? null : ora("Preparing trigger evaluation...").start();
2237
2574
  try {
@@ -2269,7 +2606,7 @@ async function handleTriggerCommand(targetPath, options) {
2269
2606
  if (options.json) {
2270
2607
  writeResult(result, true);
2271
2608
  } else {
2272
- writeResult(renderTriggerReport(result, options.color, options.verbose), false);
2609
+ writeResult(renderTriggerOutputWithSeed(renderTriggerReport(result, options.color, options.verbose), result.seed), false);
2273
2610
  }
2274
2611
  } catch (error) {
2275
2612
  spinner?.stop();
@@ -2278,7 +2615,7 @@ async function handleTriggerCommand(targetPath, options) {
2278
2615
  }
2279
2616
  }
2280
2617
  function registerTriggerCommand(program) {
2281
- program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
2618
+ program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
2282
2619
  const globalOptions = getGlobalCliOptions(command);
2283
2620
  const config = getResolvedConfig(command);
2284
2621
  const parsedCli = triggerCliSchema.safeParse(command.opts());
@@ -2294,7 +2631,7 @@ function registerTriggerCommand(program) {
2294
2631
  queries: parsedCli.data.queries,
2295
2632
  numQueries: config.trigger.numQueries,
2296
2633
  saveQueries: parsedCli.data.saveQueries,
2297
- seed: config.trigger.seed,
2634
+ seed: parsedCli.data.seed ?? config.trigger.seed,
2298
2635
  verbose: Boolean(parsedCli.data.verbose),
2299
2636
  apiKey: parsedCli.data.apiKey
2300
2637
  });
@@ -2482,6 +2819,7 @@ var checkCliSchema = z9.object({
2482
2819
  graderModel: z9.string().optional(),
2483
2820
  apiKey: z9.string().optional(),
2484
2821
  queries: z9.string().optional(),
2822
+ seed: z9.number().int().optional(),
2485
2823
  prompts: z9.string().optional(),
2486
2824
  saveResults: z9.string().optional(),
2487
2825
  continueOnLintFail: z9.boolean().optional(),
@@ -2495,6 +2833,19 @@ function resolveModel3(provider, model) {
2495
2833
  }
2496
2834
  return model;
2497
2835
  }
2836
/**
 * Inserts a "Seed: <n>" line into rendered check output.
 *
 * The line goes directly after the first line that is exactly "Trigger". If no
 * such line exists it is appended at the end. With an undefined seed the output
 * is returned untouched.
 *
 * @param {string} output - Already rendered report text.
 * @param {number} [seed] - Seed to display.
 * @returns {string}
 */
function renderCheckOutputWithSeed(output, seed) {
  if (seed === undefined) {
    return output;
  }
  const seedLine = `Seed: ${seed}`;
  const lines = output.split("\n");
  const headingIndex = lines.indexOf("Trigger");
  if (headingIndex === -1) {
    return `${output}\n${seedLine}`;
  }
  const upToHeading = lines.slice(0, headingIndex + 1);
  const afterHeading = lines.slice(headingIndex + 1);
  return [...upToHeading, seedLine, ...afterHeading].join("\n");
}
2498
2849
  async function handleCheckCommand(targetPath, options, command) {
2499
2850
  const spinner = options.json || !process.stdout.isTTY ? null : ora3("Preparing check run...").start();
2500
2851
  try {
@@ -2557,7 +2908,10 @@ async function handleCheckCommand(targetPath, options, command) {
2557
2908
  if (options.json) {
2558
2909
  writeResult(result, true);
2559
2910
  } else {
2560
- writeResult(renderCheckReport(result, options.color, options.verbose), false);
2911
+ writeResult(
2912
+ renderCheckOutputWithSeed(renderCheckReport(result, options.color, options.verbose), result.trigger?.seed),
2913
+ false
2914
+ );
2561
2915
  }
2562
2916
  process.exitCode = result.gates.overallPassed ? 0 : 1;
2563
2917
  } catch (error) {
@@ -2567,7 +2921,7 @@ async function handleCheckCommand(targetPath, options, command) {
2567
2921
  }
2568
2922
  }
2569
2923
  function registerCheckCommand(program) {
2570
- program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
2924
+ program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
2571
2925
  const globalOptions = getGlobalCliOptions(command);
2572
2926
  const config = getResolvedConfig(command);
2573
2927
  const parsedCli = checkCliSchema.safeParse(command.opts());
@@ -2592,7 +2946,7 @@ function registerCheckCommand(program) {
2592
2946
  numRuns: config.eval.numRuns,
2593
2947
  lintFailOn: config.lint.failOn,
2594
2948
  lintSuppress: config.lint.suppress,
2595
- triggerSeed: config.trigger.seed,
2949
+ triggerSeed: parsedCli.data.seed ?? config.trigger.seed,
2596
2950
  saveResults: parsedCli.data.saveResults,
2597
2951
  continueOnLintFail: Boolean(parsedCli.data.continueOnLintFail),
2598
2952
  verbose: Boolean(parsedCli.data.verbose)