skilltest 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -0
- package/dist/index.js +479 -125
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -239,6 +239,171 @@ function runCompatibilityChecks(context) {
|
|
|
239
239
|
return issues;
|
|
240
240
|
}
|
|
241
241
|
|
|
242
|
+
// src/core/linter/markdown-zones.ts
|
|
243
|
+
/**
 * Break raw text into an array of lines.
 *
 * Both LF and CRLF line endings are accepted as separators.
 *
 * @param {string} raw - Text to split.
 * @returns {string[]} The lines, without their line terminators.
 */
function splitLines(raw) {
  const lineBreak = /\r?\n/;
  return raw.split(lineBreak);
}
|
|
246
|
+
/**
 * Separate a leading `---` delimited frontmatter block from the body.
 *
 * The frontmatter is only stripped when the very first line is exactly `---`
 * and a later line is exactly `---` as well; otherwise every line is treated
 * as body.
 *
 * @param {string} raw - Full file text.
 * @returns {{ bodyLines: string[], bodyStartLine: number }} Body lines and the
 *   1-based line number (in the full file) of the first body line.
 */
function stripTopFrontmatter(raw) {
  const lines = raw.split(/\r?\n/);
  // A closing marker can only exist after the opening one on line index 0.
  const closingIndex = lines[0] === "---" ? lines.indexOf("---", 1) : -1;
  if (closingIndex === -1) {
    return { bodyLines: lines, bodyStartLine: 1 };
  }
  return {
    bodyLines: lines.slice(closingIndex + 1),
    // +1 to step past the closing marker, +1 to convert to 1-based numbering.
    bodyStartLine: closingIndex + 2
  };
}
|
|
267
|
+
/**
 * Detect whether a line opens a fenced code block.
 *
 * A fence is a run of three or more backticks or tildes, optionally preceded
 * by whitespace and followed by an info string. For backtick fences the info
 * string must not itself contain a backtick: a line such as
 * "```code``` more text" is inline code, not a fence opener. Treating it as an
 * opener would swallow all following prose into a code zone.
 *
 * @param {string} line - A single line of markdown.
 * @returns {string | null} The fence delimiter (e.g. "```" or "~~~~"), or
 *   null when the line does not open a fence.
 */
function matchCodeFenceOpener(line) {
  const match = line.match(/^\s*(`{3,}|~{3,})(.*)$/);
  if (!match) {
    return null;
  }
  const [, delimiter, info] = match;
  // Tilde fences may carry backticks in their info string; backtick fences may not.
  if (delimiter.startsWith("`") && info.includes("`")) {
    return null;
  }
  return delimiter;
}
|
|
271
|
+
/**
 * Decide whether a line closes a fenced code block opened with `delimiter`.
 *
 * After trimming surrounding whitespace the line must consist solely of the
 * opener's fence character and be at least as long as the opener. A closer
 * that is longer than the opener therefore still closes the block; a line
 * carrying any other text (such as an info string) does not.
 *
 * @param {string} line - Candidate closing line.
 * @param {string} delimiter - Delimiter returned when the fence was opened.
 * @returns {boolean} True when the line closes the fence.
 */
function isExactCodeFenceCloser(line, delimiter) {
  const candidate = line.trim();
  if (candidate === delimiter) {
    return true;
  }
  if (delimiter === "" || candidate.length <= delimiter.length || !candidate.startsWith(delimiter)) {
    return false;
  }
  // Longer closers are accepted only when every character is the fence character.
  const fenceChar = delimiter[0];
  return [...candidate].every((char) => char === fenceChar);
}
|
|
274
|
+
/**
 * Add a piece of content to the zone list, merging it into the last zone when
 * that zone has the same type and ends on the same or the immediately
 * preceding line.
 *
 * Empty content is ignored. When merging across a line boundary a newline is
 * inserted between the existing and the new content.
 *
 * @param {Array<object>} zones - Zone list, mutated in place.
 * @param {string} type - Zone type of the new content.
 * @param {string} content - Text to add.
 * @param {number} startLine - First line covered by the content.
 * @param {number} endLine - Last line covered by the content.
 * @returns {void}
 */
function appendZone(zones, type, content, startLine, endLine) {
  if (content === "") {
    return;
  }
  const tail = zones[zones.length - 1];
  const mergeable = tail && tail.type === type && startLine <= tail.endLine + 1;
  if (!mergeable) {
    zones.push({ type, content, startLine, endLine });
    return;
  }
  const joiner = startLine > tail.endLine ? "\n" : "";
  tail.content += `${joiner}${content}`;
  tail.endLine = endLine;
}
|
|
292
|
+
/**
 * Extend a zone that is still open (a code fence or HTML comment spanning
 * several lines) with more content.
 *
 * Content arriving from a later line than the zone currently ends on is
 * preceded by a newline. Empty content on the zone's current line is a no-op;
 * empty content from a later line still records the blank line.
 *
 * @param {{ content: string, endLine: number }} zone - Zone to extend, mutated in place.
 * @param {string} content - Text to append.
 * @param {number} lineNumber - Line the text came from.
 * @returns {void}
 */
function appendToOpenZone(zone, content, lineNumber) {
  const startsNewLine = lineNumber > zone.endLine;
  if (content === "" && !startsNewLine) {
    return;
  }
  zone.content += `${startsNewLine ? "\n" : ""}${content}`;
  zone.endLine = lineNumber;
}
|
|
304
|
+
/**
 * Split one line fragment into base-type text and inline code spans and add
 * the pieces to the zone list via appendZone.
 *
 * A code span starts at a run of one or more backticks and ends at the next
 * run of exactly the same length, so multi-backtick spans such as
 * "``a ` b``" are kept together as one inline-code zone. A backtick run with
 * no matching closer is kept as literal base-type text and scanning continues
 * after it.
 *
 * @param {Array<object>} zones - Zone list, mutated in place.
 * @param {string} text - Fragment of a single line.
 * @param {number} lineNumber - Line the fragment came from.
 * @param {string} baseType - Zone type for text outside code spans.
 * @returns {void}
 */
function addInlineAwareText(zones, text, lineNumber, baseType) {
  // Index just past the run of backticks that starts at `from`.
  const runEnd = (from) => {
    let index = from;
    while (index < text.length && text[index] === "`") {
      index += 1;
    }
    return index;
  };
  // Start index of the next backtick run of exactly `length`, or -1.
  const findCloser = (from, length) => {
    let scan = from;
    while (scan < text.length) {
      const candidate = text.indexOf("`", scan);
      if (candidate === -1) {
        return -1;
      }
      const candidateEnd = runEnd(candidate);
      if (candidateEnd - candidate === length) {
        return candidate;
      }
      scan = candidateEnd;
    }
    return -1;
  };
  let cursor = 0;
  while (cursor < text.length) {
    const openStart = text.indexOf("`", cursor);
    if (openStart === -1) {
      appendZone(zones, baseType, text.slice(cursor), lineNumber, lineNumber);
      return;
    }
    const openEnd = runEnd(openStart);
    const runLength = openEnd - openStart;
    const closeStart = findCloser(openEnd, runLength);
    if (closeStart === -1) {
      // Unmatched run: literal text. Later runs of another length may still pair up.
      appendZone(zones, baseType, text.slice(cursor, openEnd), lineNumber, lineNumber);
      cursor = openEnd;
      continue;
    }
    if (openStart > cursor) {
      appendZone(zones, baseType, text.slice(cursor, openStart), lineNumber, lineNumber);
    }
    const spanEnd = closeStart + runLength;
    appendZone(zones, "inline-code", text.slice(openStart, spanEnd), lineNumber, lineNumber);
    cursor = spanEnd;
  }
}
|
|
327
|
+
/**
 * Partition a markdown document into typed zones.
 *
 * Leading frontmatter is removed first. Each remaining line is then assigned
 * to one of: "code-fence" (everything from a fence opener through its closer),
 * "html-comment" (`<!-- ... -->`, possibly spanning lines), "inline-code",
 * "blockquote" (lines starting with optional whitespace and `>`), or "prose".
 * Fence openers are not recognised while an HTML comment is open. Zones left
 * open at the end of the input are still emitted.
 *
 * @param {string} raw - Full file text.
 * @returns {Array<{ type: string, content: string, startLine: number, endLine: number }>}
 *   Zones with 1-based line numbers relative to the full file.
 */
function parseZones(raw) {
  const { bodyLines, bodyStartLine } = stripTopFrontmatter(raw);
  const zones = [];
  let fence = null;
  let comment = null;
  bodyLines.forEach((line, offset) => {
    const lineNumber = bodyStartLine + offset;
    // Inside a fence every line belongs to it, including the closing line.
    if (fence) {
      appendToOpenZone(fence.zone, line, lineNumber);
      if (isExactCodeFenceCloser(line, fence.delimiter)) {
        zones.push(fence.zone);
        fence = null;
      }
      return;
    }
    if (!comment) {
      const delimiter = matchCodeFenceOpener(line);
      if (delimiter) {
        fence = {
          delimiter,
          zone: {
            type: "code-fence",
            content: line,
            startLine: lineNumber,
            endLine: lineNumber
          }
        };
        return;
      }
    }
    const baseType = /^\s*>/.test(line) ? "blockquote" : "prose";
    let cursor = 0;
    for (;;) {
      if (comment) {
        const closeAt = line.indexOf("-->", cursor);
        if (closeAt === -1) {
          // The comment continues past this line.
          appendToOpenZone(comment, line.slice(cursor), lineNumber);
          return;
        }
        const afterClose = closeAt + 3;
        appendToOpenZone(comment, line.slice(cursor, afterClose), lineNumber);
        zones.push(comment);
        comment = null;
        cursor = afterClose;
      }
      if (cursor >= line.length) {
        return;
      }
      const openAt = line.indexOf("<!--", cursor);
      const textEnd = openAt === -1 ? line.length : openAt;
      if (textEnd > cursor) {
        addInlineAwareText(zones, line.slice(cursor, textEnd), lineNumber, baseType);
      }
      if (openAt === -1) {
        return;
      }
      const closeAt = line.indexOf("-->", openAt + 4);
      if (closeAt === -1) {
        // Comment opens here and stays open for the following lines.
        comment = {
          type: "html-comment",
          content: line.slice(openAt),
          startLine: lineNumber,
          endLine: lineNumber
        };
        return;
      }
      const afterClose = closeAt + 3;
      appendZone(zones, "html-comment", line.slice(openAt, afterClose), lineNumber, lineNumber);
      cursor = afterClose;
    }
  });
  if (comment) {
    zones.push(comment);
  }
  if (fence) {
    zones.push(fence.zone);
  }
  return zones;
}
|
|
406
|
+
|
|
242
407
|
// src/core/linter/content.ts
|
|
243
408
|
var VAGUE_PATTERNS = [
|
|
244
409
|
/\bdo something appropriate\b/i,
|
|
@@ -255,6 +420,102 @@ var SECRET_PATTERNS = [
|
|
|
255
420
|
{ label: "Slack token", regex: /\bxox[baprs]-[A-Za-z0-9-]{20,}\b/ },
|
|
256
421
|
{ label: "Generic private key header", regex: /-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----/ }
|
|
257
422
|
];
|
|
423
|
+
/**
 * Compute the overall line span covered by a list of matches.
 *
 * @param {Array<{ startLine: number, endLine: number }>} matches - Matches to summarise.
 * @returns {{ startLine?: number, endLine?: number }} The smallest start line
 *   and largest end line, or an empty object when there are no matches.
 */
function summarizeLineRange(matches) {
  if (matches.length === 0) {
    return {};
  }
  let startLine = Infinity;
  let endLine = -Infinity;
  for (const match of matches) {
    startLine = Math.min(startLine, match.startLine);
    endLine = Math.max(endLine, match.endLine);
  }
  return { startLine, endLine };
}
|
|
432
|
+
/**
 * List the distinct labels of the given matches in first-seen order.
 *
 * @param {Array<{ label: string }>} matches - Matches to read labels from.
 * @returns {string[]} Each label once, ordered by first appearance.
 */
function uniqueLabels(matches) {
  // A Set iterates in insertion order, which preserves first-seen ordering.
  const distinct = new Set(matches.map(({ label }) => label));
  return Array.from(distinct);
}
|
|
444
|
+
/**
 * Test every zone against every entry of SECRET_PATTERNS and sort the hits by
 * whether they occurred in a prose zone.
 *
 * @param {Array<{ type: string, content: string, startLine: number, endLine: number }>} zones
 *   Zones to scan.
 * @returns {{ prose: object[], nonProse: object[] }} Occurrences
 *   (`label`, `zoneType`, `startLine`, `endLine`) found in prose zones and in
 *   all other zone types respectively.
 */
function collectSecretMatches(zones) {
  const found = { prose: [], nonProse: [] };
  for (const zone of zones) {
    const bucket = zone.type === "prose" ? found.prose : found.nonProse;
    for (const { label, regex } of SECRET_PATTERNS) {
      if (regex.test(zone.content)) {
        bucket.push({
          label,
          zoneType: zone.type,
          startLine: zone.startLine,
          endLine: zone.endLine
        });
      }
    }
  }
  return found;
}
|
|
467
|
+
/**
 * Turn non-flagged matches into the `skippedPatterns` payload of an issue.
 *
 * @param {Array<{ label: string, zoneType: string, startLine: number, endLine: number }>} matches
 *   Matches that were found but not flagged.
 * @returns {object[] | undefined} A fresh array of
 *   `{ label, zoneType, startLine, endLine }` records, or undefined when there
 *   are no matches.
 */
function buildSkippedPatterns(matches) {
  if (matches.length === 0) {
    return undefined;
  }
  return matches.map(({ label, zoneType, startLine, endLine }) => ({
    label,
    zoneType,
    startLine,
    endLine
  }));
}
|
|
478
|
+
/**
 * Build the "Hardcoded Secrets" lint issue for a skill file.
 *
 * Outcome by where secret patterns were found:
 * - in prose zones: status "fail";
 * - only in non-prose zones: status "warn", with a message that depends on
 *   whether every hit was inside a code fence;
 * - nowhere: status "pass".
 *
 * @param {object} context - Lint context; reads `suppressedCheckIds` and `skill.raw`.
 * @returns {object | null} The issue, or null when "content:secrets" is suppressed.
 */
function buildSecretsIssue(context) {
  if (context.suppressedCheckIds.has("content:secrets")) {
    return null;
  }
  const identity = {
    id: "content.secrets",
    checkId: "content:secrets",
    title: "Hardcoded Secrets"
  };
  const { prose, nonProse } = collectSecretMatches(parseZones(context.skill.raw));
  const proseLabels = uniqueLabels(prose);
  const nonProseLabels = uniqueLabels(nonProse);
  const skippedPatterns = buildSkippedPatterns(nonProse);
  if (proseLabels.length > 0) {
    return {
      ...identity,
      status: "fail",
      message: `Potential secrets detected (${proseLabels.join(", ")}).`,
      suggestion: "Remove secrets from skill files and use environment variables or secret managers.",
      ...summarizeLineRange(prose),
      skippedPatterns
    };
  }
  if (nonProseLabels.length === 0) {
    return {
      ...identity,
      status: "pass",
      message: "No obvious API keys or secrets patterns were detected."
    };
  }
  const joinedLabels = nonProseLabels.join(", ");
  const codeFenceOnly = nonProse.every((match) => match.zoneType === "code-fence");
  let message;
  if (codeFenceOnly) {
    message = `Possible secret in code example \u2014 verify this is a placeholder, not a real key (${joinedLabels}).`;
  } else {
    message = `Possible secrets found outside prose instructions (${joinedLabels}). Verify these are placeholders, not real credentials.`;
  }
  return {
    ...identity,
    status: "warn",
    message,
    suggestion: "Replace real-looking credentials in examples with explicit placeholders such as YOUR_API_KEY.",
    ...summarizeLineRange(nonProse),
    skippedPatterns
  };
}
|
|
258
519
|
function runContentChecks(context) {
|
|
259
520
|
const issues = [];
|
|
260
521
|
const body = context.frontmatter.content;
|
|
@@ -334,29 +595,9 @@ function runContentChecks(context) {
|
|
|
334
595
|
message: "No angle bracket tokens detected in frontmatter."
|
|
335
596
|
});
|
|
336
597
|
}
|
|
337
|
-
const
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
secretHits.add(pattern.label);
|
|
341
|
-
}
|
|
342
|
-
}
|
|
343
|
-
if (secretHits.size > 0) {
|
|
344
|
-
issues.push({
|
|
345
|
-
id: "content.secrets",
|
|
346
|
-
checkId: "content:secrets",
|
|
347
|
-
title: "Hardcoded Secrets",
|
|
348
|
-
status: "fail",
|
|
349
|
-
message: `Potential secrets detected (${Array.from(secretHits).join(", ")}).`,
|
|
350
|
-
suggestion: "Remove secrets from skill files and use environment variables or secret managers."
|
|
351
|
-
});
|
|
352
|
-
} else {
|
|
353
|
-
issues.push({
|
|
354
|
-
id: "content.secrets",
|
|
355
|
-
checkId: "content:secrets",
|
|
356
|
-
title: "Hardcoded Secrets",
|
|
357
|
-
status: "pass",
|
|
358
|
-
message: "No obvious API keys or secrets patterns were detected."
|
|
359
|
-
});
|
|
598
|
+
const secretsIssue = buildSecretsIssue(context);
|
|
599
|
+
if (secretsIssue) {
|
|
600
|
+
issues.push(secretsIssue);
|
|
360
601
|
}
|
|
361
602
|
if (bodyLines.length < 10) {
|
|
362
603
|
issues.push({
|
|
@@ -776,93 +1017,159 @@ var SHELL_ACTIVITY_PATTERNS = [
|
|
|
776
1017
|
/\b(?:npm|pnpm|yarn|pip|git|docker|kubectl)\s+[A-Za-z0-9-]/i
|
|
777
1018
|
];
|
|
778
1019
|
var SAFETY_GUARDRAIL_PATTERN = /\b(?:ask before|confirm|approval|dry[- ]run|sandbox|least privilege|redact|never expose|do not reveal)\b/i;
|
|
779
|
-
function
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
1020
|
+
/**
 * Describe one pattern hit inside one zone.
 *
 * @param {{ type: string, startLine: number, endLine: number }} zone - Zone the pattern matched in.
 * @param {{ label: string }} pattern - Pattern that matched.
 * @returns {{ label: string, zoneType: string, startLine: number, endLine: number }}
 *   The occurrence record.
 */
function buildOccurrence(zone, pattern) {
  const { label } = pattern;
  const { type: zoneType, startLine, endLine } = zone;
  return { label, zoneType, startLine, endLine };
}
|
|
1028
|
+
/**
 * Test every zone against every pattern and separate the hits into those that
 * should be flagged (found in prose zones) and those that are only recorded
 * as skipped (found in any other zone type).
 *
 * @param {Array<{ type: string, content: string }>} zones - Zones to scan.
 * @param {Array<{ label: string, regex: RegExp }>} patterns - Patterns to test.
 * @returns {{ flagged: object[], skipped: object[] }} Occurrence records in
 *   zone order, then pattern order.
 */
function collectZoneAwareMatches(zones, patterns) {
  const result = { flagged: [], skipped: [] };
  for (const zone of zones) {
    const target = zone.type === "prose" ? result.flagged : result.skipped;
    for (const pattern of patterns) {
      if (pattern.regex.test(zone.content)) {
        target.push(buildOccurrence(zone, pattern));
      }
    }
  }
  return result;
}
|
|
1046
|
+
/**
 * Return each match label once, keeping the order in which labels first occur.
 *
 * @param {Array<{ label: string }>} matches - Matches to read labels from.
 * @returns {string[]} De-duplicated labels.
 */
function uniqueLabels2(matches) {
  const seen = new Set();
  return matches
    .map((match) => match.label)
    .filter((label) => {
      if (seen.has(label)) {
        return false;
      }
      seen.add(label);
      return true;
    });
}
|
|
1058
|
+
/**
 * Reduce a list of matches to the line range that encloses all of them.
 *
 * @param {Array<{ startLine: number, endLine: number }>} matches - Matches to summarise.
 * @returns {{ startLine?: number, endLine?: number }} Minimum start line and
 *   maximum end line, or an empty object for an empty list.
 */
function summarizeLineRange2(matches) {
  if (matches.length === 0) {
    return {};
  }
  return matches.reduce(
    (range, match) => ({
      startLine: Math.min(range.startLine, match.startLine),
      endLine: Math.max(range.endLine, match.endLine)
    }),
    { startLine: Infinity, endLine: -Infinity }
  );
}
|
|
1067
|
+
/**
 * Produce the `skippedPatterns` list attached to a security issue.
 *
 * @param {Array<{ label: string, zoneType: string, startLine: number, endLine: number }>} matches
 *   Matches found outside prose.
 * @returns {object[] | undefined} Copies holding only `label`, `zoneType`,
 *   `startLine` and `endLine`; undefined when the input is empty.
 */
function buildSkippedPatterns2(matches) {
  const toRecord = (match) => {
    const { label, zoneType, startLine, endLine } = match;
    return { label, zoneType, startLine, endLine };
  };
  return matches.length > 0 ? matches.map((match) => toRecord(match)) : undefined;
}
|
|
1078
|
+
/**
 * Report whether a check has been suppressed for this lint run.
 *
 * @param {{ suppressedCheckIds: { has(id: string): boolean } }} context - Lint context.
 * @param {string} checkId - Check identifier to look up.
 * @returns {boolean} Result of `suppressedCheckIds.has(checkId)`.
 */
function isSuppressed(context, checkId) {
  const { suppressedCheckIds } = context;
  return suppressedCheckIds.has(checkId);
}
|
|
1081
|
+
/**
 * Run one pattern-based security check over pre-parsed zones.
 *
 * Patterns matching inside prose zones produce an issue with
 * `options.statusOnMatch`; matches in other zones are only reported through
 * `skippedPatterns`. With no prose matches the issue has status "pass".
 *
 * @param {object} context - Lint context, consulted for suppression.
 * @param {object[]} zones - Zones to scan.
 * @param {object} options - Check definition: `id`, `checkId`, `title`,
 *   `statusOnMatch`, `patterns`, `matchMessagePrefix`, `passMessage`, `suggestion`.
 * @returns {object | null} The issue, or null when the check is suppressed.
 */
function runZoneAwareSecurityCheck(context, zones, options) {
  if (isSuppressed(context, options.checkId)) {
    return null;
  }
  const { flagged, skipped } = collectZoneAwareMatches(zones, options.patterns);
  const labels = uniqueLabels2(flagged);
  const skippedPatterns = buildSkippedPatterns2(skipped);
  const identity = {
    id: options.id,
    checkId: options.checkId,
    title: options.title
  };
  if (labels.length === 0) {
    return {
      ...identity,
      status: "pass",
      message: options.passMessage,
      skippedPatterns
    };
  }
  return {
    ...identity,
    status: options.statusOnMatch,
    message: `${options.matchMessagePrefix}: ${labels.join(", ")}.`,
    suggestion: options.suggestion,
    ...summarizeLineRange2(flagged),
    skippedPatterns
  };
}
|
|
788
1109
|
/**
 * Run all security lint checks for a skill file.
 *
 * Three zone-aware pattern checks run first, in this order: dangerous
 * commands, exfiltration, privilege escalation. The file is parsed into zones
 * only when at least one of them is not suppressed. The guardrail check then
 * runs against the raw text unless it is suppressed.
 *
 * @param {object} context - Lint context; reads `skill.raw` and suppression state.
 * @returns {object[]} Issues produced by the non-suppressed checks.
 */
function runSecurityChecks(context) {
  const skillText = context.skill.raw;
  const zoneChecks = [
    {
      id: "security.dangerous-command-patterns",
      checkId: "security:dangerous-commands",
      title: "Dangerous Command Patterns",
      statusOnMatch: "fail",
      patterns: DANGEROUS_COMMAND_PATTERNS,
      matchMessagePrefix: "Potentially dangerous command instruction patterns found",
      passMessage: "No high-risk destructive or direct pipe-to-shell patterns detected.",
      suggestion: "Remove destructive/pipe-exec command examples or wrap them with explicit safety constraints."
    },
    {
      id: "security.exfiltration-patterns",
      checkId: "security:exfiltration",
      title: "Sensitive Data Exfiltration",
      statusOnMatch: "fail",
      patterns: EXFILTRATION_PATTERNS,
      matchMessagePrefix: "Possible sensitive data exfiltration patterns found",
      passMessage: "No obvious credential access/exfiltration instructions detected.",
      suggestion: "Remove instructions that access or transmit secrets/credential files."
    },
    {
      id: "security.privilege-escalation",
      checkId: "security:privilege-escalation",
      title: "Privilege Escalation Language",
      statusOnMatch: "warn",
      patterns: PRIVILEGE_ESCALATION_PATTERNS,
      matchMessagePrefix: "Potentially risky privilege/execution language detected",
      passMessage: "No obvious privilege-escalation language detected.",
      suggestion: "Prefer least-privilege execution and explicit approval steps for elevated commands."
    }
  ];
  // Zone parsing is skipped when every zone-aware check is suppressed.
  const needsZoneParsing = zoneChecks.some((check) => !isSuppressed(context, check.checkId));
  const zones = needsZoneParsing ? parseZones(skillText) : [];
  const issues = [];
  for (const check of zoneChecks) {
    const issue = runZoneAwareSecurityCheck(context, zones, check);
    if (issue) {
      issues.push(issue);
    }
  }
  if (isSuppressed(context, "security:missing-guardrails")) {
    return issues;
  }
  const guardrailIdentity = {
    id: "security.safety-guardrails",
    checkId: "security:missing-guardrails",
    title: "Execution Safety Guardrails"
  };
  const hasShellActivity = SHELL_ACTIVITY_PATTERNS.some((pattern) => pattern.test(skillText));
  const missingGuardrails = hasShellActivity && !SAFETY_GUARDRAIL_PATTERN.test(skillText);
  if (missingGuardrails) {
    issues.push({
      ...guardrailIdentity,
      status: "warn",
      message: "Shell/tool execution is present, but no explicit safety guardrails were detected.",
      suggestion: "Add guidance such as approval requirements, dry-run mode, scope checks, and redaction rules."
    });
    return issues;
  }
  issues.push({
    ...guardrailIdentity,
    status: "pass",
    message: hasShellActivity ? "Shell/tool execution instructions include at least one safety guardrail." : "No shell/tool execution instructions detected."
  });
  return issues;
}
|
|
@@ -1038,9 +1345,11 @@ function lintFails(report, failOn) {
|
|
|
1038
1345
|
async function runLinter(inputPath, options = {}) {
|
|
1039
1346
|
const skill = await loadSkillFile(inputPath);
|
|
1040
1347
|
const frontmatter = parseFrontmatter(skill.raw);
|
|
1348
|
+
const suppressedCheckIds = new Set(options.suppress ?? []);
|
|
1041
1349
|
const context = {
|
|
1042
1350
|
skill,
|
|
1043
|
-
frontmatter
|
|
1351
|
+
frontmatter,
|
|
1352
|
+
suppressedCheckIds
|
|
1044
1353
|
};
|
|
1045
1354
|
const issues = [];
|
|
1046
1355
|
issues.push(...runFrontmatterChecks(context));
|
|
@@ -1049,8 +1358,7 @@ async function runLinter(inputPath, options = {}) {
|
|
|
1049
1358
|
issues.push(...runSecurityChecks(context));
|
|
1050
1359
|
issues.push(...await runDisclosureChecks(context));
|
|
1051
1360
|
issues.push(...runCompatibilityChecks(context));
|
|
1052
|
-
const
|
|
1053
|
-
const filteredIssues = issues.filter((issue) => !suppress.has(issue.checkId));
|
|
1361
|
+
const filteredIssues = issues.filter((issue) => !suppressedCheckIds.has(issue.checkId));
|
|
1054
1362
|
return {
|
|
1055
1363
|
target: inputPath,
|
|
1056
1364
|
issues: filteredIssues,
|
|
@@ -1070,6 +1378,14 @@ function renderIssueLine(issue, c) {
|
|
|
1070
1378
|
return ` ${label} ${issue.title}
|
|
1071
1379
|
${issue.message}${detail}`;
|
|
1072
1380
|
}
|
|
1381
|
+
/**
 * Count the skipped pattern records attached to security issues.
 *
 * Only issues whose `checkId` starts with "security:" contribute; an issue
 * without `skippedPatterns` contributes zero.
 *
 * @param {Array<{ checkId: string, skippedPatterns?: object[] }>} issues - Lint issues.
 * @returns {number} Total number of skipped pattern records.
 */
function countSkippedSecurityPatterns(issues) {
  let total = 0;
  issues.forEach((issue) => {
    if (issue.checkId.startsWith("security:")) {
      total += issue.skippedPatterns?.length ?? 0;
    }
  });
  return total;
}
|
|
1073
1389
|
function renderLintReport(report, enableColor) {
|
|
1074
1390
|
const c = getChalkInstance(enableColor);
|
|
1075
1391
|
const { passed, warnings, failures, total } = report.summary;
|
|
@@ -1082,8 +1398,11 @@ function renderLintReport(report, enableColor) {
|
|
|
1082
1398
|
`\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518`
|
|
1083
1399
|
];
|
|
1084
1400
|
const renderedIssues = report.issues.map((issue) => renderIssueLine(issue, c)).join("\n");
|
|
1401
|
+
const skippedSecurityPatterns = countSkippedSecurityPatterns(report.issues);
|
|
1402
|
+
const infoLine = skippedSecurityPatterns > 0 ? `
|
|
1403
|
+
${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)` : "";
|
|
1085
1404
|
return `${headerLines.join("\n")}
|
|
1086
|
-
${renderedIssues}`;
|
|
1405
|
+
${renderedIssues}${infoLine}`;
|
|
1087
1406
|
}
|
|
1088
1407
|
function formatPercent(value) {
|
|
1089
1408
|
return `${(value * 100).toFixed(1)}%`;
|
|
@@ -1171,6 +1490,10 @@ function renderCheckReport(result, enableColor, verbose) {
|
|
|
1171
1490
|
for (const issue of lintIssues) {
|
|
1172
1491
|
lines.push(renderIssueLine(issue, c));
|
|
1173
1492
|
}
|
|
1493
|
+
const skippedSecurityPatterns = countSkippedSecurityPatterns(result.lint.issues);
|
|
1494
|
+
if (skippedSecurityPatterns > 0) {
|
|
1495
|
+
lines.push(` ${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)`);
|
|
1496
|
+
}
|
|
1174
1497
|
lines.push("");
|
|
1175
1498
|
lines.push("Trigger");
|
|
1176
1499
|
if (result.trigger) {
|
|
@@ -1401,23 +1724,28 @@ var FAKE_SKILLS = [
|
|
|
1401
1724
|
{ name: "test-generator", description: "Generates unit and integration test cases from feature requirements." },
|
|
1402
1725
|
{ name: "prompt-tuner", description: "Improves prompts for reliability, formatting, and failure handling." }
|
|
1403
1726
|
];
|
|
1404
|
-
function
|
|
1405
|
-
let state = seed >>> 0;
|
|
1727
|
+
/**
 * Create a seeded pseudo-random number generator (mulberry32).
 *
 * The same seed always yields the same sequence. Not suitable for
 * security-sensitive randomness.
 *
 * @param {number} seed - Seed value; coerced to a 32-bit integer.
 * @returns {() => number} Generator returning values in [0, 1).
 */
function mulberry32(seed) {
  let state = seed;
  return () => {
    // Advance the 32-bit state by the fixed increment.
    state = ((state | 0) + 1831565813) | 0;
    const mixed = Math.imul(state ^ (state >>> 15), 1 | state);
    const scrambled = (mixed + Math.imul(mixed ^ (mixed >>> 7), 61 | mixed)) ^ mixed;
    // Convert to an unsigned 32-bit value and scale by 2^32.
    return ((scrambled ^ (scrambled >>> 14)) >>> 0) / 4294967296;
  };
}
|
|
1411
|
-
function
|
|
1736
|
+
/**
 * Choose the random source for a run.
 *
 * @param {number | undefined} seed - Optional seed.
 * @returns {() => number} A mulberry32 generator for the seed, or
 *   `Math.random` when the seed is undefined.
 */
function createRng(seed) {
  if (seed === undefined) {
    return Math.random;
  }
  return mulberry32(seed);
}
|
|
1739
|
+
/**
 * Return a shuffled copy of `values` (Fisher-Yates), drawing randomness from `rng`.
 *
 * The input is not modified.
 *
 * @param {Iterable<*>} values - Items to shuffle.
 * @param {() => number} rng - Source of numbers in [0, 1).
 * @returns {Array<*>} New array holding the same items in shuffled order.
 */
function shuffle(values, rng) {
  const result = [...values];
  let remaining = result.length;
  while (remaining > 1) {
    // Pick among the not-yet-fixed prefix, then fix the pick at its end.
    const pick = Math.floor(rng() * remaining);
    remaining -= 1;
    const held = result[remaining];
    result[remaining] = result[pick];
    result[pick] = held;
  }
  return result;
}
|
|
1419
|
-
function sample(values, count,
|
|
1420
|
-
return shuffle(values,
|
|
1747
|
+
/**
 * Pick up to `count` items at random from `values`, without repeats.
 *
 * @param {Array<*>} values - Pool to draw from.
 * @param {number} count - Number of items wanted; clamped to [0, values.length].
 * @param {() => number} rng - Source of numbers in [0, 1).
 * @returns {Array<*>} The first `count` items of a shuffled copy of `values`.
 */
function sample(values, count, rng) {
  const shuffled = shuffle(values, rng);
  const upperBound = Math.min(count, values.length);
  const limit = Math.max(0, upperBound);
  return shuffled.slice(0, limit);
}
|
|
1422
1750
|
function parseJsonArrayFromModelOutput(raw) {
|
|
1423
1751
|
const trimmed = raw.trim();
|
|
@@ -1529,20 +1857,20 @@ function buildSuggestions(metrics) {
|
|
|
1529
1857
|
return suggestions;
|
|
1530
1858
|
}
|
|
1531
1859
|
async function runTriggerTest(skill, options) {
|
|
1532
|
-
const
|
|
1860
|
+
const rng = createRng(options.seed);
|
|
1533
1861
|
const queries = options.queries && options.queries.length > 0 ? triggerQueryArraySchema.parse(options.queries) : await generateQueriesWithModel(skill, options.provider, options.model, options.numQueries);
|
|
1534
1862
|
const results = [];
|
|
1535
1863
|
const skillName = skill.frontmatter.name;
|
|
1536
1864
|
for (const testQuery of queries) {
|
|
1537
|
-
const fakeCount = 5 + Math.floor(
|
|
1538
|
-
const fakeSkills = sample(FAKE_SKILLS, fakeCount,
|
|
1865
|
+
const fakeCount = 5 + Math.floor(rng() * 5);
|
|
1866
|
+
const fakeSkills = sample(FAKE_SKILLS, fakeCount, rng);
|
|
1539
1867
|
const allSkills = shuffle([
|
|
1540
1868
|
...fakeSkills,
|
|
1541
1869
|
{
|
|
1542
1870
|
name: skill.frontmatter.name,
|
|
1543
1871
|
description: skill.frontmatter.description
|
|
1544
1872
|
}
|
|
1545
|
-
],
|
|
1873
|
+
], rng);
|
|
1546
1874
|
const skillListText = allSkills.map((entry) => `- ${entry.name}: ${entry.description}`).join("\n");
|
|
1547
1875
|
const systemPrompt = [
|
|
1548
1876
|
"You are selecting one skill to activate for a user query.",
|
|
@@ -1571,6 +1899,7 @@ async function runTriggerTest(skill, options) {
|
|
|
1571
1899
|
skillName,
|
|
1572
1900
|
model: options.model,
|
|
1573
1901
|
provider: options.provider.name,
|
|
1902
|
+
seed: options.seed,
|
|
1574
1903
|
queries,
|
|
1575
1904
|
cases: results,
|
|
1576
1905
|
metrics,
|
|
@@ -2221,6 +2550,7 @@ function createProvider(providerName, apiKeyOverride) {
|
|
|
2221
2550
|
var triggerCliSchema = z7.object({
|
|
2222
2551
|
queries: z7.string().optional(),
|
|
2223
2552
|
saveQueries: z7.string().optional(),
|
|
2553
|
+
seed: z7.number().int().optional(),
|
|
2224
2554
|
verbose: z7.boolean().optional(),
|
|
2225
2555
|
apiKey: z7.string().optional()
|
|
2226
2556
|
});
|
|
@@ -2232,6 +2562,13 @@ function resolveModel(provider, model) {
|
|
|
2232
2562
|
}
|
|
2233
2563
|
return model;
|
|
2234
2564
|
}
|
|
2565
|
+
/**
 * Append a "Seed: N" line to a rendered trigger report.
 *
 * @param {string} output - Rendered report.
 * @param {number | undefined} seed - Seed used for the run.
 * @returns {string} The report unchanged when the seed is undefined,
 *   otherwise the report followed by a newline and the seed line.
 */
function renderTriggerOutputWithSeed(output, seed) {
  return seed === undefined ? output : `${output}\nSeed: ${seed}`;
}
|
|
2235
2572
|
async function handleTriggerCommand(targetPath, options) {
|
|
2236
2573
|
const spinner = options.json || !process.stdout.isTTY ? null : ora("Preparing trigger evaluation...").start();
|
|
2237
2574
|
try {
|
|
@@ -2269,7 +2606,7 @@ async function handleTriggerCommand(targetPath, options) {
|
|
|
2269
2606
|
if (options.json) {
|
|
2270
2607
|
writeResult(result, true);
|
|
2271
2608
|
} else {
|
|
2272
|
-
writeResult(renderTriggerReport(result, options.color, options.verbose), false);
|
|
2609
|
+
writeResult(renderTriggerOutputWithSeed(renderTriggerReport(result, options.color, options.verbose), result.seed), false);
|
|
2273
2610
|
}
|
|
2274
2611
|
} catch (error) {
|
|
2275
2612
|
spinner?.stop();
|
|
@@ -2278,7 +2615,7 @@ async function handleTriggerCommand(targetPath, options) {
|
|
|
2278
2615
|
}
|
|
2279
2616
|
}
|
|
2280
2617
|
function registerTriggerCommand(program) {
|
|
2281
|
-
program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
|
|
2618
|
+
program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
|
|
2282
2619
|
const globalOptions = getGlobalCliOptions(command);
|
|
2283
2620
|
const config = getResolvedConfig(command);
|
|
2284
2621
|
const parsedCli = triggerCliSchema.safeParse(command.opts());
|
|
@@ -2294,7 +2631,7 @@ function registerTriggerCommand(program) {
|
|
|
2294
2631
|
queries: parsedCli.data.queries,
|
|
2295
2632
|
numQueries: config.trigger.numQueries,
|
|
2296
2633
|
saveQueries: parsedCli.data.saveQueries,
|
|
2297
|
-
seed: config.trigger.seed,
|
|
2634
|
+
seed: parsedCli.data.seed ?? config.trigger.seed,
|
|
2298
2635
|
verbose: Boolean(parsedCli.data.verbose),
|
|
2299
2636
|
apiKey: parsedCli.data.apiKey
|
|
2300
2637
|
});
|
|
@@ -2482,6 +2819,7 @@ var checkCliSchema = z9.object({
|
|
|
2482
2819
|
graderModel: z9.string().optional(),
|
|
2483
2820
|
apiKey: z9.string().optional(),
|
|
2484
2821
|
queries: z9.string().optional(),
|
|
2822
|
+
seed: z9.number().int().optional(),
|
|
2485
2823
|
prompts: z9.string().optional(),
|
|
2486
2824
|
saveResults: z9.string().optional(),
|
|
2487
2825
|
continueOnLintFail: z9.boolean().optional(),
|
|
@@ -2495,6 +2833,19 @@ function resolveModel3(provider, model) {
|
|
|
2495
2833
|
}
|
|
2496
2834
|
return model;
|
|
2497
2835
|
}
|
|
2836
|
+
function renderCheckOutputWithSeed(output, seed) {
|
|
2837
|
+
if (seed === void 0) {
|
|
2838
|
+
return output;
|
|
2839
|
+
}
|
|
2840
|
+
const lines = output.split("\n");
|
|
2841
|
+
const triggerIndex = lines.indexOf("Trigger");
|
|
2842
|
+
if (triggerIndex === -1) {
|
|
2843
|
+
return `${output}
|
|
2844
|
+
Seed: ${seed}`;
|
|
2845
|
+
}
|
|
2846
|
+
lines.splice(triggerIndex + 1, 0, `Seed: ${seed}`);
|
|
2847
|
+
return lines.join("\n");
|
|
2848
|
+
}
|
|
2498
2849
|
async function handleCheckCommand(targetPath, options, command) {
|
|
2499
2850
|
const spinner = options.json || !process.stdout.isTTY ? null : ora3("Preparing check run...").start();
|
|
2500
2851
|
try {
|
|
@@ -2557,7 +2908,10 @@ async function handleCheckCommand(targetPath, options, command) {
|
|
|
2557
2908
|
if (options.json) {
|
|
2558
2909
|
writeResult(result, true);
|
|
2559
2910
|
} else {
|
|
2560
|
-
writeResult(
|
|
2911
|
+
writeResult(
|
|
2912
|
+
renderCheckOutputWithSeed(renderCheckReport(result, options.color, options.verbose), result.trigger?.seed),
|
|
2913
|
+
false
|
|
2914
|
+
);
|
|
2561
2915
|
}
|
|
2562
2916
|
process.exitCode = result.gates.overallPassed ? 0 : 1;
|
|
2563
2917
|
} catch (error) {
|
|
@@ -2567,7 +2921,7 @@ async function handleCheckCommand(targetPath, options, command) {
|
|
|
2567
2921
|
}
|
|
2568
2922
|
}
|
|
2569
2923
|
function registerCheckCommand(program) {
|
|
2570
|
-
program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
|
|
2924
|
+
program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
|
|
2571
2925
|
const globalOptions = getGlobalCliOptions(command);
|
|
2572
2926
|
const config = getResolvedConfig(command);
|
|
2573
2927
|
const parsedCli = checkCliSchema.safeParse(command.opts());
|
|
@@ -2592,7 +2946,7 @@ function registerCheckCommand(program) {
|
|
|
2592
2946
|
numRuns: config.eval.numRuns,
|
|
2593
2947
|
lintFailOn: config.lint.failOn,
|
|
2594
2948
|
lintSuppress: config.lint.suppress,
|
|
2595
|
-
triggerSeed: config.trigger.seed,
|
|
2949
|
+
triggerSeed: parsedCli.data.seed ?? config.trigger.seed,
|
|
2596
2950
|
saveResults: parsedCli.data.saveResults,
|
|
2597
2951
|
continueOnLintFail: Boolean(parsedCli.data.continueOnLintFail),
|
|
2598
2952
|
verbose: Boolean(parsedCli.data.verbose)
|