skilltest 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,11 +1,15 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import fs7 from "node:fs";
4
+ import fs11 from "node:fs";
5
5
  import path6 from "node:path";
6
6
  import { fileURLToPath } from "node:url";
7
7
  import { Command } from "commander";
8
8
 
9
+ // src/commands/lint.ts
10
+ import fs6 from "node:fs/promises";
11
+ import { z as z6 } from "zod";
12
+
9
13
  // src/core/skill-parser.ts
10
14
  import fs from "node:fs/promises";
11
15
  import path from "node:path";
@@ -577,24 +581,6 @@ function runContentChecks(context) {
577
581
  message: "No obvious vague placeholder phrasing found."
578
582
  });
579
583
  }
580
- if (context.frontmatter.rawFrontmatter && /[<>]/.test(context.frontmatter.rawFrontmatter)) {
581
- issues.push({
582
- id: "content.frontmatter-angle-brackets",
583
- checkId: "content:angle-brackets",
584
- title: "Frontmatter Angle Brackets",
585
- status: "warn",
586
- message: "Frontmatter contains angle bracket characters (< or >), which can be misinterpreted in some agents.",
587
- suggestion: "Remove XML-like tags from frontmatter values when possible."
588
- });
589
- } else {
590
- issues.push({
591
- id: "content.frontmatter-angle-brackets",
592
- checkId: "content:angle-brackets",
593
- title: "Frontmatter Angle Brackets",
594
- status: "pass",
595
- message: "No angle bracket tokens detected in frontmatter."
596
- });
597
- }
598
584
  const secretsIssue = buildSecretsIssue(context);
599
585
  if (secretsIssue) {
600
586
  issues.push(secretsIssue);
@@ -947,6 +933,24 @@ function runFrontmatterChecks(context) {
947
933
  message: "license field is present."
948
934
  });
949
935
  }
936
+ if (context.frontmatter.rawFrontmatter && /[<>]/.test(context.frontmatter.rawFrontmatter)) {
937
+ issues.push({
938
+ id: "frontmatter.angle-brackets",
939
+ checkId: "frontmatter:angle-brackets",
940
+ title: "Frontmatter Angle Brackets",
941
+ status: "warn",
942
+ message: "Frontmatter contains angle bracket characters (< or >), which can be misinterpreted in some agents.",
943
+ suggestion: "Remove XML-like tags from frontmatter values when possible."
944
+ });
945
+ } else {
946
+ issues.push({
947
+ id: "frontmatter.angle-brackets",
948
+ checkId: "frontmatter:angle-brackets",
949
+ title: "Frontmatter Angle Brackets",
950
+ status: "pass",
951
+ message: "No angle bracket tokens detected in frontmatter."
952
+ });
953
+ }
950
954
  if (description && description.trim() !== "" && !descriptionLooksActionable(description)) {
951
955
  issues.push({
952
956
  id: "frontmatter.description.triggerability",
@@ -1366,6 +1370,739 @@ async function runLinter(inputPath, options = {}) {
1366
1370
  };
1367
1371
  }
1368
1372
 
1373
+ // src/reporters/html.ts
1374
+ function escapeHtml(value) {
1375
+ return String(value ?? "").replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
1376
+ }
1377
+ function formatPercent(value) {
1378
+ return `${(value * 100).toFixed(1)}%`;
1379
+ }
1380
+ function formatLineRange(startLine, endLine) {
1381
+ if (startLine === void 0) {
1382
+ return null;
1383
+ }
1384
+ if (endLine === void 0 || endLine === startLine) {
1385
+ return `line ${startLine}`;
1386
+ }
1387
+ return `lines ${startLine}-${endLine}`;
1388
+ }
1389
+ function badgeLabel(status) {
1390
+ if (status === "pass") {
1391
+ return "PASS";
1392
+ }
1393
+ if (status === "warn") {
1394
+ return "WARN";
1395
+ }
1396
+ if (status === "fail") {
1397
+ return "FAIL";
1398
+ }
1399
+ return "SKIP";
1400
+ }
1401
+ function renderBadge(status) {
1402
+ return `<span class="badge ${status}">${badgeLabel(status)}</span>`;
1403
+ }
1404
+ function renderStatCards(stats) {
1405
+ return `<div class="stats-grid">${stats.map(
1406
+ (stat) => `
1407
+ <div class="stat-card${stat.status ? ` status-${stat.status}` : ""}">
1408
+ <div class="stat-label">${escapeHtml(stat.label)}</div>
1409
+ <div class="stat-value">${escapeHtml(stat.value)}</div>
1410
+ ${stat.note ? `<div class="stat-note">${escapeHtml(stat.note)}</div>` : ""}
1411
+ </div>
1412
+ `
1413
+ ).join("")}</div>`;
1414
+ }
1415
+ function renderMetaItems(items) {
1416
+ if (items.length === 0) {
1417
+ return "";
1418
+ }
1419
+ return `<div class="meta-grid">${items.map(
1420
+ (item) => `
1421
+ <div class="meta-item">
1422
+ <span class="meta-label">${escapeHtml(item.label)}</span>
1423
+ <span class="meta-value">${escapeHtml(item.value)}</span>
1424
+ </div>
1425
+ `
1426
+ ).join("")}</div>`;
1427
+ }
1428
+ function renderHeaderCard(commandName, heading, target, stats, metaItems) {
1429
+ return `
1430
+ <section class="card header-card">
1431
+ <div class="eyebrow">skilltest ${escapeHtml(commandName)}</div>
1432
+ <h1>${escapeHtml(heading)}</h1>
1433
+ <div class="target-line">target: ${escapeHtml(target)}</div>
1434
+ ${renderMetaItems(metaItems)}
1435
+ ${renderStatCards(stats)}
1436
+ </section>
1437
+ `;
1438
+ }
1439
+ function renderSectionCard(title, body) {
1440
+ return `
1441
+ <section class="card">
1442
+ <h2>${escapeHtml(title)}</h2>
1443
+ ${body}
1444
+ </section>
1445
+ `;
1446
+ }
1447
+ function renderMessageRow(status, title, message, details) {
1448
+ return `
1449
+ <div class="row">
1450
+ <div class="row-header">
1451
+ <div class="row-title">${escapeHtml(title)}</div>
1452
+ ${renderBadge(status)}
1453
+ </div>
1454
+ <div class="row-body">${escapeHtml(message)}</div>
1455
+ ${details ?? ""}
1456
+ </div>
1457
+ `;
1458
+ }
1459
+ function renderDetails(summary, content) {
1460
+ return `
1461
+ <details class="detail-block">
1462
+ <summary>${escapeHtml(summary)}</summary>
1463
+ <div class="detail-content">${content}</div>
1464
+ </details>
1465
+ `;
1466
+ }
1467
+ function renderPreBlock(content) {
1468
+ return `<pre>${escapeHtml(content)}</pre>`;
1469
+ }
1470
+ function renderDefinitionList(items) {
1471
+ return `<div class="definition-list">${items.map(
1472
+ (item) => `
1473
+ <div class="definition-item">
1474
+ <div class="definition-label">${escapeHtml(item.label)}</div>
1475
+ <div class="definition-value">${escapeHtml(item.value)}</div>
1476
+ </div>
1477
+ `
1478
+ ).join("")}</div>`;
1479
+ }
1480
+ function countSkippedSecurityPatterns(issues) {
1481
+ return issues.reduce((total, issue) => total + (issue.skippedPatterns?.length ?? 0), 0);
1482
+ }
1483
+ function renderLintIssueRow(issue) {
1484
+ const lineRange = formatLineRange(issue.startLine, issue.endLine);
1485
+ const detailBlocks = [];
1486
+ if (issue.suggestion) {
1487
+ detailBlocks.push(renderDetails("Suggestion", `<p>${escapeHtml(issue.suggestion)}</p>`));
1488
+ }
1489
+ if (issue.skippedPatterns && issue.skippedPatterns.length > 0) {
1490
+ const patternItems = issue.skippedPatterns.map(
1491
+ (pattern) => `
1492
+ <div class="definition-item">
1493
+ <div class="definition-label">${escapeHtml(pattern.label)}</div>
1494
+ <div class="definition-value">${escapeHtml(
1495
+ `${pattern.zoneType} lines ${pattern.startLine}-${pattern.endLine}`
1496
+ )}</div>
1497
+ </div>
1498
+ `
1499
+ ).join("");
1500
+ detailBlocks.push(renderDetails("Skipped security patterns", `<div class="definition-list">${patternItems}</div>`));
1501
+ }
1502
+ return `
1503
+ <div class="row">
1504
+ <div class="row-header">
1505
+ <div>
1506
+ <div class="row-title">${escapeHtml(issue.title)}</div>
1507
+ <div class="row-subtitle">${escapeHtml(issue.checkId)}</div>
1508
+ </div>
1509
+ ${renderBadge(issue.status)}
1510
+ </div>
1511
+ <div class="row-body">${escapeHtml(issue.message)}</div>
1512
+ ${renderDefinitionList(
1513
+ [
1514
+ lineRange ? { label: "Location", value: lineRange } : null,
1515
+ { label: "Check ID", value: issue.checkId }
1516
+ ].filter((item) => item !== null)
1517
+ )}
1518
+ ${detailBlocks.join("")}
1519
+ </div>
1520
+ `;
1521
+ }
1522
+ function renderLintIssueList(report) {
1523
+ const skippedSecurityPatterns = countSkippedSecurityPatterns(report.issues);
1524
+ const rows = report.issues.map((issue) => renderLintIssueRow(issue)).join("");
1525
+ const info = skippedSecurityPatterns > 0 ? `<p class="info-line">Skipped security patterns in examples/comments: ${escapeHtml(skippedSecurityPatterns)}</p>` : "";
1526
+ return `<div class="row-list">${rows}</div>${info}`;
1527
+ }
1528
+ function renderTriggerCaseRow(testCase) {
1529
+ const details = testCase.rawModelResponse ? renderDetails("Model response", renderPreBlock(testCase.rawModelResponse)) : "";
1530
+ return `
1531
+ <div class="row">
1532
+ <div class="row-header">
1533
+ <div>
1534
+ <div class="row-title">${escapeHtml(testCase.query)}</div>
1535
+ <div class="row-subtitle">${escapeHtml(
1536
+ `expected=${testCase.expected} actual=${testCase.actual} should_trigger=${String(testCase.shouldTrigger)}`
1537
+ )}</div>
1538
+ </div>
1539
+ ${renderBadge(testCase.matched ? "pass" : "fail")}
1540
+ </div>
1541
+ ${renderDefinitionList([
1542
+ { label: "Expected", value: testCase.expected },
1543
+ { label: "Actual", value: testCase.actual }
1544
+ ])}
1545
+ ${details}
1546
+ </div>
1547
+ `;
1548
+ }
1549
+ function promptStatus(promptResult) {
1550
+ if (promptResult.totalAssertions === 0) {
1551
+ return "skip";
1552
+ }
1553
+ if (promptResult.passedAssertions === promptResult.totalAssertions) {
1554
+ return "pass";
1555
+ }
1556
+ if (promptResult.passedAssertions === 0) {
1557
+ return "fail";
1558
+ }
1559
+ return "warn";
1560
+ }
1561
+ function renderAssertionRow(assertion) {
1562
+ return renderDetails(
1563
+ `${badgeLabel(assertion.passed ? "pass" : "fail")} ${assertion.assertion}`,
1564
+ renderPreBlock(assertion.evidence)
1565
+ );
1566
+ }
1567
+ function renderEvalPromptRow(promptResult) {
1568
+ const assertionDetails = promptResult.assertions.map((assertion) => renderAssertionRow(assertion)).join("");
1569
+ const responseDetails = renderDetails("Full model response", renderPreBlock(promptResult.response));
1570
+ return `
1571
+ <div class="row">
1572
+ <div class="row-header">
1573
+ <div>
1574
+ <div class="row-title">${escapeHtml(promptResult.prompt)}</div>
1575
+ <div class="row-subtitle">${escapeHtml(
1576
+ `${promptResult.passedAssertions}/${promptResult.totalAssertions} assertions passed`
1577
+ )}</div>
1578
+ </div>
1579
+ ${renderBadge(promptStatus(promptResult))}
1580
+ </div>
1581
+ <div class="row-body">${escapeHtml(promptResult.responseSummary)}</div>
1582
+ ${renderDefinitionList([
1583
+ { label: "Passed assertions", value: String(promptResult.passedAssertions) },
1584
+ { label: "Total assertions", value: String(promptResult.totalAssertions) }
1585
+ ])}
1586
+ ${renderDetails("Assertion evidence", assertionDetails || `<p>No assertions.</p>`)}
1587
+ ${responseDetails}
1588
+ </div>
1589
+ `;
1590
+ }
1591
+ function gateStatus(value) {
1592
+ if (value === null) {
1593
+ return "skip";
1594
+ }
1595
+ return value ? "pass" : "fail";
1596
+ }
1597
+ function renderGateCard(title, status, message) {
1598
+ return `
1599
+ <div class="gate-card">
1600
+ <div class="row-header">
1601
+ <div class="row-title">${escapeHtml(title)}</div>
1602
+ ${renderBadge(status)}
1603
+ </div>
1604
+ <div class="row-body">${escapeHtml(message)}</div>
1605
+ </div>
1606
+ `;
1607
+ }
1608
+ function renderCollapsibleSection(title, summary, body, status) {
1609
+ return `
1610
+ <details class="section-card" open>
1611
+ <summary>
1612
+ <span class="section-title">${escapeHtml(title)}</span>
1613
+ <span class="section-summary">${renderBadge(status)} ${escapeHtml(summary)}</span>
1614
+ </summary>
1615
+ <div class="section-body">${body}</div>
1616
+ </details>
1617
+ `;
1618
+ }
1619
+ function resolveOptionalTarget(result, fallback) {
1620
+ return result.target ?? fallback;
1621
+ }
1622
+ function renderHtmlDocument(title, body) {
1623
+ return `<!DOCTYPE html>
1624
+ <html lang="en">
1625
+ <head>
1626
+ <meta charset="utf-8">
1627
+ <meta name="viewport" content="width=device-width, initial-scale=1">
1628
+ <title>${escapeHtml(title)}</title>
1629
+ <style>
1630
+ :root {
1631
+ color-scheme: light;
1632
+ --bg: #f5f5f5;
1633
+ --surface: #ffffff;
1634
+ --surface-muted: #fafafa;
1635
+ --border: #d4d4d8;
1636
+ --text: #111827;
1637
+ --muted: #6b7280;
1638
+ --pass: #22c55e;
1639
+ --warn: #eab308;
1640
+ --fail: #ef4444;
1641
+ --skip: #6b7280;
1642
+ --shadow: 0 10px 30px rgba(15, 23, 42, 0.08);
1643
+ }
1644
+
1645
+ * {
1646
+ box-sizing: border-box;
1647
+ }
1648
+
1649
+ body {
1650
+ margin: 0;
1651
+ background: linear-gradient(180deg, #fafafa 0%, #f4f4f5 100%);
1652
+ color: var(--text);
1653
+ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
1654
+ line-height: 1.5;
1655
+ }
1656
+
1657
+ .container {
1658
+ max-width: 1120px;
1659
+ margin: 0 auto;
1660
+ padding: 24px 16px 40px;
1661
+ }
1662
+
1663
+ .card,
1664
+ .section-card {
1665
+ background: var(--surface);
1666
+ border: 1px solid var(--border);
1667
+ border-radius: 16px;
1668
+ box-shadow: var(--shadow);
1669
+ margin-bottom: 16px;
1670
+ }
1671
+
1672
+ .card {
1673
+ padding: 20px;
1674
+ }
1675
+
1676
+ .header-card h1,
1677
+ .card h2 {
1678
+ margin: 0 0 10px;
1679
+ font-size: 1.25rem;
1680
+ }
1681
+
1682
+ .eyebrow {
1683
+ margin-bottom: 10px;
1684
+ color: var(--muted);
1685
+ font-size: 0.78rem;
1686
+ letter-spacing: 0.08em;
1687
+ text-transform: uppercase;
1688
+ }
1689
+
1690
+ .target-line,
1691
+ .info-line {
1692
+ color: var(--muted);
1693
+ overflow-wrap: anywhere;
1694
+ }
1695
+
1696
+ .meta-grid,
1697
+ .stats-grid,
1698
+ .gate-grid,
1699
+ .definition-list {
1700
+ display: grid;
1701
+ gap: 12px;
1702
+ }
1703
+
1704
+ .meta-grid,
1705
+ .gate-grid,
1706
+ .definition-list {
1707
+ grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
1708
+ }
1709
+
1710
+ .stats-grid {
1711
+ grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
1712
+ margin-top: 16px;
1713
+ }
1714
+
1715
+ .meta-grid {
1716
+ margin-top: 14px;
1717
+ }
1718
+
1719
+ .meta-item,
1720
+ .definition-item,
1721
+ .stat-card,
1722
+ .gate-card {
1723
+ background: var(--surface-muted);
1724
+ border: 1px solid var(--border);
1725
+ border-radius: 12px;
1726
+ padding: 12px;
1727
+ }
1728
+
1729
+ .meta-item,
1730
+ .definition-item {
1731
+ display: flex;
1732
+ justify-content: space-between;
1733
+ gap: 12px;
1734
+ }
1735
+
1736
+ .meta-label,
1737
+ .definition-label,
1738
+ .stat-label {
1739
+ color: var(--muted);
1740
+ font-size: 0.82rem;
1741
+ }
1742
+
1743
+ .meta-value,
1744
+ .definition-value {
1745
+ text-align: right;
1746
+ overflow-wrap: anywhere;
1747
+ }
1748
+
1749
+ .stat-value {
1750
+ margin-top: 4px;
1751
+ font-size: 1.3rem;
1752
+ font-weight: 700;
1753
+ }
1754
+
1755
+ .stat-note {
1756
+ margin-top: 6px;
1757
+ color: var(--muted);
1758
+ font-size: 0.82rem;
1759
+ }
1760
+
1761
+ .status-pass {
1762
+ border-color: rgba(34, 197, 94, 0.35);
1763
+ }
1764
+
1765
+ .status-warn {
1766
+ border-color: rgba(234, 179, 8, 0.35);
1767
+ }
1768
+
1769
+ .status-fail {
1770
+ border-color: rgba(239, 68, 68, 0.35);
1771
+ }
1772
+
1773
+ .status-skip {
1774
+ border-color: rgba(107, 114, 128, 0.35);
1775
+ }
1776
+
1777
+ .row-list {
1778
+ display: grid;
1779
+ gap: 12px;
1780
+ }
1781
+
1782
+ .row {
1783
+ border: 1px solid var(--border);
1784
+ border-radius: 12px;
1785
+ padding: 14px;
1786
+ background: var(--surface-muted);
1787
+ }
1788
+
1789
+ .row-header {
1790
+ display: flex;
1791
+ justify-content: space-between;
1792
+ align-items: flex-start;
1793
+ gap: 12px;
1794
+ }
1795
+
1796
+ .row-title {
1797
+ font-weight: 700;
1798
+ overflow-wrap: anywhere;
1799
+ }
1800
+
1801
+ .row-subtitle {
1802
+ margin-top: 4px;
1803
+ color: var(--muted);
1804
+ font-size: 0.84rem;
1805
+ overflow-wrap: anywhere;
1806
+ }
1807
+
1808
+ .row-body {
1809
+ margin-top: 10px;
1810
+ overflow-wrap: anywhere;
1811
+ }
1812
+
1813
+ .badge {
1814
+ display: inline-flex;
1815
+ align-items: center;
1816
+ justify-content: center;
1817
+ min-width: 58px;
1818
+ padding: 3px 10px;
1819
+ border-radius: 999px;
1820
+ border: 1px solid currentColor;
1821
+ font-size: 0.76rem;
1822
+ font-weight: 700;
1823
+ letter-spacing: 0.04em;
1824
+ white-space: nowrap;
1825
+ }
1826
+
1827
+ .badge.pass {
1828
+ color: #15803d;
1829
+ background: rgba(34, 197, 94, 0.14);
1830
+ }
1831
+
1832
+ .badge.warn {
1833
+ color: #a16207;
1834
+ background: rgba(234, 179, 8, 0.18);
1835
+ }
1836
+
1837
+ .badge.fail {
1838
+ color: #b91c1c;
1839
+ background: rgba(239, 68, 68, 0.14);
1840
+ }
1841
+
1842
+ .badge.skip {
1843
+ color: #4b5563;
1844
+ background: rgba(107, 114, 128, 0.14);
1845
+ }
1846
+
1847
+ details {
1848
+ margin-top: 10px;
1849
+ }
1850
+
1851
+ details summary {
1852
+ cursor: pointer;
1853
+ color: var(--muted);
1854
+ }
1855
+
1856
+ .detail-block {
1857
+ border-top: 1px dashed var(--border);
1858
+ padding-top: 10px;
1859
+ }
1860
+
1861
+ .detail-content p {
1862
+ margin: 0;
1863
+ }
1864
+
1865
+ .section-card summary {
1866
+ display: flex;
1867
+ justify-content: space-between;
1868
+ align-items: center;
1869
+ gap: 12px;
1870
+ padding: 18px 20px;
1871
+ list-style: none;
1872
+ }
1873
+
1874
+ .section-card summary::-webkit-details-marker {
1875
+ display: none;
1876
+ }
1877
+
1878
+ .section-title {
1879
+ font-size: 1rem;
1880
+ font-weight: 700;
1881
+ color: var(--text);
1882
+ }
1883
+
1884
+ .section-summary {
1885
+ display: inline-flex;
1886
+ align-items: center;
1887
+ gap: 8px;
1888
+ color: var(--muted);
1889
+ text-align: right;
1890
+ }
1891
+
1892
+ .section-body {
1893
+ padding: 0 20px 20px;
1894
+ }
1895
+
1896
+ .gate-grid {
1897
+ margin-top: 12px;
1898
+ }
1899
+
1900
+ pre {
1901
+ margin: 0;
1902
+ padding: 12px;
1903
+ background: #f8fafc;
1904
+ border: 1px solid var(--border);
1905
+ border-radius: 10px;
1906
+ white-space: pre-wrap;
1907
+ word-break: break-word;
1908
+ overflow-wrap: anywhere;
1909
+ }
1910
+
1911
+ ul {
1912
+ margin: 0;
1913
+ padding-left: 20px;
1914
+ }
1915
+
1916
+ @media (max-width: 720px) {
1917
+ .container {
1918
+ padding: 16px 12px 28px;
1919
+ }
1920
+
1921
+ .row-header,
1922
+ .section-card summary,
1923
+ .meta-item,
1924
+ .definition-item {
1925
+ flex-direction: column;
1926
+ align-items: flex-start;
1927
+ }
1928
+
1929
+ .meta-value,
1930
+ .definition-value,
1931
+ .section-summary {
1932
+ text-align: left;
1933
+ }
1934
+ }
1935
+ </style>
1936
+ </head>
1937
+ <body>
1938
+ <main class="container">
1939
+ ${body}
1940
+ </main>
1941
+ </body>
1942
+ </html>`;
1943
+ }
1944
+ function renderLintHtml(report) {
1945
+ const passRate = report.summary.total === 0 ? 0 : report.summary.passed / report.summary.total;
1946
+ const body = [
1947
+ renderHeaderCard(
1948
+ "lint",
1949
+ "Static Analysis Report",
1950
+ report.target,
1951
+ [
1952
+ { label: "Pass rate", value: formatPercent(passRate), note: `${report.summary.passed}/${report.summary.total} passed` },
1953
+ { label: "Warnings", value: String(report.summary.warnings), status: report.summary.warnings > 0 ? "warn" : "pass" },
1954
+ { label: "Failures", value: String(report.summary.failures), status: report.summary.failures > 0 ? "fail" : "pass" },
1955
+ { label: "Checks", value: String(report.summary.total) }
1956
+ ],
1957
+ [{ label: "Target", value: report.target }]
1958
+ ),
1959
+ renderSectionCard("Lint Issues", renderLintIssueList(report))
1960
+ ].join("");
1961
+ return renderHtmlDocument(`skilltest lint - ${report.target}`, body);
1962
+ }
1963
+ function renderTriggerHtml(result) {
1964
+ const htmlResult = result;
1965
+ const target = resolveOptionalTarget(htmlResult, result.skillName);
1966
+ const matchedCount = result.cases.filter((testCase) => testCase.matched).length;
1967
+ const matchRate = result.cases.length === 0 ? 0 : matchedCount / result.cases.length;
1968
+ const body = [
1969
+ renderHeaderCard(
1970
+ "trigger",
1971
+ result.skillName,
1972
+ target,
1973
+ [
1974
+ { label: "Match rate", value: formatPercent(matchRate), note: `${matchedCount}/${result.cases.length} matched` },
1975
+ { label: "Precision", value: formatPercent(result.metrics.precision) },
1976
+ { label: "Recall", value: formatPercent(result.metrics.recall) },
1977
+ { label: "F1", value: formatPercent(result.metrics.f1), status: result.metrics.f1 >= 0.8 ? "pass" : "warn" }
1978
+ ],
1979
+ [
1980
+ { label: "Provider", value: result.provider },
1981
+ { label: "Model", value: result.model },
1982
+ { label: "Seed", value: result.seed !== void 0 ? String(result.seed) : "none" },
1983
+ { label: "Queries", value: String(result.queries.length) }
1984
+ ]
1985
+ ),
1986
+ renderSectionCard("Trigger Cases", `<div class="row-list">${result.cases.map((testCase) => renderTriggerCaseRow(testCase)).join("")}</div>`),
1987
+ renderSectionCard(
1988
+ "Suggestions",
1989
+ `<ul>${result.suggestions.map((suggestion) => `<li>${escapeHtml(suggestion)}</li>`).join("")}</ul>`
1990
+ )
1991
+ ].join("");
1992
+ return renderHtmlDocument(`skilltest trigger - ${result.skillName}`, body);
1993
+ }
1994
+ function renderEvalHtml(result) {
1995
+ const htmlResult = result;
1996
+ const target = resolveOptionalTarget(htmlResult, result.skillName);
1997
+ const passRate = result.summary.totalAssertions === 0 ? 0 : result.summary.passedAssertions / result.summary.totalAssertions;
1998
+ const body = [
1999
+ renderHeaderCard(
2000
+ "eval",
2001
+ result.skillName,
2002
+ target,
2003
+ [
2004
+ {
2005
+ label: "Assertion pass rate",
2006
+ value: formatPercent(passRate),
2007
+ note: `${result.summary.passedAssertions}/${result.summary.totalAssertions} passed`
2008
+ },
2009
+ { label: "Prompts", value: String(result.summary.totalPrompts) },
2010
+ { label: "Model", value: result.model },
2011
+ { label: "Grader", value: result.graderModel }
2012
+ ],
2013
+ [
2014
+ { label: "Provider", value: result.provider },
2015
+ { label: "Execution model", value: result.model },
2016
+ { label: "Grader model", value: result.graderModel },
2017
+ { label: "Prompts", value: String(result.prompts.length) }
2018
+ ]
2019
+ ),
2020
+ renderSectionCard("Eval Prompts", `<div class="row-list">${result.results.map((promptResult) => renderEvalPromptRow(promptResult)).join("")}</div>`)
2021
+ ].join("");
2022
+ return renderHtmlDocument(`skilltest eval - ${result.skillName}`, body);
2023
+ }
2024
+ function renderCheckHtml(result) {
2025
+ const skillName = result.trigger?.skillName ?? result.eval?.skillName ?? result.target;
2026
+ const triggerBody = result.trigger ? `<div class="row-list">${result.trigger.cases.map((testCase) => renderTriggerCaseRow(testCase)).join("")}</div>
2027
+ <div class="card" style="margin-top: 16px;">
2028
+ <h2>Trigger Suggestions</h2>
2029
+ <ul>${result.trigger.suggestions.map((suggestion) => `<li>${escapeHtml(suggestion)}</li>`).join("")}</ul>
2030
+ </div>` : renderMessageRow("skip", "Trigger skipped", result.triggerSkippedReason ?? "Skipped.");
2031
+ const evalBody = result.eval ? `<div class="row-list">${result.eval.results.map((promptResult) => renderEvalPromptRow(promptResult)).join("")}</div>` : renderMessageRow("skip", "Eval skipped", result.evalSkippedReason ?? "Skipped.");
2032
+ const lintStatus = result.gates.lintPassed ? "pass" : "fail";
2033
+ const triggerStatus = gateStatus(result.gates.triggerPassed);
2034
+ const evalStatus = gateStatus(result.gates.evalPassed);
2035
+ const overallStatus = result.gates.overallPassed ? "pass" : "fail";
2036
+ const header = renderHeaderCard(
2037
+ "check",
2038
+ skillName,
2039
+ result.target,
2040
+ [
2041
+ { label: "Overall gate", value: badgeLabel(overallStatus), status: overallStatus },
2042
+ {
2043
+ label: "Trigger F1",
2044
+ value: result.gates.triggerF1 !== null ? formatPercent(result.gates.triggerF1) : "skipped",
2045
+ status: triggerStatus
2046
+ },
2047
+ {
2048
+ label: "Eval pass rate",
2049
+ value: result.gates.evalAssertPassRate !== null ? formatPercent(result.gates.evalAssertPassRate) : "skipped",
2050
+ status: evalStatus
2051
+ },
2052
+ {
2053
+ label: "Lint result",
2054
+ value: `${result.lint.summary.failures} fail / ${result.lint.summary.warnings} warn`,
2055
+ status: lintStatus
2056
+ }
2057
+ ],
2058
+ [
2059
+ { label: "Provider", value: result.provider },
2060
+ { label: "Model", value: result.model },
2061
+ { label: "Grader model", value: result.graderModel },
2062
+ {
2063
+ label: "Thresholds",
2064
+ value: `min-f1=${result.thresholds.minF1.toFixed(2)} min-assert-pass-rate=${result.thresholds.minAssertPassRate.toFixed(2)}`
2065
+ }
2066
+ ]
2067
+ );
2068
+ const lintSection = renderCollapsibleSection(
2069
+ "Lint",
2070
+ `${result.lint.summary.passed}/${result.lint.summary.total} passed, ${result.lint.summary.warnings} warnings, ${result.lint.summary.failures} failures`,
2071
+ renderLintIssueList(result.lint),
2072
+ lintStatus
2073
+ );
2074
+ const triggerSection = renderCollapsibleSection(
2075
+ "Trigger",
2076
+ result.trigger ? `f1=${formatPercent(result.trigger.metrics.f1)} precision=${formatPercent(result.trigger.metrics.precision)} recall=${formatPercent(result.trigger.metrics.recall)}` : result.triggerSkippedReason ?? "Skipped.",
2077
+ triggerBody,
2078
+ triggerStatus
2079
+ );
2080
+ const evalSection = renderCollapsibleSection(
2081
+ "Eval",
2082
+ result.eval ? `assertion pass rate=${formatPercent(result.gates.evalAssertPassRate ?? 0)} (${result.eval.summary.passedAssertions}/${result.eval.summary.totalAssertions})` : result.evalSkippedReason ?? "Skipped.",
2083
+ evalBody,
2084
+ evalStatus
2085
+ );
2086
+ const qualityGate = renderSectionCard(
2087
+ "Quality Gate",
2088
+ `<div class="gate-grid">
2089
+ ${renderGateCard("Lint gate", lintStatus, result.gates.lintPassed ? "Lint passed." : "Lint failed.")}
2090
+ ${renderGateCard(
2091
+ "Trigger gate",
2092
+ triggerStatus,
2093
+ result.gates.triggerPassed === null ? result.triggerSkippedReason ?? "Skipped." : `required ${result.thresholds.minF1.toFixed(2)}, actual ${result.gates.triggerF1?.toFixed(2) ?? "n/a"}`
2094
+ )}
2095
+ ${renderGateCard(
2096
+ "Eval gate",
2097
+ evalStatus,
2098
+ result.gates.evalPassed === null ? result.evalSkippedReason ?? "Skipped." : `required ${result.thresholds.minAssertPassRate.toFixed(2)}, actual ${result.gates.evalAssertPassRate?.toFixed(2) ?? "n/a"}`
2099
+ )}
2100
+ ${renderGateCard("Overall", overallStatus, result.gates.overallPassed ? "All quality gates passed." : "One or more gates failed.")}
2101
+ </div>`
2102
+ );
2103
+ return renderHtmlDocument(`skilltest check - ${skillName}`, [header, lintSection, triggerSection, evalSection, qualityGate].join(""));
2104
+ }
2105
+
1369
2106
  // src/reporters/terminal.ts
1370
2107
  import { Chalk } from "chalk";
1371
2108
  function getChalkInstance(enableColor) {
@@ -1378,7 +2115,7 @@ function renderIssueLine(issue, c) {
1378
2115
  return ` ${label} ${issue.title}
1379
2116
  ${issue.message}${detail}`;
1380
2117
  }
1381
- function countSkippedSecurityPatterns(issues) {
2118
+ function countSkippedSecurityPatterns2(issues) {
1382
2119
  return issues.reduce((total, issue) => {
1383
2120
  if (!issue.checkId.startsWith("security:")) {
1384
2121
  return total;
@@ -1398,13 +2135,13 @@ function renderLintReport(report, enableColor) {
1398
2135
  `\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518`
1399
2136
  ];
1400
2137
  const renderedIssues = report.issues.map((issue) => renderIssueLine(issue, c)).join("\n");
1401
- const skippedSecurityPatterns = countSkippedSecurityPatterns(report.issues);
2138
+ const skippedSecurityPatterns = countSkippedSecurityPatterns2(report.issues);
1402
2139
  const infoLine = skippedSecurityPatterns > 0 ? `
1403
2140
  ${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)` : "";
1404
2141
  return `${headerLines.join("\n")}
1405
2142
  ${renderedIssues}${infoLine}`;
1406
2143
  }
1407
- function formatPercent(value) {
2144
+ function formatPercent2(value) {
1408
2145
  return `${(value * 100).toFixed(1)}%`;
1409
2146
  }
1410
2147
  function renderTriggerReport(result, enableColor, verbose) {
@@ -1416,7 +2153,7 @@ function renderTriggerReport(result, enableColor, verbose) {
1416
2153
  lines.push(`\u2502 skill: ${result.skillName}`);
1417
2154
  lines.push(`\u2502 provider/model: ${result.provider}/${result.model}`);
1418
2155
  lines.push(
1419
- `\u2502 precision: ${formatPercent(result.metrics.precision)} recall: ${formatPercent(result.metrics.recall)} f1: ${formatPercent(result.metrics.f1)}`
2156
+ `\u2502 precision: ${formatPercent2(result.metrics.precision)} recall: ${formatPercent2(result.metrics.recall)} f1: ${formatPercent2(result.metrics.f1)}`
1420
2157
  );
1421
2158
  lines.push(
1422
2159
  `\u2502 TP ${result.metrics.truePositives} TN ${result.metrics.trueNegatives} FP ${result.metrics.falsePositives} FN ${result.metrics.falseNegatives}`
@@ -1490,7 +2227,7 @@ function renderCheckReport(result, enableColor, verbose) {
1490
2227
  for (const issue of lintIssues) {
1491
2228
  lines.push(renderIssueLine(issue, c));
1492
2229
  }
1493
- const skippedSecurityPatterns = countSkippedSecurityPatterns(result.lint.issues);
2230
+ const skippedSecurityPatterns = countSkippedSecurityPatterns2(result.lint.issues);
1494
2231
  if (skippedSecurityPatterns > 0) {
1495
2232
  lines.push(` ${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)`);
1496
2233
  }
@@ -1498,7 +2235,7 @@ function renderCheckReport(result, enableColor, verbose) {
1498
2235
  lines.push("Trigger");
1499
2236
  if (result.trigger) {
1500
2237
  lines.push(
1501
- `- ${triggerGate} f1=${formatPercent(result.trigger.metrics.f1)} (precision=${formatPercent(result.trigger.metrics.precision)} recall=${formatPercent(result.trigger.metrics.recall)})`
2238
+ `- ${triggerGate} f1=${formatPercent2(result.trigger.metrics.f1)} (precision=${formatPercent2(result.trigger.metrics.precision)} recall=${formatPercent2(result.trigger.metrics.recall)})`
1502
2239
  );
1503
2240
  lines.push(
1504
2241
  ` TP ${result.trigger.metrics.truePositives} TN ${result.trigger.metrics.trueNegatives} FP ${result.trigger.metrics.falsePositives} FN ${result.trigger.metrics.falseNegatives}`
@@ -1517,7 +2254,7 @@ function renderCheckReport(result, enableColor, verbose) {
1517
2254
  if (result.eval) {
1518
2255
  const passRate = result.gates.evalAssertPassRate ?? 0;
1519
2256
  lines.push(
1520
- `- ${evalGate} assertion pass rate=${formatPercent(passRate)} (${result.eval.summary.passedAssertions}/${result.eval.summary.totalAssertions})`
2257
+ `- ${evalGate} assertion pass rate=${formatPercent2(passRate)} (${result.eval.summary.passedAssertions}/${result.eval.summary.totalAssertions})`
1521
2258
  );
1522
2259
  for (const promptResult of result.eval.results) {
1523
2260
  const failedAssertions = promptResult.assertions.filter((assertion) => !assertion.passed);
@@ -1577,12 +2314,13 @@ function extractJsonObject(raw) {
1577
2314
  }
1578
2315
  throw new Error("Grader did not return a JSON object.");
1579
2316
  }
1580
- async function gradeResponse(options) {
1581
- const assertionList = options.assertions && options.assertions.length > 0 ? options.assertions : [
1582
- "The response follows the skill instructions faithfully.",
1583
- "The response is well-structured and actionable.",
1584
- "The response addresses the user prompt directly."
1585
- ];
2317
+ var DEFAULT_ASSERTIONS = [
2318
+ "The response follows the skill instructions faithfully.",
2319
+ "The response is well-structured and actionable.",
2320
+ "The response addresses the user prompt directly."
2321
+ ];
2322
+ function buildGraderPrompts(options) {
2323
+ const assertions = options.assertions && options.assertions.length > 0 ? options.assertions : DEFAULT_ASSERTIONS;
1586
2324
  const systemPrompt = [
1587
2325
  "You are a strict evaluator for agent skill outputs.",
1588
2326
  "Assess each assertion and return JSON only.",
@@ -1599,15 +2337,78 @@ async function gradeResponse(options) {
1599
2337
  options.modelResponse,
1600
2338
  "",
1601
2339
  "Assertions to evaluate:",
1602
- assertionList.map((assertion, index) => `${index + 1}. ${assertion}`).join("\n")
2340
+ assertions.map((assertion, index) => `${index + 1}. ${assertion}`).join("\n")
1603
2341
  ].join("\n");
1604
- const raw = await options.provider.sendMessage(systemPrompt, userPrompt, { model: options.model });
2342
+ return {
2343
+ assertions,
2344
+ systemPrompt,
2345
+ userPrompt
2346
+ };
2347
+ }
2348
+ function parseGraderOutput(raw) {
1605
2349
  const parsed = graderOutputSchema.safeParse(extractJsonObject(raw));
1606
2350
  if (!parsed.success) {
1607
2351
  throw new Error(`Failed to parse grader output: ${parsed.error.issues[0]?.message ?? "invalid grader JSON"}`);
1608
2352
  }
1609
2353
  return parsed.data.assertions;
1610
2354
  }
2355
+ async function gradeResponse(options) {
2356
+ const prompts = buildGraderPrompts(options);
2357
+ const raw = await options.provider.sendMessage(prompts.systemPrompt, prompts.userPrompt, { model: options.model });
2358
+ return parseGraderOutput(raw);
2359
+ }
2360
+
2361
+ // src/utils/concurrency.ts
2362
+ async function pMap(items, fn, concurrency) {
2363
+ if (!Number.isInteger(concurrency) || concurrency < 1) {
2364
+ throw new Error("pMap concurrency must be an integer greater than or equal to 1.");
2365
+ }
2366
+ if (items.length === 0) {
2367
+ return [];
2368
+ }
2369
+ const results = new Array(items.length);
2370
+ return new Promise((resolve, reject) => {
2371
+ let nextIndex = 0;
2372
+ let completed = 0;
2373
+ let rejected = false;
2374
+ const launchNext = () => {
2375
+ if (rejected) {
2376
+ return;
2377
+ }
2378
+ if (completed === items.length) {
2379
+ resolve(results);
2380
+ return;
2381
+ }
2382
+ if (nextIndex >= items.length) {
2383
+ return;
2384
+ }
2385
+ const currentIndex = nextIndex;
2386
+ nextIndex += 1;
2387
+ Promise.resolve().then(() => fn(items[currentIndex], currentIndex)).then((result) => {
2388
+ if (rejected) {
2389
+ return;
2390
+ }
2391
+ results[currentIndex] = result;
2392
+ completed += 1;
2393
+ if (completed === items.length) {
2394
+ resolve(results);
2395
+ return;
2396
+ }
2397
+ launchNext();
2398
+ }).catch((error) => {
2399
+ if (rejected) {
2400
+ return;
2401
+ }
2402
+ rejected = true;
2403
+ reject(error);
2404
+ });
2405
+ };
2406
+ const initialWorkers = Math.min(concurrency, items.length);
2407
+ for (let workerIndex = 0; workerIndex < initialWorkers; workerIndex += 1) {
2408
+ launchNext();
2409
+ }
2410
+ });
2411
+ }
1611
2412
 
1612
2413
  // src/core/eval-runner.ts
1613
2414
  var evalPromptSchema = z3.object({
@@ -1655,34 +2456,37 @@ async function generatePrompts(skill, provider, model, count) {
1655
2456
  }
1656
2457
  async function runEval(skill, options) {
1657
2458
  const prompts = options.prompts && options.prompts.length > 0 ? evalPromptArraySchema.parse(options.prompts) : await generatePrompts(skill, options.provider, options.model, options.numRuns);
1658
- const results = [];
1659
- for (const evalPrompt of prompts) {
1660
- const systemPrompt = [
1661
- "You are an AI assistant with an activated skill.",
1662
- "Follow this SKILL.md content exactly where applicable.",
1663
- "",
1664
- skill.raw
1665
- ].join("\n");
1666
- const response = await options.provider.sendMessage(systemPrompt, evalPrompt.prompt, { model: options.model });
1667
- const gradedAssertions = await gradeResponse({
1668
- provider: options.provider,
1669
- model: options.graderModel,
1670
- skillName: skill.frontmatter.name,
1671
- skillBody: skill.content,
1672
- userPrompt: evalPrompt.prompt,
1673
- modelResponse: response,
1674
- assertions: evalPrompt.assertions
1675
- });
1676
- const passedAssertions2 = gradedAssertions.filter((assertion) => assertion.passed).length;
1677
- results.push({
1678
- prompt: evalPrompt.prompt,
1679
- assertions: gradedAssertions,
1680
- responseSummary: response.slice(0, 200),
1681
- response,
1682
- passedAssertions: passedAssertions2,
1683
- totalAssertions: gradedAssertions.length
1684
- });
1685
- }
2459
+ const systemPrompt = [
2460
+ "You are an AI assistant with an activated skill.",
2461
+ "Follow this SKILL.md content exactly where applicable.",
2462
+ "",
2463
+ skill.raw
2464
+ ].join("\n");
2465
+ const results = await pMap(
2466
+ prompts,
2467
+ async (evalPrompt) => {
2468
+ const response = await options.provider.sendMessage(systemPrompt, evalPrompt.prompt, { model: options.model });
2469
+ const gradedAssertions = await gradeResponse({
2470
+ provider: options.provider,
2471
+ model: options.graderModel,
2472
+ skillName: skill.frontmatter.name,
2473
+ skillBody: skill.content,
2474
+ userPrompt: evalPrompt.prompt,
2475
+ modelResponse: response,
2476
+ assertions: evalPrompt.assertions
2477
+ });
2478
+ const passedAssertions2 = gradedAssertions.filter((assertion) => assertion.passed).length;
2479
+ return {
2480
+ prompt: evalPrompt.prompt,
2481
+ assertions: gradedAssertions,
2482
+ responseSummary: response.slice(0, 200),
2483
+ response,
2484
+ passedAssertions: passedAssertions2,
2485
+ totalAssertions: gradedAssertions.length
2486
+ };
2487
+ },
2488
+ options.concurrency ?? 5
2489
+ );
1686
2490
  const totalAssertions = results.reduce((total, result) => total + result.totalAssertions, 0);
1687
2491
  const passedAssertions = results.reduce((total, result) => total + result.passedAssertions, 0);
1688
2492
  return {
@@ -1707,6 +2511,7 @@ var triggerQuerySchema = z4.object({
1707
2511
  should_trigger: z4.boolean()
1708
2512
  });
1709
2513
  var triggerQueryArraySchema = z4.array(triggerQuerySchema);
2514
+ var triggerNumQueriesSchema = z4.number().int().min(2).refine((value) => value % 2 === 0, "numQueries must be an even number.");
1710
2515
  var FAKE_SKILLS = [
1711
2516
  { name: "code-review", description: "Reviews code changes for bugs, regressions, and maintainability issues." },
1712
2517
  { name: "api-tester", description: "Designs and runs REST API tests, validating status codes and response shapes." },
@@ -1747,6 +2552,9 @@ function shuffle(values, rng) {
1747
2552
  function sample(values, count, rng) {
1748
2553
  return shuffle(values, rng).slice(0, Math.max(0, Math.min(count, values.length)));
1749
2554
  }
2555
+ function validateNumQueries(numQueries) {
2556
+ return triggerNumQueriesSchema.parse(numQueries);
2557
+ }
1750
2558
  function parseJsonArrayFromModelOutput(raw) {
1751
2559
  const trimmed = raw.trim();
1752
2560
  if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
@@ -1761,6 +2569,7 @@ function parseJsonArrayFromModelOutput(raw) {
1761
2569
  throw new Error("Model did not return a JSON array.");
1762
2570
  }
1763
2571
  async function generateQueriesWithModel(skill, provider, model, numQueries) {
2572
+ validateNumQueries(numQueries);
1764
2573
  const shouldTriggerCount = Math.floor(numQueries / 2);
1765
2574
  const shouldNotTriggerCount = numQueries - shouldTriggerCount;
1766
2575
  const systemPrompt = [
@@ -1805,6 +2614,29 @@ function parseDecision(rawResponse, skillNames) {
1805
2614
  }
1806
2615
  return "unrecognized";
1807
2616
  }
2617
+ function prepareTriggerQueries(skill, queries, seed) {
2618
+ const rng = createRng(seed);
2619
+ return queries.map((testQuery) => {
2620
+ const fakeCount = 5 + Math.floor(rng() * 5);
2621
+ const fakeSkills = sample(FAKE_SKILLS, fakeCount, rng);
2622
+ const allSkills = shuffle(
2623
+ [
2624
+ ...fakeSkills,
2625
+ {
2626
+ name: skill.frontmatter.name,
2627
+ description: skill.frontmatter.description
2628
+ }
2629
+ ],
2630
+ rng
2631
+ );
2632
+ return {
2633
+ testQuery,
2634
+ fakeSkills,
2635
+ allSkills,
2636
+ skillListText: allSkills.map((entry) => `- ${entry.name}: ${entry.description}`).join("\n")
2637
+ };
2638
+ });
2639
+ }
1808
2640
  function calculateMetrics(skillName, cases) {
1809
2641
  let truePositives = 0;
1810
2642
  let trueNegatives = 0;
@@ -1857,43 +2689,36 @@ function buildSuggestions(metrics) {
1857
2689
  return suggestions;
1858
2690
  }
1859
2691
  async function runTriggerTest(skill, options) {
1860
- const rng = createRng(options.seed);
1861
2692
  const queries = options.queries && options.queries.length > 0 ? triggerQueryArraySchema.parse(options.queries) : await generateQueriesWithModel(skill, options.provider, options.model, options.numQueries);
1862
- const results = [];
1863
2693
  const skillName = skill.frontmatter.name;
1864
- for (const testQuery of queries) {
1865
- const fakeCount = 5 + Math.floor(rng() * 5);
1866
- const fakeSkills = sample(FAKE_SKILLS, fakeCount, rng);
1867
- const allSkills = shuffle([
1868
- ...fakeSkills,
1869
- {
1870
- name: skill.frontmatter.name,
1871
- description: skill.frontmatter.description
1872
- }
1873
- ], rng);
1874
- const skillListText = allSkills.map((entry) => `- ${entry.name}: ${entry.description}`).join("\n");
1875
- const systemPrompt = [
1876
- "You are selecting one skill to activate for a user query.",
1877
- "Choose the single best matching skill name from the provided list, or 'none' if no skill is a good fit.",
1878
- "Respond with only the skill name or 'none'."
1879
- ].join(" ");
1880
- const userPrompt = [`Available skills:`, skillListText, "", `User query: ${testQuery.query}`].join("\n");
1881
- const rawResponse = await options.provider.sendMessage(systemPrompt, userPrompt, { model: options.model });
1882
- const decision = parseDecision(
1883
- rawResponse,
1884
- allSkills.map((entry) => entry.name)
1885
- );
1886
- const expected = testQuery.should_trigger ? skillName : "none";
1887
- const matched = testQuery.should_trigger ? decision === skillName : decision !== skillName;
1888
- results.push({
1889
- query: testQuery.query,
1890
- shouldTrigger: testQuery.should_trigger,
1891
- expected,
1892
- actual: decision,
1893
- matched,
1894
- rawModelResponse: options.verbose ? rawResponse : void 0
1895
- });
1896
- }
2694
+ const preparedQueries = prepareTriggerQueries(skill, queries, options.seed);
2695
+ const systemPrompt = [
2696
+ "You are selecting one skill to activate for a user query.",
2697
+ "Choose the single best matching skill name from the provided list, or 'none' if no skill is a good fit.",
2698
+ "Respond with only the skill name or 'none'."
2699
+ ].join(" ");
2700
+ const results = await pMap(
2701
+ preparedQueries,
2702
+ async ({ testQuery, allSkills, skillListText }) => {
2703
+ const userPrompt = [`Available skills:`, skillListText, "", `User query: ${testQuery.query}`].join("\n");
2704
+ const rawResponse = await options.provider.sendMessage(systemPrompt, userPrompt, { model: options.model });
2705
+ const decision = parseDecision(
2706
+ rawResponse,
2707
+ allSkills.map((entry) => entry.name)
2708
+ );
2709
+ const expected = testQuery.should_trigger ? skillName : "none";
2710
+ const matched = testQuery.should_trigger ? decision === skillName : decision !== skillName;
2711
+ return {
2712
+ query: testQuery.query,
2713
+ shouldTrigger: testQuery.should_trigger,
2714
+ expected,
2715
+ actual: decision,
2716
+ matched,
2717
+ rawModelResponse: options.verbose ? rawResponse : void 0
2718
+ };
2719
+ },
2720
+ options.concurrency ?? 5
2721
+ );
1897
2722
  const metrics = calculateMetrics(skillName, results);
1898
2723
  return {
1899
2724
  skillName,
@@ -2059,6 +2884,9 @@ function writeError(error, asJson) {
2059
2884
  }
2060
2885
 
2061
2886
  // src/commands/lint.ts
2887
+ var lintCliSchema = z6.object({
2888
+ html: z6.string().optional()
2889
+ });
2062
2890
  async function handleLintCommand(targetPath, options) {
2063
2891
  try {
2064
2892
  const report = await runLinter(targetPath, { suppress: options.suppress });
@@ -2067,6 +2895,9 @@ async function handleLintCommand(targetPath, options) {
2067
2895
  } else {
2068
2896
  writeResult(renderLintReport(report, options.color), false);
2069
2897
  }
2898
+ if (options.html) {
2899
+ await fs6.writeFile(options.html, renderLintHtml(report), "utf8");
2900
+ }
2070
2901
  if (lintFails(report, options.failOn)) {
2071
2902
  process.exitCode = 1;
2072
2903
  }
@@ -2076,74 +2907,85 @@ async function handleLintCommand(targetPath, options) {
2076
2907
  }
2077
2908
  }
2078
2909
  function registerLintCommand(program) {
2079
- program.command("lint").description("Run static lint checks against a SKILL.md file or skill directory.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").action(async (targetPath, _commandOptions, command) => {
2910
+ program.command("lint").description("Run static lint checks against a SKILL.md file or skill directory.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--html <path>", "Write an HTML report to the given file path").action(async (targetPath, _commandOptions, command) => {
2080
2911
  const globalOptions = getGlobalCliOptions(command);
2081
2912
  const config = getResolvedConfig(command);
2913
+ const parsedCli = lintCliSchema.safeParse(command.opts());
2914
+ if (!parsedCli.success) {
2915
+ writeError(new Error(parsedCli.error.issues[0]?.message ?? "Invalid lint options."), globalOptions.json);
2916
+ process.exitCode = 2;
2917
+ return;
2918
+ }
2082
2919
  await handleLintCommand(targetPath, {
2083
2920
  ...globalOptions,
2084
2921
  failOn: config.lint.failOn,
2085
- suppress: config.lint.suppress
2922
+ suppress: config.lint.suppress,
2923
+ html: parsedCli.data.html
2086
2924
  });
2087
2925
  });
2088
2926
  }
2089
2927
 
2090
2928
  // src/commands/trigger.ts
2929
+ import fs8 from "node:fs/promises";
2091
2930
  import ora from "ora";
2092
- import { z as z7 } from "zod";
2931
+ import { z as z8 } from "zod";
2093
2932
 
2094
2933
  // src/utils/config.ts
2095
- import fs6 from "node:fs/promises";
2934
+ import fs7 from "node:fs/promises";
2096
2935
  import path5 from "node:path";
2097
- import { z as z6 } from "zod";
2098
- var providerNameSchema = z6.enum(["anthropic", "openai"]);
2099
- var lintFailOnSchema = z6.enum(["error", "warn"]);
2100
- var lintConfigSchema = z6.object({
2936
+ import { z as z7 } from "zod";
2937
+ var providerNameSchema = z7.enum(["anthropic", "openai"]);
2938
+ var lintFailOnSchema = z7.enum(["error", "warn"]);
2939
+ var lintConfigSchema = z7.object({
2101
2940
  failOn: lintFailOnSchema.optional(),
2102
- suppress: z6.array(z6.string().min(1)).optional()
2941
+ suppress: z7.array(z7.string().min(1)).optional()
2103
2942
  }).strict();
2104
- var triggerConfigSchema = z6.object({
2105
- numQueries: z6.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
2106
- threshold: z6.number().min(0).max(1).optional(),
2107
- seed: z6.number().int().optional()
2943
+ var triggerConfigSchema = z7.object({
2944
+ numQueries: z7.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
2945
+ threshold: z7.number().min(0).max(1).optional(),
2946
+ seed: z7.number().int().optional()
2108
2947
  }).strict().partial();
2109
- var evalConfigSchema = z6.object({
2110
- numRuns: z6.number().int().min(1).optional(),
2111
- threshold: z6.number().min(0).max(1).optional(),
2112
- promptFile: z6.string().min(1).optional(),
2113
- assertionsFile: z6.string().min(1).optional()
2948
+ var evalConfigSchema = z7.object({
2949
+ numRuns: z7.number().int().min(1).optional(),
2950
+ threshold: z7.number().min(0).max(1).optional(),
2951
+ promptFile: z7.string().min(1).optional(),
2952
+ assertionsFile: z7.string().min(1).optional()
2114
2953
  }).strict().partial();
2115
- var skilltestConfigSchema = z6.object({
2954
+ var skilltestConfigSchema = z7.object({
2116
2955
  provider: providerNameSchema.optional(),
2117
- model: z6.string().min(1).optional(),
2118
- json: z6.boolean().optional(),
2956
+ model: z7.string().min(1).optional(),
2957
+ json: z7.boolean().optional(),
2958
+ concurrency: z7.number().int().min(1).optional(),
2119
2959
  lint: lintConfigSchema.optional(),
2120
2960
  trigger: triggerConfigSchema.optional(),
2121
2961
  eval: evalConfigSchema.optional()
2122
2962
  }).strict();
2123
- var resolvedSkilltestConfigSchema = z6.object({
2963
+ var resolvedSkilltestConfigSchema = z7.object({
2124
2964
  provider: providerNameSchema,
2125
- model: z6.string().min(1),
2126
- json: z6.boolean(),
2127
- lint: z6.object({
2965
+ model: z7.string().min(1),
2966
+ json: z7.boolean(),
2967
+ concurrency: z7.number().int().min(1),
2968
+ lint: z7.object({
2128
2969
  failOn: lintFailOnSchema,
2129
- suppress: z6.array(z6.string().min(1))
2970
+ suppress: z7.array(z7.string().min(1))
2130
2971
  }),
2131
- trigger: z6.object({
2132
- numQueries: z6.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
2133
- threshold: z6.number().min(0).max(1),
2134
- seed: z6.number().int().optional()
2972
+ trigger: z7.object({
2973
+ numQueries: z7.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
2974
+ threshold: z7.number().min(0).max(1),
2975
+ seed: z7.number().int().optional()
2135
2976
  }),
2136
- eval: z6.object({
2137
- numRuns: z6.number().int().min(1),
2138
- threshold: z6.number().min(0).max(1),
2139
- promptFile: z6.string().min(1).optional(),
2140
- assertionsFile: z6.string().min(1).optional()
2977
+ eval: z7.object({
2978
+ numRuns: z7.number().int().min(1),
2979
+ threshold: z7.number().min(0).max(1),
2980
+ promptFile: z7.string().min(1).optional(),
2981
+ assertionsFile: z7.string().min(1).optional()
2141
2982
  })
2142
2983
  });
2143
2984
  var DEFAULT_SKILLTEST_CONFIG = {
2144
2985
  provider: "anthropic",
2145
2986
  model: "claude-sonnet-4-5-20250929",
2146
2987
  json: false,
2988
+ concurrency: 5,
2147
2989
  lint: {
2148
2990
  failOn: "error",
2149
2991
  suppress: []
@@ -2172,7 +3014,7 @@ function buildConfigValidationError(error, sourceLabel) {
2172
3014
  async function readJsonObject(filePath, label) {
2173
3015
  let raw;
2174
3016
  try {
2175
- raw = await fs6.readFile(filePath, "utf8");
3017
+ raw = await fs7.readFile(filePath, "utf8");
2176
3018
  } catch (error) {
2177
3019
  const message = error instanceof Error ? error.message : String(error);
2178
3020
  throw new Error(`Failed to read ${label}: ${message}`);
@@ -2205,7 +3047,7 @@ async function loadConfigFromNearestPackageJson(startDirectory) {
2205
3047
  const packageJsonPath = path5.join(currentDirectory, "package.json");
2206
3048
  if (await pathExists(packageJsonPath)) {
2207
3049
  const raw = await readJsonObject(packageJsonPath, packageJsonPath);
2208
- const packageJsonSchema = z6.object({
3050
+ const packageJsonSchema = z7.object({
2209
3051
  skilltestrc: skilltestConfigSchema.optional()
2210
3052
  }).passthrough();
2211
3053
  const parsed = packageJsonSchema.safeParse(raw);
@@ -2250,6 +3092,7 @@ function mergeConfigLayers(configFile = {}, cliFlags = {}, baseDirectory = proce
2250
3092
  provider: cliFlags.provider ?? configFile.provider ?? DEFAULT_SKILLTEST_CONFIG.provider,
2251
3093
  model: cliFlags.model ?? configFile.model ?? DEFAULT_SKILLTEST_CONFIG.model,
2252
3094
  json: cliFlags.json ?? configFile.json ?? DEFAULT_SKILLTEST_CONFIG.json,
3095
+ concurrency: cliFlags.concurrency ?? configFile.concurrency ?? DEFAULT_SKILLTEST_CONFIG.concurrency,
2253
3096
  lint: {
2254
3097
  failOn: cliFlags.lint?.failOn ?? configFile.lint?.failOn ?? DEFAULT_SKILLTEST_CONFIG.lint.failOn,
2255
3098
  suppress: cliFlags.lint?.suppress ?? configFile.lint?.suppress ?? DEFAULT_SKILLTEST_CONFIG.lint.suppress
@@ -2293,6 +3136,9 @@ function extractCliConfigOverrides(command) {
2293
3136
  if (command.getOptionValueSource("model") === "cli") {
2294
3137
  overrides.model = getTypedOptionValue(command, "model");
2295
3138
  }
3139
+ if ((command.name() === "trigger" || command.name() === "eval" || command.name() === "check") && command.getOptionValueSource("concurrency") === "cli") {
3140
+ overrides.concurrency = getTypedOptionValue(command, "concurrency");
3141
+ }
2296
3142
  if ((command.name() === "trigger" || command.name() === "check") && command.getOptionValueSource("numQueries") === "cli") {
2297
3143
  overrides.trigger = {
2298
3144
  ...overrides.trigger,
@@ -2322,7 +3168,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
2322
3168
  const skillDirectoryConfig = await resolveSkillDirectoryConfig(targetPath);
2323
3169
  if (skillDirectoryConfig) {
2324
3170
  return {
2325
- configFile: skillDirectoryConfig.configFile,
2326
3171
  ...skillDirectoryConfig,
2327
3172
  config: mergeConfigLayers(skillDirectoryConfig.configFile, cliFlags, skillDirectoryConfig.sourceDirectory)
2328
3173
  };
@@ -2331,7 +3176,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
2331
3176
  const cwdConfig = await loadConfigFromJsonFile(cwdConfigPath);
2332
3177
  if (cwdConfig) {
2333
3178
  return {
2334
- configFile: cwdConfig.configFile,
2335
3179
  ...cwdConfig,
2336
3180
  config: mergeConfigLayers(cwdConfig.configFile, cliFlags, cwdConfig.sourceDirectory)
2337
3181
  };
@@ -2339,7 +3183,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
2339
3183
  const packageJsonConfig = await loadConfigFromNearestPackageJson(cwd);
2340
3184
  if (packageJsonConfig) {
2341
3185
  return {
2342
- configFile: packageJsonConfig.configFile,
2343
3186
  ...packageJsonConfig,
2344
3187
  config: mergeConfigLayers(packageJsonConfig.configFile, cliFlags, packageJsonConfig.sourceDirectory)
2345
3188
  };
@@ -2547,12 +3390,14 @@ function createProvider(providerName, apiKeyOverride) {
2547
3390
  }
2548
3391
 
2549
3392
  // src/commands/trigger.ts
2550
- var triggerCliSchema = z7.object({
2551
- queries: z7.string().optional(),
2552
- saveQueries: z7.string().optional(),
2553
- seed: z7.number().int().optional(),
2554
- verbose: z7.boolean().optional(),
2555
- apiKey: z7.string().optional()
3393
+ var triggerCliSchema = z8.object({
3394
+ queries: z8.string().optional(),
3395
+ saveQueries: z8.string().optional(),
3396
+ seed: z8.number().int().optional(),
3397
+ concurrency: z8.number().int().min(1).optional(),
3398
+ html: z8.string().optional(),
3399
+ verbose: z8.boolean().optional(),
3400
+ apiKey: z8.string().optional()
2556
3401
  });
2557
3402
  var DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-5-20250929";
2558
3403
  var DEFAULT_OPENAI_MODEL = "gpt-4.1-mini";
@@ -2597,6 +3442,7 @@ async function handleTriggerCommand(targetPath, options) {
2597
3442
  queries,
2598
3443
  numQueries: options.numQueries,
2599
3444
  seed: options.seed,
3445
+ concurrency: options.concurrency,
2600
3446
  verbose: options.verbose
2601
3447
  });
2602
3448
  if (options.saveQueries) {
@@ -2608,6 +3454,13 @@ async function handleTriggerCommand(targetPath, options) {
2608
3454
  } else {
2609
3455
  writeResult(renderTriggerOutputWithSeed(renderTriggerReport(result, options.color, options.verbose), result.seed), false);
2610
3456
  }
3457
+ if (options.html) {
3458
+ const htmlResult = {
3459
+ ...result,
3460
+ target: targetPath
3461
+ };
3462
+ await fs8.writeFile(options.html, renderTriggerHtml(htmlResult), "utf8");
3463
+ }
2611
3464
  } catch (error) {
2612
3465
  spinner?.stop();
2613
3466
  writeError(error, options.json);
@@ -2615,7 +3468,7 @@ async function handleTriggerCommand(targetPath, options) {
2615
3468
  }
2616
3469
  }
2617
3470
  function registerTriggerCommand(program) {
2618
- program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
3471
+ program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--concurrency <n>", "Maximum in-flight trigger requests", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
2619
3472
  const globalOptions = getGlobalCliOptions(command);
2620
3473
  const config = getResolvedConfig(command);
2621
3474
  const parsedCli = triggerCliSchema.safeParse(command.opts());
@@ -2632,6 +3485,8 @@ function registerTriggerCommand(program) {
2632
3485
  numQueries: config.trigger.numQueries,
2633
3486
  saveQueries: parsedCli.data.saveQueries,
2634
3487
  seed: parsedCli.data.seed ?? config.trigger.seed,
3488
+ concurrency: config.concurrency,
3489
+ html: parsedCli.data.html,
2635
3490
  verbose: Boolean(parsedCli.data.verbose),
2636
3491
  apiKey: parsedCli.data.apiKey
2637
3492
  });
@@ -2639,14 +3494,17 @@ function registerTriggerCommand(program) {
2639
3494
  }
2640
3495
 
2641
3496
  // src/commands/eval.ts
3497
+ import fs9 from "node:fs/promises";
2642
3498
  import ora2 from "ora";
2643
- import { z as z8 } from "zod";
2644
- var evalCliSchema = z8.object({
2645
- prompts: z8.string().optional(),
2646
- graderModel: z8.string().optional(),
2647
- saveResults: z8.string().optional(),
2648
- verbose: z8.boolean().optional(),
2649
- apiKey: z8.string().optional()
3499
+ import { z as z9 } from "zod";
3500
+ var evalCliSchema = z9.object({
3501
+ prompts: z9.string().optional(),
3502
+ graderModel: z9.string().optional(),
3503
+ saveResults: z9.string().optional(),
3504
+ concurrency: z9.number().int().min(1).optional(),
3505
+ html: z9.string().optional(),
3506
+ verbose: z9.boolean().optional(),
3507
+ apiKey: z9.string().optional()
2650
3508
  });
2651
3509
  var DEFAULT_ANTHROPIC_MODEL2 = "claude-sonnet-4-5-20250929";
2652
3510
  var DEFAULT_OPENAI_MODEL2 = "gpt-4.1-mini";
@@ -2686,6 +3544,7 @@ async function handleEvalCommand(targetPath, options, command) {
2686
3544
  model,
2687
3545
  graderModel,
2688
3546
  numRuns: options.numRuns,
3547
+ concurrency: options.concurrency,
2689
3548
  prompts
2690
3549
  });
2691
3550
  if (options.saveResults) {
@@ -2697,6 +3556,13 @@ async function handleEvalCommand(targetPath, options, command) {
2697
3556
  } else {
2698
3557
  writeResult(renderEvalReport(result, options.color, options.verbose), false);
2699
3558
  }
3559
+ if (options.html) {
3560
+ const htmlResult = {
3561
+ ...result,
3562
+ target: targetPath
3563
+ };
3564
+ await fs9.writeFile(options.html, renderEvalHtml(htmlResult), "utf8");
3565
+ }
2700
3566
  } catch (error) {
2701
3567
  spinner?.stop();
2702
3568
  writeError(error, options.json);
@@ -2704,7 +3570,7 @@ async function handleEvalCommand(targetPath, options, command) {
2704
3570
  }
2705
3571
  }
2706
3572
  function registerEvalCommand(program) {
2707
- program.command("eval").description("Run end-to-end skill execution and quality evaluation.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--prompts <path>", "Path to eval prompts JSON").option("--model <model>", "Model to execute prompts").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--provider <provider>", "LLM provider: anthropic|openai").option("--save-results <path>", "Save full evaluation results to JSON").option("--api-key <key>", "API key override").option("--verbose", "Show full model responses").action(async (targetPath, _commandOptions, command) => {
3573
+ program.command("eval").description("Run end-to-end skill execution and quality evaluation.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--prompts <path>", "Path to eval prompts JSON").option("--model <model>", "Model to execute prompts").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--provider <provider>", "LLM provider: anthropic|openai").option("--concurrency <n>", "Maximum in-flight eval prompt runs", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--save-results <path>", "Save full evaluation results to JSON").option("--api-key <key>", "API key override").option("--verbose", "Show full model responses").action(async (targetPath, _commandOptions, command) => {
2708
3574
  const globalOptions = getGlobalCliOptions(command);
2709
3575
  const config = getResolvedConfig(command);
2710
3576
  const parsedCli = evalCliSchema.safeParse(command.opts());
@@ -2722,9 +3588,11 @@ function registerEvalCommand(program) {
2722
3588
  graderModel: parsedCli.data.graderModel,
2723
3589
  provider: config.provider,
2724
3590
  saveResults: parsedCli.data.saveResults,
3591
+ html: parsedCli.data.html,
2725
3592
  verbose: Boolean(parsedCli.data.verbose),
2726
3593
  apiKey: parsedCli.data.apiKey,
2727
- numRuns: config.eval.numRuns
3594
+ numRuns: config.eval.numRuns,
3595
+ concurrency: config.concurrency
2728
3596
  },
2729
3597
  command
2730
3598
  );
@@ -2732,8 +3600,9 @@ function registerEvalCommand(program) {
2732
3600
  }
2733
3601
 
2734
3602
  // src/commands/check.ts
3603
+ import fs10 from "node:fs/promises";
2735
3604
  import ora3 from "ora";
2736
- import { z as z9 } from "zod";
3605
+ import { z as z10 } from "zod";
2737
3606
 
2738
3607
  // src/core/check-runner.ts
2739
3608
  function calculateEvalAssertPassRate(result) {
@@ -2764,23 +3633,33 @@ async function runCheck(inputPath, options) {
2764
3633
  evalSkippedReason = `Skipped: skill could not be parsed strictly (${message}).`;
2765
3634
  }
2766
3635
  if (parsedSkill) {
2767
- options.onStage?.("trigger");
2768
- trigger = await runTriggerTest(parsedSkill, {
3636
+ const triggerOptions = {
2769
3637
  provider: options.provider,
2770
3638
  model: options.model,
2771
3639
  queries: options.queries,
2772
3640
  numQueries: options.numQueries,
2773
3641
  seed: options.triggerSeed,
3642
+ concurrency: options.concurrency,
2774
3643
  verbose: options.verbose
2775
- });
2776
- options.onStage?.("eval");
2777
- evalResult = await runEval(parsedSkill, {
3644
+ };
3645
+ const evalOptions = {
2778
3646
  provider: options.provider,
2779
3647
  model: options.model,
2780
3648
  graderModel: options.graderModel,
2781
3649
  numRuns: options.evalNumRuns,
2782
- prompts: options.prompts
2783
- });
3650
+ prompts: options.prompts,
3651
+ concurrency: options.concurrency
3652
+ };
3653
+ if ((options.concurrency ?? 5) === 1) {
3654
+ options.onStage?.("trigger");
3655
+ trigger = await runTriggerTest(parsedSkill, triggerOptions);
3656
+ options.onStage?.("eval");
3657
+ evalResult = await runEval(parsedSkill, evalOptions);
3658
+ } else {
3659
+ options.onStage?.("trigger");
3660
+ options.onStage?.("eval");
3661
+ [trigger, evalResult] = await Promise.all([runTriggerTest(parsedSkill, triggerOptions), runEval(parsedSkill, evalOptions)]);
3662
+ }
2784
3663
  }
2785
3664
  }
2786
3665
  const triggerF1 = trigger ? trigger.metrics.f1 : null;
@@ -2815,15 +3694,17 @@ async function runCheck(inputPath, options) {
2815
3694
  }
2816
3695
 
2817
3696
  // src/commands/check.ts
2818
- var checkCliSchema = z9.object({
2819
- graderModel: z9.string().optional(),
2820
- apiKey: z9.string().optional(),
2821
- queries: z9.string().optional(),
2822
- seed: z9.number().int().optional(),
2823
- prompts: z9.string().optional(),
2824
- saveResults: z9.string().optional(),
2825
- continueOnLintFail: z9.boolean().optional(),
2826
- verbose: z9.boolean().optional()
3697
+ var checkCliSchema = z10.object({
3698
+ graderModel: z10.string().optional(),
3699
+ apiKey: z10.string().optional(),
3700
+ queries: z10.string().optional(),
3701
+ seed: z10.number().int().optional(),
3702
+ prompts: z10.string().optional(),
3703
+ concurrency: z10.number().int().min(1).optional(),
3704
+ html: z10.string().optional(),
3705
+ saveResults: z10.string().optional(),
3706
+ continueOnLintFail: z10.boolean().optional(),
3707
+ verbose: z10.boolean().optional()
2827
3708
  });
2828
3709
  var DEFAULT_ANTHROPIC_MODEL3 = "claude-sonnet-4-5-20250929";
2829
3710
  var DEFAULT_OPENAI_MODEL3 = "gpt-4.1-mini";
@@ -2882,6 +3763,7 @@ async function handleCheckCommand(targetPath, options, command) {
2882
3763
  triggerSeed: options.triggerSeed,
2883
3764
  prompts,
2884
3765
  evalNumRuns: options.numRuns,
3766
+ concurrency: options.concurrency,
2885
3767
  minF1: options.minF1,
2886
3768
  minAssertPassRate: options.minAssertPassRate,
2887
3769
  continueOnLintFail: options.continueOnLintFail,
@@ -2894,10 +3776,8 @@ async function handleCheckCommand(targetPath, options, command) {
2894
3776
  spinner.text = "Running lint checks...";
2895
3777
  } else if (stage === "parse") {
2896
3778
  spinner.text = "Parsing skill for model evaluations...";
2897
- } else if (stage === "trigger") {
2898
- spinner.text = "Running trigger test suite...";
2899
- } else if (stage === "eval") {
2900
- spinner.text = "Running end-to-end eval suite...";
3779
+ } else if (stage === "trigger" || stage === "eval") {
3780
+ spinner.text = "Running trigger and eval suites...";
2901
3781
  }
2902
3782
  }
2903
3783
  });
@@ -2913,6 +3793,9 @@ async function handleCheckCommand(targetPath, options, command) {
2913
3793
  false
2914
3794
  );
2915
3795
  }
3796
+ if (options.html) {
3797
+ await fs10.writeFile(options.html, renderCheckHtml(result), "utf8");
3798
+ }
2916
3799
  process.exitCode = result.gates.overallPassed ? 0 : 1;
2917
3800
  } catch (error) {
2918
3801
  spinner?.stop();
@@ -2921,7 +3804,7 @@ async function handleCheckCommand(targetPath, options, command) {
2921
3804
  }
2922
3805
  }
2923
3806
  function registerCheckCommand(program) {
2924
- program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
3807
+ program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--concurrency <n>", "Maximum in-flight trigger/eval tasks", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
2925
3808
  const globalOptions = getGlobalCliOptions(command);
2926
3809
  const config = getResolvedConfig(command);
2927
3810
  const parsedCli = checkCliSchema.safeParse(command.opts());
@@ -2944,6 +3827,8 @@ function registerCheckCommand(program) {
2944
3827
  minF1: config.trigger.threshold,
2945
3828
  minAssertPassRate: config.eval.threshold,
2946
3829
  numRuns: config.eval.numRuns,
3830
+ concurrency: config.concurrency,
3831
+ html: parsedCli.data.html,
2947
3832
  lintFailOn: config.lint.failOn,
2948
3833
  lintSuppress: config.lint.suppress,
2949
3834
  triggerSeed: parsedCli.data.seed ?? config.trigger.seed,
@@ -2961,7 +3846,7 @@ function resolveVersion() {
2961
3846
  try {
2962
3847
  const currentFilePath = fileURLToPath(import.meta.url);
2963
3848
  const packageJsonPath = path6.resolve(path6.dirname(currentFilePath), "..", "package.json");
2964
- const raw = fs7.readFileSync(packageJsonPath, "utf8");
3849
+ const raw = fs11.readFileSync(packageJsonPath, "utf8");
2965
3850
  const parsed = JSON.parse(raw);
2966
3851
  return parsed.version ?? "0.0.0";
2967
3852
  } catch {