skilltest 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,11 +1,15 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import fs7 from "node:fs";
4
+ import fs11 from "node:fs";
5
5
  import path6 from "node:path";
6
6
  import { fileURLToPath } from "node:url";
7
7
  import { Command } from "commander";
8
8
 
9
+ // src/commands/lint.ts
10
+ import fs6 from "node:fs/promises";
11
+ import { z as z6 } from "zod";
12
+
9
13
  // src/core/skill-parser.ts
10
14
  import fs from "node:fs/promises";
11
15
  import path from "node:path";
@@ -1366,6 +1370,739 @@ async function runLinter(inputPath, options = {}) {
1366
1370
  };
1367
1371
  }
1368
1372
 
1373
+ // src/reporters/html.ts
1374
+ function escapeHtml(value) {
1375
+ return String(value ?? "").replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
1376
+ }
1377
+ function formatPercent(value) {
1378
+ return `${(value * 100).toFixed(1)}%`;
1379
+ }
1380
+ function formatLineRange(startLine, endLine) {
1381
+ if (startLine === void 0) {
1382
+ return null;
1383
+ }
1384
+ if (endLine === void 0 || endLine === startLine) {
1385
+ return `line ${startLine}`;
1386
+ }
1387
+ return `lines ${startLine}-${endLine}`;
1388
+ }
1389
+ function badgeLabel(status) {
1390
+ if (status === "pass") {
1391
+ return "PASS";
1392
+ }
1393
+ if (status === "warn") {
1394
+ return "WARN";
1395
+ }
1396
+ if (status === "fail") {
1397
+ return "FAIL";
1398
+ }
1399
+ return "SKIP";
1400
+ }
1401
+ function renderBadge(status) {
1402
+ return `<span class="badge ${status}">${badgeLabel(status)}</span>`;
1403
+ }
1404
+ function renderStatCards(stats) {
1405
+ return `<div class="stats-grid">${stats.map(
1406
+ (stat) => `
1407
+ <div class="stat-card${stat.status ? ` status-${stat.status}` : ""}">
1408
+ <div class="stat-label">${escapeHtml(stat.label)}</div>
1409
+ <div class="stat-value">${escapeHtml(stat.value)}</div>
1410
+ ${stat.note ? `<div class="stat-note">${escapeHtml(stat.note)}</div>` : ""}
1411
+ </div>
1412
+ `
1413
+ ).join("")}</div>`;
1414
+ }
1415
+ function renderMetaItems(items) {
1416
+ if (items.length === 0) {
1417
+ return "";
1418
+ }
1419
+ return `<div class="meta-grid">${items.map(
1420
+ (item) => `
1421
+ <div class="meta-item">
1422
+ <span class="meta-label">${escapeHtml(item.label)}</span>
1423
+ <span class="meta-value">${escapeHtml(item.value)}</span>
1424
+ </div>
1425
+ `
1426
+ ).join("")}</div>`;
1427
+ }
1428
+ function renderHeaderCard(commandName, heading, target, stats, metaItems) {
1429
+ return `
1430
+ <section class="card header-card">
1431
+ <div class="eyebrow">skilltest ${escapeHtml(commandName)}</div>
1432
+ <h1>${escapeHtml(heading)}</h1>
1433
+ <div class="target-line">target: ${escapeHtml(target)}</div>
1434
+ ${renderMetaItems(metaItems)}
1435
+ ${renderStatCards(stats)}
1436
+ </section>
1437
+ `;
1438
+ }
1439
+ function renderSectionCard(title, body) {
1440
+ return `
1441
+ <section class="card">
1442
+ <h2>${escapeHtml(title)}</h2>
1443
+ ${body}
1444
+ </section>
1445
+ `;
1446
+ }
1447
+ function renderMessageRow(status, title, message, details) {
1448
+ return `
1449
+ <div class="row">
1450
+ <div class="row-header">
1451
+ <div class="row-title">${escapeHtml(title)}</div>
1452
+ ${renderBadge(status)}
1453
+ </div>
1454
+ <div class="row-body">${escapeHtml(message)}</div>
1455
+ ${details ?? ""}
1456
+ </div>
1457
+ `;
1458
+ }
1459
+ function renderDetails(summary, content) {
1460
+ return `
1461
+ <details class="detail-block">
1462
+ <summary>${escapeHtml(summary)}</summary>
1463
+ <div class="detail-content">${content}</div>
1464
+ </details>
1465
+ `;
1466
+ }
1467
+ function renderPreBlock(content) {
1468
+ return `<pre>${escapeHtml(content)}</pre>`;
1469
+ }
1470
+ function renderDefinitionList(items) {
1471
+ return `<div class="definition-list">${items.map(
1472
+ (item) => `
1473
+ <div class="definition-item">
1474
+ <div class="definition-label">${escapeHtml(item.label)}</div>
1475
+ <div class="definition-value">${escapeHtml(item.value)}</div>
1476
+ </div>
1477
+ `
1478
+ ).join("")}</div>`;
1479
+ }
1480
+ function countSkippedSecurityPatterns(issues) {
1481
+ return issues.reduce((total, issue) => total + (issue.skippedPatterns?.length ?? 0), 0);
1482
+ }
1483
+ function renderLintIssueRow(issue) {
1484
+ const lineRange = formatLineRange(issue.startLine, issue.endLine);
1485
+ const detailBlocks = [];
1486
+ if (issue.suggestion) {
1487
+ detailBlocks.push(renderDetails("Suggestion", `<p>${escapeHtml(issue.suggestion)}</p>`));
1488
+ }
1489
+ if (issue.skippedPatterns && issue.skippedPatterns.length > 0) {
1490
+ const patternItems = issue.skippedPatterns.map(
1491
+ (pattern) => `
1492
+ <div class="definition-item">
1493
+ <div class="definition-label">${escapeHtml(pattern.label)}</div>
1494
+ <div class="definition-value">${escapeHtml(
1495
+ `${pattern.zoneType} lines ${pattern.startLine}-${pattern.endLine}`
1496
+ )}</div>
1497
+ </div>
1498
+ `
1499
+ ).join("");
1500
+ detailBlocks.push(renderDetails("Skipped security patterns", `<div class="definition-list">${patternItems}</div>`));
1501
+ }
1502
+ return `
1503
+ <div class="row">
1504
+ <div class="row-header">
1505
+ <div>
1506
+ <div class="row-title">${escapeHtml(issue.title)}</div>
1507
+ <div class="row-subtitle">${escapeHtml(issue.checkId)}</div>
1508
+ </div>
1509
+ ${renderBadge(issue.status)}
1510
+ </div>
1511
+ <div class="row-body">${escapeHtml(issue.message)}</div>
1512
+ ${renderDefinitionList(
1513
+ [
1514
+ lineRange ? { label: "Location", value: lineRange } : null,
1515
+ { label: "Check ID", value: issue.checkId }
1516
+ ].filter((item) => item !== null)
1517
+ )}
1518
+ ${detailBlocks.join("")}
1519
+ </div>
1520
+ `;
1521
+ }
1522
+ function renderLintIssueList(report) {
1523
+ const skippedSecurityPatterns = countSkippedSecurityPatterns(report.issues);
1524
+ const rows = report.issues.map((issue) => renderLintIssueRow(issue)).join("");
1525
+ const info = skippedSecurityPatterns > 0 ? `<p class="info-line">Skipped security patterns in examples/comments: ${escapeHtml(skippedSecurityPatterns)}</p>` : "";
1526
+ return `<div class="row-list">${rows}</div>${info}`;
1527
+ }
1528
+ function renderTriggerCaseRow(testCase) {
1529
+ const details = testCase.rawModelResponse ? renderDetails("Model response", renderPreBlock(testCase.rawModelResponse)) : "";
1530
+ return `
1531
+ <div class="row">
1532
+ <div class="row-header">
1533
+ <div>
1534
+ <div class="row-title">${escapeHtml(testCase.query)}</div>
1535
+ <div class="row-subtitle">${escapeHtml(
1536
+ `expected=${testCase.expected} actual=${testCase.actual} should_trigger=${String(testCase.shouldTrigger)}`
1537
+ )}</div>
1538
+ </div>
1539
+ ${renderBadge(testCase.matched ? "pass" : "fail")}
1540
+ </div>
1541
+ ${renderDefinitionList([
1542
+ { label: "Expected", value: testCase.expected },
1543
+ { label: "Actual", value: testCase.actual }
1544
+ ])}
1545
+ ${details}
1546
+ </div>
1547
+ `;
1548
+ }
1549
+ function promptStatus(promptResult) {
1550
+ if (promptResult.totalAssertions === 0) {
1551
+ return "skip";
1552
+ }
1553
+ if (promptResult.passedAssertions === promptResult.totalAssertions) {
1554
+ return "pass";
1555
+ }
1556
+ if (promptResult.passedAssertions === 0) {
1557
+ return "fail";
1558
+ }
1559
+ return "warn";
1560
+ }
1561
+ function renderAssertionRow(assertion) {
1562
+ return renderDetails(
1563
+ `${badgeLabel(assertion.passed ? "pass" : "fail")} ${assertion.assertion}`,
1564
+ renderPreBlock(assertion.evidence)
1565
+ );
1566
+ }
1567
+ function renderEvalPromptRow(promptResult) {
1568
+ const assertionDetails = promptResult.assertions.map((assertion) => renderAssertionRow(assertion)).join("");
1569
+ const responseDetails = renderDetails("Full model response", renderPreBlock(promptResult.response));
1570
+ return `
1571
+ <div class="row">
1572
+ <div class="row-header">
1573
+ <div>
1574
+ <div class="row-title">${escapeHtml(promptResult.prompt)}</div>
1575
+ <div class="row-subtitle">${escapeHtml(
1576
+ `${promptResult.passedAssertions}/${promptResult.totalAssertions} assertions passed`
1577
+ )}</div>
1578
+ </div>
1579
+ ${renderBadge(promptStatus(promptResult))}
1580
+ </div>
1581
+ <div class="row-body">${escapeHtml(promptResult.responseSummary)}</div>
1582
+ ${renderDefinitionList([
1583
+ { label: "Passed assertions", value: String(promptResult.passedAssertions) },
1584
+ { label: "Total assertions", value: String(promptResult.totalAssertions) }
1585
+ ])}
1586
+ ${renderDetails("Assertion evidence", assertionDetails || `<p>No assertions.</p>`)}
1587
+ ${responseDetails}
1588
+ </div>
1589
+ `;
1590
+ }
1591
+ function gateStatus(value) {
1592
+ if (value === null) {
1593
+ return "skip";
1594
+ }
1595
+ return value ? "pass" : "fail";
1596
+ }
1597
+ function renderGateCard(title, status, message) {
1598
+ return `
1599
+ <div class="gate-card">
1600
+ <div class="row-header">
1601
+ <div class="row-title">${escapeHtml(title)}</div>
1602
+ ${renderBadge(status)}
1603
+ </div>
1604
+ <div class="row-body">${escapeHtml(message)}</div>
1605
+ </div>
1606
+ `;
1607
+ }
1608
+ function renderCollapsibleSection(title, summary, body, status) {
1609
+ return `
1610
+ <details class="section-card" open>
1611
+ <summary>
1612
+ <span class="section-title">${escapeHtml(title)}</span>
1613
+ <span class="section-summary">${renderBadge(status)} ${escapeHtml(summary)}</span>
1614
+ </summary>
1615
+ <div class="section-body">${body}</div>
1616
+ </details>
1617
+ `;
1618
+ }
1619
+ function resolveOptionalTarget(result, fallback) {
1620
+ return result.target ?? fallback;
1621
+ }
1622
+ function renderHtmlDocument(title, body) {
1623
+ return `<!DOCTYPE html>
1624
+ <html lang="en">
1625
+ <head>
1626
+ <meta charset="utf-8">
1627
+ <meta name="viewport" content="width=device-width, initial-scale=1">
1628
+ <title>${escapeHtml(title)}</title>
1629
+ <style>
1630
+ :root {
1631
+ color-scheme: light;
1632
+ --bg: #f5f5f5;
1633
+ --surface: #ffffff;
1634
+ --surface-muted: #fafafa;
1635
+ --border: #d4d4d8;
1636
+ --text: #111827;
1637
+ --muted: #6b7280;
1638
+ --pass: #22c55e;
1639
+ --warn: #eab308;
1640
+ --fail: #ef4444;
1641
+ --skip: #6b7280;
1642
+ --shadow: 0 10px 30px rgba(15, 23, 42, 0.08);
1643
+ }
1644
+
1645
+ * {
1646
+ box-sizing: border-box;
1647
+ }
1648
+
1649
+ body {
1650
+ margin: 0;
1651
+ background: linear-gradient(180deg, #fafafa 0%, #f4f4f5 100%);
1652
+ color: var(--text);
1653
+ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
1654
+ line-height: 1.5;
1655
+ }
1656
+
1657
+ .container {
1658
+ max-width: 1120px;
1659
+ margin: 0 auto;
1660
+ padding: 24px 16px 40px;
1661
+ }
1662
+
1663
+ .card,
1664
+ .section-card {
1665
+ background: var(--surface);
1666
+ border: 1px solid var(--border);
1667
+ border-radius: 16px;
1668
+ box-shadow: var(--shadow);
1669
+ margin-bottom: 16px;
1670
+ }
1671
+
1672
+ .card {
1673
+ padding: 20px;
1674
+ }
1675
+
1676
+ .header-card h1,
1677
+ .card h2 {
1678
+ margin: 0 0 10px;
1679
+ font-size: 1.25rem;
1680
+ }
1681
+
1682
+ .eyebrow {
1683
+ margin-bottom: 10px;
1684
+ color: var(--muted);
1685
+ font-size: 0.78rem;
1686
+ letter-spacing: 0.08em;
1687
+ text-transform: uppercase;
1688
+ }
1689
+
1690
+ .target-line,
1691
+ .info-line {
1692
+ color: var(--muted);
1693
+ overflow-wrap: anywhere;
1694
+ }
1695
+
1696
+ .meta-grid,
1697
+ .stats-grid,
1698
+ .gate-grid,
1699
+ .definition-list {
1700
+ display: grid;
1701
+ gap: 12px;
1702
+ }
1703
+
1704
+ .meta-grid,
1705
+ .gate-grid,
1706
+ .definition-list {
1707
+ grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
1708
+ }
1709
+
1710
+ .stats-grid {
1711
+ grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
1712
+ margin-top: 16px;
1713
+ }
1714
+
1715
+ .meta-grid {
1716
+ margin-top: 14px;
1717
+ }
1718
+
1719
+ .meta-item,
1720
+ .definition-item,
1721
+ .stat-card,
1722
+ .gate-card {
1723
+ background: var(--surface-muted);
1724
+ border: 1px solid var(--border);
1725
+ border-radius: 12px;
1726
+ padding: 12px;
1727
+ }
1728
+
1729
+ .meta-item,
1730
+ .definition-item {
1731
+ display: flex;
1732
+ justify-content: space-between;
1733
+ gap: 12px;
1734
+ }
1735
+
1736
+ .meta-label,
1737
+ .definition-label,
1738
+ .stat-label {
1739
+ color: var(--muted);
1740
+ font-size: 0.82rem;
1741
+ }
1742
+
1743
+ .meta-value,
1744
+ .definition-value {
1745
+ text-align: right;
1746
+ overflow-wrap: anywhere;
1747
+ }
1748
+
1749
+ .stat-value {
1750
+ margin-top: 4px;
1751
+ font-size: 1.3rem;
1752
+ font-weight: 700;
1753
+ }
1754
+
1755
+ .stat-note {
1756
+ margin-top: 6px;
1757
+ color: var(--muted);
1758
+ font-size: 0.82rem;
1759
+ }
1760
+
1761
+ .status-pass {
1762
+ border-color: rgba(34, 197, 94, 0.35);
1763
+ }
1764
+
1765
+ .status-warn {
1766
+ border-color: rgba(234, 179, 8, 0.35);
1767
+ }
1768
+
1769
+ .status-fail {
1770
+ border-color: rgba(239, 68, 68, 0.35);
1771
+ }
1772
+
1773
+ .status-skip {
1774
+ border-color: rgba(107, 114, 128, 0.35);
1775
+ }
1776
+
1777
+ .row-list {
1778
+ display: grid;
1779
+ gap: 12px;
1780
+ }
1781
+
1782
+ .row {
1783
+ border: 1px solid var(--border);
1784
+ border-radius: 12px;
1785
+ padding: 14px;
1786
+ background: var(--surface-muted);
1787
+ }
1788
+
1789
+ .row-header {
1790
+ display: flex;
1791
+ justify-content: space-between;
1792
+ align-items: flex-start;
1793
+ gap: 12px;
1794
+ }
1795
+
1796
+ .row-title {
1797
+ font-weight: 700;
1798
+ overflow-wrap: anywhere;
1799
+ }
1800
+
1801
+ .row-subtitle {
1802
+ margin-top: 4px;
1803
+ color: var(--muted);
1804
+ font-size: 0.84rem;
1805
+ overflow-wrap: anywhere;
1806
+ }
1807
+
1808
+ .row-body {
1809
+ margin-top: 10px;
1810
+ overflow-wrap: anywhere;
1811
+ }
1812
+
1813
+ .badge {
1814
+ display: inline-flex;
1815
+ align-items: center;
1816
+ justify-content: center;
1817
+ min-width: 58px;
1818
+ padding: 3px 10px;
1819
+ border-radius: 999px;
1820
+ border: 1px solid currentColor;
1821
+ font-size: 0.76rem;
1822
+ font-weight: 700;
1823
+ letter-spacing: 0.04em;
1824
+ white-space: nowrap;
1825
+ }
1826
+
1827
+ .badge.pass {
1828
+ color: #15803d;
1829
+ background: rgba(34, 197, 94, 0.14);
1830
+ }
1831
+
1832
+ .badge.warn {
1833
+ color: #a16207;
1834
+ background: rgba(234, 179, 8, 0.18);
1835
+ }
1836
+
1837
+ .badge.fail {
1838
+ color: #b91c1c;
1839
+ background: rgba(239, 68, 68, 0.14);
1840
+ }
1841
+
1842
+ .badge.skip {
1843
+ color: #4b5563;
1844
+ background: rgba(107, 114, 128, 0.14);
1845
+ }
1846
+
1847
+ details {
1848
+ margin-top: 10px;
1849
+ }
1850
+
1851
+ details summary {
1852
+ cursor: pointer;
1853
+ color: var(--muted);
1854
+ }
1855
+
1856
+ .detail-block {
1857
+ border-top: 1px dashed var(--border);
1858
+ padding-top: 10px;
1859
+ }
1860
+
1861
+ .detail-content p {
1862
+ margin: 0;
1863
+ }
1864
+
1865
+ .section-card summary {
1866
+ display: flex;
1867
+ justify-content: space-between;
1868
+ align-items: center;
1869
+ gap: 12px;
1870
+ padding: 18px 20px;
1871
+ list-style: none;
1872
+ }
1873
+
1874
+ .section-card summary::-webkit-details-marker {
1875
+ display: none;
1876
+ }
1877
+
1878
+ .section-title {
1879
+ font-size: 1rem;
1880
+ font-weight: 700;
1881
+ color: var(--text);
1882
+ }
1883
+
1884
+ .section-summary {
1885
+ display: inline-flex;
1886
+ align-items: center;
1887
+ gap: 8px;
1888
+ color: var(--muted);
1889
+ text-align: right;
1890
+ }
1891
+
1892
+ .section-body {
1893
+ padding: 0 20px 20px;
1894
+ }
1895
+
1896
+ .gate-grid {
1897
+ margin-top: 12px;
1898
+ }
1899
+
1900
+ pre {
1901
+ margin: 0;
1902
+ padding: 12px;
1903
+ background: #f8fafc;
1904
+ border: 1px solid var(--border);
1905
+ border-radius: 10px;
1906
+ white-space: pre-wrap;
1907
+ word-break: break-word;
1908
+ overflow-wrap: anywhere;
1909
+ }
1910
+
1911
+ ul {
1912
+ margin: 0;
1913
+ padding-left: 20px;
1914
+ }
1915
+
1916
+ @media (max-width: 720px) {
1917
+ .container {
1918
+ padding: 16px 12px 28px;
1919
+ }
1920
+
1921
+ .row-header,
1922
+ .section-card summary,
1923
+ .meta-item,
1924
+ .definition-item {
1925
+ flex-direction: column;
1926
+ align-items: flex-start;
1927
+ }
1928
+
1929
+ .meta-value,
1930
+ .definition-value,
1931
+ .section-summary {
1932
+ text-align: left;
1933
+ }
1934
+ }
1935
+ </style>
1936
+ </head>
1937
+ <body>
1938
+ <main class="container">
1939
+ ${body}
1940
+ </main>
1941
+ </body>
1942
+ </html>`;
1943
+ }
1944
+ function renderLintHtml(report) {
1945
+ const passRate = report.summary.total === 0 ? 0 : report.summary.passed / report.summary.total;
1946
+ const body = [
1947
+ renderHeaderCard(
1948
+ "lint",
1949
+ "Static Analysis Report",
1950
+ report.target,
1951
+ [
1952
+ { label: "Pass rate", value: formatPercent(passRate), note: `${report.summary.passed}/${report.summary.total} passed` },
1953
+ { label: "Warnings", value: String(report.summary.warnings), status: report.summary.warnings > 0 ? "warn" : "pass" },
1954
+ { label: "Failures", value: String(report.summary.failures), status: report.summary.failures > 0 ? "fail" : "pass" },
1955
+ { label: "Checks", value: String(report.summary.total) }
1956
+ ],
1957
+ [{ label: "Target", value: report.target }]
1958
+ ),
1959
+ renderSectionCard("Lint Issues", renderLintIssueList(report))
1960
+ ].join("");
1961
+ return renderHtmlDocument(`skilltest lint - ${report.target}`, body);
1962
+ }
1963
+ function renderTriggerHtml(result) {
1964
+ const htmlResult = result;
1965
+ const target = resolveOptionalTarget(htmlResult, result.skillName);
1966
+ const matchedCount = result.cases.filter((testCase) => testCase.matched).length;
1967
+ const matchRate = result.cases.length === 0 ? 0 : matchedCount / result.cases.length;
1968
+ const body = [
1969
+ renderHeaderCard(
1970
+ "trigger",
1971
+ result.skillName,
1972
+ target,
1973
+ [
1974
+ { label: "Match rate", value: formatPercent(matchRate), note: `${matchedCount}/${result.cases.length} matched` },
1975
+ { label: "Precision", value: formatPercent(result.metrics.precision) },
1976
+ { label: "Recall", value: formatPercent(result.metrics.recall) },
1977
+ { label: "F1", value: formatPercent(result.metrics.f1), status: result.metrics.f1 >= 0.8 ? "pass" : "warn" }
1978
+ ],
1979
+ [
1980
+ { label: "Provider", value: result.provider },
1981
+ { label: "Model", value: result.model },
1982
+ { label: "Seed", value: result.seed !== void 0 ? String(result.seed) : "none" },
1983
+ { label: "Queries", value: String(result.queries.length) }
1984
+ ]
1985
+ ),
1986
+ renderSectionCard("Trigger Cases", `<div class="row-list">${result.cases.map((testCase) => renderTriggerCaseRow(testCase)).join("")}</div>`),
1987
+ renderSectionCard(
1988
+ "Suggestions",
1989
+ `<ul>${result.suggestions.map((suggestion) => `<li>${escapeHtml(suggestion)}</li>`).join("")}</ul>`
1990
+ )
1991
+ ].join("");
1992
+ return renderHtmlDocument(`skilltest trigger - ${result.skillName}`, body);
1993
+ }
1994
+ function renderEvalHtml(result) {
1995
+ const htmlResult = result;
1996
+ const target = resolveOptionalTarget(htmlResult, result.skillName);
1997
+ const passRate = result.summary.totalAssertions === 0 ? 0 : result.summary.passedAssertions / result.summary.totalAssertions;
1998
+ const body = [
1999
+ renderHeaderCard(
2000
+ "eval",
2001
+ result.skillName,
2002
+ target,
2003
+ [
2004
+ {
2005
+ label: "Assertion pass rate",
2006
+ value: formatPercent(passRate),
2007
+ note: `${result.summary.passedAssertions}/${result.summary.totalAssertions} passed`
2008
+ },
2009
+ { label: "Prompts", value: String(result.summary.totalPrompts) },
2010
+ { label: "Model", value: result.model },
2011
+ { label: "Grader", value: result.graderModel }
2012
+ ],
2013
+ [
2014
+ { label: "Provider", value: result.provider },
2015
+ { label: "Execution model", value: result.model },
2016
+ { label: "Grader model", value: result.graderModel },
2017
+ { label: "Prompts", value: String(result.prompts.length) }
2018
+ ]
2019
+ ),
2020
+ renderSectionCard("Eval Prompts", `<div class="row-list">${result.results.map((promptResult) => renderEvalPromptRow(promptResult)).join("")}</div>`)
2021
+ ].join("");
2022
+ return renderHtmlDocument(`skilltest eval - ${result.skillName}`, body);
2023
+ }
2024
+ function renderCheckHtml(result) {
2025
+ const skillName = result.trigger?.skillName ?? result.eval?.skillName ?? result.target;
2026
+ const triggerBody = result.trigger ? `<div class="row-list">${result.trigger.cases.map((testCase) => renderTriggerCaseRow(testCase)).join("")}</div>
2027
+ <div class="card" style="margin-top: 16px;">
2028
+ <h2>Trigger Suggestions</h2>
2029
+ <ul>${result.trigger.suggestions.map((suggestion) => `<li>${escapeHtml(suggestion)}</li>`).join("")}</ul>
2030
+ </div>` : renderMessageRow("skip", "Trigger skipped", result.triggerSkippedReason ?? "Skipped.");
2031
+ const evalBody = result.eval ? `<div class="row-list">${result.eval.results.map((promptResult) => renderEvalPromptRow(promptResult)).join("")}</div>` : renderMessageRow("skip", "Eval skipped", result.evalSkippedReason ?? "Skipped.");
2032
+ const lintStatus = result.gates.lintPassed ? "pass" : "fail";
2033
+ const triggerStatus = gateStatus(result.gates.triggerPassed);
2034
+ const evalStatus = gateStatus(result.gates.evalPassed);
2035
+ const overallStatus = result.gates.overallPassed ? "pass" : "fail";
2036
+ const header = renderHeaderCard(
2037
+ "check",
2038
+ skillName,
2039
+ result.target,
2040
+ [
2041
+ { label: "Overall gate", value: badgeLabel(overallStatus), status: overallStatus },
2042
+ {
2043
+ label: "Trigger F1",
2044
+ value: result.gates.triggerF1 !== null ? formatPercent(result.gates.triggerF1) : "skipped",
2045
+ status: triggerStatus
2046
+ },
2047
+ {
2048
+ label: "Eval pass rate",
2049
+ value: result.gates.evalAssertPassRate !== null ? formatPercent(result.gates.evalAssertPassRate) : "skipped",
2050
+ status: evalStatus
2051
+ },
2052
+ {
2053
+ label: "Lint result",
2054
+ value: `${result.lint.summary.failures} fail / ${result.lint.summary.warnings} warn`,
2055
+ status: lintStatus
2056
+ }
2057
+ ],
2058
+ [
2059
+ { label: "Provider", value: result.provider },
2060
+ { label: "Model", value: result.model },
2061
+ { label: "Grader model", value: result.graderModel },
2062
+ {
2063
+ label: "Thresholds",
2064
+ value: `min-f1=${result.thresholds.minF1.toFixed(2)} min-assert-pass-rate=${result.thresholds.minAssertPassRate.toFixed(2)}`
2065
+ }
2066
+ ]
2067
+ );
2068
+ const lintSection = renderCollapsibleSection(
2069
+ "Lint",
2070
+ `${result.lint.summary.passed}/${result.lint.summary.total} passed, ${result.lint.summary.warnings} warnings, ${result.lint.summary.failures} failures`,
2071
+ renderLintIssueList(result.lint),
2072
+ lintStatus
2073
+ );
2074
+ const triggerSection = renderCollapsibleSection(
2075
+ "Trigger",
2076
+ result.trigger ? `f1=${formatPercent(result.trigger.metrics.f1)} precision=${formatPercent(result.trigger.metrics.precision)} recall=${formatPercent(result.trigger.metrics.recall)}` : result.triggerSkippedReason ?? "Skipped.",
2077
+ triggerBody,
2078
+ triggerStatus
2079
+ );
2080
+ const evalSection = renderCollapsibleSection(
2081
+ "Eval",
2082
+ result.eval ? `assertion pass rate=${formatPercent(result.gates.evalAssertPassRate ?? 0)} (${result.eval.summary.passedAssertions}/${result.eval.summary.totalAssertions})` : result.evalSkippedReason ?? "Skipped.",
2083
+ evalBody,
2084
+ evalStatus
2085
+ );
2086
+ const qualityGate = renderSectionCard(
2087
+ "Quality Gate",
2088
+ `<div class="gate-grid">
2089
+ ${renderGateCard("Lint gate", lintStatus, result.gates.lintPassed ? "Lint passed." : "Lint failed.")}
2090
+ ${renderGateCard(
2091
+ "Trigger gate",
2092
+ triggerStatus,
2093
+ result.gates.triggerPassed === null ? result.triggerSkippedReason ?? "Skipped." : `required ${result.thresholds.minF1.toFixed(2)}, actual ${result.gates.triggerF1?.toFixed(2) ?? "n/a"}`
2094
+ )}
2095
+ ${renderGateCard(
2096
+ "Eval gate",
2097
+ evalStatus,
2098
+ result.gates.evalPassed === null ? result.evalSkippedReason ?? "Skipped." : `required ${result.thresholds.minAssertPassRate.toFixed(2)}, actual ${result.gates.evalAssertPassRate?.toFixed(2) ?? "n/a"}`
2099
+ )}
2100
+ ${renderGateCard("Overall", overallStatus, result.gates.overallPassed ? "All quality gates passed." : "One or more gates failed.")}
2101
+ </div>`
2102
+ );
2103
+ return renderHtmlDocument(`skilltest check - ${skillName}`, [header, lintSection, triggerSection, evalSection, qualityGate].join(""));
2104
+ }
2105
+
1369
2106
  // src/reporters/terminal.ts
1370
2107
  import { Chalk } from "chalk";
1371
2108
  function getChalkInstance(enableColor) {
@@ -1378,7 +2115,7 @@ function renderIssueLine(issue, c) {
1378
2115
  return ` ${label} ${issue.title}
1379
2116
  ${issue.message}${detail}`;
1380
2117
  }
1381
- function countSkippedSecurityPatterns(issues) {
2118
+ function countSkippedSecurityPatterns2(issues) {
1382
2119
  return issues.reduce((total, issue) => {
1383
2120
  if (!issue.checkId.startsWith("security:")) {
1384
2121
  return total;
@@ -1398,13 +2135,13 @@ function renderLintReport(report, enableColor) {
1398
2135
  `\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518`
1399
2136
  ];
1400
2137
  const renderedIssues = report.issues.map((issue) => renderIssueLine(issue, c)).join("\n");
1401
- const skippedSecurityPatterns = countSkippedSecurityPatterns(report.issues);
2138
+ const skippedSecurityPatterns = countSkippedSecurityPatterns2(report.issues);
1402
2139
  const infoLine = skippedSecurityPatterns > 0 ? `
1403
2140
  ${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)` : "";
1404
2141
  return `${headerLines.join("\n")}
1405
2142
  ${renderedIssues}${infoLine}`;
1406
2143
  }
1407
- function formatPercent(value) {
2144
+ function formatPercent2(value) {
1408
2145
  return `${(value * 100).toFixed(1)}%`;
1409
2146
  }
1410
2147
  function renderTriggerReport(result, enableColor, verbose) {
@@ -1416,7 +2153,7 @@ function renderTriggerReport(result, enableColor, verbose) {
1416
2153
  lines.push(`\u2502 skill: ${result.skillName}`);
1417
2154
  lines.push(`\u2502 provider/model: ${result.provider}/${result.model}`);
1418
2155
  lines.push(
1419
- `\u2502 precision: ${formatPercent(result.metrics.precision)} recall: ${formatPercent(result.metrics.recall)} f1: ${formatPercent(result.metrics.f1)}`
2156
+ `\u2502 precision: ${formatPercent2(result.metrics.precision)} recall: ${formatPercent2(result.metrics.recall)} f1: ${formatPercent2(result.metrics.f1)}`
1420
2157
  );
1421
2158
  lines.push(
1422
2159
  `\u2502 TP ${result.metrics.truePositives} TN ${result.metrics.trueNegatives} FP ${result.metrics.falsePositives} FN ${result.metrics.falseNegatives}`
@@ -1490,7 +2227,7 @@ function renderCheckReport(result, enableColor, verbose) {
1490
2227
  for (const issue of lintIssues) {
1491
2228
  lines.push(renderIssueLine(issue, c));
1492
2229
  }
1493
- const skippedSecurityPatterns = countSkippedSecurityPatterns(result.lint.issues);
2230
+ const skippedSecurityPatterns = countSkippedSecurityPatterns2(result.lint.issues);
1494
2231
  if (skippedSecurityPatterns > 0) {
1495
2232
  lines.push(` ${c.cyan("\u2139")} ${skippedSecurityPatterns} security pattern(s) found in code examples/comments (not flagged)`);
1496
2233
  }
@@ -1498,7 +2235,7 @@ function renderCheckReport(result, enableColor, verbose) {
1498
2235
  lines.push("Trigger");
1499
2236
  if (result.trigger) {
1500
2237
  lines.push(
1501
- `- ${triggerGate} f1=${formatPercent(result.trigger.metrics.f1)} (precision=${formatPercent(result.trigger.metrics.precision)} recall=${formatPercent(result.trigger.metrics.recall)})`
2238
+ `- ${triggerGate} f1=${formatPercent2(result.trigger.metrics.f1)} (precision=${formatPercent2(result.trigger.metrics.precision)} recall=${formatPercent2(result.trigger.metrics.recall)})`
1502
2239
  );
1503
2240
  lines.push(
1504
2241
  ` TP ${result.trigger.metrics.truePositives} TN ${result.trigger.metrics.trueNegatives} FP ${result.trigger.metrics.falsePositives} FN ${result.trigger.metrics.falseNegatives}`
@@ -1517,7 +2254,7 @@ function renderCheckReport(result, enableColor, verbose) {
1517
2254
  if (result.eval) {
1518
2255
  const passRate = result.gates.evalAssertPassRate ?? 0;
1519
2256
  lines.push(
1520
- `- ${evalGate} assertion pass rate=${formatPercent(passRate)} (${result.eval.summary.passedAssertions}/${result.eval.summary.totalAssertions})`
2257
+ `- ${evalGate} assertion pass rate=${formatPercent2(passRate)} (${result.eval.summary.passedAssertions}/${result.eval.summary.totalAssertions})`
1521
2258
  );
1522
2259
  for (const promptResult of result.eval.results) {
1523
2260
  const failedAssertions = promptResult.assertions.filter((assertion) => !assertion.passed);
@@ -1609,6 +2346,58 @@ async function gradeResponse(options) {
1609
2346
  return parsed.data.assertions;
1610
2347
  }
1611
2348
 
2349
+ // src/utils/concurrency.ts
2350
+ async function pMap(items, fn, concurrency) {
2351
+ if (!Number.isInteger(concurrency) || concurrency < 1) {
2352
+ throw new Error("pMap concurrency must be an integer greater than or equal to 1.");
2353
+ }
2354
+ if (items.length === 0) {
2355
+ return [];
2356
+ }
2357
+ const results = new Array(items.length);
2358
+ return new Promise((resolve, reject) => {
2359
+ let nextIndex = 0;
2360
+ let completed = 0;
2361
+ let rejected = false;
2362
+ const launchNext = () => {
2363
+ if (rejected) {
2364
+ return;
2365
+ }
2366
+ if (completed === items.length) {
2367
+ resolve(results);
2368
+ return;
2369
+ }
2370
+ if (nextIndex >= items.length) {
2371
+ return;
2372
+ }
2373
+ const currentIndex = nextIndex;
2374
+ nextIndex += 1;
2375
+ Promise.resolve().then(() => fn(items[currentIndex], currentIndex)).then((result) => {
2376
+ if (rejected) {
2377
+ return;
2378
+ }
2379
+ results[currentIndex] = result;
2380
+ completed += 1;
2381
+ if (completed === items.length) {
2382
+ resolve(results);
2383
+ return;
2384
+ }
2385
+ launchNext();
2386
+ }).catch((error) => {
2387
+ if (rejected) {
2388
+ return;
2389
+ }
2390
+ rejected = true;
2391
+ reject(error);
2392
+ });
2393
+ };
2394
+ const initialWorkers = Math.min(concurrency, items.length);
2395
+ for (let workerIndex = 0; workerIndex < initialWorkers; workerIndex += 1) {
2396
+ launchNext();
2397
+ }
2398
+ });
2399
+ }
2400
+
1612
2401
  // src/core/eval-runner.ts
1613
2402
  var evalPromptSchema = z3.object({
1614
2403
  prompt: z3.string().min(1),
@@ -1655,34 +2444,37 @@ async function generatePrompts(skill, provider, model, count) {
1655
2444
  }
1656
2445
  async function runEval(skill, options) {
1657
2446
  const prompts = options.prompts && options.prompts.length > 0 ? evalPromptArraySchema.parse(options.prompts) : await generatePrompts(skill, options.provider, options.model, options.numRuns);
1658
- const results = [];
1659
- for (const evalPrompt of prompts) {
1660
- const systemPrompt = [
1661
- "You are an AI assistant with an activated skill.",
1662
- "Follow this SKILL.md content exactly where applicable.",
1663
- "",
1664
- skill.raw
1665
- ].join("\n");
1666
- const response = await options.provider.sendMessage(systemPrompt, evalPrompt.prompt, { model: options.model });
1667
- const gradedAssertions = await gradeResponse({
1668
- provider: options.provider,
1669
- model: options.graderModel,
1670
- skillName: skill.frontmatter.name,
1671
- skillBody: skill.content,
1672
- userPrompt: evalPrompt.prompt,
1673
- modelResponse: response,
1674
- assertions: evalPrompt.assertions
1675
- });
1676
- const passedAssertions2 = gradedAssertions.filter((assertion) => assertion.passed).length;
1677
- results.push({
1678
- prompt: evalPrompt.prompt,
1679
- assertions: gradedAssertions,
1680
- responseSummary: response.slice(0, 200),
1681
- response,
1682
- passedAssertions: passedAssertions2,
1683
- totalAssertions: gradedAssertions.length
1684
- });
1685
- }
2447
+ const systemPrompt = [
2448
+ "You are an AI assistant with an activated skill.",
2449
+ "Follow this SKILL.md content exactly where applicable.",
2450
+ "",
2451
+ skill.raw
2452
+ ].join("\n");
2453
+ const results = await pMap(
2454
+ prompts,
2455
+ async (evalPrompt) => {
2456
+ const response = await options.provider.sendMessage(systemPrompt, evalPrompt.prompt, { model: options.model });
2457
+ const gradedAssertions = await gradeResponse({
2458
+ provider: options.provider,
2459
+ model: options.graderModel,
2460
+ skillName: skill.frontmatter.name,
2461
+ skillBody: skill.content,
2462
+ userPrompt: evalPrompt.prompt,
2463
+ modelResponse: response,
2464
+ assertions: evalPrompt.assertions
2465
+ });
2466
+ const passedAssertions2 = gradedAssertions.filter((assertion) => assertion.passed).length;
2467
+ return {
2468
+ prompt: evalPrompt.prompt,
2469
+ assertions: gradedAssertions,
2470
+ responseSummary: response.slice(0, 200),
2471
+ response,
2472
+ passedAssertions: passedAssertions2,
2473
+ totalAssertions: gradedAssertions.length
2474
+ };
2475
+ },
2476
+ options.concurrency ?? 5
2477
+ );
1686
2478
  const totalAssertions = results.reduce((total, result) => total + result.totalAssertions, 0);
1687
2479
  const passedAssertions = results.reduce((total, result) => total + result.passedAssertions, 0);
1688
2480
  return {
@@ -1859,9 +2651,8 @@ function buildSuggestions(metrics) {
1859
2651
  async function runTriggerTest(skill, options) {
1860
2652
  const rng = createRng(options.seed);
1861
2653
  const queries = options.queries && options.queries.length > 0 ? triggerQueryArraySchema.parse(options.queries) : await generateQueriesWithModel(skill, options.provider, options.model, options.numQueries);
1862
- const results = [];
1863
2654
  const skillName = skill.frontmatter.name;
1864
- for (const testQuery of queries) {
2655
+ const preparedQueries = queries.map((testQuery) => {
1865
2656
  const fakeCount = 5 + Math.floor(rng() * 5);
1866
2657
  const fakeSkills = sample(FAKE_SKILLS, fakeCount, rng);
1867
2658
  const allSkills = shuffle([
@@ -1872,28 +2663,41 @@ async function runTriggerTest(skill, options) {
1872
2663
  }
1873
2664
  ], rng);
1874
2665
  const skillListText = allSkills.map((entry) => `- ${entry.name}: ${entry.description}`).join("\n");
1875
- const systemPrompt = [
1876
- "You are selecting one skill to activate for a user query.",
1877
- "Choose the single best matching skill name from the provided list, or 'none' if no skill is a good fit.",
1878
- "Respond with only the skill name or 'none'."
1879
- ].join(" ");
1880
- const userPrompt = [`Available skills:`, skillListText, "", `User query: ${testQuery.query}`].join("\n");
1881
- const rawResponse = await options.provider.sendMessage(systemPrompt, userPrompt, { model: options.model });
1882
- const decision = parseDecision(
1883
- rawResponse,
1884
- allSkills.map((entry) => entry.name)
1885
- );
1886
- const expected = testQuery.should_trigger ? skillName : "none";
1887
- const matched = testQuery.should_trigger ? decision === skillName : decision !== skillName;
1888
- results.push({
1889
- query: testQuery.query,
1890
- shouldTrigger: testQuery.should_trigger,
1891
- expected,
1892
- actual: decision,
1893
- matched,
1894
- rawModelResponse: options.verbose ? rawResponse : void 0
1895
- });
1896
- }
2666
+ return {
2667
+ testQuery,
2668
+ fakeCount,
2669
+ fakeSkills,
2670
+ allSkills,
2671
+ skillListText
2672
+ };
2673
+ });
2674
+ const systemPrompt = [
2675
+ "You are selecting one skill to activate for a user query.",
2676
+ "Choose the single best matching skill name from the provided list, or 'none' if no skill is a good fit.",
2677
+ "Respond with only the skill name or 'none'."
2678
+ ].join(" ");
2679
+ const results = await pMap(
2680
+ preparedQueries,
2681
+ async ({ testQuery, allSkills, skillListText }) => {
2682
+ const userPrompt = [`Available skills:`, skillListText, "", `User query: ${testQuery.query}`].join("\n");
2683
+ const rawResponse = await options.provider.sendMessage(systemPrompt, userPrompt, { model: options.model });
2684
+ const decision = parseDecision(
2685
+ rawResponse,
2686
+ allSkills.map((entry) => entry.name)
2687
+ );
2688
+ const expected = testQuery.should_trigger ? skillName : "none";
2689
+ const matched = testQuery.should_trigger ? decision === skillName : decision !== skillName;
2690
+ return {
2691
+ query: testQuery.query,
2692
+ shouldTrigger: testQuery.should_trigger,
2693
+ expected,
2694
+ actual: decision,
2695
+ matched,
2696
+ rawModelResponse: options.verbose ? rawResponse : void 0
2697
+ };
2698
+ },
2699
+ options.concurrency ?? 5
2700
+ );
1897
2701
  const metrics = calculateMetrics(skillName, results);
1898
2702
  return {
1899
2703
  skillName,
@@ -2059,6 +2863,9 @@ function writeError(error, asJson) {
2059
2863
  }
2060
2864
 
2061
2865
  // src/commands/lint.ts
2866
+ var lintCliSchema = z6.object({
2867
+ html: z6.string().optional()
2868
+ });
2062
2869
  async function handleLintCommand(targetPath, options) {
2063
2870
  try {
2064
2871
  const report = await runLinter(targetPath, { suppress: options.suppress });
@@ -2067,6 +2874,9 @@ async function handleLintCommand(targetPath, options) {
2067
2874
  } else {
2068
2875
  writeResult(renderLintReport(report, options.color), false);
2069
2876
  }
2877
+ if (options.html) {
2878
+ await fs6.writeFile(options.html, renderLintHtml(report), "utf8");
2879
+ }
2070
2880
  if (lintFails(report, options.failOn)) {
2071
2881
  process.exitCode = 1;
2072
2882
  }
@@ -2076,74 +2886,85 @@ async function handleLintCommand(targetPath, options) {
2076
2886
  }
2077
2887
  }
2078
2888
  function registerLintCommand(program) {
2079
- program.command("lint").description("Run static lint checks against a SKILL.md file or skill directory.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").action(async (targetPath, _commandOptions, command) => {
2889
+ program.command("lint").description("Run static lint checks against a SKILL.md file or skill directory.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--html <path>", "Write an HTML report to the given file path").action(async (targetPath, _commandOptions, command) => {
2080
2890
  const globalOptions = getGlobalCliOptions(command);
2081
2891
  const config = getResolvedConfig(command);
2892
+ const parsedCli = lintCliSchema.safeParse(command.opts());
2893
+ if (!parsedCli.success) {
2894
+ writeError(new Error(parsedCli.error.issues[0]?.message ?? "Invalid lint options."), globalOptions.json);
2895
+ process.exitCode = 2;
2896
+ return;
2897
+ }
2082
2898
  await handleLintCommand(targetPath, {
2083
2899
  ...globalOptions,
2084
2900
  failOn: config.lint.failOn,
2085
- suppress: config.lint.suppress
2901
+ suppress: config.lint.suppress,
2902
+ html: parsedCli.data.html
2086
2903
  });
2087
2904
  });
2088
2905
  }
2089
2906
 
2090
2907
  // src/commands/trigger.ts
2908
+ import fs8 from "node:fs/promises";
2091
2909
  import ora from "ora";
2092
- import { z as z7 } from "zod";
2910
+ import { z as z8 } from "zod";
2093
2911
 
2094
2912
  // src/utils/config.ts
2095
- import fs6 from "node:fs/promises";
2913
+ import fs7 from "node:fs/promises";
2096
2914
  import path5 from "node:path";
2097
- import { z as z6 } from "zod";
2098
- var providerNameSchema = z6.enum(["anthropic", "openai"]);
2099
- var lintFailOnSchema = z6.enum(["error", "warn"]);
2100
- var lintConfigSchema = z6.object({
2915
+ import { z as z7 } from "zod";
2916
+ var providerNameSchema = z7.enum(["anthropic", "openai"]);
2917
+ var lintFailOnSchema = z7.enum(["error", "warn"]);
2918
+ var lintConfigSchema = z7.object({
2101
2919
  failOn: lintFailOnSchema.optional(),
2102
- suppress: z6.array(z6.string().min(1)).optional()
2920
+ suppress: z7.array(z7.string().min(1)).optional()
2103
2921
  }).strict();
2104
- var triggerConfigSchema = z6.object({
2105
- numQueries: z6.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
2106
- threshold: z6.number().min(0).max(1).optional(),
2107
- seed: z6.number().int().optional()
2922
+ var triggerConfigSchema = z7.object({
2923
+ numQueries: z7.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
2924
+ threshold: z7.number().min(0).max(1).optional(),
2925
+ seed: z7.number().int().optional()
2108
2926
  }).strict().partial();
2109
- var evalConfigSchema = z6.object({
2110
- numRuns: z6.number().int().min(1).optional(),
2111
- threshold: z6.number().min(0).max(1).optional(),
2112
- promptFile: z6.string().min(1).optional(),
2113
- assertionsFile: z6.string().min(1).optional()
2927
+ var evalConfigSchema = z7.object({
2928
+ numRuns: z7.number().int().min(1).optional(),
2929
+ threshold: z7.number().min(0).max(1).optional(),
2930
+ promptFile: z7.string().min(1).optional(),
2931
+ assertionsFile: z7.string().min(1).optional()
2114
2932
  }).strict().partial();
2115
- var skilltestConfigSchema = z6.object({
2933
+ var skilltestConfigSchema = z7.object({
2116
2934
  provider: providerNameSchema.optional(),
2117
- model: z6.string().min(1).optional(),
2118
- json: z6.boolean().optional(),
2935
+ model: z7.string().min(1).optional(),
2936
+ json: z7.boolean().optional(),
2937
+ concurrency: z7.number().int().min(1).optional(),
2119
2938
  lint: lintConfigSchema.optional(),
2120
2939
  trigger: triggerConfigSchema.optional(),
2121
2940
  eval: evalConfigSchema.optional()
2122
2941
  }).strict();
2123
- var resolvedSkilltestConfigSchema = z6.object({
2942
+ var resolvedSkilltestConfigSchema = z7.object({
2124
2943
  provider: providerNameSchema,
2125
- model: z6.string().min(1),
2126
- json: z6.boolean(),
2127
- lint: z6.object({
2944
+ model: z7.string().min(1),
2945
+ json: z7.boolean(),
2946
+ concurrency: z7.number().int().min(1),
2947
+ lint: z7.object({
2128
2948
  failOn: lintFailOnSchema,
2129
- suppress: z6.array(z6.string().min(1))
2949
+ suppress: z7.array(z7.string().min(1))
2130
2950
  }),
2131
- trigger: z6.object({
2132
- numQueries: z6.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
2133
- threshold: z6.number().min(0).max(1),
2134
- seed: z6.number().int().optional()
2951
+ trigger: z7.object({
2952
+ numQueries: z7.number().int().min(2).refine((value) => value % 2 === 0, "trigger.numQueries must be an even number."),
2953
+ threshold: z7.number().min(0).max(1),
2954
+ seed: z7.number().int().optional()
2135
2955
  }),
2136
- eval: z6.object({
2137
- numRuns: z6.number().int().min(1),
2138
- threshold: z6.number().min(0).max(1),
2139
- promptFile: z6.string().min(1).optional(),
2140
- assertionsFile: z6.string().min(1).optional()
2956
+ eval: z7.object({
2957
+ numRuns: z7.number().int().min(1),
2958
+ threshold: z7.number().min(0).max(1),
2959
+ promptFile: z7.string().min(1).optional(),
2960
+ assertionsFile: z7.string().min(1).optional()
2141
2961
  })
2142
2962
  });
2143
2963
  var DEFAULT_SKILLTEST_CONFIG = {
2144
2964
  provider: "anthropic",
2145
2965
  model: "claude-sonnet-4-5-20250929",
2146
2966
  json: false,
2967
+ concurrency: 5,
2147
2968
  lint: {
2148
2969
  failOn: "error",
2149
2970
  suppress: []
@@ -2172,7 +2993,7 @@ function buildConfigValidationError(error, sourceLabel) {
2172
2993
  async function readJsonObject(filePath, label) {
2173
2994
  let raw;
2174
2995
  try {
2175
- raw = await fs6.readFile(filePath, "utf8");
2996
+ raw = await fs7.readFile(filePath, "utf8");
2176
2997
  } catch (error) {
2177
2998
  const message = error instanceof Error ? error.message : String(error);
2178
2999
  throw new Error(`Failed to read ${label}: ${message}`);
@@ -2205,7 +3026,7 @@ async function loadConfigFromNearestPackageJson(startDirectory) {
2205
3026
  const packageJsonPath = path5.join(currentDirectory, "package.json");
2206
3027
  if (await pathExists(packageJsonPath)) {
2207
3028
  const raw = await readJsonObject(packageJsonPath, packageJsonPath);
2208
- const packageJsonSchema = z6.object({
3029
+ const packageJsonSchema = z7.object({
2209
3030
  skilltestrc: skilltestConfigSchema.optional()
2210
3031
  }).passthrough();
2211
3032
  const parsed = packageJsonSchema.safeParse(raw);
@@ -2250,6 +3071,7 @@ function mergeConfigLayers(configFile = {}, cliFlags = {}, baseDirectory = proce
2250
3071
  provider: cliFlags.provider ?? configFile.provider ?? DEFAULT_SKILLTEST_CONFIG.provider,
2251
3072
  model: cliFlags.model ?? configFile.model ?? DEFAULT_SKILLTEST_CONFIG.model,
2252
3073
  json: cliFlags.json ?? configFile.json ?? DEFAULT_SKILLTEST_CONFIG.json,
3074
+ concurrency: cliFlags.concurrency ?? configFile.concurrency ?? DEFAULT_SKILLTEST_CONFIG.concurrency,
2253
3075
  lint: {
2254
3076
  failOn: cliFlags.lint?.failOn ?? configFile.lint?.failOn ?? DEFAULT_SKILLTEST_CONFIG.lint.failOn,
2255
3077
  suppress: cliFlags.lint?.suppress ?? configFile.lint?.suppress ?? DEFAULT_SKILLTEST_CONFIG.lint.suppress
@@ -2293,6 +3115,9 @@ function extractCliConfigOverrides(command) {
2293
3115
  if (command.getOptionValueSource("model") === "cli") {
2294
3116
  overrides.model = getTypedOptionValue(command, "model");
2295
3117
  }
3118
+ if ((command.name() === "trigger" || command.name() === "eval" || command.name() === "check") && command.getOptionValueSource("concurrency") === "cli") {
3119
+ overrides.concurrency = getTypedOptionValue(command, "concurrency");
3120
+ }
2296
3121
  if ((command.name() === "trigger" || command.name() === "check") && command.getOptionValueSource("numQueries") === "cli") {
2297
3122
  overrides.trigger = {
2298
3123
  ...overrides.trigger,
@@ -2322,7 +3147,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
2322
3147
  const skillDirectoryConfig = await resolveSkillDirectoryConfig(targetPath);
2323
3148
  if (skillDirectoryConfig) {
2324
3149
  return {
2325
- configFile: skillDirectoryConfig.configFile,
2326
3150
  ...skillDirectoryConfig,
2327
3151
  config: mergeConfigLayers(skillDirectoryConfig.configFile, cliFlags, skillDirectoryConfig.sourceDirectory)
2328
3152
  };
@@ -2331,7 +3155,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
2331
3155
  const cwdConfig = await loadConfigFromJsonFile(cwdConfigPath);
2332
3156
  if (cwdConfig) {
2333
3157
  return {
2334
- configFile: cwdConfig.configFile,
2335
3158
  ...cwdConfig,
2336
3159
  config: mergeConfigLayers(cwdConfig.configFile, cliFlags, cwdConfig.sourceDirectory)
2337
3160
  };
@@ -2339,7 +3162,6 @@ async function resolveConfigContext(targetPath, cliFlags) {
2339
3162
  const packageJsonConfig = await loadConfigFromNearestPackageJson(cwd);
2340
3163
  if (packageJsonConfig) {
2341
3164
  return {
2342
- configFile: packageJsonConfig.configFile,
2343
3165
  ...packageJsonConfig,
2344
3166
  config: mergeConfigLayers(packageJsonConfig.configFile, cliFlags, packageJsonConfig.sourceDirectory)
2345
3167
  };
@@ -2547,12 +3369,14 @@ function createProvider(providerName, apiKeyOverride) {
2547
3369
  }
2548
3370
 
2549
3371
  // src/commands/trigger.ts
2550
- var triggerCliSchema = z7.object({
2551
- queries: z7.string().optional(),
2552
- saveQueries: z7.string().optional(),
2553
- seed: z7.number().int().optional(),
2554
- verbose: z7.boolean().optional(),
2555
- apiKey: z7.string().optional()
3372
+ var triggerCliSchema = z8.object({
3373
+ queries: z8.string().optional(),
3374
+ saveQueries: z8.string().optional(),
3375
+ seed: z8.number().int().optional(),
3376
+ concurrency: z8.number().int().min(1).optional(),
3377
+ html: z8.string().optional(),
3378
+ verbose: z8.boolean().optional(),
3379
+ apiKey: z8.string().optional()
2556
3380
  });
2557
3381
  var DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-5-20250929";
2558
3382
  var DEFAULT_OPENAI_MODEL = "gpt-4.1-mini";
@@ -2597,6 +3421,7 @@ async function handleTriggerCommand(targetPath, options) {
2597
3421
  queries,
2598
3422
  numQueries: options.numQueries,
2599
3423
  seed: options.seed,
3424
+ concurrency: options.concurrency,
2600
3425
  verbose: options.verbose
2601
3426
  });
2602
3427
  if (options.saveQueries) {
@@ -2608,6 +3433,13 @@ async function handleTriggerCommand(targetPath, options) {
2608
3433
  } else {
2609
3434
  writeResult(renderTriggerOutputWithSeed(renderTriggerReport(result, options.color, options.verbose), result.seed), false);
2610
3435
  }
3436
+ if (options.html) {
3437
+ const htmlResult = {
3438
+ ...result,
3439
+ target: targetPath
3440
+ };
3441
+ await fs8.writeFile(options.html, renderTriggerHtml(htmlResult), "utf8");
3442
+ }
2611
3443
  } catch (error) {
2612
3444
  spinner?.stop();
2613
3445
  writeError(error, options.json);
@@ -2615,7 +3447,7 @@ async function handleTriggerCommand(targetPath, options) {
2615
3447
  }
2616
3448
  }
2617
3449
  function registerTriggerCommand(program) {
2618
- program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
3450
+ program.command("trigger").description("Evaluate whether a skill description triggers correctly.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--model <model>", "Model to use").option("--provider <provider>", "LLM provider: anthropic|openai").option("--queries <path>", "Path to custom test queries JSON").option("--num-queries <n>", "Number of auto-generated queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--concurrency <n>", "Maximum in-flight trigger requests", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--save-queries <path>", "Save generated queries to a JSON file").option("--api-key <key>", "API key override").option("--verbose", "Show full model decisions").action(async (targetPath, _commandOptions, command) => {
2619
3451
  const globalOptions = getGlobalCliOptions(command);
2620
3452
  const config = getResolvedConfig(command);
2621
3453
  const parsedCli = triggerCliSchema.safeParse(command.opts());
@@ -2632,6 +3464,8 @@ function registerTriggerCommand(program) {
2632
3464
  numQueries: config.trigger.numQueries,
2633
3465
  saveQueries: parsedCli.data.saveQueries,
2634
3466
  seed: parsedCli.data.seed ?? config.trigger.seed,
3467
+ concurrency: config.concurrency,
3468
+ html: parsedCli.data.html,
2635
3469
  verbose: Boolean(parsedCli.data.verbose),
2636
3470
  apiKey: parsedCli.data.apiKey
2637
3471
  });
@@ -2639,14 +3473,17 @@ function registerTriggerCommand(program) {
2639
3473
  }
2640
3474
 
2641
3475
  // src/commands/eval.ts
3476
+ import fs9 from "node:fs/promises";
2642
3477
  import ora2 from "ora";
2643
- import { z as z8 } from "zod";
2644
- var evalCliSchema = z8.object({
2645
- prompts: z8.string().optional(),
2646
- graderModel: z8.string().optional(),
2647
- saveResults: z8.string().optional(),
2648
- verbose: z8.boolean().optional(),
2649
- apiKey: z8.string().optional()
3478
+ import { z as z9 } from "zod";
3479
+ var evalCliSchema = z9.object({
3480
+ prompts: z9.string().optional(),
3481
+ graderModel: z9.string().optional(),
3482
+ saveResults: z9.string().optional(),
3483
+ concurrency: z9.number().int().min(1).optional(),
3484
+ html: z9.string().optional(),
3485
+ verbose: z9.boolean().optional(),
3486
+ apiKey: z9.string().optional()
2650
3487
  });
2651
3488
  var DEFAULT_ANTHROPIC_MODEL2 = "claude-sonnet-4-5-20250929";
2652
3489
  var DEFAULT_OPENAI_MODEL2 = "gpt-4.1-mini";
@@ -2686,6 +3523,7 @@ async function handleEvalCommand(targetPath, options, command) {
2686
3523
  model,
2687
3524
  graderModel,
2688
3525
  numRuns: options.numRuns,
3526
+ concurrency: options.concurrency,
2689
3527
  prompts
2690
3528
  });
2691
3529
  if (options.saveResults) {
@@ -2697,6 +3535,13 @@ async function handleEvalCommand(targetPath, options, command) {
2697
3535
  } else {
2698
3536
  writeResult(renderEvalReport(result, options.color, options.verbose), false);
2699
3537
  }
3538
+ if (options.html) {
3539
+ const htmlResult = {
3540
+ ...result,
3541
+ target: targetPath
3542
+ };
3543
+ await fs9.writeFile(options.html, renderEvalHtml(htmlResult), "utf8");
3544
+ }
2700
3545
  } catch (error) {
2701
3546
  spinner?.stop();
2702
3547
  writeError(error, options.json);
@@ -2704,7 +3549,7 @@ async function handleEvalCommand(targetPath, options, command) {
2704
3549
  }
2705
3550
  }
2706
3551
  function registerEvalCommand(program) {
2707
- program.command("eval").description("Run end-to-end skill execution and quality evaluation.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--prompts <path>", "Path to eval prompts JSON").option("--model <model>", "Model to execute prompts").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--provider <provider>", "LLM provider: anthropic|openai").option("--save-results <path>", "Save full evaluation results to JSON").option("--api-key <key>", "API key override").option("--verbose", "Show full model responses").action(async (targetPath, _commandOptions, command) => {
3552
+ program.command("eval").description("Run end-to-end skill execution and quality evaluation.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--prompts <path>", "Path to eval prompts JSON").option("--model <model>", "Model to execute prompts").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--provider <provider>", "LLM provider: anthropic|openai").option("--concurrency <n>", "Maximum in-flight eval prompt runs", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--save-results <path>", "Save full evaluation results to JSON").option("--api-key <key>", "API key override").option("--verbose", "Show full model responses").action(async (targetPath, _commandOptions, command) => {
2708
3553
  const globalOptions = getGlobalCliOptions(command);
2709
3554
  const config = getResolvedConfig(command);
2710
3555
  const parsedCli = evalCliSchema.safeParse(command.opts());
@@ -2722,9 +3567,11 @@ function registerEvalCommand(program) {
2722
3567
  graderModel: parsedCli.data.graderModel,
2723
3568
  provider: config.provider,
2724
3569
  saveResults: parsedCli.data.saveResults,
3570
+ html: parsedCli.data.html,
2725
3571
  verbose: Boolean(parsedCli.data.verbose),
2726
3572
  apiKey: parsedCli.data.apiKey,
2727
- numRuns: config.eval.numRuns
3573
+ numRuns: config.eval.numRuns,
3574
+ concurrency: config.concurrency
2728
3575
  },
2729
3576
  command
2730
3577
  );
@@ -2732,8 +3579,9 @@ function registerEvalCommand(program) {
2732
3579
  }
2733
3580
 
2734
3581
  // src/commands/check.ts
3582
+ import fs10 from "node:fs/promises";
2735
3583
  import ora3 from "ora";
2736
- import { z as z9 } from "zod";
3584
+ import { z as z10 } from "zod";
2737
3585
 
2738
3586
  // src/core/check-runner.ts
2739
3587
  function calculateEvalAssertPassRate(result) {
@@ -2764,23 +3612,33 @@ async function runCheck(inputPath, options) {
2764
3612
  evalSkippedReason = `Skipped: skill could not be parsed strictly (${message}).`;
2765
3613
  }
2766
3614
  if (parsedSkill) {
2767
- options.onStage?.("trigger");
2768
- trigger = await runTriggerTest(parsedSkill, {
3615
+ const triggerOptions = {
2769
3616
  provider: options.provider,
2770
3617
  model: options.model,
2771
3618
  queries: options.queries,
2772
3619
  numQueries: options.numQueries,
2773
3620
  seed: options.triggerSeed,
3621
+ concurrency: options.concurrency,
2774
3622
  verbose: options.verbose
2775
- });
2776
- options.onStage?.("eval");
2777
- evalResult = await runEval(parsedSkill, {
3623
+ };
3624
+ const evalOptions = {
2778
3625
  provider: options.provider,
2779
3626
  model: options.model,
2780
3627
  graderModel: options.graderModel,
2781
3628
  numRuns: options.evalNumRuns,
2782
- prompts: options.prompts
2783
- });
3629
+ prompts: options.prompts,
3630
+ concurrency: options.concurrency
3631
+ };
3632
+ if ((options.concurrency ?? 5) === 1) {
3633
+ options.onStage?.("trigger");
3634
+ trigger = await runTriggerTest(parsedSkill, triggerOptions);
3635
+ options.onStage?.("eval");
3636
+ evalResult = await runEval(parsedSkill, evalOptions);
3637
+ } else {
3638
+ options.onStage?.("trigger");
3639
+ options.onStage?.("eval");
3640
+ [trigger, evalResult] = await Promise.all([runTriggerTest(parsedSkill, triggerOptions), runEval(parsedSkill, evalOptions)]);
3641
+ }
2784
3642
  }
2785
3643
  }
2786
3644
  const triggerF1 = trigger ? trigger.metrics.f1 : null;
@@ -2815,15 +3673,17 @@ async function runCheck(inputPath, options) {
2815
3673
  }
2816
3674
 
2817
3675
  // src/commands/check.ts
2818
- var checkCliSchema = z9.object({
2819
- graderModel: z9.string().optional(),
2820
- apiKey: z9.string().optional(),
2821
- queries: z9.string().optional(),
2822
- seed: z9.number().int().optional(),
2823
- prompts: z9.string().optional(),
2824
- saveResults: z9.string().optional(),
2825
- continueOnLintFail: z9.boolean().optional(),
2826
- verbose: z9.boolean().optional()
3676
+ var checkCliSchema = z10.object({
3677
+ graderModel: z10.string().optional(),
3678
+ apiKey: z10.string().optional(),
3679
+ queries: z10.string().optional(),
3680
+ seed: z10.number().int().optional(),
3681
+ prompts: z10.string().optional(),
3682
+ concurrency: z10.number().int().min(1).optional(),
3683
+ html: z10.string().optional(),
3684
+ saveResults: z10.string().optional(),
3685
+ continueOnLintFail: z10.boolean().optional(),
3686
+ verbose: z10.boolean().optional()
2827
3687
  });
2828
3688
  var DEFAULT_ANTHROPIC_MODEL3 = "claude-sonnet-4-5-20250929";
2829
3689
  var DEFAULT_OPENAI_MODEL3 = "gpt-4.1-mini";
@@ -2882,6 +3742,7 @@ async function handleCheckCommand(targetPath, options, command) {
2882
3742
  triggerSeed: options.triggerSeed,
2883
3743
  prompts,
2884
3744
  evalNumRuns: options.numRuns,
3745
+ concurrency: options.concurrency,
2885
3746
  minF1: options.minF1,
2886
3747
  minAssertPassRate: options.minAssertPassRate,
2887
3748
  continueOnLintFail: options.continueOnLintFail,
@@ -2894,10 +3755,8 @@ async function handleCheckCommand(targetPath, options, command) {
2894
3755
  spinner.text = "Running lint checks...";
2895
3756
  } else if (stage === "parse") {
2896
3757
  spinner.text = "Parsing skill for model evaluations...";
2897
- } else if (stage === "trigger") {
2898
- spinner.text = "Running trigger test suite...";
2899
- } else if (stage === "eval") {
2900
- spinner.text = "Running end-to-end eval suite...";
3758
+ } else if (stage === "trigger" || stage === "eval") {
3759
+ spinner.text = "Running trigger and eval suites...";
2901
3760
  }
2902
3761
  }
2903
3762
  });
@@ -2913,6 +3772,9 @@ async function handleCheckCommand(targetPath, options, command) {
2913
3772
  false
2914
3773
  );
2915
3774
  }
3775
+ if (options.html) {
3776
+ await fs10.writeFile(options.html, renderCheckHtml(result), "utf8");
3777
+ }
2916
3778
  process.exitCode = result.gates.overallPassed ? 0 : 1;
2917
3779
  } catch (error) {
2918
3780
  spinner?.stop();
@@ -2921,7 +3783,7 @@ async function handleCheckCommand(targetPath, options, command) {
2921
3783
  }
2922
3784
  }
2923
3785
  function registerCheckCommand(program) {
2924
- program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
3786
+ program.command("check").description("Run lint + trigger + eval with threshold-based quality gates.").argument("<path-to-skill>", "Path to SKILL.md or skill directory").option("--provider <provider>", "LLM provider: anthropic|openai").option("--model <model>", "Model for trigger/eval runs").option("--grader-model <model>", "Model used for grading (defaults to --model)").option("--api-key <key>", "API key override").option("--queries <path>", "Path to custom trigger queries JSON").option("--num-queries <n>", "Number of auto-generated trigger queries", (value) => Number.parseInt(value, 10)).option("--seed <number>", "RNG seed for reproducible results", (value) => Number.parseInt(value, 10)).option("--prompts <path>", "Path to eval prompts JSON").option("--concurrency <n>", "Maximum in-flight trigger/eval tasks", (value) => Number.parseInt(value, 10)).option("--html <path>", "Write an HTML report to the given file path").option("--min-f1 <n>", "Minimum required trigger F1 score (0-1)", (value) => Number.parseFloat(value)).option("--min-assert-pass-rate <n>", "Minimum required eval assertion pass rate (0-1)", (value) => Number.parseFloat(value)).option("--save-results <path>", "Save combined check results to JSON").option("--continue-on-lint-fail", "Continue trigger/eval stages even when lint has failures").option("--verbose", "Show detailed trigger/eval output sections").action(async (targetPath, _commandOptions, command) => {
2925
3787
  const globalOptions = getGlobalCliOptions(command);
2926
3788
  const config = getResolvedConfig(command);
2927
3789
  const parsedCli = checkCliSchema.safeParse(command.opts());
@@ -2944,6 +3806,8 @@ function registerCheckCommand(program) {
2944
3806
  minF1: config.trigger.threshold,
2945
3807
  minAssertPassRate: config.eval.threshold,
2946
3808
  numRuns: config.eval.numRuns,
3809
+ concurrency: config.concurrency,
3810
+ html: parsedCli.data.html,
2947
3811
  lintFailOn: config.lint.failOn,
2948
3812
  lintSuppress: config.lint.suppress,
2949
3813
  triggerSeed: parsedCli.data.seed ?? config.trigger.seed,
@@ -2961,7 +3825,7 @@ function resolveVersion() {
2961
3825
  try {
2962
3826
  const currentFilePath = fileURLToPath(import.meta.url);
2963
3827
  const packageJsonPath = path6.resolve(path6.dirname(currentFilePath), "..", "package.json");
2964
- const raw = fs7.readFileSync(packageJsonPath, "utf8");
3828
+ const raw = fs11.readFileSync(packageJsonPath, "utf8");
2965
3829
  const parsed = JSON.parse(raw);
2966
3830
  return parsed.version ?? "0.0.0";
2967
3831
  } catch {