@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
@@ -0,0 +1,135 @@
1
+ "use strict";
2
+ /**
3
+ * GitHub formatter for evalgate check.
4
+ * - stdout: minimal (verdict + score + link) + ::error annotations for failed cases
5
+ * - Step summary: full Markdown written to GITHUB_STEP_SUMMARY (not stdout)
6
+ */
7
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
8
+ if (k2 === undefined) k2 = k;
9
+ var desc = Object.getOwnPropertyDescriptor(m, k);
10
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
11
+ desc = { enumerable: true, get: function() { return m[k]; } };
12
+ }
13
+ Object.defineProperty(o, k2, desc);
14
+ }) : (function(o, m, k, k2) {
15
+ if (k2 === undefined) k2 = k;
16
+ o[k2] = m[k];
17
+ }));
18
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
19
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
20
+ }) : function(o, v) {
21
+ o["default"] = v;
22
+ });
23
+ var __importStar = (this && this.__importStar) || (function () {
24
+ var ownKeys = function(o) {
25
+ ownKeys = Object.getOwnPropertyNames || function (o) {
26
+ var ar = [];
27
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
28
+ return ar;
29
+ };
30
+ return ownKeys(o);
31
+ };
32
+ return function (mod) {
33
+ if (mod && mod.__esModule) return mod;
34
+ var result = {};
35
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
36
+ __setModuleDefault(result, mod);
37
+ return result;
38
+ };
39
+ })();
40
+ Object.defineProperty(exports, "__esModule", { value: true });
41
+ exports.appendStepSummary = appendStepSummary;
42
+ exports.formatGitHub = formatGitHub;
43
+ const fs = __importStar(require("node:fs"));
44
+ const snippet_1 = require("../render/snippet");
45
+ const ANNOTATION_MAX = 10;
46
/**
 * Escape text for use as the message (data) part of a GitHub Actions
 * workflow command such as `::error ...::<message>`.
 *
 * The runner percent-decodes command data, so a literal `%` has to be encoded
 * first (as `%25`); otherwise user text like "%0A" would be decoded as a line
 * break. Carriage returns are dropped and line feeds are encoded as `%0A`,
 * which keeps the whole command on a single stdout line.
 *
 * @param s Raw message text.
 * @returns The escaped, single-line message.
 */
function escapeAnnotationMessage(s) {
    return s
        // Must run first so the `%` introduced by `%0A` below is not re-encoded.
        .replace(/%/g, "%25")
        .replace(/\r/g, "")
        .replace(/\n/g, "%0A");
}
49
/**
 * Build a single `::error` workflow-command line for one failed case.
 *
 * The case is identified by `testCaseId`, then `name`, then "unknown". The
 * detail text is the first present of `reason`, `outputSnippet`, `output`,
 * falling back to "no output"; it is passed through `truncateSnippet` with a
 * limit of 100 before the message is escaped.
 *
 * @param fc Failed-case record from the report.
 * @returns The annotation line to print on stdout.
 */
function formatAnnotation(fc) {
    const caseId = fc.testCaseId ?? fc.name ?? "unknown";
    const detail = fc.reason ?? fc.outputSnippet ?? fc.output ?? "no output";
    const shortDetail = (0, snippet_1.truncateSnippet)(detail, 100);
    const escaped = escapeAnnotationMessage(`TestCase ${caseId} failed - ${shortDetail}`);
    return `::error title=EvalGate regression::${escaped}`;
}
55
/**
 * Append a Markdown summary of the report to the file named by the
 * GITHUB_STEP_SUMMARY environment variable.
 *
 * Does nothing when the variable is unset or empty. The summary contains the
 * verdict, the score (with baseline delta when both values are present), up to
 * ten failing cases, an optional dashboard link and, for any non-pass verdict,
 * a tip block. Write errors are swallowed on purpose: the summary is
 * best-effort and must not fail the check.
 *
 * @param report Check report to summarise.
 */
function appendStepSummary(report) {
    const summaryFile = typeof process !== "undefined" ? process.env?.GITHUB_STEP_SUMMARY : undefined;
    if (!summaryFile) {
        return;
    }
    // Maximum number of failing cases listed individually.
    const maxListed = 10;
    const isPass = report.verdict === "pass";
    let headline;
    if (isPass) {
        headline = "✅ **PASSED**";
    }
    else if (report.verdict === "warn") {
        headline = `⚠️ **WARNED**: ${report.reasonMessage ?? report.reasonCode}`;
    }
    else {
        headline = `❌ **FAILED**: ${report.reasonMessage ?? report.reasonCode}`;
    }
    let baselineNote = "";
    if (report.baselineScore != null && report.delta != null) {
        const sign = report.delta >= 0 ? "+" : "";
        baselineNote = ` (baseline ${report.baselineScore}, ${sign}${report.delta} pts)`;
    }
    const out = [
        "## EvalGate Gate",
        "",
        headline,
        "",
        `**Score:** ${report.score ?? 0}/100${baselineNote}`,
        "",
    ];
    const failures = report.failedCases ?? [];
    const total = failures.length;
    if (total > 0) {
        out.push(`### ${total} failing case${total === 1 ? "" : "s"}`, "");
        failures.slice(0, maxListed).forEach((fc) => {
            const label = fc.name ?? fc.input ?? "(unnamed)";
            const expected = (0, snippet_1.truncateSnippet)(fc.expectedOutput ?? fc.expectedSnippet, 80);
            const actual = (0, snippet_1.truncateSnippet)(fc.output ?? fc.outputSnippet, 80);
            const got = actual ? `got "${actual}"` : "no output";
            out.push(`- **${(0, snippet_1.truncateSnippet)(label, 60)}** — expected: ${expected || "(unknown)"}, ${got}`);
        });
        if (total > maxListed) {
            out.push(`- _+ ${total - maxListed} more_`);
        }
        out.push("");
    }
    if (report.dashboardUrl) {
        out.push(`[View Dashboard](${report.dashboardUrl})`, "");
    }
    if (!isPass) {
        out.push("> **Tip:** Run `evalgate explain` locally to see root causes and suggested fixes.", "> Report saved to `.evalgate/last-report.json` — upload as a build artifact for offline analysis.", "");
    }
    try {
        fs.appendFileSync(summaryFile, out.join("\n"), "utf8");
    }
    catch {
        // Non-fatal: step summary is best-effort
    }
}
107
/**
 * Produce the stdout text for GitHub Actions and, as a side effect, write the
 * full Markdown report to the step summary via `appendStepSummary`.
 *
 * Stdout is kept minimal: one `::error` annotation per failed case (at most
 * ANNOTATION_MAX), then the verdict line, the score line and, when present,
 * the dashboard link.
 *
 * @param report Check report to render.
 * @returns The lines for stdout joined with "\n".
 */
function formatGitHub(report) {
    const failures = report.failedCases ?? [];
    // Annotations come first so they precede the human-readable summary.
    const output = failures
        .slice(0, ANNOTATION_MAX)
        .map((failedCase) => formatAnnotation(failedCase));
    const why = report.reasonMessage ?? report.reasonCode;
    switch (report.verdict) {
        case "pass":
            output.push("\n✓ EvalGate gate PASSED");
            break;
        case "warn":
            output.push(`\n⚠ EvalGate gate WARNED: ${why}`);
            break;
        default:
            output.push(`\n✗ EvalGate gate FAILED: ${why}`);
            break;
    }
    let baselineNote = "";
    if (report.baselineScore != null && report.delta != null) {
        const sign = report.delta >= 0 ? "+" : "";
        baselineNote = ` (baseline ${report.baselineScore}, ${sign}${report.delta} pts)`;
    }
    output.push(`Score: ${report.score ?? 0}/100${baselineNote}`);
    if (report.dashboardUrl) {
        output.push(`Dashboard: ${report.dashboardUrl}`);
    }
    // The detailed Markdown goes to GITHUB_STEP_SUMMARY, never to stdout.
    appendStepSummary(report);
    return output.join("\n");
}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Human-readable formatter for evalgate check output.
3
+ * Deterministic: verdict → score → failures → link → hint.
4
+ */
5
+ import type { CheckReport } from "./types";
6
+ export declare function formatHuman(report: CheckReport): string;
@@ -0,0 +1,110 @@
1
+ "use strict";
2
+ /**
3
+ * Human-readable formatter for evalgate check output.
4
+ * Deterministic: verdict → score → failures → link → hint.
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.formatHuman = formatHuman;
8
+ const snippet_1 = require("../render/snippet");
9
+ const TOP_N = 3;
10
/**
 * Render a check report as plain text for a terminal.
 *
 * Output order is fixed: verdict, score, failing cases (first TOP_N plus a
 * "+ N more" line), dashboard link, next-step hint for any non-pass verdict,
 * and finally an "--- Explain ---" section. The explain section is emitted
 * only when `report.explain` is set and at least one of `breakdown01`,
 * `contribPts`, a non-empty `flags` or `policyEvidence` is present.
 *
 * @param report Check report to render.
 * @returns The lines joined with "\n".
 */
function formatHuman(report) {
    // Render each [label, value] pair whose value is neither null nor undefined.
    const presentPairs = (pairs, sep) => pairs
        .filter(([, value]) => value != null)
        .map(([label, value]) => `${label}${sep}${value}`);
    const out = [];
    const isPass = report.verdict === "pass";
    const why = report.reasonMessage ?? report.reasonCode;
    if (isPass) {
        out.push("\n✓ EvalGate gate PASSED");
    }
    else if (report.verdict === "warn") {
        out.push(`\n⚠ EvalGate gate WARNED: ${why}`);
    }
    else {
        out.push(`\n✗ EvalGate gate FAILED: ${why}`);
    }
    let baselineNote = "";
    if (report.baselineScore != null && report.delta != null) {
        const sign = report.delta >= 0 ? "+" : "";
        baselineNote = ` (baseline ${report.baselineScore}, ${sign}${report.delta} pts)`;
    }
    out.push(`Score: ${report.score ?? 0}/100${baselineNote}`);
    const failures = report.failedCases ?? [];
    if (failures.length > 0) {
        const shown = failures.slice(0, TOP_N);
        out.push(`${failures.length} failing case${failures.length === 1 ? "" : "s"}:`);
        shown.forEach((fc) => {
            const label = fc.name ?? fc.input ?? "(unnamed)";
            const expected = (0, snippet_1.truncateSnippet)(fc.expectedOutput ?? fc.expectedSnippet, 50);
            const actual = (0, snippet_1.truncateSnippet)(fc.output ?? fc.outputSnippet, 50);
            const got = actual ? `got "${actual}"` : "no output";
            out.push(` - "${(0, snippet_1.truncateSnippet)(label, 50)}" → expected: ${expected || "(unknown)"}, ${got}`);
        });
        const hidden = failures.length - shown.length;
        if (hidden > 0) {
            out.push(` + ${hidden} more`);
        }
    }
    if (report.dashboardUrl) {
        out.push(`Dashboard: ${report.dashboardUrl}`);
    }
    if (!isPass) {
        out.push("Next: View full report above, fix failing cases, or adjust gate with --minScore / --maxDrop / --warnDrop");
    }
    const hasExplainData = report.breakdown01 ||
        report.contribPts ||
        report.flags?.length ||
        report.policyEvidence;
    if (!report.explain || !hasExplainData) {
        return out.join("\n");
    }
    out.push("", "--- Explain ---");
    if (report.contribPts) {
        const cp = report.contribPts;
        const pts = presentPairs([
            ["passRate", cp.passRatePts],
            ["safety", cp.safetyPts],
            ["compliance", cp.compliancePts],
            ["performance", cp.performancePts],
        ], ": ");
        if (pts.length) {
            out.push(`Contrib pts: ${pts.join(", ")}`);
        }
    }
    if (report.breakdown01) {
        const b = report.breakdown01;
        const parts = presentPairs([
            ["passRate", b.passRate],
            ["safety", b.safety],
            ["judge", b.judge],
            ["schema", b.schema],
            ["latency", b.latency],
            ["cost", b.cost],
        ], "=");
        if (parts.length) {
            out.push(`Breakdown: ${parts.join(", ")}`);
        }
    }
    if (report.flags && report.flags.length > 0) {
        out.push(`Flags: ${report.flags.join(", ")}`);
    }
    if (report.thresholds) {
        const t = report.thresholds;
        const parts = presentPairs([
            ["minScore", t.minScore],
            ["maxDrop", t.maxDrop],
            ["minN", t.minN],
        ], "=");
        if (parts.length) {
            out.push(`Thresholds: ${parts.join(", ")}`);
        }
    }
    if (report.policyEvidence) {
        const evidence = report.policyEvidence;
        out.push(`Policy sub-check failed: ${evidence.failedCheck ?? "unknown"}`);
        if (evidence.remediation) {
            out.push(`Remediation: ${evidence.remediation}`);
        }
        if (evidence.snapshot && Object.keys(evidence.snapshot).length > 0) {
            out.push(`Snapshot: ${JSON.stringify(evidence.snapshot)}`);
        }
    }
    return out.join("\n");
}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * JSON formatter for evalgate check.
3
+ * Outputs only JSON, no extra logs.
4
+ */
5
+ import type { CheckReport } from "./types";
6
+ export declare function formatJson(report: CheckReport): string;
@@ -0,0 +1,10 @@
1
+ "use strict";
2
+ /**
3
+ * JSON formatter for evalgate check.
4
+ * Outputs only JSON, no extra logs.
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.formatJson = formatJson;
8
/**
 * Serialise the report as compact, single-line JSON.
 *
 * @param report Check report to serialise.
 * @returns The JSON text with no indentation or extra whitespace.
 */
function formatJson(report) {
    // No replacer and no indentation: the default JSON.stringify output is already compact.
    const serialized = JSON.stringify(report);
    return serialized;
}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * PR comment markdown builder for evalgate check --pr-comment-out.
3
+ * Produces deterministic markdown for GitHub Action to post as PR comment.
4
+ */
5
+ import type { CheckReport } from "./types";
6
+ /**
7
+ * Hidden marker for GitHub Action to find and update existing comment (sticky update).
8
+ * Action should: 1) post body from file 2) search PR comments for this marker 3) update if found, else create.
9
+ * Export for use in Action scripts.
10
+ */
11
+ export declare const PR_COMMENT_MARKER = "<!-- evalgate-gate-comment -->";
12
+ export declare function buildPrComment(report: CheckReport): string;
@@ -0,0 +1,103 @@
1
+ "use strict";
2
+ /**
3
+ * PR comment markdown builder for evalgate check --pr-comment-out.
4
+ * Produces deterministic markdown for GitHub Action to post as PR comment.
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.PR_COMMENT_MARKER = void 0;
8
+ exports.buildPrComment = buildPrComment;
9
+ const snippet_1 = require("../render/snippet");
10
+ const TOP_FAILURES = 3;
11
/**
 * Make text safe to embed in a single-line markdown fragment.
 *
 * Pipes are backslash-escaped and every line break is replaced by one space.
 * CRLF and a lone CR are treated as line breaks too, so text with Windows
 * line endings cannot leave a stray carriage return in the comment.
 *
 * @param s Raw text.
 * @returns The text on one line with `|` escaped.
 */
function escapeMarkdown(s) {
    // `\r\n` is listed first so a CRLF pair collapses to one space, not two.
    return s.replace(/\|/g, "\\|").replace(/\r\n|\r|\n/g, " ");
}
14
+ /**
15
+ * Hidden marker for GitHub Action to find and update existing comment (sticky update).
16
+ * Action should: 1) post body from file 2) search PR comments for this marker 3) update if found, else create.
17
+ * Export for use in Action scripts.
18
+ */
19
+ exports.PR_COMMENT_MARKER = "<!-- evalgate-gate-comment -->";
20
/**
 * Build the markdown body of the PR comment for a check report.
 *
 * The body starts with PR_COMMENT_MARKER, followed by a verdict heading, the
 * score, the reason code (and message, if any), the policy name when set, the
 * first TOP_FAILURES failing cases, an optional contribution breakdown (only
 * when `report.explain` and `report.contribPts` are set) and dashboard/share
 * links.
 *
 * The heading distinguishes four states: gate not applied, passed, warned and
 * failed. A "warn" verdict gets its own heading so that a warning is not
 * presented as a failure, matching the other formatters.
 *
 * @param report Check report to render.
 * @returns The comment markdown, lines joined with "\n".
 */
function buildPrComment(report) {
    const lines = [];
    lines.push(exports.PR_COMMENT_MARKER);
    lines.push("");
    // Only an explicit `false` counts as "gate not applied".
    const gateApplied = report.gateApplied !== false;
    // Verdict heading — "NOT APPLIED" is kept distinct from a pass.
    if (!gateApplied) {
        lines.push("## ⚠️ EvalGate Regression Gate — NOT APPLIED");
        lines.push("");
        lines.push("**Gate not applied: baseline missing.**");
        if (report.actionableMessage) {
            lines.push("");
            lines.push(report.actionableMessage);
        }
    }
    else if (report.verdict === "pass") {
        lines.push("## ✅ EvalGate Regression Gate — PASSED");
    }
    else if (report.verdict === "warn") {
        lines.push("## ⚠️ EvalGate Regression Gate — WARNED");
    }
    else {
        lines.push("## 🚨 EvalGate Regression Gate — FAILED");
    }
    lines.push("");
    // Score, plus the delta when both a baseline score and a delta are present.
    const deltaStr = report.baselineScore != null && report.delta != null
        ? ` (${report.delta >= 0 ? "+" : ""}${report.delta} from baseline ${report.baselineScore})`
        : "";
    lines.push(`**Score:** ${report.score ?? 0}/100${deltaStr}`);
    lines.push("");
    // Reason code, with the human-readable message in italics when available.
    lines.push(`**Reason:** ${report.reasonCode}`);
    if (report.reasonMessage) {
        lines.push(`_${escapeMarkdown(report.reasonMessage)}_`);
    }
    lines.push("");
    // Policy (only when the report names one)
    if (report.policy) {
        lines.push(`**Policy:** ${report.policy}`);
        lines.push("");
    }
    // Top failures (at most TOP_FAILURES, then a "+ N more" line)
    const failedCases = report.failedCases ?? [];
    if (failedCases.length > 0) {
        lines.push("### Top Issues");
        lines.push("");
        for (const fc of failedCases.slice(0, TOP_FAILURES)) {
            const label = fc.name ?? fc.input ?? "(unnamed)";
            const reason = fc.reason ?? fc.outputSnippet ?? fc.output ?? "no output";
            lines.push(`- **${(0, snippet_1.truncateSnippet)(escapeMarkdown(label), 60)}** — ${(0, snippet_1.truncateSnippet)(escapeMarkdown(reason), 80)}`);
        }
        if (failedCases.length > TOP_FAILURES) {
            lines.push(`- _+ ${failedCases.length - TOP_FAILURES} more_`);
        }
        lines.push("");
    }
    // Explain summary (if --explain)
    if (report.explain && report.contribPts) {
        const pts = report.contribPts;
        const parts = [];
        if (pts.passRatePts != null)
            parts.push(`pass rate: ${pts.passRatePts} pts`);
        if (pts.safetyPts != null)
            parts.push(`safety: ${pts.safetyPts} pts`);
        if (pts.compliancePts != null)
            parts.push(`compliance: ${pts.compliancePts} pts`);
        if (pts.performancePts != null)
            parts.push(`performance: ${pts.performancePts} pts`);
        if (parts.length > 0) {
            lines.push("### Breakdown");
            lines.push("");
            lines.push(parts.join(" | "));
            lines.push("");
        }
    }
    // Dashboard URL
    if (report.dashboardUrl) {
        lines.push(`🔎 [Dashboard](${report.dashboardUrl})`);
    }
    // Share URL (if exists)
    if (report.shareUrl) {
        lines.push(`🔗 [Share Snapshot](${report.shareUrl})`);
    }
    lines.push("");
    return lines.join("\n");
}
@@ -0,0 +1,103 @@
1
+ /**
2
+ * CheckReport and related types for formatters.
3
+ */
4
+ export type GateVerdict = "pass" | "warn" | "fail";
5
+ /** "neutral" = exit 0 but gate not applied (e.g. baseline missing with --baseline auto) */
6
+ export type GateMode = "enforced" | "neutral";
7
+ /** Canonical reason codes. Import REASON_CODES from ../reason-codes for constants. */
8
+ export type FailureReasonCode = "PASS" | "WARN_REGRESSION" | "LOW_SAMPLE_SIZE" | "BASELINE_MISSING" | "SCORE_TOO_LOW" | "DELTA_TOO_HIGH" | "COST_BUDGET_EXCEEDED" | "LATENCY_BUDGET_EXCEEDED" | "POLICY_FAILED" | "UNKNOWN" | "LOW_SCORE" | "LOW_PASS_RATE" | "SAFETY_RISK" | "LATENCY_RISK" | "COST_RISK" | "MAX_DROP_EXCEEDED" | "INSUFFICIENT_EVIDENCE" | "POLICY_VIOLATION";
9
+ export type ScoreBreakdown01 = {
10
+ passRate?: number;
11
+ safety?: number;
12
+ judge?: number;
13
+ schema?: number;
14
+ latency?: number;
15
+ cost?: number;
16
+ };
17
+ export type ScoreContribPts = {
18
+ passRatePts?: number;
19
+ safetyPts?: number;
20
+ compliancePts?: number;
21
+ performancePts?: number;
22
+ };
23
+ export type GateThresholds = {
24
+ minScore?: number;
25
+ minPassRate?: number;
26
+ minSafety?: number;
27
+ maxDrop?: number;
28
+ warnDrop?: number;
29
+ minN?: number;
30
+ allowWeakEvidence?: boolean;
31
+ baseline?: "published" | "previous" | "production" | "auto";
32
+ maxCostUsd?: number;
33
+ maxLatencyMs?: number;
34
+ maxCostDeltaUsd?: number;
35
+ };
36
+ export type FailedCase = {
37
+ testCaseId?: number;
38
+ status?: "failed" | "error" | "skipped" | "passed";
39
+ name?: string;
40
+ input?: string;
41
+ inputSnippet?: string;
42
+ expectedOutput?: string;
43
+ expectedSnippet?: string;
44
+ output?: string;
45
+ outputSnippet?: string;
46
+ reason?: string;
47
+ };
48
+ export type CiContext = {
49
+ provider?: "github" | "gitlab" | "circle" | "unknown";
50
+ repo?: string;
51
+ sha?: string;
52
+ branch?: string;
53
+ pr?: number;
54
+ runUrl?: string;
55
+ actor?: string;
56
+ };
57
+ /** Current schema version for CheckReport (.evalgate/last-report.json). Bump on breaking changes. */
58
+ export declare const CHECK_REPORT_SCHEMA_VERSION = 1;
59
+ export type CheckReport = {
60
+ schemaVersion?: number;
61
+ evaluationId: string;
62
+ runId?: number;
63
+ verdict: GateVerdict;
64
+ /** false when gate not applied (e.g. baseline missing, exit 0) — prevents false confidence */
65
+ gateApplied: boolean;
66
+ /** "enforced" = gate ran; "neutral" = exit 0, gate skipped */
67
+ gateMode: GateMode;
68
+ reasonCode: FailureReasonCode;
69
+ /** Actionable message for PR comment / UX */
70
+ actionableMessage?: string;
71
+ reasonMessage?: string;
72
+ score?: number;
73
+ baselineScore?: number;
74
+ delta?: number;
75
+ passRate?: number;
76
+ safetyPassRate?: number;
77
+ flags?: string[];
78
+ breakdown01?: ScoreBreakdown01;
79
+ contribPts?: ScoreContribPts;
80
+ thresholds?: GateThresholds;
81
+ n?: number;
82
+ evidenceLevel?: "strong" | "medium" | "weak";
83
+ baselineMissing?: boolean;
84
+ baselineStatus?: "found" | "missing";
85
+ dashboardUrl?: string;
86
+ failedCases?: FailedCase[];
87
+ failedCasesShown?: number;
88
+ failedCasesMore?: number;
89
+ requestId?: string;
90
+ durationMs?: number;
91
+ ci?: CiContext;
92
+ explain?: boolean;
93
+ shareUrl?: string;
94
+ policy?: string;
95
+ baselineRunId?: number;
96
+ ciRunUrl?: string;
97
+ /** When --explain and policy failed: which sub-check failed, remediation, snapshot */
98
+ policyEvidence?: {
99
+ failedCheck?: string;
100
+ remediation?: string;
101
+ snapshot?: Record<string, unknown>;
102
+ };
103
+ };
@@ -0,0 +1,8 @@
1
+ "use strict";
2
+ /**
3
+ * CheckReport and related types for formatters.
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.CHECK_REPORT_SCHEMA_VERSION = void 0;
7
+ /** Current schema version for CheckReport (.evalgate/last-report.json). Bump on breaking changes. */
8
+ exports.CHECK_REPORT_SCHEMA_VERSION = 1;
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Pure gate evaluation. No console output.
3
+ * Baseline missing → configuration failure (BAD_ARGS), not API_ERROR.
4
+ */
5
+ import type { QualityLatestData } from "./api";
6
+ import type { CheckArgs } from "./check";
7
+ export type GateResult = {
8
+ exitCode: number;
9
+ passed: boolean;
10
+ reasonCode: string;
11
+ reasonMessage: string | null;
12
+ /** true when gate was skipped (e.g. baseline missing + auto) */
13
+ gateSkipped?: boolean;
14
+ /** When policy failed: sub-check, remediation, snapshot for explain */
15
+ policyEvidence?: {
16
+ failedCheck: string;
17
+ remediation: string;
18
+ snapshot?: Record<string, unknown>;
19
+ };
20
+ };
21
+ export declare function evaluateGate(args: CheckArgs, quality: QualityLatestData): GateResult;