evalgate-sdk 3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. evalgate_sdk/__init__.py +707 -0
  2. evalgate_sdk/_version.py +3 -0
  3. evalgate_sdk/assertions.py +1362 -0
  4. evalgate_sdk/auto.py +247 -0
  5. evalgate_sdk/batch.py +174 -0
  6. evalgate_sdk/cache.py +111 -0
  7. evalgate_sdk/ci_context.py +123 -0
  8. evalgate_sdk/cli/__init__.py +111 -0
  9. evalgate_sdk/cli/api.py +261 -0
  10. evalgate_sdk/cli/cli_constants.py +20 -0
  11. evalgate_sdk/cli/commands.py +1041 -0
  12. evalgate_sdk/cli/config.py +228 -0
  13. evalgate_sdk/cli/env.py +43 -0
  14. evalgate_sdk/cli/formatters/types.py +132 -0
  15. evalgate_sdk/cli/golden_commands.py +322 -0
  16. evalgate_sdk/cli/manifest.py +301 -0
  17. evalgate_sdk/cli/new_commands.py +435 -0
  18. evalgate_sdk/cli/policy_packs.py +103 -0
  19. evalgate_sdk/cli/profiles.py +12 -0
  20. evalgate_sdk/cli/regression_gate.py +312 -0
  21. evalgate_sdk/cli/render/__init__.py +1 -0
  22. evalgate_sdk/cli/render/snippet.py +18 -0
  23. evalgate_sdk/cli/render/sort.py +29 -0
  24. evalgate_sdk/cli/report/__init__.py +1 -0
  25. evalgate_sdk/cli/report/build_check_report.py +209 -0
  26. evalgate_sdk/cli/traces.py +186 -0
  27. evalgate_sdk/cli/workspace.py +63 -0
  28. evalgate_sdk/client.py +609 -0
  29. evalgate_sdk/cluster.py +359 -0
  30. evalgate_sdk/collector.py +161 -0
  31. evalgate_sdk/constants.py +6 -0
  32. evalgate_sdk/context.py +151 -0
  33. evalgate_sdk/errors.py +236 -0
  34. evalgate_sdk/export.py +238 -0
  35. evalgate_sdk/formatters/__init__.py +11 -0
  36. evalgate_sdk/formatters/github.py +51 -0
  37. evalgate_sdk/formatters/human.py +68 -0
  38. evalgate_sdk/formatters/json_fmt.py +11 -0
  39. evalgate_sdk/formatters/pr_comment.py +80 -0
  40. evalgate_sdk/golden.py +426 -0
  41. evalgate_sdk/integrations/__init__.py +1 -0
  42. evalgate_sdk/integrations/anthropic.py +99 -0
  43. evalgate_sdk/integrations/autogen.py +62 -0
  44. evalgate_sdk/integrations/crewai.py +61 -0
  45. evalgate_sdk/integrations/langchain.py +100 -0
  46. evalgate_sdk/integrations/openai.py +155 -0
  47. evalgate_sdk/integrations/openai_eval.py +221 -0
  48. evalgate_sdk/local.py +144 -0
  49. evalgate_sdk/logger.py +123 -0
  50. evalgate_sdk/matchers.py +62 -0
  51. evalgate_sdk/otel.py +256 -0
  52. evalgate_sdk/pagination.py +145 -0
  53. evalgate_sdk/py.typed +0 -0
  54. evalgate_sdk/pytest_plugin.py +96 -0
  55. evalgate_sdk/reason_codes.py +103 -0
  56. evalgate_sdk/regression.py +196 -0
  57. evalgate_sdk/replay_decision.py +115 -0
  58. evalgate_sdk/runtime/__init__.py +50 -0
  59. evalgate_sdk/runtime/adapters/__init__.py +1 -0
  60. evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
  61. evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
  62. evalgate_sdk/runtime/context.py +68 -0
  63. evalgate_sdk/runtime/eval.py +318 -0
  64. evalgate_sdk/runtime/execution_mode.py +170 -0
  65. evalgate_sdk/runtime/executor.py +92 -0
  66. evalgate_sdk/runtime/registry.py +125 -0
  67. evalgate_sdk/runtime/run_report.py +249 -0
  68. evalgate_sdk/runtime/types.py +143 -0
  69. evalgate_sdk/snapshot.py +219 -0
  70. evalgate_sdk/streaming.py +124 -0
  71. evalgate_sdk/synthesize.py +226 -0
  72. evalgate_sdk/testing.py +128 -0
  73. evalgate_sdk/types.py +666 -0
  74. evalgate_sdk/utils/__init__.py +1 -0
  75. evalgate_sdk/utils/input_hash.py +42 -0
  76. evalgate_sdk/workflows.py +264 -0
  77. evalgate_sdk-3.3.1.dist-info/METADATA +608 -0
  78. evalgate_sdk-3.3.1.dist-info/RECORD +80 -0
  79. evalgate_sdk-3.3.1.dist-info/WHEEL +4 -0
  80. evalgate_sdk-3.3.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,312 @@
1
+ """evalgate gate — Run the regression gate.
2
+
3
+ Two modes:
4
+ 1. Project mode: delegates to eval:regression-gate script (full gate)
5
+ 2. Built-in mode: runs tests, compares against baseline
6
+
7
+ Port of ``cli/regression-gate.ts``.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import os
14
+ import re
15
+ import subprocess
16
+ import time
17
+ from dataclasses import dataclass, field
18
+ from datetime import datetime, timezone
19
+ from pathlib import Path
20
+ from typing import Any, Literal
21
+
22
+
23
@dataclass
class GateArgs:
    """Options accepted by the ``gate`` command after argument parsing."""

    # Output renderer chosen with ``--format``; the human-readable table is the default.
    format: Literal["human", "json", "github"] = "human"
26
+
27
+
28
@dataclass
class BuiltinReport:
    """Result of one built-in gate run, convertible to the JSON report payload."""

    schema_version: int = 1
    timestamp: str = ""
    exit_code: int = 0
    category: str = "pass"
    passed: bool = True
    failures: list[str] = field(default_factory=list)
    deltas: list[dict[str, Any]] = field(default_factory=list)
    baseline: dict[str, str] | None = None
    duration_ms: int = 0
    command: str = ""
    runner: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the report as a dict using the camelCase keys of the JSON artifact."""
        # (JSON key, attribute name) pairs, in the order the keys are emitted.
        json_fields = (
            ("schemaVersion", "schema_version"),
            ("timestamp", "timestamp"),
            ("exitCode", "exit_code"),
            ("category", "category"),
            ("passed", "passed"),
            ("failures", "failures"),
            ("deltas", "deltas"),
            ("baseline", "baseline"),
            ("durationMs", "duration_ms"),
            ("command", "command"),
            ("runner", "runner"),
        )
        return {json_key: getattr(self, attr) for json_key, attr in json_fields}
56
+
57
+
58
def parse_gate_args(argv: list[str]) -> GateArgs:
    """Parse ``gate`` command arguments; only ``--format <value>`` is recognised.

    Unknown tokens are skipped. A ``--format`` whose value is not one of the
    supported renderers is consumed together with its value and otherwise
    ignored, so the default format stays in effect.
    """
    parsed = GateArgs()
    supported = ("human", "json", "github")
    pos, count = 0, len(argv)
    while pos < count:
        # A trailing "--format" with no value is treated like any other token.
        if argv[pos] != "--format" or pos + 1 >= count:
            pos += 1
            continue
        candidate = argv[pos + 1]
        if candidate in supported:
            parsed.format = candidate  # type: ignore[assignment]
        pos += 2
    return parsed
70
+
71
+
72
+ def _detect_test_runner(cwd: str) -> str:
73
+ """Detect Python test runner used in the project."""
74
+ pyproject = os.path.join(cwd, "pyproject.toml")
75
+ if os.path.isfile(pyproject):
76
+ try:
77
+ text = Path(pyproject).read_text(encoding="utf-8")
78
+ if "pytest" in text:
79
+ return "pytest"
80
+ if "unittest" in text:
81
+ return "unittest"
82
+ except OSError:
83
+ pass
84
+
85
+ if os.path.isfile(os.path.join(cwd, "pytest.ini")) or os.path.isfile(os.path.join(cwd, "setup.cfg")):
86
+ return "pytest"
87
+
88
+ return "pytest"
89
+
90
+
91
def _detect_test_command(cwd: str) -> str:
    """Return the shell-style command line used to run the project's tests."""
    uses_pytest = _detect_test_runner(cwd) == "pytest"
    return "python -m pytest" if uses_pytest else "python -m unittest discover"
97
+
98
+
99
def run_builtin_gate(cwd: str) -> BuiltinReport:
    """Run the built-in lightweight gate for the project rooted at ``cwd``.

    Loads ``evals/baseline.json``, runs the detected test command, and compares
    the outcome (pass/fail and the number of passing tests parsed from the
    output) with the baseline's ``confidenceTests`` section.

    Returns:
        A ``BuiltinReport`` with ``exit_code`` 0 (pass), 1 (regression) or
        2 (infra error: missing or invalid baseline, timeout, or the test
        command could not be started).
    """
    t0 = time.time()
    now = datetime.now(timezone.utc).isoformat()
    command = _detect_test_command(cwd)
    runner = _detect_test_runner(cwd)
    baseline_path = os.path.join(cwd, "evals", "baseline.json")

    def infra_error(message: str) -> BuiltinReport:
        # All infrastructure failures share the same report shape; only the message differs.
        return BuiltinReport(
            timestamp=now,
            exit_code=2,
            category="infra_error",
            passed=False,
            failures=[message],
            duration_ms=int((time.time() - t0) * 1000),
            command=command,
            runner=runner,
        )

    if not os.path.isfile(baseline_path):
        return infra_error("Baseline file not found. Run: evalgate init")

    try:
        baseline_data = json.loads(Path(baseline_path).read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError (non-UTF-8 file).
        return infra_error("Failed to parse evals/baseline.json")

    if not isinstance(baseline_data, dict):
        # Valid JSON that is not an object (e.g. a list) cannot carry baseline fields.
        return infra_error("Failed to parse evals/baseline.json: expected a JSON object")

    baseline_meta = None
    if baseline_data.get("updatedAt"):
        baseline_meta = {
            "updatedAt": baseline_data["updatedAt"],
            "updatedBy": baseline_data.get("updatedBy", "unknown"),
        }

    # Run tests
    try:
        result = subprocess.run(
            command.split(),
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=300,
        )
    except subprocess.TimeoutExpired:
        return infra_error("Test command timed out after 300s")
    except OSError as exc:
        return infra_error(f"Failed to run test command: {exc}")

    tests_passed = result.returncode == 0
    output = (result.stdout or "") + (result.stderr or "")

    # Extract the passing-test count from the runner output; 0 when no pattern matches.
    test_count = 0
    count_match = (
        re.search(r"(\d+)\s+(?:tests?|specs?)\s+(?:passed|completed)", output, re.I)
        or re.search(r"(\d+)\s+passed", output, re.I)
        or re.search(r"(\d+)\s+passing", output, re.I)
    )
    if count_match:
        test_count = int(count_match.group(1))

    # Tolerate a null or malformed "confidenceTests" section by falling back to defaults.
    confidence = baseline_data.get("confidenceTests")
    if not isinstance(confidence, dict):
        confidence = {}
    baseline_passed = confidence.get("passed", True)
    baseline_total = confidence.get("total", 0)
    if not isinstance(baseline_total, (int, float)):
        baseline_total = 0

    failures: list[str] = []
    deltas: list[dict[str, Any]] = []

    deltas.append(
        {
            "metric": "tests_passing",
            "baseline": baseline_passed,
            "current": tests_passed,
            "delta": "0" if tests_passed == baseline_passed else ("+1" if tests_passed else "-1"),
            "status": "pass" if tests_passed else "fail",
        }
    )

    # Failing tests only count as a regression when the baseline was passing.
    if not tests_passed and baseline_passed:
        failures.append("Tests were passing in baseline but are now failing")

    if test_count > 0 or baseline_total > 0:
        count_delta = test_count - baseline_total
        deltas.append(
            {
                "metric": "test_count",
                "baseline": baseline_total,
                "current": test_count,
                "delta": f"+{count_delta}" if count_delta >= 0 else str(count_delta),
                "status": "pass" if test_count >= baseline_total else "fail",
            }
        )
        if test_count < baseline_total:
            failures.append(f"Test count dropped from {baseline_total} to {test_count} ({count_delta})")

    has_regression = len(failures) > 0

    return BuiltinReport(
        timestamp=now,
        exit_code=1 if has_regression else 0,
        category="regression" if has_regression else "pass",
        passed=not has_regression,
        failures=failures,
        deltas=deltas,
        baseline=baseline_meta,
        duration_ms=int((time.time() - t0) * 1000),
        command=command,
        runner=runner,
    )
232
+
233
+
234
def format_human(report: BuiltinReport) -> str:
    """Render the report as a plain-text summary with an aligned metrics table."""
    status_icon = "✅" if report.passed else "❌"
    out = [f"\n{status_icon} EvalGate Gate: {report.category.upper()}\n"]

    if report.deltas:
        column_widths = (16, 10, 10, 8)

        def table_row(cells, trailer):
            # Left-justify each cell to its column width; the last column is free-width.
            padded = " ".join(str(cell).ljust(width) for cell, width in zip(cells, column_widths))
            return f" {padded} {trailer}"

        out.append(table_row(("Metric", "Baseline", "Current", "Delta"), "Status"))
        out.append(table_row(["-" * width for width in column_widths], "------"))
        for entry in report.deltas:
            mark = "✔" if entry["status"] == "pass" else "✖"
            cells = (entry["metric"], entry["baseline"], entry["current"], entry["delta"])
            out.append(table_row(cells, mark))

    if report.failures:
        out.append("\n Failures:")
        out.extend(f" • {failure}" for failure in report.failures)
        out.append("")

    return "\n".join(out)
259
+
260
+
261
def format_github(report: BuiltinReport) -> str:
    """Render the report as GitHub-flavoured markdown (heading, table, failures)."""
    verdict_icon = "✅" if report.passed else "❌"
    heading = f"## {verdict_icon} EvalGate Gate: {report.category}"
    table_rows = [
        "| {metric} | {baseline} | {current} | {delta} | {mark} |".format(
            metric=entry["metric"],
            baseline=entry["baseline"],
            current=entry["current"],
            delta=entry["delta"],
            mark="✅" if entry["status"] == "pass" else "❌",
        )
        for entry in report.deltas
    ]

    out = [
        heading,
        "",
        "| Metric | Baseline | Current | Delta | Status |",
        "|--------|----------|---------|-------|--------|",
        *table_rows,
    ]

    if report.failures:
        out += ["", "### Failures", ""]
        out += [f"- {failure}" for failure in report.failures]

    # The embedded newline leaves a blank line before the schema footer.
    out.append(f"\nSchema version: {report.schema_version}")
    return "\n".join(out)
281
+
282
+
283
def run_gate(argv: list[str] | None = None) -> int:
    """Main gate entry point.

    Runs the built-in gate in the current working directory, writes the JSON
    report to ``evals/regression-report.json``, and prints the report in the
    format selected by ``--format``.

    Args:
        argv: Command arguments; ``None`` is treated as an empty list.

    Returns:
        The report's exit code.
    """
    cwd = os.getcwd()
    args = parse_gate_args(argv or [])
    report = run_builtin_gate(cwd)
    report_json = json.dumps(report.to_dict(), indent=2)

    # Write report artifact
    evals_dir = os.path.join(cwd, "evals")
    os.makedirs(evals_dir, exist_ok=True)
    Path(os.path.join(evals_dir, "regression-report.json")).write_text(
        report_json + "\n",
        encoding="utf-8",
    )

    if args.format == "json":
        print(report_json)
    elif args.format == "github":
        md = format_github(report)
        summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
        if summary_path:
            try:
                # Explicit UTF-8: the markdown contains emoji, which a non-UTF-8
                # locale default would reject with UnicodeEncodeError (not an OSError).
                with open(summary_path, "a", encoding="utf-8") as f:
                    f.write(md + "\n")
            except OSError:
                # Best effort: the summary file is optional, stdout still gets the report.
                pass
        print(md)
    else:
        print(format_human(report))

    return report.exit_code
@@ -0,0 +1 @@
1
+ """Render utilities for CLI output."""
@@ -0,0 +1,18 @@
1
+ """Truncate a string for deterministic output.
2
+
3
+ Port of ``cli/render/snippet.ts``.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+
10
+
11
def truncate_snippet(s: str | None, max_len: int = 140) -> str:
    """Collapse whitespace runs to single spaces and cap the length.

    Args:
        s: Text to shorten. ``None`` yields ``""``. Non-string values (for
            example structured payloads from an API response) are converted
            with ``str()`` instead of raising ``TypeError``.
        max_len: Maximum number of characters kept before the ellipsis.

    Returns:
        The normalized text, or its first ``max_len`` characters followed by
        ``"…"`` when it is longer than ``max_len``.
    """
    if s is None:
        return ""
    text = s if isinstance(s, str) else str(s)
    normalized = re.sub(r"\s+", " ", text).strip()
    if len(normalized) <= max_len:
        return normalized
    return normalized[:max_len] + "…"
@@ -0,0 +1,29 @@
1
+ """Deterministic ordering for failed cases.
2
+
3
+ Sort by status severity (failed > error > skipped > passed), then by test_case_id asc.
4
+
5
+ Port of ``cli/render/sort.ts``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
# Rank of each known status; a lower rank sorts first. Unknown statuses rank after all of these.
STATUS_SEVERITY: dict[str, int] = {"failed": 0, "error": 1, "skipped": 2, "passed": 3}


def sort_failed_cases(cases: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return a new list ordered by status severity, then by test case id ascending."""

    def severity_then_id(case: dict[str, Any]) -> tuple[int, int]:
        status_name = case.get("status") or ""
        rank = STATUS_SEVERITY.get(status_name.lower(), 4)
        # Accept either key spelling; a missing id sorts as 0.
        case_id = case.get("test_case_id") or case.get("testCaseId") or 0
        return rank, case_id

    ordered = list(cases)
    ordered.sort(key=severity_then_id)
    return ordered
@@ -0,0 +1 @@
1
+ """Report building utilities for CLI."""
@@ -0,0 +1,209 @@
1
+ """Build CheckReport from API data and gate result.
2
+
3
+ Normalizes failed cases (truncate, sort), dashboard URL, top N + more.
4
+
5
+ Port of ``cli/report/build-check-report.ts``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+ from evalgate_sdk.cli.formatters.types import (
13
+ CHECK_REPORT_SCHEMA_VERSION,
14
+ CheckReport,
15
+ FailedCase,
16
+ GateThresholds,
17
+ ScoreBreakdown01,
18
+ ScoreContribPts,
19
+ )
20
+ from evalgate_sdk.cli.render.snippet import truncate_snippet
21
+ from evalgate_sdk.cli.render.sort import sort_failed_cases
22
+
23
# Number of failed cases counted as "shown"; the remainder is reported as "more".
TOP_N = 3
# Maximum snippet length passed to truncate_snippet for input/expected/output text.
SNIPPET_MAX = 50
25
+
26
+
27
def compute_contrib_pts(b: ScoreBreakdown01) -> ScoreContribPts:
    """Convert a score breakdown into weighted point contributions.

    Weights: pass rate 50, safety 25, compliance (0.6 judge + 0.4 schema) 15,
    performance (0.6 latency + 0.4 cost) 10. Missing components count as 0 and
    each result is rounded to one decimal place.
    """

    def one_decimal(points: float) -> float:
        return round(points * 10) / 10

    pass_rate, safety = b.pass_rate or 0, b.safety or 0
    judge, schema = b.judge or 0, b.schema or 0
    latency, cost = b.latency or 0, b.cost or 0

    compliance = 0.6 * judge + 0.4 * schema
    performance = 0.6 * latency + 0.4 * cost

    return ScoreContribPts(
        pass_rate_pts=one_decimal(pass_rate * 50),
        safety_pts=one_decimal(safety * 25),
        compliance_pts=one_decimal(compliance * 15),
        performance_pts=one_decimal(performance * 10),
    )
41
+
42
+
43
def _first_not_none(source: dict[str, Any], *keys: str) -> Any:
    """Return the first value among ``keys`` in ``source`` that is not ``None``."""
    for key in keys:
        value = source.get(key)
        if value is not None:
            return value
    return None


def build_check_report(
    evaluation_id: str,
    quality: dict[str, Any],
    gate_result: dict[str, Any],
    base_url: str = "",
    run_details: dict[str, Any] | None = None,
    request_id: str | None = None,
    share_url: str | None = None,
    baseline_run_id: int | None = None,
    ci_run_url: str | None = None,
    explain: bool = False,
    policy: str | None = None,
    min_score: float | None = None,
    max_drop: float | None = None,
    warn_drop: float | None = None,
    min_n: int | None = None,
    allow_weak_evidence: bool | None = None,
    baseline: str | None = None,
    max_cost_usd: float | None = None,
    max_latency_ms: float | None = None,
    max_cost_delta_usd: float | None = None,
) -> CheckReport:
    """Build a CheckReport from API data and gate result.

    Args:
        evaluation_id: Evaluation identifier, also used in the dashboard URL.
        quality: Quality payload; both camelCase and snake_case keys are read.
        gate_result: Gate decision; both camelCase and snake_case keys are read.
        base_url: Dashboard base URL (a trailing slash is stripped).
        run_details: Optional run payload whose ``results`` supply failed cases.
        explain: When true, contribution points and policy evidence are included.
        request_id, share_url, baseline_run_id, ci_run_url, policy: Passed
            through to the report (``baseline_run_id`` falls back to ``quality``).
        min_score, max_drop, warn_drop, min_n, allow_weak_evidence, baseline,
        max_cost_usd, max_latency_ms, max_cost_delta_usd: Recorded as the
            report's ``GateThresholds``.

    Returns:
        The assembled ``CheckReport``.
    """
    score = quality.get("score", 0)
    total = quality.get("total")
    # "is not None" lookups: a plain `a or b` would discard legitimate zeros
    # (a baseline score of 0, a regression delta of 0, a run id of 0).
    baseline_score = _first_not_none(quality, "baselineScore", "baseline_score")
    regression_delta = _first_not_none(quality, "regressionDelta", "regression_delta")
    evaluation_run_id = _first_not_none(quality, "evaluationRunId", "evaluation_run_id")
    breakdown = quality.get("breakdown", {})
    flags = sorted(quality.get("flags", []))

    dashboard_url = None
    if evaluation_run_id is not None:
        clean_base = base_url.rstrip("/")
        dashboard_url = f"{clean_base}/evaluations/{evaluation_id}/runs/{evaluation_run_id}"

    # Build failed cases from run details
    failed_cases: list[FailedCase] = []
    if run_details and run_details.get("results") and evaluation_run_id is not None:
        raw = []
        for r in run_details["results"]:
            if r.get("status") != "failed":
                continue
            # Tolerate an explicit null "test_cases" entry as well as a missing one.
            tc = r.get("test_cases") or {}
            raw.append(
                {
                    "test_case_id": _first_not_none(r, "testCaseId", "test_case_id"),
                    "status": "failed",
                    "name": tc.get("name"),
                    "input": tc.get("input"),
                    "expected_output": tc.get("expectedOutput") or tc.get("expected_output"),
                    "output": r.get("output"),
                }
            )

        for fc in sort_failed_cases(raw):
            failed_cases.append(
                FailedCase(
                    test_case_id=fc.get("test_case_id"),
                    status="failed",
                    name=fc.get("name"),
                    input=fc.get("input"),
                    input_snippet=truncate_snippet(fc.get("input"), SNIPPET_MAX),
                    expected_output=fc.get("expected_output"),
                    expected_snippet=truncate_snippet(fc.get("expected_output"), SNIPPET_MAX),
                    output=fc.get("output"),
                    output_snippet=truncate_snippet(fc.get("output"), SNIPPET_MAX),
                )
            )

    failed_cases_shown = min(len(failed_cases), TOP_N) if failed_cases else None
    # Only set when there are more failed cases than TOP_N, so it is always positive.
    failed_cases_more = (len(failed_cases) - TOP_N) if len(failed_cases) > TOP_N else None

    gate_skipped = gate_result.get("gate_skipped", gate_result.get("gateSkipped", False))
    gate_applied = not gate_skipped
    gate_mode = "neutral" if gate_skipped else "enforced"
    reason_code = gate_result.get("reason_code", gate_result.get("reasonCode", "UNKNOWN"))
    reason_message = gate_result.get("reason_message") or gate_result.get("reasonMessage")

    # A warn-level regression overrides the pass/fail flag.
    if reason_code == "WARN_REGRESSION":
        verdict = "warn"
    elif gate_result.get("passed"):
        verdict = "pass"
    else:
        verdict = "fail"

    if gate_skipped:
        actionable_message = (
            "Gate not applied: baseline missing. Publish a baseline from the dashboard, "
            "or run with --baseline previous once you have runs."
        )
    else:
        actionable_message = reason_message

    breakdown_01 = None
    if breakdown:
        breakdown_01 = ScoreBreakdown01(
            pass_rate=breakdown.get("passRate"),
            safety=breakdown.get("safety"),
            judge=breakdown.get("judge"),
            schema=breakdown.get("schema"),
            latency=breakdown.get("latency"),
            cost=breakdown.get("cost"),
        )

    contrib_pts = None
    if explain and breakdown_01:
        contrib_pts = compute_contrib_pts(breakdown_01)

    thresholds = GateThresholds(
        min_score=min_score,
        max_drop=max_drop,
        warn_drop=warn_drop,
        min_n=min_n,
        allow_weak_evidence=allow_weak_evidence,
        baseline=baseline,
        max_cost_usd=max_cost_usd,
        max_latency_ms=max_latency_ms,
        max_cost_delta_usd=max_cost_delta_usd,
    )

    policy_evidence = None
    if explain and gate_result.get("policy_evidence", gate_result.get("policyEvidence")):
        pe = gate_result.get("policy_evidence") or gate_result.get("policyEvidence")
        policy_evidence = {
            "failedCheck": pe.get("failed_check") or pe.get("failedCheck"),
            "remediation": pe.get("remediation"),
            "snapshot": pe.get("snapshot"),
        }

    baseline_missing = quality.get("baselineMissing") or quality.get("baseline_missing")
    if baseline_missing:
        baseline_status = "missing"
    else:
        baseline_status = "found" if baseline_score is not None else None

    return CheckReport(
        schema_version=CHECK_REPORT_SCHEMA_VERSION,
        evaluation_id=evaluation_id,
        run_id=evaluation_run_id,
        verdict=verdict,
        gate_applied=gate_applied,
        gate_mode=gate_mode,
        actionable_message=actionable_message,
        share_url=share_url,
        policy=policy,
        baseline_run_id=baseline_run_id or quality.get("baselineRunId") or quality.get("baseline_run_id"),
        ci_run_url=ci_run_url,
        reason_code=reason_code,
        reason_message=reason_message,
        score=score,
        baseline_score=baseline_score,
        delta=regression_delta,
        n=total,
        evidence_level=quality.get("evidenceLevel") or quality.get("evidence_level"),
        baseline_missing=baseline_missing,
        baseline_status=baseline_status,
        flags=flags if flags else None,
        breakdown_01=breakdown_01,
        contrib_pts=contrib_pts,
        thresholds=thresholds,
        dashboard_url=dashboard_url,
        failed_cases=failed_cases,
        failed_cases_shown=failed_cases_shown,
        failed_cases_more=failed_cases_more,
        request_id=request_id,
        explain=explain if explain else None,
        policy_evidence=policy_evidence,
    )