invarlock 0.3.6-py3-none-any.whl → 0.3.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +4 -4
- invarlock/adapters/__init__.py +10 -14
- invarlock/adapters/auto.py +37 -50
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +53 -9
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/assurance/__init__.py +15 -23
- invarlock/cli/adapter_auto.py +32 -26
- invarlock/cli/app.py +128 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/doctor.py +8 -10
- invarlock/cli/commands/evaluate.py +986 -0
- invarlock/cli/commands/explain_gates.py +25 -17
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +326 -92
- invarlock/cli/commands/run.py +1160 -228
- invarlock/cli/commands/verify.py +157 -97
- invarlock/cli/config.py +1 -1
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +4 -4
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/registry.py +9 -11
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +112 -26
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +67 -39
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +14 -10
- invarlock/eval/data.py +68 -23
- invarlock/eval/metrics.py +59 -1
- invarlock/eval/primary_metric.py +1 -1
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/rmt.py +2 -2
- invarlock/guards/spectral.py +1 -1
- invarlock/guards/variance.py +2 -2
- invarlock/model_profile.py +64 -62
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/guards_analysis.py +154 -4
- invarlock/reporting/html.py +61 -11
- invarlock/reporting/normalizer.py +9 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +876 -510
- invarlock/reporting/report.py +72 -30
- invarlock/reporting/{certificate.py → report_builder.py} +252 -99
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock/reporting/report_types.py +6 -1
- invarlock/reporting/telemetry.py +86 -0
- invarlock-0.3.8.dist-info/METADATA +283 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- invarlock/cli/commands/certify.py +0 -422
- invarlock-0.3.6.dist-info/METADATA +0 -588
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
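The headline change in this release is the rename of the certificate pipeline to an evaluation-report pipeline: certificate.py becomes report_builder.py, certificate_schema.py becomes report_schema.py, and the certify command is removed. A minimal sketch of how the renamed builder appears to be called in 0.3.8, based on the report.py hunk below (the run paths are hypothetical placeholders, and the shape of the loaded JSON is assumed):

import json
from pathlib import Path

from invarlock.reporting import report_builder

# Hypothetical inputs; in the CLI these come from --run and --baseline.
primary = json.loads(Path("runs/subject/report.json").read_text(encoding="utf-8"))
baseline = json.loads(Path("runs/baseline/report.json").read_text(encoding="utf-8"))

# Names as they appear in the diff: make_report builds the evaluation
# report, validate_report checks it against schema v1.
evaluation_report = report_builder.make_report(primary, baseline)
report_builder.validate_report(evaluation_report)
print(evaluation_report.get("schema_version"))

The equivalent CLI invocation, quoted from the help text in the same hunk: invarlock report --run <run_dir> --format report --baseline <baseline_run_dir>.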
invarlock/cli/commands/report.py
CHANGED
@@ -8,20 +8,101 @@ Provides the `invarlock report` group with:
 """
 
 import json
+import math
 from pathlib import Path
+from time import perf_counter
+from typing import Any
 
 import typer
 from rich.console import Console
 
-from invarlock.
+from invarlock.cli.output import print_event, resolve_output_style
 from invarlock.reporting import report as report_lib
+from invarlock.reporting import report_builder as report_builder
 
 console = Console()
 
+SECTION_WIDTH = 67
+KV_LABEL_WIDTH = 16
+GATE_LABEL_WIDTH = 32
+ARTIFACT_LABEL_WIDTH = 18
+
+
+def _format_section_title(title: str, *, suffix: str | None = None) -> str:
+    if not suffix:
+        return title
+    combined = f"{title} {suffix}"
+    if len(combined) > SECTION_WIDTH:
+        return combined
+    pad = max(1, SECTION_WIDTH - len(title) - len(suffix))
+    return f"{title}{' ' * pad}{suffix}"
+
+
+def _print_section_header(
+    console: Console, title: str, *, suffix: str | None = None
+) -> None:
+    bar = "═" * SECTION_WIDTH
+    console.print(bar)
+    console.print(_format_section_title(title, suffix=suffix))
+    console.print(bar)
+
+
+def _format_kv_line(label: str, value: str, *, width: int = KV_LABEL_WIDTH) -> str:
+    return f" {label:<{width}}: {value}"
+
+
+def _format_status(ok: bool) -> str:
+    return "PASS" if ok else "FAIL"
+
+
+def _fmt_metric_value(value: Any) -> str:
+    try:
+        val = float(value)
+    except (TypeError, ValueError):
+        return "N/A"
+    if not math.isfinite(val):
+        return "N/A"
+    return f"{val:.3f}"
+
+
+def _fmt_ci_95(ci: Any) -> str | None:
+    if isinstance(ci, (list, tuple)) and len(ci) == 2:
+        try:
+            lo = float(ci[0])
+            hi = float(ci[1])
+        except (TypeError, ValueError):
+            return None
+        if math.isfinite(lo) and math.isfinite(hi):
+            return f"[{lo:.3f}, {hi:.3f}]"
+    return None
+
+
+def _artifact_entries(
+    saved_files: dict[str, str], output_dir: str
+) -> list[tuple[str, str]]:
+    order = [
+        ("report", "Evaluation Report (JSON)"),
+        ("report_md", "Evaluation Report (MD)"),
+        ("json", "JSON"),
+        ("markdown", "Markdown"),
+        ("html", "HTML"),
+    ]
+    entries: list[tuple[str, str]] = [("Output", output_dir)]
+    used: set[str] = set()
+    for key, label in order:
+        if key in saved_files:
+            entries.append((label, str(saved_files[key])))
+            used.add(key)
+    for key in sorted(saved_files.keys()):
+        if key in used:
+            continue
+        entries.append((key.upper(), str(saved_files[key])))
+    return entries
+
 
 # Group with callback so `invarlock report` still generates reports
 report_app = typer.Typer(
-    help="Operations on reports and
+    help="Operations on run reports and evaluation reports (verify, explain, html, validate).",
     invoke_without_command=True,
 )
 
@@ -33,6 +114,11 @@ def _generate_reports(
     compare: str | None = None,
     baseline: str | None = None,
     output: str | None = None,
+    style: str = "audit",
+    no_color: bool = False,
+    summary_baseline_seconds: float | None = None,
+    summary_subject_seconds: float | None = None,
+    summary_report_start: float | None = None,
 ) -> None:
     # This callback runs only when invoked without subcommand (default Click behavior)
     try:
@@ -55,21 +141,37 @@ def _generate_reports(
         compare = _coerce_option(compare)
         baseline = _coerce_option(baseline)
         output = _coerce_option(output)
+        style = _coerce_option(style, "audit")
+        no_color = bool(_coerce_option(no_color, False))
+        summary_baseline_seconds = _coerce_option(summary_baseline_seconds)
+        summary_subject_seconds = _coerce_option(summary_subject_seconds)
+        summary_report_start = _coerce_option(summary_report_start)
+
+        output_style = resolve_output_style(
+            style=str(style),
+            profile="ci",
+            progress=False,
+            timing=False,
+            no_color=no_color,
+        )
+
+        def _event(tag: str, message: str, *, emoji: str | None = None) -> None:
+            print_event(console, tag, message, style=output_style, emoji=emoji)
 
         # Load primary report
-
+        _event("DATA", f"Loading run report: {run}", emoji="📊")
         primary_report = _load_run_report(run)
 
         # Load comparison report if specified
         compare_report = None
         if compare:
-
+            _event("DATA", f"Loading comparison report: {compare}", emoji="📊")
             compare_report = _load_run_report(compare)
 
         # Load baseline report if specified
         baseline_report = None
         if baseline:
-
+            _event("DATA", f"Loading baseline report: {baseline}", emoji="📊")
             baseline_report = _load_run_report(baseline)
 
         # Determine output directory
@@ -80,25 +182,39 @@ def _generate_reports(
             output_dir = output
 
         # Determine formats
+        allowed_formats = {"json", "md", "markdown", "html", "report", "all"}
+        if format not in allowed_formats:
+            _event("FAIL", f"Unknown --format '{format}'", emoji="❌")
+            raise typer.Exit(2)
+
+        if format == "md":
+            format = "markdown"
         if format == "all":
            formats = ["json", "markdown", "html"]
        else:
            formats = [format]
 
-        # Validate
-        if "
+        # Validate evaluation report requirements
+        if "report" in formats:
             if baseline_report is None:
-
-                "
+                _event(
+                    "FAIL",
+                    "Evaluation report format requires --baseline",
+                    emoji="❌",
                 )
-
-                "
+                _event(
+                    "INFO",
+                    "Use: invarlock report --run <run_dir> --format report --baseline <baseline_run_dir>",
                 )
                 raise typer.Exit(1)
-
+            _event(
+                "EXEC",
+                "Generating evaluation report with baseline comparison",
+                emoji="📜",
+            )
 
         # Generate reports
-
+        _event("EXEC", f"Generating reports in formats: {formats}", emoji="📝")
         saved_files = report_lib.save_report(
             primary_report,
             output_dir,
@@ -109,82 +225,151 @@ def _generate_reports(
         )
 
         # Show results
-
-
-
-        for fmt, file_path in saved_files.items():
-            if fmt == "cert":
-                console.print(f" 📜 CERTIFICATE (JSON): {file_path}")
-            elif fmt == "cert_md":
-                console.print(f" 📜 CERTIFICATE (MD): {file_path}")
-            else:
-                console.print(f" 📄 {fmt.upper()}: {file_path}")
-
-        # Show key metrics (PM-first). Avoid PPL-first wording.
-        console.print("\n📈 Key Metrics:")
-        console.print(f" Model: {primary_report['meta']['model_id']}")
-        console.print(f" Edit: {primary_report['edit']['name']}")
-        pm = (primary_report.get("metrics", {}) or {}).get("primary_metric", {})
-        if isinstance(pm, dict) and pm:
-            kind = str(pm.get("kind") or "primary")
-            console.print(f" Primary Metric: {kind}")
-            final = pm.get("final")
-            if isinstance(final, int | float):
-                console.print(f" point (final): {final:.3f}")
-            dci = pm.get("display_ci")
-            if isinstance(dci, tuple | list) and len(dci) == 2:
-                try:
-                    lo, hi = float(dci[0]), float(dci[1])
-                    console.print(f" CI: {lo:.3f}–{hi:.3f}")
-                except Exception:
-                    pass
-            ratio = pm.get("ratio_vs_baseline")
-            if isinstance(ratio, int | float):
-                console.print(f" ratio vs baseline: {ratio:.3f}")
-
-        # Show certificate validation if generated
-        if "cert" in formats and baseline_report:
+        _event("PASS", "Reports generated successfully.", emoji="✅")
+
+        if "report" in formats and baseline_report:
             try:
-
+                evaluation_report = report_builder.make_report(
                    primary_report, baseline_report
                )
-
+                report_builder.validate_report(evaluation_report)
                from invarlock.reporting.render import (
                    compute_console_validation_block as _console_block,
                )
 
-                block = _console_block(
+                block = _console_block(evaluation_report)
                 overall_pass = bool(block.get("overall_pass"))
-
-
-
-
-
+                status_text = _format_status(overall_pass)
+
+                console.print("")
+                summary_suffix: str | None = None
+                if summary_report_start is not None:
+                    try:
+                        base = (
+                            float(summary_baseline_seconds)
+                            if summary_baseline_seconds is not None
+                            else 0.0
+                        )
+                        subject = (
+                            float(summary_subject_seconds)
+                            if summary_subject_seconds is not None
+                            else 0.0
+                        )
+                        report_elapsed = max(
+                            0.0, float(perf_counter() - float(summary_report_start))
+                        )
+                        summary_suffix = f"[{(base + subject + report_elapsed):.2f}s]"
+                    except Exception:
+                        summary_suffix = None
+                _print_section_header(
+                    console,
+                    "EVALUATION REPORT SUMMARY",
+                    suffix=summary_suffix,
                 )
+                console.print(_format_kv_line("Status", status_text))
+
+                schema_version = evaluation_report.get("schema_version")
+                if schema_version:
+                    console.print(
+                        _format_kv_line("Schema Version", str(schema_version))
+                    )
 
+                run_id = evaluation_report.get("run_id") or (
+                    (primary_report.get("meta", {}) or {}).get("run_id")
+                )
+                if run_id:
+                    console.print(_format_kv_line("Run ID", str(run_id)))
+
+                model_id = (primary_report.get("meta", {}) or {}).get("model_id")
+                edit_name = (primary_report.get("edit", {}) or {}).get("name")
+                if model_id:
+                    console.print(_format_kv_line("Model", str(model_id)))
+                if edit_name:
+                    console.print(_format_kv_line("Edit", str(edit_name)))
+
+                pm = (
+                    (evaluation_report.get("primary_metric") or {})
+                    if isinstance(evaluation_report, dict)
+                    else {}
+                )
+                if not pm:
+                    pm = (primary_report.get("metrics", {}) or {}).get(
+                        "primary_metric", {}
+                    )
+                console.print(" PRIMARY METRIC")
+                pm_entries: list[tuple[str, str]] = []
+                if isinstance(pm, dict) and pm:
+                    kind = str(pm.get("kind") or "primary")
+                    pm_entries.append(("Kind", kind))
+                    preview = pm.get("preview")
+                    if preview is not None:
+                        pm_entries.append(("Preview", _fmt_metric_value(preview)))
+                    final = pm.get("final")
+                    if final is not None:
+                        pm_entries.append(("Final", _fmt_metric_value(final)))
+                    ratio = pm.get("ratio_vs_baseline")
+                    if ratio is not None:
+                        pm_entries.append(("Ratio", _fmt_metric_value(ratio)))
+                    dci = pm.get("display_ci")
+                    ci_95 = _fmt_ci_95(dci)
+                    if ci_95 is not None:
+                        pm_entries.append(("CI (95%)", ci_95))
+                if not pm_entries:
+                    pm_entries.append(("Status", "Unavailable"))
+                for idx, (label, value) in enumerate(pm_entries):
+                    branch = "└─" if idx == len(pm_entries) - 1 else "├─"
+                    console.print(f" {branch} {label:<14} {value}")
+
+                console.print(" VALIDATION GATES")
                 rows = block.get("rows", [])
                 if isinstance(rows, list) and rows:
-                    for row in rows:
-
-
-
-
-
-
-
+                    for idx, row in enumerate(rows):
+                        label = str(row.get("label") or "Unknown")
+                        ok = bool(row.get("ok"))
+                        status = _format_status(ok)
+                        mark = "✓" if ok else "✗"
+                        branch = "└─" if idx == len(rows) - 1 else "├─"
+                        console.print(
+                            f" {branch} {label:<{GATE_LABEL_WIDTH}} {mark} {status}"
+                        )
+                else:
+                    console.print(f" └─ {'No validation rows':<{GATE_LABEL_WIDTH}} -")
+
+                console.print(" ARTIFACTS")
+                entries = _artifact_entries(saved_files, str(output_dir))
+                for idx, (label, value) in enumerate(entries):
+                    branch = "└─" if idx == len(entries) - 1 else "├─"
+                    console.print(f" {branch} {label:<{ARTIFACT_LABEL_WIDTH}} {value}")
+                console.print("═" * SECTION_WIDTH)
 
                 # In CLI report flow, do not hard-exit on validation failure; just display status.
                 # CI gating should be handled by dedicated verify commands.
 
             except Exception as e:
+                _event("WARN", f"Evaluation report validation error: {e}", emoji="⚠️")
+                # Exit non-zero on evaluation report generation error
+                raise typer.Exit(1) from e
+        else:
+            console.print(_format_kv_line("Output", str(output_dir)))
+            for label, value in _artifact_entries(saved_files, str(output_dir))[1:]:
                 console.print(
-
+                    _format_kv_line(label, str(value), width=ARTIFACT_LABEL_WIDTH)
                 )
-                # Exit non-zero on certificate generation error
-                raise typer.Exit(1) from e
 
     except Exception as e:
-
+        print_event(
+            console,
+            "FAIL",
+            f"Report generation failed: {e}",
+            style=resolve_output_style(
+                style="audit",
+                profile="ci",
+                progress=False,
+                timing=False,
+                no_color=False,
+            ),
+            emoji="❌",
+        )
        raise typer.Exit(1) from e
 
 
@@ -195,7 +380,7 @@ def report_callback(
         None, "--run", help="Path to run directory or RunReport JSON"
     ),
     format: str = typer.Option(
-        "json", "--format", help="Output format (json|md|html|
+        "json", "--format", help="Output format (json|md|html|report|all)"
     ),
     compare: str | None = typer.Option(
         None, "--compare", help="Path to second run for comparison"
@@ -203,18 +388,40 @@ def report_callback(
     baseline: str | None = typer.Option(
         None,
         "--baseline",
-        help="Path to baseline run for
+        help="Path to baseline run for evaluation report generation (required for report format)",
     ),
     output: str | None = typer.Option(None, "--output", "-o", help="Output directory"),
+    style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
+    no_color: bool = typer.Option(
+        False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
+    ),
 ):
     """Generate a report from a run (default callback)."""
     if getattr(ctx, "resilient_parsing", False) or ctx.invoked_subcommand is not None:
         return
     if not run:
-
+        print_event(
+            console,
+            "FAIL",
+            "--run is required when no subcommand is provided",
+            style=resolve_output_style(
+                style=str(style),
+                profile="ci",
+                progress=False,
+                timing=False,
+                no_color=no_color,
+            ),
+            emoji="❌",
+        )
         raise typer.Exit(2)
     return _generate_reports(
-        run=run,
+        run=run,
+        format=format,
+        compare=compare,
+        baseline=baseline,
+        output=output,
+        style=style,
+        no_color=no_color,
     )
 
 
@@ -225,9 +432,23 @@ def report_command(
     compare: str | None = None,
     baseline: str | None = None,
     output: str | None = None,
+    style: str = "audit",
+    no_color: bool = False,
+    summary_baseline_seconds: float | None = None,
+    summary_subject_seconds: float | None = None,
+    summary_report_start: float | None = None,
 ):
     return _generate_reports(
-        run=run,
+        run=run,
+        format=format,
+        compare=compare,
+        baseline=baseline,
+        output=output,
+        style=style,
+        no_color=no_color,
+        summary_baseline_seconds=summary_baseline_seconds,
+        summary_subject_seconds=summary_subject_seconds,
+        summary_report_start=summary_report_start,
    )
 
 
@@ -254,16 +475,16 @@ def _load_run_report(path: str) -> dict:
 
 # Subcommands wired from existing modules
 @report_app.command(
-    name="verify", help="Recompute and verify metrics for
+    name="verify", help="Recompute and verify metrics for evaluation reports."
 )
 def report_verify_command(
-
-        ..., help="One or more
+    reports: list[str] = typer.Argument(
+        ..., help="One or more evaluation report JSON files to verify."
     ),
     baseline: str | None = typer.Option(
         None,
         "--baseline",
-        help="Optional baseline
+        help="Optional baseline evaluation report JSON to enforce provider parity.",
    ),
    tolerance: float = typer.Option(
        1e-9, "--tolerance", help="Tolerance for analysis-basis comparisons."
@@ -278,10 +499,10 @@ def report_verify_command(
 
     from .verify import verify_command as _verify_command
 
-
+    report_paths = [_Path(p) for p in reports]
     baseline_path = _Path(baseline) if isinstance(baseline, str) else None
     return _verify_command(
-
+        reports=report_paths,
         baseline=baseline_path,
         tolerance=tolerance,
         profile=profile,
@@ -289,7 +510,7 @@ def report_verify_command(
 
 
 @report_app.command(
-    name="explain", help="Explain
+    name="explain", help="Explain evaluation report gates for report vs baseline."
 )
 def report_explain(
     report: str = typer.Option(..., "--report", help="Path to primary report.json"),
@@ -297,15 +518,17 @@ def report_explain(
         ..., "--baseline", help="Path to baseline report.json"
     ),
 ): # pragma: no cover - thin wrapper
-    """Explain
+    """Explain evaluation report gates for a report vs baseline."""
     from .explain_gates import explain_gates_command as _explain
 
     return _explain(report=report, baseline=baseline)
 
 
-@report_app.command(name="html", help="Render
+@report_app.command(name="html", help="Render an evaluation report JSON to HTML.")
 def report_html(
-    input: str = typer.Option(
+    input: str = typer.Option(
+        ..., "--input", "-i", help="Path to evaluation report JSON"
+    ),
     output: str = typer.Option(..., "--output", "-o", help="Path to output HTML file"),
     embed_css: bool = typer.Option(
         True, "--embed-css/--no-embed-css", help="Inline a minimal static stylesheet"
@@ -322,32 +545,43 @@ def report_html(
 @report_app.command("validate")
 def report_validate(
     report: str = typer.Argument(
-        ..., help="Path to
+        ..., help="Path to evaluation report JSON to validate against schema v1"
     ),
 ):
-    """Validate
+    """Validate an evaluation report JSON against the current schema (v1)."""
+    output_style = resolve_output_style(
+        style="audit",
+        profile="ci",
+        progress=False,
+        timing=False,
+        no_color=False,
+    )
+
+    def _event(tag: str, message: str, *, emoji: str | None = None) -> None:
+        print_event(console, tag, message, style=output_style, emoji=emoji)
+
     p = Path(report)
     try:
         payload = json.loads(p.read_text(encoding="utf-8"))
     except Exception as exc: # noqa: BLE001
-
+        _event("FAIL", f"Failed to read input JSON: {exc}", emoji="❌")
         raise typer.Exit(1) from exc
 
     try:
-        from invarlock.reporting.
+        from invarlock.reporting.report_builder import validate_report
 
-        ok =
+        ok = validate_report(payload)
         if not ok:
-
+            _event("FAIL", "Evaluation report schema validation failed", emoji="❌")
             raise typer.Exit(2)
-
+        _event("PASS", "Evaluation report schema is valid", emoji="✅")
     except ValueError as exc:
-
+        _event("FAIL", f"Evaluation report validation error: {exc}", emoji="❌")
        raise typer.Exit(2) from exc
    except typer.Exit:
        raise
    except Exception as exc: # noqa: BLE001
-
+        _event("FAIL", f"Validation failed: {exc}", emoji="❌")
        raise typer.Exit(1) from exc

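For reference, the new console-summary formatters added at the top of report.py are self-contained; the following standalone mirror (lifted from the hunk above and lightly condensed, so it runs without invarlock installed) shows exactly what they emit:

import math
from typing import Any


def _fmt_metric_value(value: Any) -> str:
    # Finite numbers render to three decimals; anything else becomes "N/A".
    try:
        val = float(value)
    except (TypeError, ValueError):
        return "N/A"
    if not math.isfinite(val):
        return "N/A"
    return f"{val:.3f}"


def _fmt_ci_95(ci: Any) -> str | None:
    # A two-element, finite confidence interval renders as "[lo, hi]".
    if isinstance(ci, (list, tuple)) and len(ci) == 2:
        try:
            lo, hi = float(ci[0]), float(ci[1])
        except (TypeError, ValueError):
            return None
        if math.isfinite(lo) and math.isfinite(hi):
            return f"[{lo:.3f}, {hi:.3f}]"
    return None


print(_fmt_metric_value(1.23456))       # 1.235
print(_fmt_metric_value(float("nan")))  # N/A
print(_fmt_ci_95([0.98, 1.02]))         # [0.980, 1.020]
print(_fmt_ci_95("not a ci"))           # None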