invarlock-0.3.6-py3-none-any.whl → invarlock-0.3.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. invarlock/__init__.py +4 -4
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +37 -50
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_loading.py +7 -7
  8. invarlock/adapters/hf_mixin.py +53 -9
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/assurance/__init__.py +15 -23
  12. invarlock/cli/adapter_auto.py +32 -26
  13. invarlock/cli/app.py +128 -27
  14. invarlock/cli/commands/__init__.py +2 -2
  15. invarlock/cli/commands/calibrate.py +48 -4
  16. invarlock/cli/commands/doctor.py +8 -10
  17. invarlock/cli/commands/evaluate.py +986 -0
  18. invarlock/cli/commands/explain_gates.py +25 -17
  19. invarlock/cli/commands/export_html.py +11 -9
  20. invarlock/cli/commands/plugins.py +13 -9
  21. invarlock/cli/commands/report.py +326 -92
  22. invarlock/cli/commands/run.py +1160 -228
  23. invarlock/cli/commands/verify.py +157 -97
  24. invarlock/cli/config.py +1 -1
  25. invarlock/cli/determinism.py +1 -1
  26. invarlock/cli/doctor_helpers.py +4 -5
  27. invarlock/cli/output.py +193 -0
  28. invarlock/cli/provenance.py +4 -4
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/registry.py +9 -11
  31. invarlock/core/retry.py +14 -14
  32. invarlock/core/runner.py +112 -26
  33. invarlock/edits/noop.py +2 -2
  34. invarlock/edits/quant_rtn.py +67 -39
  35. invarlock/eval/__init__.py +1 -1
  36. invarlock/eval/bench.py +14 -10
  37. invarlock/eval/data.py +68 -23
  38. invarlock/eval/metrics.py +59 -1
  39. invarlock/eval/primary_metric.py +1 -1
  40. invarlock/eval/tasks/__init__.py +12 -0
  41. invarlock/eval/tasks/classification.py +48 -0
  42. invarlock/eval/tasks/qa.py +36 -0
  43. invarlock/eval/tasks/text_generation.py +102 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/rmt.py +2 -2
  46. invarlock/guards/spectral.py +1 -1
  47. invarlock/guards/variance.py +2 -2
  48. invarlock/model_profile.py +64 -62
  49. invarlock/observability/health.py +6 -6
  50. invarlock/observability/metrics.py +108 -0
  51. invarlock/plugins/hf_bnb_adapter.py +32 -21
  52. invarlock/reporting/__init__.py +18 -4
  53. invarlock/reporting/guards_analysis.py +154 -4
  54. invarlock/reporting/html.py +61 -11
  55. invarlock/reporting/normalizer.py +9 -2
  56. invarlock/reporting/policy_utils.py +1 -1
  57. invarlock/reporting/primary_metric_utils.py +11 -11
  58. invarlock/reporting/render.py +876 -510
  59. invarlock/reporting/report.py +72 -30
  60. invarlock/reporting/{certificate.py → report_builder.py} +252 -99
  61. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  62. invarlock/reporting/report_types.py +6 -1
  63. invarlock/reporting/telemetry.py +86 -0
  64. invarlock-0.3.8.dist-info/METADATA +283 -0
  65. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
  66. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  67. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
  68. invarlock/adapters/hf_gpt2.py +0 -404
  69. invarlock/adapters/hf_llama.py +0 -487
  70. invarlock/cli/commands/certify.py +0 -422
  71. invarlock-0.3.6.dist-info/METADATA +0 -588
  72. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  73. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
@@ -8,20 +8,101 @@ Provides the `invarlock report` group with:
 """
 
 import json
+import math
 from pathlib import Path
+from time import perf_counter
+from typing import Any
 
 import typer
 from rich.console import Console
 
-from invarlock.reporting import certificate as certificate_lib
+from invarlock.cli.output import print_event, resolve_output_style
 from invarlock.reporting import report as report_lib
+from invarlock.reporting import report_builder as report_builder
 
 console = Console()
 
+SECTION_WIDTH = 67
+KV_LABEL_WIDTH = 16
+GATE_LABEL_WIDTH = 32
+ARTIFACT_LABEL_WIDTH = 18
+
+
+def _format_section_title(title: str, *, suffix: str | None = None) -> str:
+    if not suffix:
+        return title
+    combined = f"{title} {suffix}"
+    if len(combined) > SECTION_WIDTH:
+        return combined
+    pad = max(1, SECTION_WIDTH - len(title) - len(suffix))
+    return f"{title}{' ' * pad}{suffix}"
+
+
+def _print_section_header(
+    console: Console, title: str, *, suffix: str | None = None
+) -> None:
+    bar = "═" * SECTION_WIDTH
+    console.print(bar)
+    console.print(_format_section_title(title, suffix=suffix))
+    console.print(bar)
+
+
+def _format_kv_line(label: str, value: str, *, width: int = KV_LABEL_WIDTH) -> str:
+    return f" {label:<{width}}: {value}"
+
+
+def _format_status(ok: bool) -> str:
+    return "PASS" if ok else "FAIL"
+
+
+def _fmt_metric_value(value: Any) -> str:
+    try:
+        val = float(value)
+    except (TypeError, ValueError):
+        return "N/A"
+    if not math.isfinite(val):
+        return "N/A"
+    return f"{val:.3f}"
+
+
+def _fmt_ci_95(ci: Any) -> str | None:
+    if isinstance(ci, (list, tuple)) and len(ci) == 2:
+        try:
+            lo = float(ci[0])
+            hi = float(ci[1])
+        except (TypeError, ValueError):
+            return None
+        if math.isfinite(lo) and math.isfinite(hi):
+            return f"[{lo:.3f}, {hi:.3f}]"
+    return None
+
+
+def _artifact_entries(
+    saved_files: dict[str, str], output_dir: str
+) -> list[tuple[str, str]]:
+    order = [
+        ("report", "Evaluation Report (JSON)"),
+        ("report_md", "Evaluation Report (MD)"),
+        ("json", "JSON"),
+        ("markdown", "Markdown"),
+        ("html", "HTML"),
+    ]
+    entries: list[tuple[str, str]] = [("Output", output_dir)]
+    used: set[str] = set()
+    for key, label in order:
+        if key in saved_files:
+            entries.append((label, str(saved_files[key])))
+            used.add(key)
+    for key in sorted(saved_files.keys()):
+        if key in used:
+            continue
+        entries.append((key.upper(), str(saved_files[key])))
+    return entries
+
 
 # Group with callback so `invarlock report` still generates reports
 report_app = typer.Typer(
-    help="Operations on reports and certificates (verify, explain, html, validate).",
+    help="Operations on run reports and evaluation reports (verify, explain, html, validate).",
     invoke_without_command=True,
 )
 
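Note: the formatting helpers added above are pure string functions, so their behavior can be pinned down outside the CLI. A minimal standalone sketch (logic mirrored from the diff rather than importing the private helpers; the sample title and timing suffix are illustrative):

import math

SECTION_WIDTH = 67  # same constant as in the diff

def format_section_title(title: str, suffix: str | None = None) -> str:
    # Right-pads the title so the suffix ends at column SECTION_WIDTH.
    if not suffix:
        return title
    combined = f"{title} {suffix}"
    if len(combined) > SECTION_WIDTH:
        return combined
    pad = max(1, SECTION_WIDTH - len(title) - len(suffix))
    return f"{title}{' ' * pad}{suffix}"

def fmt_metric_value(value) -> str:
    # Non-numeric and non-finite values render as "N/A".
    try:
        val = float(value)
    except (TypeError, ValueError):
        return "N/A"
    return f"{val:.3f}" if math.isfinite(val) else "N/A"

line = format_section_title("EVALUATION REPORT SUMMARY", "[12.34s]")
print(len(line))                       # 67: title, 34 spaces of padding, suffix
print(fmt_metric_value("1.23456"))     # 1.235
print(fmt_metric_value(float("nan")))  # N/A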
@@ -33,6 +114,11 @@ def _generate_reports(
     compare: str | None = None,
     baseline: str | None = None,
     output: str | None = None,
+    style: str = "audit",
+    no_color: bool = False,
+    summary_baseline_seconds: float | None = None,
+    summary_subject_seconds: float | None = None,
+    summary_report_start: float | None = None,
 ) -> None:
     # This callback runs only when invoked without subcommand (default Click behavior)
     try:
@@ -55,21 +141,37 @@
         compare = _coerce_option(compare)
         baseline = _coerce_option(baseline)
         output = _coerce_option(output)
+        style = _coerce_option(style, "audit")
+        no_color = bool(_coerce_option(no_color, False))
+        summary_baseline_seconds = _coerce_option(summary_baseline_seconds)
+        summary_subject_seconds = _coerce_option(summary_subject_seconds)
+        summary_report_start = _coerce_option(summary_report_start)
+
+        output_style = resolve_output_style(
+            style=str(style),
+            profile="ci",
+            progress=False,
+            timing=False,
+            no_color=no_color,
+        )
+
+        def _event(tag: str, message: str, *, emoji: str | None = None) -> None:
+            print_event(console, tag, message, style=output_style, emoji=emoji)
 
         # Load primary report
-        console.print(f"📊 Loading run report: {run}")
+        _event("DATA", f"Loading run report: {run}", emoji="📊")
         primary_report = _load_run_report(run)
 
         # Load comparison report if specified
         compare_report = None
         if compare:
-            console.print(f"📊 Loading comparison report: {compare}")
+            _event("DATA", f"Loading comparison report: {compare}", emoji="📊")
            compare_report = _load_run_report(compare)
 
         # Load baseline report if specified
         baseline_report = None
         if baseline:
-            console.print(f"📊 Loading baseline report: {baseline}")
+            _event("DATA", f"Loading baseline report: {baseline}", emoji="📊")
            baseline_report = _load_run_report(baseline)
 
         # Determine output directory
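All user-facing prints in this path now flow through the `_event` closure, which pairs a stable tag (DATA, EXEC, PASS, FAIL, WARN, INFO) with an optional emoji and defers decoration decisions to `resolve_output_style`. A minimal usage sketch reusing the exact call shapes from the diff (assumes invarlock 0.3.8 is installed; the message text is illustrative):

from rich.console import Console

from invarlock.cli.output import print_event, resolve_output_style

console = Console()
style = resolve_output_style(
    style="audit", profile="ci", progress=False, timing=False, no_color=True
)
# Tag plus message plus optional emoji; the resolved style governs how much
# decoration (color, emoji) actually reaches the terminal.
print_event(console, "DATA", "Loading run report: runs/subject", style=style, emoji="📊")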
@@ -80,25 +182,39 @@
            output_dir = output
 
         # Determine formats
+        allowed_formats = {"json", "md", "markdown", "html", "report", "all"}
+        if format not in allowed_formats:
+            _event("FAIL", f"Unknown --format '{format}'", emoji="❌")
+            raise typer.Exit(2)
+
+        if format == "md":
+            format = "markdown"
         if format == "all":
             formats = ["json", "markdown", "html"]
         else:
             formats = [format]
 
-        # Validate certificate requirements
-        if "cert" in formats:
+        # Validate evaluation report requirements
+        if "report" in formats:
             if baseline_report is None:
-                console.print(
-                    "[red]❌ Certificate format requires --baseline parameter[/red]"
+                _event(
+                    "FAIL",
+                    "Evaluation report format requires --baseline",
+                    emoji="❌",
                 )
-                console.print(
-                    "Use: invarlock report --run <run_dir> --format cert --baseline <baseline_run_dir>"
+                _event(
+                    "INFO",
+                    "Use: invarlock report --run <run_dir> --format report --baseline <baseline_run_dir>",
                 )
                 raise typer.Exit(1)
-            console.print("📜 Generating safety certificate with baseline comparison")
+            _event(
+                "EXEC",
+                "Generating evaluation report with baseline comparison",
+                emoji="📜",
+            )
 
         # Generate reports
-        console.print(f"📝 Generating reports in formats: {formats}")
+        _event("EXEC", f"Generating reports in formats: {formats}", emoji="📝")
         saved_files = report_lib.save_report(
             primary_report,
             output_dir,
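Format handling is now validated up front: unknown formats exit with code 2, `md` is accepted as an alias for `markdown`, and `report` (replacing the old `cert`) still requires `--baseline` downstream. The normalization logic in isolation (SystemExit stands in for `typer.Exit`):

ALLOWED = {"json", "md", "markdown", "html", "report", "all"}

def normalize_format(fmt: str) -> list[str]:
    if fmt not in ALLOWED:
        raise SystemExit(2)  # CLI exits with code 2 on an unknown --format
    if fmt == "md":
        fmt = "markdown"     # alias added in 0.3.8
    return ["json", "markdown", "html"] if fmt == "all" else [fmt]

assert normalize_format("md") == ["markdown"]
assert normalize_format("all") == ["json", "markdown", "html"]
assert normalize_format("report") == ["report"]  # needs --baseline later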
@@ -109,82 +225,151 @@
         )
 
         # Show results
-        console.print("[green]✅ Reports generated successfully![/green]")
-        console.print(f"📁 Output directory: {output_dir}")
-
-        for fmt, file_path in saved_files.items():
-            if fmt == "cert":
-                console.print(f" 📜 CERTIFICATE (JSON): {file_path}")
-            elif fmt == "cert_md":
-                console.print(f" 📜 CERTIFICATE (MD): {file_path}")
-            else:
-                console.print(f" 📄 {fmt.upper()}: {file_path}")
-
-        # Show key metrics (PM-first). Avoid PPL-first wording.
-        console.print("\n📈 Key Metrics:")
-        console.print(f" Model: {primary_report['meta']['model_id']}")
-        console.print(f" Edit: {primary_report['edit']['name']}")
-        pm = (primary_report.get("metrics", {}) or {}).get("primary_metric", {})
-        if isinstance(pm, dict) and pm:
-            kind = str(pm.get("kind") or "primary")
-            console.print(f" Primary Metric: {kind}")
-            final = pm.get("final")
-            if isinstance(final, int | float):
-                console.print(f" point (final): {final:.3f}")
-            dci = pm.get("display_ci")
-            if isinstance(dci, tuple | list) and len(dci) == 2:
-                try:
-                    lo, hi = float(dci[0]), float(dci[1])
-                    console.print(f" CI: {lo:.3f}–{hi:.3f}")
-                except Exception:
-                    pass
-            ratio = pm.get("ratio_vs_baseline")
-            if isinstance(ratio, int | float):
-                console.print(f" ratio vs baseline: {ratio:.3f}")
-
-        # Show certificate validation if generated
-        if "cert" in formats and baseline_report:
+        _event("PASS", "Reports generated successfully.", emoji="✅")
+
+        if "report" in formats and baseline_report:
             try:
-                certificate = certificate_lib.make_certificate(
+                evaluation_report = report_builder.make_report(
                     primary_report, baseline_report
                 )
-                certificate_lib.validate_certificate(certificate)
+                report_builder.validate_report(evaluation_report)
                 from invarlock.reporting.render import (
                     compute_console_validation_block as _console_block,
                 )
 
-                block = _console_block(certificate)
+                block = _console_block(evaluation_report)
                 overall_pass = bool(block.get("overall_pass"))
-
-                console.print("\n📜 Certificate Validation:")
-                status_emoji = "✅" if overall_pass else "❌"
-                console.print(
-                    f" Overall Status: {status_emoji} {'PASS' if overall_pass else 'FAIL'}"
+                status_text = _format_status(overall_pass)
+
+                console.print("")
+                summary_suffix: str | None = None
+                if summary_report_start is not None:
+                    try:
+                        base = (
+                            float(summary_baseline_seconds)
+                            if summary_baseline_seconds is not None
+                            else 0.0
+                        )
+                        subject = (
+                            float(summary_subject_seconds)
+                            if summary_subject_seconds is not None
+                            else 0.0
+                        )
+                        report_elapsed = max(
+                            0.0, float(perf_counter() - float(summary_report_start))
+                        )
+                        summary_suffix = f"[{(base + subject + report_elapsed):.2f}s]"
+                    except Exception:
+                        summary_suffix = None
+                _print_section_header(
+                    console,
+                    "EVALUATION REPORT SUMMARY",
+                    suffix=summary_suffix,
                 )
+                console.print(_format_kv_line("Status", status_text))
+
+                schema_version = evaluation_report.get("schema_version")
+                if schema_version:
+                    console.print(
+                        _format_kv_line("Schema Version", str(schema_version))
+                    )
 
+                run_id = evaluation_report.get("run_id") or (
+                    (primary_report.get("meta", {}) or {}).get("run_id")
+                )
+                if run_id:
+                    console.print(_format_kv_line("Run ID", str(run_id)))
+
+                model_id = (primary_report.get("meta", {}) or {}).get("model_id")
+                edit_name = (primary_report.get("edit", {}) or {}).get("name")
+                if model_id:
+                    console.print(_format_kv_line("Model", str(model_id)))
+                if edit_name:
+                    console.print(_format_kv_line("Edit", str(edit_name)))
+
+                pm = (
+                    (evaluation_report.get("primary_metric") or {})
+                    if isinstance(evaluation_report, dict)
+                    else {}
+                )
+                if not pm:
+                    pm = (primary_report.get("metrics", {}) or {}).get(
+                        "primary_metric", {}
+                    )
+                console.print(" PRIMARY METRIC")
+                pm_entries: list[tuple[str, str]] = []
+                if isinstance(pm, dict) and pm:
+                    kind = str(pm.get("kind") or "primary")
+                    pm_entries.append(("Kind", kind))
+                    preview = pm.get("preview")
+                    if preview is not None:
+                        pm_entries.append(("Preview", _fmt_metric_value(preview)))
+                    final = pm.get("final")
+                    if final is not None:
+                        pm_entries.append(("Final", _fmt_metric_value(final)))
+                    ratio = pm.get("ratio_vs_baseline")
+                    if ratio is not None:
+                        pm_entries.append(("Ratio", _fmt_metric_value(ratio)))
+                    dci = pm.get("display_ci")
+                    ci_95 = _fmt_ci_95(dci)
+                    if ci_95 is not None:
+                        pm_entries.append(("CI (95%)", ci_95))
+                if not pm_entries:
+                    pm_entries.append(("Status", "Unavailable"))
+                for idx, (label, value) in enumerate(pm_entries):
+                    branch = "└─" if idx == len(pm_entries) - 1 else "├─"
+                    console.print(f" {branch} {label:<14} {value}")
+
+                console.print(" VALIDATION GATES")
                 rows = block.get("rows", [])
                 if isinstance(rows, list) and rows:
-                    for row in rows:
-                        try:
-                            label = row.get("label")
-                            status = row.get("status")
-                            if label and status:
-                                console.print(f" {label}: {status}")
-                        except Exception:
-                            continue
+                    for idx, row in enumerate(rows):
+                        label = str(row.get("label") or "Unknown")
+                        ok = bool(row.get("ok"))
+                        status = _format_status(ok)
+                        mark = "✓" if ok else "✗"
+                        branch = "└─" if idx == len(rows) - 1 else "├─"
+                        console.print(
+                            f" {branch} {label:<{GATE_LABEL_WIDTH}} {mark} {status}"
+                        )
+                else:
+                    console.print(f" └─ {'No validation rows':<{GATE_LABEL_WIDTH}} -")
+
+                console.print(" ARTIFACTS")
+                entries = _artifact_entries(saved_files, str(output_dir))
+                for idx, (label, value) in enumerate(entries):
+                    branch = "└─" if idx == len(entries) - 1 else "├─"
+                    console.print(f" {branch} {label:<{ARTIFACT_LABEL_WIDTH}} {value}")
+                console.print("═" * SECTION_WIDTH)
 
                 # In CLI report flow, do not hard-exit on validation failure; just display status.
                 # CI gating should be handled by dedicated verify commands.
 
             except Exception as e:
+                _event("WARN", f"Evaluation report validation error: {e}", emoji="⚠️")
+                # Exit non-zero on evaluation report generation error
+                raise typer.Exit(1) from e
+        else:
+            console.print(_format_kv_line("Output", str(output_dir)))
+            for label, value in _artifact_entries(saved_files, str(output_dir))[1:]:
                 console.print(
-                    f" [yellow]⚠️ Certificate validation error: {e}[/yellow]"
+                    _format_kv_line(label, str(value), width=ARTIFACT_LABEL_WIDTH)
                 )
-                # Exit non-zero on certificate generation error
-                raise typer.Exit(1) from e
 
     except Exception as e:
-        console.print(f"[red]❌ Report generation failed: {e}[/red]")
+        print_event(
+            console,
+            "FAIL",
+            f"Report generation failed: {e}",
+            style=resolve_output_style(
+                style="audit",
+                profile="ci",
+                progress=False,
+                timing=False,
+                no_color=False,
+            ),
+            emoji="❌",
+        )
        raise typer.Exit(1) from e
 
 
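The summary block replaces the old flat "Key Metrics" dump with a fixed-width section: the header suffix folds together baseline seconds, subject seconds, and the report phase timed from `summary_report_start` via `perf_counter()`, and each section renders as a small tree with `└─` reserved for the last row. A self-contained sketch of the row rendering, fed a hypothetical `primary_metric` payload (field names match the diff):

pm = {
    "kind": "ppl",
    "final": 12.3456,
    "ratio_vs_baseline": 1.0123,
    "display_ci": [11.9, 12.8],
}

pm_entries = [
    ("Kind", str(pm["kind"])),
    ("Final", f"{pm['final']:.3f}"),
    ("Ratio", f"{pm['ratio_vs_baseline']:.3f}"),
    ("CI (95%)", f"[{pm['display_ci'][0]:.3f}, {pm['display_ci'][1]:.3f}]"),
]

for idx, (label, value) in enumerate(pm_entries):
    branch = "└─" if idx == len(pm_entries) - 1 else "├─"
    print(f" {branch} {label:<14} {value}")
# ├─ Kind           ppl
# ├─ Final          12.346
# ├─ Ratio          1.012
# └─ CI (95%)       [11.900, 12.800]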
@@ -195,7 +380,7 @@ def report_callback(
         None, "--run", help="Path to run directory or RunReport JSON"
     ),
     format: str = typer.Option(
-        "json", "--format", help="Output format (json|md|html|cert|all)"
+        "json", "--format", help="Output format (json|md|html|report|all)"
     ),
     compare: str | None = typer.Option(
         None, "--compare", help="Path to second run for comparison"
@@ -203,18 +388,40 @@
     baseline: str | None = typer.Option(
         None,
         "--baseline",
-        help="Path to baseline run for certificate generation (required for cert format)",
+        help="Path to baseline run for evaluation report generation (required for report format)",
     ),
     output: str | None = typer.Option(None, "--output", "-o", help="Output directory"),
+    style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
+    no_color: bool = typer.Option(
+        False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
+    ),
 ):
     """Generate a report from a run (default callback)."""
     if getattr(ctx, "resilient_parsing", False) or ctx.invoked_subcommand is not None:
         return
     if not run:
-        console.print("[red]❌ --run is required when no subcommand is provided[/red]")
+        print_event(
+            console,
+            "FAIL",
+            "--run is required when no subcommand is provided",
+            style=resolve_output_style(
+                style=str(style),
+                profile="ci",
+                progress=False,
+                timing=False,
+                no_color=no_color,
+            ),
+            emoji="❌",
+        )
         raise typer.Exit(2)
     return _generate_reports(
-        run=run, format=format, compare=compare, baseline=baseline, output=output
+        run=run,
+        format=format,
+        compare=compare,
+        baseline=baseline,
+        output=output,
+        style=style,
+        no_color=no_color,
     )
 
 
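The new `--style` and `--no-color` flags ride along the default callback, so the whole flow can be exercised end to end with Typer's test runner. A hypothetical invocation (the run and baseline directories are placeholders):

from typer.testing import CliRunner

from invarlock.cli.commands.report import report_app

runner = CliRunner()
result = runner.invoke(
    report_app,
    [
        "--run", "runs/subject",
        "--format", "report",
        "--baseline", "runs/baseline",
        "--style", "audit",
        "--no-color",
    ],
)
print(result.exit_code)  # 0 on success; 2 for a missing --run or unknown --format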
@@ -225,9 +432,23 @@ def report_command(
     compare: str | None = None,
     baseline: str | None = None,
     output: str | None = None,
+    style: str = "audit",
+    no_color: bool = False,
+    summary_baseline_seconds: float | None = None,
+    summary_subject_seconds: float | None = None,
+    summary_report_start: float | None = None,
 ):
     return _generate_reports(
-        run=run, format=format, compare=compare, baseline=baseline, output=output
+        run=run,
+        format=format,
+        compare=compare,
+        baseline=baseline,
+        output=output,
+        style=style,
+        no_color=no_color,
+        summary_baseline_seconds=summary_baseline_seconds,
+        summary_subject_seconds=summary_subject_seconds,
+        summary_report_start=summary_report_start,
     )
 
 
@@ -254,16 +475,16 @@ def _load_run_report(path: str) -> dict:
 
 # Subcommands wired from existing modules
 @report_app.command(
-    name="verify", help="Recompute and verify metrics for a report/cert."
+    name="verify", help="Recompute and verify metrics for evaluation reports."
 )
 def report_verify_command(
-    certificates: list[str] = typer.Argument(
-        ..., help="One or more certificate JSON files to verify."
+    reports: list[str] = typer.Argument(
+        ..., help="One or more evaluation report JSON files to verify."
     ),
     baseline: str | None = typer.Option(
         None,
         "--baseline",
-        help="Optional baseline certificate/report JSON to enforce provider parity.",
+        help="Optional baseline evaluation report JSON to enforce provider parity.",
     ),
     tolerance: float = typer.Option(
         1e-9, "--tolerance", help="Tolerance for analysis-basis comparisons."
@@ -278,10 +499,10 @@ def report_verify_command(
 
     from .verify import verify_command as _verify_command
 
-    cert_paths = [_Path(c) for c in certificates]
+    report_paths = [_Path(p) for p in reports]
     baseline_path = _Path(baseline) if isinstance(baseline, str) else None
     return _verify_command(
-        certificates=cert_paths,
+        reports=report_paths,
         baseline=baseline_path,
         tolerance=tolerance,
         profile=profile,
@@ -289,7 +510,7 @@ def report_verify_command(
 
 
 @report_app.command(
-    name="explain", help="Explain certificate gates for report vs baseline."
+    name="explain", help="Explain evaluation report gates for report vs baseline."
 )
 def report_explain(
     report: str = typer.Option(..., "--report", help="Path to primary report.json"),
@@ -297,15 +518,17 @@ def report_explain(
         ..., "--baseline", help="Path to baseline report.json"
     ),
 ):  # pragma: no cover - thin wrapper
-    """Explain certificate gates for a report vs baseline."""
+    """Explain evaluation report gates for a report vs baseline."""
     from .explain_gates import explain_gates_command as _explain
 
     return _explain(report=report, baseline=baseline)
 
 
-@report_app.command(name="html", help="Render a certificate JSON to HTML.")
+@report_app.command(name="html", help="Render an evaluation report JSON to HTML.")
 def report_html(
-    input: str = typer.Option(..., "--input", "-i", help="Path to certificate JSON"),
+    input: str = typer.Option(
+        ..., "--input", "-i", help="Path to evaluation report JSON"
+    ),
     output: str = typer.Option(..., "--output", "-o", help="Path to output HTML file"),
     embed_css: bool = typer.Option(
         True, "--embed-css/--no-embed-css", help="Inline a minimal static stylesheet"
@@ -322,32 +545,43 @@
 @report_app.command("validate")
 def report_validate(
     report: str = typer.Argument(
-        ..., help="Path to certificate JSON to validate against schema v1"
+        ..., help="Path to evaluation report JSON to validate against schema v1"
     ),
 ):
-    """Validate a certificate JSON against the current schema (v1)."""
+    """Validate an evaluation report JSON against the current schema (v1)."""
+    output_style = resolve_output_style(
+        style="audit",
+        profile="ci",
+        progress=False,
+        timing=False,
+        no_color=False,
+    )
+
+    def _event(tag: str, message: str, *, emoji: str | None = None) -> None:
+        print_event(console, tag, message, style=output_style, emoji=emoji)
+
     p = Path(report)
     try:
         payload = json.loads(p.read_text(encoding="utf-8"))
     except Exception as exc:  # noqa: BLE001
-        console.print(f"[red]❌ Failed to read input JSON: {exc}[/red]")
+        _event("FAIL", f"Failed to read input JSON: {exc}", emoji="❌")
         raise typer.Exit(1) from exc
 
     try:
-        from invarlock.reporting.certificate import validate_certificate
+        from invarlock.reporting.report_builder import validate_report
 
-        ok = validate_certificate(payload)
+        ok = validate_report(payload)
         if not ok:
-            console.print("[red]❌ Certificate schema validation failed[/red]")
+            _event("FAIL", "Evaluation report schema validation failed", emoji="❌")
             raise typer.Exit(2)
-        console.print("✅ Certificate schema is valid")
+        _event("PASS", "Evaluation report schema is valid", emoji="✅")
     except ValueError as exc:
-        console.print(f"[red]❌ Certificate validation error: {exc}[/red]")
+        _event("FAIL", f"Evaluation report validation error: {exc}", emoji="❌")
         raise typer.Exit(2) from exc
     except typer.Exit:
         raise
     except Exception as exc:  # noqa: BLE001
-        console.print(f"[red]❌ Validation failed: {exc}[/red]")
+        _event("FAIL", f"Validation failed: {exc}", emoji="❌")
         raise typer.Exit(1) from exc
 
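`report validate` is now a thin wrapper over `invarlock.reporting.report_builder.validate_report` (the renamed `validate_certificate`). A programmatic equivalent, assuming the exception contract shown above (False for a schema miss, ValueError for a malformed payload; `report.json` is a placeholder path):

import json
from pathlib import Path

from invarlock.reporting.report_builder import validate_report

payload = json.loads(Path("report.json").read_text(encoding="utf-8"))
try:
    ok = validate_report(payload)
except ValueError as exc:
    raise SystemExit(f"validation error: {exc}")  # CLI maps this to exit code 2
if not ok:
    raise SystemExit("schema validation failed")  # CLI exit code 2
print("evaluation report schema is valid")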