invarlock 0.3.7__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. invarlock/__init__.py +3 -3
  2. invarlock/adapters/auto.py +2 -10
  3. invarlock/adapters/hf_loading.py +7 -7
  4. invarlock/adapters/hf_mixin.py +28 -5
  5. invarlock/assurance/__init__.py +15 -23
  6. invarlock/cli/adapter_auto.py +1 -5
  7. invarlock/cli/app.py +57 -27
  8. invarlock/cli/commands/__init__.py +2 -2
  9. invarlock/cli/commands/calibrate.py +48 -4
  10. invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
  11. invarlock/cli/commands/explain_gates.py +25 -17
  12. invarlock/cli/commands/export_html.py +11 -9
  13. invarlock/cli/commands/report.py +116 -46
  14. invarlock/cli/commands/run.py +274 -66
  15. invarlock/cli/commands/verify.py +84 -89
  16. invarlock/cli/determinism.py +1 -1
  17. invarlock/cli/provenance.py +3 -3
  18. invarlock/core/bootstrap.py +1 -1
  19. invarlock/core/retry.py +14 -14
  20. invarlock/core/runner.py +1 -1
  21. invarlock/edits/noop.py +2 -2
  22. invarlock/edits/quant_rtn.py +2 -2
  23. invarlock/eval/__init__.py +1 -1
  24. invarlock/eval/bench.py +11 -7
  25. invarlock/eval/primary_metric.py +1 -1
  26. invarlock/guards/spectral.py +1 -1
  27. invarlock/model_profile.py +16 -35
  28. invarlock/plugins/hf_bnb_adapter.py +32 -21
  29. invarlock/reporting/__init__.py +18 -4
  30. invarlock/reporting/html.py +7 -7
  31. invarlock/reporting/normalizer.py +2 -2
  32. invarlock/reporting/policy_utils.py +1 -1
  33. invarlock/reporting/primary_metric_utils.py +11 -11
  34. invarlock/reporting/render.py +126 -120
  35. invarlock/reporting/report.py +43 -37
  36. invarlock/reporting/{certificate.py → report_builder.py} +98 -95
  37. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  38. invarlock-0.3.8.dist-info/METADATA +283 -0
  39. {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/RECORD +43 -43
  40. {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  41. invarlock-0.3.7.dist-info/METADATA +0 -602
  42. {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +0 -0
  43. {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  44. {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
@@ -10,14 +10,15 @@ Provides the `invarlock report` group with:
10
10
  import json
11
11
  import math
12
12
  from pathlib import Path
13
+ from time import perf_counter
13
14
  from typing import Any
14
15
 
15
16
  import typer
16
17
  from rich.console import Console
17
18
 
18
19
  from invarlock.cli.output import print_event, resolve_output_style
19
- from invarlock.reporting import certificate as certificate_lib
20
20
  from invarlock.reporting import report as report_lib
21
+ from invarlock.reporting import report_builder as report_builder
21
22
 
22
23
  console = Console()
23
24
 
@@ -27,10 +28,22 @@ GATE_LABEL_WIDTH = 32
27
28
  ARTIFACT_LABEL_WIDTH = 18
28
29
 
29
30
 
30
- def _print_section_header(console: Console, title: str) -> None:
31
+ def _format_section_title(title: str, *, suffix: str | None = None) -> str:
32
+ if not suffix:
33
+ return title
34
+ combined = f"{title} {suffix}"
35
+ if len(combined) > SECTION_WIDTH:
36
+ return combined
37
+ pad = max(1, SECTION_WIDTH - len(title) - len(suffix))
38
+ return f"{title}{' ' * pad}{suffix}"
39
+
40
+
41
+ def _print_section_header(
42
+ console: Console, title: str, *, suffix: str | None = None
43
+ ) -> None:
31
44
  bar = "═" * SECTION_WIDTH
32
45
  console.print(bar)
33
- console.print(title)
46
+ console.print(_format_section_title(title, suffix=suffix))
34
47
  console.print(bar)
35
48
 
36
49
 
@@ -52,24 +65,24 @@ def _fmt_metric_value(value: Any) -> str:
52
65
  return f"{val:.3f}"
53
66
 
54
67
 
55
- def _fmt_ci_range(ci: Any) -> str:
68
+ def _fmt_ci_95(ci: Any) -> str | None:
56
69
  if isinstance(ci, (list, tuple)) and len(ci) == 2:
57
70
  try:
58
71
  lo = float(ci[0])
59
72
  hi = float(ci[1])
60
73
  except (TypeError, ValueError):
61
- return "N/A"
74
+ return None
62
75
  if math.isfinite(lo) and math.isfinite(hi):
63
- return f"{lo:.3f}{hi:.3f}"
64
- return "N/A"
76
+ return f"[{lo:.3f}, {hi:.3f}]"
77
+ return None
65
78
 
66
79
 
67
80
  def _artifact_entries(
68
81
  saved_files: dict[str, str], output_dir: str
69
82
  ) -> list[tuple[str, str]]:
70
83
  order = [
71
- ("cert", "Certificate (JSON)"),
72
- ("cert_md", "Certificate (MD)"),
84
+ ("report", "Evaluation Report (JSON)"),
85
+ ("report_md", "Evaluation Report (MD)"),
73
86
  ("json", "JSON"),
74
87
  ("markdown", "Markdown"),
75
88
  ("html", "HTML"),
@@ -89,7 +102,7 @@ def _artifact_entries(
89
102
 
90
103
  # Group with callback so `invarlock report` still generates reports
91
104
  report_app = typer.Typer(
92
- help="Operations on reports and certificates (verify, explain, html, validate).",
105
+ help="Operations on run reports and evaluation reports (verify, explain, html, validate).",
93
106
  invoke_without_command=True,
94
107
  )
95
108
 
@@ -103,6 +116,9 @@ def _generate_reports(
103
116
  output: str | None = None,
104
117
  style: str = "audit",
105
118
  no_color: bool = False,
119
+ summary_baseline_seconds: float | None = None,
120
+ summary_subject_seconds: float | None = None,
121
+ summary_report_start: float | None = None,
106
122
  ) -> None:
107
123
  # This callback runs only when invoked without subcommand (default Click behavior)
108
124
  try:
@@ -127,6 +143,9 @@ def _generate_reports(
127
143
  output = _coerce_option(output)
128
144
  style = _coerce_option(style, "audit")
129
145
  no_color = bool(_coerce_option(no_color, False))
146
+ summary_baseline_seconds = _coerce_option(summary_baseline_seconds)
147
+ summary_subject_seconds = _coerce_option(summary_subject_seconds)
148
+ summary_report_start = _coerce_option(summary_report_start)
130
149
 
131
150
  output_style = resolve_output_style(
132
151
  style=str(style),
@@ -163,23 +182,34 @@ def _generate_reports(
163
182
  output_dir = output
164
183
 
165
184
  # Determine formats
185
+ allowed_formats = {"json", "md", "markdown", "html", "report", "all"}
186
+ if format not in allowed_formats:
187
+ _event("FAIL", f"Unknown --format '{format}'", emoji="❌")
188
+ raise typer.Exit(2)
189
+
190
+ if format == "md":
191
+ format = "markdown"
166
192
  if format == "all":
167
193
  formats = ["json", "markdown", "html"]
168
194
  else:
169
195
  formats = [format]
170
196
 
171
- # Validate certificate requirements
172
- if "cert" in formats:
197
+ # Validate evaluation report requirements
198
+ if "report" in formats:
173
199
  if baseline_report is None:
174
- _event("FAIL", "Certificate format requires --baseline", emoji="❌")
200
+ _event(
201
+ "FAIL",
202
+ "Evaluation report format requires --baseline",
203
+ emoji="❌",
204
+ )
175
205
  _event(
176
206
  "INFO",
177
- "Use: invarlock report --run <run_dir> --format cert --baseline <baseline_run_dir>",
207
+ "Use: invarlock report --run <run_dir> --format report --baseline <baseline_run_dir>",
178
208
  )
179
209
  raise typer.Exit(1)
180
210
  _event(
181
211
  "EXEC",
182
- "Generating evaluation certificate with baseline comparison",
212
+ "Generating evaluation report with baseline comparison",
183
213
  emoji="📜",
184
214
  )
185
215
 
@@ -197,31 +227,54 @@ def _generate_reports(
197
227
  # Show results
198
228
  _event("PASS", "Reports generated successfully.", emoji="✅")
199
229
 
200
- if "cert" in formats and baseline_report:
230
+ if "report" in formats and baseline_report:
201
231
  try:
202
- certificate = certificate_lib.make_certificate(
232
+ evaluation_report = report_builder.make_report(
203
233
  primary_report, baseline_report
204
234
  )
205
- certificate_lib.validate_certificate(certificate)
235
+ report_builder.validate_report(evaluation_report)
206
236
  from invarlock.reporting.render import (
207
237
  compute_console_validation_block as _console_block,
208
238
  )
209
239
 
210
- block = _console_block(certificate)
240
+ block = _console_block(evaluation_report)
211
241
  overall_pass = bool(block.get("overall_pass"))
212
242
  status_text = _format_status(overall_pass)
213
243
 
214
244
  console.print("")
215
- _print_section_header(console, "CERTIFICATE SUMMARY")
245
+ summary_suffix: str | None = None
246
+ if summary_report_start is not None:
247
+ try:
248
+ base = (
249
+ float(summary_baseline_seconds)
250
+ if summary_baseline_seconds is not None
251
+ else 0.0
252
+ )
253
+ subject = (
254
+ float(summary_subject_seconds)
255
+ if summary_subject_seconds is not None
256
+ else 0.0
257
+ )
258
+ report_elapsed = max(
259
+ 0.0, float(perf_counter() - float(summary_report_start))
260
+ )
261
+ summary_suffix = f"[{(base + subject + report_elapsed):.2f}s]"
262
+ except Exception:
263
+ summary_suffix = None
264
+ _print_section_header(
265
+ console,
266
+ "EVALUATION REPORT SUMMARY",
267
+ suffix=summary_suffix,
268
+ )
216
269
  console.print(_format_kv_line("Status", status_text))
217
270
 
218
- schema_version = certificate.get("schema_version")
271
+ schema_version = evaluation_report.get("schema_version")
219
272
  if schema_version:
220
273
  console.print(
221
274
  _format_kv_line("Schema Version", str(schema_version))
222
275
  )
223
276
 
224
- run_id = certificate.get("run_id") or (
277
+ run_id = evaluation_report.get("run_id") or (
225
278
  (primary_report.get("meta", {}) or {}).get("run_id")
226
279
  )
227
280
  if run_id:
@@ -234,7 +287,15 @@ def _generate_reports(
234
287
  if edit_name:
235
288
  console.print(_format_kv_line("Edit", str(edit_name)))
236
289
 
237
- pm = (primary_report.get("metrics", {}) or {}).get("primary_metric", {})
290
+ pm = (
291
+ (evaluation_report.get("primary_metric") or {})
292
+ if isinstance(evaluation_report, dict)
293
+ else {}
294
+ )
295
+ if not pm:
296
+ pm = (primary_report.get("metrics", {}) or {}).get(
297
+ "primary_metric", {}
298
+ )
238
299
  console.print(" PRIMARY METRIC")
239
300
  pm_entries: list[tuple[str, str]] = []
240
301
  if isinstance(pm, dict) and pm:
@@ -250,8 +311,9 @@ def _generate_reports(
250
311
  if ratio is not None:
251
312
  pm_entries.append(("Ratio", _fmt_metric_value(ratio)))
252
313
  dci = pm.get("display_ci")
253
- if dci is not None:
254
- pm_entries.append(("CI", _fmt_ci_range(dci)))
314
+ ci_95 = _fmt_ci_95(dci)
315
+ if ci_95 is not None:
316
+ pm_entries.append(("CI (95%)", ci_95))
255
317
  if not pm_entries:
256
318
  pm_entries.append(("Status", "Unavailable"))
257
319
  for idx, (label, value) in enumerate(pm_entries):
@@ -284,8 +346,8 @@ def _generate_reports(
284
346
  # CI gating should be handled by dedicated verify commands.
285
347
 
286
348
  except Exception as e:
287
- _event("WARN", f"Certificate validation error: {e}", emoji="⚠️")
288
- # Exit non-zero on certificate generation error
349
+ _event("WARN", f"Evaluation report validation error: {e}", emoji="⚠️")
350
+ # Exit non-zero on evaluation report generation error
289
351
  raise typer.Exit(1) from e
290
352
  else:
291
353
  console.print(_format_kv_line("Output", str(output_dir)))
@@ -318,7 +380,7 @@ def report_callback(
318
380
  None, "--run", help="Path to run directory or RunReport JSON"
319
381
  ),
320
382
  format: str = typer.Option(
321
- "json", "--format", help="Output format (json|md|html|cert|all)"
383
+ "json", "--format", help="Output format (json|md|html|report|all)"
322
384
  ),
323
385
  compare: str | None = typer.Option(
324
386
  None, "--compare", help="Path to second run for comparison"
@@ -326,7 +388,7 @@ def report_callback(
326
388
  baseline: str | None = typer.Option(
327
389
  None,
328
390
  "--baseline",
329
- help="Path to baseline run for certificate generation (required for cert format)",
391
+ help="Path to baseline run for evaluation report generation (required for report format)",
330
392
  ),
331
393
  output: str | None = typer.Option(None, "--output", "-o", help="Output directory"),
332
394
  style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
@@ -372,6 +434,9 @@ def report_command(
372
434
  output: str | None = None,
373
435
  style: str = "audit",
374
436
  no_color: bool = False,
437
+ summary_baseline_seconds: float | None = None,
438
+ summary_subject_seconds: float | None = None,
439
+ summary_report_start: float | None = None,
375
440
  ):
376
441
  return _generate_reports(
377
442
  run=run,
@@ -381,6 +446,9 @@ def report_command(
381
446
  output=output,
382
447
  style=style,
383
448
  no_color=no_color,
449
+ summary_baseline_seconds=summary_baseline_seconds,
450
+ summary_subject_seconds=summary_subject_seconds,
451
+ summary_report_start=summary_report_start,
384
452
  )
385
453
 
386
454
 
@@ -407,16 +475,16 @@ def _load_run_report(path: str) -> dict:
407
475
 
408
476
  # Subcommands wired from existing modules
409
477
  @report_app.command(
410
- name="verify", help="Recompute and verify metrics for a report/cert."
478
+ name="verify", help="Recompute and verify metrics for evaluation reports."
411
479
  )
412
480
  def report_verify_command(
413
- certificates: list[str] = typer.Argument(
414
- ..., help="One or more certificate JSON files to verify."
481
+ reports: list[str] = typer.Argument(
482
+ ..., help="One or more evaluation report JSON files to verify."
415
483
  ),
416
484
  baseline: str | None = typer.Option(
417
485
  None,
418
486
  "--baseline",
419
- help="Optional baseline certificate/report JSON to enforce provider parity.",
487
+ help="Optional baseline evaluation report JSON to enforce provider parity.",
420
488
  ),
421
489
  tolerance: float = typer.Option(
422
490
  1e-9, "--tolerance", help="Tolerance for analysis-basis comparisons."
@@ -431,10 +499,10 @@ def report_verify_command(
431
499
 
432
500
  from .verify import verify_command as _verify_command
433
501
 
434
- cert_paths = [_Path(c) for c in certificates]
502
+ report_paths = [_Path(p) for p in reports]
435
503
  baseline_path = _Path(baseline) if isinstance(baseline, str) else None
436
504
  return _verify_command(
437
- certificates=cert_paths,
505
+ reports=report_paths,
438
506
  baseline=baseline_path,
439
507
  tolerance=tolerance,
440
508
  profile=profile,
@@ -442,7 +510,7 @@ def report_verify_command(
442
510
 
443
511
 
444
512
  @report_app.command(
445
- name="explain", help="Explain certificate gates for report vs baseline."
513
+ name="explain", help="Explain evaluation report gates for report vs baseline."
446
514
  )
447
515
  def report_explain(
448
516
  report: str = typer.Option(..., "--report", help="Path to primary report.json"),
@@ -450,15 +518,17 @@ def report_explain(
450
518
  ..., "--baseline", help="Path to baseline report.json"
451
519
  ),
452
520
  ): # pragma: no cover - thin wrapper
453
- """Explain certificate gates for a report vs baseline."""
521
+ """Explain evaluation report gates for a report vs baseline."""
454
522
  from .explain_gates import explain_gates_command as _explain
455
523
 
456
524
  return _explain(report=report, baseline=baseline)
457
525
 
458
526
 
459
- @report_app.command(name="html", help="Render a certificate JSON to HTML.")
527
+ @report_app.command(name="html", help="Render an evaluation report JSON to HTML.")
460
528
  def report_html(
461
- input: str = typer.Option(..., "--input", "-i", help="Path to certificate JSON"),
529
+ input: str = typer.Option(
530
+ ..., "--input", "-i", help="Path to evaluation report JSON"
531
+ ),
462
532
  output: str = typer.Option(..., "--output", "-o", help="Path to output HTML file"),
463
533
  embed_css: bool = typer.Option(
464
534
  True, "--embed-css/--no-embed-css", help="Inline a minimal static stylesheet"
@@ -475,10 +545,10 @@ def report_html(
475
545
  @report_app.command("validate")
476
546
  def report_validate(
477
547
  report: str = typer.Argument(
478
- ..., help="Path to certificate JSON to validate against schema v1"
548
+ ..., help="Path to evaluation report JSON to validate against schema v1"
479
549
  ),
480
550
  ):
481
- """Validate a certificate JSON against the current schema (v1)."""
551
+ """Validate an evaluation report JSON against the current schema (v1)."""
482
552
  output_style = resolve_output_style(
483
553
  style="audit",
484
554
  profile="ci",
@@ -498,15 +568,15 @@ def report_validate(
498
568
  raise typer.Exit(1) from exc
499
569
 
500
570
  try:
501
- from invarlock.reporting.certificate import validate_certificate
571
+ from invarlock.reporting.report_builder import validate_report
502
572
 
503
- ok = validate_certificate(payload)
573
+ ok = validate_report(payload)
504
574
  if not ok:
505
- _event("FAIL", "Certificate schema validation failed", emoji="❌")
575
+ _event("FAIL", "Evaluation report schema validation failed", emoji="❌")
506
576
  raise typer.Exit(2)
507
- _event("PASS", "Certificate schema is valid", emoji="✅")
577
+ _event("PASS", "Evaluation report schema is valid", emoji="✅")
508
578
  except ValueError as exc:
509
- _event("FAIL", f"Certificate validation error: {exc}", emoji="❌")
579
+ _event("FAIL", f"Evaluation report validation error: {exc}", emoji="❌")
510
580
  raise typer.Exit(2) from exc
511
581
  except typer.Exit:
512
582
  raise