invarlock 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. invarlock/__init__.py +3 -3
  2. invarlock/adapters/auto.py +2 -10
  3. invarlock/adapters/hf_loading.py +7 -7
  4. invarlock/adapters/hf_mixin.py +28 -5
  5. invarlock/assurance/__init__.py +15 -23
  6. invarlock/calibration/spectral_null.py +1 -1
  7. invarlock/cli/adapter_auto.py +1 -5
  8. invarlock/cli/app.py +57 -27
  9. invarlock/cli/commands/__init__.py +2 -2
  10. invarlock/cli/commands/calibrate.py +48 -4
  11. invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
  12. invarlock/cli/commands/explain_gates.py +94 -51
  13. invarlock/cli/commands/export_html.py +11 -9
  14. invarlock/cli/commands/report.py +121 -47
  15. invarlock/cli/commands/run.py +274 -66
  16. invarlock/cli/commands/verify.py +84 -89
  17. invarlock/cli/determinism.py +1 -1
  18. invarlock/cli/provenance.py +3 -3
  19. invarlock/core/bootstrap.py +1 -1
  20. invarlock/core/retry.py +14 -14
  21. invarlock/core/runner.py +1 -1
  22. invarlock/edits/noop.py +2 -2
  23. invarlock/edits/quant_rtn.py +2 -2
  24. invarlock/eval/__init__.py +1 -1
  25. invarlock/eval/bench.py +11 -7
  26. invarlock/eval/primary_metric.py +1 -1
  27. invarlock/guards/spectral.py +2 -2
  28. invarlock/guards_ref/spectral_ref.py +1 -1
  29. invarlock/model_profile.py +16 -35
  30. invarlock/observability/health.py +38 -20
  31. invarlock/plugins/hf_bnb_adapter.py +32 -21
  32. invarlock/reporting/__init__.py +18 -4
  33. invarlock/reporting/html.py +7 -7
  34. invarlock/reporting/normalizer.py +2 -2
  35. invarlock/reporting/policy_utils.py +1 -1
  36. invarlock/reporting/primary_metric_utils.py +11 -11
  37. invarlock/reporting/render.py +126 -120
  38. invarlock/reporting/report.py +43 -37
  39. invarlock/reporting/{certificate.py → report_builder.py} +103 -99
  40. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  41. invarlock-0.3.9.dist-info/METADATA +303 -0
  42. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/RECORD +46 -46
  43. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/WHEEL +1 -1
  44. invarlock-0.3.7.dist-info/METADATA +0 -602
  45. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/entry_points.txt +0 -0
  46. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/licenses/LICENSE +0 -0
  47. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/top_level.txt +0 -0
@@ -10,14 +10,15 @@ Provides the `invarlock report` group with:
10
10
  import json
11
11
  import math
12
12
  from pathlib import Path
13
+ from time import perf_counter
13
14
  from typing import Any
14
15
 
15
16
  import typer
16
17
  from rich.console import Console
17
18
 
18
19
  from invarlock.cli.output import print_event, resolve_output_style
19
- from invarlock.reporting import certificate as certificate_lib
20
20
  from invarlock.reporting import report as report_lib
21
+ from invarlock.reporting import report_builder as report_builder
21
22
 
22
23
  console = Console()
23
24
 
@@ -27,10 +28,22 @@ GATE_LABEL_WIDTH = 32
27
28
  ARTIFACT_LABEL_WIDTH = 18
28
29
 
29
30
 
30
- def _print_section_header(console: Console, title: str) -> None:
31
+ def _format_section_title(title: str, *, suffix: str | None = None) -> str:
32
+ if not suffix:
33
+ return title
34
+ combined = f"{title} {suffix}"
35
+ if len(combined) > SECTION_WIDTH:
36
+ return combined
37
+ pad = max(1, SECTION_WIDTH - len(title) - len(suffix))
38
+ return f"{title}{' ' * pad}{suffix}"
39
+
40
+
41
+ def _print_section_header(
42
+ console: Console, title: str, *, suffix: str | None = None
43
+ ) -> None:
31
44
  bar = "═" * SECTION_WIDTH
32
45
  console.print(bar)
33
- console.print(title)
46
+ console.print(_format_section_title(title, suffix=suffix))
34
47
  console.print(bar)
35
48
 
36
49
 
@@ -52,24 +65,24 @@ def _fmt_metric_value(value: Any) -> str:
52
65
  return f"{val:.3f}"
53
66
 
54
67
 
55
- def _fmt_ci_range(ci: Any) -> str:
68
+ def _fmt_ci_95(ci: Any) -> str | None:
56
69
  if isinstance(ci, (list, tuple)) and len(ci) == 2:
57
70
  try:
58
71
  lo = float(ci[0])
59
72
  hi = float(ci[1])
60
73
  except (TypeError, ValueError):
61
- return "N/A"
74
+ return None
62
75
  if math.isfinite(lo) and math.isfinite(hi):
63
- return f"{lo:.3f}{hi:.3f}"
64
- return "N/A"
76
+ return f"[{lo:.3f}, {hi:.3f}]"
77
+ return None
65
78
 
66
79
 
67
80
  def _artifact_entries(
68
81
  saved_files: dict[str, str], output_dir: str
69
82
  ) -> list[tuple[str, str]]:
70
83
  order = [
71
- ("cert", "Certificate (JSON)"),
72
- ("cert_md", "Certificate (MD)"),
84
+ ("report", "Evaluation Report (JSON)"),
85
+ ("report_md", "Evaluation Report (MD)"),
73
86
  ("json", "JSON"),
74
87
  ("markdown", "Markdown"),
75
88
  ("html", "HTML"),
@@ -89,7 +102,7 @@ def _artifact_entries(
89
102
 
90
103
  # Group with callback so `invarlock report` still generates reports
91
104
  report_app = typer.Typer(
92
- help="Operations on reports and certificates (verify, explain, html, validate).",
105
+ help="Operations on run reports and evaluation reports (verify, explain, html, validate).",
93
106
  invoke_without_command=True,
94
107
  )
95
108
 
@@ -103,6 +116,9 @@ def _generate_reports(
103
116
  output: str | None = None,
104
117
  style: str = "audit",
105
118
  no_color: bool = False,
119
+ summary_baseline_seconds: float | None = None,
120
+ summary_subject_seconds: float | None = None,
121
+ summary_report_start: float | None = None,
106
122
  ) -> None:
107
123
  # This callback runs only when invoked without subcommand (default Click behavior)
108
124
  try:
@@ -127,6 +143,9 @@ def _generate_reports(
127
143
  output = _coerce_option(output)
128
144
  style = _coerce_option(style, "audit")
129
145
  no_color = bool(_coerce_option(no_color, False))
146
+ summary_baseline_seconds = _coerce_option(summary_baseline_seconds)
147
+ summary_subject_seconds = _coerce_option(summary_subject_seconds)
148
+ summary_report_start = _coerce_option(summary_report_start)
130
149
 
131
150
  output_style = resolve_output_style(
132
151
  style=str(style),
@@ -163,23 +182,34 @@ def _generate_reports(
163
182
  output_dir = output
164
183
 
165
184
  # Determine formats
185
+ allowed_formats = {"json", "md", "markdown", "html", "report", "all"}
186
+ if format not in allowed_formats:
187
+ _event("FAIL", f"Unknown --format '{format}'", emoji="❌")
188
+ raise typer.Exit(2)
189
+
190
+ if format == "md":
191
+ format = "markdown"
166
192
  if format == "all":
167
193
  formats = ["json", "markdown", "html"]
168
194
  else:
169
195
  formats = [format]
170
196
 
171
- # Validate certificate requirements
172
- if "cert" in formats:
197
+ # Validate evaluation report requirements
198
+ if "report" in formats:
173
199
  if baseline_report is None:
174
- _event("FAIL", "Certificate format requires --baseline", emoji="❌")
200
+ _event(
201
+ "FAIL",
202
+ "Evaluation report format requires --baseline",
203
+ emoji="❌",
204
+ )
175
205
  _event(
176
206
  "INFO",
177
- "Use: invarlock report --run <run_dir> --format cert --baseline <baseline_run_dir>",
207
+ "Use: invarlock report --run <run_dir> --format report --baseline <baseline_run_dir>",
178
208
  )
179
209
  raise typer.Exit(1)
180
210
  _event(
181
211
  "EXEC",
182
- "Generating evaluation certificate with baseline comparison",
212
+ "Generating evaluation report with baseline comparison",
183
213
  emoji="📜",
184
214
  )
185
215
 
@@ -197,31 +227,54 @@ def _generate_reports(
197
227
  # Show results
198
228
  _event("PASS", "Reports generated successfully.", emoji="✅")
199
229
 
200
- if "cert" in formats and baseline_report:
230
+ if "report" in formats and baseline_report:
201
231
  try:
202
- certificate = certificate_lib.make_certificate(
232
+ evaluation_report = report_builder.make_report(
203
233
  primary_report, baseline_report
204
234
  )
205
- certificate_lib.validate_certificate(certificate)
235
+ report_builder.validate_report(evaluation_report)
206
236
  from invarlock.reporting.render import (
207
237
  compute_console_validation_block as _console_block,
208
238
  )
209
239
 
210
- block = _console_block(certificate)
240
+ block = _console_block(evaluation_report)
211
241
  overall_pass = bool(block.get("overall_pass"))
212
242
  status_text = _format_status(overall_pass)
213
243
 
214
244
  console.print("")
215
- _print_section_header(console, "CERTIFICATE SUMMARY")
245
+ summary_suffix: str | None = None
246
+ if summary_report_start is not None:
247
+ try:
248
+ base = (
249
+ float(summary_baseline_seconds)
250
+ if summary_baseline_seconds is not None
251
+ else 0.0
252
+ )
253
+ subject = (
254
+ float(summary_subject_seconds)
255
+ if summary_subject_seconds is not None
256
+ else 0.0
257
+ )
258
+ report_elapsed = max(
259
+ 0.0, float(perf_counter() - float(summary_report_start))
260
+ )
261
+ summary_suffix = f"[{(base + subject + report_elapsed):.2f}s]"
262
+ except Exception:
263
+ summary_suffix = None
264
+ _print_section_header(
265
+ console,
266
+ "EVALUATION REPORT SUMMARY",
267
+ suffix=summary_suffix,
268
+ )
216
269
  console.print(_format_kv_line("Status", status_text))
217
270
 
218
- schema_version = certificate.get("schema_version")
271
+ schema_version = evaluation_report.get("schema_version")
219
272
  if schema_version:
220
273
  console.print(
221
274
  _format_kv_line("Schema Version", str(schema_version))
222
275
  )
223
276
 
224
- run_id = certificate.get("run_id") or (
277
+ run_id = evaluation_report.get("run_id") or (
225
278
  (primary_report.get("meta", {}) or {}).get("run_id")
226
279
  )
227
280
  if run_id:
@@ -234,7 +287,15 @@ def _generate_reports(
234
287
  if edit_name:
235
288
  console.print(_format_kv_line("Edit", str(edit_name)))
236
289
 
237
- pm = (primary_report.get("metrics", {}) or {}).get("primary_metric", {})
290
+ pm = (
291
+ (evaluation_report.get("primary_metric") or {})
292
+ if isinstance(evaluation_report, dict)
293
+ else {}
294
+ )
295
+ if not pm:
296
+ pm = (primary_report.get("metrics", {}) or {}).get(
297
+ "primary_metric", {}
298
+ )
238
299
  console.print(" PRIMARY METRIC")
239
300
  pm_entries: list[tuple[str, str]] = []
240
301
  if isinstance(pm, dict) and pm:
@@ -250,8 +311,9 @@ def _generate_reports(
250
311
  if ratio is not None:
251
312
  pm_entries.append(("Ratio", _fmt_metric_value(ratio)))
252
313
  dci = pm.get("display_ci")
253
- if dci is not None:
254
- pm_entries.append(("CI", _fmt_ci_range(dci)))
314
+ ci_95 = _fmt_ci_95(dci)
315
+ if ci_95 is not None:
316
+ pm_entries.append(("CI (95%)", ci_95))
255
317
  if not pm_entries:
256
318
  pm_entries.append(("Status", "Unavailable"))
257
319
  for idx, (label, value) in enumerate(pm_entries):
@@ -275,17 +337,21 @@ def _generate_reports(
275
337
 
276
338
  console.print(" ARTIFACTS")
277
339
  entries = _artifact_entries(saved_files, str(output_dir))
340
+ artifact_label_width = max(
341
+ ARTIFACT_LABEL_WIDTH,
342
+ max((len(label) for label, _ in entries), default=0),
343
+ )
278
344
  for idx, (label, value) in enumerate(entries):
279
345
  branch = "└─" if idx == len(entries) - 1 else "├─"
280
- console.print(f" {branch} {label:<{ARTIFACT_LABEL_WIDTH}} {value}")
346
+ console.print(f" {branch} {label:<{artifact_label_width}} {value}")
281
347
  console.print("═" * SECTION_WIDTH)
282
348
 
283
349
  # In CLI report flow, do not hard-exit on validation failure; just display status.
284
350
  # CI gating should be handled by dedicated verify commands.
285
351
 
286
352
  except Exception as e:
287
- _event("WARN", f"Certificate validation error: {e}", emoji="⚠️")
288
- # Exit non-zero on certificate generation error
353
+ _event("WARN", f"Evaluation report validation error: {e}", emoji="⚠️")
354
+ # Exit non-zero on evaluation report generation error
289
355
  raise typer.Exit(1) from e
290
356
  else:
291
357
  console.print(_format_kv_line("Output", str(output_dir)))
@@ -318,7 +384,7 @@ def report_callback(
318
384
  None, "--run", help="Path to run directory or RunReport JSON"
319
385
  ),
320
386
  format: str = typer.Option(
321
- "json", "--format", help="Output format (json|md|html|cert|all)"
387
+ "json", "--format", help="Output format (json|md|html|report|all)"
322
388
  ),
323
389
  compare: str | None = typer.Option(
324
390
  None, "--compare", help="Path to second run for comparison"
@@ -326,7 +392,7 @@ def report_callback(
326
392
  baseline: str | None = typer.Option(
327
393
  None,
328
394
  "--baseline",
329
- help="Path to baseline run for certificate generation (required for cert format)",
395
+ help="Path to baseline run for evaluation report generation (required for report format)",
330
396
  ),
331
397
  output: str | None = typer.Option(None, "--output", "-o", help="Output directory"),
332
398
  style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
@@ -372,6 +438,9 @@ def report_command(
372
438
  output: str | None = None,
373
439
  style: str = "audit",
374
440
  no_color: bool = False,
441
+ summary_baseline_seconds: float | None = None,
442
+ summary_subject_seconds: float | None = None,
443
+ summary_report_start: float | None = None,
375
444
  ):
376
445
  return _generate_reports(
377
446
  run=run,
@@ -381,6 +450,9 @@ def report_command(
381
450
  output=output,
382
451
  style=style,
383
452
  no_color=no_color,
453
+ summary_baseline_seconds=summary_baseline_seconds,
454
+ summary_subject_seconds=summary_subject_seconds,
455
+ summary_report_start=summary_report_start,
384
456
  )
385
457
 
386
458
 
@@ -407,16 +479,16 @@ def _load_run_report(path: str) -> dict:
407
479
 
408
480
  # Subcommands wired from existing modules
409
481
  @report_app.command(
410
- name="verify", help="Recompute and verify metrics for a report/cert."
482
+ name="verify", help="Recompute and verify metrics for evaluation reports."
411
483
  )
412
484
  def report_verify_command(
413
- certificates: list[str] = typer.Argument(
414
- ..., help="One or more certificate JSON files to verify."
485
+ reports: list[str] = typer.Argument(
486
+ ..., help="One or more evaluation report JSON files to verify."
415
487
  ),
416
488
  baseline: str | None = typer.Option(
417
489
  None,
418
490
  "--baseline",
419
- help="Optional baseline certificate/report JSON to enforce provider parity.",
491
+ help="Optional baseline evaluation report JSON to enforce provider parity.",
420
492
  ),
421
493
  tolerance: float = typer.Option(
422
494
  1e-9, "--tolerance", help="Tolerance for analysis-basis comparisons."
@@ -431,10 +503,10 @@ def report_verify_command(
431
503
 
432
504
  from .verify import verify_command as _verify_command
433
505
 
434
- cert_paths = [_Path(c) for c in certificates]
506
+ report_paths = [_Path(p) for p in reports]
435
507
  baseline_path = _Path(baseline) if isinstance(baseline, str) else None
436
508
  return _verify_command(
437
- certificates=cert_paths,
509
+ reports=report_paths,
438
510
  baseline=baseline_path,
439
511
  tolerance=tolerance,
440
512
  profile=profile,
@@ -442,7 +514,7 @@ def report_verify_command(
442
514
 
443
515
 
444
516
  @report_app.command(
445
- name="explain", help="Explain certificate gates for report vs baseline."
517
+ name="explain", help="Explain evaluation report gates for report vs baseline."
446
518
  )
447
519
  def report_explain(
448
520
  report: str = typer.Option(..., "--report", help="Path to primary report.json"),
@@ -450,15 +522,17 @@ def report_explain(
450
522
  ..., "--baseline", help="Path to baseline report.json"
451
523
  ),
452
524
  ): # pragma: no cover - thin wrapper
453
- """Explain certificate gates for a report vs baseline."""
525
+ """Explain evaluation report gates for a report vs baseline."""
454
526
  from .explain_gates import explain_gates_command as _explain
455
527
 
456
528
  return _explain(report=report, baseline=baseline)
457
529
 
458
530
 
459
- @report_app.command(name="html", help="Render a certificate JSON to HTML.")
531
+ @report_app.command(name="html", help="Render an evaluation report JSON to HTML.")
460
532
  def report_html(
461
- input: str = typer.Option(..., "--input", "-i", help="Path to certificate JSON"),
533
+ input: str = typer.Option(
534
+ ..., "--input", "-i", help="Path to evaluation report JSON"
535
+ ),
462
536
  output: str = typer.Option(..., "--output", "-o", help="Path to output HTML file"),
463
537
  embed_css: bool = typer.Option(
464
538
  True, "--embed-css/--no-embed-css", help="Inline a minimal static stylesheet"
@@ -475,10 +549,10 @@ def report_html(
475
549
  @report_app.command("validate")
476
550
  def report_validate(
477
551
  report: str = typer.Argument(
478
- ..., help="Path to certificate JSON to validate against schema v1"
552
+ ..., help="Path to evaluation report JSON to validate against schema v1"
479
553
  ),
480
554
  ):
481
- """Validate a certificate JSON against the current schema (v1)."""
555
+ """Validate an evaluation report JSON against the current schema (v1)."""
482
556
  output_style = resolve_output_style(
483
557
  style="audit",
484
558
  profile="ci",
@@ -498,15 +572,15 @@ def report_validate(
498
572
  raise typer.Exit(1) from exc
499
573
 
500
574
  try:
501
- from invarlock.reporting.certificate import validate_certificate
575
+ from invarlock.reporting.report_builder import validate_report
502
576
 
503
- ok = validate_certificate(payload)
577
+ ok = validate_report(payload)
504
578
  if not ok:
505
- _event("FAIL", "Certificate schema validation failed", emoji="❌")
579
+ _event("FAIL", "Evaluation report schema validation failed", emoji="❌")
506
580
  raise typer.Exit(2)
507
- _event("PASS", "Certificate schema is valid", emoji="✅")
581
+ _event("PASS", "Evaluation report schema is valid", emoji="✅")
508
582
  except ValueError as exc:
509
- _event("FAIL", f"Certificate validation error: {exc}", emoji="❌")
583
+ _event("FAIL", f"Evaluation report validation error: {exc}", emoji="❌")
510
584
  raise typer.Exit(2) from exc
511
585
  except typer.Exit:
512
586
  raise