invarlock 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +3 -3
- invarlock/adapters/auto.py +2 -10
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +28 -5
- invarlock/assurance/__init__.py +15 -23
- invarlock/calibration/spectral_null.py +1 -1
- invarlock/cli/adapter_auto.py +1 -5
- invarlock/cli/app.py +57 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
- invarlock/cli/commands/explain_gates.py +94 -51
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/report.py +121 -47
- invarlock/cli/commands/run.py +274 -66
- invarlock/cli/commands/verify.py +84 -89
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/provenance.py +3 -3
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +1 -1
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +2 -2
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +11 -7
- invarlock/eval/primary_metric.py +1 -1
- invarlock/guards/spectral.py +2 -2
- invarlock/guards_ref/spectral_ref.py +1 -1
- invarlock/model_profile.py +16 -35
- invarlock/observability/health.py +38 -20
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/html.py +7 -7
- invarlock/reporting/normalizer.py +2 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +126 -120
- invarlock/reporting/report.py +43 -37
- invarlock/reporting/{certificate.py → report_builder.py} +103 -99
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock-0.3.9.dist-info/METADATA +303 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/RECORD +46 -46
- {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/WHEEL +1 -1
- invarlock-0.3.7.dist-info/METADATA +0 -602
- {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/entry_points.txt +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/top_level.txt +0 -0
invarlock/cli/commands/report.py
CHANGED
|
@@ -10,14 +10,15 @@ Provides the `invarlock report` group with:
|
|
|
10
10
|
import json
|
|
11
11
|
import math
|
|
12
12
|
from pathlib import Path
|
|
13
|
+
from time import perf_counter
|
|
13
14
|
from typing import Any
|
|
14
15
|
|
|
15
16
|
import typer
|
|
16
17
|
from rich.console import Console
|
|
17
18
|
|
|
18
19
|
from invarlock.cli.output import print_event, resolve_output_style
|
|
19
|
-
from invarlock.reporting import certificate as certificate_lib
|
|
20
20
|
from invarlock.reporting import report as report_lib
|
|
21
|
+
from invarlock.reporting import report_builder as report_builder
|
|
21
22
|
|
|
22
23
|
console = Console()
|
|
23
24
|
|
|
@@ -27,10 +28,22 @@ GATE_LABEL_WIDTH = 32
|
|
|
27
28
|
ARTIFACT_LABEL_WIDTH = 18
|
|
28
29
|
|
|
29
30
|
|
|
30
|
-
def
|
|
31
|
+
def _format_section_title(title: str, *, suffix: str | None = None) -> str:
|
|
32
|
+
if not suffix:
|
|
33
|
+
return title
|
|
34
|
+
combined = f"{title} {suffix}"
|
|
35
|
+
if len(combined) > SECTION_WIDTH:
|
|
36
|
+
return combined
|
|
37
|
+
pad = max(1, SECTION_WIDTH - len(title) - len(suffix))
|
|
38
|
+
return f"{title}{' ' * pad}{suffix}"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _print_section_header(
|
|
42
|
+
console: Console, title: str, *, suffix: str | None = None
|
|
43
|
+
) -> None:
|
|
31
44
|
bar = "═" * SECTION_WIDTH
|
|
32
45
|
console.print(bar)
|
|
33
|
-
console.print(title)
|
|
46
|
+
console.print(_format_section_title(title, suffix=suffix))
|
|
34
47
|
console.print(bar)
|
|
35
48
|
|
|
36
49
|
|
|
@@ -52,24 +65,24 @@ def _fmt_metric_value(value: Any) -> str:
|
|
|
52
65
|
return f"{val:.3f}"
|
|
53
66
|
|
|
54
67
|
|
|
55
|
-
def
|
|
68
|
+
def _fmt_ci_95(ci: Any) -> str | None:
|
|
56
69
|
if isinstance(ci, (list, tuple)) and len(ci) == 2:
|
|
57
70
|
try:
|
|
58
71
|
lo = float(ci[0])
|
|
59
72
|
hi = float(ci[1])
|
|
60
73
|
except (TypeError, ValueError):
|
|
61
|
-
return
|
|
74
|
+
return None
|
|
62
75
|
if math.isfinite(lo) and math.isfinite(hi):
|
|
63
|
-
return f"{lo:.3f}
|
|
64
|
-
return
|
|
76
|
+
return f"[{lo:.3f}, {hi:.3f}]"
|
|
77
|
+
return None
|
|
65
78
|
|
|
66
79
|
|
|
67
80
|
def _artifact_entries(
|
|
68
81
|
saved_files: dict[str, str], output_dir: str
|
|
69
82
|
) -> list[tuple[str, str]]:
|
|
70
83
|
order = [
|
|
71
|
-
("
|
|
72
|
-
("
|
|
84
|
+
("report", "Evaluation Report (JSON)"),
|
|
85
|
+
("report_md", "Evaluation Report (MD)"),
|
|
73
86
|
("json", "JSON"),
|
|
74
87
|
("markdown", "Markdown"),
|
|
75
88
|
("html", "HTML"),
|
|
@@ -89,7 +102,7 @@ def _artifact_entries(
|
|
|
89
102
|
|
|
90
103
|
# Group with callback so `invarlock report` still generates reports
|
|
91
104
|
report_app = typer.Typer(
|
|
92
|
-
help="Operations on reports and
|
|
105
|
+
help="Operations on run reports and evaluation reports (verify, explain, html, validate).",
|
|
93
106
|
invoke_without_command=True,
|
|
94
107
|
)
|
|
95
108
|
|
|
@@ -103,6 +116,9 @@ def _generate_reports(
|
|
|
103
116
|
output: str | None = None,
|
|
104
117
|
style: str = "audit",
|
|
105
118
|
no_color: bool = False,
|
|
119
|
+
summary_baseline_seconds: float | None = None,
|
|
120
|
+
summary_subject_seconds: float | None = None,
|
|
121
|
+
summary_report_start: float | None = None,
|
|
106
122
|
) -> None:
|
|
107
123
|
# This callback runs only when invoked without subcommand (default Click behavior)
|
|
108
124
|
try:
|
|
@@ -127,6 +143,9 @@ def _generate_reports(
|
|
|
127
143
|
output = _coerce_option(output)
|
|
128
144
|
style = _coerce_option(style, "audit")
|
|
129
145
|
no_color = bool(_coerce_option(no_color, False))
|
|
146
|
+
summary_baseline_seconds = _coerce_option(summary_baseline_seconds)
|
|
147
|
+
summary_subject_seconds = _coerce_option(summary_subject_seconds)
|
|
148
|
+
summary_report_start = _coerce_option(summary_report_start)
|
|
130
149
|
|
|
131
150
|
output_style = resolve_output_style(
|
|
132
151
|
style=str(style),
|
|
@@ -163,23 +182,34 @@ def _generate_reports(
|
|
|
163
182
|
output_dir = output
|
|
164
183
|
|
|
165
184
|
# Determine formats
|
|
185
|
+
allowed_formats = {"json", "md", "markdown", "html", "report", "all"}
|
|
186
|
+
if format not in allowed_formats:
|
|
187
|
+
_event("FAIL", f"Unknown --format '{format}'", emoji="❌")
|
|
188
|
+
raise typer.Exit(2)
|
|
189
|
+
|
|
190
|
+
if format == "md":
|
|
191
|
+
format = "markdown"
|
|
166
192
|
if format == "all":
|
|
167
193
|
formats = ["json", "markdown", "html"]
|
|
168
194
|
else:
|
|
169
195
|
formats = [format]
|
|
170
196
|
|
|
171
|
-
# Validate
|
|
172
|
-
if "
|
|
197
|
+
# Validate evaluation report requirements
|
|
198
|
+
if "report" in formats:
|
|
173
199
|
if baseline_report is None:
|
|
174
|
-
_event(
|
|
200
|
+
_event(
|
|
201
|
+
"FAIL",
|
|
202
|
+
"Evaluation report format requires --baseline",
|
|
203
|
+
emoji="❌",
|
|
204
|
+
)
|
|
175
205
|
_event(
|
|
176
206
|
"INFO",
|
|
177
|
-
"Use: invarlock report --run <run_dir> --format
|
|
207
|
+
"Use: invarlock report --run <run_dir> --format report --baseline <baseline_run_dir>",
|
|
178
208
|
)
|
|
179
209
|
raise typer.Exit(1)
|
|
180
210
|
_event(
|
|
181
211
|
"EXEC",
|
|
182
|
-
"Generating evaluation
|
|
212
|
+
"Generating evaluation report with baseline comparison",
|
|
183
213
|
emoji="📜",
|
|
184
214
|
)
|
|
185
215
|
|
|
@@ -197,31 +227,54 @@ def _generate_reports(
|
|
|
197
227
|
# Show results
|
|
198
228
|
_event("PASS", "Reports generated successfully.", emoji="✅")
|
|
199
229
|
|
|
200
|
-
if "
|
|
230
|
+
if "report" in formats and baseline_report:
|
|
201
231
|
try:
|
|
202
|
-
|
|
232
|
+
evaluation_report = report_builder.make_report(
|
|
203
233
|
primary_report, baseline_report
|
|
204
234
|
)
|
|
205
|
-
|
|
235
|
+
report_builder.validate_report(evaluation_report)
|
|
206
236
|
from invarlock.reporting.render import (
|
|
207
237
|
compute_console_validation_block as _console_block,
|
|
208
238
|
)
|
|
209
239
|
|
|
210
|
-
block = _console_block(
|
|
240
|
+
block = _console_block(evaluation_report)
|
|
211
241
|
overall_pass = bool(block.get("overall_pass"))
|
|
212
242
|
status_text = _format_status(overall_pass)
|
|
213
243
|
|
|
214
244
|
console.print("")
|
|
215
|
-
|
|
245
|
+
summary_suffix: str | None = None
|
|
246
|
+
if summary_report_start is not None:
|
|
247
|
+
try:
|
|
248
|
+
base = (
|
|
249
|
+
float(summary_baseline_seconds)
|
|
250
|
+
if summary_baseline_seconds is not None
|
|
251
|
+
else 0.0
|
|
252
|
+
)
|
|
253
|
+
subject = (
|
|
254
|
+
float(summary_subject_seconds)
|
|
255
|
+
if summary_subject_seconds is not None
|
|
256
|
+
else 0.0
|
|
257
|
+
)
|
|
258
|
+
report_elapsed = max(
|
|
259
|
+
0.0, float(perf_counter() - float(summary_report_start))
|
|
260
|
+
)
|
|
261
|
+
summary_suffix = f"[{(base + subject + report_elapsed):.2f}s]"
|
|
262
|
+
except Exception:
|
|
263
|
+
summary_suffix = None
|
|
264
|
+
_print_section_header(
|
|
265
|
+
console,
|
|
266
|
+
"EVALUATION REPORT SUMMARY",
|
|
267
|
+
suffix=summary_suffix,
|
|
268
|
+
)
|
|
216
269
|
console.print(_format_kv_line("Status", status_text))
|
|
217
270
|
|
|
218
|
-
schema_version =
|
|
271
|
+
schema_version = evaluation_report.get("schema_version")
|
|
219
272
|
if schema_version:
|
|
220
273
|
console.print(
|
|
221
274
|
_format_kv_line("Schema Version", str(schema_version))
|
|
222
275
|
)
|
|
223
276
|
|
|
224
|
-
run_id =
|
|
277
|
+
run_id = evaluation_report.get("run_id") or (
|
|
225
278
|
(primary_report.get("meta", {}) or {}).get("run_id")
|
|
226
279
|
)
|
|
227
280
|
if run_id:
|
|
@@ -234,7 +287,15 @@ def _generate_reports(
|
|
|
234
287
|
if edit_name:
|
|
235
288
|
console.print(_format_kv_line("Edit", str(edit_name)))
|
|
236
289
|
|
|
237
|
-
pm = (
|
|
290
|
+
pm = (
|
|
291
|
+
(evaluation_report.get("primary_metric") or {})
|
|
292
|
+
if isinstance(evaluation_report, dict)
|
|
293
|
+
else {}
|
|
294
|
+
)
|
|
295
|
+
if not pm:
|
|
296
|
+
pm = (primary_report.get("metrics", {}) or {}).get(
|
|
297
|
+
"primary_metric", {}
|
|
298
|
+
)
|
|
238
299
|
console.print(" PRIMARY METRIC")
|
|
239
300
|
pm_entries: list[tuple[str, str]] = []
|
|
240
301
|
if isinstance(pm, dict) and pm:
|
|
@@ -250,8 +311,9 @@ def _generate_reports(
|
|
|
250
311
|
if ratio is not None:
|
|
251
312
|
pm_entries.append(("Ratio", _fmt_metric_value(ratio)))
|
|
252
313
|
dci = pm.get("display_ci")
|
|
253
|
-
|
|
254
|
-
|
|
314
|
+
ci_95 = _fmt_ci_95(dci)
|
|
315
|
+
if ci_95 is not None:
|
|
316
|
+
pm_entries.append(("CI (95%)", ci_95))
|
|
255
317
|
if not pm_entries:
|
|
256
318
|
pm_entries.append(("Status", "Unavailable"))
|
|
257
319
|
for idx, (label, value) in enumerate(pm_entries):
|
|
@@ -275,17 +337,21 @@ def _generate_reports(
|
|
|
275
337
|
|
|
276
338
|
console.print(" ARTIFACTS")
|
|
277
339
|
entries = _artifact_entries(saved_files, str(output_dir))
|
|
340
|
+
artifact_label_width = max(
|
|
341
|
+
ARTIFACT_LABEL_WIDTH,
|
|
342
|
+
max((len(label) for label, _ in entries), default=0),
|
|
343
|
+
)
|
|
278
344
|
for idx, (label, value) in enumerate(entries):
|
|
279
345
|
branch = "└─" if idx == len(entries) - 1 else "├─"
|
|
280
|
-
console.print(f" {branch} {label:<{
|
|
346
|
+
console.print(f" {branch} {label:<{artifact_label_width}} {value}")
|
|
281
347
|
console.print("═" * SECTION_WIDTH)
|
|
282
348
|
|
|
283
349
|
# In CLI report flow, do not hard-exit on validation failure; just display status.
|
|
284
350
|
# CI gating should be handled by dedicated verify commands.
|
|
285
351
|
|
|
286
352
|
except Exception as e:
|
|
287
|
-
_event("WARN", f"
|
|
288
|
-
# Exit non-zero on
|
|
353
|
+
_event("WARN", f"Evaluation report validation error: {e}", emoji="⚠️")
|
|
354
|
+
# Exit non-zero on evaluation report generation error
|
|
289
355
|
raise typer.Exit(1) from e
|
|
290
356
|
else:
|
|
291
357
|
console.print(_format_kv_line("Output", str(output_dir)))
|
|
@@ -318,7 +384,7 @@ def report_callback(
|
|
|
318
384
|
None, "--run", help="Path to run directory or RunReport JSON"
|
|
319
385
|
),
|
|
320
386
|
format: str = typer.Option(
|
|
321
|
-
"json", "--format", help="Output format (json|md|html|
|
|
387
|
+
"json", "--format", help="Output format (json|md|html|report|all)"
|
|
322
388
|
),
|
|
323
389
|
compare: str | None = typer.Option(
|
|
324
390
|
None, "--compare", help="Path to second run for comparison"
|
|
@@ -326,7 +392,7 @@ def report_callback(
|
|
|
326
392
|
baseline: str | None = typer.Option(
|
|
327
393
|
None,
|
|
328
394
|
"--baseline",
|
|
329
|
-
help="Path to baseline run for
|
|
395
|
+
help="Path to baseline run for evaluation report generation (required for report format)",
|
|
330
396
|
),
|
|
331
397
|
output: str | None = typer.Option(None, "--output", "-o", help="Output directory"),
|
|
332
398
|
style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
|
|
@@ -372,6 +438,9 @@ def report_command(
|
|
|
372
438
|
output: str | None = None,
|
|
373
439
|
style: str = "audit",
|
|
374
440
|
no_color: bool = False,
|
|
441
|
+
summary_baseline_seconds: float | None = None,
|
|
442
|
+
summary_subject_seconds: float | None = None,
|
|
443
|
+
summary_report_start: float | None = None,
|
|
375
444
|
):
|
|
376
445
|
return _generate_reports(
|
|
377
446
|
run=run,
|
|
@@ -381,6 +450,9 @@ def report_command(
|
|
|
381
450
|
output=output,
|
|
382
451
|
style=style,
|
|
383
452
|
no_color=no_color,
|
|
453
|
+
summary_baseline_seconds=summary_baseline_seconds,
|
|
454
|
+
summary_subject_seconds=summary_subject_seconds,
|
|
455
|
+
summary_report_start=summary_report_start,
|
|
384
456
|
)
|
|
385
457
|
|
|
386
458
|
|
|
@@ -407,16 +479,16 @@ def _load_run_report(path: str) -> dict:
|
|
|
407
479
|
|
|
408
480
|
# Subcommands wired from existing modules
|
|
409
481
|
@report_app.command(
|
|
410
|
-
name="verify", help="Recompute and verify metrics for
|
|
482
|
+
name="verify", help="Recompute and verify metrics for evaluation reports."
|
|
411
483
|
)
|
|
412
484
|
def report_verify_command(
|
|
413
|
-
|
|
414
|
-
..., help="One or more
|
|
485
|
+
reports: list[str] = typer.Argument(
|
|
486
|
+
..., help="One or more evaluation report JSON files to verify."
|
|
415
487
|
),
|
|
416
488
|
baseline: str | None = typer.Option(
|
|
417
489
|
None,
|
|
418
490
|
"--baseline",
|
|
419
|
-
help="Optional baseline
|
|
491
|
+
help="Optional baseline evaluation report JSON to enforce provider parity.",
|
|
420
492
|
),
|
|
421
493
|
tolerance: float = typer.Option(
|
|
422
494
|
1e-9, "--tolerance", help="Tolerance for analysis-basis comparisons."
|
|
@@ -431,10 +503,10 @@ def report_verify_command(
|
|
|
431
503
|
|
|
432
504
|
from .verify import verify_command as _verify_command
|
|
433
505
|
|
|
434
|
-
|
|
506
|
+
report_paths = [_Path(p) for p in reports]
|
|
435
507
|
baseline_path = _Path(baseline) if isinstance(baseline, str) else None
|
|
436
508
|
return _verify_command(
|
|
437
|
-
|
|
509
|
+
reports=report_paths,
|
|
438
510
|
baseline=baseline_path,
|
|
439
511
|
tolerance=tolerance,
|
|
440
512
|
profile=profile,
|
|
@@ -442,7 +514,7 @@ def report_verify_command(
|
|
|
442
514
|
|
|
443
515
|
|
|
444
516
|
@report_app.command(
|
|
445
|
-
name="explain", help="Explain
|
|
517
|
+
name="explain", help="Explain evaluation report gates for report vs baseline."
|
|
446
518
|
)
|
|
447
519
|
def report_explain(
|
|
448
520
|
report: str = typer.Option(..., "--report", help="Path to primary report.json"),
|
|
@@ -450,15 +522,17 @@ def report_explain(
|
|
|
450
522
|
..., "--baseline", help="Path to baseline report.json"
|
|
451
523
|
),
|
|
452
524
|
): # pragma: no cover - thin wrapper
|
|
453
|
-
"""Explain
|
|
525
|
+
"""Explain evaluation report gates for a report vs baseline."""
|
|
454
526
|
from .explain_gates import explain_gates_command as _explain
|
|
455
527
|
|
|
456
528
|
return _explain(report=report, baseline=baseline)
|
|
457
529
|
|
|
458
530
|
|
|
459
|
-
@report_app.command(name="html", help="Render
|
|
531
|
+
@report_app.command(name="html", help="Render an evaluation report JSON to HTML.")
|
|
460
532
|
def report_html(
|
|
461
|
-
input: str = typer.Option(
|
|
533
|
+
input: str = typer.Option(
|
|
534
|
+
..., "--input", "-i", help="Path to evaluation report JSON"
|
|
535
|
+
),
|
|
462
536
|
output: str = typer.Option(..., "--output", "-o", help="Path to output HTML file"),
|
|
463
537
|
embed_css: bool = typer.Option(
|
|
464
538
|
True, "--embed-css/--no-embed-css", help="Inline a minimal static stylesheet"
|
|
@@ -475,10 +549,10 @@ def report_html(
|
|
|
475
549
|
@report_app.command("validate")
|
|
476
550
|
def report_validate(
|
|
477
551
|
report: str = typer.Argument(
|
|
478
|
-
..., help="Path to
|
|
552
|
+
..., help="Path to evaluation report JSON to validate against schema v1"
|
|
479
553
|
),
|
|
480
554
|
):
|
|
481
|
-
"""Validate
|
|
555
|
+
"""Validate an evaluation report JSON against the current schema (v1)."""
|
|
482
556
|
output_style = resolve_output_style(
|
|
483
557
|
style="audit",
|
|
484
558
|
profile="ci",
|
|
@@ -498,15 +572,15 @@ def report_validate(
|
|
|
498
572
|
raise typer.Exit(1) from exc
|
|
499
573
|
|
|
500
574
|
try:
|
|
501
|
-
from invarlock.reporting.
|
|
575
|
+
from invarlock.reporting.report_builder import validate_report
|
|
502
576
|
|
|
503
|
-
ok =
|
|
577
|
+
ok = validate_report(payload)
|
|
504
578
|
if not ok:
|
|
505
|
-
_event("FAIL", "
|
|
579
|
+
_event("FAIL", "Evaluation report schema validation failed", emoji="❌")
|
|
506
580
|
raise typer.Exit(2)
|
|
507
|
-
_event("PASS", "
|
|
581
|
+
_event("PASS", "Evaluation report schema is valid", emoji="✅")
|
|
508
582
|
except ValueError as exc:
|
|
509
|
-
_event("FAIL", f"
|
|
583
|
+
_event("FAIL", f"Evaluation report validation error: {exc}", emoji="❌")
|
|
510
584
|
raise typer.Exit(2) from exc
|
|
511
585
|
except typer.Exit:
|
|
512
586
|
raise
|