invarlock 0.3.7__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +3 -3
- invarlock/adapters/auto.py +2 -10
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +28 -5
- invarlock/assurance/__init__.py +15 -23
- invarlock/cli/adapter_auto.py +1 -5
- invarlock/cli/app.py +57 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
- invarlock/cli/commands/explain_gates.py +25 -17
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/report.py +116 -46
- invarlock/cli/commands/run.py +274 -66
- invarlock/cli/commands/verify.py +84 -89
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/provenance.py +3 -3
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +1 -1
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +2 -2
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +11 -7
- invarlock/eval/primary_metric.py +1 -1
- invarlock/guards/spectral.py +1 -1
- invarlock/model_profile.py +16 -35
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/html.py +7 -7
- invarlock/reporting/normalizer.py +2 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +126 -120
- invarlock/reporting/report.py +43 -37
- invarlock/reporting/{certificate.py → report_builder.py} +98 -95
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock-0.3.8.dist-info/METADATA +283 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/RECORD +43 -43
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
- invarlock-0.3.7.dist-info/METADATA +0 -602
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
invarlock/cli/commands/report.py
CHANGED
|
@@ -10,14 +10,15 @@ Provides the `invarlock report` group with:
|
|
|
10
10
|
import json
|
|
11
11
|
import math
|
|
12
12
|
from pathlib import Path
|
|
13
|
+
from time import perf_counter
|
|
13
14
|
from typing import Any
|
|
14
15
|
|
|
15
16
|
import typer
|
|
16
17
|
from rich.console import Console
|
|
17
18
|
|
|
18
19
|
from invarlock.cli.output import print_event, resolve_output_style
|
|
19
|
-
from invarlock.reporting import certificate as certificate_lib
|
|
20
20
|
from invarlock.reporting import report as report_lib
|
|
21
|
+
from invarlock.reporting import report_builder as report_builder
|
|
21
22
|
|
|
22
23
|
console = Console()
|
|
23
24
|
|
|
@@ -27,10 +28,22 @@ GATE_LABEL_WIDTH = 32
|
|
|
27
28
|
ARTIFACT_LABEL_WIDTH = 18
|
|
28
29
|
|
|
29
30
|
|
|
30
|
-
def
|
|
31
|
+
def _format_section_title(title: str, *, suffix: str | None = None) -> str:
|
|
32
|
+
if not suffix:
|
|
33
|
+
return title
|
|
34
|
+
combined = f"{title} {suffix}"
|
|
35
|
+
if len(combined) > SECTION_WIDTH:
|
|
36
|
+
return combined
|
|
37
|
+
pad = max(1, SECTION_WIDTH - len(title) - len(suffix))
|
|
38
|
+
return f"{title}{' ' * pad}{suffix}"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _print_section_header(
|
|
42
|
+
console: Console, title: str, *, suffix: str | None = None
|
|
43
|
+
) -> None:
|
|
31
44
|
bar = "═" * SECTION_WIDTH
|
|
32
45
|
console.print(bar)
|
|
33
|
-
console.print(title)
|
|
46
|
+
console.print(_format_section_title(title, suffix=suffix))
|
|
34
47
|
console.print(bar)
|
|
35
48
|
|
|
36
49
|
|
|
@@ -52,24 +65,24 @@ def _fmt_metric_value(value: Any) -> str:
|
|
|
52
65
|
return f"{val:.3f}"
|
|
53
66
|
|
|
54
67
|
|
|
55
|
-
def
|
|
68
|
+
def _fmt_ci_95(ci: Any) -> str | None:
|
|
56
69
|
if isinstance(ci, (list, tuple)) and len(ci) == 2:
|
|
57
70
|
try:
|
|
58
71
|
lo = float(ci[0])
|
|
59
72
|
hi = float(ci[1])
|
|
60
73
|
except (TypeError, ValueError):
|
|
61
|
-
return
|
|
74
|
+
return None
|
|
62
75
|
if math.isfinite(lo) and math.isfinite(hi):
|
|
63
|
-
return f"{lo:.3f}
|
|
64
|
-
return
|
|
76
|
+
return f"[{lo:.3f}, {hi:.3f}]"
|
|
77
|
+
return None
|
|
65
78
|
|
|
66
79
|
|
|
67
80
|
def _artifact_entries(
|
|
68
81
|
saved_files: dict[str, str], output_dir: str
|
|
69
82
|
) -> list[tuple[str, str]]:
|
|
70
83
|
order = [
|
|
71
|
-
("
|
|
72
|
-
("
|
|
84
|
+
("report", "Evaluation Report (JSON)"),
|
|
85
|
+
("report_md", "Evaluation Report (MD)"),
|
|
73
86
|
("json", "JSON"),
|
|
74
87
|
("markdown", "Markdown"),
|
|
75
88
|
("html", "HTML"),
|
|
@@ -89,7 +102,7 @@ def _artifact_entries(
|
|
|
89
102
|
|
|
90
103
|
# Group with callback so `invarlock report` still generates reports
|
|
91
104
|
report_app = typer.Typer(
|
|
92
|
-
help="Operations on reports and
|
|
105
|
+
help="Operations on run reports and evaluation reports (verify, explain, html, validate).",
|
|
93
106
|
invoke_without_command=True,
|
|
94
107
|
)
|
|
95
108
|
|
|
@@ -103,6 +116,9 @@ def _generate_reports(
|
|
|
103
116
|
output: str | None = None,
|
|
104
117
|
style: str = "audit",
|
|
105
118
|
no_color: bool = False,
|
|
119
|
+
summary_baseline_seconds: float | None = None,
|
|
120
|
+
summary_subject_seconds: float | None = None,
|
|
121
|
+
summary_report_start: float | None = None,
|
|
106
122
|
) -> None:
|
|
107
123
|
# This callback runs only when invoked without subcommand (default Click behavior)
|
|
108
124
|
try:
|
|
@@ -127,6 +143,9 @@ def _generate_reports(
|
|
|
127
143
|
output = _coerce_option(output)
|
|
128
144
|
style = _coerce_option(style, "audit")
|
|
129
145
|
no_color = bool(_coerce_option(no_color, False))
|
|
146
|
+
summary_baseline_seconds = _coerce_option(summary_baseline_seconds)
|
|
147
|
+
summary_subject_seconds = _coerce_option(summary_subject_seconds)
|
|
148
|
+
summary_report_start = _coerce_option(summary_report_start)
|
|
130
149
|
|
|
131
150
|
output_style = resolve_output_style(
|
|
132
151
|
style=str(style),
|
|
@@ -163,23 +182,34 @@ def _generate_reports(
|
|
|
163
182
|
output_dir = output
|
|
164
183
|
|
|
165
184
|
# Determine formats
|
|
185
|
+
allowed_formats = {"json", "md", "markdown", "html", "report", "all"}
|
|
186
|
+
if format not in allowed_formats:
|
|
187
|
+
_event("FAIL", f"Unknown --format '{format}'", emoji="❌")
|
|
188
|
+
raise typer.Exit(2)
|
|
189
|
+
|
|
190
|
+
if format == "md":
|
|
191
|
+
format = "markdown"
|
|
166
192
|
if format == "all":
|
|
167
193
|
formats = ["json", "markdown", "html"]
|
|
168
194
|
else:
|
|
169
195
|
formats = [format]
|
|
170
196
|
|
|
171
|
-
# Validate
|
|
172
|
-
if "
|
|
197
|
+
# Validate evaluation report requirements
|
|
198
|
+
if "report" in formats:
|
|
173
199
|
if baseline_report is None:
|
|
174
|
-
_event(
|
|
200
|
+
_event(
|
|
201
|
+
"FAIL",
|
|
202
|
+
"Evaluation report format requires --baseline",
|
|
203
|
+
emoji="❌",
|
|
204
|
+
)
|
|
175
205
|
_event(
|
|
176
206
|
"INFO",
|
|
177
|
-
"Use: invarlock report --run <run_dir> --format
|
|
207
|
+
"Use: invarlock report --run <run_dir> --format report --baseline <baseline_run_dir>",
|
|
178
208
|
)
|
|
179
209
|
raise typer.Exit(1)
|
|
180
210
|
_event(
|
|
181
211
|
"EXEC",
|
|
182
|
-
"Generating evaluation
|
|
212
|
+
"Generating evaluation report with baseline comparison",
|
|
183
213
|
emoji="📜",
|
|
184
214
|
)
|
|
185
215
|
|
|
@@ -197,31 +227,54 @@ def _generate_reports(
|
|
|
197
227
|
# Show results
|
|
198
228
|
_event("PASS", "Reports generated successfully.", emoji="✅")
|
|
199
229
|
|
|
200
|
-
if "
|
|
230
|
+
if "report" in formats and baseline_report:
|
|
201
231
|
try:
|
|
202
|
-
|
|
232
|
+
evaluation_report = report_builder.make_report(
|
|
203
233
|
primary_report, baseline_report
|
|
204
234
|
)
|
|
205
|
-
|
|
235
|
+
report_builder.validate_report(evaluation_report)
|
|
206
236
|
from invarlock.reporting.render import (
|
|
207
237
|
compute_console_validation_block as _console_block,
|
|
208
238
|
)
|
|
209
239
|
|
|
210
|
-
block = _console_block(
|
|
240
|
+
block = _console_block(evaluation_report)
|
|
211
241
|
overall_pass = bool(block.get("overall_pass"))
|
|
212
242
|
status_text = _format_status(overall_pass)
|
|
213
243
|
|
|
214
244
|
console.print("")
|
|
215
|
-
|
|
245
|
+
summary_suffix: str | None = None
|
|
246
|
+
if summary_report_start is not None:
|
|
247
|
+
try:
|
|
248
|
+
base = (
|
|
249
|
+
float(summary_baseline_seconds)
|
|
250
|
+
if summary_baseline_seconds is not None
|
|
251
|
+
else 0.0
|
|
252
|
+
)
|
|
253
|
+
subject = (
|
|
254
|
+
float(summary_subject_seconds)
|
|
255
|
+
if summary_subject_seconds is not None
|
|
256
|
+
else 0.0
|
|
257
|
+
)
|
|
258
|
+
report_elapsed = max(
|
|
259
|
+
0.0, float(perf_counter() - float(summary_report_start))
|
|
260
|
+
)
|
|
261
|
+
summary_suffix = f"[{(base + subject + report_elapsed):.2f}s]"
|
|
262
|
+
except Exception:
|
|
263
|
+
summary_suffix = None
|
|
264
|
+
_print_section_header(
|
|
265
|
+
console,
|
|
266
|
+
"EVALUATION REPORT SUMMARY",
|
|
267
|
+
suffix=summary_suffix,
|
|
268
|
+
)
|
|
216
269
|
console.print(_format_kv_line("Status", status_text))
|
|
217
270
|
|
|
218
|
-
schema_version =
|
|
271
|
+
schema_version = evaluation_report.get("schema_version")
|
|
219
272
|
if schema_version:
|
|
220
273
|
console.print(
|
|
221
274
|
_format_kv_line("Schema Version", str(schema_version))
|
|
222
275
|
)
|
|
223
276
|
|
|
224
|
-
run_id =
|
|
277
|
+
run_id = evaluation_report.get("run_id") or (
|
|
225
278
|
(primary_report.get("meta", {}) or {}).get("run_id")
|
|
226
279
|
)
|
|
227
280
|
if run_id:
|
|
@@ -234,7 +287,15 @@ def _generate_reports(
|
|
|
234
287
|
if edit_name:
|
|
235
288
|
console.print(_format_kv_line("Edit", str(edit_name)))
|
|
236
289
|
|
|
237
|
-
pm = (
|
|
290
|
+
pm = (
|
|
291
|
+
(evaluation_report.get("primary_metric") or {})
|
|
292
|
+
if isinstance(evaluation_report, dict)
|
|
293
|
+
else {}
|
|
294
|
+
)
|
|
295
|
+
if not pm:
|
|
296
|
+
pm = (primary_report.get("metrics", {}) or {}).get(
|
|
297
|
+
"primary_metric", {}
|
|
298
|
+
)
|
|
238
299
|
console.print(" PRIMARY METRIC")
|
|
239
300
|
pm_entries: list[tuple[str, str]] = []
|
|
240
301
|
if isinstance(pm, dict) and pm:
|
|
@@ -250,8 +311,9 @@ def _generate_reports(
|
|
|
250
311
|
if ratio is not None:
|
|
251
312
|
pm_entries.append(("Ratio", _fmt_metric_value(ratio)))
|
|
252
313
|
dci = pm.get("display_ci")
|
|
253
|
-
|
|
254
|
-
|
|
314
|
+
ci_95 = _fmt_ci_95(dci)
|
|
315
|
+
if ci_95 is not None:
|
|
316
|
+
pm_entries.append(("CI (95%)", ci_95))
|
|
255
317
|
if not pm_entries:
|
|
256
318
|
pm_entries.append(("Status", "Unavailable"))
|
|
257
319
|
for idx, (label, value) in enumerate(pm_entries):
|
|
@@ -284,8 +346,8 @@ def _generate_reports(
|
|
|
284
346
|
# CI gating should be handled by dedicated verify commands.
|
|
285
347
|
|
|
286
348
|
except Exception as e:
|
|
287
|
-
_event("WARN", f"
|
|
288
|
-
# Exit non-zero on
|
|
349
|
+
_event("WARN", f"Evaluation report validation error: {e}", emoji="⚠️")
|
|
350
|
+
# Exit non-zero on evaluation report generation error
|
|
289
351
|
raise typer.Exit(1) from e
|
|
290
352
|
else:
|
|
291
353
|
console.print(_format_kv_line("Output", str(output_dir)))
|
|
@@ -318,7 +380,7 @@ def report_callback(
|
|
|
318
380
|
None, "--run", help="Path to run directory or RunReport JSON"
|
|
319
381
|
),
|
|
320
382
|
format: str = typer.Option(
|
|
321
|
-
"json", "--format", help="Output format (json|md|html|
|
|
383
|
+
"json", "--format", help="Output format (json|md|html|report|all)"
|
|
322
384
|
),
|
|
323
385
|
compare: str | None = typer.Option(
|
|
324
386
|
None, "--compare", help="Path to second run for comparison"
|
|
@@ -326,7 +388,7 @@ def report_callback(
|
|
|
326
388
|
baseline: str | None = typer.Option(
|
|
327
389
|
None,
|
|
328
390
|
"--baseline",
|
|
329
|
-
help="Path to baseline run for
|
|
391
|
+
help="Path to baseline run for evaluation report generation (required for report format)",
|
|
330
392
|
),
|
|
331
393
|
output: str | None = typer.Option(None, "--output", "-o", help="Output directory"),
|
|
332
394
|
style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
|
|
@@ -372,6 +434,9 @@ def report_command(
|
|
|
372
434
|
output: str | None = None,
|
|
373
435
|
style: str = "audit",
|
|
374
436
|
no_color: bool = False,
|
|
437
|
+
summary_baseline_seconds: float | None = None,
|
|
438
|
+
summary_subject_seconds: float | None = None,
|
|
439
|
+
summary_report_start: float | None = None,
|
|
375
440
|
):
|
|
376
441
|
return _generate_reports(
|
|
377
442
|
run=run,
|
|
@@ -381,6 +446,9 @@ def report_command(
|
|
|
381
446
|
output=output,
|
|
382
447
|
style=style,
|
|
383
448
|
no_color=no_color,
|
|
449
|
+
summary_baseline_seconds=summary_baseline_seconds,
|
|
450
|
+
summary_subject_seconds=summary_subject_seconds,
|
|
451
|
+
summary_report_start=summary_report_start,
|
|
384
452
|
)
|
|
385
453
|
|
|
386
454
|
|
|
@@ -407,16 +475,16 @@ def _load_run_report(path: str) -> dict:
|
|
|
407
475
|
|
|
408
476
|
# Subcommands wired from existing modules
|
|
409
477
|
@report_app.command(
|
|
410
|
-
name="verify", help="Recompute and verify metrics for
|
|
478
|
+
name="verify", help="Recompute and verify metrics for evaluation reports."
|
|
411
479
|
)
|
|
412
480
|
def report_verify_command(
|
|
413
|
-
|
|
414
|
-
..., help="One or more
|
|
481
|
+
reports: list[str] = typer.Argument(
|
|
482
|
+
..., help="One or more evaluation report JSON files to verify."
|
|
415
483
|
),
|
|
416
484
|
baseline: str | None = typer.Option(
|
|
417
485
|
None,
|
|
418
486
|
"--baseline",
|
|
419
|
-
help="Optional baseline
|
|
487
|
+
help="Optional baseline evaluation report JSON to enforce provider parity.",
|
|
420
488
|
),
|
|
421
489
|
tolerance: float = typer.Option(
|
|
422
490
|
1e-9, "--tolerance", help="Tolerance for analysis-basis comparisons."
|
|
@@ -431,10 +499,10 @@ def report_verify_command(
|
|
|
431
499
|
|
|
432
500
|
from .verify import verify_command as _verify_command
|
|
433
501
|
|
|
434
|
-
|
|
502
|
+
report_paths = [_Path(p) for p in reports]
|
|
435
503
|
baseline_path = _Path(baseline) if isinstance(baseline, str) else None
|
|
436
504
|
return _verify_command(
|
|
437
|
-
|
|
505
|
+
reports=report_paths,
|
|
438
506
|
baseline=baseline_path,
|
|
439
507
|
tolerance=tolerance,
|
|
440
508
|
profile=profile,
|
|
@@ -442,7 +510,7 @@ def report_verify_command(
|
|
|
442
510
|
|
|
443
511
|
|
|
444
512
|
@report_app.command(
|
|
445
|
-
name="explain", help="Explain
|
|
513
|
+
name="explain", help="Explain evaluation report gates for report vs baseline."
|
|
446
514
|
)
|
|
447
515
|
def report_explain(
|
|
448
516
|
report: str = typer.Option(..., "--report", help="Path to primary report.json"),
|
|
@@ -450,15 +518,17 @@ def report_explain(
|
|
|
450
518
|
..., "--baseline", help="Path to baseline report.json"
|
|
451
519
|
),
|
|
452
520
|
): # pragma: no cover - thin wrapper
|
|
453
|
-
"""Explain
|
|
521
|
+
"""Explain evaluation report gates for a report vs baseline."""
|
|
454
522
|
from .explain_gates import explain_gates_command as _explain
|
|
455
523
|
|
|
456
524
|
return _explain(report=report, baseline=baseline)
|
|
457
525
|
|
|
458
526
|
|
|
459
|
-
@report_app.command(name="html", help="Render
|
|
527
|
+
@report_app.command(name="html", help="Render an evaluation report JSON to HTML.")
|
|
460
528
|
def report_html(
|
|
461
|
-
input: str = typer.Option(
|
|
529
|
+
input: str = typer.Option(
|
|
530
|
+
..., "--input", "-i", help="Path to evaluation report JSON"
|
|
531
|
+
),
|
|
462
532
|
output: str = typer.Option(..., "--output", "-o", help="Path to output HTML file"),
|
|
463
533
|
embed_css: bool = typer.Option(
|
|
464
534
|
True, "--embed-css/--no-embed-css", help="Inline a minimal static stylesheet"
|
|
@@ -475,10 +545,10 @@ def report_html(
|
|
|
475
545
|
@report_app.command("validate")
|
|
476
546
|
def report_validate(
|
|
477
547
|
report: str = typer.Argument(
|
|
478
|
-
..., help="Path to
|
|
548
|
+
..., help="Path to evaluation report JSON to validate against schema v1"
|
|
479
549
|
),
|
|
480
550
|
):
|
|
481
|
-
"""Validate
|
|
551
|
+
"""Validate an evaluation report JSON against the current schema (v1)."""
|
|
482
552
|
output_style = resolve_output_style(
|
|
483
553
|
style="audit",
|
|
484
554
|
profile="ci",
|
|
@@ -498,15 +568,15 @@ def report_validate(
|
|
|
498
568
|
raise typer.Exit(1) from exc
|
|
499
569
|
|
|
500
570
|
try:
|
|
501
|
-
from invarlock.reporting.
|
|
571
|
+
from invarlock.reporting.report_builder import validate_report
|
|
502
572
|
|
|
503
|
-
ok =
|
|
573
|
+
ok = validate_report(payload)
|
|
504
574
|
if not ok:
|
|
505
|
-
_event("FAIL", "
|
|
575
|
+
_event("FAIL", "Evaluation report schema validation failed", emoji="❌")
|
|
506
576
|
raise typer.Exit(2)
|
|
507
|
-
_event("PASS", "
|
|
577
|
+
_event("PASS", "Evaluation report schema is valid", emoji="✅")
|
|
508
578
|
except ValueError as exc:
|
|
509
|
-
_event("FAIL", f"
|
|
579
|
+
_event("FAIL", f"Evaluation report validation error: {exc}", emoji="❌")
|
|
510
580
|
raise typer.Exit(2) from exc
|
|
511
581
|
except typer.Exit:
|
|
512
582
|
raise
|