invarlock 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. invarlock/__init__.py +2 -2
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +35 -40
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_mixin.py +25 -4
  8. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  9. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  10. invarlock/cli/adapter_auto.py +31 -21
  11. invarlock/cli/app.py +73 -2
  12. invarlock/cli/commands/certify.py +600 -59
  13. invarlock/cli/commands/doctor.py +8 -10
  14. invarlock/cli/commands/plugins.py +13 -9
  15. invarlock/cli/commands/report.py +233 -69
  16. invarlock/cli/commands/run.py +907 -183
  17. invarlock/cli/commands/verify.py +76 -11
  18. invarlock/cli/config.py +1 -1
  19. invarlock/cli/doctor_helpers.py +4 -5
  20. invarlock/cli/output.py +193 -0
  21. invarlock/cli/provenance.py +1 -1
  22. invarlock/core/bootstrap.py +1 -1
  23. invarlock/core/registry.py +9 -11
  24. invarlock/core/runner.py +111 -25
  25. invarlock/edits/quant_rtn.py +65 -37
  26. invarlock/eval/bench.py +3 -3
  27. invarlock/eval/data.py +68 -23
  28. invarlock/eval/metrics.py +59 -1
  29. invarlock/eval/tasks/__init__.py +12 -0
  30. invarlock/eval/tasks/classification.py +48 -0
  31. invarlock/eval/tasks/qa.py +36 -0
  32. invarlock/eval/tasks/text_generation.py +102 -0
  33. invarlock/guards/invariants.py +19 -10
  34. invarlock/guards/rmt.py +2 -2
  35. invarlock/guards/variance.py +2 -2
  36. invarlock/model_profile.py +48 -27
  37. invarlock/observability/health.py +6 -6
  38. invarlock/observability/metrics.py +108 -0
  39. invarlock/reporting/certificate.py +159 -9
  40. invarlock/reporting/certificate_schema.py +1 -1
  41. invarlock/reporting/guards_analysis.py +154 -4
  42. invarlock/reporting/html.py +55 -5
  43. invarlock/reporting/normalizer.py +7 -0
  44. invarlock/reporting/render.py +791 -431
  45. invarlock/reporting/report.py +39 -3
  46. invarlock/reporting/report_types.py +6 -1
  47. invarlock/reporting/telemetry.py +86 -0
  48. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/METADATA +23 -9
  49. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/RECORD +53 -48
  50. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  51. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  52. invarlock/adapters/hf_gpt2.py +0 -404
  53. invarlock/adapters/hf_llama.py +0 -487
  54. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  55. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
@@ -14,14 +14,20 @@ Steps:
14
14
 
15
15
  from __future__ import annotations
16
16
 
17
+ import inspect
18
+ import io
17
19
  import json
18
20
  import math
21
+ from collections.abc import Iterator
22
+ from contextlib import contextmanager
19
23
  from pathlib import Path
20
- from typing import Any
24
+ from typing import Any, NoReturn
21
25
 
22
26
  import typer
23
27
  from rich.console import Console
24
28
 
29
+ from invarlock import __version__ as INVARLOCK_VERSION
30
+
25
31
  from ...core.exceptions import MetricsError
26
32
  from ..adapter_auto import resolve_auto_adapter
27
33
  from ..config import _deep_merge as _merge # reuse helper
@@ -32,9 +38,142 @@ from .run import _resolve_exit_code as _resolve_exit_code
32
38
 
33
39
  _LAZY_RUN_IMPORT = True
34
40
 
41
+ PHASE_BAR_WIDTH = 67
42
+ VERBOSITY_QUIET = 0
43
+ VERBOSITY_DEFAULT = 1
44
+ VERBOSITY_VERBOSE = 2
45
+
35
46
  console = Console()
36
47
 
37
48
 
49
+ def _render_banner_lines(title: str, context: str) -> list[str]:
50
+ width = max(len(title), len(context))
51
+ border = "─" * (width + 2)
52
+ return [
53
+ f"┌{border}┐",
54
+ f"│ {title.ljust(width)} │",
55
+ f"│ {context.ljust(width)} │",
56
+ f"└{border}┘",
57
+ ]
58
+
59
+
60
def _print_header_banner(
    console: Console, *, version: str, profile: str, tier: str, adapter: str
) -> None:
    """Print the boxed header banner for a certification run.

    The first banner row carries the InvarLock version; the second carries the
    profile, tier and adapter. Each rendered row is printed to *console* with
    its own ``print`` call.
    """
    banner = _render_banner_lines(
        f"INVARLOCK v{version} · Certification Pipeline",
        f"Profile: {profile} · Tier: {tier} · Adapter: {adapter}",
    )
    for row in banner:
        console.print(row)
67
+
68
+
69
+ def _phase_title(index: int, total: int, title: str) -> str:
70
+ return f"PHASE {index}/{total} · {title}"
71
+
72
+
73
def _print_phase_header(console: Console, title: str) -> None:
    """Print *title* framed above and below by a double-line rule.

    The rule is ``PHASE_BAR_WIDTH`` characters wide, or as wide as *title* when
    the title is longer.
    """
    rule = "═" * max(PHASE_BAR_WIDTH, len(title))
    for row in (rule, title, rule):
        console.print(row)
79
+
80
+
81
+ def _format_ratio(value: Any) -> str:
82
+ try:
83
+ val = float(value)
84
+ except (TypeError, ValueError):
85
+ return "N/A"
86
+ if not math.isfinite(val):
87
+ return "N/A"
88
+ return f"{val:.3f}"
89
+
90
+
91
def _resolve_verbosity(quiet: bool, verbose: bool) -> int:
    """Map the ``--quiet`` / ``--verbose`` flags to a verbosity level.

    Returns:
        ``VERBOSITY_QUIET``, ``VERBOSITY_VERBOSE`` or ``VERBOSITY_DEFAULT``.

    Raises:
        typer.Exit: With code 2 (after printing a message on the module
            console) when both flags are set.
    """
    if quiet:
        if verbose:
            console.print("--quiet and --verbose are mutually exclusive")
            raise typer.Exit(2)
        return VERBOSITY_QUIET
    return VERBOSITY_VERBOSE if verbose else VERBOSITY_DEFAULT
100
+
101
+
102
+ @contextmanager
103
+ def _override_console(module: Any, new_console: Console) -> Iterator[None]:
104
+ original_console = getattr(module, "console", None)
105
+ module.console = new_console
106
+ try:
107
+ yield
108
+ finally:
109
+ module.console = original_console
110
+
111
+
112
@contextmanager
def _suppress_child_output(enabled: bool) -> Iterator[io.StringIO | None]:
    """Optionally redirect the run/report command consoles into a buffer.

    When *enabled* is false this is a no-op that yields ``None``. Otherwise the
    ``console`` attribute of the sibling ``report`` and ``run`` modules is
    swapped for a plain-text (no terminal, no color) console writing into an
    in-memory buffer, and that buffer is yielded so the caller can replay the
    captured text, e.g. after a failure.
    """
    if enabled:
        from . import report as report_mod
        from . import run as run_mod

        sink = io.StringIO()
        muted = Console(file=sink, force_terminal=False, color_system=None)
        with _override_console(run_mod, muted):
            with _override_console(report_mod, muted):
                yield sink
    else:
        yield None
127
+
128
+
129
def _print_quiet_summary(
    *,
    cert_out: Path,
    source: str,
    edited: str,
    profile: str,
) -> None:
    """Print the condensed summary used in quiet mode.

    Always prints a version line and a baseline/subject/profile line on the
    module console. If ``<cert_out>/evaluation.cert.json`` is missing, only the
    output directory is printed; if it cannot be read as a JSON object, only
    the certificate path is printed. Otherwise the overall status, the gate
    pass count, the primary-metric ratio (when it formats to something other
    than "N/A") and the certificate path are printed.
    """
    cert_path = cert_out / "evaluation.cert.json"
    console.print(f"INVARLOCK v{INVARLOCK_VERSION} · CERTIFY")
    console.print(f"Baseline: {source} -> Subject: {edited} · Profile: {profile}")

    if not cert_path.exists():
        console.print(f"Output: {cert_out}")
        return

    # A certificate that is unreadable or not a JSON object gets the same
    # minimal treatment: just point at the file.
    try:
        with cert_path.open("r", encoding="utf-8") as fh:
            certificate = json.load(fh)
    except Exception:
        certificate = None
    if not isinstance(certificate, dict):
        console.print(f"Output: {cert_path}")
        return

    # Best effort: any failure while deriving the validation block degrades
    # the summary to UNKNOWN rather than aborting.
    try:
        from invarlock.reporting.render import (
            compute_console_validation_block as _console_block,
        )

        block = _console_block(certificate)
        rows = block.get("rows", [])
        if isinstance(rows, list):
            row_count = len(rows)
            ok_count = sum(1 for row in rows if row.get("ok"))
        else:
            row_count = 0
            ok_count = 0
        verdict = "PASS" if block.get("overall_pass") else "FAIL"
        total, passed, status = row_count, ok_count, verdict
    except Exception:
        total, passed, status = 0, 0, "UNKNOWN"

    primary_metric = certificate.get("primary_metric") or {}
    pm_ratio = _format_ratio(primary_metric.get("ratio_vs_baseline"))
    gate_summary = f"{passed}/{total} passed" if total else "N/A"

    console.print(f"Status: {status} · Gates: {gate_summary}")
    if pm_ratio != "N/A":
        console.print(f"Primary metric ratio: {pm_ratio}")
    console.print(f"Output: {cert_path}")
175
+
176
+
38
177
  def _latest_run_report(run_root: Path) -> Path | None:
39
178
  if not run_root.exists():
40
179
  return None
@@ -90,6 +229,14 @@ def certify_command(
90
229
  edited: str = typer.Option(
91
230
  ..., "--edited", "--subject", help="Subject model dir or Hub ID"
92
231
  ),
232
+ baseline_report: str | None = typer.Option(
233
+ None,
234
+ "--baseline-report",
235
+ help=(
236
+ "Reuse an existing baseline run report.json (skips baseline evaluation). "
237
+ "Must include stored evaluation windows (e.g., set INVARLOCK_STORE_EVAL_WINDOWS=1)."
238
+ ),
239
+ ),
93
240
  adapter: str = typer.Option(
94
241
  "auto", "--adapter", help="Adapter name or 'auto' to resolve"
95
242
  ),
@@ -117,6 +264,31 @@ def certify_command(
117
264
  edit_config: str | None = typer.Option(
118
265
  None, "--edit-config", help="Edit preset to apply a demo edit (quant_rtn)"
119
266
  ),
267
+ edit_label: str | None = typer.Option(
268
+ None,
269
+ "--edit-label",
270
+ help=(
271
+ "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
272
+ "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
273
+ ),
274
+ ),
275
+ quiet: bool = typer.Option(
276
+ False, "--quiet", "-q", help="Minimal output (suppress run/report detail)"
277
+ ),
278
+ verbose: bool = typer.Option(
279
+ False, "--verbose", "-v", help="Verbose output (include debug details)"
280
+ ),
281
+ banner: bool = typer.Option(
282
+ True, "--banner/--no-banner", help="Show header banner"
283
+ ),
284
+ style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
285
+ timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
286
+ progress: bool = typer.Option(
287
+ True, "--progress/--no-progress", help="Show progress done messages"
288
+ ),
289
+ no_color: bool = typer.Option(
290
+ False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
291
+ ),
120
292
  ):
121
293
  """Certify two checkpoints (baseline vs subject) with pinned windows."""
122
294
  # Support programmatic calls and Typer-invoked calls uniformly
@@ -132,6 +304,7 @@ def certify_command(
132
304
 
133
305
  source = _coerce_option(source)
134
306
  edited = _coerce_option(edited)
307
+ baseline_report = _coerce_option(baseline_report)
135
308
  adapter = _coerce_option(adapter, "auto")
136
309
  device = _coerce_option(device)
137
310
  profile = _coerce_option(profile, "ci")
@@ -140,22 +313,88 @@ def certify_command(
140
313
  out = _coerce_option(out, "runs")
141
314
  cert_out = _coerce_option(cert_out, "reports/cert")
142
315
  edit_config = _coerce_option(edit_config)
316
+ edit_label = _coerce_option(edit_label)
317
+ quiet = _coerce_option(quiet, False)
318
+ verbose = _coerce_option(verbose, False)
319
+ banner = _coerce_option(banner, True)
320
+ style = _coerce_option(style, "audit")
321
+ timing = bool(_coerce_option(timing, False))
322
+ progress = bool(_coerce_option(progress, True))
323
+ no_color = bool(_coerce_option(no_color, False))
324
+
325
+ verbosity = _resolve_verbosity(bool(quiet), bool(verbose))
326
+
327
+ if verbosity == VERBOSITY_QUIET:
328
+ progress = False
329
+ timing = False
330
+
331
+ from invarlock.cli.output import (
332
+ make_console,
333
+ perf_counter,
334
+ print_event,
335
+ print_timing_summary,
336
+ resolve_output_style,
337
+ timed_step,
338
+ )
339
+
340
+ output_style = resolve_output_style(
341
+ style=str(style),
342
+ profile=str(profile),
343
+ progress=bool(progress),
344
+ timing=bool(timing),
345
+ no_color=bool(no_color),
346
+ )
347
+ console = make_console(no_color=not output_style.color)
348
+ timings: dict[str, float] = {}
349
+ total_start: float | None = perf_counter() if output_style.timing else None
350
+
351
+ def _info(message: str, *, tag: str = "INFO", emoji: str | None = None) -> None:
352
+ if verbosity >= VERBOSITY_DEFAULT:
353
+ print_event(console, tag, message, style=output_style, emoji=emoji)
354
+
355
+ def _debug(msg: str) -> None:
356
+ if verbosity >= VERBOSITY_VERBOSE:
357
+ console.print(msg, markup=False)
358
+
359
+ def _fail(message: str, *, exit_code: int = 2) -> NoReturn:
360
+ print_event(console, "FAIL", message, style=output_style, emoji="❌")
361
+ raise typer.Exit(exit_code)
362
+
363
+ def _phase(index: int, total: int, title: str) -> None:
364
+ if verbosity >= VERBOSITY_DEFAULT:
365
+ console.print("")
366
+ _print_phase_header(console, _phase_title(index, total, title))
143
367
 
144
368
  src_id = str(source)
145
369
  edt_id = str(edited)
146
370
 
147
371
  # Resolve adapter when requested
148
372
  eff_adapter = adapter
149
- if str(adapter).strip().lower() in {"auto", "hf_auto", "auto_hf"}:
373
+ adapter_auto = False
374
+ if str(adapter).strip().lower() in {"auto", "auto_hf"}:
150
375
  eff_adapter = resolve_auto_adapter(src_id)
151
- console.print(f"🔎 Adapter:auto → {eff_adapter}")
376
+ adapter_auto = True
377
+
378
+ show_banner = bool(banner) and verbosity >= VERBOSITY_DEFAULT
379
+ if show_banner:
380
+ _print_header_banner(
381
+ console,
382
+ version=INVARLOCK_VERSION,
383
+ profile=profile,
384
+ tier=tier,
385
+ adapter=str(eff_adapter),
386
+ )
387
+ console.print("")
388
+
389
+ if adapter_auto:
390
+ _debug(f"Adapter:auto -> {eff_adapter}")
152
391
 
153
392
  # Choose preset. If none provided and repo preset is missing (pip install
154
393
  # scenario), fall back to a minimal built-in universal preset so the
155
394
  # flag-only quick start works without cloning the repo.
156
395
  default_universal = (
157
396
  Path("configs/presets/masked_lm/wikitext2_128.yaml")
158
- if eff_adapter == "hf_bert"
397
+ if eff_adapter == "hf_mlm"
159
398
  else Path("configs/presets/causal_lm/wikitext2_512.yaml")
160
399
  )
161
400
  preset_path = Path(preset) if preset is not None else default_universal
@@ -176,7 +415,13 @@ def certify_command(
176
415
  }
177
416
  else:
178
417
  if not preset_path.exists():
179
- console.print(f"[red]❌ Preset not found: {preset_path}")
418
+ print_event(
419
+ console,
420
+ "FAIL",
421
+ f"Preset not found: {preset_path}",
422
+ style=output_style,
423
+ emoji="❌",
424
+ )
180
425
  raise typer.Exit(1)
181
426
  preset_data = _load_yaml(preset_path)
182
427
  # Do not hard-code device from presets in auto-generated certify configs;
@@ -201,6 +446,108 @@ def certify_command(
201
446
  if guards_order is None:
202
447
  guards_order = list(default_guards_order)
203
448
 
449
+ def _load_and_validate_baseline_report(
450
+ report_path: Path,
451
+ *,
452
+ expected_profile: str,
453
+ expected_tier: str,
454
+ expected_adapter: str,
455
+ ) -> Path:
456
+ candidate = Path(report_path).expanduser()
457
+ if not candidate.exists():
458
+ _fail(f"Baseline report not found: {candidate}")
459
+ resolved_report: Path | None = None
460
+ if candidate.is_dir():
461
+ direct = candidate / "report.json"
462
+ if direct.is_file():
463
+ resolved_report = direct
464
+ else:
465
+ resolved_report = _latest_run_report(candidate)
466
+ elif candidate.is_file():
467
+ resolved_report = candidate
468
+ if resolved_report is None or not resolved_report.is_file():
469
+ _fail(f"Baseline report not found: {candidate}")
470
+ resolved_report = resolved_report.resolve()
471
+ try:
472
+ with resolved_report.open("r", encoding="utf-8") as fh:
473
+ payload = json.load(fh)
474
+ except Exception as exc: # noqa: BLE001
475
+ _fail(f"Baseline report is not valid JSON: {resolved_report} ({exc})")
476
+ if not isinstance(payload, dict):
477
+ _fail(f"Baseline report must be a JSON object: {resolved_report}")
478
+
479
+ edit_block = payload.get("edit")
480
+ edit_name = edit_block.get("name") if isinstance(edit_block, dict) else None
481
+ if edit_name != "noop":
482
+ _fail(
483
+ "Baseline report must be a no-op run (edit.name == 'noop'). "
484
+ f"Got edit.name={edit_name!r} in {resolved_report}"
485
+ )
486
+
487
+ meta = payload.get("meta")
488
+ if isinstance(meta, dict):
489
+ baseline_adapter = meta.get("adapter")
490
+ if (
491
+ isinstance(baseline_adapter, str)
492
+ and baseline_adapter != expected_adapter
493
+ ):
494
+ _fail(
495
+ "Baseline report adapter mismatch. "
496
+ f"Expected {expected_adapter!r}, got {baseline_adapter!r} in {resolved_report}"
497
+ )
498
+
499
+ context = payload.get("context")
500
+ if isinstance(context, dict):
501
+ baseline_profile = context.get("profile")
502
+ if (
503
+ isinstance(baseline_profile, str)
504
+ and baseline_profile.strip().lower() != expected_profile.strip().lower()
505
+ ):
506
+ _fail(
507
+ "Baseline report profile mismatch. "
508
+ f"Expected {expected_profile!r}, got {baseline_profile!r} in {resolved_report}"
509
+ )
510
+ auto_ctx = context.get("auto")
511
+ if isinstance(auto_ctx, dict):
512
+ baseline_tier = auto_ctx.get("tier")
513
+ if isinstance(baseline_tier, str) and baseline_tier != expected_tier:
514
+ _fail(
515
+ "Baseline report tier mismatch. "
516
+ f"Expected {expected_tier!r}, got {baseline_tier!r} in {resolved_report}"
517
+ )
518
+
519
+ eval_windows = payload.get("evaluation_windows")
520
+ if not isinstance(eval_windows, dict):
521
+ _fail(
522
+ "Baseline report missing evaluation window payloads. "
523
+ "Re-run baseline with INVARLOCK_STORE_EVAL_WINDOWS=1."
524
+ )
525
+
526
+ for phase_name in ("preview", "final"):
527
+ phase = eval_windows.get(phase_name)
528
+ if not isinstance(phase, dict):
529
+ _fail(
530
+ f"Baseline report missing evaluation_windows.{phase_name} payloads. "
531
+ "Re-run baseline with INVARLOCK_STORE_EVAL_WINDOWS=1."
532
+ )
533
+ window_ids = phase.get("window_ids")
534
+ input_ids = phase.get("input_ids")
535
+ if not isinstance(window_ids, list) or not window_ids:
536
+ _fail(
537
+ f"Baseline report missing evaluation_windows.{phase_name}.window_ids."
538
+ )
539
+ if not isinstance(input_ids, list) or not input_ids:
540
+ _fail(
541
+ f"Baseline report missing evaluation_windows.{phase_name}.input_ids."
542
+ )
543
+ if len(input_ids) != len(window_ids):
544
+ _fail(
545
+ "Baseline report has inconsistent evaluation window payloads "
546
+ f"for {phase_name}: input_ids={len(input_ids)} window_ids={len(window_ids)}."
547
+ )
548
+
549
+ return resolved_report
550
+
204
551
  # Create temp baseline config (no-op edit)
205
552
  # Normalize possible "hf:" prefixes for HF adapters
206
553
  norm_src_id = _normalize_model_id(src_id, eff_adapter)
@@ -221,39 +568,98 @@ def certify_command(
221
568
  },
222
569
  )
223
570
 
571
+ baseline_label = "noop"
572
+ subject_label: str | None = None
573
+ if edit_label:
574
+ subject_label = edit_label
575
+ elif not edit_config:
576
+ subject_label = "custom" if norm_src_id != norm_edt_id else "noop"
577
+
224
578
  tmp_dir = Path(".certify_tmp")
225
579
  tmp_dir.mkdir(parents=True, exist_ok=True)
226
- baseline_yaml = tmp_dir / "baseline_noop.yaml"
227
- _dump_yaml(baseline_yaml, baseline_cfg)
228
-
229
- console.print("🏁 Running baseline (no-op edit)")
230
- from .run import run_command as _run
231
-
232
- _run(
233
- config=str(baseline_yaml),
234
- profile=profile,
235
- out=str(Path(out) / "source"),
236
- tier=tier,
237
- device=device,
238
- )
239
580
 
240
- baseline_report = _latest_run_report(Path(out) / "source")
241
- if not baseline_report:
242
- console.print("[red]❌ Could not locate baseline report after run")
243
- raise typer.Exit(1)
581
+ baseline_report_path: Path
582
+ if baseline_report:
583
+ _info(
584
+ "Using provided baseline report (skipping baseline evaluation)",
585
+ tag="EXEC",
586
+ emoji="♻️",
587
+ )
588
+ baseline_report_path = _load_and_validate_baseline_report(
589
+ Path(baseline_report),
590
+ expected_profile=profile,
591
+ expected_tier=tier,
592
+ expected_adapter=str(eff_adapter),
593
+ )
594
+ _debug(f"Baseline report: {baseline_report_path}")
595
+ else:
596
+ baseline_yaml = tmp_dir / "baseline_noop.yaml"
597
+ _dump_yaml(baseline_yaml, baseline_cfg)
598
+
599
+ _phase(1, 3, "BASELINE EVALUATION")
600
+ _info("Running baseline (no-op edit)", tag="EXEC", emoji="🏁")
601
+ _debug(f"Baseline config: {baseline_yaml}")
602
+ from .run import run_command as _run
603
+
604
+ with _suppress_child_output(verbosity == VERBOSITY_QUIET) as quiet_buffer:
605
+ try:
606
+ with timed_step(
607
+ console=console,
608
+ style=output_style,
609
+ timings=timings,
610
+ key="baseline",
611
+ tag="EXEC",
612
+ message="Baseline",
613
+ emoji="🏁",
614
+ ):
615
+ _run(
616
+ config=str(baseline_yaml),
617
+ profile=profile,
618
+ out=str(Path(out) / "source"),
619
+ tier=tier,
620
+ device=device,
621
+ edit_label=baseline_label,
622
+ style=output_style.name,
623
+ progress=progress,
624
+ timing=False,
625
+ no_color=no_color,
626
+ )
627
+ except Exception:
628
+ if quiet_buffer is not None:
629
+ console.print(quiet_buffer.getvalue(), markup=False)
630
+ raise
631
+
632
+ baseline_report_path_candidate = _latest_run_report(Path(out) / "source")
633
+ if not baseline_report_path_candidate:
634
+ _fail("Could not locate baseline report after run", exit_code=1)
635
+ baseline_report_path = baseline_report_path_candidate
636
+ _debug(f"Baseline report: {baseline_report_path}")
244
637
 
245
638
  # Edited run: either no-op (Compare & Certify) or provided edit_config (demo edit)
639
+ _phase(2, 3, "SUBJECT EVALUATION")
246
640
  if edit_config:
247
641
  edited_yaml = Path(edit_config)
248
642
  if not edited_yaml.exists():
249
- console.print(f"[red]❌ Edit config not found: {edited_yaml}")
643
+ print_event(
644
+ console,
645
+ "FAIL",
646
+ f"Edit config not found: {edited_yaml}",
647
+ style=output_style,
648
+ emoji="❌",
649
+ )
250
650
  raise typer.Exit(1)
251
- console.print("✂️ Running edited (demo edit via --edit-config)")
651
+ _info("Running edited (demo edit via --edit-config)", tag="EXEC", emoji="✂️")
252
652
  # Overlay subject model id/adapter and output/context onto the provided edit config
253
653
  try:
254
654
  cfg_loaded: dict[str, Any] = _load_yaml(edited_yaml)
255
655
  except Exception as exc: # noqa: BLE001
256
- console.print(f"[red]❌ Failed to load edit config: {exc}")
656
+ print_event(
657
+ console,
658
+ "FAIL",
659
+ f"Failed to load edit config: {exc}",
660
+ style=output_style,
661
+ emoji="❌",
662
+ )
257
663
  raise typer.Exit(1) from exc
258
664
 
259
665
  # Ensure model.id/adapter point to the requested subject
@@ -282,23 +688,58 @@ def certify_command(
282
688
  "context": {"profile": profile, "tier": tier},
283
689
  },
284
690
  )
691
+ # Ensure the edited run always has a guard chain. Presets/edit configs
692
+ # often omit it, but `invarlock run` expects guards.order.
693
+ guards_block = merged_edited_cfg.get("guards")
694
+ guards_order_cfg = (
695
+ guards_block.get("order") if isinstance(guards_block, dict) else None
696
+ )
697
+ if not (
698
+ isinstance(guards_order_cfg, list)
699
+ and guards_order_cfg
700
+ and all(isinstance(item, str) for item in guards_order_cfg)
701
+ ):
702
+ merged_edited_cfg = _merge(
703
+ merged_edited_cfg, {"guards": {"order": guards_order}}
704
+ )
285
705
 
286
706
  # Persist a temporary merged config for traceability
287
707
  tmp_dir = Path(".certify_tmp")
288
708
  tmp_dir.mkdir(parents=True, exist_ok=True)
289
709
  edited_merged_yaml = tmp_dir / "edited_merged.yaml"
290
710
  _dump_yaml(edited_merged_yaml, merged_edited_cfg)
711
+ _debug(f"Edited config (merged): {edited_merged_yaml}")
291
712
 
292
713
  from .run import run_command as _run
293
714
 
294
- _run(
295
- config=str(edited_merged_yaml),
296
- profile=profile,
297
- out=str(Path(out) / "edited"),
298
- tier=tier,
299
- baseline=str(baseline_report),
300
- device=device,
301
- )
715
+ with _suppress_child_output(verbosity == VERBOSITY_QUIET) as quiet_buffer:
716
+ try:
717
+ with timed_step(
718
+ console=console,
719
+ style=output_style,
720
+ timings=timings,
721
+ key="subject",
722
+ tag="EXEC",
723
+ message="Subject",
724
+ emoji="✂️",
725
+ ):
726
+ _run(
727
+ config=str(edited_merged_yaml),
728
+ profile=profile,
729
+ out=str(Path(out) / "edited"),
730
+ tier=tier,
731
+ baseline=str(baseline_report_path),
732
+ device=device,
733
+ edit_label=subject_label if edit_label else None,
734
+ style=output_style.name,
735
+ progress=progress,
736
+ timing=False,
737
+ no_color=no_color,
738
+ )
739
+ except Exception:
740
+ if quiet_buffer is not None:
741
+ console.print(quiet_buffer.getvalue(), markup=False)
742
+ raise
302
743
  else:
303
744
  edited_cfg = _merge(
304
745
  preset_data,
@@ -313,22 +754,96 @@ def certify_command(
313
754
  )
314
755
  edited_yaml = tmp_dir / "edited_noop.yaml"
315
756
  _dump_yaml(edited_yaml, edited_cfg)
316
- console.print("🧪 Running edited (no-op, Compare & Certify)")
757
+ _info("Running edited (no-op, Compare & Certify)", tag="EXEC", emoji="🧪")
758
+ _debug(f"Edited config: {edited_yaml}")
317
759
  from .run import run_command as _run
318
760
 
319
- _run(
320
- config=str(edited_yaml),
321
- profile=profile,
322
- out=str(Path(out) / "edited"),
323
- tier=tier,
324
- baseline=str(baseline_report),
325
- device=device,
326
- )
761
+ with _suppress_child_output(verbosity == VERBOSITY_QUIET) as quiet_buffer:
762
+ try:
763
+ with timed_step(
764
+ console=console,
765
+ style=output_style,
766
+ timings=timings,
767
+ key="subject",
768
+ tag="EXEC",
769
+ message="Subject",
770
+ emoji="🧪",
771
+ ):
772
+ _run(
773
+ config=str(edited_yaml),
774
+ profile=profile,
775
+ out=str(Path(out) / "edited"),
776
+ tier=tier,
777
+ baseline=str(baseline_report_path),
778
+ device=device,
779
+ edit_label=subject_label,
780
+ style=output_style.name,
781
+ progress=progress,
782
+ timing=False,
783
+ no_color=no_color,
784
+ )
785
+ except Exception:
786
+ if quiet_buffer is not None:
787
+ console.print(quiet_buffer.getvalue(), markup=False)
788
+ raise
327
789
 
328
790
  edited_report = _latest_run_report(Path(out) / "edited")
329
791
  if not edited_report:
330
- console.print("[red]❌ Could not locate edited report after run")
792
+ print_event(
793
+ console,
794
+ "FAIL",
795
+ "Could not locate edited report after run",
796
+ style=output_style,
797
+ emoji="❌",
798
+ )
331
799
  raise typer.Exit(1)
800
+ _debug(f"Edited report: {edited_report}")
801
+
802
+ _phase(3, 3, "CERTIFICATE GENERATION")
803
+
804
+ def _emit_certificate() -> None:
805
+ _info("Emitting certificate", tag="EXEC", emoji="📜")
806
+ with _suppress_child_output(verbosity == VERBOSITY_QUIET) as quiet_buffer:
807
+ try:
808
+ with timed_step(
809
+ console=console,
810
+ style=output_style,
811
+ timings=timings,
812
+ key="certificate",
813
+ tag="EXEC",
814
+ message="Certificate",
815
+ emoji="📜",
816
+ ):
817
+ report_kwargs = {
818
+ "run": str(edited_report),
819
+ "format": "cert",
820
+ "baseline": str(baseline_report_path),
821
+ "output": cert_out,
822
+ "style": output_style.name,
823
+ "no_color": no_color,
824
+ }
825
+ try:
826
+ sig = inspect.signature(_report)
827
+ except (TypeError, ValueError):
828
+ _report(**report_kwargs)
829
+ else:
830
+ if any(
831
+ param.kind == inspect.Parameter.VAR_KEYWORD
832
+ for param in sig.parameters.values()
833
+ ):
834
+ _report(**report_kwargs)
835
+ else:
836
+ _report(
837
+ **{
838
+ key: value
839
+ for key, value in report_kwargs.items()
840
+ if key in sig.parameters
841
+ }
842
+ )
843
+ except Exception:
844
+ if quiet_buffer is not None:
845
+ console.print(quiet_buffer.getvalue(), markup=False)
846
+ raise
332
847
 
333
848
  # CI/Release hard‑abort: fail fast when primary metric is not computable.
334
849
  try:
@@ -340,7 +855,13 @@ def certify_command(
340
855
  with Path(edited_report).open("r", encoding="utf-8") as fh:
341
856
  edited_payload = json.load(fh)
342
857
  except Exception as exc: # noqa: BLE001
343
- console.print(f"[red]❌ Failed to read edited report: {exc}")
858
+ print_event(
859
+ console,
860
+ "FAIL",
861
+ f"Failed to read edited report: {exc}",
862
+ style=output_style,
863
+ emoji="❌",
864
+ )
344
865
  raise typer.Exit(1) from exc
345
866
 
346
867
  def _finite(x: Any) -> bool:
@@ -382,8 +903,12 @@ def certify_command(
382
903
  if (not _finite(pm_prev) or not _finite(pm_final))
383
904
  else "primary_metric_degraded"
384
905
  )
385
- console.print(
386
- "[yellow]⚠️ Primary metric degraded or non-finite; emitting certificate and marking task degraded. Primary metric computation failed.[/yellow]"
906
+ print_event(
907
+ console,
908
+ "WARN",
909
+ "Primary metric degraded or non-finite; emitting certificate and marking task degraded. Primary metric computation failed.",
910
+ style=output_style,
911
+ emoji="⚠️",
387
912
  )
388
913
  pm["degraded"] = True
389
914
  pm["invalid"] = pm.get("invalid") or True
@@ -395,12 +920,7 @@ def certify_command(
395
920
  edited_payload.setdefault("metrics", {}).update(metrics)
396
921
 
397
922
  # Emit the certificate for inspection, then exit with a CI-visible error.
398
- _report(
399
- run=str(edited_report),
400
- format="cert",
401
- baseline=str(baseline_report),
402
- output=cert_out,
403
- )
923
+ _emit_certificate()
404
924
  err = MetricsError(
405
925
  code="E111",
406
926
  message=f"Primary metric degraded or non-finite ({degraded_reason}).",
@@ -413,10 +933,31 @@ def certify_command(
413
933
  )
414
934
  raise typer.Exit(_resolve_exit_code(err, profile=profile))
415
935
 
416
- console.print("📜 Emitting certificate")
417
- _report(
418
- run=str(edited_report),
419
- format="cert",
420
- baseline=str(baseline_report),
421
- output=cert_out,
422
- )
936
+ _emit_certificate()
937
+ if timing:
938
+ if total_start is not None:
939
+ timings["total"] = max(0.0, float(perf_counter() - total_start))
940
+ else:
941
+ timings["total"] = (
942
+ float(timings.get("baseline", 0.0))
943
+ + float(timings.get("subject", 0.0))
944
+ + float(timings.get("certificate", 0.0))
945
+ )
946
+ print_timing_summary(
947
+ console,
948
+ timings,
949
+ style=output_style,
950
+ order=[
951
+ ("Baseline", "baseline"),
952
+ ("Subject", "subject"),
953
+ ("Certificate", "certificate"),
954
+ ("Total", "total"),
955
+ ],
956
+ )
957
+ if verbosity == VERBOSITY_QUIET:
958
+ _print_quiet_summary(
959
+ cert_out=Path(cert_out),
960
+ source=src_id,
961
+ edited=edt_id,
962
+ profile=profile,
963
+ )