invarlock-0.3.6-py3-none-any.whl → invarlock-0.3.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. invarlock/__init__.py +2 -2
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +35 -40
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_mixin.py +25 -4
  8. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  9. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  10. invarlock/cli/adapter_auto.py +31 -21
  11. invarlock/cli/app.py +73 -2
  12. invarlock/cli/commands/certify.py +600 -59
  13. invarlock/cli/commands/doctor.py +8 -10
  14. invarlock/cli/commands/plugins.py +13 -9
  15. invarlock/cli/commands/report.py +233 -69
  16. invarlock/cli/commands/run.py +907 -183
  17. invarlock/cli/commands/verify.py +76 -11
  18. invarlock/cli/config.py +1 -1
  19. invarlock/cli/doctor_helpers.py +4 -5
  20. invarlock/cli/output.py +193 -0
  21. invarlock/cli/provenance.py +1 -1
  22. invarlock/core/bootstrap.py +1 -1
  23. invarlock/core/registry.py +9 -11
  24. invarlock/core/runner.py +111 -25
  25. invarlock/edits/quant_rtn.py +65 -37
  26. invarlock/eval/bench.py +3 -3
  27. invarlock/eval/data.py +68 -23
  28. invarlock/eval/metrics.py +59 -1
  29. invarlock/eval/tasks/__init__.py +12 -0
  30. invarlock/eval/tasks/classification.py +48 -0
  31. invarlock/eval/tasks/qa.py +36 -0
  32. invarlock/eval/tasks/text_generation.py +102 -0
  33. invarlock/guards/invariants.py +19 -10
  34. invarlock/guards/rmt.py +2 -2
  35. invarlock/guards/variance.py +2 -2
  36. invarlock/model_profile.py +48 -27
  37. invarlock/observability/health.py +6 -6
  38. invarlock/observability/metrics.py +108 -0
  39. invarlock/reporting/certificate.py +159 -9
  40. invarlock/reporting/certificate_schema.py +1 -1
  41. invarlock/reporting/guards_analysis.py +154 -4
  42. invarlock/reporting/html.py +55 -5
  43. invarlock/reporting/normalizer.py +7 -0
  44. invarlock/reporting/render.py +791 -431
  45. invarlock/reporting/report.py +39 -3
  46. invarlock/reporting/report_types.py +6 -1
  47. invarlock/reporting/telemetry.py +86 -0
  48. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/METADATA +23 -9
  49. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/RECORD +53 -48
  50. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  51. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  52. invarlock/adapters/hf_gpt2.py +0 -404
  53. invarlock/adapters/hf_llama.py +0 -487
  54. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  55. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/cli/commands/run.py
@@ -17,8 +17,10 @@ import random
  import shutil
  import sys as _sys
  import types as _types
+ import warnings
  from array import array
- from collections.abc import Iterable, Sequence
+ from collections.abc import Callable, Iterable, Iterator, Sequence
+ from contextlib import contextmanager
  from datetime import datetime
  from pathlib import Path
  from types import SimpleNamespace
@@ -30,6 +32,16 @@ import psutil
  import typer
  from rich.console import Console
 
+ from invarlock.cli.output import (
+     OutputStyle,
+     make_console,
+     perf_counter,
+     print_event,
+     print_timing_summary,
+     resolve_output_style,
+     timed_step,
+ )
+
  try:
      import torch
  except ImportError:
@@ -63,7 +75,42 @@ from ..config import (
  )
  from ..overhead_utils import _extract_pm_snapshot_for_overhead
 
- console = Console()
+ console = make_console()
+
+
+ def _style_from_console(console: Console, profile: str | None = None) -> OutputStyle:
+     style = getattr(console, "_invarlock_output_style", None)
+     if isinstance(style, OutputStyle):
+         return style
+     return resolve_output_style(
+         style=None,
+         profile=profile,
+         progress=False,
+         timing=False,
+         no_color=False,
+     )
+
+
+ def _event(
+     console: Console,
+     tag: str,
+     message: str,
+     *,
+     emoji: str | None = None,
+     console_style: str | None = None,
+     profile: str | None = None,
+ ) -> None:
+     style = _style_from_console(console, profile=profile)
+     print_event(
+         console,
+         tag,
+         message,
+         style=style,
+         emoji=emoji,
+         console_style=console_style,
+     )
+
+
  LIGHT_IMPORT = os.getenv("INVARLOCK_LIGHT_IMPORT", "").strip().lower() in {
      "1",
      "true",
@@ -76,6 +123,73 @@ RELEASE_MIN_WINDOWS_PER_ARM = 200
  RELEASE_CALIBRATION_MIN = 16
  RELEASE_CALIBRATION_MAX = 24
  GUARD_OVERHEAD_THRESHOLD = 0.01
+ KV_LABEL_WIDTH = 10
+
+ _NOISY_WARNING_PATTERNS = (
+     r".*`torch_dtype` is deprecated.*",
+     r".*loss_type=None.*unrecognized.*",
+ )
+
+
+ def _resolve_warning_suppression(profile: str | None) -> tuple[bool, bool]:
+     suppress_all = os.getenv("INVARLOCK_SUPPRESS_WARNINGS", "").strip().lower() in {
+         "1",
+         "true",
+         "yes",
+         "on",
+     }
+     profile_norm = (profile or "").strip().lower()
+     enabled = bool(suppress_all) or profile_norm in {"ci", "ci_cpu", "release", "dev"}
+     return enabled, suppress_all
+
+
+ def _apply_warning_filters(profile: str | None) -> bool:
+     enabled, suppress_all = _resolve_warning_suppression(profile)
+     if not enabled:
+         return False
+     if suppress_all:
+         warnings.simplefilter("ignore")
+     else:
+         for pattern in _NOISY_WARNING_PATTERNS:
+             warnings.filterwarnings("ignore", message=pattern)
+     return True
+
+
+ @contextmanager
+ def _suppress_noisy_warnings(profile: str | None) -> Iterator[None]:
+     enabled, _suppress_all = _resolve_warning_suppression(profile)
+     if not enabled:
+         yield
+         return
+     with warnings.catch_warnings():
+         _apply_warning_filters(profile)
+         yield
+
+
+ def _format_kv_line(label: str, value: str, *, width: int = KV_LABEL_WIDTH) -> str:
+     return f" {label:<{width}}: {value}"
+
+
+ def _device_resolution_note(target_device: str, resolved_device: str) -> str:
+     target_norm = str(target_device or "").strip().lower()
+     resolved_norm = str(resolved_device or "").strip().lower()
+     if not target_norm or target_norm == "auto":
+         return "auto-resolved"
+     if target_norm == resolved_norm:
+         return "requested"
+     return f"resolved from {target_device}"
+
+
+ def _format_guard_chain(guards: list[Any]) -> str:
+     names = [str(getattr(guard, "name", "unknown")) for guard in guards]
+     seen: set[str] = set()
+     deduped: list[str] = []
+     for name in names:
+         if name in seen:
+             continue
+         seen.add(name)
+         deduped.append(name)
+     return " → ".join(deduped)
 
 
  # Common dataset split aliases we probe in order when not explicitly set
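A runnable stdlib-only sketch of the suppression behaviour added above: INVARLOCK_SUPPRESS_WARNINGS silences everything via simplefilter, otherwise only the listed message patterns are filtered, and only inside the context manager's scope:

# Sketch of _suppress_noisy_warnings using only the standard library.
import warnings
from contextlib import contextmanager

NOISY = (r".*`torch_dtype` is deprecated.*",)

@contextmanager
def suppress_noisy(enabled: bool):
    if not enabled:
        yield
        return
    with warnings.catch_warnings():  # filters are restored on exit
        for pattern in NOISY:
            warnings.filterwarnings("ignore", message=pattern)
        yield

with suppress_noisy(True):
    warnings.warn("`torch_dtype` is deprecated in favour of dtype")  # filtered
warnings.warn("some other warning")  # still visible
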
@@ -241,6 +355,89 @@ def _resolve_pm_acceptance_range(
      return {"min": float(min_val), "max": float(max_val)}
 
 
+ def _resolve_pm_drift_band(
+     cfg: InvarLockConfig | dict[str, Any] | None,
+ ) -> dict[str, float]:
+     """Resolve preview→final drift band from config/env with safe defaults.
+
+     The drift band governs the Preview Final Drift Acceptable gate. By default,
+     certificates enforce 0.95–1.05 unless an explicit band is provided.
+     """
+
+     base_min = 0.95
+     base_max = 1.05
+
+     cfg_min = None
+     cfg_max = None
+     try:
+         cfg_map = _coerce_mapping(cfg) if cfg is not None else {}
+         pm_section = cfg_map.get("primary_metric") if isinstance(cfg_map, dict) else {}
+         pm_map = _coerce_mapping(pm_section)
+         drift_band = pm_map.get("drift_band") if isinstance(pm_map, dict) else None
+         if isinstance(drift_band, dict):
+             if drift_band.get("min") is not None:
+                 try:
+                     cfg_min = float(drift_band["min"])
+                 except (TypeError, ValueError):
+                     cfg_min = None
+             if drift_band.get("max") is not None:
+                 try:
+                     cfg_max = float(drift_band["max"])
+                 except (TypeError, ValueError):
+                     cfg_max = None
+         elif isinstance(drift_band, list | tuple) and len(drift_band) == 2:
+             try:
+                 cfg_min = float(drift_band[0])
+                 cfg_max = float(drift_band[1])
+             except (TypeError, ValueError):
+                 cfg_min = None
+                 cfg_max = None
+     except Exception:
+         cfg_min = None
+         cfg_max = None
+
+     def _parse_env(name: str) -> float | None:
+         try:
+             raw = os.environ.get(name, "")
+             if raw is None or str(raw).strip() == "":
+                 return None
+             return float(raw)
+         except Exception:
+             return None
+
+     env_min = _parse_env("INVARLOCK_PM_DRIFT_MIN")
+     env_max = _parse_env("INVARLOCK_PM_DRIFT_MAX")
+
+     has_explicit = any(v is not None for v in (cfg_min, cfg_max, env_min, env_max))
+     if not has_explicit:
+         return {}
+
+     min_val = (
+         env_min if env_min is not None else cfg_min if cfg_min is not None else base_min
+     )
+     max_val = (
+         env_max if env_max is not None else cfg_max if cfg_max is not None else base_max
+     )
+
+     try:
+         if min_val is not None and min_val <= 0:
+             min_val = base_min
+     except Exception:
+         min_val = base_min
+     try:
+         if max_val is not None and max_val <= 0:
+             max_val = base_max
+     except Exception:
+         max_val = base_max
+     try:
+         if min_val is not None and max_val is not None and min_val >= max_val:
+             min_val, max_val = base_min, base_max
+     except Exception:
+         min_val, max_val = base_min, base_max
+
+     return {"min": float(min_val), "max": float(max_val)}
+
+
  def _free_model_memory(model: object | None) -> None:
      """Best-effort cleanup to release GPU memory for a model object."""
      if model is None:
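The precedence implemented by _resolve_pm_drift_band is: environment (INVARLOCK_PM_DRIFT_MIN/MAX) beats config (primary_metric.drift_band), which beats the 0.95–1.05 defaults, and an empty dict means "nothing explicit was set". A condensed, runnable sketch of that precedence (not the full validation above):

# Condensed sketch of the drift-band precedence.
import os

def resolve_band(cfg_min=None, cfg_max=None, base=(0.95, 1.05)):
    env_min = os.environ.get("INVARLOCK_PM_DRIFT_MIN")
    env_max = os.environ.get("INVARLOCK_PM_DRIFT_MAX")
    env_min = float(env_min) if env_min else None
    env_max = float(env_max) if env_max else None
    if all(v is None for v in (cfg_min, cfg_max, env_min, env_max)):
        return {}  # no explicit band -> caller keeps the implicit defaults
    min_val = env_min if env_min is not None else (cfg_min if cfg_min is not None else base[0])
    max_val = env_max if env_max is not None else (cfg_max if cfg_max is not None else base[1])
    if min_val <= 0 or max_val <= 0 or min_val >= max_val:
        min_val, max_val = base  # reject nonsensical bands, fall back
    return {"min": min_val, "max": max_val}

# (assumes the two env vars are unset in this shell)
assert resolve_band() == {}
assert resolve_band(cfg_min=0.9, cfg_max=1.1) == {"min": 0.9, "max": 1.1}
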
@@ -754,38 +951,60 @@ def _prepare_config_for_run(
          resolve_edit_kind as _resolve_edit_kind,
      )
 
-     console.print(f"📋 Loading configuration: {config_path}")
+     _event(
+         console,
+         "INIT",
+         f"Loading configuration: {config_path}",
+         emoji="📋",
+         profile=profile,
+     )
      cfg = _load_config(config_path)
 
      # Apply profile if specified (dev is a no-op)
      if profile and str(profile).lower() not in {"dev"}:
-         console.print(f"🎯 Applying profile: {profile}")
+         _event(
+             console, "INIT", f"Applying profile: {profile}", emoji="🎯", profile=profile
+         )
          try:
              cfg = _apply_profile(cfg, profile)
          except Exception as exc:
-             console.print(f"[red]{exc}[/red]")
+             _event(console, "FAIL", str(exc), emoji="❌", profile=profile)
              raise typer.Exit(1) from exc
 
      # Apply edit override
      if edit:
          try:
              edit_name = _resolve_edit_kind(edit)
-             console.print(f"✂️ Edit override: {edit} → {edit_name}")
+             _event(
+                 console,
+                 "EXEC",
+                 f"Edit override: {edit} → {edit_name}",
+                 emoji="✂️",
+                 profile=profile,
+             )
              cfg = _apply_edit_override(cfg, edit)
          except ValueError as e:
-             console.print(f"[red]{e}[/red]")
+             _event(console, "FAIL", str(e), emoji="❌", profile=profile)
              raise typer.Exit(1) from e
 
      # Apply CLI overrides for auto configuration
      if tier or probes is not None:
          if tier and tier not in ["conservative", "balanced", "aggressive", "none"]:
-             console.print(
-                 f"[red]❌ Invalid tier '{tier}'. Valid options: conservative, balanced, aggressive, none[/red]"
+             _event(
+                 console,
+                 "FAIL",
+                 f"Invalid tier '{tier}'. Valid options: conservative, balanced, aggressive, none",
+                 emoji="❌",
+                 profile=profile,
              )
              raise typer.Exit(1)
          if probes is not None and (probes < 0 or probes > 10):
-             console.print(
-                 f"[red]❌ Invalid probes '{probes}'. Must be between 0 and 10[/red]"
+             _event(
+                 console,
+                 "FAIL",
+                 f"Invalid probes '{probes}'. Must be between 0 and 10",
+                 emoji="❌",
+                 profile=profile,
             )
              raise typer.Exit(1)
 
@@ -796,10 +1015,22 @@ def _prepare_config_for_run(
          cfg_dict["auto"] = auto_section
          if tier:
              auto_section["tier"] = tier
-             console.print(f"🎛️ Auto tier override: {tier}")
+             _event(
+                 console,
+                 "INIT",
+                 f"Auto tier override: {tier}",
+                 emoji="🎛️",
+                 profile=profile,
+             )
          if probes is not None:
              auto_section["probes"] = probes
-             console.print(f"🔬 Auto probes override: {probes}")
+             _event(
+                 console,
+                 "INIT",
+                 f"Auto probes override: {probes}",
+                 emoji="🔬",
+                 profile=profile,
+             )
      cfg = InvarLockConfig(cfg_dict)
 
      # Resolve adapter:auto to a concrete built-in adapter if requested
@@ -832,7 +1063,7 @@ def _maybe_plan_release_windows(
 
 
  def _print_pipeline_start(console: Console) -> None:
-     console.print("🚀 Starting InvarLock pipeline...")
+     _event(console, "INIT", "Starting InvarLock pipeline...", emoji="🚀")
 
 
  def _emit_run_artifacts(
@@ -841,7 +1072,7 @@ def _emit_run_artifacts(
      """Save run report and return emitted artifact paths."""
      from invarlock.reporting.report import save_report as _save_report
 
-     console.print("💾 Saving run report...")
+     _event(console, "DATA", "Saving run report...", emoji="💾")
      return _save_report(
          report, out_dir, formats=["json"], filename_prefix=filename_prefix
      )
@@ -864,12 +1095,11 @@ def _resolve_device_and_output(
      cfg_device = None
      target_device = device or cfg_device or "auto"
      resolved_device = _resolve_device(target_device)
-     console.print(
-         f"Device: {resolved_device} (requested={target_device}, resolved={resolved_device})"
-     )
+     resolution_note = _device_resolution_note(target_device, resolved_device)
+     console.print(_format_kv_line("Device", f"{resolved_device} ({resolution_note})"))
      is_valid, error_msg = _validate(resolved_device)
      if not is_valid:
-         console.print(f"[red]❌ Device validation failed: {error_msg}[/red]")
+         _event(console, "FAIL", f"Device validation failed: {error_msg}", emoji="❌")
          raise typer.Exit(1)
 
      # Determine output directory
@@ -892,6 +1122,7 @@ def _resolve_provider_and_split(
      provider_kwargs: dict[str, Any] | None = None,
      console: Console,
      resolved_device: str | None = None,
+     emit: Callable[[str, str, str | None], None] | None = None,
  ) -> tuple[Any, str, bool]:
      """Resolve dataset provider and split, returning (provider, split, used_fallback)."""
      provider_name = None
@@ -918,7 +1149,10 @@
      # Pass device hint only to providers that understand it (currently WikiText-2)
      if resolved_device and provider_name == "wikitext2":
          provider_kwargs.setdefault("device_hint", resolved_device)
-     data_provider = get_provider_fn(provider_name, **provider_kwargs)
+     if emit is not None and provider_name == "wikitext2":
+         data_provider = get_provider_fn(provider_name, emit=emit, **provider_kwargs)
+     else:
+         data_provider = get_provider_fn(provider_name, **provider_kwargs)
 
      requested_split = None
      try:
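For orientation, a worked example of the key/value status formatting introduced by _format_kv_line and _device_resolution_note earlier in this diff; the helpers are re-stated here standalone, and the printed output is illustrative:

# Standalone demo of the KV status line format.
KV_LABEL_WIDTH = 10

def format_kv_line(label, value, width=KV_LABEL_WIDTH):
    return f" {label:<{width}}: {value}"

def device_note(target, resolved):
    # "auto" (or empty) means the device was auto-resolved; an exact match
    # means the user's request was honoured verbatim.
    target = (target or "").strip().lower()
    if not target or target == "auto":
        return "auto-resolved"
    return "requested" if target == resolved.lower() else f"resolved from {target}"

print(format_kv_line("Device", f"cuda ({device_note('auto', 'cuda')})"))
# ->  Device    : cuda (auto-resolved)
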
@@ -972,7 +1206,13 @@ def _extract_model_load_kwargs(cfg: InvarLockConfig) -> dict[str, Any]:
      return extra
 
 
- def _load_model_with_cfg(adapter: Any, cfg: InvarLockConfig, device: str) -> Any:
+ def _load_model_with_cfg(
+     adapter: Any,
+     cfg: InvarLockConfig,
+     device: str,
+     *,
+     profile: str | None = None,
+ ) -> Any:
      """Load a model with config-provided kwargs, filtering for strict adapters."""
      try:
          model_id = cfg.model.id
@@ -985,20 +1225,21 @@ def _load_model_with_cfg(adapter: Any, cfg: InvarLockConfig, device: str) -> Any
          raise ValueError("Missing model.id in config")
 
      extra = _extract_model_load_kwargs(cfg)
-     try:
-         sig = inspect.signature(adapter.load_model)
-         accepts_var_kw = any(
-             p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
-         )
-         if accepts_var_kw:
-             return adapter.load_model(model_id, device=device, **extra)
-         allowed = {k: v for k, v in extra.items() if k in sig.parameters}
-         if allowed:
-             return adapter.load_model(model_id, device=device, **allowed)
-     except Exception:
-         # Fall back to the strictest call shape.
-         pass
-     return adapter.load_model(model_id, device=device)
+     with _suppress_noisy_warnings(profile):
+         try:
+             sig = inspect.signature(adapter.load_model)
+             accepts_var_kw = any(
+                 p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
+             )
+             if accepts_var_kw:
+                 return adapter.load_model(model_id, device=device, **extra)
+             allowed = {k: v for k, v in extra.items() if k in sig.parameters}
+             if allowed:
+                 return adapter.load_model(model_id, device=device, **allowed)
+         except Exception:
+             # Fall back to the strictest call shape.
+             pass
+         return adapter.load_model(model_id, device=device)
 
 
  def _run_bare_control(
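The kwargs-filtering pattern in _load_model_with_cfg can be illustrated in isolation; the two Adapter classes below are hypothetical stand-ins, not invarlock adapters:

# Stand-alone illustration of signature-based kwargs filtering.
import inspect

class StrictAdapter:
    def load_model(self, model_id, device):
        return (model_id, device)

class FlexibleAdapter:
    def load_model(self, model_id, device, **kwargs):
        return (model_id, device, kwargs)

def load(adapter, model_id, device, **extra):
    sig = inspect.signature(adapter.load_model)
    if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()):
        return adapter.load_model(model_id, device=device, **extra)  # forward all
    allowed = {k: v for k, v in extra.items() if k in sig.parameters}
    return adapter.load_model(model_id, device=device, **allowed)  # drop unknowns

print(load(StrictAdapter(), "gpt2", "cpu", revision="main"))    # extras dropped
print(load(FlexibleAdapter(), "gpt2", "cpu", revision="main"))  # extras forwarded
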
@@ -1018,14 +1259,20 @@ def _run_bare_control(
      restore_fn: Any | None,
      console: Console,
      resolved_loss_type: str,
-     profile_normalized: str | None,
+     profile_normalized: str | None = None,
      snapshot_provenance: dict[str, bool] | None = None,
      skip_model_load: bool = False,
  ) -> dict[str, Any] | None:
      """Execute the bare-control run for overhead estimation and return payload."""
      from invarlock.core.runner import CoreRunner as _CoreRunner
 
-     console.print("🧪 Running bare control (guards disabled) for overhead check")
+     _event(
+         console,
+         "EXEC",
+         "Running bare control (guards disabled) for overhead check",
+         emoji="🧪",
+         profile=profile_normalized,
+     )
      set_seed(seed_bundle["python"])  # type: ignore[arg-type]
 
      bare_runner = _CoreRunner()
@@ -1034,6 +1281,12 @@ def _run_bare_control(
      bare_context = copy.deepcopy(run_config.context)
      bare_context.setdefault("validation", {})["guard_overhead_mode"] = "bare"
      bare_config.context = bare_context
+     runtime_edit_config = dict(edit_config or {})
+     runtime_edit_config.setdefault("console", console)
+     runtime_edit_config.setdefault(
+         "output_style", _style_from_console(console, profile=profile_normalized)
+     )
+     runtime_edit_config.setdefault("emit", True)
 
      private_model_loaded = False
      bare_target_model = None
@@ -1047,7 +1300,9 @@ def _run_bare_control(
      elif skip_model_load:
          bare_target_model = model or SimpleNamespace(name="bare_stub_model")
      else:
-         bare_target_model = _load_model_with_cfg(adapter, cfg, resolved_device)
+         bare_target_model = _load_model_with_cfg(
+             adapter, cfg, resolved_device, profile=profile_normalized
+         )
          private_model_loaded = True
          if snapshot_provenance is not None:
              snapshot_provenance["reload_path_used"] = True
@@ -1060,7 +1315,7 @@ def _run_bare_control(
          config=bare_config,
          calibration_data=calibration_data,
          auto_config=auto_config,
-         edit_config=edit_config,
+         edit_config=runtime_edit_config,
          preview_n=preview_count,
          final_n=final_count,
      )
@@ -1084,8 +1339,12 @@ def _run_bare_control(
          return False
 
      if not (_finite(bare_ppl_preview) and _finite(bare_ppl_final)):
-         console.print(
-             "[yellow]⚠️ Primary metric non-finite during bare control; continuing with diagnostics.[/yellow]"
+         _event(
+             console,
+             "WARN",
+             "Primary metric non-finite during bare control; continuing with diagnostics.",
+             emoji="⚠️",
+             profile=profile_normalized,
          )
 
      payload: dict[str, Any] = {
@@ -1137,6 +1396,7 @@ def _execute_guarded_run(
      final_count: int,
      restore_fn: Any | None,
      resolved_device: str,
+     profile_normalized: str | None = None,
      console: Console,
      snapshot_provenance: dict[str, bool] | None = None,
      skip_model_load: bool = False,
@@ -1150,11 +1410,26 @@ def _execute_guarded_run(
      elif skip_model_load:
          model = model or SimpleNamespace(name="guarded_stub_model")
      else:
-         console.print(f"🔧 Loading model: {cfg.model.id} (attempt 1)")
-         model = _load_model_with_cfg(adapter, cfg, resolved_device)
+         _event(
+             console,
+             "INIT",
+             f"Loading model: {cfg.model.id} (attempt 1)",
+             emoji="🔧",
+             profile=profile_normalized,
+         )
+         model = _load_model_with_cfg(
+             adapter, cfg, resolved_device, profile=profile_normalized
+         )
          if snapshot_provenance is not None:
              snapshot_provenance["reload_path_used"] = True
 
+     runtime_edit_config = dict(edit_config or {})
+     runtime_edit_config.setdefault("console", console)
+     runtime_edit_config.setdefault(
+         "output_style", _style_from_console(console, profile=profile_normalized)
+     )
+     runtime_edit_config.setdefault("emit", True)
+
      core_report = runner.execute(
          model=model,
          adapter=adapter,
@@ -1163,7 +1438,7 @@ def _execute_guarded_run(
          config=run_config,
          calibration_data=calibration_data,
          auto_config=auto_config,
-         edit_config=edit_config,
+         edit_config=runtime_edit_config,
          preview_n=preview_count,
          final_n=final_count,
      )
@@ -1200,10 +1475,10 @@ def _postprocess_and_summarize(
      saved_files = _emit_run_artifacts(
          report=report, out_dir=run_dir, filename_prefix="report", console=console
      )
-     console.print("[green]✅ Run completed successfully![/green]")
-     console.print(f"📄 Report: {saved_files['json']}")
+     _event(console, "PASS", "Run completed successfully!", emoji="✅")
+     _event(console, "DATA", f"Report: {saved_files['json']}", emoji="📄")
      if run_config.event_path:
-         console.print(f"📝 Events: {run_config.event_path}")
+         _event(console, "DATA", f"Events: {run_config.event_path}", emoji="📝")
      return saved_files
 
 
@@ -1293,9 +1568,14 @@ def _validate_and_harvest_baseline_schedule(
          message = f"PAIRING-EVIDENCE-MISSING: {path}: {reason}"
          if prof in {"ci", "release"}:
              raise InvarlockError(code="E001", message=message)
-         _print(
-             f"[red]❌ Baseline pairing schedule '{path}' is incompatible: {reason}[/red]"
-         )
+         if console is not None:
+             _event(
+                 console,
+                 "FAIL",
+                 f"Baseline pairing schedule '{path}' is incompatible: {reason}",
+                 emoji="❌",
+                 profile=prof,
+             )
          raise typer.Exit(1)
 
      baseline_meta = (
@@ -1450,9 +1730,14 @@ def _validate_and_harvest_baseline_schedule(
              prof = (profile or "dev").strip().lower()
              if prof in {"ci", "release"}:
                  _fail_schedule("preview_hash mismatch vs baseline report data")
-             _print(
-                 "[yellow]⚠️ Baseline preview_hash mismatch; continuing in dev profile.[/yellow]"
-             )
+             if console is not None:
+                 _event(
+                     console,
+                     "WARN",
+                     "Baseline preview_hash mismatch; continuing in dev profile.",
+                     emoji="⚠️",
+                     profile=prof,
+                 )
          if (
              isinstance(baseline_final_hash, str)
              and baseline_final_hash
@@ -1461,9 +1746,14 @@
              prof = (profile or "dev").strip().lower()
              if prof in {"ci", "release"}:
                  _fail_schedule("final_hash mismatch vs baseline report data")
-             _print(
-                 "[yellow]⚠️ Baseline final_hash mismatch; continuing in dev profile.[/yellow]"
-             )
+             if console is not None:
+                 _event(
+                     console,
+                     "WARN",
+                     "Baseline final_hash mismatch; continuing in dev profile.",
+                     emoji="⚠️",
+                     profile=prof,
+                 )
          if (
              isinstance(baseline_dataset_hash, str)
              and baseline_dataset_hash
@@ -1472,9 +1762,14 @@
              prof = (profile or "dev").strip().lower()
              if prof in {"ci", "release"}:
                  _fail_schedule("dataset_hash mismatch vs baseline report data")
-             _print(
-                 "[yellow]⚠️ Baseline dataset_hash mismatch; continuing in dev profile.[/yellow]"
-             )
+             if console is not None:
+                 _event(
+                     console,
+                     "WARN",
+                     "Baseline dataset_hash mismatch; continuing in dev profile.",
+                     emoji="⚠️",
+                     profile=prof,
+                 )
      except InvarlockError:
          raise
      except typer.Exit:
@@ -1496,10 +1791,14 @@ def _validate_and_harvest_baseline_schedule(
          and baseline_final is not None
          and baseline_final != cfg_final
      ):
-         _print(
-             "[yellow]⚠️ Adjusting evaluation window counts to match baseline schedule "
-             f"({baseline_preview}/{baseline_final}).[/yellow]"
-         )
+         if console is not None:
+             _event(
+                 console,
+                 "WARN",
+                 f"Adjusting evaluation window counts to match baseline schedule ({baseline_preview}/{baseline_final}).",
+                 emoji="⚠️",
+                 profile=profile,
+             )
 
          effective_preview = int(baseline_preview)
          effective_final = int(baseline_final)
@@ -1662,6 +1961,7 @@ def _resolve_metric_and_provider(
      model_profile: Any,
      *,
      resolved_loss_type: str | None = None,
+     metric_kind_override: str | None = None,
  ) -> tuple[str, str, dict[str, float]]:
      """Resolve metric kind, provider kind, and metric options from config with precedence.
 
@@ -1701,9 +2001,13 @@ def _resolve_metric_and_provider(
          metric_cfg = None
 
      metric_kind = None
+     if isinstance(metric_kind_override, str) and metric_kind_override.strip():
+         mk_override = metric_kind_override.strip().lower()
+         if mk_override != "auto":
+             metric_kind = mk_override
      reps = None
      ci_level = None
-     if metric_cfg is not None:
+     if metric_kind is None and metric_cfg is not None:
          try:
              metric_kind = (
                  metric_cfg.get("kind")
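The override precedence added in this hunk reduces to a small rule: an explicit --metric-kind wins over the config's primary-metric section unless it is "auto". A runnable sketch of just that rule:

# Sketch of the metric-kind override precedence.
def resolve_metric_kind(override, cfg_kind):
    if isinstance(override, str) and override.strip():
        normalized = override.strip().lower()
        if normalized != "auto":
            return normalized  # CLI override wins
    return cfg_kind  # otherwise fall back to config (or profile defaults)

assert resolve_metric_kind("ppl_mlm", "ppl_causal") == "ppl_mlm"
assert resolve_metric_kind("auto", "ppl_causal") == "ppl_causal"
assert resolve_metric_kind(None, "ppl_causal") == "ppl_causal"
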
@@ -1825,18 +2129,25 @@ def _plan_release_windows(
      candidate_msg = f", candidate_unique={int(candidate_unique)}" + (
          f"/{int(candidate_limit)}" if candidate_limit is not None else ""
      )
-     console.print(
-         "📏 Release window capacity:"
+     _event(
+         console,
+         "METRIC",
+         "Release window capacity:"
          f" unique={available_unique}, reserve={reserve_windows} "
          f"(calib {calibration_windows}, buffer {buffer_windows}), "
          f"usable={available_for_eval}, "
          f"per-arm raw={actual_per_arm_raw} → selected {actual_per_arm} "
-         f"(target {target_per_arm}{candidate_msg})"
+         f"(target {target_per_arm}{candidate_msg})",
+         emoji="📏",
+         profile="release",
      )
      if actual_per_arm < target_per_arm:
-         console.print(
-             "[yellow]⚠️ Adjusted per-arm windows down from "
-             f"{target_per_arm} to {actual_per_arm} based on capacity.[/yellow]"
+         _event(
+             console,
+             "WARN",
+             f"Adjusted per-arm windows down from {target_per_arm} to {actual_per_arm} based on capacity.",
+             emoji="⚠️",
+             profile="release",
          )
 
      plan = {
@@ -1893,11 +2204,24 @@ def run_command(
      ),
      out: str | None = typer.Option(None, "--out", help="Output directory override"),
      edit: str | None = typer.Option(None, "--edit", help="Edit kind (quant|mixed)"),
+     edit_label: str | None = typer.Option(
+         None,
+         "--edit-label",
+         help=(
+             "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
+             "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
+         ),
+     ),
      tier: str | None = typer.Option(
          None,
          "--tier",
          help="Auto-tuning tier override (conservative|balanced|aggressive)",
      ),
+     metric_kind: str | None = typer.Option(
+         None,
+         "--metric-kind",
+         help="Primary metric kind override (ppl_causal|ppl_mlm|accuracy|etc.)",
+     ),
      probes: int | None = typer.Option(
          None, "--probes", help="Number of micro-probes (0=deterministic, >0=adaptive)"
      ),
@@ -1918,6 +2242,19 @@ def run_command(
      no_cleanup: bool = typer.Option(
          False, "--no-cleanup", help="Skip cleanup of temporary artifacts"
      ),
+     style: str | None = typer.Option(
+         None, "--style", help="Output style (audit|friendly)"
+     ),
+     progress: bool = typer.Option(
+         False, "--progress", help="Show progress done messages"
+     ),
+     timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
+     telemetry: bool = typer.Option(
+         False, "--telemetry", help="Write telemetry JSON alongside the report"
+     ),
+     no_color: bool = typer.Option(
+         False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
+     ),
  ):
      """
      Run InvarLock pipeline with the given configuration.
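The new flags follow the standard Typer option pattern shown above. A minimal, hypothetical Typer app mirroring only the output-related options (demo.py and its behaviour are illustrative, not part of invarlock):

# Hypothetical demo.py showing the option pattern used by run_command.
import typer

app = typer.Typer()

@app.command()
def run(
    style: str | None = typer.Option(None, "--style", help="Output style (audit|friendly)"),
    timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
    telemetry: bool = typer.Option(False, "--telemetry", help="Write telemetry JSON"),
    no_color: bool = typer.Option(False, "--no-color", help="Disable ANSI colors"),
):
    typer.echo(f"style={style} timing={timing} telemetry={telemetry} no_color={no_color}")

if __name__ == "__main__":
    app()  # e.g. `python demo.py --style audit --timing --telemetry`
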
@@ -1936,24 +2273,57 @@ def run_command(
      config = _coerce_option(config)
      device = _coerce_option(device)
      profile = _coerce_option(profile)
+     profile_normalized = (str(profile or "")).strip().lower()
      out = _coerce_option(out)
      edit = _coerce_option(edit)
+     edit_label = _coerce_option(edit_label)
      tier = _coerce_option(tier)
+     metric_kind = _coerce_option(metric_kind)
      probes = _coerce_option(probes)
      until_pass = bool(_coerce_option(until_pass, False))
      max_attempts = int(_coerce_option(max_attempts, 3))
      timeout = _coerce_option(timeout)
      baseline = _coerce_option(baseline)
      no_cleanup = bool(_coerce_option(no_cleanup, False))
+     style = _coerce_option(style)
+     progress = bool(_coerce_option(progress, False))
+     timing = bool(_coerce_option(timing, False))
+     telemetry = bool(_coerce_option(telemetry, False))
+     no_color = bool(_coerce_option(no_color, False))
+
+     output_style = resolve_output_style(
+         style=str(style) if style is not None else None,
+         profile=profile_normalized,
+         progress=progress,
+         timing=timing,
+         no_color=no_color,
+     )
+     console._invarlock_output_style = output_style
+     if not output_style.color:
+         console.no_color = True
+     timings: dict[str, float] = {}
+     collect_timings = bool(output_style.timing or telemetry)
+     total_start: float | None = perf_counter() if collect_timings else None
+
+     _apply_warning_filters(profile_normalized)
 
      # Use shared CLI coercers from invarlock.cli.utils
      report_path_out: str | None = None
 
      def _fail_run(message: str) -> None:
-         console.print(f"[red]❌ {message}[/red]")
+         _event(console, "FAIL", message, emoji="❌", profile=profile_normalized)
          # Generic failure path → exit 1 (InvarlockError paths handle code 3 separately)
          raise typer.Exit(1)
 
+     def _provider_event(tag: str, message: str, emoji: str | None = None) -> None:
+         _event(
+             console,
+             tag,
+             message,
+             emoji=emoji,
+             profile=profile_normalized,
+         )
+
      # Fail fast when torch is missing so users see a clear extras hint instead of
      # a raw ModuleNotFoundError from deeper imports.
      try:
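The command now derives a single OutputStyle up front and collects wall-clock timings only when --timing or --telemetry asks for them. A hypothetical reduction of that flow (resolve_output_style's real logic lives in invarlock.cli.output; the defaulting rule below is an assumption for illustration):

# Sketch: one style decision up front, timings gated on need.
from time import perf_counter

def resolve_output_style_sketch(style, profile, timing, no_color):
    # Assumed rule: an explicit --style wins; otherwise CI/release profiles
    # default to the terse "audit" style and everything else to "friendly".
    chosen = style or ("audit" if profile in {"ci", "release"} else "friendly")
    return {"style": chosen, "timing": timing, "color": not no_color}

output_style = resolve_output_style_sketch(None, "release", timing=True, no_color=True)
timings: dict[str, float] = {}
start = perf_counter() if output_style["timing"] else None
# ... run a step ...
if start is not None:
    timings["total"] = perf_counter() - start
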
@@ -1961,12 +2331,14 @@ def run_command(
 
          _ = _torch  # pragma: no cover
      except (ImportError, ModuleNotFoundError) as e:
-         console.print(
-             "❌ Torch is required for this command. "
+         _event(
+             console,
+             "FAIL",
+             "Torch is required for this command. "
              'Install extras with: pip install "invarlock[hf]" '
              'or "invarlock[adapters]".',
-             style="red",
-             markup=False,
+             emoji="",
+             profile=profile_normalized,
          )
          raise typer.Exit(1) from e
 
@@ -2044,7 +2416,7 @@ def run_command(
          seed_value = 42
      set_seed(seed_value)
      # Enforce deterministic algorithms in CI/Release profiles when torch is available
-     profile_label = (str(profile or "").lower()) if profile else None
+     profile_label = profile_normalized or None
      if torch is not None and profile_label in {"ci", "release"}:
          try:  # pragma: no cover - behavior depends on torch availability
              if hasattr(torch, "use_deterministic_algorithms"):
@@ -2073,10 +2445,14 @@ def run_command(
          "numpy": int(numpy_seed),
          "torch": int(torch_seed) if torch_seed is not None else None,
      }
-     console.print(
-         "🎲 Deterministic seeds → "
+     _event(
+         console,
+         "INIT",
+         "Deterministic seeds → "
          f"python={seed_bundle['python']}, numpy={seed_bundle['numpy']}, "
-         f"torch={seed_bundle['torch'] if seed_bundle['torch'] is not None else 'N/A'}"
+         f"torch={seed_bundle['torch'] if seed_bundle['torch'] is not None else 'N/A'}",
+         emoji="🎲",
+         profile=profile_normalized,
      )
 
      # Resolve device and output directory
@@ -2111,8 +2487,8 @@ def run_command(
 
      run_id = f"{output_dir.name}-{timestamp}" if output_dir.name else timestamp
 
-     console.print(f"📁 Output directory: {run_dir}")
-     console.print(f"🆔 Run ID: {run_id}")
+     console.print(_format_kv_line("Output", str(run_dir)))
+     console.print(_format_kv_line("Run ID", run_id))
 
      # Initialize retry controller if --until-pass mode enabled
      retry_controller = _init_retry_controller(
@@ -2127,7 +2503,6 @@ def run_command(
      pairing_schedule: dict[str, Any] | None = None
      if baseline:
          baseline_path = Path(baseline)
-         profile_normalized = (profile or "").strip().lower()
          strict_baseline = profile_normalized in {"ci", "release"}
          if not baseline_path.exists():
              msg = (
@@ -2136,8 +2511,12 @@ def run_command(
              )
              if strict_baseline:
                  raise InvarlockError(code="E001", message=msg)
-             console.print(
-                 f"[yellow]⚠️ {msg}. Falling back to dataset schedule.[/yellow]"
+             _event(
+                 console,
+                 "WARN",
+                 f"{msg}. Falling back to dataset schedule.",
+                 emoji="⚠️",
+                 profile=profile_normalized,
              )
          else:
              try:
@@ -2147,8 +2526,12 @@ def run_command(
                  msg = f"PAIRING-EVIDENCE-MISSING: baseline report JSON parse failed ({exc})"
                  if strict_baseline:
                      raise InvarlockError(code="E001", message=msg) from exc
-                 console.print(
-                     f"[yellow]⚠️ {msg}. Falling back to dataset schedule.[/yellow]"
+                 _event(
+                     console,
+                     "WARN",
+                     f"{msg}. Falling back to dataset schedule.",
+                     emoji="⚠️",
+                     profile=profile_normalized,
                  )
                  baseline_report_data = None
              if isinstance(baseline_report_data, dict):
@@ -2206,8 +2589,12 @@ def run_command(
                              tokenizer_hash = tok
                  except Exception:
                      pass
-                 console.print(
-                     "🧬 Loaded baseline evaluation schedule for pairing"
+                 _event(
+                     console,
+                     "DATA",
+                     "Loaded baseline evaluation schedule for pairing",
+                     emoji="🧬",
+                     profile=profile_normalized,
                  )
              else:
                  msg = (
@@ -2216,8 +2603,12 @@ def run_command(
                  )
                  if strict_baseline:
                      raise InvarlockError(code="E001", message=msg)
-                 console.print(
-                     f"[yellow]⚠️ {msg}. Falling back to dataset schedule.[/yellow]"
+                 _event(
+                     console,
+                     "WARN",
+                     f"{msg}. Falling back to dataset schedule.",
+                     emoji="⚠️",
+                     profile=profile_normalized,
                  )
                  baseline_report_data = None
                  pairing_schedule = None
@@ -2243,15 +2634,23 @@ def run_command(
      adapter = registry.get_adapter(cfg.model.adapter)
      edit_name = getattr(getattr(cfg, "edit", None), "name", None)
      if not isinstance(edit_name, str) or not edit_name.strip():
-         console.print(
-             "[red]❌ Edit configuration must specify a non-empty `edit.name`.[/red]"
+         _event(
+             console,
+             "FAIL",
+             "Edit configuration must specify a non-empty `edit.name`.",
+             emoji="❌",
+             profile=profile_normalized,
          )
          raise typer.Exit(1)
      try:
          edit_op = registry.get_edit(edit_name.strip())
      except Exception:
-         console.print(
-             f"[yellow]⚠️ Unknown edit '{edit_name.strip()}'. Using pass-through shim.[/yellow]"
+         _event(
+             console,
+             "WARN",
+             f"Unknown edit '{edit_name.strip()}'. Using pass-through shim.",
+             emoji="⚠️",
+             profile=profile_normalized,
          )
          edit_op = SimpleNamespace(name=edit_name.strip())
 
@@ -2287,8 +2686,12 @@ def run_command(
                  registry.get_plugin_metadata(guard_name, "guards")
              )
          except KeyError:
-             console.print(
-                 f"[yellow]⚠️ Guard '{guard_name}' not found, skipping[/yellow]"
+             _event(
+                 console,
+                 "WARN",
+                 f"Guard '{guard_name}' not found, skipping",
+                 emoji="⚠️",
+                 profile=profile_normalized,
              )
      plugin_provenance = {
          "adapter": adapter_meta,
@@ -2296,8 +2699,15 @@ def run_command(
          "guards": guard_metadata,
      }
      pm_acceptance_range = _resolve_pm_acceptance_range(cfg)
-
-     console.print(f"🔌 Adapter: {adapter.name}")
+     pm_drift_band = _resolve_pm_drift_band(cfg)
+
+     _event(
+         console,
+         "DATA",
+         f"Adapter: {adapter.name}",
+         emoji="🔌",
+         profile=profile_normalized,
+     )
 
      # Create run configuration
      guard_overrides = {
@@ -2361,6 +2771,9 @@ def run_command(
          pm_acceptance_range
      )
      run_context["pm_acceptance_range"] = pm_acceptance_range
+     if pm_drift_band:
+         run_context.setdefault("primary_metric", {})["drift_band"] = pm_drift_band
+         run_context["pm_drift_band"] = pm_drift_band
      run_context["model_profile"] = {
          "family": model_profile.family,
          "default_loss": model_profile.default_loss,
@@ -2391,6 +2804,7 @@ def run_command(
      dataset_meta: dict[str, Any] = {}
      baseline_meta: dict[str, Any] = {}
      window_plan: dict[str, Any] | None = None
+     dataset_timing_start: float | None = perf_counter() if collect_timings else None
      if pairing_schedule:
          harvested = _validate_and_harvest_baseline_schedule(
              cfg,
@@ -2413,7 +2827,7 @@ def run_command(
          try:
              tokenizer, tokenizer_hash = resolve_tokenizer(model_profile)
          except Exception as exc:
-             console.print(f"[red]{exc}[/red]")
+             _event(console, "FAIL", str(exc), emoji="❌", profile=profile)
              raise typer.Exit(1) from exc
          preview_window_ids = pairing_schedule["preview"].get("window_ids")
          preview_labels = pairing_schedule["preview"].get("labels")
@@ -2635,7 +3049,13 @@ def run_command(
          if capacity_meta and "window_capacity" not in dataset_meta:
              dataset_meta["window_capacity"] = capacity_meta
      elif cfg.dataset.provider:
-         console.print(f"📊 Loading dataset: {cfg.dataset.provider}")
+         _event(
+             console,
+             "DATA",
+             f"Loading dataset: {cfg.dataset.provider}",
+             emoji="📊",
+             profile=profile_normalized,
+         )
          # Pass through provider-specific kwargs when available
          provider_kwargs = {}
          for key in (
@@ -2695,6 +3115,7 @@ def run_command(
                  provider_kwargs=provider_kwargs,
                  console=console,
                  resolved_device=resolved_device,
+                 emit=_provider_event,
              )
          )
 
@@ -2702,7 +3123,7 @@ def run_command(
          try:
              tokenizer, tokenizer_hash = resolve_tokenizer(model_profile)
          except Exception as exc:
-             console.print(f"[red]{exc}[/red]")
+             _event(console, "FAIL", str(exc), emoji="❌", profile=profile)
              raise typer.Exit(1) from exc
 
          dataset_stride = getattr(
@@ -2736,7 +3157,7 @@ def run_command(
                  console=console,
              )
          except RuntimeError as err:
-             console.print(f"[red]{err}[/red]")
+             _event(console, "FAIL", str(err), emoji="❌", profile=profile)
              raise typer.Exit(1) from err
 
          actual_per_arm = int(window_plan["actual_preview"])
@@ -2748,9 +3169,12 @@ def run_command(
                  cfg.dataset, "stride", getattr(cfg.dataset, "seq_len", 0)
              )
          else:
-             console.print(
-                 "[yellow]⚠️ Release profile requested but dataset provider "
-                 "does not expose capacity estimation; using configured window counts.[/yellow]"
+             _event(
+                 console,
+                 "WARN",
+                 "Release profile requested but dataset provider does not expose capacity estimation; using configured window counts.",
+                 emoji="⚠️",
+                 profile=profile_normalized,
              )
 
          preview_records: list[tuple[list[int], list[int]]] = []
@@ -2954,8 +3378,12 @@ def run_command(
                      raise RuntimeError(
                          "Unable to construct non-overlapping windows within minimum window floor."
                      )
-                 console.print(
-                     f"[yellow]⚠️ Detected {deficit} duplicate windows; reducing per-arm windows to {proposed_per_arm} and retrying stratification.[/yellow]"
+                 _event(
+                     console,
+                     "WARN",
+                     f"Detected {deficit} duplicate windows; reducing per-arm windows to {proposed_per_arm} and retrying stratification.",
+                     emoji="⚠️",
+                     profile=profile_normalized,
                  )
 
                  effective_preview = proposed_per_arm
@@ -3097,6 +3525,10 @@ def run_command(
      run_context["dataset_meta"] = dataset_meta
      if window_plan:
          run_context["window_plan"] = window_plan
+     if dataset_timing_start is not None:
+         timings["load_dataset"] = max(
+             0.0, float(perf_counter() - dataset_timing_start)
+         )
 
      if os.environ.get("INVARLOCK_DEBUG_TRACE"):
          console.print(
@@ -3120,7 +3552,13 @@ def run_command(
      )
 
      # Execute the real pipeline using CoreRunner
-     console.print(f"⚙️ Executing pipeline with {len(guards)} guards...")
+     _event(
+         console,
+         "EXEC",
+         f"Executing pipeline with {len(guards)} guards...",
+         emoji="⚙️",
+         profile=profile_normalized,
+     )
      runner = CoreRunner()
 
      # Prepare auto configuration for tier resolution
@@ -3185,8 +3623,8 @@ def run_command(
          for key, values in model_profile.module_selectors.items()
      }
 
-     console.print(f"✂️ Edit: {edit_op.name}")
-     console.print(f"🛡️ Guards: {[g.name for g in guards]}")
+     console.print(_format_kv_line("Edit", str(edit_op.name)))
+     console.print(_format_kv_line("Guards", _format_guard_chain(guards)))
 
      # Model load/snapshot strategy
      model = None
@@ -3200,8 +3638,25 @@ def run_command(
      # Try single-load with snapshot/restore if adapter supports it; fallback to reload per attempt
      try:
          # Load once
-         console.print(f"🔧 Loading model once: {cfg.model.id}")
-         model = _load_model_with_cfg(adapter, cfg, resolved_device)
+         _event(
+             console,
+             "INIT",
+             f"Loading model once: {cfg.model.id}",
+             emoji="🔧",
+             profile=profile_normalized,
+         )
+         with timed_step(
+             console=console,
+             style=_style_from_console(console, profile=profile_normalized),
+             timings=timings,
+             key="load_model",
+             tag="INIT",
+             message="Load model",
+             emoji="🔧",
+         ):
+             model = _load_model_with_cfg(
+                 adapter, cfg, resolved_device, profile=profile_normalized
+             )
 
          # No edit-specific bootstrap logic
 
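timed_step wraps a stage, records its elapsed time under a key, and (per this diff) can also print a progress event. A stand-alone sketch of just the timing side, using the same keyword shape as the call above (the console/style/tag/message/emoji parameters are accepted but unused here; the real helper in invarlock.cli.output does more with them):

# Minimal timed_step-style context manager (timing only).
from contextlib import contextmanager
from time import perf_counter

@contextmanager
def timed_step(*, timings, key, console=None, style=None, tag="INIT",
               message="", emoji=None):
    start = perf_counter()
    try:
        yield
    finally:
        timings[key] = perf_counter() - start  # record even on failure

timings = {}
with timed_step(timings=timings, key="load_model", message="Load model"):
    sum(range(10**6))  # stand-in for the actual model load
print(f"load_model took {timings['load_model']:.3f}s")
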
@@ -3357,9 +3812,13 @@ def run_command(
              return "reload"
 
      mode = _choose_snapshot_mode()
-     # Emit deterministic snapshot mode status line
-     console.print(
-         f"snapshot_mode: {'enabled' if mode in {'bytes', 'chunked'} else 'disabled'}"
+     enabled = mode in {"bytes", "chunked"}
+     _event(
+         console,
+         "INIT",
+         f"Snapshot mode: {'enabled' if enabled else 'disabled'}",
+         emoji="💾",
+         profile=profile_normalized,
      )
      if mode == "chunked":
          snapshot_tmpdir = adapter.snapshot_chunked(model)  # type: ignore[attr-defined]
@@ -3402,13 +3861,16 @@ def run_command(
 
      # RETRY LOOP - All report processing inside loop
      attempt = 1
-     profile_normalized = (profile or "").lower()
      measure_guard_overhead, skip_overhead = _should_measure_overhead(
          profile_normalized
      )
      if skip_overhead and profile_normalized in {"ci", "release"}:
-         console.print(
-             "[yellow]⚠️ Overhead check skipped via INVARLOCK_SKIP_OVERHEAD_CHECK[/yellow]"
+         _event(
+             console,
+             "WARN",
+             "Overhead check skipped via INVARLOCK_SKIP_OVERHEAD_CHECK",
+             emoji="⚠️",
+             profile=profile_normalized,
          )
 
      while True:
@@ -3416,12 +3878,32 @@ def run_command(
          set_seed(seed_bundle["python"])
 
          if retry_controller:
-             console.print(f"\n🚀 Attempt {attempt}/{max_attempts}")
+             console.print("\n")
+             _event(
+                 console,
+                 "EXEC",
+                 f"Attempt {attempt}/{max_attempts}",
+                 emoji="🚀",
+                 profile=profile_normalized,
+             )
              if attempt > 1:
-                 console.print(f"🔄 Retry attempt {attempt}/{max_attempts}")
+                 _event(
+                     console,
+                     "EXEC",
+                     f"Retry attempt {attempt}/{max_attempts}",
+                     emoji="🔄",
+                     profile=profile_normalized,
+                 )
          else:
              if attempt > 1:
-                 console.print(f"\n🚀 Attempt {attempt}")
+                 console.print("\n")
+                 _event(
+                     console,
+                     "EXEC",
+                     f"Attempt {attempt}",
+                     emoji="🚀",
+                     profile=profile_normalized,
+                 )
 
          # Adjust parameters for retry attempts
          if retry_controller and attempt > 1:
@@ -3450,6 +3932,8 @@ def run_command(
                  "checks": {},
              }
          elif measure_guard_overhead:
+             bare_edit_config = dict(edit_config or {})
+             bare_edit_config["emit"] = False
              guard_overhead_payload = _run_bare_control(
                  adapter=adapter,
                  edit_op=edit_op,
@@ -3458,7 +3942,7 @@ def run_command(
                  run_config=run_config,
                  calibration_data=calibration_data,
                  auto_config=auto_config,
-                 edit_config=edit_config,
+                 edit_config=bare_edit_config,
                  preview_count=preview_count,
                  final_count=final_count,
                  seed_bundle=seed_bundle,
@@ -3472,34 +3956,53 @@ def run_command(
              )
 
          # Ensure clean state for guarded run
-         core_report, model = _execute_guarded_run(
-             runner=runner,
-             adapter=adapter,
-             model=model,
-             cfg=cfg,
-             edit_op=edit_op,
-             run_config=run_config,
-             guards=guards,
-             calibration_data=calibration_data,
-             auto_config=auto_config,
-             edit_config=edit_config,
-             preview_count=preview_count,
-             final_count=final_count,
-             restore_fn=restore_fn,
-             resolved_device=resolved_device,
+         with timed_step(
              console=console,
-             snapshot_provenance=snapshot_provenance,
-             skip_model_load=skip_model_load,
-         )
+             style=_style_from_console(console, profile=profile_normalized),
+             timings=timings,
+             key="execute",
+             tag="EXEC",
+             message="Execute pipeline",
+             emoji="⚙️",
+         ):
+             core_report, model = _execute_guarded_run(
+                 runner=runner,
+                 adapter=adapter,
+                 model=model,
+                 cfg=cfg,
+                 edit_op=edit_op,
+                 run_config=run_config,
+                 guards=guards,
+                 calibration_data=calibration_data,
+                 auto_config=auto_config,
+                 edit_config=edit_config,
+                 preview_count=preview_count,
+                 final_count=final_count,
+                 restore_fn=restore_fn,
+                 resolved_device=resolved_device,
+                 profile_normalized=profile_normalized,
+                 console=console,
+                 snapshot_provenance=snapshot_provenance,
+                 skip_model_load=skip_model_load,
+             )
      except _SnapshotRestoreFailed as exc:
          snapshot_provenance["restore_failed"] = True
          _free_model_memory(model)
          model = None
          restore_fn = None
-         console.print(
-             "[yellow]⚠️ Snapshot restore failed; switching to reload-per-attempt.[/yellow]"
+         _event(
+             console,
+             "WARN",
+             "Snapshot restore failed; switching to reload-per-attempt.",
+             emoji="⚠️",
+             profile=profile_normalized,
+         )
+         _event(
+             console,
+             "WARN",
+             f"↳ {exc}",
+             profile=profile_normalized,
          )
-         console.print(f"[yellow]↳ {exc}[/yellow]")
          if retry_controller:
              retry_controller.record_attempt(
                  attempt,
@@ -3631,6 +4134,8 @@ def run_command(
          report["meta"].update(meta_payload)
          if pm_acceptance_range:
              report["meta"]["pm_acceptance_range"] = pm_acceptance_range
+         if pm_drift_band:
+             report["meta"]["pm_drift_band"] = pm_drift_band
          report["meta"]["model_profile"] = {
              "family": model_profile.family,
              "default_loss": model_profile.default_loss,
@@ -3714,6 +4219,14 @@ def run_command(
              }
          )
 
+         if edit_label:
+             report.setdefault("edit", {})
+             report["edit"]["name"] = edit_label
+             report["edit"]["algorithm"] = edit_label
+             if isinstance(core_report.context, dict):
+                 core_report.context.setdefault("edit", {})
+                 core_report.context["edit"]["name"] = edit_label
+
          mask_artifact_path = _persist_ref_masks(core_report, run_dir)
          if mask_artifact_path:
              report.setdefault("artifacts", {})
@@ -3721,6 +4234,22 @@ def run_command(
 
          # Transfer metrics (PM-only: do not write ppl_* fields)
          if hasattr(core_report, "metrics") and core_report.metrics:
+             if isinstance(core_report.metrics, dict):
+                 core_timings = core_report.metrics.get("timings")
+                 if isinstance(core_timings, dict):
+                     for key in (
+                         "prepare",
+                         "prepare_guards",
+                         "edit",
+                         "guards",
+                         "eval",
+                         "finalize",
+                     ):
+                         if key in core_timings:
+                             try:
+                                 timings[key] = float(core_timings[key])
+                             except Exception:
+                                 timings[key] = core_timings[key]
              metrics_payload = {
                  "latency_ms_per_tok": core_report.metrics.get(
                      "latency_ms_per_tok", 0.0
@@ -3772,6 +4301,11 @@ def run_command(
                  "masked_tokens_total",
                  "masked_tokens_preview",
                  "masked_tokens_final",
+                 "timings",
+                 "guard_timings",
+                 "memory_snapshots",
+                 "gpu_memory_mb_peak",
+                 "gpu_memory_reserved_mb_peak",
                  "reduction",
              ]
              for key in optional_keys:
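After this merge, the CLI-level timings dict holds both the stages it measured itself (load_model, load_dataset, execute) and the per-stage numbers reported by the core runner. A small illustration with made-up numbers:

# Illustration of the timing merge above (values are fabricated).
core_timings = {"prepare": 0.42, "edit": 3.10, "guards": 1.95, "eval": 7.80}
timings = {"load_model": 12.4, "load_dataset": 2.2}
for key in ("prepare", "prepare_guards", "edit", "guards", "eval", "finalize"):
    if key in core_timings:
        try:
            timings[key] = float(core_timings[key])
        except Exception:
            timings[key] = core_timings[key]  # keep non-numeric values as-is
print(timings)
# {'load_model': 12.4, 'load_dataset': 2.2, 'prepare': 0.42, 'edit': 3.1, ...}
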
@@ -3935,8 +4469,12 @@ def run_command(
              },
          }
      elif had_baseline and (profile or "").lower() in {"ci", "release"}:
-         console.print(
-             "[red]❌ [INVARLOCK:E001] PAIRING-SCHEDULE-MISMATCH: baseline pairing requested but evaluation windows were not produced. Check capacity/pairing config.[/red]"
+         _event(
+             console,
+             "FAIL",
+             "[INVARLOCK:E001] PAIRING-SCHEDULE-MISMATCH: baseline pairing requested but evaluation windows were not produced. Check capacity/pairing config.",
+             emoji="❌",
+             profile=profile_normalized,
          )
          raise typer.Exit(3)
      else:
@@ -4147,12 +4685,20 @@ def run_command(
              if ok:
                  report["artifacts"]["checkpoint_path"] = str(export_dir)
              else:
-                 console.print(
-                     "[yellow]⚠️ Model export requested but adapter did not save a HF directory.[/yellow]"
+                 _event(
+                     console,
+                     "WARN",
+                     "Model export requested but adapter did not save a HF directory.",
+                     emoji="⚠️",
+                     profile=profile_normalized,
                  )
          except Exception:
-             console.print(
-                 "[yellow]⚠️ Model export requested but failed due to an unexpected error.[/yellow]"
+             _event(
+                 console,
+                 "WARN",
+                 "Model export requested but failed due to an unexpected error.",
+                 emoji="⚠️",
+                 profile=profile_normalized,
              )
 
          # Set flags
@@ -4373,7 +4919,10 @@ def run_command(
          try:
              metric_kind_resolved, _provider_kind, metric_opts = (
                  _resolve_metric_and_provider(
-                     cfg, model_profile, resolved_loss_type=resolved_loss_type
+                     cfg,
+                     model_profile,
+                     resolved_loss_type=resolved_loss_type,
+                     metric_kind_override=metric_kind,
                  )
              )
              if metric_kind_resolved:
@@ -4452,6 +5001,13 @@ def run_command(
          except Exception:
              pass
 
+         telemetry_path: Path | None = None
+         if telemetry:
+             telemetry_path = run_dir / "telemetry.json"
+             report.setdefault("artifacts", {})["telemetry_path"] = str(
+                 telemetry_path
+             )
+
          saved_files = _postprocess_and_summarize(
              report=report,
              run_dir=run_dir,
@@ -4468,6 +5024,31 @@ def run_command(
          except Exception:
              pass
 
+         if telemetry and telemetry_path is not None:
+             try:
+                 from invarlock.reporting.telemetry import save_telemetry_report
+
+                 saved_path = save_telemetry_report(
+                     report, run_dir, filename=telemetry_path.name
+                 )
+                 if isinstance(saved_files, dict):
+                     saved_files["telemetry"] = str(saved_path)
+                 _event(
+                     console,
+                     "DATA",
+                     f"Telemetry: {saved_path}",
+                     emoji="📈",
+                     profile=profile_normalized,
+                 )
+             except Exception as exc:  # pragma: no cover - best-effort
+                 _event(
+                     console,
+                     "WARN",
+                     f"Telemetry export failed: {exc}",
+                     emoji="⚠️",
+                     profile=profile_normalized,
+                 )
+
          # Metrics display
          pm_obj = None
          try:
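The telemetry export above is deliberately best-effort: a failure downgrades to a WARN event rather than failing the run. The sketch below mirrors that try/except shape; save_telemetry_sketch is a hypothetical stand-in (only the call shape save_telemetry_report(report, run_dir, filename=...) is taken from the diff, not its internals):

# Best-effort telemetry export, mirroring the shape above.
import json
from pathlib import Path

def save_telemetry_sketch(report: dict, run_dir: Path, filename: str) -> Path:
    # Hypothetical writer: persist whatever timings the report carries.
    path = run_dir / filename
    payload = {"timings": report.get("metrics", {}).get("timings", {})}
    path.write_text(json.dumps(payload, indent=2))
    return path

run_dir = Path(".")
try:
    saved = save_telemetry_sketch(
        {"metrics": {"timings": {"eval": 7.8}}}, run_dir, "telemetry.json"
    )
    print(f"Telemetry: {saved}")
except Exception as exc:
    print(f"Telemetry export failed: {exc}")  # warn, don't crash the run
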
@@ -4482,15 +5063,23 @@ def run_command(
  if isinstance(pm_prev, (int | float)) and isinstance(
  pm_fin, (int | float)
  ):
- console.print(
- f"📌 Primary Metric [{pm_kind}] — preview: {pm_prev:.3f}, final: {pm_fin:.3f}"
+ _event(
+ console,
+ "METRIC",
+ f"Primary Metric [{pm_kind}] — preview: {pm_prev:.3f}, final: {pm_fin:.3f}",
+ emoji="📌",
+ profile=profile_normalized,
  )
  ratio_vs_base = pm_obj.get("ratio_vs_baseline")
  if isinstance(ratio_vs_base, (int | float)) and math.isfinite(
  ratio_vs_base
  ):
- console.print(
- f"🔗 Ratio vs baseline [{pm_kind}]: {ratio_vs_base:.3f}"
+ _event(
+ console,
+ "METRIC",
+ f"Ratio vs baseline [{pm_kind}]: {ratio_vs_base:.3f}",
+ emoji="🔗",
+ profile=profile_normalized,
  )
  except Exception:
  pass
@@ -4502,8 +5091,12 @@ def run_command(
  console, guard_overhead_info
  )
  if not guard_overhead_info.get("passed", True):
- console.print(
- "[red]⚠️ Guard overhead gate FAILED: Guards add more than the permitted budget[/red]"
+ _event(
+ console,
+ "FAIL",
+ "Guard overhead gate FAILED: Guards add more than the permitted budget",
+ emoji="⚠️",
+ profile=profile_normalized,
  )
  # Only fail hard when the overhead check was actually evaluated
  # (e.g., for causal LMs with available bare/guarded PM). For
@@ -4544,7 +5137,13 @@ def run_command(
  if baseline_report is None:
  raise FileNotFoundError("Baseline report unavailable")

- console.print("📜 Generating safety certificate...")
+ _event(
+ console,
+ "EXEC",
+ "Generating evaluation certificate...",
+ emoji="📜",
+ profile=profile_normalized,
+ )
  certificate = make_certificate(report, baseline_report)

  validation = certificate.get("validation", {})
@@ -4561,11 +5160,21 @@ def run_command(
  )

  if certificate_passed:
- console.print("[green]✅ Certificate PASSED all gates![/green]")
+ _event(
+ console,
+ "PASS",
+ "Certificate PASSED all gates!",
+ emoji="✅",
+ profile=profile_normalized,
+ )
  break
  else:
- console.print(
- f"[yellow]⚠️ Certificate FAILED gates: {', '.join(failed_gates)}[/yellow]"
+ _event(
+ console,
+ "FAIL",
+ f"Certificate FAILED gates: {', '.join(failed_gates)}",
+ emoji="⚠️",
+ profile=profile_normalized,
  )

  # Auto-tune mask-only heads (binary search on keep count)
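`make_certificate(report, baseline_report)` returns a dict whose `validation` section drives the PASS/FAIL branch above. A sketch of extracting failed gate names; the exact per-gate result shape is an assumption:

# Hypothetical gate inspection; the certificate's per-gate result shape is assumed.
def failed_gate_names(certificate: dict) -> list[str]:
    validation = certificate.get("validation", {})
    return [
        name
        for name, result in validation.items()
        if isinstance(result, dict) and result.get("passed") is False
    ]

cert = {"validation": {"primary_metric": {"passed": True}, "guard_overhead": {"passed": False}}}
print(failed_gate_names(cert))  # ['guard_overhead']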
@@ -4610,8 +5219,12 @@ def run_command(
  }
  )
  head_section["global_k"] = next_keep
- console.print(
- f"🔧 Auto-tune adjust: global_k → {next_keep} (bounds {keep_low}-{keep_high})"
+ _event(
+ console,
+ "INIT",
+ f"Auto-tune adjust: global_k → {next_keep} (bounds {keep_low}-{keep_high})",
+ emoji="🔧",
+ profile=profile_normalized,
  )
  except Exception:
  pass
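The auto-tune loop binary-searches `global_k` between `keep_low` and `keep_high` after each failed certificate. One step of such a search might look like the following sketch; the direction heuristic (a failure means keep more heads) is an assumption:

# Sketch of one auto-tune step; the direction heuristic is an assumption.
def next_keep_count(keep_low: int, keep_high: int, gates_passed: bool) -> tuple[int, int, int]:
    """Binary-search step: shrink the window toward more retention on failure."""
    if gates_passed:
        keep_high = (keep_low + keep_high) // 2  # try keeping fewer heads
    else:
        keep_low = (keep_low + keep_high) // 2 + 1  # keep more heads
    next_keep = (keep_low + keep_high) // 2
    return next_keep, keep_low, keep_high

print(next_keep_count(8, 64, gates_passed=False))  # (50, 37, 64)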
@@ -4620,14 +5233,22 @@ def run_command(
  attempt += 1
  continue
  else:
- console.print(
- f"[red]❌ Exhausted retry budget after {attempt} attempts[/red]"
+ _event(
+ console,
+ "FAIL",
+ f"Exhausted retry budget after {attempt} attempts",
+ emoji="❌",
+ profile=profile_normalized,
  )
  break

  except Exception as cert_error:
- console.print(
- f"[yellow]⚠️ Certificate validation failed: {cert_error}[/yellow]"
+ _event(
+ console,
+ "WARN",
+ f"Certificate validation failed: {cert_error}",
+ emoji="⚠️",
+ profile=profile_normalized,
  )
  if retry_controller:
  retry_controller.record_attempt(
@@ -4656,11 +5277,82 @@ def run_command(
  # (moved) Cleanup printing occurs after loop to guarantee execution
  pass

+ if output_style.timing:
+ total_duration = (
+ max(0.0, float(perf_counter() - total_start))
+ if total_start is not None
+ else None
+ )
+ timings_for_summary: dict[str, float] = {}
+ for key, value in timings.items():
+ if isinstance(value, (int | float)):
+ timings_for_summary[key] = float(value)
+ if total_duration is not None:
+ timings_for_summary["total"] = total_duration
+
+ has_breakdown = any(
+ key in timings_for_summary
+ for key in (
+ "prepare",
+ "prepare_guards",
+ "edit",
+ "guards",
+ "eval",
+ "finalize",
+ )
+ )
+
+ order: list[tuple[str, str]] = []
+
+ def _add(label: str, key: str) -> None:
+ if key in timings_for_summary:
+ order.append((label, key))
+
+ _add("Load model", "load_model")
+ _add("Load data", "load_dataset")
+ if has_breakdown:
+ _add("Prepare", "prepare")
+ _add("Prep guards", "prepare_guards")
+ _add("Edit", "edit")
+ _add("Guards", "guards")
+ _add("Eval", "eval")
+ _add("Finalize", "finalize")
+ else:
+ _add("Execute", "execute")
+ _add("Total", "total")
+
+ extra_lines: list[str] = []
+ metrics_section = (
+ report.get("metrics", {}) if isinstance(report, dict) else {}
+ )
+ if isinstance(metrics_section, dict):
+ mem_peak = metrics_section.get("memory_mb_peak")
+ gpu_peak = metrics_section.get("gpu_memory_mb_peak")
+ if isinstance(mem_peak, (int | float)):
+ extra_lines.append(f" Peak Memory : {float(mem_peak):.2f} MB")
+ if isinstance(gpu_peak, (int | float)):
+ extra_lines.append(f" Peak GPU Mem: {float(gpu_peak):.2f} MB")
+
+ if timings_for_summary and order:
+ print_timing_summary(
+ console,
+ timings_for_summary,
+ style=output_style,
+ order=order,
+ extra_lines=extra_lines,
+ )
+
  # Normal path falls through; cleanup handled below in finally
  return report_path_out

  except FileNotFoundError as e:
- console.print(f"[red]❌ Configuration file not found: {e}[/red]")
+ _event(
+ console,
+ "FAIL",
+ f"Configuration file not found: {e}",
+ emoji="❌",
+ profile=profile_normalized,
+ )
  raise typer.Exit(1) from e
  except InvarlockError as ce:
  # InvarlockError → code 3 only in CI/Release; dev → 1
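The `--timing` summary added above gathers per-phase durations, chooses fine-grained labels only when a phase breakdown exists, and appends peak-memory lines from `report["metrics"]`. A compact sketch of the same shaping; the labels approximate the diff's, and `print_timing_summary` itself is elided:

from time import perf_counter

# Sketch of the --timing summary shaping; labels approximate the diff's.
timings = {"load_model": 3.2, "load_dataset": 1.1, "eval": 7.9}
total_start = perf_counter() - 12.4  # pretend the run began ~12.4s ago

timings_for_summary = {k: float(v) for k, v in timings.items() if isinstance(v, (int, float))}
timings_for_summary["total"] = max(0.0, perf_counter() - total_start)

breakdown_keys = ("prepare", "prepare_guards", "edit", "guards", "eval", "finalize")
has_breakdown = any(k in timings_for_summary for k in breakdown_keys)

order = [("Load model", "load_model"), ("Load data", "load_dataset")]
order += [(k.replace("_", " ").capitalize(), k) for k in breakdown_keys] if has_breakdown else [("Execute", "execute")]
order.append(("Total", "total"))

for label, key in order:
    if key in timings_for_summary:  # skip phases that never ran
        print(f"{label:<12}: {timings_for_summary[key]:.2f}s")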
@@ -4676,12 +5368,22 @@ def run_command(
  traceback.print_exc()
  # Emit a clearer message for schema failures (exit 2)
  if isinstance(e, ValueError) and "Invalid RunReport" in str(e):
- console.print(
- "[red]❌ Schema invalid: run report structure failed validation[/red]"
+ _event(
+ console,
+ "FAIL",
+ "Schema invalid: run report structure failed validation",
+ emoji="❌",
+ profile=profile_normalized,
  )
  code = 2
  else:
- console.print(f"[red]❌ Pipeline execution failed: {e}[/red]")
+ _event(
+ console,
+ "FAIL",
+ f"Pipeline execution failed: {e}",
+ emoji="❌",
+ profile=profile_normalized,
+ )
  code = _resolve_exit_code(e, profile=profile)
  raise typer.Exit(code) from e
  finally:
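Across the handlers, failure classes map onto distinct exit codes: 1 for a missing configuration file, 2 for a schema-invalid run report, and 3 for `InvarlockError` under ci/release profiles (1 under dev). A hedged reconstruction of that policy; the real `_resolve_exit_code` is not shown in this diff:

# Illustrative exit-code policy reconstructed from the handlers above;
# InvarlockError here is a stand-in for the package's exception type.
class InvarlockError(Exception):
    pass

def resolve_exit_code(exc: Exception, profile: str | None) -> int:
    strict = (profile or "").lower() in {"ci", "release"}
    if isinstance(exc, FileNotFoundError):
        return 1
    if isinstance(exc, ValueError) and "Invalid RunReport" in str(exc):
        return 2
    if isinstance(exc, InvarlockError):
        return 3 if strict else 1
    return 1

assert resolve_exit_code(InvarlockError("gate"), "ci") == 3
assert resolve_exit_code(InvarlockError("gate"), "dev") == 1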
@@ -4695,9 +5397,21 @@ def run_command(
  except Exception:
  pass
  finally:
- console.print("cleanup: removed")
+ _event(
+ console,
+ "INFO",
+ "Cleanup: removed",
+ emoji="🧹",
+ profile=profile_normalized,
+ )
  else:
- console.print("cleanup: skipped")
+ _event(
+ console,
+ "INFO",
+ "Cleanup: skipped",
+ emoji="🧹",
+ profile=profile_normalized,
+ )
  except Exception:
  # Best-effort cleanup printing; never raise from finally
  pass
@@ -4844,11 +5558,9 @@ def _print_guard_overhead_summary(
  """Print a concise guard-overhead console summary. Returns threshold fraction used."""
  evaluated = bool(guard_overhead_info.get("evaluated", True))
  if not evaluated:
- console.print("🛡️ Guard Overhead: not evaluated")
+ _event(console, "METRIC", "Guard Overhead: not evaluated", emoji="🛡️")
  return GUARD_OVERHEAD_THRESHOLD
- overhead_status = (
- "✅ PASS" if guard_overhead_info.get("passed", True) else "❌ FAIL"
- )
+ overhead_status = "PASS" if guard_overhead_info.get("passed", True) else "FAIL"
  overhead_percent = guard_overhead_info.get("overhead_percent")
  if isinstance(overhead_percent, (int | float)) and math.isfinite(
  float(overhead_percent)
@@ -4867,8 +5579,11 @@
  except (TypeError, ValueError):
  threshold_fraction = GUARD_OVERHEAD_THRESHOLD
  threshold_display = f"≤ +{threshold_fraction * 100:.1f}%"
- console.print(
- f"🛡️ Guard Overhead: {overhead_status} {overhead_display} ({threshold_display})"
+ _event(
+ console,
+ "METRIC",
+ f"Guard Overhead: {overhead_status} {overhead_display} ({threshold_display})",
+ emoji="🛡️",
  )
  return threshold_fraction
@@ -4878,8 +5593,12 @@ def _print_retry_summary(console: Console, retry_controller: Any | None) -> None
  try:
  if retry_controller and getattr(retry_controller, "attempt_history", None):
  summary = retry_controller.get_attempt_summary()
- console.print(
- f"\n📊 Retry Summary: {summary['total_attempts']} attempts in {summary['elapsed_time']:.1f}s"
+ console.print("\n")
+ _event(
+ console,
+ "METRIC",
+ f"Retry Summary: {summary['total_attempts']} attempts in {summary['elapsed_time']:.1f}s",
+ emoji="📊",
  )
  except Exception:
  # Never break the run for summary printing
@@ -4902,10 +5621,15 @@ def _init_retry_controller(
  retry_controller = RetryController(
  max_attempts=max_attempts, timeout=timeout, verbose=True
  )
- console.print(f"🔄 Retry mode enabled: max {max_attempts} attempts")
+ _event(
+ console,
+ "INIT",
+ f"Retry mode enabled: max {max_attempts} attempts",
+ emoji="🔄",
+ )
  if baseline:
- console.print(f"📋 Using baseline: {baseline}")
+ _event(console, "DATA", f"Using baseline: {baseline}", emoji="📋")
  else:
  if baseline:
- console.print(f"📋 Using baseline: {baseline}")
+ _event(console, "DATA", f"Using baseline: {baseline}", emoji="📋")
  return retry_controller
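`_init_retry_controller` keeps the duplicated baseline logging in both branches; only the rendering changed. A usage sketch with a stand-in controller (the real `RetryController` interface beyond this constructor is not shown in the diff):

# Stand-in RetryController for illustration only; the real class's interface
# beyond this constructor is not shown in the diff.
class RetryController:
    def __init__(self, max_attempts: int, timeout: float | None, verbose: bool = False):
        self.max_attempts = max_attempts
        self.timeout = timeout
        self.verbose = verbose
        self.attempt_history: list[dict] = []

retry_controller = RetryController(max_attempts=3, timeout=600.0, verbose=True)
print(f"Retry mode enabled: max {retry_controller.max_attempts} attempts")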