invarlock 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. invarlock/__init__.py +4 -4
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +37 -50
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_loading.py +7 -7
  8. invarlock/adapters/hf_mixin.py +53 -9
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/assurance/__init__.py +15 -23
  12. invarlock/cli/adapter_auto.py +32 -26
  13. invarlock/cli/app.py +128 -27
  14. invarlock/cli/commands/__init__.py +2 -2
  15. invarlock/cli/commands/calibrate.py +48 -4
  16. invarlock/cli/commands/doctor.py +8 -10
  17. invarlock/cli/commands/evaluate.py +986 -0
  18. invarlock/cli/commands/explain_gates.py +25 -17
  19. invarlock/cli/commands/export_html.py +11 -9
  20. invarlock/cli/commands/plugins.py +13 -9
  21. invarlock/cli/commands/report.py +326 -92
  22. invarlock/cli/commands/run.py +1160 -228
  23. invarlock/cli/commands/verify.py +157 -97
  24. invarlock/cli/config.py +1 -1
  25. invarlock/cli/determinism.py +1 -1
  26. invarlock/cli/doctor_helpers.py +4 -5
  27. invarlock/cli/output.py +193 -0
  28. invarlock/cli/provenance.py +4 -4
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/registry.py +9 -11
  31. invarlock/core/retry.py +14 -14
  32. invarlock/core/runner.py +112 -26
  33. invarlock/edits/noop.py +2 -2
  34. invarlock/edits/quant_rtn.py +67 -39
  35. invarlock/eval/__init__.py +1 -1
  36. invarlock/eval/bench.py +14 -10
  37. invarlock/eval/data.py +68 -23
  38. invarlock/eval/metrics.py +59 -1
  39. invarlock/eval/primary_metric.py +1 -1
  40. invarlock/eval/tasks/__init__.py +12 -0
  41. invarlock/eval/tasks/classification.py +48 -0
  42. invarlock/eval/tasks/qa.py +36 -0
  43. invarlock/eval/tasks/text_generation.py +102 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/rmt.py +2 -2
  46. invarlock/guards/spectral.py +1 -1
  47. invarlock/guards/variance.py +2 -2
  48. invarlock/model_profile.py +64 -62
  49. invarlock/observability/health.py +6 -6
  50. invarlock/observability/metrics.py +108 -0
  51. invarlock/plugins/hf_bnb_adapter.py +32 -21
  52. invarlock/reporting/__init__.py +18 -4
  53. invarlock/reporting/guards_analysis.py +154 -4
  54. invarlock/reporting/html.py +61 -11
  55. invarlock/reporting/normalizer.py +9 -2
  56. invarlock/reporting/policy_utils.py +1 -1
  57. invarlock/reporting/primary_metric_utils.py +11 -11
  58. invarlock/reporting/render.py +876 -510
  59. invarlock/reporting/report.py +72 -30
  60. invarlock/reporting/{certificate.py → report_builder.py} +252 -99
  61. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  62. invarlock/reporting/report_types.py +6 -1
  63. invarlock/reporting/telemetry.py +86 -0
  64. invarlock-0.3.8.dist-info/METADATA +283 -0
  65. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
  66. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  67. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
  68. invarlock/adapters/hf_gpt2.py +0 -404
  69. invarlock/adapters/hf_llama.py +0 -487
  70. invarlock/cli/commands/certify.py +0 -422
  71. invarlock-0.3.6.dist-info/METADATA +0 -588
  72. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  73. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
@@ -4,21 +4,25 @@ InvarLock CLI Run Command
 
 Run a guarded pipeline from a YAML config. Intended for local smokes,
 plugin demos, and development. Advanced: for pairwise certification,
-prefer Compare & Certify via `invarlock certify --baseline ... --subject ...`.
+prefer Compare & Evaluate via `invarlock evaluate --baseline ... --subject ...`.
 """
 
 import copy
 import hashlib
 import inspect
 import json
+import logging
 import math
 import os
 import random
+import re
 import shutil
 import sys as _sys
 import types as _types
+import warnings
 from array import array
-from collections.abc import Iterable, Sequence
+from collections.abc import Callable, Iterable, Iterator, Sequence
+from contextlib import contextmanager
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
@@ -30,6 +34,16 @@ import psutil
 import typer
 from rich.console import Console
 
+from invarlock.cli.output import (
+    OutputStyle,
+    make_console,
+    perf_counter,
+    print_event,
+    print_timing_summary,
+    resolve_output_style,
+    timed_step,
+)
+
 try:
     import torch
 except ImportError:
@@ -63,7 +77,42 @@ from ..config import (
 )
 from ..overhead_utils import _extract_pm_snapshot_for_overhead
 
-console = Console()
+console = make_console()
+
+
+def _style_from_console(console: Console, profile: str | None = None) -> OutputStyle:
+    style = getattr(console, "_invarlock_output_style", None)
+    if isinstance(style, OutputStyle):
+        return style
+    return resolve_output_style(
+        style=None,
+        profile=profile,
+        progress=False,
+        timing=False,
+        no_color=False,
+    )
+
+
+def _event(
+    console: Console,
+    tag: str,
+    message: str,
+    *,
+    emoji: str | None = None,
+    console_style: str | None = None,
+    profile: str | None = None,
+) -> None:
+    style = _style_from_console(console, profile=profile)
+    print_event(
+        console,
+        tag,
+        message,
+        style=style,
+        emoji=emoji,
+        console_style=console_style,
+    )
+
+
 LIGHT_IMPORT = os.getenv("INVARLOCK_LIGHT_IMPORT", "").strip().lower() in {
     "1",
     "true",
@@ -76,6 +125,232 @@ RELEASE_MIN_WINDOWS_PER_ARM = 200
 RELEASE_CALIBRATION_MIN = 16
 RELEASE_CALIBRATION_MAX = 24
 GUARD_OVERHEAD_THRESHOLD = 0.01
+KV_LABEL_WIDTH = 10
+
+_NOISY_WARNING_PATTERNS = (r".*loss_type=None.*unrecognized.*",)
+
+
+def _resolve_warning_suppression(profile: str | None) -> tuple[bool, bool]:
+    suppress_all = os.getenv("INVARLOCK_SUPPRESS_WARNINGS", "").strip().lower() in {
+        "1",
+        "true",
+        "yes",
+        "on",
+    }
+    profile_norm = (profile or "").strip().lower()
+    enabled = bool(suppress_all) or profile_norm in {"ci", "ci_cpu", "release"}
+    return enabled, suppress_all
+
+
+def _apply_warning_filters(profile: str | None) -> bool:
+    enabled, suppress_all = _resolve_warning_suppression(profile)
+    if not enabled:
+        return False
+    if suppress_all:
+        warnings.simplefilter("ignore")
+    else:
+        for pattern in _NOISY_WARNING_PATTERNS:
+            warnings.filterwarnings("ignore", message=pattern)
+    return True
+
+
+@contextmanager
+def _suppress_noisy_warnings(
+    profile: str | None,
+    *,
+    event_path: Path | None = None,
+    context: dict[str, Any] | None = None,
+) -> Iterator[None]:
+    enabled, suppress_all = _resolve_warning_suppression(profile)
+    if not enabled:
+        yield
+        return
+
+    prev_tf_verbosity = os.environ.get("TRANSFORMERS_VERBOSITY")
+    os.environ["TRANSFORMERS_VERBOSITY"] = "error"
+    transformers_logger = logging.getLogger("transformers")
+    prev_tf_level = transformers_logger.level
+    transformers_logger.setLevel(logging.ERROR)
+
+    patterns = [re.compile(p) for p in _NOISY_WARNING_PATTERNS]
+    suppressed: list[str] = []
+
+    class _NoisyLogFilter(logging.Filter):
+        def filter(self, record: logging.LogRecord) -> bool:  # noqa: A003
+            try:
+                message = record.getMessage()
+            except Exception:
+                return True
+            if any(p.search(message) for p in patterns):
+                suppressed.append(message)
+                return False
+            return True
+
+    def _iter_handlers() -> list[logging.Handler]:
+        handlers: list[logging.Handler] = []
+        seen: set[int] = set()
+        for logger in (
+            logging.getLogger(),
+            logging.getLogger("transformers"),
+            logging.getLogger("huggingface_hub"),
+            logging.getLogger("datasets"),
+        ):
+            for handler in getattr(logger, "handlers", []) or []:
+                if id(handler) in seen:
+                    continue
+                seen.add(id(handler))
+                handlers.append(handler)
+        return handlers
+
+    log_filter = _NoisyLogFilter()
+    handlers = _iter_handlers()
+
+    def _append_suppressed_warnings() -> None:
+        if not suppressed or event_path is None:
+            return
+        try:
+            path = Path(event_path)
+            path.parent.mkdir(parents=True, exist_ok=True)
+            payload = {
+                "timestamp": datetime.now().isoformat(),
+                "component": "warnings",
+                "operation": "suppressed",
+                "level": "WARNING",
+                "data": {
+                    "count": len(suppressed),
+                    "messages": suppressed[:50],
+                    "profile": profile or "",
+                    **(context or {}),
+                },
+            }
+            with path.open("a", encoding="utf-8") as fh:
+                fh.write(json.dumps(payload) + "\n")
+        except Exception:
+            # Best-effort: suppressed warnings are non-fatal and logging must not
+            # impact model loading.
+            return
+
+    for handler in handlers:
+        handler.addFilter(log_filter)
+
+    try:
+        with warnings.catch_warnings():
+            from contextlib import redirect_stderr, redirect_stdout
+
+            class _FilteredStream:
+                def __init__(self, raw: Any) -> None:
+                    self._raw = raw
+
+                def __getattr__(self, name: str) -> object:
+                    return getattr(self._raw, name)
+
+                def write(self, s: object) -> int:
+                    try:
+                        if isinstance(s, bytes):
+                            text = s.decode("utf-8", errors="replace")
+                        else:
+                            text = str(s)
+                    except Exception:
+                        return int(self._raw.write(s))
+
+                    # Preserve progress bars (carriage returns) by passing through
+                    # all non-matching chunks immediately.
+                    pieces = text.splitlines(keepends=True)
+                    for piece in pieces:
+                        if any(p.search(piece) for p in patterns):
+                            suppressed.append(piece.rstrip("\n"))
+                            continue
+                        self._raw.write(piece)
+                    return len(text)
+
+                def flush(self) -> None:
+                    try:
+                        self._raw.flush()
+                    except Exception:
+                        pass
+
+            stdout_proxy = _FilteredStream(_sys.stdout)
+            stderr_proxy = _FilteredStream(_sys.stderr)
+
+            with redirect_stdout(stdout_proxy), redirect_stderr(stderr_proxy):
+                if suppress_all:
+                    warnings.simplefilter("ignore")
+                    yield
+                else:
+                    original_showwarning = warnings.showwarning
+
+                    def _showwarning(
+                        message: Warning | str,
+                        category: type[Warning],
+                        filename: str,
+                        lineno: int,
+                        file: object | None = None,
+                        line: str | None = None,
+                    ) -> None:
+                        try:
+                            rendered = warnings.formatwarning(
+                                message, category, filename, lineno, line
+                            )
+                        except Exception:
+                            rendered = str(message)
+                        if any(p.search(rendered) for p in patterns):
+                            suppressed.append(str(message))
+                            return
+                        original_showwarning(
+                            message,
+                            category,
+                            filename,
+                            lineno,
+                            file=file,
+                            line=line,
+                        )
+
+                    warnings.showwarning = _showwarning  # type: ignore[assignment]
+                    try:
+                        yield
+                    finally:
+                        warnings.showwarning = original_showwarning  # type: ignore[assignment]
+    finally:
+        for handler in handlers:
+            try:
+                handler.removeFilter(log_filter)
+            except Exception:
+                pass
+        try:
+            transformers_logger.setLevel(prev_tf_level)
+        except Exception:
+            pass
+        if prev_tf_verbosity is None:
+            os.environ.pop("TRANSFORMERS_VERBOSITY", None)
+        else:
+            os.environ["TRANSFORMERS_VERBOSITY"] = prev_tf_verbosity
+        _append_suppressed_warnings()
+
+
+def _format_kv_line(label: str, value: str, *, width: int = KV_LABEL_WIDTH) -> str:
+    return f" {label:<{width}}: {value}"
+
+
+def _device_resolution_note(target_device: str, resolved_device: str) -> str:
+    target_norm = str(target_device or "").strip().lower()
+    resolved_norm = str(resolved_device or "").strip().lower()
+    if not target_norm or target_norm == "auto":
+        return "auto-resolved"
+    if target_norm == resolved_norm:
+        return "requested"
+    return f"resolved from {target_device}"
+
+
+def _format_guard_chain(guards: list[Any]) -> str:
+    names = [str(getattr(guard, "name", "unknown")) for guard in guards]
+    seen: set[str] = set()
+    deduped: list[str] = []
+    for name in names:
+        if name in seen:
+            continue
+        seen.add(name)
+        deduped.append(name)
+    return " → ".join(deduped)
 
 
 # Common dataset split aliases we probe in order when not explicitly set
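
The suppression helper above layers four mechanisms: `warnings` filters, a `logging.Filter` attached to the root/transformers/huggingface_hub/datasets handlers, stdout/stderr proxies that drop matching chunks while preserving progress bars, and a best-effort JSONL record of what was dropped. A condensed sketch of the gating and warning-filter portion only (not the shipped code):

    import os
    import warnings
    from collections.abc import Iterator
    from contextlib import contextmanager

    NOISY = (r".*loss_type=None.*unrecognized.*",)

    @contextmanager
    def suppress_noisy(profile: str | None) -> Iterator[list[str]]:
        # Enabled by INVARLOCK_SUPPRESS_WARNINGS or a ci/ci_cpu/release profile.
        enabled = os.getenv("INVARLOCK_SUPPRESS_WARNINGS", "").strip().lower() in {
            "1", "true", "yes", "on"
        } or (profile or "").strip().lower() in {"ci", "ci_cpu", "release"}
        dropped: list[str] = []
        if not enabled:
            yield dropped
            return
        with warnings.catch_warnings():
            for pattern in NOISY:
                warnings.filterwarnings("ignore", message=pattern)
            yield dropped  # the real helper also filters logging/stdio and appends to events.jsonl
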
@@ -241,6 +516,89 @@ def _resolve_pm_acceptance_range(
     return {"min": float(min_val), "max": float(max_val)}
 
 
+def _resolve_pm_drift_band(
+    cfg: InvarLockConfig | dict[str, Any] | None,
+) -> dict[str, float]:
+    """Resolve preview→final drift band from config/env with safe defaults.
+
+    The drift band governs the Preview Final Drift Acceptable gate. By default,
+    evaluation reports enforce 0.95–1.05 unless an explicit band is provided.
+    """
+
+    base_min = 0.95
+    base_max = 1.05
+
+    cfg_min = None
+    cfg_max = None
+    try:
+        cfg_map = _coerce_mapping(cfg) if cfg is not None else {}
+        pm_section = cfg_map.get("primary_metric") if isinstance(cfg_map, dict) else {}
+        pm_map = _coerce_mapping(pm_section)
+        drift_band = pm_map.get("drift_band") if isinstance(pm_map, dict) else None
+        if isinstance(drift_band, dict):
+            if drift_band.get("min") is not None:
+                try:
+                    cfg_min = float(drift_band["min"])
+                except (TypeError, ValueError):
+                    cfg_min = None
+            if drift_band.get("max") is not None:
+                try:
+                    cfg_max = float(drift_band["max"])
+                except (TypeError, ValueError):
+                    cfg_max = None
+        elif isinstance(drift_band, list | tuple) and len(drift_band) == 2:
+            try:
+                cfg_min = float(drift_band[0])
+                cfg_max = float(drift_band[1])
+            except (TypeError, ValueError):
+                cfg_min = None
+                cfg_max = None
+    except Exception:
+        cfg_min = None
+        cfg_max = None
+
+    def _parse_env(name: str) -> float | None:
+        try:
+            raw = os.environ.get(name, "")
+            if raw is None or str(raw).strip() == "":
+                return None
+            return float(raw)
+        except Exception:
+            return None
+
+    env_min = _parse_env("INVARLOCK_PM_DRIFT_MIN")
+    env_max = _parse_env("INVARLOCK_PM_DRIFT_MAX")
+
+    has_explicit = any(v is not None for v in (cfg_min, cfg_max, env_min, env_max))
+    if not has_explicit:
+        return {}
+
+    min_val = (
+        env_min if env_min is not None else cfg_min if cfg_min is not None else base_min
+    )
+    max_val = (
+        env_max if env_max is not None else cfg_max if cfg_max is not None else base_max
+    )
+
+    try:
+        if min_val is not None and min_val <= 0:
+            min_val = base_min
+    except Exception:
+        min_val = base_min
+    try:
+        if max_val is not None and max_val <= 0:
+            max_val = base_max
+    except Exception:
+        max_val = base_max
+    try:
+        if min_val is not None and max_val is not None and min_val >= max_val:
+            min_val, max_val = base_min, base_max
+    except Exception:
+        min_val, max_val = base_min, base_max
+
+    return {"min": float(min_val), "max": float(max_val)}
+
+
 def _free_model_memory(model: object | None) -> None:
     """Best-effort cleanup to release GPU memory for a model object."""
     if model is None:
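
Precedence in `_resolve_pm_drift_band` is env over config over the 0.95/1.05 defaults, and an empty dict means "no explicit band configured". Worked examples derived from the code above (values hypothetical):

    # config {"primary_metric": {"drift_band": {"min": 0.97}}}, no env
    #   -> {"min": 0.97, "max": 1.05}
    # config drift_band=[0.9, 1.1], env INVARLOCK_PM_DRIFT_MAX=1.2
    #   -> {"min": 0.9, "max": 1.2}   (env wins for max)
    # merged min >= max, or a non-positive bound
    #   -> falls back to {"min": 0.95, "max": 1.05}
    # nothing set in config or env
    #   -> {}  (the drift gate keeps its built-in band)
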
@@ -754,38 +1112,60 @@ def _prepare_config_for_run(
         resolve_edit_kind as _resolve_edit_kind,
     )
 
-    console.print(f"📋 Loading configuration: {config_path}")
+    _event(
+        console,
+        "INIT",
+        f"Loading configuration: {config_path}",
+        emoji="📋",
+        profile=profile,
+    )
     cfg = _load_config(config_path)
 
     # Apply profile if specified (dev is a no-op)
     if profile and str(profile).lower() not in {"dev"}:
-        console.print(f"🎯 Applying profile: {profile}")
+        _event(
+            console, "INIT", f"Applying profile: {profile}", emoji="🎯", profile=profile
+        )
         try:
             cfg = _apply_profile(cfg, profile)
         except Exception as exc:
-            console.print(f"[red]{exc}[/red]")
+            _event(console, "FAIL", str(exc), emoji="❌", profile=profile)
             raise typer.Exit(1) from exc
 
     # Apply edit override
     if edit:
         try:
             edit_name = _resolve_edit_kind(edit)
-            console.print(f"✂️ Edit override: {edit} → {edit_name}")
+            _event(
+                console,
+                "EXEC",
+                f"Edit override: {edit} → {edit_name}",
+                emoji="✂️",
+                profile=profile,
+            )
             cfg = _apply_edit_override(cfg, edit)
         except ValueError as e:
-            console.print(f"[red]{e}[/red]")
+            _event(console, "FAIL", str(e), emoji="❌", profile=profile)
             raise typer.Exit(1) from e
 
     # Apply CLI overrides for auto configuration
    if tier or probes is not None:
         if tier and tier not in ["conservative", "balanced", "aggressive", "none"]:
-            console.print(
-                f"[red]❌ Invalid tier '{tier}'. Valid options: conservative, balanced, aggressive, none[/red]"
+            _event(
+                console,
+                "FAIL",
+                f"Invalid tier '{tier}'. Valid options: conservative, balanced, aggressive, none",
+                emoji="❌",
+                profile=profile,
             )
             raise typer.Exit(1)
         if probes is not None and (probes < 0 or probes > 10):
-            console.print(
-                f"[red]❌ Invalid probes '{probes}'. Must be between 0 and 10[/red]"
+            _event(
+                console,
+                "FAIL",
+                f"Invalid probes '{probes}'. Must be between 0 and 10",
+                emoji="❌",
+                profile=profile,
+            )
             raise typer.Exit(1)
@@ -796,10 +1176,22 @@ def _prepare_config_for_run(
         cfg_dict["auto"] = auto_section
         if tier:
             auto_section["tier"] = tier
-            console.print(f"🎛️ Auto tier override: {tier}")
+            _event(
+                console,
+                "INIT",
+                f"Auto tier override: {tier}",
+                emoji="🎛️",
+                profile=profile,
+            )
         if probes is not None:
             auto_section["probes"] = probes
-            console.print(f"🔬 Auto probes override: {probes}")
+            _event(
+                console,
+                "INIT",
+                f"Auto probes override: {probes}",
+                emoji="🔬",
+                profile=profile,
+            )
         cfg = InvarLockConfig(cfg_dict)
 
     # Resolve adapter:auto to a concrete built-in adapter if requested
@@ -832,7 +1224,7 @@ def _maybe_plan_release_windows(
 
 
 def _print_pipeline_start(console: Console) -> None:
-    console.print("🚀 Starting InvarLock pipeline...")
+    _event(console, "INIT", "Starting InvarLock pipeline...", emoji="🚀")
 
 
 def _emit_run_artifacts(
@@ -841,7 +1233,7 @@ def _emit_run_artifacts(
     """Save run report and return emitted artifact paths."""
     from invarlock.reporting.report import save_report as _save_report
 
-    console.print("💾 Saving run report...")
+    _event(console, "DATA", "Saving run report...", emoji="💾")
     return _save_report(
         report, out_dir, formats=["json"], filename_prefix=filename_prefix
     )
@@ -864,12 +1256,11 @@ def _resolve_device_and_output(
         cfg_device = None
     target_device = device or cfg_device or "auto"
     resolved_device = _resolve_device(target_device)
-    console.print(
-        f"Device: {resolved_device} (requested={target_device}, resolved={resolved_device})"
-    )
+    resolution_note = _device_resolution_note(target_device, resolved_device)
+    console.print(_format_kv_line("Device", f"{resolved_device} ({resolution_note})"))
     is_valid, error_msg = _validate(resolved_device)
     if not is_valid:
-        console.print(f"[red]❌ Device validation failed: {error_msg}[/red]")
+        _event(console, "FAIL", f"Device validation failed: {error_msg}", emoji="❌")
         raise typer.Exit(1)
 
     # Determine output directory
@@ -892,6 +1283,7 @@ def _resolve_provider_and_split(
     provider_kwargs: dict[str, Any] | None = None,
     console: Console,
     resolved_device: str | None = None,
+    emit: Callable[[str, str, str | None], None] | None = None,
 ) -> tuple[Any, str, bool]:
     """Resolve dataset provider and split, returning (provider, split, used_fallback)."""
     provider_name = None
@@ -918,7 +1310,10 @@ def _resolve_provider_and_split(
     # Pass device hint only to providers that understand it (currently WikiText-2)
     if resolved_device and provider_name == "wikitext2":
         provider_kwargs.setdefault("device_hint", resolved_device)
-    data_provider = get_provider_fn(provider_name, **provider_kwargs)
+    if emit is not None and provider_name == "wikitext2":
+        data_provider = get_provider_fn(provider_name, emit=emit, **provider_kwargs)
+    else:
+        data_provider = get_provider_fn(provider_name, **provider_kwargs)
 
     requested_split = None
     try:
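
The new `emit` hook lets the WikiText-2 provider report through the same tagged event stream instead of printing directly; other providers are called without it. A hypothetical callback matching the `Callable[[str, str, str | None], None]` annotation above:

    def emit(tag: str, message: str, emoji: str | None = None) -> None:
        # In run_command this is _provider_event, which forwards to _event().
        print(f"[{tag}] {message}")

    # data_provider = get_provider_fn("wikitext2", emit=emit, **provider_kwargs)  # sketch
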
@@ -951,13 +1346,24 @@ def _extract_model_load_kwargs(cfg: InvarLockConfig) -> dict[str, Any]:
         for key, value in model.items()
         if key not in {"id", "adapter", "device"} and value is not None
     }
-    # Backwards-compatible aliasing: config `dtype` → HF `torch_dtype`.
-    if "dtype" in extra and "torch_dtype" not in extra:
-        extra["torch_dtype"] = extra.pop("dtype")
+    removed_keys: list[str] = []
+    for key in ("torch_dtype", "load_in_8bit", "load_in_4bit"):
+        if key in extra:
+            removed_keys.append(key)
+    if removed_keys:
+        raise InvarlockError(
+            code="E007",
+            message=(
+                "CONFIG-KEY-REMOVED: "
+                + ", ".join(removed_keys)
+                + ". Use model.dtype and/or model.quantization_config."
+            ),
+            details={"removed_keys": removed_keys},
+        )
 
-    # Normalize torch_dtype when present (keep as string for JSON-ability).
-    if "torch_dtype" in extra and isinstance(extra.get("torch_dtype"), str):
-        dtype_str = str(extra.get("torch_dtype") or "").strip().lower()
+    # Normalize dtype when present (keep as string for JSON-ability).
+    if "dtype" in extra and isinstance(extra.get("dtype"), str):
+        dtype_str = str(extra.get("dtype") or "").strip().lower()
         aliases = {
             "fp16": "float16",
             "half": "float16",
@@ -965,14 +1371,22 @@ def _extract_model_load_kwargs(cfg: InvarLockConfig) -> dict[str, Any]
             "fp32": "float32",
         }
         if dtype_str in aliases:
-            extra["torch_dtype"] = aliases[dtype_str]
+            extra["dtype"] = aliases[dtype_str]
         elif dtype_str:
-            extra["torch_dtype"] = dtype_str
+            extra["dtype"] = dtype_str
 
     return extra
 
 
-def _load_model_with_cfg(adapter: Any, cfg: InvarLockConfig, device: str) -> Any:
+def _load_model_with_cfg(
+    adapter: Any,
+    cfg: InvarLockConfig,
+    device: str,
+    *,
+    profile: str | None = None,
+    event_path: Path | None = None,
+    warning_context: dict[str, Any] | None = None,
+) -> Any:
     """Load a model with config-provided kwargs, filtering for strict adapters."""
     try:
         model_id = cfg.model.id
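
Migration implied by the E007 check above: `torch_dtype`, `load_in_8bit`, and `load_in_4bit` are now rejected rather than aliased, and `model.dtype` is the canonical key. A before/after sketch (dict shape assumed from `_extract_model_load_kwargs`):

    old_model_cfg = {"id": "gpt2", "torch_dtype": "fp16"}  # 0.3.8: raises E007 CONFIG-KEY-REMOVED
    new_model_cfg = {"id": "gpt2", "dtype": "fp16"}        # normalized to "float16"
    # load_in_8bit / load_in_4bit likewise move under model.quantization_config.
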
@@ -985,20 +1399,25 @@ def _load_model_with_cfg(adapter: Any, cfg: InvarLockConfig, device: str) -> Any
         raise ValueError("Missing model.id in config")
 
     extra = _extract_model_load_kwargs(cfg)
-    try:
-        sig = inspect.signature(adapter.load_model)
-        accepts_var_kw = any(
-            p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
-        )
-        if accepts_var_kw:
-            return adapter.load_model(model_id, device=device, **extra)
-        allowed = {k: v for k, v in extra.items() if k in sig.parameters}
-        if allowed:
-            return adapter.load_model(model_id, device=device, **allowed)
-    except Exception:
-        # Fall back to the strictest call shape.
-        pass
-    return adapter.load_model(model_id, device=device)
+    with _suppress_noisy_warnings(
+        profile,
+        event_path=event_path,
+        context=warning_context,
+    ):
+        try:
+            sig = inspect.signature(adapter.load_model)
+            accepts_var_kw = any(
+                p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
+            )
+            if accepts_var_kw:
+                return adapter.load_model(model_id, device=device, **extra)
+            allowed = {k: v for k, v in extra.items() if k in sig.parameters}
+            if allowed:
+                return adapter.load_model(model_id, device=device, **allowed)
+        except Exception:
+            # Fall back to the strictest call shape.
+            pass
+        return adapter.load_model(model_id, device=device)
 
 
 def _run_bare_control(
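
`_load_model_with_cfg` keeps its introspection rule: adapters that accept `**kwargs` get everything, strict adapters get only the kwargs named in their signature, and any failure falls back to `load_model(model_id, device=device)`. A standalone sketch of that filtering rule:

    import inspect
    from typing import Any

    def call_with_supported_kwargs(fn, /, *args: Any, **extra: Any) -> Any:
        sig = inspect.signature(fn)
        if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()):
            return fn(*args, **extra)  # adapter declares **kwargs: pass everything
        allowed = {k: v for k, v in extra.items() if k in sig.parameters}
        return fn(*args, **allowed)    # strict adapter: drop unknown keys

    # e.g. call_with_supported_kwargs(adapter.load_model, "gpt2", device="cpu", dtype="float16")
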
@@ -1018,14 +1437,20 @@ def _run_bare_control(
     restore_fn: Any | None,
     console: Console,
     resolved_loss_type: str,
-    profile_normalized: str | None,
+    profile_normalized: str | None = None,
     snapshot_provenance: dict[str, bool] | None = None,
     skip_model_load: bool = False,
 ) -> dict[str, Any] | None:
     """Execute the bare-control run for overhead estimation and return payload."""
     from invarlock.core.runner import CoreRunner as _CoreRunner
 
-    console.print("🧪 Running bare control (guards disabled) for overhead check")
+    _event(
+        console,
+        "EXEC",
+        "Running bare control (guards disabled) for overhead check",
+        emoji="🧪",
+        profile=profile_normalized,
+    )
     set_seed(seed_bundle["python"])  # type: ignore[arg-type]
 
     bare_runner = _CoreRunner()
@@ -1034,6 +1459,12 @@ def _run_bare_control(
     bare_context = copy.deepcopy(run_config.context)
     bare_context.setdefault("validation", {})["guard_overhead_mode"] = "bare"
     bare_config.context = bare_context
+    runtime_edit_config = dict(edit_config or {})
+    runtime_edit_config.setdefault("console", console)
+    runtime_edit_config.setdefault(
+        "output_style", _style_from_console(console, profile=profile_normalized)
+    )
+    runtime_edit_config.setdefault("emit", True)
 
     private_model_loaded = False
     bare_target_model = None
@@ -1047,23 +1478,30 @@ def _run_bare_control(
         elif skip_model_load:
             bare_target_model = model or SimpleNamespace(name="bare_stub_model")
         else:
-            bare_target_model = _load_model_with_cfg(adapter, cfg, resolved_device)
+            bare_target_model = _load_model_with_cfg(
+                adapter, cfg, resolved_device, profile=profile_normalized
+            )
             private_model_loaded = True
             if snapshot_provenance is not None:
                 snapshot_provenance["reload_path_used"] = True
 
-        bare_report = bare_runner.execute(
-            model=bare_target_model,
-            adapter=adapter,
-            edit=edit_op,
-            guards=[],
-            config=bare_config,
-            calibration_data=calibration_data,
-            auto_config=auto_config,
-            edit_config=edit_config,
-            preview_n=preview_count,
-            final_n=final_count,
-        )
+        with _suppress_noisy_warnings(
+            profile_normalized,
+            event_path=getattr(run_config, "event_path", None),
+            context={"phase": "guard_overhead_bare"},
+        ):
+            bare_report = bare_runner.execute(
+                model=bare_target_model,
+                adapter=adapter,
+                edit=edit_op,
+                guards=[],
+                config=bare_config,
+                calibration_data=calibration_data,
+                auto_config=auto_config,
+                edit_config=runtime_edit_config,
+                preview_n=preview_count,
+                final_n=final_count,
+            )
     finally:
         if private_model_loaded:
             _free_model_memory(bare_target_model)
@@ -1084,8 +1522,12 @@ def _run_bare_control(
         return False
 
     if not (_finite(bare_ppl_preview) and _finite(bare_ppl_final)):
-        console.print(
-            "[yellow]⚠️ Primary metric non-finite during bare control; continuing with diagnostics.[/yellow]"
+        _event(
+            console,
+            "WARN",
+            "Primary metric non-finite during bare control; continuing with diagnostics.",
+            emoji="⚠️",
+            profile=profile_normalized,
         )
 
     payload: dict[str, Any] = {
@@ -1137,6 +1579,7 @@ def _execute_guarded_run(
     final_count: int,
     restore_fn: Any | None,
     resolved_device: str,
+    profile_normalized: str | None = None,
     console: Console,
     snapshot_provenance: dict[str, bool] | None = None,
     skip_model_load: bool = False,
@@ -1150,23 +1593,56 @@ def _execute_guarded_run(
     elif skip_model_load:
         model = model or SimpleNamespace(name="guarded_stub_model")
     else:
-        console.print(f"🔧 Loading model: {cfg.model.id} (attempt 1)")
-        model = _load_model_with_cfg(adapter, cfg, resolved_device)
+        _event(
+            console,
+            "INIT",
+            f"Loading model: {cfg.model.id} (attempt 1)",
+            emoji="🔧",
+            profile=profile_normalized,
+        )
+        warning_context: dict[str, Any] = {"phase": "load_model"}
+        try:
+            if hasattr(run_config, "context") and isinstance(run_config.context, dict):
+                rid = run_config.context.get("run_id")
+                if isinstance(rid, str) and rid:
+                    warning_context["run_id"] = rid
+        except Exception:
+            pass
+        model = _load_model_with_cfg(
+            adapter,
+            cfg,
+            resolved_device,
+            profile=profile_normalized,
+            event_path=getattr(run_config, "event_path", None),
+            warning_context=warning_context,
+        )
         if snapshot_provenance is not None:
             snapshot_provenance["reload_path_used"] = True
 
-    core_report = runner.execute(
-        model=model,
-        adapter=adapter,
-        edit=edit_op,
-        guards=guards,
-        config=run_config,
-        calibration_data=calibration_data,
-        auto_config=auto_config,
-        edit_config=edit_config,
-        preview_n=preview_count,
-        final_n=final_count,
+    runtime_edit_config = dict(edit_config or {})
+    runtime_edit_config.setdefault("console", console)
+    runtime_edit_config.setdefault(
+        "output_style", _style_from_console(console, profile=profile_normalized)
     )
+    runtime_edit_config.setdefault("emit", True)
+
+    with _suppress_noisy_warnings(
+        profile_normalized,
+        event_path=getattr(run_config, "event_path", None),
+        context={"phase": "core_runner_execute"},
+    ):
+        core_report = runner.execute(
+            model=model,
+            adapter=adapter,
+            edit=edit_op,
+            guards=guards,
+            config=run_config,
+            calibration_data=calibration_data,
+            auto_config=auto_config,
+            edit_config=runtime_edit_config,
+            preview_n=preview_count,
+            final_n=final_count,
+        )
     return core_report, model
 
 
@@ -1200,10 +1676,10 @@ def _postprocess_and_summarize(
     saved_files = _emit_run_artifacts(
         report=report, out_dir=run_dir, filename_prefix="report", console=console
     )
-    console.print("[green]✅ Run completed successfully![/green]")
-    console.print(f"📄 Report: {saved_files['json']}")
+    _event(console, "PASS", "Run completed successfully!", emoji="✅")
+    _event(console, "DATA", f"Report: {saved_files['json']}", emoji="📄")
     if run_config.event_path:
-        console.print(f"📝 Events: {run_config.event_path}")
+        _event(console, "DATA", f"Events: {run_config.event_path}", emoji="📝")
     return saved_files
 
 
@@ -1293,9 +1769,14 @@ def _validate_and_harvest_baseline_schedule(
         message = f"PAIRING-EVIDENCE-MISSING: {path}: {reason}"
         if prof in {"ci", "release"}:
             raise InvarlockError(code="E001", message=message)
-        _print(
-            f"[red]❌ Baseline pairing schedule '{path}' is incompatible: {reason}[/red]"
-        )
+        if console is not None:
+            _event(
+                console,
+                "FAIL",
+                f"Baseline pairing schedule '{path}' is incompatible: {reason}",
+                emoji="❌",
+                profile=prof,
+            )
         raise typer.Exit(1)
 
     baseline_meta = (
@@ -1450,9 +1931,14 @@ def _validate_and_harvest_baseline_schedule(
             prof = (profile or "dev").strip().lower()
             if prof in {"ci", "release"}:
                 _fail_schedule("preview_hash mismatch vs baseline report data")
-            _print(
-                "[yellow]⚠️ Baseline preview_hash mismatch; continuing in dev profile.[/yellow]"
-            )
+            if console is not None:
+                _event(
+                    console,
+                    "WARN",
+                    "Baseline preview_hash mismatch; continuing in dev profile.",
+                    emoji="⚠️",
+                    profile=prof,
+                )
         if (
             isinstance(baseline_final_hash, str)
             and baseline_final_hash
@@ -1461,9 +1947,14 @@ def _validate_and_harvest_baseline_schedule(
             prof = (profile or "dev").strip().lower()
             if prof in {"ci", "release"}:
                 _fail_schedule("final_hash mismatch vs baseline report data")
-            _print(
-                "[yellow]⚠️ Baseline final_hash mismatch; continuing in dev profile.[/yellow]"
-            )
+            if console is not None:
+                _event(
+                    console,
+                    "WARN",
+                    "Baseline final_hash mismatch; continuing in dev profile.",
+                    emoji="⚠️",
+                    profile=prof,
+                )
         if (
             isinstance(baseline_dataset_hash, str)
             and baseline_dataset_hash
@@ -1472,9 +1963,14 @@ def _validate_and_harvest_baseline_schedule(
             prof = (profile or "dev").strip().lower()
             if prof in {"ci", "release"}:
                 _fail_schedule("dataset_hash mismatch vs baseline report data")
-            _print(
-                "[yellow]⚠️ Baseline dataset_hash mismatch; continuing in dev profile.[/yellow]"
-            )
+            if console is not None:
+                _event(
+                    console,
+                    "WARN",
+                    "Baseline dataset_hash mismatch; continuing in dev profile.",
+                    emoji="⚠️",
+                    profile=prof,
+                )
     except InvarlockError:
         raise
     except typer.Exit:
@@ -1496,10 +1992,14 @@ def _validate_and_harvest_baseline_schedule(
             and baseline_final is not None
             and baseline_final != cfg_final
         ):
-            _print(
-                "[yellow]⚠️ Adjusting evaluation window counts to match baseline schedule "
-                f"({baseline_preview}/{baseline_final}).[/yellow]"
-            )
+            if console is not None:
+                _event(
+                    console,
+                    "WARN",
+                    f"Adjusting evaluation window counts to match baseline schedule ({baseline_preview}/{baseline_final}).",
+                    emoji="⚠️",
+                    profile=profile,
+                )
 
             effective_preview = int(baseline_preview)
             effective_final = int(baseline_final)
@@ -1662,6 +2162,7 @@ def _resolve_metric_and_provider(
     model_profile: Any,
     *,
     resolved_loss_type: str | None = None,
+    metric_kind_override: str | None = None,
 ) -> tuple[str, str, dict[str, float]]:
     """Resolve metric kind, provider kind, and metric options from config with precedence.
 
@@ -1701,9 +2202,13 @@ def _resolve_metric_and_provider(
         metric_cfg = None
 
     metric_kind = None
+    if isinstance(metric_kind_override, str) and metric_kind_override.strip():
+        mk_override = metric_kind_override.strip().lower()
+        if mk_override != "auto":
+            metric_kind = mk_override
     reps = None
     ci_level = None
-    if metric_cfg is not None:
+    if metric_kind is None and metric_cfg is not None:
         try:
             metric_kind = (
                 metric_cfg.get("kind")
@@ -1825,18 +2330,25 @@ def _plan_release_windows(
     candidate_msg = f", candidate_unique={int(candidate_unique)}" + (
         f"/{int(candidate_limit)}" if candidate_limit is not None else ""
     )
-    console.print(
-        "📏 Release window capacity:"
+    _event(
+        console,
+        "METRIC",
+        "Release window capacity:"
         f" unique={available_unique}, reserve={reserve_windows} "
         f"(calib {calibration_windows}, buffer {buffer_windows}), "
         f"usable={available_for_eval}, "
         f"per-arm raw={actual_per_arm_raw} → selected {actual_per_arm} "
-        f"(target {target_per_arm}{candidate_msg})"
+        f"(target {target_per_arm}{candidate_msg})",
+        emoji="📏",
+        profile="release",
     )
     if actual_per_arm < target_per_arm:
-        console.print(
-            "[yellow]⚠️ Adjusted per-arm windows down from "
-            f"{target_per_arm} to {actual_per_arm} based on capacity.[/yellow]"
+        _event(
+            console,
+            "WARN",
+            f"Adjusted per-arm windows down from {target_per_arm} to {actual_per_arm} based on capacity.",
+            emoji="⚠️",
+            profile="release",
         )
 
     plan = {
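
Reading the capacity line: unique windows minus the calibration+buffer reserve gives the usable pool, which is split across the two arms and capped at the target. Illustrative arithmetic consistent with that message (the exact rounding lives outside this hunk, so treat the division as an assumption):

    available_unique = 520
    calibration_windows, buffer_windows = 24, 16
    reserve_windows = calibration_windows + buffer_windows         # 40
    available_for_eval = available_unique - reserve_windows        # 480
    target_per_arm = 200                                           # RELEASE_MIN_WINDOWS_PER_ARM
    actual_per_arm = min(target_per_arm, available_for_eval // 2)  # assumed split; 200 here
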
@@ -1893,16 +2405,31 @@ def run_command(
     ),
     out: str | None = typer.Option(None, "--out", help="Output directory override"),
     edit: str | None = typer.Option(None, "--edit", help="Edit kind (quant|mixed)"),
+    edit_label: str | None = typer.Option(
+        None,
+        "--edit-label",
+        help=(
+            "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
+            "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
+        ),
+    ),
     tier: str | None = typer.Option(
         None,
         "--tier",
         help="Auto-tuning tier override (conservative|balanced|aggressive)",
     ),
+    metric_kind: str | None = typer.Option(
+        None,
+        "--metric-kind",
+        help="Primary metric kind override (ppl_causal|ppl_mlm|accuracy|etc.)",
+    ),
     probes: int | None = typer.Option(
         None, "--probes", help="Number of micro-probes (0=deterministic, >0=adaptive)"
     ),
     until_pass: bool = typer.Option(
-        False, "--until-pass", help="Retry until certificate passes (max 3 attempts)"
+        False,
+        "--until-pass",
+        help="Retry until evaluation report passes gates (max 3 attempts)",
     ),
     max_attempts: int = typer.Option(
         3, "--max-attempts", help="Maximum retry attempts for --until-pass mode"
@@ -1913,11 +2440,24 @@ def run_command(
     baseline: str | None = typer.Option(
         None,
         "--baseline",
-        help="Path to baseline report.json for certificate validation",
+        help="Path to baseline report.json for evaluation report validation",
     ),
     no_cleanup: bool = typer.Option(
         False, "--no-cleanup", help="Skip cleanup of temporary artifacts"
     ),
+    style: str | None = typer.Option(
+        None, "--style", help="Output style (audit|friendly)"
+    ),
+    progress: bool = typer.Option(
+        False, "--progress", help="Show progress done messages"
+    ),
+    timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
+    telemetry: bool = typer.Option(
+        False, "--telemetry", help="Write telemetry JSON alongside the report"
+    ),
+    no_color: bool = typer.Option(
+        False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
+    ),
 ):
     """
     Run InvarLock pipeline with the given configuration.
@@ -1925,7 +2465,7 @@ def run_command(
     The command assembles non-overlapping preview/final windows, executes the
     GuardChain (invariants → spectral → RMT → variance), checks pairing/overlap
     invariants, enforces guard-overhead ≤1 %, and emits a run report plus JSONL
-    events suitable for certificate generation.
+    events suitable for evaluation report generation.
     """
 
     try:
@@ -1936,24 +2476,57 @@ def run_command(
         config = _coerce_option(config)
         device = _coerce_option(device)
         profile = _coerce_option(profile)
+        profile_normalized = (str(profile or "")).strip().lower()
         out = _coerce_option(out)
         edit = _coerce_option(edit)
+        edit_label = _coerce_option(edit_label)
         tier = _coerce_option(tier)
+        metric_kind = _coerce_option(metric_kind)
         probes = _coerce_option(probes)
         until_pass = bool(_coerce_option(until_pass, False))
         max_attempts = int(_coerce_option(max_attempts, 3))
         timeout = _coerce_option(timeout)
         baseline = _coerce_option(baseline)
         no_cleanup = bool(_coerce_option(no_cleanup, False))
+        style = _coerce_option(style)
+        progress = bool(_coerce_option(progress, False))
+        timing = bool(_coerce_option(timing, False))
+        telemetry = bool(_coerce_option(telemetry, False))
+        no_color = bool(_coerce_option(no_color, False))
+
+        output_style = resolve_output_style(
+            style=str(style) if style is not None else None,
+            profile=profile_normalized,
+            progress=progress,
+            timing=timing,
+            no_color=no_color,
+        )
+        console._invarlock_output_style = output_style
+        if not output_style.color:
+            console.no_color = True
+        timings: dict[str, float] = {}
+        collect_timings = bool(output_style.timing or telemetry)
+        total_start: float | None = perf_counter() if collect_timings else None
+
+        _apply_warning_filters(profile_normalized)
 
         # Use shared CLI coercers from invarlock.cli.utils
         report_path_out: str | None = None
 
         def _fail_run(message: str) -> None:
-            console.print(f"[red]❌ {message}[/red]")
+            _event(console, "FAIL", message, emoji="❌", profile=profile_normalized)
            # Generic failure path → exit 1 (InvarlockError paths handle code 3 separately)
            raise typer.Exit(1)
 
+        def _provider_event(tag: str, message: str, emoji: str | None = None) -> None:
+            _event(
+                console,
+                tag,
+                message,
+                emoji=emoji,
+                profile=profile_normalized,
+            )
+
         # Fail fast when torch is missing so users see a clear extras hint instead of
         # a raw ModuleNotFoundError from deeper imports.
         try:
@@ -1961,12 +2534,14 @@ def run_command(
 
             _ = _torch  # pragma: no cover
         except (ImportError, ModuleNotFoundError) as e:
-            console.print(
-                "❌ Torch is required for this command. "
+            _event(
+                console,
+                "FAIL",
+                "Torch is required for this command. "
                 'Install extras with: pip install "invarlock[hf]" '
                 'or "invarlock[adapters]".',
-                style="red",
-                markup=False,
+                emoji="",
+                profile=profile_normalized,
             )
             raise typer.Exit(1) from e
 
@@ -2044,7 +2619,7 @@ def run_command(
             seed_value = 42
         set_seed(seed_value)
         # Enforce deterministic algorithms in CI/Release profiles when torch is available
-        profile_label = (str(profile or "").lower()) if profile else None
+        profile_label = profile_normalized or None
         if torch is not None and profile_label in {"ci", "release"}:
             try:  # pragma: no cover - behavior depends on torch availability
                 if hasattr(torch, "use_deterministic_algorithms"):
@@ -2073,10 +2648,14 @@ def run_command(
             "numpy": int(numpy_seed),
             "torch": int(torch_seed) if torch_seed is not None else None,
         }
-        console.print(
-            "🎲 Deterministic seeds → "
+        _event(
+            console,
+            "INIT",
+            "Deterministic seeds → "
             f"python={seed_bundle['python']}, numpy={seed_bundle['numpy']}, "
-            f"torch={seed_bundle['torch'] if seed_bundle['torch'] is not None else 'N/A'}"
+            f"torch={seed_bundle['torch'] if seed_bundle['torch'] is not None else 'N/A'}",
+            emoji="🎲",
+            profile=profile_normalized,
         )
 
         # Resolve device and output directory
@@ -2111,8 +2690,8 @@ def run_command(
 
         run_id = f"{output_dir.name}-{timestamp}" if output_dir.name else timestamp
 
-        console.print(f"📁 Output directory: {run_dir}")
-        console.print(f"🆔 Run ID: {run_id}")
+        console.print(_format_kv_line("Output", str(run_dir)))
+        console.print(_format_kv_line("Run ID", run_id))
 
         # Initialize retry controller if --until-pass mode enabled
         retry_controller = _init_retry_controller(
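
`_format_kv_line` pads labels to `KV_LABEL_WIDTH` so the startup header lines up, and `_device_resolution_note` collapses the old `requested=/resolved=` pair into one note. Illustrative output:

    # _format_kv_line("Device", "cuda (auto-resolved)") ->
    #  Device    : cuda (auto-resolved)
    # _format_kv_line("Run ID", "demo-20250101T120000") ->
    #  Run ID    : demo-20250101T120000
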
@@ -2127,7 +2706,6 @@ def run_command(
         pairing_schedule: dict[str, Any] | None = None
         if baseline:
             baseline_path = Path(baseline)
-            profile_normalized = (profile or "").strip().lower()
             strict_baseline = profile_normalized in {"ci", "release"}
             if not baseline_path.exists():
                 msg = (
@@ -2136,8 +2714,12 @@ def run_command(
                 )
                 if strict_baseline:
                     raise InvarlockError(code="E001", message=msg)
-                console.print(
-                    f"[yellow]⚠️ {msg}. Falling back to dataset schedule.[/yellow]"
+                _event(
+                    console,
+                    "WARN",
+                    f"{msg}. Falling back to dataset schedule.",
+                    emoji="⚠️",
+                    profile=profile_normalized,
                 )
             else:
                 try:
@@ -2147,8 +2729,12 @@ def run_command(
                     msg = f"PAIRING-EVIDENCE-MISSING: baseline report JSON parse failed ({exc})"
                     if strict_baseline:
                         raise InvarlockError(code="E001", message=msg) from exc
-                    console.print(
-                        f"[yellow]⚠️ {msg}. Falling back to dataset schedule.[/yellow]"
+                    _event(
+                        console,
+                        "WARN",
+                        f"{msg}. Falling back to dataset schedule.",
+                        emoji="⚠️",
+                        profile=profile_normalized,
                     )
                     baseline_report_data = None
                 if isinstance(baseline_report_data, dict):
@@ -2206,8 +2792,12 @@ def run_command(
                                 tokenizer_hash = tok
                     except Exception:
                         pass
-                    console.print(
-                        "🧬 Loaded baseline evaluation schedule for pairing"
+                    _event(
+                        console,
+                        "DATA",
+                        "Loaded baseline evaluation schedule for pairing",
+                        emoji="🧬",
+                        profile=profile_normalized,
                     )
                 else:
                     msg = (
@@ -2216,8 +2806,12 @@ def run_command(
                     )
                     if strict_baseline:
                         raise InvarlockError(code="E001", message=msg)
-                    console.print(
-                        f"[yellow]⚠️ {msg}. Falling back to dataset schedule.[/yellow]"
+                    _event(
+                        console,
+                        "WARN",
+                        f"{msg}. Falling back to dataset schedule.",
+                        emoji="⚠️",
+                        profile=profile_normalized,
                     )
                     baseline_report_data = None
                     pairing_schedule = None
@@ -2243,15 +2837,23 @@ def run_command(
         adapter = registry.get_adapter(cfg.model.adapter)
         edit_name = getattr(getattr(cfg, "edit", None), "name", None)
         if not isinstance(edit_name, str) or not edit_name.strip():
-            console.print(
-                "[red]❌ Edit configuration must specify a non-empty `edit.name`.[/red]"
+            _event(
+                console,
+                "FAIL",
+                "Edit configuration must specify a non-empty `edit.name`.",
+                emoji="❌",
+                profile=profile_normalized,
             )
             raise typer.Exit(1)
         try:
             edit_op = registry.get_edit(edit_name.strip())
         except Exception:
-            console.print(
-                f"[yellow]⚠️ Unknown edit '{edit_name.strip()}'. Using pass-through shim.[/yellow]"
+            _event(
+                console,
+                "WARN",
+                f"Unknown edit '{edit_name.strip()}'. Using pass-through shim.",
+                emoji="⚠️",
+                profile=profile_normalized,
             )
             edit_op = SimpleNamespace(name=edit_name.strip())
 
@@ -2287,8 +2889,12 @@ def run_command(
                         registry.get_plugin_metadata(guard_name, "guards")
                     )
                 except KeyError:
-                    console.print(
-                        f"[yellow]⚠️ Guard '{guard_name}' not found, skipping[/yellow]"
+                    _event(
+                        console,
+                        "WARN",
+                        f"Guard '{guard_name}' not found, skipping",
+                        emoji="⚠️",
+                        profile=profile_normalized,
                    )
         plugin_provenance = {
             "adapter": adapter_meta,
@@ -2296,8 +2902,15 @@ def run_command(
             "guards": guard_metadata,
         }
         pm_acceptance_range = _resolve_pm_acceptance_range(cfg)
-
-        console.print(f"🔌 Adapter: {adapter.name}")
+        pm_drift_band = _resolve_pm_drift_band(cfg)
+
+        _event(
+            console,
+            "DATA",
+            f"Adapter: {adapter.name}",
+            emoji="🔌",
+            profile=profile_normalized,
+        )
 
         # Create run configuration
         guard_overrides = {
@@ -2361,6 +2974,9 @@ def run_command(
             pm_acceptance_range
         )
         run_context["pm_acceptance_range"] = pm_acceptance_range
+        if pm_drift_band:
+            run_context.setdefault("primary_metric", {})["drift_band"] = pm_drift_band
+            run_context["pm_drift_band"] = pm_drift_band
         run_context["model_profile"] = {
             "family": model_profile.family,
             "default_loss": model_profile.default_loss,
@@ -2391,6 +3007,7 @@ def run_command(
         dataset_meta: dict[str, Any] = {}
         baseline_meta: dict[str, Any] = {}
         window_plan: dict[str, Any] | None = None
+        dataset_timing_start: float | None = perf_counter() if collect_timings else None
         if pairing_schedule:
             harvested = _validate_and_harvest_baseline_schedule(
                 cfg,
@@ -2413,7 +3030,7 @@ def run_command(
             try:
                 tokenizer, tokenizer_hash = resolve_tokenizer(model_profile)
             except Exception as exc:
-                console.print(f"[red]{exc}[/red]")
+                _event(console, "FAIL", str(exc), emoji="❌", profile=profile)
                 raise typer.Exit(1) from exc
             preview_window_ids = pairing_schedule["preview"].get("window_ids")
             preview_labels = pairing_schedule["preview"].get("labels")
@@ -2635,7 +3252,13 @@ def run_command(
             if capacity_meta and "window_capacity" not in dataset_meta:
                 dataset_meta["window_capacity"] = capacity_meta
         elif cfg.dataset.provider:
-            console.print(f"📊 Loading dataset: {cfg.dataset.provider}")
+            _event(
+                console,
+                "DATA",
+                f"Loading dataset: {cfg.dataset.provider}",
+                emoji="📊",
+                profile=profile_normalized,
+            )
             # Pass through provider-specific kwargs when available
             provider_kwargs = {}
             for key in (
@@ -2695,6 +3318,7 @@ def run_command(
                     provider_kwargs=provider_kwargs,
                     console=console,
                     resolved_device=resolved_device,
+                    emit=_provider_event,
                 )
             )
 
@@ -2702,7 +3326,7 @@ def run_command(
             try:
                 tokenizer, tokenizer_hash = resolve_tokenizer(model_profile)
             except Exception as exc:
-                console.print(f"[red]{exc}[/red]")
+                _event(console, "FAIL", str(exc), emoji="❌", profile=profile)
                 raise typer.Exit(1) from exc
 
             dataset_stride = getattr(
@@ -2736,7 +3360,7 @@ def run_command(
                         console=console,
                     )
                 except RuntimeError as err:
-                    console.print(f"[red]{err}[/red]")
+                    _event(console, "FAIL", str(err), emoji="❌", profile=profile)
                    raise typer.Exit(1) from err
 
                 actual_per_arm = int(window_plan["actual_preview"])
@@ -2748,9 +3372,12 @@ def run_command(
                     cfg.dataset, "stride", getattr(cfg.dataset, "seq_len", 0)
                 )
             else:
-                console.print(
-                    "[yellow]⚠️ Release profile requested but dataset provider "
-                    "does not expose capacity estimation; using configured window counts.[/yellow]"
+                _event(
+                    console,
+                    "WARN",
+                    "Release profile requested but dataset provider does not expose capacity estimation; using configured window counts.",
+                    emoji="⚠️",
+                    profile=profile_normalized,
                 )
 
             preview_records: list[tuple[list[int], list[int]]] = []
@@ -2954,8 +3581,12 @@ def run_command(
                     raise RuntimeError(
                         "Unable to construct non-overlapping windows within minimum window floor."
                     )
-                console.print(
-                    f"[yellow]⚠️ Detected {deficit} duplicate windows; reducing per-arm windows to {proposed_per_arm} and retrying stratification.[/yellow]"
+                _event(
+                    console,
+                    "WARN",
+                    f"Detected {deficit} duplicate windows; reducing per-arm windows to {proposed_per_arm} and retrying stratification.",
+                    emoji="⚠️",
+                    profile=profile_normalized,
                )
 
                 effective_preview = proposed_per_arm
@@ -3097,6 +3728,10 @@ def run_command(
         run_context["dataset_meta"] = dataset_meta
         if window_plan:
             run_context["window_plan"] = window_plan
+        if dataset_timing_start is not None:
+            timings["load_dataset"] = max(
+                0.0, float(perf_counter() - dataset_timing_start)
+            )
 
         if os.environ.get("INVARLOCK_DEBUG_TRACE"):
             console.print(
@@ -3120,7 +3755,13 @@ def run_command(
         )
 
         # Execute the real pipeline using CoreRunner
-        console.print(f"⚙️ Executing pipeline with {len(guards)} guards...")
+        _event(
+            console,
+            "EXEC",
+            f"Executing pipeline with {len(guards)} guards...",
+            emoji="⚙️",
+            profile=profile_normalized,
+        )
         runner = CoreRunner()
 
         # Prepare auto configuration for tier resolution
@@ -3185,8 +3826,8 @@ def run_command(
             for key, values in model_profile.module_selectors.items()
         }
 
-        console.print(f"✂️ Edit: {edit_op.name}")
-        console.print(f"🛡️ Guards: {[g.name for g in guards]}")
+        console.print(_format_kv_line("Edit", str(edit_op.name)))
+        console.print(_format_kv_line("Guards", _format_guard_chain(guards)))
 
         # Model load/snapshot strategy
         model = None
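
`_format_guard_chain` deduplicates guard names while preserving order and joins them with arrows, so the header shows the chain rather than a Python list repr. Example with stubbed guard objects:

    from types import SimpleNamespace

    guards = [
        SimpleNamespace(name="invariants"),
        SimpleNamespace(name="spectral"),
        SimpleNamespace(name="spectral"),  # duplicate: dropped
        SimpleNamespace(name="rmt"),
    ]
    # _format_guard_chain(guards) -> "invariants → spectral → rmt"
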
@@ -3200,8 +3841,30 @@ def run_command(
  # Try single-load with snapshot/restore if adapter supports it; fallback to reload per attempt
  try:
  # Load once
- console.print(f"🔧 Loading model once: {cfg.model.id}")
- model = _load_model_with_cfg(adapter, cfg, resolved_device)
+ _event(
+ console,
+ "INIT",
+ f"Loading model once: {cfg.model.id}",
+ emoji="🔧",
+ profile=profile_normalized,
+ )
+ with timed_step(
+ console=console,
+ style=_style_from_console(console, profile=profile_normalized),
+ timings=timings,
+ key="load_model",
+ tag="INIT",
+ message="Load model",
+ emoji="🔧",
+ ):
+ model = _load_model_with_cfg(
+ adapter,
+ cfg,
+ resolved_device,
+ profile=profile_normalized,
+ event_path=run_dir / "events.jsonl",
+ warning_context={"phase": "load_model", "run_id": run_id},
+ )

  # No edit-specific bootstrap logic
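
timed_step is new in this release and its definition sits outside this hunk. Judging from the keyword arguments, it is plausibly a context manager that emits a tagged status line and records elapsed wall-clock time under timings[key]; a sketch under those assumptions:

# Hypothetical sketch; the real helper (likely invarlock/cli/output.py)
# may render differently and actually use the style argument.
from contextlib import contextmanager
from time import perf_counter

@contextmanager
def timed_step(*, console, style, timings, key, tag, message, emoji=None):
    start = perf_counter()
    try:
        yield
    finally:
        elapsed = max(0.0, perf_counter() - start)
        timings[key] = elapsed              # feeds the timing summary below
        _event(console, tag, f"{message} ({elapsed:.2f}s)", emoji=emoji)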
@@ -3357,9 +4020,13 @@ def run_command(
  return "reload"

  mode = _choose_snapshot_mode()
- # Emit deterministic snapshot mode status line
- console.print(
- f"snapshot_mode: {'enabled' if mode in {'bytes', 'chunked'} else 'disabled'}"
+ enabled = mode in {"bytes", "chunked"}
+ _event(
+ console,
+ "INIT",
+ f"Snapshot mode: {'enabled' if enabled else 'disabled'}",
+ emoji="💾",
+ profile=profile_normalized,
  )
  if mode == "chunked":
  snapshot_tmpdir = adapter.snapshot_chunked(model)  # type: ignore[attr-defined]
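
The snapshot mode controls how model state is recovered between retry attempts: "bytes" and "chunked" restore from a snapshot, anything else forces a reload. _choose_snapshot_mode is defined outside this hunk; a purely illustrative capability probe consistent with the surrounding code:

# Illustrative only; the real _choose_snapshot_mode may also weigh model
# size, available memory, or configuration, none of which is shown here.
def _choose_snapshot_mode() -> str:
    if hasattr(adapter, "snapshot_chunked"):
        return "chunked"  # tensors spilled to a temp dir (snapshot_tmpdir)
    if hasattr(adapter, "snapshot"):
        return "bytes"    # in-memory byte snapshot
    return "reload"       # no snapshot support: reload the model per attempt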
@@ -3402,13 +4069,16 @@ def run_command(

  # RETRY LOOP - All report processing inside loop
  attempt = 1
- profile_normalized = (profile or "").lower()
  measure_guard_overhead, skip_overhead = _should_measure_overhead(
  profile_normalized
  )
  if skip_overhead and profile_normalized in {"ci", "release"}:
- console.print(
- "[yellow]⚠️ Overhead check skipped via INVARLOCK_SKIP_OVERHEAD_CHECK[/yellow]"
+ _event(
+ console,
+ "WARN",
+ "Overhead check skipped via INVARLOCK_SKIP_OVERHEAD_CHECK",
+ emoji="⚠️",
+ profile=profile_normalized,
  )

  while True:
@@ -3416,12 +4086,32 @@ def run_command(
  set_seed(seed_bundle["python"])

  if retry_controller:
- console.print(f"\n🚀 Attempt {attempt}/{max_attempts}")
+ console.print("\n")
+ _event(
+ console,
+ "EXEC",
+ f"Attempt {attempt}/{max_attempts}",
+ emoji="🚀",
+ profile=profile_normalized,
+ )
  if attempt > 1:
- console.print(f"🔄 Retry attempt {attempt}/{max_attempts}")
+ _event(
+ console,
+ "EXEC",
+ f"Retry attempt {attempt}/{max_attempts}",
+ emoji="🔄",
+ profile=profile_normalized,
+ )
  else:
  if attempt > 1:
- console.print(f"\n🚀 Attempt {attempt}")
+ console.print("\n")
+ _event(
+ console,
+ "EXEC",
+ f"Attempt {attempt}",
+ emoji="🚀",
+ profile=profile_normalized,
+ )

  # Adjust parameters for retry attempts
  if retry_controller and attempt > 1:
@@ -3450,6 +4140,8 @@ def run_command(
  "checks": {},
  }
  elif measure_guard_overhead:
+ bare_edit_config = dict(edit_config or {})
+ bare_edit_config["emit"] = False
  guard_overhead_payload = _run_bare_control(
  adapter=adapter,
  edit_op=edit_op,
@@ -3458,7 +4150,7 @@ def run_command(
  run_config=run_config,
  calibration_data=calibration_data,
  auto_config=auto_config,
- edit_config=edit_config,
+ edit_config=bare_edit_config,
  preview_count=preview_count,
  final_count=final_count,
  seed_bundle=seed_bundle,
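
Copying edit_config and forcing emit=False means the bare control run measures guard overhead without emitting artifacts. The gate itself, summarized later by _print_guard_overhead_summary, compares the guarded run against this bare control as a relative increase; illustrative arithmetic only, since the exact quantity compared (wall clock versus primary metric) is not pinned down by this diff:

# Example numbers only; field names follow the guard_overhead_info dict
# used further down in this file.
bare_pm = 12.50      # bare control measurement (assumed primary metric)
guarded_pm = 12.80   # same measurement with guards enabled
overhead_fraction = (guarded_pm - bare_pm) / bare_pm  # 0.024
overhead_percent = overhead_fraction * 100.0          # +2.4%
passed = overhead_fraction <= threshold_fraction      # e.g. 0.05 budget -> PASS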
@@ -3472,34 +4164,53 @@ def run_command(
  )

  # Ensure clean state for guarded run
- core_report, model = _execute_guarded_run(
- runner=runner,
- adapter=adapter,
- model=model,
- cfg=cfg,
- edit_op=edit_op,
- run_config=run_config,
- guards=guards,
- calibration_data=calibration_data,
- auto_config=auto_config,
- edit_config=edit_config,
- preview_count=preview_count,
- final_count=final_count,
- restore_fn=restore_fn,
- resolved_device=resolved_device,
+ with timed_step(
  console=console,
- snapshot_provenance=snapshot_provenance,
- skip_model_load=skip_model_load,
- )
+ style=_style_from_console(console, profile=profile_normalized),
+ timings=timings,
+ key="execute",
+ tag="EXEC",
+ message="Execute pipeline",
+ emoji="⚙️",
+ ):
+ core_report, model = _execute_guarded_run(
+ runner=runner,
+ adapter=adapter,
+ model=model,
+ cfg=cfg,
+ edit_op=edit_op,
+ run_config=run_config,
+ guards=guards,
+ calibration_data=calibration_data,
+ auto_config=auto_config,
+ edit_config=edit_config,
+ preview_count=preview_count,
+ final_count=final_count,
+ restore_fn=restore_fn,
+ resolved_device=resolved_device,
+ profile_normalized=profile_normalized,
+ console=console,
+ snapshot_provenance=snapshot_provenance,
+ skip_model_load=skip_model_load,
+ )
  except _SnapshotRestoreFailed as exc:
  snapshot_provenance["restore_failed"] = True
  _free_model_memory(model)
  model = None
  restore_fn = None
- console.print(
- "[yellow]⚠️ Snapshot restore failed; switching to reload-per-attempt.[/yellow]"
+ _event(
+ console,
+ "WARN",
+ "Snapshot restore failed; switching to reload-per-attempt.",
+ emoji="⚠️",
+ profile=profile_normalized,
+ )
+ _event(
+ console,
+ "WARN",
+ f"↳ {exc}",
+ profile=profile_normalized,
  )
- console.print(f"[yellow]↳ {exc}[/yellow]")
  if retry_controller:
  retry_controller.record_attempt(
  attempt,
@@ -3521,7 +4232,7 @@ def run_command(
  # Convert CoreRunner report to evaluation report
  report = create_empty_report()

- # Persist minimal run context for certificate/report provenance.
+ # Persist minimal run context for evaluation report provenance.
  try:
  report["context"] = {
  "profile": profile_normalized,
@@ -3631,6 +4342,8 @@ def run_command(
  report["meta"].update(meta_payload)
  if pm_acceptance_range:
  report["meta"]["pm_acceptance_range"] = pm_acceptance_range
+ if pm_drift_band:
+ report["meta"]["pm_drift_band"] = pm_drift_band
  report["meta"]["model_profile"] = {
  "family": model_profile.family,
  "default_loss": model_profile.default_loss,
@@ -3714,6 +4427,14 @@ def run_command(
  }
  )

+ if edit_label:
+ report.setdefault("edit", {})
+ report["edit"]["name"] = edit_label
+ report["edit"]["algorithm"] = edit_label
+ if isinstance(core_report.context, dict):
+ core_report.context.setdefault("edit", {})
+ core_report.context["edit"]["name"] = edit_label
+
  mask_artifact_path = _persist_ref_masks(core_report, run_dir)
  if mask_artifact_path:
  report.setdefault("artifacts", {})
@@ -3721,6 +4442,22 @@ def run_command(

  # Transfer metrics (PM-only: do not write ppl_* fields)
  if hasattr(core_report, "metrics") and core_report.metrics:
+ if isinstance(core_report.metrics, dict):
+ core_timings = core_report.metrics.get("timings")
+ if isinstance(core_timings, dict):
+ for key in (
+ "prepare",
+ "prepare_guards",
+ "edit",
+ "guards",
+ "eval",
+ "finalize",
+ ):
+ if key in core_timings:
+ try:
+ timings[key] = float(core_timings[key])
+ except Exception:
+ timings[key] = core_timings[key]
  metrics_payload = {
  "latency_ms_per_tok": core_report.metrics.get(
  "latency_ms_per_tok", 0.0
@@ -3772,6 +4509,11 @@ def run_command(
  "masked_tokens_total",
  "masked_tokens_preview",
  "masked_tokens_final",
+ "timings",
+ "guard_timings",
+ "memory_snapshots",
+ "gpu_memory_mb_peak",
+ "gpu_memory_reserved_mb_peak",
  "reduction",
  ]
  for key in optional_keys:
@@ -3935,8 +4677,12 @@ def run_command(
  },
  }
  elif had_baseline and (profile or "").lower() in {"ci", "release"}:
- console.print(
- "[red]❌ [INVARLOCK:E001] PAIRING-SCHEDULE-MISMATCH: baseline pairing requested but evaluation windows were not produced. Check capacity/pairing config.[/red]"
+ _event(
+ console,
+ "FAIL",
+ "[INVARLOCK:E001] PAIRING-SCHEDULE-MISMATCH: baseline pairing requested but evaluation windows were not produced. Check capacity/pairing config.",
+ emoji="❌",
+ profile=profile_normalized,
  )
  raise typer.Exit(3)
  else:
@@ -4147,12 +4893,20 @@ def run_command(
  if ok:
  report["artifacts"]["checkpoint_path"] = str(export_dir)
  else:
- console.print(
- "[yellow]⚠️ Model export requested but adapter did not save a HF directory.[/yellow]"
+ _event(
+ console,
+ "WARN",
+ "Model export requested but adapter did not save a HF directory.",
+ emoji="⚠️",
+ profile=profile_normalized,
  )
  except Exception:
- console.print(
- "[yellow]⚠️ Model export requested but failed due to an unexpected error.[/yellow]"
+ _event(
+ console,
+ "WARN",
+ "Model export requested but failed due to an unexpected error.",
+ emoji="⚠️",
+ profile=profile_normalized,
  )

  # Set flags
@@ -4373,7 +5127,10 @@ def run_command(
  try:
  metric_kind_resolved, _provider_kind, metric_opts = (
  _resolve_metric_and_provider(
- cfg, model_profile, resolved_loss_type=resolved_loss_type
+ cfg,
+ model_profile,
+ resolved_loss_type=resolved_loss_type,
+ metric_kind_override=metric_kind,
  )
  )
  if metric_kind_resolved:
@@ -4452,6 +5209,13 @@ def run_command(
  except Exception:
  pass

+ telemetry_path: Path | None = None
+ if telemetry:
+ telemetry_path = run_dir / "telemetry.json"
+ report.setdefault("artifacts", {})["telemetry_path"] = str(
+ telemetry_path
+ )
+
  saved_files = _postprocess_and_summarize(
  report=report,
  run_dir=run_dir,
@@ -4468,6 +5232,31 @@ def run_command(
  except Exception:
  pass

+ if telemetry and telemetry_path is not None:
+ try:
+ from invarlock.reporting.telemetry import save_telemetry_report
+
+ saved_path = save_telemetry_report(
+ report, run_dir, filename=telemetry_path.name
+ )
+ if isinstance(saved_files, dict):
+ saved_files["telemetry"] = str(saved_path)
+ _event(
+ console,
+ "DATA",
+ f"Telemetry: {saved_path}",
+ emoji="📈",
+ profile=profile_normalized,
+ )
+ except Exception as exc:  # pragma: no cover - best-effort
+ _event(
+ console,
+ "WARN",
+ f"Telemetry export failed: {exc}",
+ emoji="⚠️",
+ profile=profile_normalized,
+ )
+
  # Metrics display
  pm_obj = None
  try:
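
save_telemetry_report comes from the new invarlock/reporting/telemetry.py module (+86 lines in this release); only its call site appears in this diff. A minimal sketch, assuming it lifts the timing and memory fields out of the report into a standalone JSON artifact:

# Hypothetical sketch of save_telemetry_report; the shipped module may
# select different fields or add metadata.
import json
from pathlib import Path

def save_telemetry_report(
    report: dict, run_dir: Path, *, filename: str = "telemetry.json"
) -> Path:
    metrics = report.get("metrics", {}) if isinstance(report, dict) else {}
    payload = {
        "timings": metrics.get("timings", {}),
        "guard_timings": metrics.get("guard_timings", {}),
        "memory_mb_peak": metrics.get("memory_mb_peak"),
        "gpu_memory_mb_peak": metrics.get("gpu_memory_mb_peak"),
    }
    path = run_dir / filename
    path.write_text(json.dumps(payload, indent=2))  # best-effort artifact
    return path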
@@ -4482,15 +5271,23 @@ def run_command(
  if isinstance(pm_prev, (int | float)) and isinstance(
  pm_fin, (int | float)
  ):
- console.print(
- f"📌 Primary Metric [{pm_kind}] — preview: {pm_prev:.3f}, final: {pm_fin:.3f}"
+ _event(
+ console,
+ "METRIC",
+ f"Primary Metric [{pm_kind}] — preview: {pm_prev:.3f}, final: {pm_fin:.3f}",
+ emoji="📌",
+ profile=profile_normalized,
  )
  ratio_vs_base = pm_obj.get("ratio_vs_baseline")
  if isinstance(ratio_vs_base, (int | float)) and math.isfinite(
  ratio_vs_base
  ):
- console.print(
- f"🔗 Ratio vs baseline [{pm_kind}]: {ratio_vs_base:.3f}"
+ _event(
+ console,
+ "METRIC",
+ f"Ratio vs baseline [{pm_kind}]: {ratio_vs_base:.3f}",
+ emoji="🔗",
+ profile=profile_normalized,
  )
  except Exception:
  pass
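
The two METRIC lines surface the primary metric (PM) on preview and final windows and, when available, ratio_vs_baseline. The ratio's definition is not in this hunk; assuming the conventional edited-over-baseline reading, a worked example of the printed value:

# Assumed semantics, not confirmed by this diff.
pm_final = 13.104       # PM of the edited model on final windows
pm_baseline = 12.480    # baseline PM on the same paired windows
ratio_vs_baseline = pm_final / pm_baseline  # 1.050, printed as "1.050"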
@@ -4502,8 +5299,12 @@ def run_command(
  console, guard_overhead_info
  )
  if not guard_overhead_info.get("passed", True):
- console.print(
- "[red]⚠️ Guard overhead gate FAILED: Guards add more than the permitted budget[/red]"
+ _event(
+ console,
+ "FAIL",
+ "Guard overhead gate FAILED: Guards add more than the permitted budget",
+ emoji="⚠️",
+ profile=profile_normalized,
  )
  # Only fail hard when the overhead check was actually evaluated
  # (e.g., for causal LMs with available bare/guarded PM). For
@@ -4528,11 +5329,11 @@ def run_command(
  f"(>{threshold_fraction * 100:.1f}% increase)"
  )

- # Drift gate status is no longer surfaced in console; rely on certificate gates
+ # Drift gate status is no longer surfaced in console; rely on evaluation report gates

- # Certificate validation for --until-pass mode
+ # Evaluation report validation for --until-pass mode
  if retry_controller and baseline:
- from invarlock.reporting.certificate import make_certificate
+ from invarlock.reporting.report_builder import make_report

  try:
  baseline_report = baseline_report_data
@@ -4544,15 +5345,21 @@ def run_command(
  if baseline_report is None:
  raise FileNotFoundError("Baseline report unavailable")

- console.print("📜 Generating safety certificate...")
- certificate = make_certificate(report, baseline_report)
+ _event(
+ console,
+ "EXEC",
+ "Generating evaluation report...",
+ emoji="📜",
+ profile=profile_normalized,
+ )
+ evaluation_report = make_report(report, baseline_report)

- validation = certificate.get("validation", {})
- certificate_passed = all(validation.values())
+ validation = evaluation_report.get("validation", {})
+ report_passed = all(validation.values())

  failed_gates = [k for k, v in validation.items() if not v]
  result_summary = {
- "passed": certificate_passed,
+ "passed": report_passed,
  "failures": failed_gates,
  "validation": validation,
  }
@@ -4560,12 +5367,22 @@ def run_command(
  attempt, result_summary, edit_config
  )

- if certificate_passed:
- console.print("[green]✅ Certificate PASSED all gates![/green]")
+ if report_passed:
+ _event(
+ console,
+ "PASS",
+ "Evaluation report PASSED all gates!",
+ emoji="✅",
+ profile=profile_normalized,
+ )
  break
  else:
- console.print(
- f"[yellow]⚠️ Certificate FAILED gates: {', '.join(failed_gates)}[/yellow]"
+ _event(
+ console,
+ "FAIL",
+ f"Evaluation report FAILED gates: {', '.join(failed_gates)}",
+ emoji="⚠️",
+ profile=profile_normalized,
  )

  # Auto-tune mask-only heads (binary search on keep count)
@@ -4610,31 +5427,43 @@ def run_command(
  }
  )
  head_section["global_k"] = next_keep
- console.print(
- f"🔧 Auto-tune adjust: global_k → {next_keep} (bounds {keep_low}-{keep_high})"
+ _event(
+ console,
+ "INIT",
+ f"Auto-tune adjust: global_k → {next_keep} (bounds {keep_low}-{keep_high})",
+ emoji="🔧",
+ profile=profile_normalized,
  )
  except Exception:
  pass

- if retry_controller.should_retry(certificate_passed):
+ if retry_controller.should_retry(report_passed):
  attempt += 1
  continue
  else:
- console.print(
- f"[red]❌ Exhausted retry budget after {attempt} attempts[/red]"
+ _event(
+ console,
+ "FAIL",
+ f"Exhausted retry budget after {attempt} attempts",
+ emoji="❌",
+ profile=profile_normalized,
  )
  break

- except Exception as cert_error:
- console.print(
- f"[yellow]⚠️ Certificate validation failed: {cert_error}[/yellow]"
+ except Exception as report_error:
+ _event(
+ console,
+ "WARN",
+ f"Evaluation report validation failed: {report_error}",
+ emoji="⚠️",
+ profile=profile_normalized,
  )
  if retry_controller:
  retry_controller.record_attempt(
  attempt,
  {
  "passed": False,
- "failures": ["certificate_error"],
+ "failures": ["report_error"],
  "validation": {},
  },
  edit_config,
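
The auto-tune path adjusts global_k, the number of attention heads kept, between keep_low and keep_high. The comment calls it a binary search on keep count; the update rule is elided from this hunk, but the classic bisection it alludes to would be:

# Generic bisection sketch; names mirror the hunk, but the actual update
# in run.py is not shown in this diff and may differ.
keep_low, keep_high = 8, 64         # example bounds on heads to keep
current_keep, passed = 36, False    # outcome of the last attempt
if passed:
    keep_high = current_keep        # gates passed: try a more aggressive edit
else:
    keep_low = current_keep + 1     # gates failed: keep more heads
next_keep = (keep_low + keep_high) // 2   # fed back as global_k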
@@ -4656,11 +5485,82 @@ def run_command(
  # (moved) Cleanup printing occurs after loop to guarantee execution
  pass

+ if output_style.timing:
+ total_duration = (
+ max(0.0, float(perf_counter() - total_start))
+ if total_start is not None
+ else None
+ )
+ timings_for_summary: dict[str, float] = {}
+ for key, value in timings.items():
+ if isinstance(value, (int | float)):
+ timings_for_summary[key] = float(value)
+ if total_duration is not None:
+ timings_for_summary["total"] = total_duration
+
+ has_breakdown = any(
+ key in timings_for_summary
+ for key in (
+ "prepare",
+ "prepare_guards",
+ "edit",
+ "guards",
+ "eval",
+ "finalize",
+ )
+ )
+
+ order: list[tuple[str, str]] = []
+
+ def _add(label: str, key: str) -> None:
+ if key in timings_for_summary:
+ order.append((label, key))
+
+ _add("Load model", "load_model")
+ _add("Load data", "load_dataset")
+ if has_breakdown:
+ _add("Prepare", "prepare")
+ _add("Prep guards", "prepare_guards")
+ _add("Edit", "edit")
+ _add("Guards", "guards")
+ _add("Eval", "eval")
+ _add("Finalize", "finalize")
+ else:
+ _add("Execute", "execute")
+ _add("Total", "total")
+
+ extra_lines: list[str] = []
+ metrics_section = (
+ report.get("metrics", {}) if isinstance(report, dict) else {}
+ )
+ if isinstance(metrics_section, dict):
+ mem_peak = metrics_section.get("memory_mb_peak")
+ gpu_peak = metrics_section.get("gpu_memory_mb_peak")
+ if isinstance(mem_peak, (int | float)):
+ extra_lines.append(f" Peak Memory : {float(mem_peak):.2f} MB")
+ if isinstance(gpu_peak, (int | float)):
+ extra_lines.append(f" Peak GPU Mem: {float(gpu_peak):.2f} MB")
+
+ if timings_for_summary and order:
+ print_timing_summary(
+ console,
+ timings_for_summary,
+ style=output_style,
+ order=order,
+ extra_lines=extra_lines,
+ )
+
  # Normal path falls through; cleanup handled below in finally
  return report_path_out

  except FileNotFoundError as e:
- console.print(f"[red]❌ Configuration file not found: {e}[/red]")
+ _event(
+ console,
+ "FAIL",
+ f"Configuration file not found: {e}",
+ emoji="❌",
+ profile=profile_normalized,
+ )
  raise typer.Exit(1) from e
  except InvarlockError as ce:
  # InvarlockError → code 3 only in CI/Release; dev → 1
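
print_timing_summary is another of the new output helpers used above: order maps display labels to keys in timings_for_summary, and extra_lines carries pre-formatted rows (the peak-memory lines). A sketch of the rendering the call site implies:

# Hypothetical sketch; the shipped helper (likely invarlock/cli/output.py)
# may align columns or honor the style argument differently.
def print_timing_summary(console, timings, *, style, order, extra_lines=()):
    console.print("Timing summary:")
    for label, key in order:
        console.print(f"  {label:<12}: {timings[key]:.2f}s")
    for line in extra_lines:
        console.print(line)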
@@ -4676,12 +5576,22 @@ def run_command(
  traceback.print_exc()
  # Emit a clearer message for schema failures (exit 2)
  if isinstance(e, ValueError) and "Invalid RunReport" in str(e):
- console.print(
- "[red]❌ Schema invalid: run report structure failed validation[/red]"
+ _event(
+ console,
+ "FAIL",
+ "Schema invalid: run report structure failed validation",
+ emoji="❌",
+ profile=profile_normalized,
  )
  code = 2
  else:
- console.print(f"[red]❌ Pipeline execution failed: {e}[/red]")
+ _event(
+ console,
+ "FAIL",
+ f"Pipeline execution failed: {e}",
+ emoji="❌",
+ profile=profile_normalized,
+ )
  code = _resolve_exit_code(e, profile=profile)
  raise typer.Exit(code) from e
  finally:
@@ -4695,9 +5605,21 @@ def run_command(
  except Exception:
  pass
  finally:
- console.print("cleanup: removed")
+ _event(
+ console,
+ "INFO",
+ "Cleanup: removed",
+ emoji="🧹",
+ profile=profile_normalized,
+ )
  else:
- console.print("cleanup: skipped")
+ _event(
+ console,
+ "INFO",
+ "Cleanup: skipped",
+ emoji="🧹",
+ profile=profile_normalized,
+ )
  except Exception:
  # Best-effort cleanup printing; never raise from finally
  pass
@@ -4844,11 +5766,9 @@ def _print_guard_overhead_summary(
  """Print a concise guard-overhead console summary. Returns threshold fraction used."""
  evaluated = bool(guard_overhead_info.get("evaluated", True))
  if not evaluated:
- console.print("🛡️ Guard Overhead: not evaluated")
+ _event(console, "METRIC", "Guard Overhead: not evaluated", emoji="🛡️")
  return GUARD_OVERHEAD_THRESHOLD
- overhead_status = (
- "✅ PASS" if guard_overhead_info.get("passed", True) else "❌ FAIL"
- )
+ overhead_status = "PASS" if guard_overhead_info.get("passed", True) else "FAIL"
  overhead_percent = guard_overhead_info.get("overhead_percent")
  if isinstance(overhead_percent, (int | float)) and math.isfinite(
  float(overhead_percent)
@@ -4867,8 +5787,11 @@ def _print_guard_overhead_summary(
  except (TypeError, ValueError):
  threshold_fraction = GUARD_OVERHEAD_THRESHOLD
  threshold_display = f"≤ +{threshold_fraction * 100:.1f}%"
- console.print(
- f"🛡️ Guard Overhead: {overhead_status} {overhead_display} ({threshold_display})"
+ _event(
+ console,
+ "METRIC",
+ f"Guard Overhead: {overhead_status} {overhead_display} ({threshold_display})",
+ emoji="🛡️",
  )
  return threshold_fraction
@@ -4878,8 +5801,12 @@ def _print_retry_summary(console: Console, retry_controller: Any | None) -> None
  try:
  if retry_controller and getattr(retry_controller, "attempt_history", None):
  summary = retry_controller.get_attempt_summary()
- console.print(
- f"\n📊 Retry Summary: {summary['total_attempts']} attempts in {summary['elapsed_time']:.1f}s"
+ console.print("\n")
+ _event(
+ console,
+ "METRIC",
+ f"Retry Summary: {summary['total_attempts']} attempts in {summary['elapsed_time']:.1f}s",
+ emoji="📊",
  )
  except Exception:
  # Never break the run for summary printing
@@ -4902,10 +5829,15 @@ def _init_retry_controller(
  retry_controller = RetryController(
  max_attempts=max_attempts, timeout=timeout, verbose=True
  )
- console.print(f"🔄 Retry mode enabled: max {max_attempts} attempts")
+ _event(
+ console,
+ "INIT",
+ f"Retry mode enabled: max {max_attempts} attempts",
+ emoji="🔄",
+ )
  if baseline:
- console.print(f"📋 Using baseline: {baseline}")
+ _event(console, "DATA", f"Using baseline: {baseline}", emoji="📋")
  else:
  if baseline:
- console.print(f"📋 Using baseline: {baseline}")
+ _event(console, "DATA", f"Using baseline: {baseline}", emoji="📋")
  return retry_controller