invarlock 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. invarlock/__init__.py +1 -1
  2. invarlock/_data/runtime/profiles/ci_cpu.yaml +5 -0
  3. invarlock/_data/runtime/tiers.yaml +61 -0
  4. invarlock/adapters/hf_loading.py +97 -0
  5. invarlock/calibration/__init__.py +6 -0
  6. invarlock/calibration/spectral_null.py +301 -0
  7. invarlock/calibration/variance_ve.py +154 -0
  8. invarlock/cli/app.py +15 -0
  9. invarlock/cli/commands/calibrate.py +576 -0
  10. invarlock/cli/commands/doctor.py +16 -4
  11. invarlock/cli/commands/explain_gates.py +53 -9
  12. invarlock/cli/commands/plugins.py +12 -2
  13. invarlock/cli/commands/run.py +323 -81
  14. invarlock/cli/commands/verify.py +40 -0
  15. invarlock/cli/determinism.py +237 -0
  16. invarlock/core/auto_tuning.py +215 -17
  17. invarlock/core/registry.py +9 -4
  18. invarlock/eval/bench.py +467 -141
  19. invarlock/eval/bench_regression.py +12 -0
  20. invarlock/eval/data.py +29 -7
  21. invarlock/guards/spectral.py +216 -9
  22. invarlock/guards/variance.py +6 -3
  23. invarlock/reporting/certificate.py +403 -51
  24. invarlock/reporting/certificate_schema.py +4 -1
  25. invarlock/reporting/guards_analysis.py +108 -10
  26. invarlock/reporting/normalizer.py +21 -1
  27. invarlock/reporting/policy_utils.py +100 -16
  28. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/METADATA +12 -10
  29. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/RECORD +33 -26
  30. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/WHEEL +0 -0
  31. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/entry_points.txt +0 -0
  32. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/licenses/LICENSE +0 -0
  33. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
  import typer
7
7
  from rich.console import Console
8
8
 
9
- from invarlock.core.auto_tuning import TIER_POLICIES
9
+ from invarlock.core.auto_tuning import get_tier_policies
10
10
  from invarlock.reporting.certificate import make_certificate
11
11
 
12
12
  console = Console()
@@ -49,15 +49,38 @@ def explain_gates_command(
49
49
  "aggressive": 1.20,
50
50
  "none": 1.10,
51
51
  }
52
- limit_base = tier_thresholds.get(tier, 1.10)
52
+ resolved_policy = (
53
+ cert.get("resolved_policy", {})
54
+ if isinstance(cert.get("resolved_policy"), dict)
55
+ else {}
56
+ )
53
57
  metrics_policy = (
54
- TIER_POLICIES.get(tier, {}).get("metrics", {}) if isinstance(tier, str) else {}
58
+ resolved_policy.get("metrics", {})
59
+ if isinstance(resolved_policy.get("metrics"), dict)
60
+ else {}
55
61
  )
62
+ if not metrics_policy:
63
+ tier_policies = get_tier_policies()
64
+ tier_defaults = tier_policies.get(tier, tier_policies.get("balanced", {}))
65
+ metrics_policy = (
66
+ tier_defaults.get("metrics", {}) if isinstance(tier_defaults, dict) else {}
67
+ )
68
+ if not isinstance(metrics_policy, dict):
69
+ metrics_policy = {}
56
70
  pm_policy = (
57
- metrics_policy.get("pm_ratio", {}) if isinstance(metrics_policy, dict) else {}
71
+ metrics_policy.get("pm_ratio", {})
72
+ if isinstance(metrics_policy.get("pm_ratio"), dict)
73
+ else {}
58
74
  )
59
75
  hysteresis_ratio = float(pm_policy.get("hysteresis_ratio", 0.0))
60
76
  min_tokens = int(pm_policy.get("min_tokens", 0))
77
+ try:
78
+ limit_base = float(
79
+ pm_policy.get("ratio_limit_base", tier_thresholds.get(tier, 1.10))
80
+ or tier_thresholds.get(tier, 1.10)
81
+ )
82
+ except Exception:
83
+ limit_base = tier_thresholds.get(tier, 1.10)
61
84
  limit_with_hyst = limit_base + max(0.0, hysteresis_ratio)
62
85
  tokens_ok = True
63
86
  telem = cert.get("telemetry", {}) if isinstance(cert.get("telemetry"), dict) else {}
@@ -70,9 +93,16 @@ def explain_gates_command(
70
93
  tokens_ok = True
71
94
 
72
95
  # Primary-metric ratio gate explanation (ppl-like kinds shown as ratios)
73
- ppl = cert.get("ppl", {}) if isinstance(cert.get("ppl"), dict) else {}
74
- ratio = ppl.get("ratio_vs_baseline")
75
- ratio_ci = ppl.get("ratio_ci")
96
+ ratio = None
97
+ ratio_ci = None
98
+ if isinstance(cert.get("primary_metric"), dict):
99
+ pm = cert.get("primary_metric", {})
100
+ ratio = pm.get("ratio_vs_baseline")
101
+ ratio_ci = pm.get("display_ci")
102
+ elif isinstance(cert.get("ppl"), dict): # legacy
103
+ ppl = cert.get("ppl", {})
104
+ ratio = ppl.get("ratio_vs_baseline")
105
+ ratio_ci = ppl.get("ratio_ci")
76
106
  hysteresis_applied = bool(validation.get("hysteresis_applied"))
77
107
  status = "PASS" if bool(validation.get("primary_metric_acceptable")) else "FAIL"
78
108
  console.print("[bold]Gate: Primary Metric vs Baseline[/bold]")
@@ -109,8 +139,22 @@ def explain_gates_command(
109
139
  pass
110
140
 
111
141
  # Drift gate explanation
112
- drift = ppl.get("preview_final_ratio")
113
- drift_ci = ppl.get("drift_ci")
142
+ drift = None
143
+ drift_ci = None
144
+ if isinstance(cert.get("primary_metric"), dict):
145
+ pm = cert.get("primary_metric", {})
146
+ preview = pm.get("preview")
147
+ final = pm.get("final")
148
+ if isinstance(preview, int | float) and isinstance(final, int | float):
149
+ try:
150
+ if float(preview) != 0.0:
151
+ drift = float(final) / float(preview)
152
+ except Exception:
153
+ drift = None
154
+ if isinstance(cert.get("ppl"), dict): # legacy
155
+ ppl = cert.get("ppl", {})
156
+ drift = ppl.get("preview_final_ratio", drift)
157
+ drift_ci = ppl.get("drift_ci")
114
158
  drift_status = (
115
159
  "PASS" if bool(validation.get("preview_final_drift_acceptable")) else "FAIL"
116
160
  )
@@ -897,11 +897,21 @@ def _check_plugin_extras(plugin_name: str, plugin_type: str) -> str:
897
897
  if not plugin_info or not plugin_info["packages"]:
898
898
  return "" # No extra dependencies needed
899
899
 
900
- # Check each required package using import to play nice with tests that mock __import__
900
+ # Check each required package. For most packages we use a light import so
901
+ # tests can monkeypatch __import__; for GPU-only stacks like bitsandbytes
902
+ # we only probe presence via importlib.util.find_spec to avoid crashing on
903
+ # CPU-only builds during simple listing.
901
904
  missing_packages: list[str] = []
902
905
  for pkg in plugin_info["packages"]:
903
906
  try:
904
- __import__(pkg)
907
+ if pkg == "bitsandbytes":
908
+ import importlib.util as _util
909
+
910
+ spec = _util.find_spec(pkg)
911
+ if spec is None:
912
+ raise ImportError("bitsandbytes not importable")
913
+ else:
914
+ __import__(pkg)
905
915
  except Exception:
906
916
  missing_packages.append(pkg)
907
917
 
@@ -9,6 +9,7 @@ prefer Compare & Certify via `invarlock certify --baseline ... --subject ...`.
9
9
 
10
10
  import copy
11
11
  import hashlib
12
+ import inspect
12
13
  import json
13
14
  import math
14
15
  import os
@@ -81,6 +82,137 @@ GUARD_OVERHEAD_THRESHOLD = 0.01
81
82
  SPLIT_ALIASES: tuple[str, ...] = ("validation", "val", "dev", "eval", "test")
82
83
 
83
84
 
85
def _coerce_mapping(obj: object) -> dict[str, Any]:
    """Best-effort conversion of config-like objects to plain dicts.

    Sources are tried in order; the first one that yields a usable result wins:

    1. ``obj`` itself when it is already a ``dict`` (returned as-is, not copied);
    2. a ``_data`` attribute that holds a ``dict`` (returned as-is);
    3. the return value of ``obj.model_dump()`` when it is a ``dict``;
    4. any other ``collections.abc.Mapping`` (copied into a new ``dict``);
    5. ``vars(obj)``.

    Args:
        obj: Any object; ``None`` and unsupported objects are tolerated.

    Returns:
        A dict view of ``obj``, or an empty dict when no source applies.
        This function never raises.
    """
    from collections.abc import Mapping

    if isinstance(obj, dict):
        return obj

    try:
        raw = getattr(obj, "_data", None)
        if isinstance(raw, dict):
            return raw
    except Exception:
        # Attribute access may run arbitrary descriptor/property code.
        pass

    try:
        dumped = obj.model_dump()  # type: ignore[attr-defined]
        if isinstance(dumped, dict):
            return dumped
    except Exception:
        pass

    # Mapping types that are not dict subclasses (MappingProxyType, UserDict,
    # ...) carry their content in the mapping protocol, not in ``__dict__``;
    # without this step they fell through to ``vars()`` and were lost or
    # misread as their instance attributes.
    if isinstance(obj, Mapping):
        try:
            return dict(obj)
        except Exception:
            pass

    try:
        data = vars(obj)
        if isinstance(data, dict):
            return data
    except Exception:
        # ``vars`` raises TypeError for objects without ``__dict__``.
        pass
    return {}
109
+
110
+
111
def _resolve_pm_acceptance_range(
    cfg: InvarLockConfig | dict[str, Any] | None,
) -> dict[str, float]:
    """Resolve primary-metric acceptance bounds from config/env with safe defaults.

    Each bound is taken from the first source that provides a usable value:

    1. environment: ``INVARLOCK_PM_ACCEPTANCE_MIN`` / ``INVARLOCK_PM_ACCEPTANCE_MAX``;
    2. config: ``primary_metric.acceptance_range.min`` / ``.max``;
    3. built-in defaults (0.95 / 1.10).

    Values that cannot be parsed as a finite float are treated as "not set".
    Non-positive values are replaced by the built-in default for that bound,
    and ``max`` is raised to ``min`` when the two are inverted.

    Args:
        cfg: Run configuration (config object or plain dict), or ``None``.

    Returns:
        ``{"min": ..., "max": ...}`` when at least one bound was explicitly
        provided by env or config; otherwise an empty dict.
    """
    base_min = 0.95
    base_max = 1.10

    def _finite_float(raw: object) -> float | None:
        # NaN/inf would slip through the ``<= 0`` sanity check below and end
        # up as an acceptance bound, so they are rejected at parse time.
        try:
            value = float(raw)  # type: ignore[arg-type]
        except (TypeError, ValueError, OverflowError):
            return None
        return value if math.isfinite(value) else None

    def _parse_env(name: str) -> float | None:
        raw = os.environ.get(name, "").strip()
        return _finite_float(raw) if raw else None

    def _pick(env_val: float | None, cfg_val: float | None, default: float) -> float:
        # Environment beats config; non-positive bounds are meaningless for a
        # ratio and fall back to the default.
        chosen = env_val if env_val is not None else cfg_val
        if chosen is None or chosen <= 0:
            return default
        return chosen

    cfg_min: float | None = None
    cfg_max: float | None = None
    try:
        cfg_map = _coerce_mapping(cfg) if cfg is not None else {}
        pm_map = _coerce_mapping(cfg_map.get("primary_metric"))
        acceptance = pm_map.get("acceptance_range")
        if isinstance(acceptance, dict):
            cfg_min = _finite_float(acceptance.get("min"))
            cfg_max = _finite_float(acceptance.get("max"))
    except Exception:
        # Config objects are arbitrary; a malformed one must not break the run.
        cfg_min = None
        cfg_max = None

    env_min = _parse_env("INVARLOCK_PM_ACCEPTANCE_MIN")
    env_max = _parse_env("INVARLOCK_PM_ACCEPTANCE_MAX")

    if all(v is None for v in (cfg_min, cfg_max, env_min, env_max)):
        return {}

    min_val = _pick(env_min, cfg_min, base_min)
    max_val = max(_pick(env_max, cfg_max, base_max), min_val)
    return {"min": float(min_val), "max": float(max_val)}
184
+
185
+
186
+ def _free_model_memory(model: object | None) -> None:
187
+ """Best-effort cleanup to release GPU memory for a model object."""
188
+ if model is None:
189
+ return
190
+ try:
191
+ import gc
192
+
193
+ del model
194
+ gc.collect()
195
+ if torch is not None and torch.cuda.is_available():
196
+ torch.cuda.empty_cache()
197
+ torch.cuda.synchronize()
198
+ except Exception:
199
+ # Cleanup should never raise; fallback is to proceed without cache purge
200
+ pass
201
+
202
+
203
def _should_measure_overhead(profile_normalized: str) -> tuple[bool, bool]:
    """Decide whether the guard-overhead check runs for this profile.

    The check is measured only for the ``ci`` and ``release`` profiles, and
    only when ``INVARLOCK_SKIP_OVERHEAD_CHECK`` is not set to ``1``, ``true``
    or ``yes`` (case-insensitive, surrounding whitespace ignored).

    Returns:
        ``(measure_guard_overhead, skip_overhead)``.
    """
    raw_flag = os.environ.get("INVARLOCK_SKIP_OVERHEAD_CHECK", "")
    skip_overhead = raw_flag.strip().lower() in {"1", "true", "yes"}
    profile_is_gated = profile_normalized in {"ci", "release"}
    return (profile_is_gated and not skip_overhead), skip_overhead
214
+
215
+
84
216
  def _choose_dataset_split(
85
217
  *, requested: str | None, available: list[str] | None
86
218
  ) -> tuple[str, bool]:
@@ -687,6 +819,51 @@ def _resolve_provider_and_split(
687
819
  return data_provider, resolved_split, used_fallback_split
688
820
 
689
821
 
822
def _extract_model_load_kwargs(cfg: InvarLockConfig) -> dict[str, Any]:
    """Collect the extra ``adapter.load_model`` kwargs declared under ``model``.

    The config is read through ``cfg.model_dump()``. Every entry of its
    ``model`` section is kept except the core fields ``id``, ``adapter`` and
    ``device`` and entries whose value is ``None``.

    Returns:
        The remaining key/value pairs, or an empty dict when the config
        cannot be dumped or has no dict-valued ``model`` section.
    """
    core_fields = {"id", "adapter", "device"}

    try:
        dumped = cfg.model_dump()
    except Exception:
        return {}

    section = dumped.get("model") if isinstance(dumped, dict) else None
    if not isinstance(section, dict):
        return {}

    kwargs: dict[str, Any] = {}
    for name, setting in section.items():
        if name in core_fields or setting is None:
            continue
        kwargs[name] = setting
    return kwargs
836
+
837
+
838
def _load_model_with_cfg(adapter: Any, cfg: InvarLockConfig, device: str) -> Any:
    """Load a model with config-provided kwargs, filtering for strict adapters.

    The extra kwargs come from ``_extract_model_load_kwargs(cfg)``. They are
    forwarded in full when ``adapter.load_model`` accepts ``**kwargs``, and
    otherwise reduced to the parameter names its signature declares. When the
    signature cannot be inspected, or the call with extras fails with a
    ``TypeError`` (call-shape mismatch), the strictest call shape
    ``load_model(model_id, device=device)`` is used instead.

    Any other exception raised while loading propagates to the caller. It is
    not retried without the extras, because that would silently discard the
    configured options and load the model a second time.

    Args:
        adapter: Object exposing ``load_model(model_id, device=..., ...)``.
        cfg: Run configuration providing ``model.id`` and optional extras.
        device: Device string passed through to the adapter.

    Returns:
        Whatever ``adapter.load_model`` returns.

    Raises:
        ValueError: If the config has no non-empty string ``model.id``.
    """
    try:
        model_id = cfg.model.id
    except Exception:
        try:
            model_id = (cfg.model_dump().get("model") or {}).get("id")
        except Exception:
            model_id = None
    if not isinstance(model_id, str) or not model_id:
        raise ValueError("Missing model.id in config")

    extra = _extract_model_load_kwargs(cfg)
    if extra:
        try:
            params = inspect.signature(adapter.load_model).parameters
        except (TypeError, ValueError):
            # Not introspectable (e.g. some builtins): send no extras at all.
            extra = {}
        else:
            accepts_var_kw = any(
                p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()
            )
            if not accepts_var_kw:
                extra = {k: v for k, v in extra.items() if k in params}

    if extra:
        try:
            return adapter.load_model(model_id, device=device, **extra)
        except TypeError:
            # Call-shape mismatch despite the signature check (e.g. kwargs
            # forwarded to a stricter callee): fall back to the strict call.
            pass
    return adapter.load_model(model_id, device=device)
865
+
866
+
690
867
  def _run_bare_control(
691
868
  *,
692
869
  adapter: Any,
@@ -768,6 +945,7 @@ def _run_bare_control(
768
945
  "errors": [],
769
946
  "checks": {},
770
947
  "source": f"{profile_normalized or 'ci'}_profile",
948
+ "mode": "bare",
771
949
  }
772
950
 
773
951
  if getattr(bare_report, "status", "").lower() not in {"success", "completed", "ok"}:
@@ -846,7 +1024,7 @@ def _postprocess_and_summarize(
846
1024
  match_fraction: float | None,
847
1025
  overlap_fraction: float | None,
848
1026
  console: Console,
849
- ) -> None:
1027
+ ) -> dict[str, str]:
850
1028
  """Finalize report windows stats and print/save summary artifacts."""
851
1029
  try:
852
1030
  ds = report.setdefault("dataset", {}).setdefault("windows", {})
@@ -870,6 +1048,7 @@ def _postprocess_and_summarize(
870
1048
  console.print(f"📄 Report: {saved_files['json']}")
871
1049
  if run_config.event_path:
872
1050
  console.print(f"📝 Events: {run_config.event_path}")
1051
+ return saved_files
873
1052
 
874
1053
 
875
1054
  def _compute_provider_digest(report: dict[str, Any]) -> dict[str, str] | None:
@@ -1406,6 +1585,7 @@ def run_command(
1406
1585
  no_cleanup = bool(_coerce_option(no_cleanup, False))
1407
1586
 
1408
1587
  # Use shared CLI coercers from invarlock.cli.utils
1588
+ report_path_out: str | None = None
1409
1589
 
1410
1590
  def _fail_run(message: str) -> None:
1411
1591
  console.print(f"[red]❌ {message}[/red]")
@@ -1542,6 +1722,26 @@ def run_command(
1542
1722
  cfg, device=device, out=out, console=console
1543
1723
  )
1544
1724
 
1725
+ determinism_meta: dict[str, Any] | None = None
1726
+ try:
1727
+ from invarlock.cli.determinism import apply_determinism_preset
1728
+
1729
+ preset = apply_determinism_preset(
1730
+ profile=profile_label,
1731
+ device=resolved_device,
1732
+ seed=int(seed_bundle.get("python") or seed_value),
1733
+ threads=int(os.environ.get("INVARLOCK_OMP_THREADS", 1) or 1),
1734
+ )
1735
+ if isinstance(preset, dict) and preset:
1736
+ determinism_meta = preset
1737
+ preset_seeds = preset.get("seeds")
1738
+ if isinstance(preset_seeds, dict) and preset_seeds:
1739
+ for key in ("python", "numpy", "torch"):
1740
+ if key in preset_seeds:
1741
+ seed_bundle[key] = preset_seeds.get(key)
1742
+ except Exception:
1743
+ determinism_meta = None
1744
+
1545
1745
  # Create run directory with timestamp
1546
1746
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1547
1747
  run_dir = output_dir / timestamp
@@ -1671,6 +1871,7 @@ def run_command(
1671
1871
  "edit": edit_meta,
1672
1872
  "guards": guard_metadata,
1673
1873
  }
1874
+ pm_acceptance_range = _resolve_pm_acceptance_range(cfg)
1674
1875
 
1675
1876
  console.print(f"🔌 Adapter: {adapter.name}")
1676
1877
 
@@ -1746,6 +1947,10 @@ def run_command(
1746
1947
  "plugins": plugin_provenance,
1747
1948
  "run_id": run_id,
1748
1949
  }
1950
+ run_context.setdefault("primary_metric", {})["acceptance_range"] = (
1951
+ pm_acceptance_range
1952
+ )
1953
+ run_context["pm_acceptance_range"] = pm_acceptance_range
1749
1954
  run_context["model_profile"] = {
1750
1955
  "family": model_profile.family,
1751
1956
  "default_loss": model_profile.default_loss,
@@ -2756,18 +2961,26 @@ def run_command(
2756
2961
 
2757
2962
  restore_fn = _restore2
2758
2963
  else:
2759
- # reload path
2964
+ # reload path - properly free GPU memory before setting to None
2965
+ _free_model_memory(model)
2760
2966
  model = None
2761
2967
  restore_fn = None
2762
2968
  except Exception:
2763
2969
  # On any failure, fall back to reload-per-attempt path
2970
+ _free_model_memory(model)
2764
2971
  model = None
2765
2972
  restore_fn = None
2766
2973
 
2767
2974
  # RETRY LOOP - All report processing inside loop
2768
2975
  attempt = 1
2769
2976
  profile_normalized = (profile or "").lower()
2770
- measure_guard_overhead = profile_normalized in {"ci", "release"}
2977
+ measure_guard_overhead, skip_overhead = _should_measure_overhead(
2978
+ profile_normalized
2979
+ )
2980
+ if skip_overhead and profile_normalized in {"ci", "release"}:
2981
+ console.print(
2982
+ "[yellow]⚠️ Overhead check skipped via INVARLOCK_SKIP_OVERHEAD_CHECK[/yellow]"
2983
+ )
2771
2984
 
2772
2985
  while True:
2773
2986
  # Reset RNG streams each attempt to guarantee determinism across retries
@@ -2790,7 +3003,23 @@ def run_command(
2790
3003
  )
2791
3004
 
2792
3005
  guard_overhead_payload: dict[str, Any] | None = None
2793
- if measure_guard_overhead:
3006
+ if skip_overhead and profile_normalized in {"ci", "release"}:
3007
+ guard_overhead_payload = {
3008
+ "overhead_threshold": GUARD_OVERHEAD_THRESHOLD,
3009
+ "evaluated": False,
3010
+ "passed": True,
3011
+ "skipped": True,
3012
+ "skip_reason": "INVARLOCK_SKIP_OVERHEAD_CHECK",
3013
+ "mode": "skipped",
3014
+ "source": "env:INVARLOCK_SKIP_OVERHEAD_CHECK",
3015
+ "messages": [
3016
+ "Overhead check skipped via INVARLOCK_SKIP_OVERHEAD_CHECK"
3017
+ ],
3018
+ "warnings": [],
3019
+ "errors": [],
3020
+ "checks": {},
3021
+ }
3022
+ elif measure_guard_overhead:
2794
3023
  guard_overhead_payload = _run_bare_control(
2795
3024
  adapter=adapter,
2796
3025
  edit_op=edit_op,
@@ -2932,7 +3161,11 @@ def run_command(
2932
3161
  meta_payload["invarlock_version"] = invarlock_version
2933
3162
  if env_flags:
2934
3163
  meta_payload["env_flags"] = env_flags
3164
+ if determinism_meta:
3165
+ meta_payload["determinism"] = determinism_meta
2935
3166
  report["meta"].update(meta_payload)
3167
+ if pm_acceptance_range:
3168
+ report["meta"]["pm_acceptance_range"] = pm_acceptance_range
2936
3169
  report["meta"]["model_profile"] = {
2937
3170
  "family": model_profile.family,
2938
3171
  "default_loss": model_profile.default_loss,
@@ -3089,87 +3322,90 @@ def run_command(
3089
3322
  report["metrics"].update(metrics_payload)
3090
3323
 
3091
3324
  if guard_overhead_payload is not None:
3092
- # Compute guarded primary-metric snapshot; pass structured reports into validator
3093
- try:
3094
- # Map loss type to ppl family kind
3095
- lk = str(resolved_loss_type or "causal").lower()
3096
- if lk == "mlm":
3097
- pm_kind_for_overhead = "ppl_mlm"
3098
- elif lk in {"seq2seq", "s2s", "t5"}:
3099
- pm_kind_for_overhead = "ppl_seq2seq"
3100
- else:
3101
- pm_kind_for_overhead = "ppl_causal"
3325
+ if bool(guard_overhead_payload.get("skipped", False)):
3326
+ report["guard_overhead"] = guard_overhead_payload
3327
+ else:
3328
+ # Compute guarded primary-metric snapshot; pass structured reports into validator
3329
+ try:
3330
+ # Map loss type to ppl family kind
3331
+ lk = str(resolved_loss_type or "causal").lower()
3332
+ if lk == "mlm":
3333
+ pm_kind_for_overhead = "ppl_mlm"
3334
+ elif lk in {"seq2seq", "s2s", "t5"}:
3335
+ pm_kind_for_overhead = "ppl_seq2seq"
3336
+ else:
3337
+ pm_kind_for_overhead = "ppl_causal"
3102
3338
 
3103
- # Prefer computing from the in-memory core_report windows to avoid ordering issues
3104
- pm_guarded = _extract_pm_snapshot_for_overhead(
3105
- core_report, kind=pm_kind_for_overhead
3106
- )
3107
- if not isinstance(pm_guarded, dict) or not pm_guarded:
3339
+ # Prefer computing from the in-memory core_report windows to avoid ordering issues
3108
3340
  pm_guarded = _extract_pm_snapshot_for_overhead(
3109
- report, kind=pm_kind_for_overhead
3341
+ core_report, kind=pm_kind_for_overhead
3110
3342
  )
3343
+ if not isinstance(pm_guarded, dict) or not pm_guarded:
3344
+ pm_guarded = _extract_pm_snapshot_for_overhead(
3345
+ report, kind=pm_kind_for_overhead
3346
+ )
3111
3347
 
3112
- guard_overhead_payload["guarded_report"] = (
3113
- {"metrics": {"primary_metric": pm_guarded}}
3114
- if isinstance(pm_guarded, dict) and pm_guarded
3115
- else None
3348
+ guard_overhead_payload["guarded_report"] = (
3349
+ {"metrics": {"primary_metric": pm_guarded}}
3350
+ if isinstance(pm_guarded, dict) and pm_guarded
3351
+ else None
3352
+ )
3353
+ except Exception:
3354
+ guard_overhead_payload["guarded_report"] = None
3355
+ bare_struct = guard_overhead_payload.get("bare_report") or {}
3356
+ guarded_struct = guard_overhead_payload.get("guarded_report") or {}
3357
+ # Be robust to mocks or minimal objects returned by validators
3358
+ result = validate_guard_overhead(
3359
+ bare_struct,
3360
+ guarded_struct,
3361
+ overhead_threshold=guard_overhead_payload.get(
3362
+ "overhead_threshold", GUARD_OVERHEAD_THRESHOLD
3363
+ ),
3116
3364
  )
3117
- except Exception:
3118
- guard_overhead_payload["guarded_report"] = None
3119
- bare_struct = guard_overhead_payload.get("bare_report") or {}
3120
- guarded_struct = guard_overhead_payload.get("guarded_report") or {}
3121
- # Be robust to mocks or minimal objects returned by validators
3122
- result = validate_guard_overhead(
3123
- bare_struct,
3124
- guarded_struct,
3125
- overhead_threshold=guard_overhead_payload.get(
3126
- "overhead_threshold", GUARD_OVERHEAD_THRESHOLD
3127
- ),
3128
- )
3129
- try:
3130
- messages = list(getattr(result, "messages", []))
3131
- except Exception: # pragma: no cover - defensive
3132
- messages = []
3133
- try:
3134
- warnings = list(getattr(result, "warnings", []))
3135
- except Exception: # pragma: no cover - defensive
3136
- warnings = []
3137
- try:
3138
- errors = list(getattr(result, "errors", []))
3139
- except Exception: # pragma: no cover - defensive
3140
- errors = []
3141
- try:
3142
- checks = dict(getattr(result, "checks", {}))
3143
- except Exception: # pragma: no cover - defensive
3144
- checks = {}
3145
- metrics_obj = getattr(result, "metrics", {})
3146
- if not isinstance(metrics_obj, dict):
3147
- metrics_obj = {}
3148
- overhead_ratio = metrics_obj.get("overhead_ratio")
3149
- if overhead_ratio is None:
3150
- overhead_ratio = getattr(result, "overhead_ratio", None)
3151
- overhead_percent = metrics_obj.get("overhead_percent")
3152
- if overhead_percent is None:
3153
- overhead_percent = getattr(result, "overhead_percent", None)
3154
- passed_flag = bool(getattr(result, "passed", False))
3155
-
3156
- guard_overhead_payload.update(
3157
- {
3158
- "messages": messages,
3159
- "warnings": warnings,
3160
- "errors": errors,
3161
- "checks": checks,
3162
- "overhead_ratio": overhead_ratio,
3163
- "overhead_percent": overhead_percent,
3164
- "passed": passed_flag,
3165
- "evaluated": True,
3166
- }
3167
- )
3168
- # Normalize for non-finite/degenerate cases
3169
- guard_overhead_payload = _normalize_overhead_result(
3170
- guard_overhead_payload, profile=profile_normalized
3171
- )
3172
- report["guard_overhead"] = guard_overhead_payload
3365
+ try:
3366
+ messages = list(getattr(result, "messages", []))
3367
+ except Exception: # pragma: no cover - defensive
3368
+ messages = []
3369
+ try:
3370
+ warnings = list(getattr(result, "warnings", []))
3371
+ except Exception: # pragma: no cover - defensive
3372
+ warnings = []
3373
+ try:
3374
+ errors = list(getattr(result, "errors", []))
3375
+ except Exception: # pragma: no cover - defensive
3376
+ errors = []
3377
+ try:
3378
+ checks = dict(getattr(result, "checks", {}))
3379
+ except Exception: # pragma: no cover - defensive
3380
+ checks = {}
3381
+ metrics_obj = getattr(result, "metrics", {})
3382
+ if not isinstance(metrics_obj, dict):
3383
+ metrics_obj = {}
3384
+ overhead_ratio = metrics_obj.get("overhead_ratio")
3385
+ if overhead_ratio is None:
3386
+ overhead_ratio = getattr(result, "overhead_ratio", None)
3387
+ overhead_percent = metrics_obj.get("overhead_percent")
3388
+ if overhead_percent is None:
3389
+ overhead_percent = getattr(result, "overhead_percent", None)
3390
+ passed_flag = bool(getattr(result, "passed", False))
3391
+
3392
+ guard_overhead_payload.update(
3393
+ {
3394
+ "messages": messages,
3395
+ "warnings": warnings,
3396
+ "errors": errors,
3397
+ "checks": checks,
3398
+ "overhead_ratio": overhead_ratio,
3399
+ "overhead_percent": overhead_percent,
3400
+ "passed": passed_flag,
3401
+ "evaluated": True,
3402
+ }
3403
+ )
3404
+ # Normalize for non-finite/degenerate cases
3405
+ guard_overhead_payload = _normalize_overhead_result(
3406
+ guard_overhead_payload, profile=profile_normalized
3407
+ )
3408
+ report["guard_overhead"] = guard_overhead_payload
3173
3409
 
3174
3410
  had_baseline = bool(baseline and Path(baseline).exists())
3175
3411
  if (
@@ -3714,7 +3950,7 @@ def run_command(
3714
3950
  except Exception:
3715
3951
  pass
3716
3952
 
3717
- _postprocess_and_summarize(
3953
+ saved_files = _postprocess_and_summarize(
3718
3954
  report=report,
3719
3955
  run_dir=run_dir,
3720
3956
  run_config=run_config,
@@ -3724,6 +3960,11 @@ def run_command(
3724
3960
  overlap_fraction=overlap_fraction,
3725
3961
  console=console,
3726
3962
  )
3963
+ try:
3964
+ if isinstance(saved_files, dict) and saved_files.get("json"):
3965
+ report_path_out = str(saved_files["json"])
3966
+ except Exception:
3967
+ pass
3727
3968
 
3728
3969
  # Metrics display
3729
3970
  pm_obj = None
@@ -3914,6 +4155,7 @@ def run_command(
3914
4155
  pass
3915
4156
 
3916
4157
  # Normal path falls through; cleanup handled below in finally
4158
+ return report_path_out
3917
4159
 
3918
4160
  except FileNotFoundError as e:
3919
4161
  console.print(f"[red]❌ Configuration file not found: {e}[/red]")