invarlock 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +1 -1
- invarlock/_data/runtime/profiles/ci_cpu.yaml +5 -0
- invarlock/_data/runtime/tiers.yaml +61 -0
- invarlock/adapters/hf_loading.py +97 -0
- invarlock/calibration/__init__.py +6 -0
- invarlock/calibration/spectral_null.py +301 -0
- invarlock/calibration/variance_ve.py +154 -0
- invarlock/cli/app.py +15 -0
- invarlock/cli/commands/calibrate.py +576 -0
- invarlock/cli/commands/doctor.py +16 -4
- invarlock/cli/commands/explain_gates.py +53 -9
- invarlock/cli/commands/plugins.py +12 -2
- invarlock/cli/commands/run.py +323 -81
- invarlock/cli/commands/verify.py +40 -0
- invarlock/cli/determinism.py +237 -0
- invarlock/core/auto_tuning.py +215 -17
- invarlock/core/registry.py +9 -4
- invarlock/eval/bench.py +467 -141
- invarlock/eval/bench_regression.py +12 -0
- invarlock/eval/data.py +29 -7
- invarlock/guards/spectral.py +216 -9
- invarlock/guards/variance.py +6 -3
- invarlock/reporting/certificate.py +403 -51
- invarlock/reporting/certificate_schema.py +4 -1
- invarlock/reporting/guards_analysis.py +108 -10
- invarlock/reporting/normalizer.py +21 -1
- invarlock/reporting/policy_utils.py +100 -16
- {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/METADATA +12 -10
- {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/RECORD +33 -26
- {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/WHEEL +0 -0
- {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/entry_points.txt +0 -0
- {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
import typer
|
|
7
7
|
from rich.console import Console
|
|
8
8
|
|
|
9
|
-
from invarlock.core.auto_tuning import
|
|
9
|
+
from invarlock.core.auto_tuning import get_tier_policies
|
|
10
10
|
from invarlock.reporting.certificate import make_certificate
|
|
11
11
|
|
|
12
12
|
console = Console()
|
|
@@ -49,15 +49,38 @@ def explain_gates_command(
|
|
|
49
49
|
"aggressive": 1.20,
|
|
50
50
|
"none": 1.10,
|
|
51
51
|
}
|
|
52
|
-
|
|
52
|
+
resolved_policy = (
|
|
53
|
+
cert.get("resolved_policy", {})
|
|
54
|
+
if isinstance(cert.get("resolved_policy"), dict)
|
|
55
|
+
else {}
|
|
56
|
+
)
|
|
53
57
|
metrics_policy = (
|
|
54
|
-
|
|
58
|
+
resolved_policy.get("metrics", {})
|
|
59
|
+
if isinstance(resolved_policy.get("metrics"), dict)
|
|
60
|
+
else {}
|
|
55
61
|
)
|
|
62
|
+
if not metrics_policy:
|
|
63
|
+
tier_policies = get_tier_policies()
|
|
64
|
+
tier_defaults = tier_policies.get(tier, tier_policies.get("balanced", {}))
|
|
65
|
+
metrics_policy = (
|
|
66
|
+
tier_defaults.get("metrics", {}) if isinstance(tier_defaults, dict) else {}
|
|
67
|
+
)
|
|
68
|
+
if not isinstance(metrics_policy, dict):
|
|
69
|
+
metrics_policy = {}
|
|
56
70
|
pm_policy = (
|
|
57
|
-
metrics_policy.get("pm_ratio", {})
|
|
71
|
+
metrics_policy.get("pm_ratio", {})
|
|
72
|
+
if isinstance(metrics_policy.get("pm_ratio"), dict)
|
|
73
|
+
else {}
|
|
58
74
|
)
|
|
59
75
|
hysteresis_ratio = float(pm_policy.get("hysteresis_ratio", 0.0))
|
|
60
76
|
min_tokens = int(pm_policy.get("min_tokens", 0))
|
|
77
|
+
try:
|
|
78
|
+
limit_base = float(
|
|
79
|
+
pm_policy.get("ratio_limit_base", tier_thresholds.get(tier, 1.10))
|
|
80
|
+
or tier_thresholds.get(tier, 1.10)
|
|
81
|
+
)
|
|
82
|
+
except Exception:
|
|
83
|
+
limit_base = tier_thresholds.get(tier, 1.10)
|
|
61
84
|
limit_with_hyst = limit_base + max(0.0, hysteresis_ratio)
|
|
62
85
|
tokens_ok = True
|
|
63
86
|
telem = cert.get("telemetry", {}) if isinstance(cert.get("telemetry"), dict) else {}
|
|
@@ -70,9 +93,16 @@ def explain_gates_command(
|
|
|
70
93
|
tokens_ok = True
|
|
71
94
|
|
|
72
95
|
# Primary-metric ratio gate explanation (ppl-like kinds shown as ratios)
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
96
|
+
ratio = None
|
|
97
|
+
ratio_ci = None
|
|
98
|
+
if isinstance(cert.get("primary_metric"), dict):
|
|
99
|
+
pm = cert.get("primary_metric", {})
|
|
100
|
+
ratio = pm.get("ratio_vs_baseline")
|
|
101
|
+
ratio_ci = pm.get("display_ci")
|
|
102
|
+
elif isinstance(cert.get("ppl"), dict): # legacy
|
|
103
|
+
ppl = cert.get("ppl", {})
|
|
104
|
+
ratio = ppl.get("ratio_vs_baseline")
|
|
105
|
+
ratio_ci = ppl.get("ratio_ci")
|
|
76
106
|
hysteresis_applied = bool(validation.get("hysteresis_applied"))
|
|
77
107
|
status = "PASS" if bool(validation.get("primary_metric_acceptable")) else "FAIL"
|
|
78
108
|
console.print("[bold]Gate: Primary Metric vs Baseline[/bold]")
|
|
@@ -109,8 +139,22 @@ def explain_gates_command(
|
|
|
109
139
|
pass
|
|
110
140
|
|
|
111
141
|
# Drift gate explanation
|
|
112
|
-
drift =
|
|
113
|
-
drift_ci =
|
|
142
|
+
drift = None
|
|
143
|
+
drift_ci = None
|
|
144
|
+
if isinstance(cert.get("primary_metric"), dict):
|
|
145
|
+
pm = cert.get("primary_metric", {})
|
|
146
|
+
preview = pm.get("preview")
|
|
147
|
+
final = pm.get("final")
|
|
148
|
+
if isinstance(preview, int | float) and isinstance(final, int | float):
|
|
149
|
+
try:
|
|
150
|
+
if float(preview) != 0.0:
|
|
151
|
+
drift = float(final) / float(preview)
|
|
152
|
+
except Exception:
|
|
153
|
+
drift = None
|
|
154
|
+
if isinstance(cert.get("ppl"), dict): # legacy
|
|
155
|
+
ppl = cert.get("ppl", {})
|
|
156
|
+
drift = ppl.get("preview_final_ratio", drift)
|
|
157
|
+
drift_ci = ppl.get("drift_ci")
|
|
114
158
|
drift_status = (
|
|
115
159
|
"PASS" if bool(validation.get("preview_final_drift_acceptable")) else "FAIL"
|
|
116
160
|
)
|
|
@@ -897,11 +897,21 @@ def _check_plugin_extras(plugin_name: str, plugin_type: str) -> str:
|
|
|
897
897
|
if not plugin_info or not plugin_info["packages"]:
|
|
898
898
|
return "" # No extra dependencies needed
|
|
899
899
|
|
|
900
|
-
# Check each required package
|
|
900
|
+
# Check each required package. For most packages we use a light import so
|
|
901
|
+
# tests can monkeypatch __import__; for GPU-only stacks like bitsandbytes
|
|
902
|
+
# we only probe presence via importlib.util.find_spec to avoid crashing on
|
|
903
|
+
# CPU-only builds during simple listing.
|
|
901
904
|
missing_packages: list[str] = []
|
|
902
905
|
for pkg in plugin_info["packages"]:
|
|
903
906
|
try:
|
|
904
|
-
|
|
907
|
+
if pkg == "bitsandbytes":
|
|
908
|
+
import importlib.util as _util
|
|
909
|
+
|
|
910
|
+
spec = _util.find_spec(pkg)
|
|
911
|
+
if spec is None:
|
|
912
|
+
raise ImportError("bitsandbytes not importable")
|
|
913
|
+
else:
|
|
914
|
+
__import__(pkg)
|
|
905
915
|
except Exception:
|
|
906
916
|
missing_packages.append(pkg)
|
|
907
917
|
|
invarlock/cli/commands/run.py
CHANGED
|
@@ -9,6 +9,7 @@ prefer Compare & Certify via `invarlock certify --baseline ... --subject ...`.
|
|
|
9
9
|
|
|
10
10
|
import copy
|
|
11
11
|
import hashlib
|
|
12
|
+
import inspect
|
|
12
13
|
import json
|
|
13
14
|
import math
|
|
14
15
|
import os
|
|
@@ -81,6 +82,137 @@ GUARD_OVERHEAD_THRESHOLD = 0.01
|
|
|
81
82
|
SPLIT_ALIASES: tuple[str, ...] = ("validation", "val", "dev", "eval", "test")
|
|
82
83
|
|
|
83
84
|
|
|
85
|
+
def _coerce_mapping(obj: object) -> dict[str, Any]:
|
|
86
|
+
"""Best-effort conversion of config-like objects to plain dicts."""
|
|
87
|
+
|
|
88
|
+
if isinstance(obj, dict):
|
|
89
|
+
return obj
|
|
90
|
+
try:
|
|
91
|
+
raw = getattr(obj, "_data", None)
|
|
92
|
+
if isinstance(raw, dict):
|
|
93
|
+
return raw
|
|
94
|
+
except Exception:
|
|
95
|
+
pass
|
|
96
|
+
try:
|
|
97
|
+
dumped = obj.model_dump() # type: ignore[attr-defined]
|
|
98
|
+
if isinstance(dumped, dict):
|
|
99
|
+
return dumped
|
|
100
|
+
except Exception:
|
|
101
|
+
pass
|
|
102
|
+
try:
|
|
103
|
+
data = vars(obj)
|
|
104
|
+
if isinstance(data, dict):
|
|
105
|
+
return data
|
|
106
|
+
except Exception:
|
|
107
|
+
pass
|
|
108
|
+
return {}
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _resolve_pm_acceptance_range(
|
|
112
|
+
cfg: InvarLockConfig | dict[str, Any] | None,
|
|
113
|
+
) -> dict[str, float]:
|
|
114
|
+
"""Resolve primary-metric acceptance bounds from config/env with safe defaults."""
|
|
115
|
+
|
|
116
|
+
base_min = 0.95
|
|
117
|
+
base_max = 1.10
|
|
118
|
+
|
|
119
|
+
cfg_min = None
|
|
120
|
+
cfg_max = None
|
|
121
|
+
try:
|
|
122
|
+
cfg_map = _coerce_mapping(cfg) if cfg is not None else {}
|
|
123
|
+
pm_section = cfg_map.get("primary_metric") if isinstance(cfg_map, dict) else {}
|
|
124
|
+
pm_map = _coerce_mapping(pm_section)
|
|
125
|
+
acceptance = (
|
|
126
|
+
pm_map.get("acceptance_range") if isinstance(pm_map, dict) else None
|
|
127
|
+
)
|
|
128
|
+
if isinstance(acceptance, dict):
|
|
129
|
+
if acceptance.get("min") is not None:
|
|
130
|
+
try:
|
|
131
|
+
cfg_min = float(acceptance["min"])
|
|
132
|
+
except (TypeError, ValueError):
|
|
133
|
+
cfg_min = None
|
|
134
|
+
if acceptance.get("max") is not None:
|
|
135
|
+
try:
|
|
136
|
+
cfg_max = float(acceptance["max"])
|
|
137
|
+
except (TypeError, ValueError):
|
|
138
|
+
cfg_max = None
|
|
139
|
+
except Exception:
|
|
140
|
+
cfg_min = None
|
|
141
|
+
cfg_max = None
|
|
142
|
+
|
|
143
|
+
def _parse_env(name: str) -> float | None:
|
|
144
|
+
try:
|
|
145
|
+
raw = os.environ.get(name, "")
|
|
146
|
+
if raw is None or str(raw).strip() == "":
|
|
147
|
+
return None
|
|
148
|
+
return float(raw)
|
|
149
|
+
except Exception:
|
|
150
|
+
return None
|
|
151
|
+
|
|
152
|
+
env_min = _parse_env("INVARLOCK_PM_ACCEPTANCE_MIN")
|
|
153
|
+
env_max = _parse_env("INVARLOCK_PM_ACCEPTANCE_MAX")
|
|
154
|
+
|
|
155
|
+
has_explicit = any(v is not None for v in (cfg_min, cfg_max, env_min, env_max))
|
|
156
|
+
if not has_explicit:
|
|
157
|
+
return {}
|
|
158
|
+
|
|
159
|
+
min_val = (
|
|
160
|
+
env_min if env_min is not None else cfg_min if cfg_min is not None else base_min
|
|
161
|
+
)
|
|
162
|
+
max_val = (
|
|
163
|
+
env_max if env_max is not None else cfg_max if cfg_max is not None else base_max
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
try:
|
|
167
|
+
if min_val is not None and min_val <= 0:
|
|
168
|
+
min_val = base_min
|
|
169
|
+
except Exception:
|
|
170
|
+
min_val = base_min
|
|
171
|
+
try:
|
|
172
|
+
if max_val is not None and max_val <= 0:
|
|
173
|
+
max_val = base_max
|
|
174
|
+
except Exception:
|
|
175
|
+
max_val = base_max
|
|
176
|
+
|
|
177
|
+
try:
|
|
178
|
+
if max_val is not None and min_val is not None and max_val < min_val:
|
|
179
|
+
max_val = min_val
|
|
180
|
+
except Exception:
|
|
181
|
+
max_val = base_max
|
|
182
|
+
|
|
183
|
+
return {"min": float(min_val), "max": float(max_val)}
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _free_model_memory(model: object | None) -> None:
|
|
187
|
+
"""Best-effort cleanup to release GPU memory for a model object."""
|
|
188
|
+
if model is None:
|
|
189
|
+
return
|
|
190
|
+
try:
|
|
191
|
+
import gc
|
|
192
|
+
|
|
193
|
+
del model
|
|
194
|
+
gc.collect()
|
|
195
|
+
if torch is not None and torch.cuda.is_available():
|
|
196
|
+
torch.cuda.empty_cache()
|
|
197
|
+
torch.cuda.synchronize()
|
|
198
|
+
except Exception:
|
|
199
|
+
# Cleanup should never raise; fallback is to proceed without cache purge
|
|
200
|
+
pass
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _should_measure_overhead(profile_normalized: str) -> tuple[bool, bool]:
|
|
204
|
+
"""Return (measure_guard_overhead, skip_overhead) derived from env/profile."""
|
|
205
|
+
|
|
206
|
+
skip_overhead_env = (
|
|
207
|
+
os.environ.get("INVARLOCK_SKIP_OVERHEAD_CHECK", "").strip().lower()
|
|
208
|
+
)
|
|
209
|
+
skip_overhead = skip_overhead_env in {"1", "true", "yes"}
|
|
210
|
+
measure_guard_overhead = (
|
|
211
|
+
profile_normalized in {"ci", "release"} and not skip_overhead
|
|
212
|
+
)
|
|
213
|
+
return measure_guard_overhead, skip_overhead
|
|
214
|
+
|
|
215
|
+
|
|
84
216
|
def _choose_dataset_split(
|
|
85
217
|
*, requested: str | None, available: list[str] | None
|
|
86
218
|
) -> tuple[str, bool]:
|
|
@@ -687,6 +819,51 @@ def _resolve_provider_and_split(
|
|
|
687
819
|
return data_provider, resolved_split, used_fallback_split
|
|
688
820
|
|
|
689
821
|
|
|
822
|
+
def _extract_model_load_kwargs(cfg: InvarLockConfig) -> dict[str, Any]:
|
|
823
|
+
"""Return adapter.load_model kwargs from config (excluding core fields)."""
|
|
824
|
+
try:
|
|
825
|
+
data = cfg.model_dump()
|
|
826
|
+
except Exception:
|
|
827
|
+
data = {}
|
|
828
|
+
model = data.get("model") if isinstance(data, dict) else None
|
|
829
|
+
if not isinstance(model, dict):
|
|
830
|
+
return {}
|
|
831
|
+
return {
|
|
832
|
+
key: value
|
|
833
|
+
for key, value in model.items()
|
|
834
|
+
if key not in {"id", "adapter", "device"} and value is not None
|
|
835
|
+
}
|
|
836
|
+
|
|
837
|
+
|
|
838
|
+
def _load_model_with_cfg(adapter: Any, cfg: InvarLockConfig, device: str) -> Any:
|
|
839
|
+
"""Load a model with config-provided kwargs, filtering for strict adapters."""
|
|
840
|
+
try:
|
|
841
|
+
model_id = cfg.model.id
|
|
842
|
+
except Exception:
|
|
843
|
+
try:
|
|
844
|
+
model_id = (cfg.model_dump().get("model") or {}).get("id")
|
|
845
|
+
except Exception:
|
|
846
|
+
model_id = None
|
|
847
|
+
if not isinstance(model_id, str) or not model_id:
|
|
848
|
+
raise ValueError("Missing model.id in config")
|
|
849
|
+
|
|
850
|
+
extra = _extract_model_load_kwargs(cfg)
|
|
851
|
+
try:
|
|
852
|
+
sig = inspect.signature(adapter.load_model)
|
|
853
|
+
accepts_var_kw = any(
|
|
854
|
+
p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
|
|
855
|
+
)
|
|
856
|
+
if accepts_var_kw:
|
|
857
|
+
return adapter.load_model(model_id, device=device, **extra)
|
|
858
|
+
allowed = {k: v for k, v in extra.items() if k in sig.parameters}
|
|
859
|
+
if allowed:
|
|
860
|
+
return adapter.load_model(model_id, device=device, **allowed)
|
|
861
|
+
except Exception:
|
|
862
|
+
# Fall back to the strictest call shape.
|
|
863
|
+
pass
|
|
864
|
+
return adapter.load_model(model_id, device=device)
|
|
865
|
+
|
|
866
|
+
|
|
690
867
|
def _run_bare_control(
|
|
691
868
|
*,
|
|
692
869
|
adapter: Any,
|
|
@@ -768,6 +945,7 @@ def _run_bare_control(
|
|
|
768
945
|
"errors": [],
|
|
769
946
|
"checks": {},
|
|
770
947
|
"source": f"{profile_normalized or 'ci'}_profile",
|
|
948
|
+
"mode": "bare",
|
|
771
949
|
}
|
|
772
950
|
|
|
773
951
|
if getattr(bare_report, "status", "").lower() not in {"success", "completed", "ok"}:
|
|
@@ -846,7 +1024,7 @@ def _postprocess_and_summarize(
|
|
|
846
1024
|
match_fraction: float | None,
|
|
847
1025
|
overlap_fraction: float | None,
|
|
848
1026
|
console: Console,
|
|
849
|
-
) ->
|
|
1027
|
+
) -> dict[str, str]:
|
|
850
1028
|
"""Finalize report windows stats and print/save summary artifacts."""
|
|
851
1029
|
try:
|
|
852
1030
|
ds = report.setdefault("dataset", {}).setdefault("windows", {})
|
|
@@ -870,6 +1048,7 @@ def _postprocess_and_summarize(
|
|
|
870
1048
|
console.print(f"📄 Report: {saved_files['json']}")
|
|
871
1049
|
if run_config.event_path:
|
|
872
1050
|
console.print(f"📝 Events: {run_config.event_path}")
|
|
1051
|
+
return saved_files
|
|
873
1052
|
|
|
874
1053
|
|
|
875
1054
|
def _compute_provider_digest(report: dict[str, Any]) -> dict[str, str] | None:
|
|
@@ -1406,6 +1585,7 @@ def run_command(
|
|
|
1406
1585
|
no_cleanup = bool(_coerce_option(no_cleanup, False))
|
|
1407
1586
|
|
|
1408
1587
|
# Use shared CLI coercers from invarlock.cli.utils
|
|
1588
|
+
report_path_out: str | None = None
|
|
1409
1589
|
|
|
1410
1590
|
def _fail_run(message: str) -> None:
|
|
1411
1591
|
console.print(f"[red]❌ {message}[/red]")
|
|
@@ -1542,6 +1722,26 @@ def run_command(
|
|
|
1542
1722
|
cfg, device=device, out=out, console=console
|
|
1543
1723
|
)
|
|
1544
1724
|
|
|
1725
|
+
determinism_meta: dict[str, Any] | None = None
|
|
1726
|
+
try:
|
|
1727
|
+
from invarlock.cli.determinism import apply_determinism_preset
|
|
1728
|
+
|
|
1729
|
+
preset = apply_determinism_preset(
|
|
1730
|
+
profile=profile_label,
|
|
1731
|
+
device=resolved_device,
|
|
1732
|
+
seed=int(seed_bundle.get("python") or seed_value),
|
|
1733
|
+
threads=int(os.environ.get("INVARLOCK_OMP_THREADS", 1) or 1),
|
|
1734
|
+
)
|
|
1735
|
+
if isinstance(preset, dict) and preset:
|
|
1736
|
+
determinism_meta = preset
|
|
1737
|
+
preset_seeds = preset.get("seeds")
|
|
1738
|
+
if isinstance(preset_seeds, dict) and preset_seeds:
|
|
1739
|
+
for key in ("python", "numpy", "torch"):
|
|
1740
|
+
if key in preset_seeds:
|
|
1741
|
+
seed_bundle[key] = preset_seeds.get(key)
|
|
1742
|
+
except Exception:
|
|
1743
|
+
determinism_meta = None
|
|
1744
|
+
|
|
1545
1745
|
# Create run directory with timestamp
|
|
1546
1746
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
1547
1747
|
run_dir = output_dir / timestamp
|
|
@@ -1671,6 +1871,7 @@ def run_command(
|
|
|
1671
1871
|
"edit": edit_meta,
|
|
1672
1872
|
"guards": guard_metadata,
|
|
1673
1873
|
}
|
|
1874
|
+
pm_acceptance_range = _resolve_pm_acceptance_range(cfg)
|
|
1674
1875
|
|
|
1675
1876
|
console.print(f"🔌 Adapter: {adapter.name}")
|
|
1676
1877
|
|
|
@@ -1746,6 +1947,10 @@ def run_command(
|
|
|
1746
1947
|
"plugins": plugin_provenance,
|
|
1747
1948
|
"run_id": run_id,
|
|
1748
1949
|
}
|
|
1950
|
+
run_context.setdefault("primary_metric", {})["acceptance_range"] = (
|
|
1951
|
+
pm_acceptance_range
|
|
1952
|
+
)
|
|
1953
|
+
run_context["pm_acceptance_range"] = pm_acceptance_range
|
|
1749
1954
|
run_context["model_profile"] = {
|
|
1750
1955
|
"family": model_profile.family,
|
|
1751
1956
|
"default_loss": model_profile.default_loss,
|
|
@@ -2756,18 +2961,26 @@ def run_command(
|
|
|
2756
2961
|
|
|
2757
2962
|
restore_fn = _restore2
|
|
2758
2963
|
else:
|
|
2759
|
-
# reload path
|
|
2964
|
+
# reload path - properly free GPU memory before setting to None
|
|
2965
|
+
_free_model_memory(model)
|
|
2760
2966
|
model = None
|
|
2761
2967
|
restore_fn = None
|
|
2762
2968
|
except Exception:
|
|
2763
2969
|
# On any failure, fall back to reload-per-attempt path
|
|
2970
|
+
_free_model_memory(model)
|
|
2764
2971
|
model = None
|
|
2765
2972
|
restore_fn = None
|
|
2766
2973
|
|
|
2767
2974
|
# RETRY LOOP - All report processing inside loop
|
|
2768
2975
|
attempt = 1
|
|
2769
2976
|
profile_normalized = (profile or "").lower()
|
|
2770
|
-
measure_guard_overhead =
|
|
2977
|
+
measure_guard_overhead, skip_overhead = _should_measure_overhead(
|
|
2978
|
+
profile_normalized
|
|
2979
|
+
)
|
|
2980
|
+
if skip_overhead and profile_normalized in {"ci", "release"}:
|
|
2981
|
+
console.print(
|
|
2982
|
+
"[yellow]⚠️ Overhead check skipped via INVARLOCK_SKIP_OVERHEAD_CHECK[/yellow]"
|
|
2983
|
+
)
|
|
2771
2984
|
|
|
2772
2985
|
while True:
|
|
2773
2986
|
# Reset RNG streams each attempt to guarantee determinism across retries
|
|
@@ -2790,7 +3003,23 @@ def run_command(
|
|
|
2790
3003
|
)
|
|
2791
3004
|
|
|
2792
3005
|
guard_overhead_payload: dict[str, Any] | None = None
|
|
2793
|
-
if
|
|
3006
|
+
if skip_overhead and profile_normalized in {"ci", "release"}:
|
|
3007
|
+
guard_overhead_payload = {
|
|
3008
|
+
"overhead_threshold": GUARD_OVERHEAD_THRESHOLD,
|
|
3009
|
+
"evaluated": False,
|
|
3010
|
+
"passed": True,
|
|
3011
|
+
"skipped": True,
|
|
3012
|
+
"skip_reason": "INVARLOCK_SKIP_OVERHEAD_CHECK",
|
|
3013
|
+
"mode": "skipped",
|
|
3014
|
+
"source": "env:INVARLOCK_SKIP_OVERHEAD_CHECK",
|
|
3015
|
+
"messages": [
|
|
3016
|
+
"Overhead check skipped via INVARLOCK_SKIP_OVERHEAD_CHECK"
|
|
3017
|
+
],
|
|
3018
|
+
"warnings": [],
|
|
3019
|
+
"errors": [],
|
|
3020
|
+
"checks": {},
|
|
3021
|
+
}
|
|
3022
|
+
elif measure_guard_overhead:
|
|
2794
3023
|
guard_overhead_payload = _run_bare_control(
|
|
2795
3024
|
adapter=adapter,
|
|
2796
3025
|
edit_op=edit_op,
|
|
@@ -2932,7 +3161,11 @@ def run_command(
|
|
|
2932
3161
|
meta_payload["invarlock_version"] = invarlock_version
|
|
2933
3162
|
if env_flags:
|
|
2934
3163
|
meta_payload["env_flags"] = env_flags
|
|
3164
|
+
if determinism_meta:
|
|
3165
|
+
meta_payload["determinism"] = determinism_meta
|
|
2935
3166
|
report["meta"].update(meta_payload)
|
|
3167
|
+
if pm_acceptance_range:
|
|
3168
|
+
report["meta"]["pm_acceptance_range"] = pm_acceptance_range
|
|
2936
3169
|
report["meta"]["model_profile"] = {
|
|
2937
3170
|
"family": model_profile.family,
|
|
2938
3171
|
"default_loss": model_profile.default_loss,
|
|
@@ -3089,87 +3322,90 @@ def run_command(
|
|
|
3089
3322
|
report["metrics"].update(metrics_payload)
|
|
3090
3323
|
|
|
3091
3324
|
if guard_overhead_payload is not None:
|
|
3092
|
-
|
|
3093
|
-
|
|
3094
|
-
|
|
3095
|
-
|
|
3096
|
-
|
|
3097
|
-
|
|
3098
|
-
|
|
3099
|
-
|
|
3100
|
-
|
|
3101
|
-
|
|
3325
|
+
if bool(guard_overhead_payload.get("skipped", False)):
|
|
3326
|
+
report["guard_overhead"] = guard_overhead_payload
|
|
3327
|
+
else:
|
|
3328
|
+
# Compute guarded primary-metric snapshot; pass structured reports into validator
|
|
3329
|
+
try:
|
|
3330
|
+
# Map loss type to ppl family kind
|
|
3331
|
+
lk = str(resolved_loss_type or "causal").lower()
|
|
3332
|
+
if lk == "mlm":
|
|
3333
|
+
pm_kind_for_overhead = "ppl_mlm"
|
|
3334
|
+
elif lk in {"seq2seq", "s2s", "t5"}:
|
|
3335
|
+
pm_kind_for_overhead = "ppl_seq2seq"
|
|
3336
|
+
else:
|
|
3337
|
+
pm_kind_for_overhead = "ppl_causal"
|
|
3102
3338
|
|
|
3103
|
-
|
|
3104
|
-
pm_guarded = _extract_pm_snapshot_for_overhead(
|
|
3105
|
-
core_report, kind=pm_kind_for_overhead
|
|
3106
|
-
)
|
|
3107
|
-
if not isinstance(pm_guarded, dict) or not pm_guarded:
|
|
3339
|
+
# Prefer computing from the in-memory core_report windows to avoid ordering issues
|
|
3108
3340
|
pm_guarded = _extract_pm_snapshot_for_overhead(
|
|
3109
|
-
|
|
3341
|
+
core_report, kind=pm_kind_for_overhead
|
|
3110
3342
|
)
|
|
3343
|
+
if not isinstance(pm_guarded, dict) or not pm_guarded:
|
|
3344
|
+
pm_guarded = _extract_pm_snapshot_for_overhead(
|
|
3345
|
+
report, kind=pm_kind_for_overhead
|
|
3346
|
+
)
|
|
3111
3347
|
|
|
3112
|
-
|
|
3113
|
-
|
|
3114
|
-
|
|
3115
|
-
|
|
3348
|
+
guard_overhead_payload["guarded_report"] = (
|
|
3349
|
+
{"metrics": {"primary_metric": pm_guarded}}
|
|
3350
|
+
if isinstance(pm_guarded, dict) and pm_guarded
|
|
3351
|
+
else None
|
|
3352
|
+
)
|
|
3353
|
+
except Exception:
|
|
3354
|
+
guard_overhead_payload["guarded_report"] = None
|
|
3355
|
+
bare_struct = guard_overhead_payload.get("bare_report") or {}
|
|
3356
|
+
guarded_struct = guard_overhead_payload.get("guarded_report") or {}
|
|
3357
|
+
# Be robust to mocks or minimal objects returned by validators
|
|
3358
|
+
result = validate_guard_overhead(
|
|
3359
|
+
bare_struct,
|
|
3360
|
+
guarded_struct,
|
|
3361
|
+
overhead_threshold=guard_overhead_payload.get(
|
|
3362
|
+
"overhead_threshold", GUARD_OVERHEAD_THRESHOLD
|
|
3363
|
+
),
|
|
3116
3364
|
)
|
|
3117
|
-
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
|
|
3121
|
-
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
"
|
|
3127
|
-
|
|
3128
|
-
|
|
3129
|
-
|
|
3130
|
-
|
|
3131
|
-
|
|
3132
|
-
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
|
|
3136
|
-
|
|
3137
|
-
|
|
3138
|
-
|
|
3139
|
-
|
|
3140
|
-
|
|
3141
|
-
|
|
3142
|
-
|
|
3143
|
-
|
|
3144
|
-
|
|
3145
|
-
|
|
3146
|
-
|
|
3147
|
-
|
|
3148
|
-
|
|
3149
|
-
|
|
3150
|
-
|
|
3151
|
-
|
|
3152
|
-
|
|
3153
|
-
|
|
3154
|
-
|
|
3155
|
-
|
|
3156
|
-
|
|
3157
|
-
|
|
3158
|
-
|
|
3159
|
-
|
|
3160
|
-
|
|
3161
|
-
"checks": checks,
|
|
3162
|
-
"overhead_ratio": overhead_ratio,
|
|
3163
|
-
"overhead_percent": overhead_percent,
|
|
3164
|
-
"passed": passed_flag,
|
|
3165
|
-
"evaluated": True,
|
|
3166
|
-
}
|
|
3167
|
-
)
|
|
3168
|
-
# Normalize for non-finite/degenerate cases
|
|
3169
|
-
guard_overhead_payload = _normalize_overhead_result(
|
|
3170
|
-
guard_overhead_payload, profile=profile_normalized
|
|
3171
|
-
)
|
|
3172
|
-
report["guard_overhead"] = guard_overhead_payload
|
|
3365
|
+
try:
|
|
3366
|
+
messages = list(getattr(result, "messages", []))
|
|
3367
|
+
except Exception: # pragma: no cover - defensive
|
|
3368
|
+
messages = []
|
|
3369
|
+
try:
|
|
3370
|
+
warnings = list(getattr(result, "warnings", []))
|
|
3371
|
+
except Exception: # pragma: no cover - defensive
|
|
3372
|
+
warnings = []
|
|
3373
|
+
try:
|
|
3374
|
+
errors = list(getattr(result, "errors", []))
|
|
3375
|
+
except Exception: # pragma: no cover - defensive
|
|
3376
|
+
errors = []
|
|
3377
|
+
try:
|
|
3378
|
+
checks = dict(getattr(result, "checks", {}))
|
|
3379
|
+
except Exception: # pragma: no cover - defensive
|
|
3380
|
+
checks = {}
|
|
3381
|
+
metrics_obj = getattr(result, "metrics", {})
|
|
3382
|
+
if not isinstance(metrics_obj, dict):
|
|
3383
|
+
metrics_obj = {}
|
|
3384
|
+
overhead_ratio = metrics_obj.get("overhead_ratio")
|
|
3385
|
+
if overhead_ratio is None:
|
|
3386
|
+
overhead_ratio = getattr(result, "overhead_ratio", None)
|
|
3387
|
+
overhead_percent = metrics_obj.get("overhead_percent")
|
|
3388
|
+
if overhead_percent is None:
|
|
3389
|
+
overhead_percent = getattr(result, "overhead_percent", None)
|
|
3390
|
+
passed_flag = bool(getattr(result, "passed", False))
|
|
3391
|
+
|
|
3392
|
+
guard_overhead_payload.update(
|
|
3393
|
+
{
|
|
3394
|
+
"messages": messages,
|
|
3395
|
+
"warnings": warnings,
|
|
3396
|
+
"errors": errors,
|
|
3397
|
+
"checks": checks,
|
|
3398
|
+
"overhead_ratio": overhead_ratio,
|
|
3399
|
+
"overhead_percent": overhead_percent,
|
|
3400
|
+
"passed": passed_flag,
|
|
3401
|
+
"evaluated": True,
|
|
3402
|
+
}
|
|
3403
|
+
)
|
|
3404
|
+
# Normalize for non-finite/degenerate cases
|
|
3405
|
+
guard_overhead_payload = _normalize_overhead_result(
|
|
3406
|
+
guard_overhead_payload, profile=profile_normalized
|
|
3407
|
+
)
|
|
3408
|
+
report["guard_overhead"] = guard_overhead_payload
|
|
3173
3409
|
|
|
3174
3410
|
had_baseline = bool(baseline and Path(baseline).exists())
|
|
3175
3411
|
if (
|
|
@@ -3714,7 +3950,7 @@ def run_command(
|
|
|
3714
3950
|
except Exception:
|
|
3715
3951
|
pass
|
|
3716
3952
|
|
|
3717
|
-
_postprocess_and_summarize(
|
|
3953
|
+
saved_files = _postprocess_and_summarize(
|
|
3718
3954
|
report=report,
|
|
3719
3955
|
run_dir=run_dir,
|
|
3720
3956
|
run_config=run_config,
|
|
@@ -3724,6 +3960,11 @@ def run_command(
|
|
|
3724
3960
|
overlap_fraction=overlap_fraction,
|
|
3725
3961
|
console=console,
|
|
3726
3962
|
)
|
|
3963
|
+
try:
|
|
3964
|
+
if isinstance(saved_files, dict) and saved_files.get("json"):
|
|
3965
|
+
report_path_out = str(saved_files["json"])
|
|
3966
|
+
except Exception:
|
|
3967
|
+
pass
|
|
3727
3968
|
|
|
3728
3969
|
# Metrics display
|
|
3729
3970
|
pm_obj = None
|
|
@@ -3914,6 +4155,7 @@ def run_command(
|
|
|
3914
4155
|
pass
|
|
3915
4156
|
|
|
3916
4157
|
# Normal path falls through; cleanup handled below in finally
|
|
4158
|
+
return report_path_out
|
|
3917
4159
|
|
|
3918
4160
|
except FileNotFoundError as e:
|
|
3919
4161
|
console.print(f"[red]❌ Configuration file not found: {e}[/red]")
|