invarlock 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. invarlock/__init__.py +1 -1
  2. invarlock/_data/runtime/profiles/ci_cpu.yaml +5 -0
  3. invarlock/_data/runtime/tiers.yaml +61 -0
  4. invarlock/adapters/hf_loading.py +97 -0
  5. invarlock/calibration/__init__.py +6 -0
  6. invarlock/calibration/spectral_null.py +301 -0
  7. invarlock/calibration/variance_ve.py +154 -0
  8. invarlock/cli/app.py +15 -0
  9. invarlock/cli/commands/calibrate.py +576 -0
  10. invarlock/cli/commands/doctor.py +16 -4
  11. invarlock/cli/commands/explain_gates.py +53 -9
  12. invarlock/cli/commands/plugins.py +12 -2
  13. invarlock/cli/commands/run.py +323 -81
  14. invarlock/cli/commands/verify.py +40 -0
  15. invarlock/cli/determinism.py +237 -0
  16. invarlock/core/auto_tuning.py +215 -17
  17. invarlock/core/registry.py +9 -4
  18. invarlock/eval/bench.py +467 -141
  19. invarlock/eval/bench_regression.py +12 -0
  20. invarlock/eval/data.py +29 -7
  21. invarlock/guards/spectral.py +216 -9
  22. invarlock/guards/variance.py +6 -3
  23. invarlock/reporting/certificate.py +403 -51
  24. invarlock/reporting/certificate_schema.py +4 -1
  25. invarlock/reporting/guards_analysis.py +108 -10
  26. invarlock/reporting/normalizer.py +21 -1
  27. invarlock/reporting/policy_utils.py +100 -16
  28. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/METADATA +12 -10
  29. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/RECORD +33 -26
  30. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/WHEEL +0 -0
  31. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/entry_points.txt +0 -0
  32. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/licenses/LICENSE +0 -0
  33. {invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/top_level.txt +0 -0
invarlock/__init__.py CHANGED
@@ -12,7 +12,7 @@ For torch-dependent functionality, see subpackages under `invarlock.*`:
12
12
  - `invarlock.eval`: Metrics, guard-overhead checks, and certification
13
13
  """
14
14
 
15
- __version__ = "0.3.0"
15
+ __version__ = "0.3.2"
16
16
 
17
17
  # Core exports - torch-independent
18
18
  from .config import CFG, Defaults, get_default_config
@@ -11,5 +11,10 @@ dataset:
11
11
  final_n: 120
12
12
  stride: 512
13
13
 
14
+ primary_metric:
15
+ acceptance_range:
16
+ min: 0.95
17
+ max: 1.15
18
+
14
19
  context:
15
20
  telemetry_profile: "ci_cpu"
@@ -5,6 +5,17 @@
5
5
  # embedded in certificates and referenced by automation documentation.
6
6
 
7
7
  balanced:
8
+ metrics:
9
+ pm_ratio:
10
+ ratio_limit_base: 1.10
11
+ min_tokens: 50000
12
+ hysteresis_ratio: 0.002
13
+ min_token_fraction: 0.01
14
+ accuracy:
15
+ delta_min_pp: -1.0
16
+ min_examples: 200
17
+ hysteresis_delta_pp: 0.1
18
+ min_examples_fraction: 0.01
8
19
  variance_guard:
9
20
  deadband: 0.02
10
21
  min_abs_adjust: 0.012
@@ -41,6 +52,17 @@ balanced:
41
52
  other: 0.12
42
53
 
43
54
  conservative:
55
+ metrics:
56
+ pm_ratio:
57
+ ratio_limit_base: 1.05
58
+ min_tokens: 20000
59
+ hysteresis_ratio: 0.002
60
+ min_token_fraction: 0.01
61
+ accuracy:
62
+ delta_min_pp: -0.5
63
+ min_examples: 200
64
+ hysteresis_delta_pp: 0.1
65
+ min_examples_fraction: 0.01
44
66
  variance_guard:
45
67
  deadband: 0.03
46
68
  min_abs_adjust: 0.02
@@ -74,3 +96,42 @@ conservative:
74
96
  attn: 0.05
75
97
  embed: 0.07
76
98
  other: 0.07
99
+
100
+ aggressive:
101
+ metrics:
102
+ pm_ratio:
103
+ ratio_limit_base: 1.20
104
+ min_tokens: 50000
105
+ hysteresis_ratio: 0.002
106
+ min_token_fraction: 0.01
107
+ accuracy:
108
+ delta_min_pp: -2.0
109
+ min_examples: 200
110
+ hysteresis_delta_pp: 0.1
111
+ min_examples_fraction: 0.01
112
+ variance_guard:
113
+ deadband: 0.12
114
+ min_effect_lognll: 0.0005
115
+ spectral_guard:
116
+ sigma_quantile: 0.98
117
+ deadband: 0.15
118
+ scope: ffn
119
+ max_caps: 8
120
+ family_caps:
121
+ ffn: 3.0
122
+ attn: 3.5
123
+ embed: 2.5
124
+ other: 3.5
125
+ multiple_testing:
126
+ method: bh
127
+ alpha: 0.1
128
+ m: 4
129
+ rmt_guard:
130
+ deadband: 0.15
131
+ margin: 1.8
132
+ epsilon_default: 0.15
133
+ epsilon_by_family:
134
+ ffn: 0.15
135
+ attn: 0.15
136
+ embed: 0.15
137
+ other: 0.15
@@ -0,0 +1,97 @@
1
+ """Helpers for Hugging Face model loading.
2
+
3
+ Centralizes security- and performance-sensitive defaults used by HF adapters.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ from typing import Any
10
+
11
+ import torch
12
+
13
+ _TRUE = {"1", "true", "yes", "on"}
14
+ _FALSE = {"0", "false", "no", "off"}
15
+
16
+
17
+ def _coerce_bool(val: Any) -> bool | None:
18
+ if isinstance(val, bool):
19
+ return val
20
+ if isinstance(val, int):
21
+ return bool(val)
22
+ if isinstance(val, str):
23
+ s = val.strip().lower()
24
+ if s in _TRUE:
25
+ return True
26
+ if s in _FALSE:
27
+ return False
28
+ return None
29
+
30
+
31
def resolve_trust_remote_code(
    kwargs: dict[str, Any] | None = None, *, default: bool = False
) -> bool:
    """Resolve trust_remote_code with config override and env opt-in.

    Resolution order:
      1. ``kwargs["trust_remote_code"]`` when present and coercible to a bool.
      2. The first of the environment variables ``INVARLOCK_TRUST_REMOTE_CODE``,
         ``TRUST_REMOTE_CODE_BOOL``, ``ALLOW_REMOTE_CODE`` whose value is
         coercible to a bool.
      3. ``default``.
    """
    if kwargs and "trust_remote_code" in kwargs:
        explicit = _coerce_bool(kwargs.get("trust_remote_code"))
        if explicit is not None:
            return explicit

    env_names = (
        "INVARLOCK_TRUST_REMOTE_CODE",
        "TRUST_REMOTE_CODE_BOOL",
        "ALLOW_REMOTE_CODE",
    )
    # Lazily walk the variables so the first coercible value wins.
    env_flags = (_coerce_bool(os.environ.get(name)) for name in env_names)
    return next((flag for flag in env_flags if flag is not None), default)
51
+
52
+
53
def default_torch_dtype() -> torch.dtype:
    """Pick a safe default dtype for HF loads based on hardware.

    Returns:
        ``torch.bfloat16`` when CUDA is available and reports bf16 support,
        ``torch.float16`` on any other CUDA device or when MPS is available,
        and ``torch.float32`` otherwise.
    """
    if not torch.cuda.is_available():
        on_mps = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
        return torch.float16 if on_mps else torch.float32

    try:
        bf16_ok = (
            hasattr(torch.cuda, "is_bf16_supported")
            and torch.cuda.is_bf16_supported()
        )
    except Exception:
        # Best effort: a failing capability probe falls back to fp16.
        bf16_ok = False
    return torch.bfloat16 if bf16_ok else torch.float16
70
+
71
+
72
+ def resolve_torch_dtype(kwargs: dict[str, Any] | None = None) -> torch.dtype | str:
73
+ """Resolve torch_dtype from kwargs or choose a hardware-aware default."""
74
+ if kwargs and "torch_dtype" in kwargs:
75
+ val = kwargs.get("torch_dtype")
76
+ if isinstance(val, torch.dtype):
77
+ return val
78
+ if isinstance(val, str):
79
+ s = val.strip().lower()
80
+ if s == "auto":
81
+ return "auto"
82
+ mapping = {
83
+ "float16": torch.float16,
84
+ "fp16": torch.float16,
85
+ "half": torch.float16,
86
+ "bfloat16": torch.bfloat16,
87
+ "bf16": torch.bfloat16,
88
+ "float32": torch.float32,
89
+ "fp32": torch.float32,
90
+ }
91
+ if s in mapping:
92
+ return mapping[s]
93
+
94
+ return default_torch_dtype()
95
+
96
+
97
+ __all__ = ["resolve_trust_remote_code", "default_torch_dtype", "resolve_torch_dtype"]
@@ -0,0 +1,6 @@
1
+ """Calibration helpers and sweep harness utilities."""
2
+
3
+ __all__ = [
4
+ "spectral_null",
5
+ "variance_ve",
6
+ ]
@@ -0,0 +1,301 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ from collections import Counter, defaultdict
5
+ from typing import Any
6
+
7
+
8
+ def _finite01(value: Any) -> bool:
9
+ try:
10
+ f = float(value)
11
+ return math.isfinite(f) and 0.0 <= f <= 1.0
12
+ except Exception:
13
+ return False
14
+
15
+
16
+ def _bh_reject_families(
17
+ family_pvals: dict[str, float],
18
+ *,
19
+ alpha: float,
20
+ m: int,
21
+ ) -> set[str]:
22
+ if not family_pvals:
23
+ return set()
24
+ try:
25
+ alpha_f = float(alpha)
26
+ except Exception:
27
+ return set()
28
+ if not (0.0 < alpha_f <= 1.0):
29
+ return set()
30
+
31
+ names = list(family_pvals.keys())
32
+ pvals = [family_pvals[name] for name in names]
33
+ n = len(pvals)
34
+ m_eff = max(int(m) if isinstance(m, int) else 0, n, 1)
35
+
36
+ order = sorted(
37
+ range(n),
38
+ key=lambda idx: (float("inf") if not _finite01(pvals[idx]) else pvals[idx]),
39
+ )
40
+ max_k = 0
41
+ for rank, idx in enumerate(order, start=1):
42
+ p = pvals[idx]
43
+ if not _finite01(p):
44
+ continue
45
+ if p <= (alpha_f * rank) / m_eff:
46
+ max_k = rank
47
+ if max_k <= 0:
48
+ return set()
49
+ cutoff = (alpha_f * max_k) / m_eff
50
+ selected: set[str] = set()
51
+ for idx in order:
52
+ p = pvals[idx]
53
+ if _finite01(p) and p <= cutoff:
54
+ selected.add(names[idx])
55
+ return selected
56
+
57
+
58
+ def _bonferroni_reject_families(
59
+ family_pvals: dict[str, float],
60
+ *,
61
+ alpha: float,
62
+ m: int,
63
+ ) -> set[str]:
64
+ if not family_pvals:
65
+ return set()
66
+ try:
67
+ alpha_f = float(alpha)
68
+ except Exception:
69
+ return set()
70
+ if not (0.0 < alpha_f <= 1.0):
71
+ return set()
72
+ m_eff = max(int(m) if isinstance(m, int) else 0, len(family_pvals), 1)
73
+ cutoff = alpha_f / m_eff
74
+ return {fam for fam, p in family_pvals.items() if _finite01(p) and p <= cutoff}
75
+
76
+
77
+ def _extract_guard(report: dict[str, Any], name: str) -> dict[str, Any] | None:
78
+ guards = report.get("guards")
79
+ if isinstance(guards, list):
80
+ for item in guards:
81
+ if isinstance(item, dict) and item.get("name") == name:
82
+ return item
83
+ return None
84
+
85
+
86
+ def _extract_family_max_z(metrics: dict[str, Any]) -> dict[str, float]:
87
+ out: dict[str, float] = {}
88
+ summary = metrics.get("family_z_summary")
89
+ if isinstance(summary, dict):
90
+ for fam, vals in summary.items():
91
+ if not isinstance(vals, dict):
92
+ continue
93
+ z = vals.get("max")
94
+ try:
95
+ if z is not None and math.isfinite(float(z)):
96
+ out[str(fam)] = float(z)
97
+ except Exception:
98
+ continue
99
+ q = metrics.get("family_z_quantiles")
100
+ if isinstance(q, dict):
101
+ for fam, vals in q.items():
102
+ if not isinstance(vals, dict):
103
+ continue
104
+ z = vals.get("max")
105
+ try:
106
+ if z is not None and math.isfinite(float(z)):
107
+ out[str(fam)] = max(out.get(str(fam), float("-inf")), float(z))
108
+ except Exception:
109
+ continue
110
+ return out
111
+
112
+
113
+ def _extract_multiple_testing(metrics: dict[str, Any]) -> dict[str, Any]:
114
+ mt = metrics.get("multiple_testing")
115
+ if not isinstance(mt, dict):
116
+ return {}
117
+ out: dict[str, Any] = {}
118
+ method = mt.get("method")
119
+ if isinstance(method, str) and method.strip():
120
+ out["method"] = method.strip().lower()
121
+ try:
122
+ alpha = mt.get("alpha")
123
+ if alpha is not None:
124
+ out["alpha"] = float(alpha)
125
+ except Exception:
126
+ pass
127
+ try:
128
+ m_val = mt.get("m")
129
+ if m_val is not None:
130
+ out["m"] = int(m_val)
131
+ except Exception:
132
+ pass
133
+ return out
134
+
135
+
136
def _selected_families_for_alpha(
    pvals: dict[str, float],
    *,
    method: str,
    alpha: float,
    m: int,
) -> set[str]:
    """Apply the named multiple-testing correction and return rejected families.

    ``method`` is stripped and lower-cased; ``"bonferroni"`` selects the
    Bonferroni rule and every other value (including empty) selects BH.
    """
    normalized = (method or "").strip().lower()
    reject = (
        _bonferroni_reject_families
        if normalized == "bonferroni"
        else _bh_reject_families  # default: BH
    )
    return reject(pvals, alpha=alpha, m=m)
148
+
149
+
150
def summarize_null_sweep_reports(
    reports: list[dict[str, Any]],
    *,
    tier: str,
    safety_margin: float = 0.05,
    target_any_warning_rate: float = 0.01,
) -> dict[str, Any]:
    """Summarize spectral null-sweep results and recommend κ/alpha.

    Inputs are run report dicts produced by `invarlock run` (or equivalent).

    Args:
        reports: Run reports; non-dict entries are skipped.
        tier: Tier label, stripped and lower-cased; blank becomes "balanced".
        safety_margin: Relative margin added to the observed per-family max z.
            Values outside [0, 1] fall back to 0.05.
        target_any_warning_rate: Acceptable fraction of runs with any
            selection. Values outside [0, 1] fall back to 0.01.

    Returns:
        A dict with ``"tier"``, ``"n_runs"``, ``"observed"`` statistics and
        ``"recommendations"`` (``"family_caps"`` and ``"multiple_testing"``).
    """
    tier_label = (tier or "").strip().lower() or "balanced"

    cap_margin = float(safety_margin or 0.0)
    if not (0.0 <= cap_margin <= 1.0):
        cap_margin = 0.05
    rate_target = float(target_any_warning_rate or 0.0)
    if not (0.0 <= rate_target <= 1.0):
        rate_target = 0.01

    # Multiple-testing settings start from defaults and are overwritten by each
    # report that carries them, so the last such report wins.
    method, base_alpha, n_tests = "bh", 0.05, 4

    worst_z: dict[str, float] = {}
    warned: list[bool] = []
    pvals_per_run: list[dict[str, float]] = []
    selected_runs: Counter[str] = Counter()
    candidate_totals: Counter[str] = Counter()

    for report in reports:
        if not isinstance(report, dict):
            continue
        guard = _extract_guard(report, "spectral") or {}
        metrics = guard.get("metrics")
        if not isinstance(metrics, dict):
            metrics = {}

        settings = _extract_multiple_testing(metrics)
        if settings:
            method = str(settings.get("method", method))
            if settings.get("alpha") is not None:
                base_alpha = float(settings.get("alpha"))
            if settings.get("m") is not None:
                n_tests = int(settings.get("m"))

        for family, z_value in _extract_family_max_z(metrics).items():
            previous = worst_z.get(family, float("-inf"))
            worst_z[family] = max(previous, float(z_value))

        selection = metrics.get("multiple_testing_selection")
        if not isinstance(selection, dict):
            selection = {}

        raw_pvals = selection.get("family_pvalues")
        if not isinstance(raw_pvals, dict):
            raw_pvals = {}
        usable_pvals: dict[str, float] = {}
        for family, raw in raw_pvals.items():
            try:
                p_value = float(raw)
            except Exception:
                continue
            if _finite01(p_value):
                usable_pvals[str(family)] = p_value
        # Every dict report contributes an entry, even when it has no p-values.
        pvals_per_run.append(usable_pvals)

        picked = selection.get("families_selected")
        if isinstance(picked, list):
            selected_runs.update(str(family) for family in picked)

        violation_counts = selection.get("family_violation_counts")
        if isinstance(violation_counts, dict):
            for family, count in violation_counts.items():
                try:
                    candidate_totals[str(family)] += int(count)
                except Exception:
                    continue

        caps_raw = metrics.get("caps_applied")
        try:
            caps = 0 if caps_raw is None else int(caps_raw)
        except Exception:
            caps = 0
        violations = guard.get("violations", [])
        warned.append(bool(caps) or bool(violations))

    n_runs = len(warned)
    observed_rate = sum(1 for flag in warned if flag) / float(max(n_runs, 1))

    finite_max_z = {
        family: float(z_value)
        for family, z_value in sorted(worst_z.items())
        if math.isfinite(z_value)
    }

    # κ recommendation: max observed z per family (+ margin), rounded for
    # stable tiers.yaml diffs.
    recommended_caps = {
        family: float(round(z_value * (1.0 + cap_margin), 3))
        for family, z_value in finite_max_z.items()
    }

    def _any_selected_rate(alpha: float) -> float:
        # Fraction of runs in which the correction selects at least one family.
        hits = sum(
            1
            for run in pvals_per_run
            if _selected_families_for_alpha(
                run, method=method, alpha=alpha, m=n_tests
            )
        )
        return hits / float(max(len(pvals_per_run), 1))

    # α calibration: starting from the configured alpha, halve until the
    # p-value based selection rate meets the target (floor candidate: 1e-6).
    recommended_alpha = float(base_alpha)
    if pvals_per_run and observed_rate > rate_target:
        grid: list[float] = []
        step = float(base_alpha)
        while len(grid) < 20 and not (step <= 1e-6):
            grid.append(step)
            step *= 0.5
        grid.append(1e-6)
        first_ok = next(
            (cand for cand in grid if _any_selected_rate(cand) <= rate_target),
            None,
        )
        # When no candidate meets the target the configured alpha is kept.
        if first_ok is not None:
            recommended_alpha = float(first_ok)

    return {
        "tier": tier_label,
        "n_runs": int(n_runs),
        "observed": {
            "any_warning_rate": float(observed_rate),
            "selected_by_family_runs": dict(selected_runs),
            "candidate_violations_by_family_total": dict(candidate_totals),
            "family_max_z": finite_max_z,
        },
        "recommendations": {
            "family_caps": recommended_caps,
            "multiple_testing": {
                "method": str(method),
                "alpha": float(recommended_alpha),
                "m": int(n_tests),
            },
        },
    }
299
+
300
+
301
+ __all__ = ["summarize_null_sweep_reports"]
@@ -0,0 +1,154 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ from typing import Any
5
+
6
+
7
+ def _extract_guard(report: dict[str, Any], name: str) -> dict[str, Any] | None:
8
+ guards = report.get("guards")
9
+ if isinstance(guards, list):
10
+ for item in guards:
11
+ if isinstance(item, dict) and item.get("name") == name:
12
+ return item
13
+ return None
14
+
15
+
16
+ def _coerce_delta_ci(value: Any) -> tuple[float, float] | None:
17
+ if not (isinstance(value, tuple | list) and len(value) == 2):
18
+ return None
19
+ try:
20
+ lo = float(value[0])
21
+ hi = float(value[1])
22
+ except Exception:
23
+ return None
24
+ if not (math.isfinite(lo) and math.isfinite(hi)):
25
+ return None
26
+ return (lo, hi)
27
+
28
+
29
+ def _gain_lower_bound(
30
+ *, mean_delta: float | None, delta_ci: tuple[float, float] | None, one_sided: bool
31
+ ) -> float:
32
+ if delta_ci is None:
33
+ return 0.0
34
+ lo, hi = delta_ci
35
+ if hi >= 0.0:
36
+ return 0.0
37
+ if one_sided and (mean_delta is None or not (mean_delta < 0.0)):
38
+ return 0.0
39
+ # Gain CI lower bound is -upper (worst-case gain).
40
+ return max(0.0, -hi)
41
+
42
+
43
+ def _recommend_threshold_for_target_rate(
44
+ gains: list[float],
45
+ *,
46
+ target_rate: float,
47
+ safety_margin: float,
48
+ ) -> tuple[float, float]:
49
+ n = len(gains)
50
+ if n <= 0:
51
+ return 0.0, 0.0
52
+ target = float(target_rate)
53
+ if not (0.0 <= target <= 1.0):
54
+ target = 0.05
55
+ desired_passes = int(math.floor(target * n))
56
+ gains_desc = sorted((max(0.0, float(g)) for g in gains), reverse=True)
57
+
58
+ def pass_count(thr: float) -> int:
59
+ return sum(1 for g in gains_desc if g >= thr)
60
+
61
+ if desired_passes <= 0:
62
+ thr = (
63
+ (gains_desc[0] * (1.0 + max(0.0, safety_margin)))
64
+ if gains_desc[0] > 0
65
+ else 0.0
66
+ )
67
+ return float(round(thr, 3)), 0.0
68
+
69
+ unique_vals = sorted(set(gains_desc), reverse=True)
70
+ chosen = None
71
+ chosen_rate = 0.0
72
+ for val in unique_vals:
73
+ cnt = pass_count(val)
74
+ rate = cnt / float(n)
75
+ if cnt <= desired_passes:
76
+ chosen = float(val)
77
+ chosen_rate = float(rate)
78
+ break
79
+
80
+ if chosen is None:
81
+ # Ties at max prevent meeting desired_passes; force zero enable rate.
82
+ thr = gains_desc[0] * (1.0 + max(0.0, safety_margin))
83
+ return float(round(thr, 3)), 0.0
84
+
85
+ thr = chosen * (1.0 + max(0.0, safety_margin))
86
+ return float(round(thr, 3)), float(chosen_rate)
87
+
88
+
89
def summarize_ve_sweep_reports(
    reports: list[dict[str, Any]],
    *,
    tier: str,
    target_enable_rate: float = 0.05,
    safety_margin: float = 0.0,
    predictive_one_sided: bool = True,
) -> dict[str, Any]:
    """Summarize VE predictive-gate sweeps and recommend min_effect_lognll.

    Args:
        reports: Run reports; non-dict entries and reports whose variance
            guard has no ``"predictive_gate"`` dict are skipped.
        tier: Tier label, stripped and lower-cased; blank becomes "balanced".
        target_enable_rate: Forwarded as the target rate for the threshold
            recommendation.
        safety_margin: Relative threshold margin; values outside [0, 1]
            become 0.0.
        predictive_one_sided: Whether a gain additionally requires a negative
            mean delta.

    Returns:
        A dict with ``"tier"``, ``"n_runs"``, ``"observed"`` (evaluated run
        count and mean CI width, or ``None`` when no CI was usable) and
        ``"recommendations"``.
    """
    tier_label = (tier or "").strip().lower() or "balanced"
    use_one_sided = bool(predictive_one_sided)
    rel_margin = float(safety_margin or 0.0)
    if not (0.0 <= rel_margin <= 1.0):
        rel_margin = 0.0

    gate_gains: list[float] = []
    ci_widths: list[float] = []
    n_evaluated = 0

    for report in reports:
        if not isinstance(report, dict):
            continue
        guard = _extract_guard(report, "variance") or {}
        metrics = guard.get("metrics")
        gate = metrics.get("predictive_gate") if isinstance(metrics, dict) else None
        if not isinstance(gate, dict):
            continue

        # Only a literal True counts as evaluated.
        if gate.get("evaluated") is True:
            n_evaluated += 1

        interval = _coerce_delta_ci(gate.get("delta_ci"))
        raw_mean = gate.get("mean_delta")
        try:
            mean = None if raw_mean is None else float(raw_mean)
        except Exception:
            mean = None

        gate_gains.append(
            _gain_lower_bound(
                mean_delta=mean, delta_ci=interval, one_sided=use_one_sided
            )
        )
        if interval is not None:
            ci_widths.append(float(abs(interval[1] - interval[0])))

    threshold, expected_rate = _recommend_threshold_for_target_rate(
        gate_gains, target_rate=target_enable_rate, safety_margin=rel_margin
    )

    if ci_widths:
        mean_width = float(sum(ci_widths) / max(len(ci_widths), 1))
    else:
        mean_width = None

    return {
        "tier": tier_label,
        "n_runs": int(len(gate_gains)),
        "observed": {
            "evaluated_runs": int(n_evaluated),
            "mean_ci_width": mean_width,
        },
        "recommendations": {
            "min_effect_lognll": float(threshold),
            "expected_enable_rate": float(expected_rate),
        },
    }
152
+
153
+
154
+ __all__ = ["summarize_ve_sweep_reports"]
invarlock/cli/app.py CHANGED
@@ -34,6 +34,7 @@ class OrderedGroup(TyperGroup):
34
34
  def list_commands(self, ctx): # type: ignore[override]
35
35
  return [
36
36
  "certify",
37
+ "calibrate",
37
38
  "report",
38
39
  "verify",
39
40
  "run",
@@ -161,10 +162,24 @@ def _register_subapps() -> None:
161
162
  from .commands.plugins import plugins_app as _plugins_app
162
163
  from .commands.report import report_app as _report_app
163
164
 
165
+ # Always-available subapps (lightweight imports)
164
166
  app.add_typer(_report_app, name="report")
165
167
  app.add_typer(_plugins_app, name="plugins")
166
168
  app.command(name="doctor")(_doctor_cmd)
167
169
 
170
+ # Optional: calibration subapp. This transitively imports guards, which may
171
+ # depend on torch/transformers. In minimal environments (no heavy deps),
172
+ # skip registration so `python -m invarlock --help` stays import-safe.
173
+ try:
174
+ from .commands.calibrate import calibrate_app as _calibrate_app
175
+ except ModuleNotFoundError as exc: # pragma: no cover - exercised in venv test
176
+ missing = getattr(exc, "name", "") or ""
177
+ if missing in {"torch", "transformers"}:
178
+ return
179
+ raise
180
+ else:
181
+ app.add_typer(_calibrate_app, name="calibrate")
182
+
168
183
 
169
184
  @app.command(
170
185
  name="verify",