invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. invarlock/__init__.py +2 -2
  2. invarlock/_data/runtime/tiers.yaml +57 -30
  3. invarlock/adapters/__init__.py +11 -15
  4. invarlock/adapters/auto.py +35 -40
  5. invarlock/adapters/capabilities.py +2 -2
  6. invarlock/adapters/hf_causal.py +418 -0
  7. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  8. invarlock/adapters/hf_mixin.py +25 -4
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/calibration/spectral_null.py +15 -10
  12. invarlock/calibration/variance_ve.py +0 -2
  13. invarlock/cli/adapter_auto.py +31 -21
  14. invarlock/cli/app.py +73 -2
  15. invarlock/cli/commands/calibrate.py +6 -2
  16. invarlock/cli/commands/certify.py +651 -91
  17. invarlock/cli/commands/doctor.py +11 -11
  18. invarlock/cli/commands/explain_gates.py +57 -8
  19. invarlock/cli/commands/plugins.py +13 -9
  20. invarlock/cli/commands/report.py +233 -69
  21. invarlock/cli/commands/run.py +1066 -244
  22. invarlock/cli/commands/verify.py +154 -15
  23. invarlock/cli/config.py +22 -6
  24. invarlock/cli/doctor_helpers.py +4 -5
  25. invarlock/cli/output.py +193 -0
  26. invarlock/cli/provenance.py +1 -1
  27. invarlock/core/api.py +45 -5
  28. invarlock/core/auto_tuning.py +65 -20
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/contracts.py +7 -1
  31. invarlock/core/registry.py +11 -13
  32. invarlock/core/runner.py +425 -75
  33. invarlock/edits/quant_rtn.py +65 -37
  34. invarlock/eval/bench.py +3 -16
  35. invarlock/eval/data.py +82 -51
  36. invarlock/eval/metrics.py +63 -2
  37. invarlock/eval/primary_metric.py +23 -0
  38. invarlock/eval/tail_stats.py +230 -0
  39. invarlock/eval/tasks/__init__.py +12 -0
  40. invarlock/eval/tasks/classification.py +48 -0
  41. invarlock/eval/tasks/qa.py +36 -0
  42. invarlock/eval/tasks/text_generation.py +102 -0
  43. invarlock/guards/_estimators.py +154 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/policies.py +16 -6
  46. invarlock/guards/rmt.py +627 -546
  47. invarlock/guards/spectral.py +348 -110
  48. invarlock/guards/tier_config.py +32 -30
  49. invarlock/guards/variance.py +7 -31
  50. invarlock/guards_ref/rmt_ref.py +23 -23
  51. invarlock/model_profile.py +90 -42
  52. invarlock/observability/health.py +6 -6
  53. invarlock/observability/metrics.py +108 -0
  54. invarlock/reporting/certificate.py +384 -55
  55. invarlock/reporting/certificate_schema.py +3 -2
  56. invarlock/reporting/dataset_hashing.py +15 -2
  57. invarlock/reporting/guards_analysis.py +350 -277
  58. invarlock/reporting/html.py +55 -5
  59. invarlock/reporting/normalizer.py +13 -0
  60. invarlock/reporting/policy_utils.py +38 -36
  61. invarlock/reporting/primary_metric_utils.py +71 -17
  62. invarlock/reporting/render.py +852 -431
  63. invarlock/reporting/report.py +40 -4
  64. invarlock/reporting/report_types.py +11 -3
  65. invarlock/reporting/telemetry.py +86 -0
  66. invarlock/reporting/validate.py +1 -18
  67. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
  68. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
  69. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  70. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  71. invarlock/adapters/hf_gpt2.py +0 -404
  72. invarlock/adapters/hf_llama.py +0 -487
  73. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  74. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
@@ -35,6 +35,16 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
35
35
  "hysteresis_ratio": 0.002,
36
36
  "min_token_fraction": 0.01,
37
37
  },
38
+ "pm_tail": {
39
+ # Always-computed tail evidence; warn-only by default.
40
+ "mode": "warn",
41
+ "min_windows": 50,
42
+ "quantile": 0.95,
43
+ "quantile_max": 0.12,
44
+ "epsilon": 1e-4,
45
+ # Default to non-binding tail-mass checks until calibrated.
46
+ "mass_max": 1.0,
47
+ },
38
48
  "accuracy": {
39
49
  "delta_min_pp": -0.5,
40
50
  "min_examples": 200,
@@ -47,20 +57,26 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
47
57
  "deadband": 0.05, # Smaller no-op zone
48
58
  "scope": "ffn",
49
59
  "family_caps": {
50
- "ffn": {"kappa": 2.3},
60
+ "ffn": {"kappa": 3.849},
51
61
  "attn": {"kappa": 2.6},
52
62
  "embed": {"kappa": 2.8},
53
63
  "other": {"kappa": 2.8},
54
64
  },
55
65
  "ignore_preview_inflation": True,
56
66
  "max_caps": 3,
57
- "multiple_testing": {"method": "bonferroni", "alpha": 0.02, "m": 4},
67
+ "multiple_testing": {"method": "bonferroni", "alpha": 0.000625, "m": 4},
58
68
  },
59
69
  "rmt": {
60
70
  "margin": 1.40, # Lower spike allowance
61
71
  "deadband": 0.10, # Standard deadband
62
72
  "correct": True,
63
- "epsilon": {"attn": 0.05, "ffn": 0.06, "embed": 0.07, "other": 0.07},
73
+ "epsilon_default": 0.01,
74
+ "epsilon_by_family": {
75
+ "attn": 0.01,
76
+ "ffn": 0.01,
77
+ "embed": 0.01,
78
+ "other": 0.01,
79
+ },
64
80
  },
65
81
  "variance": {
66
82
  "min_gain": 0.01,
@@ -73,7 +89,7 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
73
89
  "mode": "ci",
74
90
  "alpha": 0.05,
75
91
  "tie_breaker_deadband": 0.005,
76
- "min_effect_lognll": 0.0018,
92
+ "min_effect_lognll": 0.016,
77
93
  "calibration": {
78
94
  "windows": 10,
79
95
  "min_coverage": 8,
@@ -95,6 +111,14 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
95
111
  "hysteresis_ratio": 0.002,
96
112
  "min_token_fraction": 0.01,
97
113
  },
114
+ "pm_tail": {
115
+ "mode": "warn",
116
+ "min_windows": 50,
117
+ "quantile": 0.95,
118
+ "quantile_max": 0.20,
119
+ "epsilon": 1e-4,
120
+ "mass_max": 1.0,
121
+ },
98
122
  "accuracy": {
99
123
  "delta_min_pp": -1.0,
100
124
  "min_examples": 200,
@@ -107,10 +131,10 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
107
131
  "deadband": 0.10, # Standard no-op zone
108
132
  "scope": "all",
109
133
  "family_caps": {
110
- "ffn": {"kappa": 2.5},
111
- "attn": {"kappa": 2.8},
112
- "embed": {"kappa": 3.0},
113
- "other": {"kappa": 3.0},
134
+ "ffn": {"kappa": 3.849},
135
+ "attn": {"kappa": 3.018},
136
+ "embed": {"kappa": 1.05},
137
+ "other": {"kappa": 0.0},
114
138
  },
115
139
  "ignore_preview_inflation": True,
116
140
  "max_caps": 5,
@@ -121,7 +145,13 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
121
145
  "margin": 1.50, # Default spike allowance
122
146
  "deadband": 0.10, # Standard deadband
123
147
  "correct": True,
124
- "epsilon": {"attn": 0.08, "ffn": 0.10, "embed": 0.12, "other": 0.12},
148
+ "epsilon_default": 0.01,
149
+ "epsilon_by_family": {
150
+ "attn": 0.01,
151
+ "ffn": 0.01,
152
+ "embed": 0.01,
153
+ "other": 0.01,
154
+ },
125
155
  },
126
156
  "variance": {
127
157
  "min_gain": 0.0,
@@ -134,7 +164,7 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
134
164
  "mode": "ci",
135
165
  "alpha": 0.05,
136
166
  "tie_breaker_deadband": 0.001,
137
- "min_effect_lognll": 0.0009,
167
+ "min_effect_lognll": 0.0,
138
168
  "min_abs_adjust": 0.012,
139
169
  "max_scale_step": 0.03,
140
170
  "topk_backstop": 1,
@@ -156,6 +186,14 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
156
186
  "hysteresis_ratio": 0.002,
157
187
  "min_token_fraction": 0.01,
158
188
  },
189
+ "pm_tail": {
190
+ "mode": "warn",
191
+ "min_windows": 50,
192
+ "quantile": 0.95,
193
+ "quantile_max": 0.30,
194
+ "epsilon": 1e-4,
195
+ "mass_max": 1.0,
196
+ },
159
197
  "accuracy": {
160
198
  "delta_min_pp": -2.0,
161
199
  "min_examples": 200,
@@ -168,18 +206,27 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
168
206
  "deadband": 0.15, # Larger no-op zone
169
207
  "scope": "ffn",
170
208
  "family_caps": {
171
- "ffn": {"kappa": 3.0},
209
+ "ffn": {"kappa": 3.849},
172
210
  "attn": {"kappa": 3.5},
173
211
  "embed": {"kappa": 2.5},
174
212
  "other": {"kappa": 3.5},
175
213
  },
176
214
  "ignore_preview_inflation": True,
215
+ "max_caps": 8,
216
+ "multiple_testing": {"method": "bh", "alpha": 0.00078125, "m": 4},
217
+ "max_spectral_norm": None,
177
218
  },
178
219
  "rmt": {
179
220
  "margin": 1.70, # Higher spike allowance
180
221
  "deadband": 0.15, # Larger deadband
181
222
  "correct": True,
182
- "epsilon": {"attn": 0.15, "ffn": 0.15, "embed": 0.15, "other": 0.15},
223
+ "epsilon_default": 0.01,
224
+ "epsilon_by_family": {
225
+ "attn": 0.01,
226
+ "ffn": 0.01,
227
+ "embed": 0.01,
228
+ "other": 0.01,
229
+ },
183
230
  },
184
231
  "variance": {
185
232
  "min_gain": 0.0,
@@ -192,7 +239,7 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
192
239
  "mode": "ci",
193
240
  "alpha": 0.05,
194
241
  "tie_breaker_deadband": 0.0005,
195
- "min_effect_lognll": 0.0005,
242
+ "min_effect_lognll": 0.033,
196
243
  "tap": ["transformer.h.*.mlp.c_proj", "transformer.h.*.attn.c_proj"],
197
244
  "predictive_gate": True,
198
245
  "calibration": {
@@ -244,8 +291,8 @@ def _load_runtime_yaml(
244
291
  res = base
245
292
  for part in rel_parts:
246
293
  res = res.joinpath(part)
247
- if getattr(res, "is_file", None) and res.is_file(): # type: ignore[attr-defined]
248
- text = res.read_text(encoding="utf-8") # type: ignore[assignment]
294
+ if getattr(res, "is_file", None) and res.is_file():
295
+ text = res.read_text(encoding="utf-8")
249
296
  data = yaml.safe_load(text) or {}
250
297
  if not isinstance(data, dict):
251
298
  raise ValueError("Runtime YAML must be a mapping")
@@ -301,7 +348,7 @@ def _tier_entry_to_policy(tier_entry: dict[str, Any]) -> dict[str, dict[str, Any
301
348
  if isinstance(metrics, dict):
302
349
  out["metrics"] = copy.deepcopy(metrics)
303
350
 
304
- spectral_src = tier_entry.get("spectral") or tier_entry.get("spectral_guard")
351
+ spectral_src = tier_entry.get("spectral_guard")
305
352
  if isinstance(spectral_src, dict):
306
353
  spectral = copy.deepcopy(spectral_src)
307
354
  if "family_caps" in spectral:
@@ -314,7 +361,7 @@ def _tier_entry_to_policy(tier_entry: dict[str, Any]) -> dict[str, dict[str, Any
314
361
  )
315
362
  out["spectral"] = spectral
316
363
 
317
- rmt_src = tier_entry.get("rmt") or tier_entry.get("rmt_guard")
364
+ rmt_src = tier_entry.get("rmt_guard")
318
365
  if isinstance(rmt_src, dict):
319
366
  rmt = copy.deepcopy(rmt_src)
320
367
  eps = rmt.get("epsilon_by_family")
@@ -322,11 +369,9 @@ def _tier_entry_to_policy(tier_entry: dict[str, Any]) -> dict[str, dict[str, Any
322
369
  rmt["epsilon_by_family"] = {
323
370
  str(k): float(v) for k, v in eps.items() if isinstance(v, int | float)
324
371
  }
325
- # Backward-compat: keep epsilon alias
326
- rmt["epsilon"] = dict(rmt["epsilon_by_family"])
327
372
  out["rmt"] = rmt
328
373
 
329
- variance_src = tier_entry.get("variance") or tier_entry.get("variance_guard")
374
+ variance_src = tier_entry.get("variance_guard")
330
375
  if isinstance(variance_src, dict):
331
376
  out["variance"] = copy.deepcopy(variance_src)
332
377
 
@@ -6,7 +6,7 @@ Numerically stable bootstrap helpers for evaluation metrics.
6
6
 
7
7
  This module provides bias-corrected and accelerated (BCa) confidence
8
8
  intervals tailored for paired log-loss statistics used by the runner
9
- and safety certificate reports.
9
+ and evaluation certificate reports.
10
10
  """
11
11
 
12
12
  from __future__ import annotations
@@ -21,7 +21,13 @@ def enforce_relative_spectral_cap(
21
21
  sigma = _spectral_norm(weight)
22
22
  limit = baseline_value * cap_ratio
23
23
  if sigma > limit and sigma > 0:
24
- weight.mul_(limit / sigma)
24
+ # Apply a tiny safety margin so that downstream SVD computations
25
+ # (which have small numerical error) don't report a value above the
26
+ # theoretical cap.
27
+ safe_limit = limit * (1.0 - 1e-6)
28
+ if safe_limit < 0:
29
+ safe_limit = 0.0
30
+ weight.mul_(safe_limit / sigma)
25
31
  return weight
26
32
 
27
33
 
@@ -140,23 +140,21 @@ class CoreRegistry:
140
140
  )
141
141
 
142
142
  # Register built-in adapters
143
- _fallback(self._adapters, "hf_gpt2", "invarlock.adapters", "HF_GPT2_Adapter")
144
- _fallback(self._adapters, "hf_bert", "invarlock.adapters", "HF_BERT_Adapter")
145
- _fallback(self._adapters, "hf_llama", "invarlock.adapters", "HF_LLaMA_Adapter")
146
- _fallback(self._adapters, "hf_t5", "invarlock.adapters", "HF_T5_Adapter")
147
143
  _fallback(
148
- self._adapters, "hf_onnx", "invarlock.adapters", "HF_ORT_CausalLM_Adapter"
144
+ self._adapters, "hf_causal", "invarlock.adapters", "HF_Causal_Adapter"
149
145
  )
150
- # Convenience auto adapters (delegate to built-ins)
146
+ _fallback(self._adapters, "hf_mlm", "invarlock.adapters", "HF_MLM_Adapter")
151
147
  _fallback(
152
- self._adapters,
153
- "hf_causal_auto",
154
- "invarlock.adapters",
155
- "HF_Causal_Auto_Adapter",
148
+ self._adapters, "hf_seq2seq", "invarlock.adapters", "HF_Seq2Seq_Adapter"
156
149
  )
157
150
  _fallback(
158
- self._adapters, "hf_mlm_auto", "invarlock.adapters", "HF_MLM_Auto_Adapter"
151
+ self._adapters,
152
+ "hf_causal_onnx",
153
+ "invarlock.adapters",
154
+ "HF_Causal_ONNX_Adapter",
155
+ required_deps=["optimum"],
159
156
  )
157
+ _fallback(self._adapters, "hf_auto", "invarlock.adapters", "HF_Auto_Adapter")
160
158
  # Optional plugin adapters (verify runtime dependencies)
161
159
  _fallback(
162
160
  self._adapters,
@@ -207,7 +205,7 @@ class CoreRegistry:
207
205
  missing: list[str] = []
208
206
  for dep in deps:
209
207
  try:
210
- spec = importlib.util.find_spec(dep) # type: ignore[attr-defined]
208
+ spec = importlib.util.find_spec(dep)
211
209
  except Exception:
212
210
  spec = None
213
211
  if spec is None:
@@ -430,7 +428,7 @@ class CoreRegistry:
430
428
  )
431
429
  return metadata
432
430
 
433
- # Typed-error wrappers that preserve existing behavior for legacy methods
431
+ # Typed-error wrappers that preserve existing behavior for existing methods
434
432
  def get_adapter_typed(self, name: str) -> ModelAdapter:
435
433
  try:
436
434
  return self.get_adapter(name)