invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +2 -2
- invarlock/_data/runtime/tiers.yaml +57 -30
- invarlock/adapters/__init__.py +11 -15
- invarlock/adapters/auto.py +35 -40
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_mixin.py +25 -4
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/calibration/spectral_null.py +15 -10
- invarlock/calibration/variance_ve.py +0 -2
- invarlock/cli/adapter_auto.py +31 -21
- invarlock/cli/app.py +73 -2
- invarlock/cli/commands/calibrate.py +6 -2
- invarlock/cli/commands/certify.py +651 -91
- invarlock/cli/commands/doctor.py +11 -11
- invarlock/cli/commands/explain_gates.py +57 -8
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +233 -69
- invarlock/cli/commands/run.py +1066 -244
- invarlock/cli/commands/verify.py +154 -15
- invarlock/cli/config.py +22 -6
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +1 -1
- invarlock/core/api.py +45 -5
- invarlock/core/auto_tuning.py +65 -20
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/contracts.py +7 -1
- invarlock/core/registry.py +11 -13
- invarlock/core/runner.py +425 -75
- invarlock/edits/quant_rtn.py +65 -37
- invarlock/eval/bench.py +3 -16
- invarlock/eval/data.py +82 -51
- invarlock/eval/metrics.py +63 -2
- invarlock/eval/primary_metric.py +23 -0
- invarlock/eval/tail_stats.py +230 -0
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/_estimators.py +154 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/policies.py +16 -6
- invarlock/guards/rmt.py +627 -546
- invarlock/guards/spectral.py +348 -110
- invarlock/guards/tier_config.py +32 -30
- invarlock/guards/variance.py +7 -31
- invarlock/guards_ref/rmt_ref.py +23 -23
- invarlock/model_profile.py +90 -42
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/reporting/certificate.py +384 -55
- invarlock/reporting/certificate_schema.py +3 -2
- invarlock/reporting/dataset_hashing.py +15 -2
- invarlock/reporting/guards_analysis.py +350 -277
- invarlock/reporting/html.py +55 -5
- invarlock/reporting/normalizer.py +13 -0
- invarlock/reporting/policy_utils.py +38 -36
- invarlock/reporting/primary_metric_utils.py +71 -17
- invarlock/reporting/render.py +852 -431
- invarlock/reporting/report.py +40 -4
- invarlock/reporting/report_types.py +11 -3
- invarlock/reporting/telemetry.py +86 -0
- invarlock/reporting/validate.py +1 -18
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/core/auto_tuning.py
CHANGED
|
@@ -35,6 +35,16 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
|
|
|
35
35
|
"hysteresis_ratio": 0.002,
|
|
36
36
|
"min_token_fraction": 0.01,
|
|
37
37
|
},
|
|
38
|
+
"pm_tail": {
|
|
39
|
+
# Always-computed tail evidence; warn-only by default.
|
|
40
|
+
"mode": "warn",
|
|
41
|
+
"min_windows": 50,
|
|
42
|
+
"quantile": 0.95,
|
|
43
|
+
"quantile_max": 0.12,
|
|
44
|
+
"epsilon": 1e-4,
|
|
45
|
+
# Default to non-binding tail-mass checks until calibrated.
|
|
46
|
+
"mass_max": 1.0,
|
|
47
|
+
},
|
|
38
48
|
"accuracy": {
|
|
39
49
|
"delta_min_pp": -0.5,
|
|
40
50
|
"min_examples": 200,
|
|
@@ -47,20 +57,26 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
|
|
|
47
57
|
"deadband": 0.05, # Smaller no-op zone
|
|
48
58
|
"scope": "ffn",
|
|
49
59
|
"family_caps": {
|
|
50
|
-
"ffn": {"kappa":
|
|
60
|
+
"ffn": {"kappa": 3.849},
|
|
51
61
|
"attn": {"kappa": 2.6},
|
|
52
62
|
"embed": {"kappa": 2.8},
|
|
53
63
|
"other": {"kappa": 2.8},
|
|
54
64
|
},
|
|
55
65
|
"ignore_preview_inflation": True,
|
|
56
66
|
"max_caps": 3,
|
|
57
|
-
"multiple_testing": {"method": "bonferroni", "alpha": 0.
|
|
67
|
+
"multiple_testing": {"method": "bonferroni", "alpha": 0.000625, "m": 4},
|
|
58
68
|
},
|
|
59
69
|
"rmt": {
|
|
60
70
|
"margin": 1.40, # Lower spike allowance
|
|
61
71
|
"deadband": 0.10, # Standard deadband
|
|
62
72
|
"correct": True,
|
|
63
|
-
"
|
|
73
|
+
"epsilon_default": 0.01,
|
|
74
|
+
"epsilon_by_family": {
|
|
75
|
+
"attn": 0.01,
|
|
76
|
+
"ffn": 0.01,
|
|
77
|
+
"embed": 0.01,
|
|
78
|
+
"other": 0.01,
|
|
79
|
+
},
|
|
64
80
|
},
|
|
65
81
|
"variance": {
|
|
66
82
|
"min_gain": 0.01,
|
|
@@ -73,7 +89,7 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
|
|
|
73
89
|
"mode": "ci",
|
|
74
90
|
"alpha": 0.05,
|
|
75
91
|
"tie_breaker_deadband": 0.005,
|
|
76
|
-
"min_effect_lognll": 0.
|
|
92
|
+
"min_effect_lognll": 0.016,
|
|
77
93
|
"calibration": {
|
|
78
94
|
"windows": 10,
|
|
79
95
|
"min_coverage": 8,
|
|
@@ -95,6 +111,14 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
|
|
|
95
111
|
"hysteresis_ratio": 0.002,
|
|
96
112
|
"min_token_fraction": 0.01,
|
|
97
113
|
},
|
|
114
|
+
"pm_tail": {
|
|
115
|
+
"mode": "warn",
|
|
116
|
+
"min_windows": 50,
|
|
117
|
+
"quantile": 0.95,
|
|
118
|
+
"quantile_max": 0.20,
|
|
119
|
+
"epsilon": 1e-4,
|
|
120
|
+
"mass_max": 1.0,
|
|
121
|
+
},
|
|
98
122
|
"accuracy": {
|
|
99
123
|
"delta_min_pp": -1.0,
|
|
100
124
|
"min_examples": 200,
|
|
@@ -107,10 +131,10 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
|
|
|
107
131
|
"deadband": 0.10, # Standard no-op zone
|
|
108
132
|
"scope": "all",
|
|
109
133
|
"family_caps": {
|
|
110
|
-
"ffn": {"kappa":
|
|
111
|
-
"attn": {"kappa":
|
|
112
|
-
"embed": {"kappa":
|
|
113
|
-
"other": {"kappa":
|
|
134
|
+
"ffn": {"kappa": 3.849},
|
|
135
|
+
"attn": {"kappa": 3.018},
|
|
136
|
+
"embed": {"kappa": 1.05},
|
|
137
|
+
"other": {"kappa": 0.0},
|
|
114
138
|
},
|
|
115
139
|
"ignore_preview_inflation": True,
|
|
116
140
|
"max_caps": 5,
|
|
@@ -121,7 +145,13 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
|
|
|
121
145
|
"margin": 1.50, # Default spike allowance
|
|
122
146
|
"deadband": 0.10, # Standard deadband
|
|
123
147
|
"correct": True,
|
|
124
|
-
"
|
|
148
|
+
"epsilon_default": 0.01,
|
|
149
|
+
"epsilon_by_family": {
|
|
150
|
+
"attn": 0.01,
|
|
151
|
+
"ffn": 0.01,
|
|
152
|
+
"embed": 0.01,
|
|
153
|
+
"other": 0.01,
|
|
154
|
+
},
|
|
125
155
|
},
|
|
126
156
|
"variance": {
|
|
127
157
|
"min_gain": 0.0,
|
|
@@ -134,7 +164,7 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
|
|
|
134
164
|
"mode": "ci",
|
|
135
165
|
"alpha": 0.05,
|
|
136
166
|
"tie_breaker_deadband": 0.001,
|
|
137
|
-
"min_effect_lognll": 0.
|
|
167
|
+
"min_effect_lognll": 0.0,
|
|
138
168
|
"min_abs_adjust": 0.012,
|
|
139
169
|
"max_scale_step": 0.03,
|
|
140
170
|
"topk_backstop": 1,
|
|
@@ -156,6 +186,14 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
|
|
|
156
186
|
"hysteresis_ratio": 0.002,
|
|
157
187
|
"min_token_fraction": 0.01,
|
|
158
188
|
},
|
|
189
|
+
"pm_tail": {
|
|
190
|
+
"mode": "warn",
|
|
191
|
+
"min_windows": 50,
|
|
192
|
+
"quantile": 0.95,
|
|
193
|
+
"quantile_max": 0.30,
|
|
194
|
+
"epsilon": 1e-4,
|
|
195
|
+
"mass_max": 1.0,
|
|
196
|
+
},
|
|
159
197
|
"accuracy": {
|
|
160
198
|
"delta_min_pp": -2.0,
|
|
161
199
|
"min_examples": 200,
|
|
@@ -168,18 +206,27 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
|
|
|
168
206
|
"deadband": 0.15, # Larger no-op zone
|
|
169
207
|
"scope": "ffn",
|
|
170
208
|
"family_caps": {
|
|
171
|
-
"ffn": {"kappa": 3.
|
|
209
|
+
"ffn": {"kappa": 3.849},
|
|
172
210
|
"attn": {"kappa": 3.5},
|
|
173
211
|
"embed": {"kappa": 2.5},
|
|
174
212
|
"other": {"kappa": 3.5},
|
|
175
213
|
},
|
|
176
214
|
"ignore_preview_inflation": True,
|
|
215
|
+
"max_caps": 8,
|
|
216
|
+
"multiple_testing": {"method": "bh", "alpha": 0.00078125, "m": 4},
|
|
217
|
+
"max_spectral_norm": None,
|
|
177
218
|
},
|
|
178
219
|
"rmt": {
|
|
179
220
|
"margin": 1.70, # Higher spike allowance
|
|
180
221
|
"deadband": 0.15, # Larger deadband
|
|
181
222
|
"correct": True,
|
|
182
|
-
"
|
|
223
|
+
"epsilon_default": 0.01,
|
|
224
|
+
"epsilon_by_family": {
|
|
225
|
+
"attn": 0.01,
|
|
226
|
+
"ffn": 0.01,
|
|
227
|
+
"embed": 0.01,
|
|
228
|
+
"other": 0.01,
|
|
229
|
+
},
|
|
183
230
|
},
|
|
184
231
|
"variance": {
|
|
185
232
|
"min_gain": 0.0,
|
|
@@ -192,7 +239,7 @@ TIER_POLICIES: dict[str, dict[str, dict[str, Any]]] = {
|
|
|
192
239
|
"mode": "ci",
|
|
193
240
|
"alpha": 0.05,
|
|
194
241
|
"tie_breaker_deadband": 0.0005,
|
|
195
|
-
"min_effect_lognll": 0.
|
|
242
|
+
"min_effect_lognll": 0.033,
|
|
196
243
|
"tap": ["transformer.h.*.mlp.c_proj", "transformer.h.*.attn.c_proj"],
|
|
197
244
|
"predictive_gate": True,
|
|
198
245
|
"calibration": {
|
|
@@ -244,8 +291,8 @@ def _load_runtime_yaml(
|
|
|
244
291
|
res = base
|
|
245
292
|
for part in rel_parts:
|
|
246
293
|
res = res.joinpath(part)
|
|
247
|
-
if getattr(res, "is_file", None) and res.is_file():
|
|
248
|
-
text = res.read_text(encoding="utf-8")
|
|
294
|
+
if getattr(res, "is_file", None) and res.is_file():
|
|
295
|
+
text = res.read_text(encoding="utf-8")
|
|
249
296
|
data = yaml.safe_load(text) or {}
|
|
250
297
|
if not isinstance(data, dict):
|
|
251
298
|
raise ValueError("Runtime YAML must be a mapping")
|
|
@@ -301,7 +348,7 @@ def _tier_entry_to_policy(tier_entry: dict[str, Any]) -> dict[str, dict[str, Any
|
|
|
301
348
|
if isinstance(metrics, dict):
|
|
302
349
|
out["metrics"] = copy.deepcopy(metrics)
|
|
303
350
|
|
|
304
|
-
spectral_src = tier_entry.get("
|
|
351
|
+
spectral_src = tier_entry.get("spectral_guard")
|
|
305
352
|
if isinstance(spectral_src, dict):
|
|
306
353
|
spectral = copy.deepcopy(spectral_src)
|
|
307
354
|
if "family_caps" in spectral:
|
|
@@ -314,7 +361,7 @@ def _tier_entry_to_policy(tier_entry: dict[str, Any]) -> dict[str, dict[str, Any
|
|
|
314
361
|
)
|
|
315
362
|
out["spectral"] = spectral
|
|
316
363
|
|
|
317
|
-
rmt_src = tier_entry.get("
|
|
364
|
+
rmt_src = tier_entry.get("rmt_guard")
|
|
318
365
|
if isinstance(rmt_src, dict):
|
|
319
366
|
rmt = copy.deepcopy(rmt_src)
|
|
320
367
|
eps = rmt.get("epsilon_by_family")
|
|
@@ -322,11 +369,9 @@ def _tier_entry_to_policy(tier_entry: dict[str, Any]) -> dict[str, dict[str, Any
|
|
|
322
369
|
rmt["epsilon_by_family"] = {
|
|
323
370
|
str(k): float(v) for k, v in eps.items() if isinstance(v, int | float)
|
|
324
371
|
}
|
|
325
|
-
# Backward-compat: keep epsilon alias
|
|
326
|
-
rmt["epsilon"] = dict(rmt["epsilon_by_family"])
|
|
327
372
|
out["rmt"] = rmt
|
|
328
373
|
|
|
329
|
-
variance_src = tier_entry.get("
|
|
374
|
+
variance_src = tier_entry.get("variance_guard")
|
|
330
375
|
if isinstance(variance_src, dict):
|
|
331
376
|
out["variance"] = copy.deepcopy(variance_src)
|
|
332
377
|
|
invarlock/core/bootstrap.py
CHANGED
|
@@ -6,7 +6,7 @@ Numerically stable bootstrap helpers for evaluation metrics.
|
|
|
6
6
|
|
|
7
7
|
This module provides bias-corrected and accelerated (BCa) confidence
|
|
8
8
|
intervals tailored for paired log-loss statistics used by the runner
|
|
9
|
-
and
|
|
9
|
+
and evaluation certificate reports.
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
from __future__ import annotations
|
invarlock/core/contracts.py
CHANGED
|
@@ -21,7 +21,13 @@ def enforce_relative_spectral_cap(
|
|
|
21
21
|
sigma = _spectral_norm(weight)
|
|
22
22
|
limit = baseline_value * cap_ratio
|
|
23
23
|
if sigma > limit and sigma > 0:
|
|
24
|
-
|
|
24
|
+
# Apply a tiny safety margin so that downstream SVD computations
|
|
25
|
+
# (which have small numerical error) don't report a value above the
|
|
26
|
+
# theoretical cap.
|
|
27
|
+
safe_limit = limit * (1.0 - 1e-6)
|
|
28
|
+
if safe_limit < 0:
|
|
29
|
+
safe_limit = 0.0
|
|
30
|
+
weight.mul_(safe_limit / sigma)
|
|
25
31
|
return weight
|
|
26
32
|
|
|
27
33
|
|
invarlock/core/registry.py
CHANGED
|
@@ -140,23 +140,21 @@ class CoreRegistry:
|
|
|
140
140
|
)
|
|
141
141
|
|
|
142
142
|
# Register built-in adapters
|
|
143
|
-
_fallback(self._adapters, "hf_gpt2", "invarlock.adapters", "HF_GPT2_Adapter")
|
|
144
|
-
_fallback(self._adapters, "hf_bert", "invarlock.adapters", "HF_BERT_Adapter")
|
|
145
|
-
_fallback(self._adapters, "hf_llama", "invarlock.adapters", "HF_LLaMA_Adapter")
|
|
146
|
-
_fallback(self._adapters, "hf_t5", "invarlock.adapters", "HF_T5_Adapter")
|
|
147
143
|
_fallback(
|
|
148
|
-
self._adapters, "
|
|
144
|
+
self._adapters, "hf_causal", "invarlock.adapters", "HF_Causal_Adapter"
|
|
149
145
|
)
|
|
150
|
-
|
|
146
|
+
_fallback(self._adapters, "hf_mlm", "invarlock.adapters", "HF_MLM_Adapter")
|
|
151
147
|
_fallback(
|
|
152
|
-
self._adapters,
|
|
153
|
-
"hf_causal_auto",
|
|
154
|
-
"invarlock.adapters",
|
|
155
|
-
"HF_Causal_Auto_Adapter",
|
|
148
|
+
self._adapters, "hf_seq2seq", "invarlock.adapters", "HF_Seq2Seq_Adapter"
|
|
156
149
|
)
|
|
157
150
|
_fallback(
|
|
158
|
-
self._adapters,
|
|
151
|
+
self._adapters,
|
|
152
|
+
"hf_causal_onnx",
|
|
153
|
+
"invarlock.adapters",
|
|
154
|
+
"HF_Causal_ONNX_Adapter",
|
|
155
|
+
required_deps=["optimum"],
|
|
159
156
|
)
|
|
157
|
+
_fallback(self._adapters, "hf_auto", "invarlock.adapters", "HF_Auto_Adapter")
|
|
160
158
|
# Optional plugin adapters (verify runtime dependencies)
|
|
161
159
|
_fallback(
|
|
162
160
|
self._adapters,
|
|
@@ -207,7 +205,7 @@ class CoreRegistry:
|
|
|
207
205
|
missing: list[str] = []
|
|
208
206
|
for dep in deps:
|
|
209
207
|
try:
|
|
210
|
-
spec = importlib.util.find_spec(dep)
|
|
208
|
+
spec = importlib.util.find_spec(dep)
|
|
211
209
|
except Exception:
|
|
212
210
|
spec = None
|
|
213
211
|
if spec is None:
|
|
@@ -430,7 +428,7 @@ class CoreRegistry:
|
|
|
430
428
|
)
|
|
431
429
|
return metadata
|
|
432
430
|
|
|
433
|
-
# Typed-error wrappers that preserve existing behavior for
|
|
431
|
+
# Typed-error wrappers that preserve existing behavior for existing methods
|
|
434
432
|
def get_adapter_typed(self, name: str) -> ModelAdapter:
|
|
435
433
|
try:
|
|
436
434
|
return self.get_adapter(name)
|