invarlock-0.3.5-py3-none-any.whl → invarlock-0.3.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +1 -1
- invarlock/_data/runtime/tiers.yaml +57 -30
- invarlock/adapters/__init__.py +1 -1
- invarlock/calibration/spectral_null.py +15 -10
- invarlock/calibration/variance_ve.py +0 -2
- invarlock/cli/commands/calibrate.py +6 -2
- invarlock/cli/commands/certify.py +58 -39
- invarlock/cli/commands/doctor.py +3 -1
- invarlock/cli/commands/explain_gates.py +57 -8
- invarlock/cli/commands/report.py +1 -1
- invarlock/cli/commands/run.py +159 -61
- invarlock/cli/commands/verify.py +78 -4
- invarlock/cli/config.py +21 -5
- invarlock/core/api.py +45 -5
- invarlock/core/auto_tuning.py +65 -20
- invarlock/core/contracts.py +7 -1
- invarlock/core/registry.py +2 -2
- invarlock/core/runner.py +314 -50
- invarlock/eval/bench.py +0 -13
- invarlock/eval/data.py +14 -28
- invarlock/eval/metrics.py +4 -1
- invarlock/eval/primary_metric.py +23 -0
- invarlock/eval/tail_stats.py +230 -0
- invarlock/guards/_estimators.py +154 -0
- invarlock/guards/policies.py +16 -6
- invarlock/guards/rmt.py +625 -544
- invarlock/guards/spectral.py +348 -110
- invarlock/guards/tier_config.py +32 -30
- invarlock/guards/variance.py +5 -29
- invarlock/guards_ref/rmt_ref.py +23 -23
- invarlock/model_profile.py +42 -15
- invarlock/reporting/certificate.py +225 -46
- invarlock/reporting/certificate_schema.py +2 -1
- invarlock/reporting/dataset_hashing.py +15 -2
- invarlock/reporting/guards_analysis.py +197 -274
- invarlock/reporting/normalizer.py +6 -0
- invarlock/reporting/policy_utils.py +38 -36
- invarlock/reporting/primary_metric_utils.py +71 -17
- invarlock/reporting/render.py +61 -0
- invarlock/reporting/report.py +1 -1
- invarlock/reporting/report_types.py +5 -2
- invarlock/reporting/validate.py +1 -18
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/METADATA +6 -6
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/RECORD +48 -46
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/WHEEL +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/entry_points.txt +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/top_level.txt +0 -0
invarlock/guards/tier_config.py
CHANGED
@@ -31,7 +31,7 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "deadband": 0.02,
             "min_abs_adjust": 0.012,
             "max_scale_step": 0.03,
-            "min_effect_lognll": 0.
+            "min_effect_lognll": 0.0,
             "predictive_one_sided": True,
             "topk_backstop": 1,
             "max_adjusted_modules": 1,
@@ -43,10 +43,10 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "max_caps": 5,
             "max_spectral_norm": None,
             "family_caps": {
-                "ffn":
-                "attn":
-                "embed":
-                "other":
+                "ffn": 3.849,
+                "attn": 3.018,
+                "embed": 1.05,
+                "other": 0.0,
             },
             "multiple_testing": {
                 "method": "bh",
@@ -57,12 +57,12 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
         "rmt_guard": {
             "deadband": 0.10,
             "margin": 1.5,
-            "epsilon_default": 0.
+            "epsilon_default": 0.01,
             "epsilon_by_family": {
-                "ffn": 0.
-                "attn": 0.
-                "embed": 0.
-                "other": 0.
+                "ffn": 0.01,
+                "attn": 0.01,
+                "embed": 0.01,
+                "other": 0.01,
             },
         },
     },
@@ -71,7 +71,7 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "deadband": 0.03,
             "min_abs_adjust": 0.02,
             "max_scale_step": 0.015,
-            "min_effect_lognll": 0.
+            "min_effect_lognll": 0.016,
             "predictive_one_sided": False,
             "topk_backstop": 0,
             "max_adjusted_modules": 0,
@@ -81,61 +81,63 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "deadband": 0.05,
             "scope": "ffn",
             "max_caps": 3,
+            "max_spectral_norm": None,
             "family_caps": {
-                "ffn":
+                "ffn": 3.849,
                 "attn": 2.6,
                 "embed": 2.8,
                 "other": 2.8,
             },
             "multiple_testing": {
                 "method": "bonferroni",
-                "alpha": 0.
+                "alpha": 0.000625,
                 "m": 4,
             },
         },
         "rmt_guard": {
             "deadband": 0.05,
             "margin": 1.3,
-            "epsilon_default": 0.
+            "epsilon_default": 0.01,
             "epsilon_by_family": {
-                "ffn": 0.
-                "attn": 0.
-                "embed": 0.
-                "other": 0.
+                "ffn": 0.01,
+                "attn": 0.01,
+                "embed": 0.01,
+                "other": 0.01,
             },
         },
     },
     "aggressive": {
         "variance_guard": {
             "deadband": 0.12,
-            "min_effect_lognll": 0.
+            "min_effect_lognll": 0.033,
         },
         "spectral_guard": {
             "sigma_quantile": 0.98,
             "deadband": 0.15,
-            "scope": "
+            "scope": "ffn",
             "max_caps": 8,
+            "max_spectral_norm": None,
             "family_caps": {
-                "ffn": 3.
-                "attn": 3.
-                "embed":
+                "ffn": 3.849,
+                "attn": 3.5,
+                "embed": 2.5,
                 "other": 3.5,
             },
             "multiple_testing": {
                 "method": "bh",
-                "alpha": 0.
+                "alpha": 0.00078125,
                 "m": 4,
             },
         },
         "rmt_guard": {
             "deadband": 0.15,
             "margin": 1.8,
-            "epsilon_default": 0.
+            "epsilon_default": 0.01,
             "epsilon_by_family": {
-                "ffn": 0.
-                "attn": 0.
-                "embed": 0.
-                "other": 0.
+                "ffn": 0.01,
+                "attn": 0.01,
+                "embed": 0.01,
+                "other": 0.01,
             },
         },
     },
@@ -257,7 +259,7 @@ def get_rmt_epsilon(tier: TierName = "balanced") -> dict[str, float]:
 def get_variance_min_effect(tier: TierName = "balanced") -> float:
     """Get VE min_effect_lognll for a tier."""
     config = get_tier_guard_config(tier, "variance_guard")
-    return config.get("min_effect_lognll", 0.
+    return config.get("min_effect_lognll", 0.0)


 def check_drift(
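Downstream code reads these fallback values through the accessor helpers visible in the last hunk. A minimal sketch of how the new defaults surface, assuming the fallback table above is in effect (the packaged tiers.yaml may override it, and the exact tier-to-hunk mapping is not fully visible in this diff):

    from invarlock.guards.tier_config import (
        get_rmt_epsilon,
        get_tier_guard_config,
        get_variance_min_effect,
    )

    # Per-tier VE min-effect values changed (e.g. 0.016 and 0.033 in the
    # hunks above); the accessor's own fallback default is now 0.0.
    print(get_variance_min_effect("balanced"))

    # RMT epsilons are a flat 0.01 across all families in every tier.
    print(get_rmt_epsilon("aggressive"))

    # Raw guard blocks are also reachable; e.g. the unified FFN cap.
    spectral = get_tier_guard_config("aggressive", "spectral_guard")
    print(spectral["family_caps"]["ffn"])  # 3.849 per the fallback config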
invarlock/guards/variance.py
CHANGED
@@ -36,7 +36,7 @@ from ._contracts import guard_assert
 # Import the policy type and Guard interface
 from .policies import VariancePolicyDict

-__all__ = ["equalise_residual_variance", "
+__all__ = ["equalise_residual_variance", "VarianceGuard"]


 def _safe_mean(
@@ -348,33 +348,6 @@ def equalise_residual_variance(
     return applied_scales


-def equalise_branch_variance(
-    model: nn.Module,
-    dataloader,
-    windows: int = 32,
-    tol: float = 0.02,
-    scale_bias: bool = True,
-    seed: int = 42,
-    device: str | None = None,
-    allow_empty: bool = False,
-) -> dict[str, float]:
-    """
-    Legacy alias for equalise_residual_variance.
-
-    Maintained for backward compatibility.
-    """
-    return equalise_residual_variance(
-        model=model,
-        dataloader=dataloader,
-        windows=windows,
-        tol=tol,
-        scale_bias=scale_bias,
-        seed=seed,
-        device=device,
-        allow_empty=allow_empty,
-    )
-
-
 def _predictive_gate_outcome(
     mean_delta: float,
     delta_ci: tuple[float, float] | None,
@@ -1328,7 +1301,10 @@ class VarianceGuard(Guard):
         if not filtered_scales and topk > 0 and best_candidate:
             name, scale = best_candidate
             deadband = float(self._policy.get("deadband", 0.0) or 0.0)
-
+            # Backstop should remain below the main min_abs filter; clamp if deadband is large.
+            threshold = max(deadband * 0.5, min_abs * 0.5)
+            if min_abs > 0 and threshold >= min_abs:
+                threshold = min_abs * 0.5
             if best_delta >= threshold:
                 if max_step > 0.0:
                     limited_delta = min(best_delta, max_step)
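The reworked backstop is pure arithmetic, so a standalone sketch makes the clamp visible. The helper name and policy numbers below are illustrative; only the threshold rule comes from the hunk above:

    def backstop_threshold(deadband: float, min_abs: float) -> float:
        # Backstop should remain below the main min_abs filter; clamp if deadband is large.
        threshold = max(deadband * 0.5, min_abs * 0.5)
        if min_abs > 0 and threshold >= min_abs:
            threshold = min_abs * 0.5
        return threshold

    # Small deadband: half the min_abs filter dominates and no clamp is needed.
    assert backstop_threshold(deadband=0.02, min_abs=0.05) == 0.025

    # Large deadband: max(0.06, 0.01) = 0.06 would exceed min_abs = 0.02,
    # so the clamp pulls the backstop down to min_abs / 2 = 0.01.
    assert backstop_threshold(deadband=0.12, min_abs=0.02) == 0.01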
invarlock/guards_ref/rmt_ref.py
CHANGED
@@ -1,37 +1,37 @@
 from __future__ import annotations

-import math
 from collections.abc import Mapping


 def rmt_decide(
-
-
+    baseline_by_family: Mapping[str, float],
+    current_by_family: Mapping[str, float],
     epsilon_by_family: Mapping[str, float],
 ) -> dict[str, object]:
     """
-    Reference epsilon-rule decision for RMT.
+    Reference epsilon-rule decision for RMT activation edge-risk drift.

-
-
+    For each family with baseline edge-risk > 0:
+    PASS iff current_edge <= (1 + epsilon) * baseline_edge
     """
-    families = set(
-    delta_by_family: dict[str,
-    allowed_by_family: dict[str,
-
-
-
-
-
-
-    delta_by_family[
-
-
-
-    ok = all(
-
+    families = set(baseline_by_family) | set(current_by_family) | set(epsilon_by_family)
+    delta_by_family: dict[str, float] = {}
+    allowed_by_family: dict[str, float] = {}
+    for family in families:
+        baseline = float(baseline_by_family.get(family, 0.0) or 0.0)
+        current = float(current_by_family.get(family, 0.0) or 0.0)
+        if baseline <= 0.0:
+            continue
+        epsilon_val = float(epsilon_by_family.get(family, 0.0) or 0.0)
+        allowed = (1.0 + epsilon_val) * baseline
+        allowed_by_family[family] = allowed
+        delta_by_family[family] = (
+            (current / baseline) - 1.0 if baseline > 0 else float("inf")
+        )
+
+    ok = all(
+        float(current_by_family.get(family, 0.0) or 0.0) <= allowed_by_family[family]
+        for family in allowed_by_family
     )
     return {
         "pass": ok,
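Plugging numbers into the rewritten reference makes the epsilon rule concrete. A usage sketch that relies only on the "pass" key visible in the hunk (the returned dict also carries per-family values, but their keys are truncated above):

    from invarlock.guards_ref.rmt_ref import rmt_decide

    baseline = {"ffn": 2.00, "attn": 2.00}
    epsilon = {"ffn": 0.01, "attn": 0.01}

    # Every family within its +1% allowance: 2.01 <= (1 + 0.01) * 2.00 = 2.02.
    ok = rmt_decide(baseline, {"ffn": 2.01, "attn": 2.01}, epsilon)
    assert ok["pass"] is True

    # attn drifts to 2.10 > 2.02, so the decision flips to FAIL.
    bad = rmt_decide(baseline, {"ffn": 2.01, "attn": 2.10}, epsilon)
    assert bad["pass"] is False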
invarlock/model_profile.py
CHANGED
@@ -5,12 +5,14 @@ from collections.abc import Callable
 from dataclasses import dataclass, field
 from typing import Any

+AutoTokenizer: Any | None = None
+GPT2Tokenizer: Any | None = None
+
 try:
-    from transformers import AutoTokenizer
+    from transformers import AutoTokenizer as _AutoTokenizer
+    from transformers import GPT2Tokenizer as _GPT2Tokenizer
     from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 except Exception:  # pragma: no cover - exercised only when transformers is absent
-    AutoTokenizer = None  # type: ignore[assignment]
-    GPT2Tokenizer = None  # type: ignore[assignment]

     class PreTrainedTokenizerBase:  # type: ignore[no-redef]
         """Lightweight stub used when transformers is not installed."""
@@ -22,6 +24,11 @@ except Exception:  # pragma: no cover - exercised only when transformers is absent
            )


+else:  # pragma: no cover - transformers optional
+    AutoTokenizer = _AutoTokenizer
+    GPT2Tokenizer = _GPT2Tokenizer
+
+
 TokenizerFactory = Callable[[], tuple[PreTrainedTokenizerBase, str]]


@@ -194,13 +201,24 @@ def _make_llama_tokenizer(model_id: str):
     # Try offline-first to respect InvarLock network guard; fall back to a
     # local GPT-2 tokenizer if the model assets are not cached or network
     # access is denied.
-
-
-    except Exception:
+    tokenizer = None
+    if AutoTokenizer is not None:
         try:
-            tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer = AutoTokenizer.from_pretrained(
+                model_id, local_files_only=True
+            )
         except Exception:
-
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
+            except Exception:
+                tokenizer = None
+    if tokenizer is None:
+        if GPT2Tokenizer is None:
+            raise RuntimeError(
+                "Tokenization requires the 'transformers' extra. "
+                "Install it with: pip install 'invarlock[adapters]'."
+            )
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
     # Ensure padding/bos tokens are configured so downstream encoding
     # yields stable non-zero ids and a valid attention mask regardless of
     # environment defaults or tokenizer variants.
@@ -234,15 +252,24 @@ def _make_unknown_tokenizer(model_id: str):
         "Install it with: pip install 'invarlock[adapters]'."
     )
     # Unknown families: try local-only first, then remote, then degrade to GPT-2
-
-
-        model_id, local_files_only=True
-    )
-    except Exception:
+    tokenizer = None
+    if AutoTokenizer is not None:
         try:
-            tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer = AutoTokenizer.from_pretrained(
+                model_id, local_files_only=True
+            )
         except Exception:
-
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
+            except Exception:
+                tokenizer = None
+    if tokenizer is None:
+        if GPT2Tokenizer is None:
+            raise RuntimeError(
+                "Text tokenization requires the 'transformers' extra. "
+                "Install it with: pip install 'invarlock[adapters]'."
+            )
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
     if getattr(tokenizer, "pad_token", None) is None:
         eos_token = getattr(tokenizer, "eos_token", None)
         if eos_token is not None: