invarlock 0.3.5-py3-none-any.whl → 0.3.7-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only.
- invarlock/__init__.py +2 -2
- invarlock/_data/runtime/tiers.yaml +57 -30
- invarlock/adapters/__init__.py +11 -15
- invarlock/adapters/auto.py +35 -40
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_mixin.py +25 -4
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/calibration/spectral_null.py +15 -10
- invarlock/calibration/variance_ve.py +0 -2
- invarlock/cli/adapter_auto.py +31 -21
- invarlock/cli/app.py +73 -2
- invarlock/cli/commands/calibrate.py +6 -2
- invarlock/cli/commands/certify.py +651 -91
- invarlock/cli/commands/doctor.py +11 -11
- invarlock/cli/commands/explain_gates.py +57 -8
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +233 -69
- invarlock/cli/commands/run.py +1066 -244
- invarlock/cli/commands/verify.py +154 -15
- invarlock/cli/config.py +22 -6
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +1 -1
- invarlock/core/api.py +45 -5
- invarlock/core/auto_tuning.py +65 -20
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/contracts.py +7 -1
- invarlock/core/registry.py +11 -13
- invarlock/core/runner.py +425 -75
- invarlock/edits/quant_rtn.py +65 -37
- invarlock/eval/bench.py +3 -16
- invarlock/eval/data.py +82 -51
- invarlock/eval/metrics.py +63 -2
- invarlock/eval/primary_metric.py +23 -0
- invarlock/eval/tail_stats.py +230 -0
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/_estimators.py +154 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/policies.py +16 -6
- invarlock/guards/rmt.py +627 -546
- invarlock/guards/spectral.py +348 -110
- invarlock/guards/tier_config.py +32 -30
- invarlock/guards/variance.py +7 -31
- invarlock/guards_ref/rmt_ref.py +23 -23
- invarlock/model_profile.py +90 -42
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/reporting/certificate.py +384 -55
- invarlock/reporting/certificate_schema.py +3 -2
- invarlock/reporting/dataset_hashing.py +15 -2
- invarlock/reporting/guards_analysis.py +350 -277
- invarlock/reporting/html.py +55 -5
- invarlock/reporting/normalizer.py +13 -0
- invarlock/reporting/policy_utils.py +38 -36
- invarlock/reporting/primary_metric_utils.py +71 -17
- invarlock/reporting/render.py +852 -431
- invarlock/reporting/report.py +40 -4
- invarlock/reporting/report_types.py +11 -3
- invarlock/reporting/telemetry.py +86 -0
- invarlock/reporting/validate.py +1 -18
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/guards/tier_config.py
CHANGED

@@ -31,7 +31,7 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "deadband": 0.02,
             "min_abs_adjust": 0.012,
             "max_scale_step": 0.03,
-            "min_effect_lognll": 0.
+            "min_effect_lognll": 0.0,
             "predictive_one_sided": True,
             "topk_backstop": 1,
             "max_adjusted_modules": 1,
@@ -43,10 +43,10 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "max_caps": 5,
             "max_spectral_norm": None,
             "family_caps": {
-                "ffn":
-                "attn":
-                "embed":
-                "other":
+                "ffn": 3.849,
+                "attn": 3.018,
+                "embed": 1.05,
+                "other": 0.0,
             },
             "multiple_testing": {
                 "method": "bh",
@@ -57,12 +57,12 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
         "rmt_guard": {
             "deadband": 0.10,
             "margin": 1.5,
-            "epsilon_default": 0.
+            "epsilon_default": 0.01,
             "epsilon_by_family": {
-                "ffn": 0.
-                "attn": 0.
-                "embed": 0.
-                "other": 0.
+                "ffn": 0.01,
+                "attn": 0.01,
+                "embed": 0.01,
+                "other": 0.01,
             },
         },
     },
@@ -71,7 +71,7 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "deadband": 0.03,
             "min_abs_adjust": 0.02,
             "max_scale_step": 0.015,
-            "min_effect_lognll": 0.
+            "min_effect_lognll": 0.016,
             "predictive_one_sided": False,
             "topk_backstop": 0,
             "max_adjusted_modules": 0,
@@ -81,61 +81,63 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "deadband": 0.05,
             "scope": "ffn",
             "max_caps": 3,
+            "max_spectral_norm": None,
             "family_caps": {
-                "ffn":
+                "ffn": 3.849,
                 "attn": 2.6,
                 "embed": 2.8,
                 "other": 2.8,
             },
             "multiple_testing": {
                 "method": "bonferroni",
-                "alpha": 0.
+                "alpha": 0.000625,
                 "m": 4,
             },
         },
         "rmt_guard": {
             "deadband": 0.05,
             "margin": 1.3,
-            "epsilon_default": 0.
+            "epsilon_default": 0.01,
             "epsilon_by_family": {
-                "ffn": 0.
-                "attn": 0.
-                "embed": 0.
-                "other": 0.
+                "ffn": 0.01,
+                "attn": 0.01,
+                "embed": 0.01,
+                "other": 0.01,
             },
         },
     },
     "aggressive": {
         "variance_guard": {
             "deadband": 0.12,
-            "min_effect_lognll": 0.
+            "min_effect_lognll": 0.033,
         },
         "spectral_guard": {
             "sigma_quantile": 0.98,
             "deadband": 0.15,
-            "scope": "
+            "scope": "ffn",
             "max_caps": 8,
+            "max_spectral_norm": None,
             "family_caps": {
-                "ffn": 3.
-                "attn": 3.
-                "embed":
+                "ffn": 3.849,
+                "attn": 3.5,
+                "embed": 2.5,
                 "other": 3.5,
             },
             "multiple_testing": {
                 "method": "bh",
-                "alpha": 0.
+                "alpha": 0.00078125,
                 "m": 4,
             },
         },
         "rmt_guard": {
             "deadband": 0.15,
             "margin": 1.8,
-            "epsilon_default": 0.
+            "epsilon_default": 0.01,
             "epsilon_by_family": {
-                "ffn": 0.
-                "attn": 0.
-                "embed": 0.
-                "other": 0.
+                "ffn": 0.01,
+                "attn": 0.01,
+                "embed": 0.01,
+                "other": 0.01,
             },
         },
     },
@@ -257,7 +259,7 @@ def get_rmt_epsilon(tier: TierName = "balanced") -> dict[str, float]:
 def get_variance_min_effect(tier: TierName = "balanced") -> float:
     """Get VE min_effect_lognll for a tier."""
     config = get_tier_guard_config(tier, "variance_guard")
-    return config.get("min_effect_lognll", 0.
+    return config.get("min_effect_lognll", 0.0)


 def check_drift(
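The tier table above is consumed through small accessor helpers such as get_variance_min_effect. A minimal runnable sketch of that lookup pattern, using a trimmed stand-in for _FALLBACK_CONFIG (the first two tier names are assumptions; only "aggressive" is named in the hunk):

# Trimmed stand-in for _FALLBACK_CONFIG; tier names other than
# "aggressive" are assumed, not taken from the diff.
FALLBACK = {
    "conservative": {"variance_guard": {"min_effect_lognll": 0.0}},
    "balanced": {"variance_guard": {"min_effect_lognll": 0.016}},
    "aggressive": {"variance_guard": {"min_effect_lognll": 0.033}},
}

def get_variance_min_effect(tier: str = "balanced") -> float:
    config = FALLBACK.get(tier, {}).get("variance_guard", {})
    # Mirrors the 0.3.7 change: the fallback default is now 0.0.
    return config.get("min_effect_lognll", 0.0)

assert get_variance_min_effect("aggressive") == 0.033
assert get_variance_min_effect("unknown-tier") == 0.0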
invarlock/guards/variance.py
CHANGED

@@ -36,7 +36,7 @@ from ._contracts import guard_assert
 # Import the policy type and Guard interface
 from .policies import VariancePolicyDict

-__all__ = ["equalise_residual_variance", "
+__all__ = ["equalise_residual_variance", "VarianceGuard"]


 def _safe_mean(
@@ -121,7 +121,7 @@ def _iter_transformer_layers(model: nn.Module):
         # GPT-2 style
         yield from model.transformer.h
     elif hasattr(model, "model") and hasattr(model.model, "layers"):
-        #
+        # RoPE decoder style
        yield from model.model.layers
     elif hasattr(model, "encoder") and hasattr(model.encoder, "layer"):
         # BERT style
@@ -214,7 +214,7 @@ def equalise_residual_variance(
             hooks[name] = attn_proj.register_forward_hook(_branch_hook(name))

         if hasattr(blk, "mlp"):
-            # Check for c_proj (GPT-2) or down_proj (
+            # Check for c_proj (GPT-2) or down_proj (RoPE decoder) or fc2 (generic)
             mlp_proj = (
                 getattr(blk.mlp, "c_proj", None)
                 or getattr(blk.mlp, "down_proj", None)
@@ -348,33 +348,6 @@ def equalise_residual_variance(
     return applied_scales


-def equalise_branch_variance(
-    model: nn.Module,
-    dataloader,
-    windows: int = 32,
-    tol: float = 0.02,
-    scale_bias: bool = True,
-    seed: int = 42,
-    device: str | None = None,
-    allow_empty: bool = False,
-) -> dict[str, float]:
-    """
-    Legacy alias for equalise_residual_variance.
-
-    Maintained for backward compatibility.
-    """
-    return equalise_residual_variance(
-        model=model,
-        dataloader=dataloader,
-        windows=windows,
-        tol=tol,
-        scale_bias=scale_bias,
-        seed=seed,
-        device=device,
-        allow_empty=allow_empty,
-    )
-
-
 def _predictive_gate_outcome(
     mean_delta: float,
     delta_ci: tuple[float, float] | None,
@@ -1328,7 +1301,10 @@ class VarianceGuard(Guard):
         if not filtered_scales and topk > 0 and best_candidate:
             name, scale = best_candidate
             deadband = float(self._policy.get("deadband", 0.0) or 0.0)
-
+            # Backstop should remain below the main min_abs filter; clamp if deadband is large.
+            threshold = max(deadband * 0.5, min_abs * 0.5)
+            if min_abs > 0 and threshold >= min_abs:
+                threshold = min_abs * 0.5
             if best_delta >= threshold:
                 if max_step > 0.0:
                     limited_delta = min(best_delta, max_step)
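The new backstop clamp is easier to read in isolation. A hypothetical standalone form of the threshold logic added above (in the shipped code this runs inline inside VarianceGuard; the function name is illustrative):

def backstop_threshold(deadband: float, min_abs: float) -> float:
    # The backstop threshold starts at half of deadband or half of min_abs,
    # whichever is larger ...
    threshold = max(deadband * 0.5, min_abs * 0.5)
    # ... but a large deadband must not push it to or above the main
    # min_abs filter, so clamp back to half of min_abs.
    if min_abs > 0 and threshold >= min_abs:
        threshold = min_abs * 0.5
    return threshold

assert backstop_threshold(deadband=0.02, min_abs=0.012) == 0.01
assert backstop_threshold(deadband=0.12, min_abs=0.012) == 0.006  # clamped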
invarlock/guards_ref/rmt_ref.py
CHANGED

@@ -1,37 +1,37 @@
 from __future__ import annotations

-import math
 from collections.abc import Mapping


 def rmt_decide(
-
-
+    baseline_by_family: Mapping[str, float],
+    current_by_family: Mapping[str, float],
     epsilon_by_family: Mapping[str, float],
 ) -> dict[str, object]:
     """
-    Reference epsilon-rule decision for RMT.
+    Reference epsilon-rule decision for RMT activation edge-risk drift.

-
-
+    For each family with baseline edge-risk > 0:
+    PASS iff current_edge <= (1 + epsilon) * baseline_edge
     """
-    families = set(
-    delta_by_family: dict[str,
-    allowed_by_family: dict[str,
-
-
-
-
-
-
-
-        delta_by_family[
-
-
-
-    ok = all(
-
+    families = set(baseline_by_family) | set(current_by_family) | set(epsilon_by_family)
+    delta_by_family: dict[str, float] = {}
+    allowed_by_family: dict[str, float] = {}
+    for family in families:
+        baseline = float(baseline_by_family.get(family, 0.0) or 0.0)
+        current = float(current_by_family.get(family, 0.0) or 0.0)
+        if baseline <= 0.0:
+            continue
+        epsilon_val = float(epsilon_by_family.get(family, 0.0) or 0.0)
+        allowed = (1.0 + epsilon_val) * baseline
+        allowed_by_family[family] = allowed
+        delta_by_family[family] = (
+            (current / baseline) - 1.0 if baseline > 0 else float("inf")
+        )
+
+    ok = all(
+        float(current_by_family.get(family, 0.0) or 0.0) <= allowed_by_family[family]
+        for family in allowed_by_family
     )
     return {
         "pass": ok,
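A short worked example of the epsilon rule above, with made-up edge-risk values; a per-family epsilon of 0.01 allows the current edge-risk to exceed its baseline by at most 1%:

baseline = {"ffn": 0.20, "attn": 0.10}
current = {"ffn": 0.201, "attn": 0.12}
epsilon = {"ffn": 0.01, "attn": 0.01}

for family in baseline:
    allowed = (1.0 + epsilon[family]) * baseline[family]
    ok = current[family] <= allowed
    print(family, ok)  # ffn True (0.201 <= ~0.202), attn False (0.12 > ~0.101)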
invarlock/model_profile.py
CHANGED

@@ -5,12 +5,14 @@ from collections.abc import Callable
 from dataclasses import dataclass, field
 from typing import Any

+AutoTokenizer: Any | None = None
+GPT2Tokenizer: Any | None = None
+
 try:
-    from transformers import AutoTokenizer
+    from transformers import AutoTokenizer as _AutoTokenizer
+    from transformers import GPT2Tokenizer as _GPT2Tokenizer
     from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 except Exception:  # pragma: no cover - exercised only when transformers is absent
-    AutoTokenizer = None  # type: ignore[assignment]
-    GPT2Tokenizer = None  # type: ignore[assignment]

     class PreTrainedTokenizerBase:  # type: ignore[no-redef]
         """Lightweight stub used when transformers is not installed."""
@@ -22,6 +24,11 @@ except Exception:  # pragma: no cover - exercised only when transformers is abse
         )


+else:  # pragma: no cover - transformers optional
+    AutoTokenizer = _AutoTokenizer
+    GPT2Tokenizer = _GPT2Tokenizer
+
+
 TokenizerFactory = Callable[[], tuple[PreTrainedTokenizerBase, str]]


@@ -99,7 +106,7 @@ def _gpt2_selectors() -> dict[str, list[str]]:
     }


-def
+def _rope_decoder_selectors() -> dict[str, list[str]]:
     return {
         "attention": [
             "self_attn.q_proj",
@@ -184,23 +191,34 @@ def _make_gpt2_tokenizer(model_id: str):
     return factory


-def
+def _make_causal_auto_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
         if AutoTokenizer is None and GPT2Tokenizer is None:
             raise RuntimeError(
-                "
+                "Causal tokenizers require the 'transformers' extra. "
                 "Install it with: pip install 'invarlock[adapters]'."
             )
         # Try offline-first to respect InvarLock network guard; fall back to a
         # local GPT-2 tokenizer if the model assets are not cached or network
         # access is denied.
-
-
-        except Exception:
+        tokenizer = None
+        if AutoTokenizer is not None:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(
+                tokenizer = AutoTokenizer.from_pretrained(
+                    model_id, local_files_only=True
+                )
             except Exception:
-
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(model_id)
+                except Exception:
+                    tokenizer = None
+        if tokenizer is None:
+            if GPT2Tokenizer is None:
+                raise RuntimeError(
+                    "Tokenization requires the 'transformers' extra. "
+                    "Install it with: pip install 'invarlock[adapters]'."
+                )
+            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
         # Ensure padding/bos tokens are configured so downstream encoding
         # yields stable non-zero ids and a valid attention mask regardless of
         # environment defaults or tokenizer variants.
@@ -209,7 +227,7 @@ def _make_llama_tokenizer(model_id: str):
         eos_token = getattr(tokenizer, "eos_token", None)
         if eos_token is not None:
             tokenizer.pad_token = eos_token
-        # Some
+        # Some causal tokenizers default to not adding a BOS token on encode;
         # enable it to guarantee at least one non-pad, non-zero token id.
         if hasattr(tokenizer, "add_bos_token"):
             try:
@@ -234,15 +252,24 @@ def _make_unknown_tokenizer(model_id: str):
             "Install it with: pip install 'invarlock[adapters]'."
         )
         # Unknown families: try local-only first, then remote, then degrade to GPT-2
-
-
-                model_id, local_files_only=True
-            )
-        except Exception:
+        tokenizer = None
+        if AutoTokenizer is not None:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(
+                tokenizer = AutoTokenizer.from_pretrained(
+                    model_id, local_files_only=True
+                )
             except Exception:
-
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(model_id)
+                except Exception:
+                    tokenizer = None
+        if tokenizer is None:
+            if GPT2Tokenizer is None:
+                raise RuntimeError(
+                    "Text tokenization requires the 'transformers' extra. "
+                    "Install it with: pip install 'invarlock[adapters]'."
+                )
+            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
         if getattr(tokenizer, "pad_token", None) is None:
             eos_token = getattr(tokenizer, "eos_token", None)
             if eos_token is not None:
@@ -262,7 +289,7 @@ def detect_model_profile(model_id: str, adapter: str | None = None) -> ModelProf
     model_lower = (model_id or "").lower()

     if any(
-        keyword in adapter_lower for keyword in ("bert", "roberta", "deberta")
+        keyword in adapter_lower for keyword in ("hf_mlm", "bert", "roberta", "deberta")
     ) or any(keyword in model_lower for keyword in ("bert", "roberta", "deberta")):
         return ModelProfile(
             family="bert",
@@ -275,57 +302,78 @@ def detect_model_profile(model_id: str, adapter: str | None = None) -> ModelProf
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "
-                    "value": "
-                    "message": "BERT cert must
+                    "path": "primary_metric.kind",
+                    "value": "ppl_mlm",
+                    "message": "BERT cert must use MLM metric.",
                 },
                 {
                     "type": "gte",
-                    "path": "
+                    "path": "telemetry.masked_tokens_total",
                     "value": "1",
                     "message": "BERT cert must report masked tokens.",
                 },
             ),
         )

-    if any(keyword in adapter_lower for keyword in ("
-        keyword in model_lower for keyword in ("
+    if any(keyword in adapter_lower for keyword in ("hf_seq2seq", "t5", "bart")) or any(
+        keyword in model_lower for keyword in ("t5", "bart")
     ):
         return ModelProfile(
-            family="
+            family="seq2seq",
+            default_loss="seq2seq",
+            make_tokenizer=_make_unknown_tokenizer(model_id),
+            default_metric="ppl_seq2seq",
+            default_provider="wikitext2",
+            module_selectors=_unknown_selectors(),
+            invariants=(),
+            cert_lints=(),
+        )
+
+    if any(
+        keyword in adapter_lower for keyword in ("gpt", "neox", "opt", "phi")
+    ) or any(keyword in model_lower for keyword in ("gpt", "neox", "opt", "phi")):
+        return ModelProfile(
+            family="gpt2",
             default_loss="causal",
-            make_tokenizer=
+            make_tokenizer=_make_gpt2_tokenizer(model_id),
             default_metric="ppl_causal",
             default_provider="wikitext2",
-            module_selectors=
-            invariants=("
+            module_selectors=_gpt2_selectors(),
+            invariants=("causal_masking",),
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "
-                    "value": "
-                    "message": "
+                    "path": "primary_metric.kind",
+                    "value": "ppl_causal",
+                    "message": "GPT-style cert must use causal ppl metric.",
                 },
             ),
         )

     if any(
-        keyword in adapter_lower for keyword in ("
-    ) or any(
+        keyword in adapter_lower for keyword in ("mistral", "mixtral", "qwen", "yi")
+    ) or any(
+        keyword in model_lower for keyword in ("mistral", "mixtral", "qwen", "yi")
+    ):
+        family = "causal"
+        for keyword in ("mixtral", "mistral", "qwen", "yi"):
+            if keyword in adapter_lower or keyword in model_lower:
+                family = keyword
+                break
         return ModelProfile(
-            family=
+            family=family,
             default_loss="causal",
-            make_tokenizer=
+            make_tokenizer=_make_causal_auto_tokenizer(model_id),
             default_metric="ppl_causal",
             default_provider="wikitext2",
-            module_selectors=
-            invariants=("
+            module_selectors=_rope_decoder_selectors(),
+            invariants=("rope_rotary_embedding",),
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "
-                    "value": "
-                    "message": "
+                    "path": "primary_metric.kind",
+                    "value": "ppl_causal",
+                    "message": "Causal cert must use causal ppl metric.",
                 },
             ),
         )
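Both tokenizer factories above now share the same offline-first fallback chain: a cached-only load, then a network load, then a local GPT-2 tokenizer. A minimal sketch of that chain, assuming transformers is installed (load_tokenizer is an illustrative name, not the shipped helper):

from transformers import AutoTokenizer, GPT2Tokenizer

def load_tokenizer(model_id: str):
    try:
        # Offline-first: respect the network guard, use cached assets only.
        return AutoTokenizer.from_pretrained(model_id, local_files_only=True)
    except Exception:
        try:
            return AutoTokenizer.from_pretrained(model_id)  # allow network
        except Exception:
            # Degrade to a local GPT-2 tokenizer as a last resort.
            return GPT2Tokenizer.from_pretrained("gpt2")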
invarlock/observability/health.py
CHANGED

@@ -374,15 +374,15 @@ class InvarLockHealthChecker(HealthChecker):
         """Check adapter availability."""
         try:
             from invarlock.adapters import (
-
-
-
+                HF_Causal_Adapter,
+                HF_MLM_Adapter,
+                HF_Seq2Seq_Adapter,
             )

             adapters = {
-                "
-                "
-                "
+                "hf_causal": HF_Causal_Adapter,
+                "hf_mlm": HF_MLM_Adapter,
+                "hf_seq2seq": HF_Seq2Seq_Adapter,
             }

             available_adapters = []
invarlock/observability/metrics.py
CHANGED

@@ -455,3 +455,111 @@ def create_resource_metrics(registry: MetricsRegistry) -> dict[str, Any]:
         "gpu_memory": registry.register_gauge("invarlock.resource.gpu_memory_percent"),
         "disk_usage": registry.register_gauge("invarlock.resource.disk_percent"),
     }
+
+
+def reset_peak_memory_stats() -> None:
+    """Reset GPU peak memory stats when available."""
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats()
+        mps = getattr(torch, "mps", None)
+        if mps is not None and hasattr(mps, "reset_peak_memory_stats"):
+            mps.reset_peak_memory_stats()
+    except Exception:
+        pass
+
+
+def capture_memory_snapshot(
+    phase: str, *, timestamp: float | None = None
+) -> dict[str, Any]:
+    """Capture a point-in-time memory snapshot for the current process."""
+    snapshot: dict[str, Any] = {"phase": str(phase)}
+    if timestamp is None:
+        timestamp = time.time()
+    snapshot["ts"] = float(timestamp)
+
+    try:
+        import os
+
+        import psutil
+
+        process = psutil.Process(os.getpid())
+        rss_mb = process.memory_info().rss / 1024 / 1024
+        snapshot["rss_mb"] = float(rss_mb)
+    except Exception:
+        pass
+
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            device_index = torch.cuda.current_device()
+            snapshot["gpu_device"] = f"cuda:{device_index}"
+            snapshot["gpu_mb"] = float(
+                torch.cuda.memory_allocated(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_reserved_mb"] = float(
+                torch.cuda.memory_reserved(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_peak_mb"] = float(
+                torch.cuda.max_memory_allocated(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_peak_reserved_mb"] = float(
+                torch.cuda.max_memory_reserved(device_index) / 1024 / 1024
+            )
+        else:
+            mps = getattr(torch, "mps", None)
+            if mps is not None and hasattr(torch.backends, "mps"):
+                if torch.backends.mps.is_available():
+                    snapshot["gpu_device"] = "mps"
+                    if hasattr(mps, "current_allocated_memory"):
+                        snapshot["gpu_mb"] = float(
+                            mps.current_allocated_memory() / 1024 / 1024
+                        )
+                    if hasattr(mps, "driver_allocated_memory"):
+                        snapshot["gpu_reserved_mb"] = float(
+                            mps.driver_allocated_memory() / 1024 / 1024
+                        )
+    except Exception:
+        pass
+
+    if len(snapshot) <= 2:
+        return {}
+    return snapshot
+
+
+def summarize_memory_snapshots(
+    snapshots: list[dict[str, Any]],
+) -> dict[str, float]:
+    """Summarize memory snapshots into peak metrics."""
+
+    def _peak(key: str) -> float | None:
+        values: list[float] = []
+        for entry in snapshots:
+            if not isinstance(entry, dict):
+                continue
+            value = entry.get(key)
+            if isinstance(value, int | float):
+                values.append(float(value))
+        return max(values) if values else None
+
+    summary: dict[str, float] = {}
+    rss_peak = _peak("rss_mb")
+    if rss_peak is not None:
+        summary["memory_mb_peak"] = rss_peak
+
+    gpu_peak = _peak("gpu_peak_mb")
+    if gpu_peak is None:
+        gpu_peak = _peak("gpu_mb")
+    if gpu_peak is not None:
+        summary["gpu_memory_mb_peak"] = gpu_peak
+
+    gpu_reserved_peak = _peak("gpu_peak_reserved_mb")
+    if gpu_reserved_peak is None:
+        gpu_reserved_peak = _peak("gpu_reserved_mb")
+    if gpu_reserved_peak is not None:
+        summary["gpu_memory_reserved_mb_peak"] = gpu_reserved_peak
+
+    return summary
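A usage sketch for the new telemetry helpers, assuming they are importable from invarlock.observability.metrics as the file list indicates (the printed values are illustrative):

from invarlock.observability.metrics import (
    capture_memory_snapshot,
    reset_peak_memory_stats,
    summarize_memory_snapshots,
)

reset_peak_memory_stats()  # clear CUDA/MPS peak counters if a GPU is present
snapshots = [capture_memory_snapshot("load"), capture_memory_snapshot("run")]
# capture_memory_snapshot returns {} when neither psutil nor torch yields data.
print(summarize_memory_snapshots([s for s in snapshots if s]))
# e.g. {'memory_mb_peak': 512.3, 'gpu_memory_mb_peak': 1024.0}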