invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +2 -2
- invarlock/_data/runtime/tiers.yaml +57 -30
- invarlock/adapters/__init__.py +11 -15
- invarlock/adapters/auto.py +35 -40
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_mixin.py +25 -4
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/calibration/spectral_null.py +15 -10
- invarlock/calibration/variance_ve.py +0 -2
- invarlock/cli/adapter_auto.py +31 -21
- invarlock/cli/app.py +73 -2
- invarlock/cli/commands/calibrate.py +6 -2
- invarlock/cli/commands/certify.py +651 -91
- invarlock/cli/commands/doctor.py +11 -11
- invarlock/cli/commands/explain_gates.py +57 -8
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +233 -69
- invarlock/cli/commands/run.py +1066 -244
- invarlock/cli/commands/verify.py +154 -15
- invarlock/cli/config.py +22 -6
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +1 -1
- invarlock/core/api.py +45 -5
- invarlock/core/auto_tuning.py +65 -20
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/contracts.py +7 -1
- invarlock/core/registry.py +11 -13
- invarlock/core/runner.py +425 -75
- invarlock/edits/quant_rtn.py +65 -37
- invarlock/eval/bench.py +3 -16
- invarlock/eval/data.py +82 -51
- invarlock/eval/metrics.py +63 -2
- invarlock/eval/primary_metric.py +23 -0
- invarlock/eval/tail_stats.py +230 -0
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/_estimators.py +154 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/policies.py +16 -6
- invarlock/guards/rmt.py +627 -546
- invarlock/guards/spectral.py +348 -110
- invarlock/guards/tier_config.py +32 -30
- invarlock/guards/variance.py +7 -31
- invarlock/guards_ref/rmt_ref.py +23 -23
- invarlock/model_profile.py +90 -42
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/reporting/certificate.py +384 -55
- invarlock/reporting/certificate_schema.py +3 -2
- invarlock/reporting/dataset_hashing.py +15 -2
- invarlock/reporting/guards_analysis.py +350 -277
- invarlock/reporting/html.py +55 -5
- invarlock/reporting/normalizer.py +13 -0
- invarlock/reporting/policy_utils.py +38 -36
- invarlock/reporting/primary_metric_utils.py +71 -17
- invarlock/reporting/render.py +852 -431
- invarlock/reporting/report.py +40 -4
- invarlock/reporting/report_types.py +11 -3
- invarlock/reporting/telemetry.py +86 -0
- invarlock/reporting/validate.py +1 -18
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/adapters/hf_causal.py
ADDED
@@ -0,0 +1,418 @@
+"""
+HuggingFace causal LM adapter (decoder-only).
+=============================================
+
+Role-based adapter for HuggingFace decoder-only causal language models.
+
+This adapter intentionally avoids model-family naming. It selects a structural
+spec at runtime (dense FFN vs MoE vs GPT-2-like blocks) and exposes a stable
+`describe()` contract for InvarLock gates and reporting.
+"""
+
+from __future__ import annotations
+
+import os
+from types import SimpleNamespace
+from typing import Any
+
+import torch
+import torch.nn as nn
+
+from invarlock.core.api import ModelAdapter
+from invarlock.core.error_utils import wrap_errors
+from invarlock.core.exceptions import AdapterError, DependencyError, ModelLoadError
+
+from .hf_mixin import HFAdapterMixin
+
+TensorType = torch.Tensor
+ModuleType = nn.Module
+
+LIGHT_IMPORT = os.getenv("INVARLOCK_LIGHT_IMPORT", "").strip().lower() in {
+    "1",
+    "true",
+    "yes",
+}
+
+
+def _first_item(seq: Any) -> Any | None:
+    try:
+        if hasattr(seq, "__len__") and len(seq) > 0:  # type: ignore[arg-type]
+            return seq[0]  # type: ignore[index]
+    except Exception:
+        pass
+    try:
+        return next(iter(seq))
+    except Exception:
+        return None
+
+
+def _has_set_attr(obj: Any, name: str) -> bool:
+    d = getattr(obj, "__dict__", None)
+    if isinstance(d, dict) and name in d:
+        return True
+    if isinstance(obj, nn.Module):
+        if hasattr(obj, "_modules") and name in obj._modules:
+            return True
+        if hasattr(obj, "_parameters") and name in obj._parameters:
+            return True
+        if hasattr(obj, "_buffers") and name in obj._buffers:
+            return True
+    return False
+
+
+class _CausalSpec:
+    spec_name = "base"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        raise NotImplementedError
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        raise NotImplementedError
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        raise NotImplementedError
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        return {}
+
+
+class _DenseDecoderSpec(_CausalSpec):
+    spec_name = "dense_decoder"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        layer = _first_item(layers)
+        if layer is None:
+            return False
+        has_attn = (
+            hasattr(layer, "self_attn")
+            and _has_set_attr(layer.self_attn, "q_proj")
+            and _has_set_attr(layer.self_attn, "k_proj")
+            and _has_set_attr(layer.self_attn, "v_proj")
+            and _has_set_attr(layer.self_attn, "o_proj")
+        )
+        has_mlp = (
+            hasattr(layer, "mlp")
+            and _has_set_attr(layer.mlp, "gate_proj")
+            and _has_set_attr(layer.mlp, "up_proj")
+            and _has_set_attr(layer.mlp, "down_proj")
+        )
+        has_norms = _has_set_attr(layer, "input_layernorm") and _has_set_attr(
+            layer, "post_attention_layernorm"
+        )
+        return bool(has_attn and has_mlp and has_norms)
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        mlp_dim = int(getattr(config, "intermediate_size", hidden_size * 4) or 0)
+        try:
+            gate_proj = getattr(getattr(layer, "mlp", None), "gate_proj", None)
+            if gate_proj is not None and hasattr(gate_proj, "weight"):
+                mlp_dim = int(gate_proj.weight.shape[0])
+        except Exception:
+            pass
+        return int(mlp_dim)
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        mlp = layer.mlp
+        return {
+            "self_attn.q_proj": layer.self_attn.q_proj,
+            "self_attn.k_proj": layer.self_attn.k_proj,
+            "self_attn.v_proj": layer.self_attn.v_proj,
+            "self_attn.o_proj": layer.self_attn.o_proj,
+            "input_layernorm": layer.input_layernorm,
+            "post_attention_layernorm": layer.post_attention_layernorm,
+            "mlp.gate_proj": mlp.gate_proj,
+            "mlp.up_proj": mlp.up_proj,
+            "mlp.down_proj": mlp.down_proj,
+        }
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        tying: dict[str, str] = {}
+        try:
+            if hasattr(model, "lm_head") and hasattr(base, "embed_tokens"):
+                if model.lm_head.weight is base.embed_tokens.weight:
+                    tying["lm_head.weight"] = "model.embed_tokens.weight"
+        except Exception:
+            pass
+        return tying
+
+
+class _MoEDecoderSpec(_CausalSpec):
+    spec_name = "moe_decoder"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        layer = _first_item(layers)
+        if layer is None:
+            return False
+        has_attn = (
+            hasattr(layer, "self_attn")
+            and _has_set_attr(layer.self_attn, "q_proj")
+            and _has_set_attr(layer.self_attn, "k_proj")
+            and _has_set_attr(layer.self_attn, "v_proj")
+            and _has_set_attr(layer.self_attn, "o_proj")
+        )
+        moe = getattr(layer, "block_sparse_moe", None)
+        experts = getattr(moe, "experts", None) if moe is not None else None
+        expert0 = _first_item(experts) if experts is not None else None
+        has_moe = bool(
+            expert0 is not None
+            and _has_set_attr(expert0, "w1")
+            and _has_set_attr(expert0, "w2")
+        )
+        has_norms = _has_set_attr(layer, "input_layernorm") and _has_set_attr(
+            layer, "post_attention_layernorm"
+        )
+        return bool(has_attn and has_moe and has_norms)
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        mlp_dim = int(getattr(config, "intermediate_size", hidden_size * 4) or 0)
+        try:
+            moe = getattr(layer, "block_sparse_moe", None)
+            experts = getattr(moe, "experts", None) if moe is not None else None
+            expert0 = _first_item(experts) if experts is not None else None
+            if expert0 is not None:
+                w1 = getattr(expert0, "w1", None)
+                if w1 is not None and hasattr(w1, "weight"):
+                    mlp_dim = int(w1.weight.shape[0])
+        except Exception:
+            pass
+        return int(mlp_dim)
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        moe = layer.block_sparse_moe
+        expert0 = _first_item(moe.experts)
+        if expert0 is None:
+            raise AdapterError(
+                code="E202",
+                message="ADAPTER-STRUCTURE-INVALID: MoE layer missing experts",
+                details={"layer_class": layer.__class__.__name__},
+            )
+        return {
+            "self_attn.q_proj": layer.self_attn.q_proj,
+            "self_attn.k_proj": layer.self_attn.k_proj,
+            "self_attn.v_proj": layer.self_attn.v_proj,
+            "self_attn.o_proj": layer.self_attn.o_proj,
+            "input_layernorm": layer.input_layernorm,
+            "post_attention_layernorm": layer.post_attention_layernorm,
+            # Best-effort mapping to dense naming used elsewhere in the stack.
+            "mlp.gate_proj": expert0.w1,
+            "mlp.up_proj": getattr(expert0, "w3", expert0.w1),
+            "mlp.down_proj": expert0.w2,
+        }
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        return _DenseDecoderSpec().tying_map(model, base)
+
+
+class _GPT2LikeDecoderSpec(_CausalSpec):
+    spec_name = "gpt2_like"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        layer = _first_item(layers)
+        if layer is None:
+            return False
+        return bool(
+            hasattr(layer, "attn")
+            and hasattr(layer.attn, "c_proj")
+            and hasattr(layer, "mlp")
+            and hasattr(layer.mlp, "c_proj")
+        )
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        try:
+            c_fc = getattr(getattr(layer, "mlp", None), "c_fc", None)
+            if c_fc is not None and hasattr(c_fc, "weight"):
+                # HF GPT-style uses Conv1D where nf is out_features.
+                if hasattr(c_fc, "nf"):
+                    return int(c_fc.nf)
+                return int(c_fc.weight.shape[0])
+        except Exception:
+            pass
+        return int(getattr(config, "n_inner", hidden_size * 4) or 0)
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        return {
+            "attn.c_attn": layer.attn.c_attn,
+            "attn.c_proj": layer.attn.c_proj,
+            "mlp.c_fc": layer.mlp.c_fc,
+            "mlp.c_proj": layer.mlp.c_proj,
+            "ln_1": layer.ln_1,
+            "ln_2": layer.ln_2,
+        }
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        tying: dict[str, str] = {}
+        try:
+            if hasattr(model, "lm_head") and hasattr(base, "wte"):
+                if model.lm_head.weight is base.wte.weight:
+                    tying["lm_head.weight"] = "transformer.wte.weight"
+        except Exception:
+            pass
+        return tying
+
+
+_SPECS: list[_CausalSpec] = [
+    _MoEDecoderSpec(),
+    _DenseDecoderSpec(),
+    _GPT2LikeDecoderSpec(),
+]
+
+
+class HF_Causal_Adapter(HFAdapterMixin, ModelAdapter):
+    """Spec-driven adapter for decoder-only causal LMs."""
+
+    name = "hf_causal"
+
+    def load_model(
+        self, model_id: str, device: str = "auto", **kwargs: Any
+    ) -> ModuleType | Any:
+        try:
+            with wrap_errors(
+                DependencyError,
+                "E203",
+                "DEPENDENCY-MISSING: transformers",
+                lambda e: {"dependency": "transformers"},
+            ):
+                from transformers import AutoModelForCausalLM  # type: ignore
+
+            with wrap_errors(
+                ModelLoadError,
+                "E201",
+                "MODEL-LOAD-FAILED: transformers AutoModelForCausalLM",
+                lambda e: {"model_id": model_id},
+            ):
+                model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
+
+            return self._safe_to_device(model, device)
+        except DependencyError:
+            if LIGHT_IMPORT:
+                stub = SimpleNamespace(name="hf_causal_stub")
+                stub.to = lambda *_a, **_k: stub  # type: ignore[attr-defined]
+                return stub
+            raise
+
+    def _unwrap(self, model: Any) -> tuple[Any, Any, Any]:
+        config = getattr(model, "config", None)
+        if hasattr(model, "model") and hasattr(model.model, "layers"):
+            return model.model, model.model.layers, config
+        if hasattr(model, "transformer") and hasattr(model.transformer, "h"):
+            return model.transformer, model.transformer.h, config
+        if hasattr(model, "layers"):
+            return model, model.layers, config
+        if hasattr(model, "h"):
+            return model, model.h, config
+        raise AdapterError(
+            code="E202",
+            message="ADAPTER-STRUCTURE-INVALID: unrecognized HF causal LM structure",
+            details={"model_class": model.__class__.__name__},
+        )
+
+    def _select_spec(self, model: Any, base: Any, layers: Any) -> _CausalSpec:
+        for spec in _SPECS:
+            try:
+                if spec.matches(model, base, layers):
+                    return spec
+            except Exception:
+                continue
+        return _DenseDecoderSpec()
+
+    def can_handle(self, model: ModuleType | Any) -> bool:
+        try:
+            base, layers, _cfg = self._unwrap(model)
+        except Exception:
+            return False
+        return any(spec.matches(model, base, layers) for spec in _SPECS)
+
+    def describe(self, model: ModuleType | Any) -> dict[str, Any]:
+        base, layers, config = self._unwrap(model)
+        if config is None:
+            raise AdapterError(
+                code="E202",
+                message="ADAPTER-STRUCTURE-INVALID: missing HuggingFace config on model",
+                details={"model_class": model.__class__.__name__},
+            )
+
+        try:
+            n_layers = len(layers)
+        except Exception:
+            n_layers = sum(1 for _ in iter(layers))
+
+        def _coerce_int(value: Any) -> int | None:
+            try:
+                if isinstance(value, bool):
+                    return None
+                if isinstance(value, int):
+                    return int(value)
+                if isinstance(value, float):
+                    return int(value)
+                if isinstance(value, str):
+                    stripped = value.strip()
+                    if stripped and stripped.isdigit():
+                        return int(stripped)
+            except Exception:
+                return None
+            return None
+
+        n_heads = _coerce_int(getattr(config, "num_attention_heads", None))
+        if n_heads is None:
+            n_heads = _coerce_int(getattr(config, "n_head", None))
+
+        hidden_size = _coerce_int(getattr(config, "hidden_size", None))
+        if hidden_size is None:
+            hidden_size = _coerce_int(getattr(config, "n_embd", None))
+
+        vocab_size = _coerce_int(getattr(config, "vocab_size", None))
+
+        if n_heads is None or hidden_size is None:
+            raise AdapterError(
+                code="E202",
+                message="ADAPTER-STRUCTURE-INVALID: missing head/hidden size metadata",
+                details={"model_class": model.__class__.__name__},
+            )
+
+        spec = self._select_spec(model, base, layers)
+
+        heads_per_layer = [int(n_heads)] * int(n_layers)
+        mlp_dims: list[int] = []
+        for idx in range(int(n_layers)):
+            layer = layers[idx]
+            mlp_dims.append(spec.infer_mlp_dim(layer, config, int(hidden_size)))
+
+        tying = spec.tying_map(model, base)
+
+        total_params = 0
+        try:
+            total_params = sum(p.numel() for p in model.parameters())
+        except Exception:
+            total_params = 0
+
+        try:
+            device = next(model.parameters()).device
+        except Exception:
+            device = torch.device("cpu")
+
+        return {
+            "n_layer": int(n_layers),
+            "heads_per_layer": heads_per_layer,
+            "mlp_dims": mlp_dims,
+            "tying": tying,
+            "model_type": str(getattr(config, "model_type", "") or "causal"),
+            "model_class": model.__class__.__name__,
+            "hf_model_type": str(getattr(config, "model_type", "") or ""),
+            "hf_config_class": config.__class__.__name__
+            if hasattr(config, "__class__")
+            else "unknown",
+            "n_heads": int(n_heads),
+            "hidden_size": int(hidden_size),
+            "vocab_size": int(vocab_size) if vocab_size is not None else None,
+            "total_params": int(total_params),
+            "device": str(device),
+            "spec": spec.spec_name,
+        }
+
+    def get_layer_modules(
+        self, model: ModuleType | Any, layer_idx: int
+    ) -> dict[str, Any]:
+        base, layers, _cfg = self._unwrap(model)
+        spec = self._select_spec(model, base, layers)
+        layer = layers[layer_idx]
+        return spec.layer_modules(model, layer)
invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py}
CHANGED
@@ -22,7 +22,7 @@ from invarlock.core.error_utils import wrap_errors
 from invarlock.core.exceptions import DependencyError, ModelLoadError
 
 
-class HF_ORT_CausalLM_Adapter(ModelAdapter):
+class HF_Causal_ONNX_Adapter(ModelAdapter):
     """Optimum/ONNXRuntime causal LM adapter.
 
     Provides a lightweight bridge that loads an ORTModelForCausalLM and
@@ -31,7 +31,7 @@ class HF_ORT_CausalLM_Adapter(ModelAdapter):
     back to reload in the CLI runner.
     """
 
-    name = "
+    name = "hf_causal_onnx"
 
     # --- Loading ---------------------------------------------------------
     def load_model(self, model_id: str, device: str = "cpu", **kwargs: Any):  # type: ignore[override]
@@ -109,4 +109,4 @@ class HF_ORT_CausalLM_Adapter(ModelAdapter):
         raise NotImplementedError("restore not supported for ONNXRuntime models")
 
 
-__all__ = ["
+__all__ = ["HF_Causal_ONNX_Adapter"]
invarlock/adapters/hf_mixin.py
CHANGED
@@ -490,18 +490,39 @@ class HFAdapterMixin:
         """Return mapping of tied parameter names to source parameter names."""
 
         tying: dict[str, str] = {}
-
+        try:
+            named = model.named_parameters(remove_duplicate=False)  # type: ignore[call-arg]
+        except TypeError:  # pragma: no cover - torch version dependent
+            named = model.named_parameters()
+        params = dict(named)
+
+        def _is_tied(name_a: str, name_b: str) -> bool:
+            a = params.get(name_a)
+            b = params.get(name_b)
+            if a is None or b is None:
+                return False
+            try:
+                if a is b:
+                    return True
+                if hasattr(a, "data_ptr") and hasattr(b, "data_ptr"):
+                    return int(a.data_ptr()) == int(b.data_ptr())
+            except Exception:
+                return False
+            return False
 
-        if "lm_head.weight"
+        if _is_tied("lm_head.weight", "transformer.wte.weight"):
             tying["lm_head.weight"] = "transformer.wte.weight"
 
+        if _is_tied("lm_head.weight", "model.embed_tokens.weight"):
+            tying["lm_head.weight"] = "model.embed_tokens.weight"
+
         decoder_name = "cls.predictions.decoder.weight"
-        if decoder_name in
+        if decoder_name in params:
             for candidate in (
                 "bert.embeddings.word_embeddings.weight",
                 "embeddings.word_embeddings.weight",
             ):
-                if candidate
+                if _is_tied(decoder_name, candidate):
                     tying[decoder_name] = candidate
                     break
 
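The reworked tying map resolves tied parameters by tensor identity or storage pointer rather than by name lookup alone, and requests `named_parameters(remove_duplicate=False)` so that both aliases of a tied weight stay visible. A standalone sketch of why that check works, assuming a GPT-2-style checkpoint with a tied LM head (illustrative, not package code):

```python
# Tied weights share one underlying tensor, so identity and data_ptr() agree.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
params = dict(model.named_parameters(remove_duplicate=False))  # keep duplicate (tied) entries

a = params["lm_head.weight"]
b = params["transformer.wte.weight"]
print(a is b, a.data_ptr() == b.data_ptr())  # typically (True, True) when the head is tied
```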
invarlock/adapters/{hf_bert.py → hf_mlm.py}
CHANGED
@@ -1,15 +1,8 @@
 """
-HuggingFace
+HuggingFace masked LM adapter.
 ==============================
 
-ModelAdapter implementation for HuggingFace
-
-This adapter provides BERT-specific integration including:
-- Support for BERT, RoBERTa, DistilBERT, and other BERT variants
-- Proper handling of bidirectional attention layers
-- Support for classification heads and pooling layers
-- Token type embeddings and position embeddings handling
-- Proper device-aware state serialization
+ModelAdapter implementation for HuggingFace masked language models.
 """
 
 from typing import Any
@@ -27,7 +20,7 @@ TensorType = torch.Tensor
 ModuleType = nn.Module
 
 
-class HF_BERT_Adapter(HFAdapterMixin, ModelAdapter):
+class HF_MLM_Adapter(HFAdapterMixin, ModelAdapter):
     """
     HuggingFace-specific ModelAdapter implementation for BERT models.
 
@@ -39,7 +32,7 @@ class HF_BERT_Adapter(HFAdapterMixin, ModelAdapter):
     - Device-aware state serialization
     """
 
-    name = "
+    name = "hf_mlm"
 
     def load_model(
         self, model_id: str, device: str = "auto", **kwargs: Any
invarlock/adapters/{hf_t5.py → hf_seq2seq.py}
CHANGED
@@ -1,11 +1,11 @@
 """
-HuggingFace
-
+HuggingFace encoder-decoder adapter.
+===================================
 
-ModelAdapter implementation for HuggingFace
+ModelAdapter implementation for HuggingFace encoder-decoder (seq2seq) models.
 
-Loads AutoModelForSeq2SeqLM
-
+Loads AutoModelForSeq2SeqLM and exposes a minimal describe() sufficient for
+guard policies and reporting.
 """
 
 from __future__ import annotations
@@ -25,10 +25,10 @@ TensorType = torch.Tensor
 ModuleType = nn.Module
 
 
-class HF_T5_Adapter(HFAdapterMixin, ModelAdapter):
-    """HuggingFace
+class HF_Seq2Seq_Adapter(HFAdapterMixin, ModelAdapter):
+    """HuggingFace encoder-decoder adapter using AutoModelForSeq2SeqLM."""
 
-    name = "
+    name = "hf_seq2seq"
 
     def load_model(  # type: ignore[override]
         self, model_id: str, device: str = "auto", **kwargs: Any
@@ -136,4 +136,4 @@ class HF_T5_Adapter(HFAdapterMixin, ModelAdapter):
         return super().restore(model, blob)
 
 
-__all__ = ["
+__all__ = ["HF_Seq2Seq_Adapter"]
invarlock/calibration/spectral_null.py
CHANGED
@@ -148,7 +148,7 @@ def _selected_families_for_alpha(
 
 
 def summarize_null_sweep_reports(
-    reports: list[
+    reports: list[object],
     *,
     tier: str,
     safety_margin: float = 0.05,
@@ -186,20 +186,25 @@ def summarize_null_sweep_reports(
         mt = _extract_multiple_testing(metrics)
         if mt:
             mt_method = str(mt.get("method", mt_method))
-
-
-
-
+            alpha_value = mt.get("alpha")
+            if alpha_value is not None:
+                try:
+                    mt_alpha = float(alpha_value)
+                except Exception:
+                    pass
+            m_value = mt.get("m")
+            if m_value is not None:
+                try:
+                    mt_m = int(m_value)
+                except Exception:
+                    pass
 
         fam_z = _extract_family_max_z(metrics)
         for fam, z in fam_z.items():
             family_max_z[fam] = max(family_max_z[fam], float(z))
 
-
-
-            if isinstance(metrics.get("multiple_testing_selection"), dict)
-            else {}
-        )
+        raw_selection = metrics.get("multiple_testing_selection")
+        selection = raw_selection if isinstance(raw_selection, dict) else {}
         pvals = selection.get("family_pvalues")
         if not isinstance(pvals, dict):
             pvals = {}
invarlock/calibration/variance_ve.py
CHANGED
@@ -107,8 +107,6 @@ def summarize_ve_sweep_reports(
     evaluated = 0
 
     for report in reports:
-        if not isinstance(report, dict):
-            continue
         g = _extract_guard(report, "variance") or {}
         metrics = g.get("metrics", {}) if isinstance(g.get("metrics"), dict) else {}
         pg = metrics.get("predictive_gate")
|