invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. invarlock/__init__.py +2 -2
  2. invarlock/_data/runtime/tiers.yaml +57 -30
  3. invarlock/adapters/__init__.py +11 -15
  4. invarlock/adapters/auto.py +35 -40
  5. invarlock/adapters/capabilities.py +2 -2
  6. invarlock/adapters/hf_causal.py +418 -0
  7. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  8. invarlock/adapters/hf_mixin.py +25 -4
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/calibration/spectral_null.py +15 -10
  12. invarlock/calibration/variance_ve.py +0 -2
  13. invarlock/cli/adapter_auto.py +31 -21
  14. invarlock/cli/app.py +73 -2
  15. invarlock/cli/commands/calibrate.py +6 -2
  16. invarlock/cli/commands/certify.py +651 -91
  17. invarlock/cli/commands/doctor.py +11 -11
  18. invarlock/cli/commands/explain_gates.py +57 -8
  19. invarlock/cli/commands/plugins.py +13 -9
  20. invarlock/cli/commands/report.py +233 -69
  21. invarlock/cli/commands/run.py +1066 -244
  22. invarlock/cli/commands/verify.py +154 -15
  23. invarlock/cli/config.py +22 -6
  24. invarlock/cli/doctor_helpers.py +4 -5
  25. invarlock/cli/output.py +193 -0
  26. invarlock/cli/provenance.py +1 -1
  27. invarlock/core/api.py +45 -5
  28. invarlock/core/auto_tuning.py +65 -20
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/contracts.py +7 -1
  31. invarlock/core/registry.py +11 -13
  32. invarlock/core/runner.py +425 -75
  33. invarlock/edits/quant_rtn.py +65 -37
  34. invarlock/eval/bench.py +3 -16
  35. invarlock/eval/data.py +82 -51
  36. invarlock/eval/metrics.py +63 -2
  37. invarlock/eval/primary_metric.py +23 -0
  38. invarlock/eval/tail_stats.py +230 -0
  39. invarlock/eval/tasks/__init__.py +12 -0
  40. invarlock/eval/tasks/classification.py +48 -0
  41. invarlock/eval/tasks/qa.py +36 -0
  42. invarlock/eval/tasks/text_generation.py +102 -0
  43. invarlock/guards/_estimators.py +154 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/policies.py +16 -6
  46. invarlock/guards/rmt.py +627 -546
  47. invarlock/guards/spectral.py +348 -110
  48. invarlock/guards/tier_config.py +32 -30
  49. invarlock/guards/variance.py +7 -31
  50. invarlock/guards_ref/rmt_ref.py +23 -23
  51. invarlock/model_profile.py +90 -42
  52. invarlock/observability/health.py +6 -6
  53. invarlock/observability/metrics.py +108 -0
  54. invarlock/reporting/certificate.py +384 -55
  55. invarlock/reporting/certificate_schema.py +3 -2
  56. invarlock/reporting/dataset_hashing.py +15 -2
  57. invarlock/reporting/guards_analysis.py +350 -277
  58. invarlock/reporting/html.py +55 -5
  59. invarlock/reporting/normalizer.py +13 -0
  60. invarlock/reporting/policy_utils.py +38 -36
  61. invarlock/reporting/primary_metric_utils.py +71 -17
  62. invarlock/reporting/render.py +852 -431
  63. invarlock/reporting/report.py +40 -4
  64. invarlock/reporting/report_types.py +11 -3
  65. invarlock/reporting/telemetry.py +86 -0
  66. invarlock/reporting/validate.py +1 -18
  67. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
  68. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
  69. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  70. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  71. invarlock/adapters/hf_gpt2.py +0 -404
  72. invarlock/adapters/hf_llama.py +0 -487
  73. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  74. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,418 @@
+"""
+HuggingFace causal LM adapter (decoder-only).
+=============================================
+
+Role-based adapter for HuggingFace decoder-only causal language models.
+
+This adapter intentionally avoids model-family naming. It selects a structural
+spec at runtime (dense FFN vs MoE vs GPT-2-like blocks) and exposes a stable
+`describe()` contract for InvarLock gates and reporting.
+"""
+
+from __future__ import annotations
+
+import os
+from types import SimpleNamespace
+from typing import Any
+
+import torch
+import torch.nn as nn
+
+from invarlock.core.api import ModelAdapter
+from invarlock.core.error_utils import wrap_errors
+from invarlock.core.exceptions import AdapterError, DependencyError, ModelLoadError
+
+from .hf_mixin import HFAdapterMixin
+
+TensorType = torch.Tensor
+ModuleType = nn.Module
+
+LIGHT_IMPORT = os.getenv("INVARLOCK_LIGHT_IMPORT", "").strip().lower() in {
+    "1",
+    "true",
+    "yes",
+}
+
+
+def _first_item(seq: Any) -> Any | None:
+    try:
+        if hasattr(seq, "__len__") and len(seq) > 0:  # type: ignore[arg-type]
+            return seq[0]  # type: ignore[index]
+    except Exception:
+        pass
+    try:
+        return next(iter(seq))
+    except Exception:
+        return None
+
+
+def _has_set_attr(obj: Any, name: str) -> bool:
+    d = getattr(obj, "__dict__", None)
+    if isinstance(d, dict) and name in d:
+        return True
+    if isinstance(obj, nn.Module):
+        if hasattr(obj, "_modules") and name in obj._modules:
+            return True
+        if hasattr(obj, "_parameters") and name in obj._parameters:
+            return True
+        if hasattr(obj, "_buffers") and name in obj._buffers:
+            return True
+    return False
+
+
+class _CausalSpec:
+    spec_name = "base"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        raise NotImplementedError
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        raise NotImplementedError
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        raise NotImplementedError
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        return {}
+
+
+class _DenseDecoderSpec(_CausalSpec):
+    spec_name = "dense_decoder"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        layer = _first_item(layers)
+        if layer is None:
+            return False
+        has_attn = (
+            hasattr(layer, "self_attn")
+            and _has_set_attr(layer.self_attn, "q_proj")
+            and _has_set_attr(layer.self_attn, "k_proj")
+            and _has_set_attr(layer.self_attn, "v_proj")
+            and _has_set_attr(layer.self_attn, "o_proj")
+        )
+        has_mlp = (
+            hasattr(layer, "mlp")
+            and _has_set_attr(layer.mlp, "gate_proj")
+            and _has_set_attr(layer.mlp, "up_proj")
+            and _has_set_attr(layer.mlp, "down_proj")
+        )
+        has_norms = _has_set_attr(layer, "input_layernorm") and _has_set_attr(
+            layer, "post_attention_layernorm"
+        )
+        return bool(has_attn and has_mlp and has_norms)
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        mlp_dim = int(getattr(config, "intermediate_size", hidden_size * 4) or 0)
+        try:
+            gate_proj = getattr(getattr(layer, "mlp", None), "gate_proj", None)
+            if gate_proj is not None and hasattr(gate_proj, "weight"):
+                mlp_dim = int(gate_proj.weight.shape[0])
+        except Exception:
+            pass
+        return int(mlp_dim)
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        mlp = layer.mlp
+        return {
+            "self_attn.q_proj": layer.self_attn.q_proj,
+            "self_attn.k_proj": layer.self_attn.k_proj,
+            "self_attn.v_proj": layer.self_attn.v_proj,
+            "self_attn.o_proj": layer.self_attn.o_proj,
+            "input_layernorm": layer.input_layernorm,
+            "post_attention_layernorm": layer.post_attention_layernorm,
+            "mlp.gate_proj": mlp.gate_proj,
+            "mlp.up_proj": mlp.up_proj,
+            "mlp.down_proj": mlp.down_proj,
+        }
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        tying: dict[str, str] = {}
+        try:
+            if hasattr(model, "lm_head") and hasattr(base, "embed_tokens"):
+                if model.lm_head.weight is base.embed_tokens.weight:
+                    tying["lm_head.weight"] = "model.embed_tokens.weight"
+        except Exception:
+            pass
+        return tying
+
+
+class _MoEDecoderSpec(_CausalSpec):
+    spec_name = "moe_decoder"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        layer = _first_item(layers)
+        if layer is None:
+            return False
+        has_attn = (
+            hasattr(layer, "self_attn")
+            and _has_set_attr(layer.self_attn, "q_proj")
+            and _has_set_attr(layer.self_attn, "k_proj")
+            and _has_set_attr(layer.self_attn, "v_proj")
+            and _has_set_attr(layer.self_attn, "o_proj")
+        )
+        moe = getattr(layer, "block_sparse_moe", None)
+        experts = getattr(moe, "experts", None) if moe is not None else None
+        expert0 = _first_item(experts) if experts is not None else None
+        has_moe = bool(
+            expert0 is not None
+            and _has_set_attr(expert0, "w1")
+            and _has_set_attr(expert0, "w2")
+        )
+        has_norms = _has_set_attr(layer, "input_layernorm") and _has_set_attr(
+            layer, "post_attention_layernorm"
+        )
+        return bool(has_attn and has_moe and has_norms)
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        mlp_dim = int(getattr(config, "intermediate_size", hidden_size * 4) or 0)
+        try:
+            moe = getattr(layer, "block_sparse_moe", None)
+            experts = getattr(moe, "experts", None) if moe is not None else None
+            expert0 = _first_item(experts) if experts is not None else None
+            if expert0 is not None:
+                w1 = getattr(expert0, "w1", None)
+                if w1 is not None and hasattr(w1, "weight"):
+                    mlp_dim = int(w1.weight.shape[0])
+        except Exception:
+            pass
+        return int(mlp_dim)
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        moe = layer.block_sparse_moe
+        expert0 = _first_item(moe.experts)
+        if expert0 is None:
+            raise AdapterError(
+                code="E202",
+                message="ADAPTER-STRUCTURE-INVALID: MoE layer missing experts",
+                details={"layer_class": layer.__class__.__name__},
+            )
+        return {
+            "self_attn.q_proj": layer.self_attn.q_proj,
+            "self_attn.k_proj": layer.self_attn.k_proj,
+            "self_attn.v_proj": layer.self_attn.v_proj,
+            "self_attn.o_proj": layer.self_attn.o_proj,
+            "input_layernorm": layer.input_layernorm,
+            "post_attention_layernorm": layer.post_attention_layernorm,
+            # Best-effort mapping to dense naming used elsewhere in the stack.
+            "mlp.gate_proj": expert0.w1,
+            "mlp.up_proj": getattr(expert0, "w3", expert0.w1),
+            "mlp.down_proj": expert0.w2,
+        }
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        return _DenseDecoderSpec().tying_map(model, base)
+
+
+class _GPT2LikeDecoderSpec(_CausalSpec):
+    spec_name = "gpt2_like"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        layer = _first_item(layers)
+        if layer is None:
+            return False
+        return bool(
+            hasattr(layer, "attn")
+            and hasattr(layer.attn, "c_proj")
+            and hasattr(layer, "mlp")
+            and hasattr(layer.mlp, "c_proj")
+        )
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        try:
+            c_fc = getattr(getattr(layer, "mlp", None), "c_fc", None)
+            if c_fc is not None and hasattr(c_fc, "weight"):
+                # HF GPT-style uses Conv1D where nf is out_features.
+                if hasattr(c_fc, "nf"):
+                    return int(c_fc.nf)
+                return int(c_fc.weight.shape[0])
+        except Exception:
+            pass
+        return int(getattr(config, "n_inner", hidden_size * 4) or 0)
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        return {
+            "attn.c_attn": layer.attn.c_attn,
+            "attn.c_proj": layer.attn.c_proj,
+            "mlp.c_fc": layer.mlp.c_fc,
+            "mlp.c_proj": layer.mlp.c_proj,
+            "ln_1": layer.ln_1,
+            "ln_2": layer.ln_2,
+        }
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        tying: dict[str, str] = {}
+        try:
+            if hasattr(model, "lm_head") and hasattr(base, "wte"):
+                if model.lm_head.weight is base.wte.weight:
+                    tying["lm_head.weight"] = "transformer.wte.weight"
+        except Exception:
+            pass
+        return tying
+
+
+_SPECS: list[_CausalSpec] = [
+    _MoEDecoderSpec(),
+    _DenseDecoderSpec(),
+    _GPT2LikeDecoderSpec(),
+]
+
+
+class HF_Causal_Adapter(HFAdapterMixin, ModelAdapter):
+    """Spec-driven adapter for decoder-only causal LMs."""
+
+    name = "hf_causal"
+
+    def load_model(
+        self, model_id: str, device: str = "auto", **kwargs: Any
+    ) -> ModuleType | Any:
+        try:
+            with wrap_errors(
+                DependencyError,
+                "E203",
+                "DEPENDENCY-MISSING: transformers",
+                lambda e: {"dependency": "transformers"},
+            ):
+                from transformers import AutoModelForCausalLM  # type: ignore
+
+            with wrap_errors(
+                ModelLoadError,
+                "E201",
+                "MODEL-LOAD-FAILED: transformers AutoModelForCausalLM",
+                lambda e: {"model_id": model_id},
+            ):
+                model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
+
+            return self._safe_to_device(model, device)
+        except DependencyError:
+            if LIGHT_IMPORT:
+                stub = SimpleNamespace(name="hf_causal_stub")
+                stub.to = lambda *_a, **_k: stub  # type: ignore[attr-defined]
+                return stub
+            raise
+
+    def _unwrap(self, model: Any) -> tuple[Any, Any, Any]:
+        config = getattr(model, "config", None)
+        if hasattr(model, "model") and hasattr(model.model, "layers"):
+            return model.model, model.model.layers, config
+        if hasattr(model, "transformer") and hasattr(model.transformer, "h"):
+            return model.transformer, model.transformer.h, config
+        if hasattr(model, "layers"):
+            return model, model.layers, config
+        if hasattr(model, "h"):
+            return model, model.h, config
+        raise AdapterError(
+            code="E202",
+            message="ADAPTER-STRUCTURE-INVALID: unrecognized HF causal LM structure",
+            details={"model_class": model.__class__.__name__},
+        )
+
+    def _select_spec(self, model: Any, base: Any, layers: Any) -> _CausalSpec:
+        for spec in _SPECS:
+            try:
+                if spec.matches(model, base, layers):
+                    return spec
+            except Exception:
+                continue
+        return _DenseDecoderSpec()
+
+    def can_handle(self, model: ModuleType | Any) -> bool:
+        try:
+            base, layers, _cfg = self._unwrap(model)
+        except Exception:
+            return False
+        return any(spec.matches(model, base, layers) for spec in _SPECS)
+
+    def describe(self, model: ModuleType | Any) -> dict[str, Any]:
+        base, layers, config = self._unwrap(model)
+        if config is None:
+            raise AdapterError(
+                code="E202",
+                message="ADAPTER-STRUCTURE-INVALID: missing HuggingFace config on model",
+                details={"model_class": model.__class__.__name__},
+            )
+
+        try:
+            n_layers = len(layers)
+        except Exception:
+            n_layers = sum(1 for _ in iter(layers))
+
+        def _coerce_int(value: Any) -> int | None:
+            try:
+                if isinstance(value, bool):
+                    return None
+                if isinstance(value, int):
+                    return int(value)
+                if isinstance(value, float):
+                    return int(value)
+                if isinstance(value, str):
+                    stripped = value.strip()
+                    if stripped and stripped.isdigit():
+                        return int(stripped)
+            except Exception:
+                return None
+            return None
+
+        n_heads = _coerce_int(getattr(config, "num_attention_heads", None))
+        if n_heads is None:
+            n_heads = _coerce_int(getattr(config, "n_head", None))
+
+        hidden_size = _coerce_int(getattr(config, "hidden_size", None))
+        if hidden_size is None:
+            hidden_size = _coerce_int(getattr(config, "n_embd", None))
+
+        vocab_size = _coerce_int(getattr(config, "vocab_size", None))
+
+        if n_heads is None or hidden_size is None:
+            raise AdapterError(
+                code="E202",
+                message="ADAPTER-STRUCTURE-INVALID: missing head/hidden size metadata",
+                details={"model_class": model.__class__.__name__},
+            )
+
+        spec = self._select_spec(model, base, layers)
+
+        heads_per_layer = [int(n_heads)] * int(n_layers)
+        mlp_dims: list[int] = []
+        for idx in range(int(n_layers)):
+            layer = layers[idx]
+            mlp_dims.append(spec.infer_mlp_dim(layer, config, int(hidden_size)))
+
+        tying = spec.tying_map(model, base)
+
+        total_params = 0
+        try:
+            total_params = sum(p.numel() for p in model.parameters())
+        except Exception:
+            total_params = 0
+
+        try:
+            device = next(model.parameters()).device
+        except Exception:
+            device = torch.device("cpu")
+
+        return {
+            "n_layer": int(n_layers),
+            "heads_per_layer": heads_per_layer,
+            "mlp_dims": mlp_dims,
+            "tying": tying,
+            "model_type": str(getattr(config, "model_type", "") or "causal"),
+            "model_class": model.__class__.__name__,
+            "hf_model_type": str(getattr(config, "model_type", "") or ""),
+            "hf_config_class": config.__class__.__name__
+            if hasattr(config, "__class__")
+            else "unknown",
+            "n_heads": int(n_heads),
+            "hidden_size": int(hidden_size),
+            "vocab_size": int(vocab_size) if vocab_size is not None else None,
+            "total_params": int(total_params),
+            "device": str(device),
+            "spec": spec.spec_name,
+        }
+
+    def get_layer_modules(
+        self, model: ModuleType | Any, layer_idx: int
+    ) -> dict[str, Any]:
+        base, layers, _cfg = self._unwrap(model)
+        spec = self._select_spec(model, base, layers)
+        layer = layers[layer_idx]
+        return spec.layer_modules(model, layer)
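The role-based `hf_causal` adapter above takes over from the family-specific `hf_gpt2.py` and `hf_llama.py` adapters removed in this release (items 71-72 in the file list). A minimal usage sketch of the new flow; the adapter class and its methods come from the file above, while the checkpoint id and the printed fields are illustrative assumptions only:

# Illustrative sketch (not part of the package): exercising the role-based adapter.
from invarlock.adapters.hf_causal import HF_Causal_Adapter

adapter = HF_Causal_Adapter()
model = adapter.load_model("sshleifer/tiny-gpt2", device="cpu")  # checkpoint id is an assumption

if adapter.can_handle(model):
    desc = adapter.describe(model)
    # "spec" names the structural spec that matched at runtime:
    # dense_decoder, moe_decoder, or gpt2_like.
    print(desc["spec"], desc["n_layer"], desc["n_heads"], desc["hidden_size"])
    # Layer modules are keyed by role, e.g. "attn.c_proj" for GPT-2-like
    # blocks or "self_attn.o_proj" for dense decoders.
    print(sorted(adapter.get_layer_modules(model, 0).keys()))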
@@ -22,7 +22,7 @@ from invarlock.core.error_utils import wrap_errors
 from invarlock.core.exceptions import DependencyError, ModelLoadError


-class HF_ORT_CausalLM_Adapter(ModelAdapter):
+class HF_Causal_ONNX_Adapter(ModelAdapter):
     """Optimum/ONNXRuntime causal LM adapter.

     Provides a lightweight bridge that loads an ORTModelForCausalLM and
@@ -31,7 +31,7 @@ class HF_ORT_CausalLM_Adapter(ModelAdapter):
     back to reload in the CLI runner.
     """

-    name = "hf_onnx"
+    name = "hf_causal_onnx"

     # --- Loading ---------------------------------------------------------
     def load_model(self, model_id: str, device: str = "cpu", **kwargs: Any):  # type: ignore[override]
@@ -109,4 +109,4 @@ class HF_ORT_CausalLM_Adapter(ModelAdapter):
         raise NotImplementedError("restore not supported for ONNXRuntime models")


-__all__ = ["HF_ORT_CausalLM_Adapter"]
+__all__ = ["HF_Causal_ONNX_Adapter"]
@@ -490,18 +490,39 @@ class HFAdapterMixin:
         """Return mapping of tied parameter names to source parameter names."""

         tying: dict[str, str] = {}
-        param_names = set(dict(model.named_parameters()).keys())
+        try:
+            named = model.named_parameters(remove_duplicate=False)  # type: ignore[call-arg]
+        except TypeError:  # pragma: no cover - torch version dependent
+            named = model.named_parameters()
+        params = dict(named)
+
+        def _is_tied(name_a: str, name_b: str) -> bool:
+            a = params.get(name_a)
+            b = params.get(name_b)
+            if a is None or b is None:
+                return False
+            try:
+                if a is b:
+                    return True
+                if hasattr(a, "data_ptr") and hasattr(b, "data_ptr"):
+                    return int(a.data_ptr()) == int(b.data_ptr())
+            except Exception:
+                return False
+            return False

-        if "lm_head.weight" in param_names and "transformer.wte.weight" in param_names:
+        if _is_tied("lm_head.weight", "transformer.wte.weight"):
             tying["lm_head.weight"] = "transformer.wte.weight"

+        if _is_tied("lm_head.weight", "model.embed_tokens.weight"):
+            tying["lm_head.weight"] = "model.embed_tokens.weight"
+
         decoder_name = "cls.predictions.decoder.weight"
-        if decoder_name in param_names:
+        if decoder_name in params:
             for candidate in (
                 "bert.embeddings.word_embeddings.weight",
                 "embeddings.word_embeddings.weight",
             ):
-                if candidate in param_names:
+                if _is_tied(decoder_name, candidate):
                     tying[decoder_name] = candidate
                     break

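The widened tying detection above matters because many HuggingFace models share the output projection with the input embedding, and comparing parameter names alone misses ties once `named_parameters()` deduplicates them. A standalone illustration of the storage-pointer check, using toy modules that are not InvarLock code:

# Illustrative sketch: detecting tied weights via shared storage.
import torch.nn as nn

embed = nn.Embedding(10, 4)
head = nn.Linear(4, 10, bias=False)
head.weight = embed.weight  # tie the output head to the embedding

untied = nn.Linear(4, 10, bias=False)

def is_tied(a, b) -> bool:
    # Same Parameter object, or same underlying storage pointer.
    return a is b or a.data_ptr() == b.data_ptr()

print(is_tied(head.weight, embed.weight))    # True
print(is_tied(untied.weight, embed.weight))  # False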
@@ -1,15 +1,8 @@
 """
-HuggingFace BERT Model Adapter
+HuggingFace masked LM adapter.
 ==============================

-ModelAdapter implementation for HuggingFace BERT architecture models.
-
-This adapter provides BERT-specific integration including:
-- Support for BERT, RoBERTa, DistilBERT, and other BERT variants
-- Proper handling of bidirectional attention layers
-- Support for classification heads and pooling layers
-- Token type embeddings and position embeddings handling
-- Proper device-aware state serialization
+ModelAdapter implementation for HuggingFace masked language models.
 """

 from typing import Any
@@ -27,7 +20,7 @@ TensorType = torch.Tensor
 ModuleType = nn.Module


-class HF_BERT_Adapter(HFAdapterMixin, ModelAdapter):
+class HF_MLM_Adapter(HFAdapterMixin, ModelAdapter):
     """
     HuggingFace-specific ModelAdapter implementation for BERT models.

@@ -39,7 +32,7 @@ class HF_BERT_Adapter(HFAdapterMixin, ModelAdapter):
     - Device-aware state serialization
     """

-    name = "hf_bert"
+    name = "hf_mlm"

     def load_model(
         self, model_id: str, device: str = "auto", **kwargs: Any
@@ -1,11 +1,11 @@
 """
-HuggingFace T5 Model Adapter
-============================
+HuggingFace encoder-decoder adapter.
+===================================

-ModelAdapter implementation for HuggingFace T5 encoder-decoder models.
+ModelAdapter implementation for HuggingFace encoder-decoder (seq2seq) models.

-Loads AutoModelForSeq2SeqLM (e.g., t5-small/base/large) and exposes a minimal
-describe() sufficient for guard policies and reporting.
+Loads AutoModelForSeq2SeqLM and exposes a minimal describe() sufficient for
+guard policies and reporting.
 """

 from __future__ import annotations
@@ -25,10 +25,10 @@ TensorType = torch.Tensor
 ModuleType = nn.Module


-class HF_T5_Adapter(HFAdapterMixin, ModelAdapter):
-    """HuggingFace T5 adapter using AutoModelForSeq2SeqLM."""
+class HF_Seq2Seq_Adapter(HFAdapterMixin, ModelAdapter):
+    """HuggingFace encoder-decoder adapter using AutoModelForSeq2SeqLM."""

-    name = "hf_t5"
+    name = "hf_seq2seq"

     def load_model(  # type: ignore[override]
         self, model_id: str, device: str = "auto", **kwargs: Any
@@ -136,4 +136,4 @@ class HF_T5_Adapter(HFAdapterMixin, ModelAdapter):
         return super().restore(model, blob)


-__all__ = ["HF_T5_Adapter"]
+__all__ = ["HF_Seq2Seq_Adapter"]
@@ -148,7 +148,7 @@ def _selected_families_for_alpha(


 def summarize_null_sweep_reports(
-    reports: list[dict[str, Any]],
+    reports: list[object],
     *,
     tier: str,
     safety_margin: float = 0.05,
@@ -186,20 +186,25 @@ def summarize_null_sweep_reports(
         mt = _extract_multiple_testing(metrics)
         if mt:
             mt_method = str(mt.get("method", mt_method))
-            if mt.get("alpha") is not None:
-                mt_alpha = float(mt.get("alpha"))
-            if mt.get("m") is not None:
-                mt_m = int(mt.get("m"))
+            alpha_value = mt.get("alpha")
+            if alpha_value is not None:
+                try:
+                    mt_alpha = float(alpha_value)
+                except Exception:
+                    pass
+            m_value = mt.get("m")
+            if m_value is not None:
+                try:
+                    mt_m = int(m_value)
+                except Exception:
+                    pass

         fam_z = _extract_family_max_z(metrics)
         for fam, z in fam_z.items():
             family_max_z[fam] = max(family_max_z[fam], float(z))

-        selection = (
-            metrics.get("multiple_testing_selection")
-            if isinstance(metrics.get("multiple_testing_selection"), dict)
-            else {}
-        )
+        raw_selection = metrics.get("multiple_testing_selection")
+        selection = raw_selection if isinstance(raw_selection, dict) else {}
         pvals = selection.get("family_pvalues")
         if not isinstance(pvals, dict):
             pvals = {}
@@ -107,8 +107,6 @@ def summarize_ve_sweep_reports(
     evaluated = 0

     for report in reports:
-        if not isinstance(report, dict):
-            continue
         g = _extract_guard(report, "variance") or {}
         metrics = g.get("metrics", {}) if isinstance(g.get("metrics"), dict) else {}
         pg = metrics.get("predictive_gate")