invarlock 0.3.6-py3-none-any.whl → 0.3.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +2 -2
- invarlock/adapters/__init__.py +10 -14
- invarlock/adapters/auto.py +35 -40
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_mixin.py +25 -4
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/cli/adapter_auto.py +31 -21
- invarlock/cli/app.py +73 -2
- invarlock/cli/commands/certify.py +600 -59
- invarlock/cli/commands/doctor.py +8 -10
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +233 -69
- invarlock/cli/commands/run.py +907 -183
- invarlock/cli/commands/verify.py +76 -11
- invarlock/cli/config.py +1 -1
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +1 -1
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/registry.py +9 -11
- invarlock/core/runner.py +111 -25
- invarlock/edits/quant_rtn.py +65 -37
- invarlock/eval/bench.py +3 -3
- invarlock/eval/data.py +68 -23
- invarlock/eval/metrics.py +59 -1
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/rmt.py +2 -2
- invarlock/guards/variance.py +2 -2
- invarlock/model_profile.py +48 -27
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/reporting/certificate.py +159 -9
- invarlock/reporting/certificate_schema.py +1 -1
- invarlock/reporting/guards_analysis.py +154 -4
- invarlock/reporting/html.py +55 -5
- invarlock/reporting/normalizer.py +7 -0
- invarlock/reporting/render.py +791 -431
- invarlock/reporting/report.py +39 -3
- invarlock/reporting/report_types.py +6 -1
- invarlock/reporting/telemetry.py +86 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/METADATA +23 -9
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/RECORD +53 -48
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/__init__.py
CHANGED
@@ -6,13 +6,13 @@ Core runtime package — torch-independent utilities, configuration, and interfaces
 
 This package provides the foundation for the InvarLock GuardChain without heavy dependencies.
 For torch-dependent functionality, see subpackages under `invarlock.*`:
-- `invarlock.adapters`: Model adapters (HF
+- `invarlock.adapters`: Model adapters (HF causal/MLM/seq2seq + auto)
 - `invarlock.guards`: Safety mechanisms (invariants, spectral, RMT, variance)
 - `invarlock.edits`: Built-in quantization and edit interfaces
 - `invarlock.eval`: Metrics, guard-overhead checks, and certification
 """
 
-__version__ = "0.3.6"
+__version__ = "0.3.7"
 
 # Core exports - torch-independent
 from .config import CFG, Defaults, get_default_config
invarlock/adapters/__init__.py
CHANGED
@@ -29,13 +29,11 @@ from .capabilities import (
 )
 
 _LAZY_MAP = {
-    "
-    "
-    "
-    "
-    "
-    "HF_Causal_Auto_Adapter": ".auto",
-    "HF_MLM_Auto_Adapter": ".auto",
+    "HF_Causal_Adapter": ".hf_causal",
+    "HF_MLM_Adapter": ".hf_mlm",
+    "HF_Seq2Seq_Adapter": ".hf_seq2seq",
+    "HF_Causal_ONNX_Adapter": ".hf_causal_onnx",
+    "HF_Auto_Adapter": ".auto",
 }
 
 
@@ -91,13 +89,11 @@ run_invarlock = _RemovedComponent("run_invarlock", "invarlock.cli.run")
 quick_prune_gpt2 = _RemovedComponent("quick_prune_gpt2")
 
 __all__ = [
-    "
-    "
-    "
-    "
-    "
-    "HF_Causal_Auto_Adapter",
-    "HF_MLM_Auto_Adapter",
+    "HF_Causal_Adapter",
+    "HF_MLM_Adapter",
+    "HF_Seq2Seq_Adapter",
+    "HF_Causal_ONNX_Adapter",
+    "HF_Auto_Adapter",
     "BaseAdapter",
     "AdapterConfig",
     "AdapterInterface",
invarlock/adapters/auto.py
CHANGED
@@ -110,21 +110,26 @@ class _DelegatingAdapter(ModelAdapter):
 
     def _load_adapter(self, adapter_name: str) -> ModelAdapter:
         """Load an adapter by name."""
-        if adapter_name == "
-
-                ".
-            ).
-            return
-
-
-                ".
-            ).
-            return
-
-
-                ".
-            ).
-            return
+        if adapter_name == "hf_causal":
+            HF_Causal_Adapter = _importlib.import_module(
+                ".hf_causal", __package__
+            ).HF_Causal_Adapter
+            return HF_Causal_Adapter()
+        if adapter_name == "hf_mlm":
+            HF_MLM_Adapter = _importlib.import_module(
+                ".hf_mlm", __package__
+            ).HF_MLM_Adapter
+            return HF_MLM_Adapter()
+        if adapter_name == "hf_seq2seq":
+            HF_Seq2Seq_Adapter = _importlib.import_module(
+                ".hf_seq2seq", __package__
+            ).HF_Seq2Seq_Adapter
+            return HF_Seq2Seq_Adapter()
+        if adapter_name == "hf_causal_onnx":
+            HF_Causal_ONNX_Adapter = _importlib.import_module(
+                ".hf_causal_onnx", __package__
+            ).HF_Causal_ONNX_Adapter
+            return HF_Causal_ONNX_Adapter()
         elif adapter_name == "hf_bnb":
             HF_BNB_Adapter = _importlib.import_module(
                 "invarlock.plugins.hf_bnb_adapter"
@@ -141,11 +146,11 @@ class _DelegatingAdapter(ModelAdapter):
             ).HF_GPTQ_Adapter
             return HF_GPTQ_Adapter()
         else:
-            # Default to
-
-                ".
-            ).
-            return
+            # Default to causal adapter
+            HF_Causal_Adapter = _importlib.import_module(
+                ".hf_causal", __package__
+            ).HF_Causal_Adapter
+            return HF_Causal_Adapter()
 
     def _ensure_delegate_from_id(self, model_id: str) -> ModelAdapter:
         if self._delegate is not None:
@@ -172,14 +177,16 @@ class _DelegatingAdapter(ModelAdapter):
             self._delegate = self._load_adapter(quant_adapter)
             return self._delegate
 
-        # Fall back to class
+        # Fall back to lightweight class-name inspection (no transformers import).
         cls_name = getattr(model, "__class__", type(model)).__name__.lower()
-        if any(k in cls_name for k in ["
-            self._delegate = self._load_adapter("
-        elif any(k in cls_name for k in ["bert", "roberta", "albert", "deberta"]):
-            self._delegate = self._load_adapter("hf_bert")
+        if any(k in cls_name for k in ["bert", "roberta", "albert", "deberta"]):
+            self._delegate = self._load_adapter("hf_mlm")
         else:
-
+            cfg = getattr(model, "config", None)
+            if getattr(cfg, "is_encoder_decoder", False):
+                self._delegate = self._load_adapter("hf_seq2seq")
+            else:
+                self._delegate = self._load_adapter("hf_causal")
         return self._delegate
 
     def can_handle(self, model: Any) -> bool:  # pragma: no cover - trivial
@@ -206,21 +213,9 @@ class _DelegatingAdapter(ModelAdapter):
         raise AttributeError(item)
 
 
-class
-    name = "
+class HF_Auto_Adapter(_DelegatingAdapter):
+    name = "hf_auto"
 
     def load_model(self, model_id: str, device: str = "auto", **kwargs: Any) -> Any:
         delegate = self._ensure_delegate_from_id(model_id)
         return delegate.load_model(model_id, device=device, **kwargs)
-
-
-class HF_MLM_Auto_Adapter(_DelegatingAdapter):
-    name = "hf_mlm_auto"
-
-    def load_model(self, model_id: str, device: str = "auto", **kwargs: Any) -> Any:
-        # Force BERT-like adapter for MLM families
-        HF_BERT_Adapter = _importlib.import_module(
-            ".hf_bert", __package__
-        ).HF_BERT_Adapter
-        self._delegate = HF_BERT_Adapter()
-        return self._delegate.load_model(model_id, device=device, **kwargs)
@@ -359,7 +359,7 @@ def _detect_weight_tying(model: Any) -> dict[str, str]:
     tying: dict[str, str] = {}
 
     # Common weight tying patterns
-    #
+    # Decoder embed_tokens style: lm_head.weight ↔ model.embed_tokens.weight
    if hasattr(model, "lm_head") and hasattr(model, "model"):
         inner = model.model
         if hasattr(inner, "embed_tokens"):
@@ -408,7 +408,7 @@ def _detect_primary_metric(model: Any) -> str:
             return "rouge"
         return "ppl_seq2seq"
 
-    # Decoder-only models (GPT-like,
+    # Decoder-only models (GPT-like, RoPE-style)
     return "ppl_causal"
 
 
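Note on the hunk above: the fallback no longer imports transformers — it keys off the wrapped model's class name first, then the `is_encoder_decoder` config flag. A minimal standalone sketch of that selection order (the helper name `pick_backend` is illustrative only, not an invarlock API):

from typing import Any

def pick_backend(model: Any) -> str:
    # Mirrors the fallback in auto.py: encoder-style class names map to the MLM
    # adapter, encoder-decoder configs to seq2seq, everything else to causal.
    cls_name = type(model).__name__.lower()
    if any(k in cls_name for k in ["bert", "roberta", "albert", "deberta"]):
        return "hf_mlm"
    cfg = getattr(model, "config", None)
    if getattr(cfg, "is_encoder_decoder", False):
        return "hf_seq2seq"
    return "hf_causal"

Class-name matching keeps the delegate choice cheap; only the encoder-decoder check touches the model config.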
invarlock/adapters/hf_causal.py
ADDED
@@ -0,0 +1,418 @@
+"""
+HuggingFace causal LM adapter (decoder-only).
+=============================================
+
+Role-based adapter for HuggingFace decoder-only causal language models.
+
+This adapter intentionally avoids model-family naming. It selects a structural
+spec at runtime (dense FFN vs MoE vs GPT-2-like blocks) and exposes a stable
+`describe()` contract for InvarLock gates and reporting.
+"""
+
+from __future__ import annotations
+
+import os
+from types import SimpleNamespace
+from typing import Any
+
+import torch
+import torch.nn as nn
+
+from invarlock.core.api import ModelAdapter
+from invarlock.core.error_utils import wrap_errors
+from invarlock.core.exceptions import AdapterError, DependencyError, ModelLoadError
+
+from .hf_mixin import HFAdapterMixin
+
+TensorType = torch.Tensor
+ModuleType = nn.Module
+
+LIGHT_IMPORT = os.getenv("INVARLOCK_LIGHT_IMPORT", "").strip().lower() in {
+    "1",
+    "true",
+    "yes",
+}
+
+
+def _first_item(seq: Any) -> Any | None:
+    try:
+        if hasattr(seq, "__len__") and len(seq) > 0:  # type: ignore[arg-type]
+            return seq[0]  # type: ignore[index]
+    except Exception:
+        pass
+    try:
+        return next(iter(seq))
+    except Exception:
+        return None
+
+
+def _has_set_attr(obj: Any, name: str) -> bool:
+    d = getattr(obj, "__dict__", None)
+    if isinstance(d, dict) and name in d:
+        return True
+    if isinstance(obj, nn.Module):
+        if hasattr(obj, "_modules") and name in obj._modules:
+            return True
+        if hasattr(obj, "_parameters") and name in obj._parameters:
+            return True
+        if hasattr(obj, "_buffers") and name in obj._buffers:
+            return True
+    return False
+
+
+class _CausalSpec:
+    spec_name = "base"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        raise NotImplementedError
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        raise NotImplementedError
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        raise NotImplementedError
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        return {}
+
+
+class _DenseDecoderSpec(_CausalSpec):
+    spec_name = "dense_decoder"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        layer = _first_item(layers)
+        if layer is None:
+            return False
+        has_attn = (
+            hasattr(layer, "self_attn")
+            and _has_set_attr(layer.self_attn, "q_proj")
+            and _has_set_attr(layer.self_attn, "k_proj")
+            and _has_set_attr(layer.self_attn, "v_proj")
+            and _has_set_attr(layer.self_attn, "o_proj")
+        )
+        has_mlp = (
+            hasattr(layer, "mlp")
+            and _has_set_attr(layer.mlp, "gate_proj")
+            and _has_set_attr(layer.mlp, "up_proj")
+            and _has_set_attr(layer.mlp, "down_proj")
+        )
+        has_norms = _has_set_attr(layer, "input_layernorm") and _has_set_attr(
+            layer, "post_attention_layernorm"
+        )
+        return bool(has_attn and has_mlp and has_norms)
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        mlp_dim = int(getattr(config, "intermediate_size", hidden_size * 4) or 0)
+        try:
+            gate_proj = getattr(getattr(layer, "mlp", None), "gate_proj", None)
+            if gate_proj is not None and hasattr(gate_proj, "weight"):
+                mlp_dim = int(gate_proj.weight.shape[0])
+        except Exception:
+            pass
+        return int(mlp_dim)
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        mlp = layer.mlp
+        return {
+            "self_attn.q_proj": layer.self_attn.q_proj,
+            "self_attn.k_proj": layer.self_attn.k_proj,
+            "self_attn.v_proj": layer.self_attn.v_proj,
+            "self_attn.o_proj": layer.self_attn.o_proj,
+            "input_layernorm": layer.input_layernorm,
+            "post_attention_layernorm": layer.post_attention_layernorm,
+            "mlp.gate_proj": mlp.gate_proj,
+            "mlp.up_proj": mlp.up_proj,
+            "mlp.down_proj": mlp.down_proj,
+        }
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        tying: dict[str, str] = {}
+        try:
+            if hasattr(model, "lm_head") and hasattr(base, "embed_tokens"):
+                if model.lm_head.weight is base.embed_tokens.weight:
+                    tying["lm_head.weight"] = "model.embed_tokens.weight"
+        except Exception:
+            pass
+        return tying
+
+
+class _MoEDecoderSpec(_CausalSpec):
+    spec_name = "moe_decoder"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        layer = _first_item(layers)
+        if layer is None:
+            return False
+        has_attn = (
+            hasattr(layer, "self_attn")
+            and _has_set_attr(layer.self_attn, "q_proj")
+            and _has_set_attr(layer.self_attn, "k_proj")
+            and _has_set_attr(layer.self_attn, "v_proj")
+            and _has_set_attr(layer.self_attn, "o_proj")
+        )
+        moe = getattr(layer, "block_sparse_moe", None)
+        experts = getattr(moe, "experts", None) if moe is not None else None
+        expert0 = _first_item(experts) if experts is not None else None
+        has_moe = bool(
+            expert0 is not None
+            and _has_set_attr(expert0, "w1")
+            and _has_set_attr(expert0, "w2")
+        )
+        has_norms = _has_set_attr(layer, "input_layernorm") and _has_set_attr(
+            layer, "post_attention_layernorm"
+        )
+        return bool(has_attn and has_moe and has_norms)
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        mlp_dim = int(getattr(config, "intermediate_size", hidden_size * 4) or 0)
+        try:
+            moe = getattr(layer, "block_sparse_moe", None)
+            experts = getattr(moe, "experts", None) if moe is not None else None
+            expert0 = _first_item(experts) if experts is not None else None
+            if expert0 is not None:
+                w1 = getattr(expert0, "w1", None)
+                if w1 is not None and hasattr(w1, "weight"):
+                    mlp_dim = int(w1.weight.shape[0])
+        except Exception:
+            pass
+        return int(mlp_dim)
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        moe = layer.block_sparse_moe
+        expert0 = _first_item(moe.experts)
+        if expert0 is None:
+            raise AdapterError(
+                code="E202",
+                message="ADAPTER-STRUCTURE-INVALID: MoE layer missing experts",
+                details={"layer_class": layer.__class__.__name__},
+            )
+        return {
+            "self_attn.q_proj": layer.self_attn.q_proj,
+            "self_attn.k_proj": layer.self_attn.k_proj,
+            "self_attn.v_proj": layer.self_attn.v_proj,
+            "self_attn.o_proj": layer.self_attn.o_proj,
+            "input_layernorm": layer.input_layernorm,
+            "post_attention_layernorm": layer.post_attention_layernorm,
+            # Best-effort mapping to dense naming used elsewhere in the stack.
+            "mlp.gate_proj": expert0.w1,
+            "mlp.up_proj": getattr(expert0, "w3", expert0.w1),
+            "mlp.down_proj": expert0.w2,
+        }
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        return _DenseDecoderSpec().tying_map(model, base)
+
+
+class _GPT2LikeDecoderSpec(_CausalSpec):
+    spec_name = "gpt2_like"
+
+    def matches(self, model: Any, base: Any, layers: Any) -> bool:
+        layer = _first_item(layers)
+        if layer is None:
+            return False
+        return bool(
+            hasattr(layer, "attn")
+            and hasattr(layer.attn, "c_proj")
+            and hasattr(layer, "mlp")
+            and hasattr(layer.mlp, "c_proj")
+        )
+
+    def infer_mlp_dim(self, layer: Any, config: Any, hidden_size: int) -> int:
+        try:
+            c_fc = getattr(getattr(layer, "mlp", None), "c_fc", None)
+            if c_fc is not None and hasattr(c_fc, "weight"):
+                # HF GPT-style uses Conv1D where nf is out_features.
+                if hasattr(c_fc, "nf"):
+                    return int(c_fc.nf)
+                return int(c_fc.weight.shape[0])
+        except Exception:
+            pass
+        return int(getattr(config, "n_inner", hidden_size * 4) or 0)
+
+    def layer_modules(self, model: Any, layer: Any) -> dict[str, Any]:
+        return {
+            "attn.c_attn": layer.attn.c_attn,
+            "attn.c_proj": layer.attn.c_proj,
+            "mlp.c_fc": layer.mlp.c_fc,
+            "mlp.c_proj": layer.mlp.c_proj,
+            "ln_1": layer.ln_1,
+            "ln_2": layer.ln_2,
+        }
+
+    def tying_map(self, model: Any, base: Any) -> dict[str, str]:
+        tying: dict[str, str] = {}
+        try:
+            if hasattr(model, "lm_head") and hasattr(base, "wte"):
+                if model.lm_head.weight is base.wte.weight:
+                    tying["lm_head.weight"] = "transformer.wte.weight"
+        except Exception:
+            pass
+        return tying
+
+
+_SPECS: list[_CausalSpec] = [
+    _MoEDecoderSpec(),
+    _DenseDecoderSpec(),
+    _GPT2LikeDecoderSpec(),
+]
+
+
+class HF_Causal_Adapter(HFAdapterMixin, ModelAdapter):
+    """Spec-driven adapter for decoder-only causal LMs."""
+
+    name = "hf_causal"
+
+    def load_model(
+        self, model_id: str, device: str = "auto", **kwargs: Any
+    ) -> ModuleType | Any:
+        try:
+            with wrap_errors(
+                DependencyError,
+                "E203",
+                "DEPENDENCY-MISSING: transformers",
+                lambda e: {"dependency": "transformers"},
+            ):
+                from transformers import AutoModelForCausalLM  # type: ignore
+
+            with wrap_errors(
+                ModelLoadError,
+                "E201",
+                "MODEL-LOAD-FAILED: transformers AutoModelForCausalLM",
+                lambda e: {"model_id": model_id},
+            ):
+                model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
+
+            return self._safe_to_device(model, device)
+        except DependencyError:
+            if LIGHT_IMPORT:
+                stub = SimpleNamespace(name="hf_causal_stub")
+                stub.to = lambda *_a, **_k: stub  # type: ignore[attr-defined]
+                return stub
+            raise
+
+    def _unwrap(self, model: Any) -> tuple[Any, Any, Any]:
+        config = getattr(model, "config", None)
+        if hasattr(model, "model") and hasattr(model.model, "layers"):
+            return model.model, model.model.layers, config
+        if hasattr(model, "transformer") and hasattr(model.transformer, "h"):
+            return model.transformer, model.transformer.h, config
+        if hasattr(model, "layers"):
+            return model, model.layers, config
+        if hasattr(model, "h"):
+            return model, model.h, config
+        raise AdapterError(
+            code="E202",
+            message="ADAPTER-STRUCTURE-INVALID: unrecognized HF causal LM structure",
+            details={"model_class": model.__class__.__name__},
+        )
+
+    def _select_spec(self, model: Any, base: Any, layers: Any) -> _CausalSpec:
+        for spec in _SPECS:
+            try:
+                if spec.matches(model, base, layers):
+                    return spec
+            except Exception:
+                continue
+        return _DenseDecoderSpec()
+
+    def can_handle(self, model: ModuleType | Any) -> bool:
+        try:
+            base, layers, _cfg = self._unwrap(model)
+        except Exception:
+            return False
+        return any(spec.matches(model, base, layers) for spec in _SPECS)
+
+    def describe(self, model: ModuleType | Any) -> dict[str, Any]:
+        base, layers, config = self._unwrap(model)
+        if config is None:
+            raise AdapterError(
+                code="E202",
+                message="ADAPTER-STRUCTURE-INVALID: missing HuggingFace config on model",
+                details={"model_class": model.__class__.__name__},
+            )
+
+        try:
+            n_layers = len(layers)
+        except Exception:
+            n_layers = sum(1 for _ in iter(layers))
+
+        def _coerce_int(value: Any) -> int | None:
+            try:
+                if isinstance(value, bool):
+                    return None
+                if isinstance(value, int):
+                    return int(value)
+                if isinstance(value, float):
+                    return int(value)
+                if isinstance(value, str):
+                    stripped = value.strip()
+                    if stripped and stripped.isdigit():
+                        return int(stripped)
+            except Exception:
+                return None
+            return None
+
+        n_heads = _coerce_int(getattr(config, "num_attention_heads", None))
+        if n_heads is None:
+            n_heads = _coerce_int(getattr(config, "n_head", None))
+
+        hidden_size = _coerce_int(getattr(config, "hidden_size", None))
+        if hidden_size is None:
+            hidden_size = _coerce_int(getattr(config, "n_embd", None))
+
+        vocab_size = _coerce_int(getattr(config, "vocab_size", None))
+
+        if n_heads is None or hidden_size is None:
+            raise AdapterError(
+                code="E202",
+                message="ADAPTER-STRUCTURE-INVALID: missing head/hidden size metadata",
+                details={"model_class": model.__class__.__name__},
+            )
+
+        spec = self._select_spec(model, base, layers)
+
+        heads_per_layer = [int(n_heads)] * int(n_layers)
+        mlp_dims: list[int] = []
+        for idx in range(int(n_layers)):
+            layer = layers[idx]
+            mlp_dims.append(spec.infer_mlp_dim(layer, config, int(hidden_size)))
+
+        tying = spec.tying_map(model, base)
+
+        total_params = 0
+        try:
+            total_params = sum(p.numel() for p in model.parameters())
+        except Exception:
+            total_params = 0
+
+        try:
+            device = next(model.parameters()).device
+        except Exception:
+            device = torch.device("cpu")
+
+        return {
+            "n_layer": int(n_layers),
+            "heads_per_layer": heads_per_layer,
+            "mlp_dims": mlp_dims,
+            "tying": tying,
+            "model_type": str(getattr(config, "model_type", "") or "causal"),
+            "model_class": model.__class__.__name__,
+            "hf_model_type": str(getattr(config, "model_type", "") or ""),
+            "hf_config_class": config.__class__.__name__
+            if hasattr(config, "__class__")
+            else "unknown",
+            "n_heads": int(n_heads),
+            "hidden_size": int(hidden_size),
+            "vocab_size": int(vocab_size) if vocab_size is not None else None,
+            "total_params": int(total_params),
+            "device": str(device),
+            "spec": spec.spec_name,
+        }
+
+    def get_layer_modules(
+        self, model: ModuleType | Any, layer_idx: int
+    ) -> dict[str, Any]:
+        base, layers, _cfg = self._unwrap(model)
+        spec = self._select_spec(model, base, layers)
+        layer = layers[layer_idx]
+        return spec.layer_modules(model, layer)
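Note on the new adapter above: `load_model`, `describe`, and `get_layer_modules` form its public surface, and `describe()` reports structure (matched spec, layer count, heads, MLP widths, tying) rather than a model family. A hypothetical usage sketch, assuming invarlock 0.3.7 and transformers are installed; the checkpoint name is illustrative, any decoder-only model should work:

from invarlock.adapters import HF_Causal_Adapter

adapter = HF_Causal_Adapter()
model = adapter.load_model("sshleifer/tiny-gpt2", device="cpu")  # checkpoint name is an assumption

info = adapter.describe(model)
# Structural report used by gates and reporting: which spec matched
# (gpt2_like / dense_decoder / moe_decoder), plus sizes and weight tying.
print(info["spec"], info["n_layer"], info["hidden_size"], info["tying"])

# Per-layer module map in the matched spec's naming (attn.c_attn / self_attn.q_proj, ...).
modules = adapter.get_layer_modules(model, 0)
print(sorted(modules))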
invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py}
RENAMED
@@ -22,7 +22,7 @@ from invarlock.core.error_utils import wrap_errors
 from invarlock.core.exceptions import DependencyError, ModelLoadError
 
 
-class
+class HF_Causal_ONNX_Adapter(ModelAdapter):
     """Optimum/ONNXRuntime causal LM adapter.
 
     Provides a lightweight bridge that loads an ORTModelForCausalLM and
@@ -31,7 +31,7 @@ class HF_ORT_CausalLM_Adapter(ModelAdapter):
     back to reload in the CLI runner.
     """
 
-    name = "
+    name = "hf_causal_onnx"
 
     # --- Loading ---------------------------------------------------------
     def load_model(self, model_id: str, device: str = "cpu", **kwargs: Any):  # type: ignore[override]
@@ -109,4 +109,4 @@ class HF_ORT_CausalLM_Adapter(ModelAdapter):
         raise NotImplementedError("restore not supported for ONNXRuntime models")
 
 
-__all__ = ["
+__all__ = ["HF_Causal_ONNX_Adapter"]
invarlock/adapters/hf_mixin.py
CHANGED
@@ -490,18 +490,39 @@ class HFAdapterMixin:
         """Return mapping of tied parameter names to source parameter names."""
 
         tying: dict[str, str] = {}
-
+        try:
+            named = model.named_parameters(remove_duplicate=False)  # type: ignore[call-arg]
+        except TypeError:  # pragma: no cover - torch version dependent
+            named = model.named_parameters()
+        params = dict(named)
+
+        def _is_tied(name_a: str, name_b: str) -> bool:
+            a = params.get(name_a)
+            b = params.get(name_b)
+            if a is None or b is None:
+                return False
+            try:
+                if a is b:
+                    return True
+                if hasattr(a, "data_ptr") and hasattr(b, "data_ptr"):
+                    return int(a.data_ptr()) == int(b.data_ptr())
+            except Exception:
+                return False
+            return False
 
-        if "lm_head.weight"
+        if _is_tied("lm_head.weight", "transformer.wte.weight"):
             tying["lm_head.weight"] = "transformer.wte.weight"
 
+        if _is_tied("lm_head.weight", "model.embed_tokens.weight"):
+            tying["lm_head.weight"] = "model.embed_tokens.weight"
+
         decoder_name = "cls.predictions.decoder.weight"
-        if decoder_name in
+        if decoder_name in params:
             for candidate in (
                 "bert.embeddings.word_embeddings.weight",
                 "embeddings.word_embeddings.weight",
             ):
-                if candidate
+                if _is_tied(decoder_name, candidate):
                     tying[decoder_name] = candidate
                     break
 
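Note on the hf_mixin.py hunk above: the new `_is_tied` helper compares storage rather than names — two parameters count as tied when they are the same object or share a `data_ptr()`. A small self-contained illustration of that check on a toy module (not invarlock code):

import torch
import torch.nn as nn

class TinyLM(nn.Module):
    def __init__(self, vocab: int = 16, dim: int = 8) -> None:
        super().__init__()
        self.embed = nn.Embedding(vocab, dim)
        self.lm_head = nn.Linear(dim, vocab, bias=False)
        self.lm_head.weight = self.embed.weight  # tie the output head to the embeddings

model = TinyLM()
# remove_duplicate=False keeps both names even though they point at one tensor.
params = dict(model.named_parameters(remove_duplicate=False))

a = params["lm_head.weight"]
b = params["embed.weight"]
# Same tensor object and same storage pointer -> detected as tied.
print(a is b, int(a.data_ptr()) == int(b.data_ptr()))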