invarlock-0.3.5-py3-none-any.whl → invarlock-0.3.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +2 -2
- invarlock/_data/runtime/tiers.yaml +57 -30
- invarlock/adapters/__init__.py +11 -15
- invarlock/adapters/auto.py +35 -40
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_mixin.py +25 -4
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/calibration/spectral_null.py +15 -10
- invarlock/calibration/variance_ve.py +0 -2
- invarlock/cli/adapter_auto.py +31 -21
- invarlock/cli/app.py +73 -2
- invarlock/cli/commands/calibrate.py +6 -2
- invarlock/cli/commands/certify.py +651 -91
- invarlock/cli/commands/doctor.py +11 -11
- invarlock/cli/commands/explain_gates.py +57 -8
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +233 -69
- invarlock/cli/commands/run.py +1066 -244
- invarlock/cli/commands/verify.py +154 -15
- invarlock/cli/config.py +22 -6
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +1 -1
- invarlock/core/api.py +45 -5
- invarlock/core/auto_tuning.py +65 -20
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/contracts.py +7 -1
- invarlock/core/registry.py +11 -13
- invarlock/core/runner.py +425 -75
- invarlock/edits/quant_rtn.py +65 -37
- invarlock/eval/bench.py +3 -16
- invarlock/eval/data.py +82 -51
- invarlock/eval/metrics.py +63 -2
- invarlock/eval/primary_metric.py +23 -0
- invarlock/eval/tail_stats.py +230 -0
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/_estimators.py +154 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/policies.py +16 -6
- invarlock/guards/rmt.py +627 -546
- invarlock/guards/spectral.py +348 -110
- invarlock/guards/tier_config.py +32 -30
- invarlock/guards/variance.py +7 -31
- invarlock/guards_ref/rmt_ref.py +23 -23
- invarlock/model_profile.py +90 -42
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/reporting/certificate.py +384 -55
- invarlock/reporting/certificate_schema.py +3 -2
- invarlock/reporting/dataset_hashing.py +15 -2
- invarlock/reporting/guards_analysis.py +350 -277
- invarlock/reporting/html.py +55 -5
- invarlock/reporting/normalizer.py +13 -0
- invarlock/reporting/policy_utils.py +38 -36
- invarlock/reporting/primary_metric_utils.py +71 -17
- invarlock/reporting/render.py +852 -431
- invarlock/reporting/report.py +40 -4
- invarlock/reporting/report_types.py +11 -3
- invarlock/reporting/telemetry.py +86 -0
- invarlock/reporting/validate.py +1 -18
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/adapters/hf_llama.py
DELETED
@@ -1,487 +0,0 @@
"""
HuggingFace LLaMA Model Adapter
===============================

ModelAdapter implementation for HuggingFace LLaMA architecture models.

This adapter provides LLaMA-specific integration including:
- Support for LLaMA, LLaMA-2, Code Llama, and other LLaMA variants
- Proper handling of RMSNorm layers and SwiGLU activation
- RoPE (Rotary Position Embedding) support
- Group Query Attention (GQA) handling for LLaMA-2
- Proper device-aware state serialization
"""

from typing import Any

import torch
import torch.nn as nn

from invarlock.core.api import ModelAdapter
from invarlock.core.error_utils import wrap_errors
from invarlock.core.exceptions import AdapterError, DependencyError, ModelLoadError

from .hf_mixin import HFAdapterMixin

TensorType = torch.Tensor
ModuleType = nn.Module


class HF_LLaMA_Adapter(HFAdapterMixin, ModelAdapter):
    """
    HuggingFace-specific ModelAdapter implementation for LLaMA models.

    Supports LLaMA, LLaMA-2, Code Llama, and other LLaMA variants with:
    - Enhanced LLaMA model detection and validation
    - Support for Group Query Attention (GQA) in LLaMA-2
    - RMSNorm layer handling
    - RoPE position embedding support
    - Device-aware state serialization
    """

    name = "hf_llama"

    def load_model(
        self, model_id: str, device: str = "auto", **kwargs: Any
    ) -> ModuleType | Any:
        """
        Load a HuggingFace LLaMA model.

        Args:
            model_id: Model identifier (e.g. "meta-llama/Llama-2-7b-hf")
            device: Target device ("auto", "cuda", "mps", "cpu")

        Returns:
            Loaded LLaMA model
        """
        # Lazy import to map missing dependency
        with wrap_errors(
            DependencyError,
            "E203",
            "DEPENDENCY-MISSING: transformers",
            lambda e: {"dependency": "transformers"},
        ):
            from transformers import AutoModelForCausalLM  # type: ignore

        with wrap_errors(
            ModelLoadError,
            "E201",
            "MODEL-LOAD-FAILED: transformers AutoModelForCausalLM",
            lambda e: {"model_id": model_id},
        ):
            model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)

        # Use safe device movement that respects quantization constraints
        return self._safe_to_device(model, device)

    def can_handle(self, model: ModuleType | Any) -> bool:
        """
        Check if this adapter can handle the given model.

        Enhanced detection for HuggingFace LLaMA models with validation
        of expected structure and configuration.

        Args:
            model: The model to check

        Returns:
            True if this is a HuggingFace LLaMA compatible model
        """

        # Helper to detect explicitly set attributes (avoid unittest.mock auto-creation)
        def _has_set_attr(obj, name: str) -> bool:
            # Only treat attributes as present if explicitly set to avoid Mock auto-creation
            d = getattr(obj, "__dict__", None)
            if isinstance(d, dict) and name in d:
                return True
            # For nn.Module, also consider registered submodules/params/buffers
            if isinstance(obj, nn.Module):
                if hasattr(obj, "_modules") and name in obj._modules:
                    return True
                if hasattr(obj, "_parameters") and name in obj._parameters:
                    return True
                if hasattr(obj, "_buffers") and name in obj._buffers:
                    return True
            return False

        # Check for HuggingFace LLaMA class names
        model_name = model.__class__.__name__
        if model_name in ["LlamaModel", "LlamaForCausalLM"]:
            # Verify it has HF config
            if hasattr(model, "config") and hasattr(model.config, "model_type"):
                return model.config.model_type == "llama"

        # Early bare-structure acceptance (no wrapper), minimal checks for tests
        if hasattr(model, "layers"):
            layers_obj = model.layers
            # Obtain first layer via index or iterator
            first_layer = None
            try:
                if hasattr(layers_obj, "__len__") and len(layers_obj) > 0:
                    first_layer = layers_obj[0]
            except Exception:
                first_layer = None
            if first_layer is None:
                try:
                    first_layer = next(iter(layers_obj))
                except Exception:
                    first_layer = None
            if first_layer is not None:
                candidate_layer = first_layer
                # Minimal structural check for bare models (satisfies test expectations)
                if hasattr(candidate_layer, "self_attn") and hasattr(
                    candidate_layer, "mlp"
                ):
                    return True

        # Structural validation for LLaMA-like models
        if hasattr(model, "config") and hasattr(model, "model"):
            config = model.config
            llama_model = model.model

            # Check for LLaMA configuration attributes
            if (
                hasattr(config, "num_hidden_layers")
                and hasattr(config, "num_attention_heads")
                and hasattr(config, "hidden_size")
                and hasattr(llama_model, "layers")
            ):
                # Validate LLaMA structure
                try:
                    layers = llama_model.layers
                    layer = None
                    # Length-based path with robust exception handling
                    try:
                        if hasattr(layers, "__len__") and len(layers) > 0:
                            layer = layers[0]
                    except Exception:
                        layer = None
                    # Iterator fallback
                    if layer is None and hasattr(layers, "__iter__"):
                        try:
                            # Call mocked __iter__ directly to support unittest.mock patterns
                            layer = next(layers.__iter__())
                        except (StopIteration, TypeError, AttributeError):
                            return False
                    if layer is None:
                        return False

                    # Check for LLaMA layer structure (strict: only count explicitly set attributes)
                    if (
                        hasattr(layer, "self_attn")
                        and hasattr(layer, "mlp")
                        and _has_set_attr(layer.self_attn, "q_proj")
                        and _has_set_attr(layer.self_attn, "k_proj")
                        and _has_set_attr(layer.self_attn, "v_proj")
                        and _has_set_attr(layer.self_attn, "o_proj")
                        and _has_set_attr(layer.mlp, "gate_proj")
                        and _has_set_attr(layer.mlp, "up_proj")
                        and _has_set_attr(layer.mlp, "down_proj")
                    ):
                        # Check for RMSNorm (characteristic of LLaMA)
                        if _has_set_attr(layer, "input_layernorm") and _has_set_attr(
                            layer, "post_attention_layernorm"
                        ):
                            return True
                        else:
                            return False
                    else:
                        return False

                except (AttributeError, TypeError):
                    return False

        # Check for bare LLaMA model structure (less common but possible)
        # Accept list/tuple/ModuleList and iterator-only mocks
        if hasattr(model, "layers") and hasattr(model, "config"):
            try:
                layers = model.layers
                first_layer = None
                # Length-based access
                try:
                    if hasattr(layers, "__len__") and len(layers) > 0:
                        first_layer = layers[0]
                except Exception:
                    first_layer = None
                # Iterator-based access
                if first_layer is None and hasattr(layers, "__iter__"):
                    try:
                        # Call __iter__ directly to support unittest.mock patterns
                        first_layer = (
                            next(layers.__iter__())
                            if hasattr(layers, "__iter__")
                            else next(iter(layers))
                        )
                    except Exception:
                        first_layer = None
                if first_layer is not None:
                    candidate_layer = first_layer
                    if (
                        hasattr(candidate_layer, "self_attn")
                        and hasattr(candidate_layer, "mlp")
                        and hasattr(candidate_layer.self_attn, "q_proj")
                        and hasattr(candidate_layer.mlp, "gate_proj")
                    ):
                        return True
            except Exception:
                pass

        return False

    def describe(self, model: ModuleType | Any) -> dict[str, Any]:
        """
        Get structural description of the HuggingFace LLaMA model.

        Returns the required format for validation gates:
        - n_layer: int
        - heads_per_layer: List[int]
        - mlp_dims: List[int]
        - tying: Dict[str, str] (weight tying map)

        Args:
            model: The HuggingFace LLaMA model to describe

        Returns:
            Dictionary with model structure info in required format
        """
        # Determine model structure
        if hasattr(model, "model"):
            # LlamaForCausalLM structure
            llama_model = model.model
            layers = llama_model.layers
            config = model.config
        elif hasattr(model, "layers"):
            # Direct LlamaModel structure
            layers = model.layers
            config = model.config
            llama_model = model
        else:
            raise AdapterError(
                code="E202",
                message=(
                    "ADAPTER-STRUCTURE-INVALID: unrecognized HuggingFace LLaMA model structure"
                ),
                details={"model_class": model.__class__.__name__},
            )

        # Extract basic configuration
        # Robust layer count with Mock/iterator support; allow empty layers
        try:
            n_layers = len(layers)
        except Exception:
            try:
                # Fallback: count via iteration
                n_layers = sum(1 for _ in iter(layers))
            except Exception as err:
                raise AdapterError(
                    code="E202",
                    message=(
                        "ADAPTER-STRUCTURE-INVALID: unrecognized HuggingFace LLaMA model structure"
                    ),
                    details={"error": str(err)},
                ) from err
        n_heads = getattr(config, "num_attention_heads", None)
        hidden_size = getattr(config, "hidden_size", None)
        vocab_size = getattr(config, "vocab_size", None)

        # LLaMA-2 specific: Group Query Attention support
        num_key_value_heads = getattr(config, "num_key_value_heads", n_heads)

        if n_heads is None or hidden_size is None:
            raise AdapterError(
                code="E202",
                message=(
                    "ADAPTER-STRUCTURE-INVALID: missing num_attention_heads or hidden_size"
                ),
                details={"model_class": model.__class__.__name__},
            )

        # Get device info
        try:
            device = next(model.parameters()).device
        except StopIteration:
            device = torch.device("cpu")

        # Calculate total parameters
        total_params = sum(p.numel() for p in model.parameters())

        # Get MLP dimensions for each layer
        mlp_dims = []
        heads_per_layer = []

        for layer_idx in range(n_layers):
            layer = layers[layer_idx]

            # For LLaMA, all layers have the same head count
            heads_per_layer.append(n_heads)

            # Get MLP intermediate dimension (gate_proj/up_proj output size)
            if hasattr(layer.mlp.gate_proj, "weight"):
                # Linear layer: (out_features, in_features)
                mlp_dim = layer.mlp.gate_proj.weight.shape[0]
            else:
                # Fallback to config
                mlp_dim = getattr(config, "intermediate_size", hidden_size * 4)

            mlp_dims.append(mlp_dim)

        # Detect weight tying (lm_head ↔ embed_tokens)
        tying_map = {}
        if hasattr(model, "lm_head") and hasattr(llama_model, "embed_tokens"):
            # Check if the weights are the same tensor (tied)
            if model.lm_head.weight is llama_model.embed_tokens.weight:
                tying_map["lm_head.weight"] = "model.embed_tokens.weight"

        # Build the required description format
        description = {
            # Required fields for validation gates
            "n_layer": n_layers,
            "heads_per_layer": heads_per_layer,
            "mlp_dims": mlp_dims,
            "tying": tying_map,
            # Additional useful information
            "model_type": "llama",
            "model_class": model.__class__.__name__,
            "n_heads": n_heads,
            "num_key_value_heads": num_key_value_heads,  # GQA support
            "hidden_size": hidden_size,
            "vocab_size": vocab_size,
            "total_params": total_params,
            "device": str(device),
            # HuggingFace specific info
            "hf_model_type": getattr(config, "model_type", "llama"),
            "hf_config_class": config.__class__.__name__
            if hasattr(config, "__class__")
            else "unknown",
            # LLaMA specific architecture details
            "architecture": {
                "has_lm_head": hasattr(model, "lm_head"),
                "has_model_wrapper": hasattr(model, "model"),
                "layer_norm_type": "rms",  # LLaMA uses RMSNorm
                "activation": "silu",  # LLaMA uses SwiGLU (SiLU activation)
                "positional_encoding": "rope",  # LLaMA uses RoPE
                "use_bias": getattr(
                    config, "use_bias", False
                ),  # LLaMA typically no bias
                "rope_theta": getattr(config, "rope_theta", 10000.0),
                "max_position_embeddings": getattr(
                    config, "max_position_embeddings", 2048
                ),
                "is_gqa": num_key_value_heads != n_heads,  # Group Query Attention
                "gqa_ratio": n_heads // num_key_value_heads
                if num_key_value_heads != n_heads
                else 1,
                "pretraining_tp": getattr(
                    config, "pretraining_tp", 1
                ),  # Tensor parallelism
                "rms_norm_eps": getattr(config, "rms_norm_eps", 1e-6),
            },
        }

        return description

    def _extract_weight_tying_info(self, model: ModuleType | Any) -> dict[str, str]:
        """
        Extract weight tying relationships from the model.

        Args:
            model: The model to analyze

        Returns:
            Dictionary mapping tied parameter names to their source parameter names
        """
        tying_info = {}

        # Check for lm_head ↔ embed_tokens tying (common in LLaMA)
        if hasattr(model, "lm_head") and hasattr(model, "model"):
            if hasattr(model.model, "embed_tokens"):
                if model.lm_head.weight is model.model.embed_tokens.weight:
                    tying_info["lm_head.weight"] = "model.embed_tokens.weight"

        return tying_info

    def _restore_weight_tying(
        self, model: ModuleType | Any, tied_param: str, source_param: str
    ) -> None:
        """
        Restore a weight tying relationship between parameters.

        Args:
            model: The model to modify
            tied_param: Name of the parameter that should be tied
            source_param: Name of the source parameter to tie to
        """
        # This is a placeholder for weight tying restoration logic
        print(
            f"Warning: Weight tying relationship {tied_param} -> {source_param} may have been broken during restore"
        )

    def get_layer_modules(
        self, model: ModuleType | Any, layer_idx: int
    ) -> dict[str, ModuleType | Any]:
        """
        Get the modules for a specific layer (utility method).

        Args:
            model: The HuggingFace LLaMA model
            layer_idx: Index of the layer to get modules for

        Returns:
            Dictionary mapping module names to modules
        """
        if hasattr(model, "model"):
            layer = model.model.layers[layer_idx]
        else:
            layer = model.layers[layer_idx]

        modules = {
            "self_attn.q_proj": layer.self_attn.q_proj,  # Query projection
            "self_attn.k_proj": layer.self_attn.k_proj,  # Key projection
            "self_attn.v_proj": layer.self_attn.v_proj,  # Value projection
            "self_attn.o_proj": layer.self_attn.o_proj,  # Output projection
            "mlp.gate_proj": layer.mlp.gate_proj,  # Gate projection (SwiGLU)
            "mlp.up_proj": layer.mlp.up_proj,  # Up projection (SwiGLU)
            "mlp.down_proj": layer.mlp.down_proj,  # Down projection
            "input_layernorm": layer.input_layernorm,  # RMSNorm before attention
            "post_attention_layernorm": layer.post_attention_layernorm,  # RMSNorm before MLP
        }

        return modules

    def get_attention_info(self, model: ModuleType | Any) -> dict[str, Any]:
        """
        Get attention-specific information for LLaMA models.

        Args:
            model: The HuggingFace LLaMA model

        Returns:
            Dictionary with attention configuration details
        """
        config = model.config

        def _safe_int(val):
            return val if isinstance(val, int) else None

        num_heads = _safe_int(getattr(config, "num_attention_heads", None))
        hidden_size = _safe_int(getattr(config, "hidden_size", None))
        num_key_value_heads = (
            _safe_int(getattr(config, "num_key_value_heads", None)) or num_heads
        )

        head_dim = None
        if isinstance(hidden_size, int) and isinstance(num_heads, int) and num_heads:
            head_dim = hidden_size // num_heads

        return {
            "num_attention_heads": num_heads,
            "num_key_value_heads": num_key_value_heads,
            "head_dim": head_dim,
            "is_group_query_attention": num_key_value_heads != num_heads,
            "gqa_groups": num_heads // num_key_value_heads
            if num_key_value_heads != num_heads
            else 1,
            "rope_theta": getattr(config, "rope_theta", 10000.0),
            "max_position_embeddings": getattr(config, "max_position_embeddings", 2048),
            "attention_dropout": getattr(config, "attention_dropout", 0.0),
        }
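For readers comparing the removed adapter with whatever replaces it, the structural facts its `describe()` reported (layer count, per-layer head counts, MLP widths, lm_head/embed_tokens weight tying, and the GQA ratio) can be reproduced with plain torch/transformers objects. The sketch below is illustrative only; `describe_llama_structure` is a hypothetical helper, not invarlock API, and it assumes a transformers-style `LlamaForCausalLM`/`LlamaModel` layout like the one the deleted code checks for.

```python
# Minimal sketch (not invarlock API): recompute the core fields of the deleted
# adapter's describe() from a transformers-style LLaMA model.
from typing import Any

import torch.nn as nn


def describe_llama_structure(model: nn.Module) -> dict[str, Any]:
    config = model.config
    # LlamaForCausalLM wraps the decoder stack in `.model`; bare LlamaModel does not.
    llama_model = model.model if hasattr(model, "model") else model
    layers = llama_model.layers

    n_heads = config.num_attention_heads
    # LLaMA-2-style Group Query Attention: fewer key/value heads than query heads.
    n_kv_heads = getattr(config, "num_key_value_heads", n_heads)

    # Weight tying: lm_head shares its tensor with the input embeddings.
    tied = (
        hasattr(model, "lm_head")
        and model.lm_head.weight is llama_model.embed_tokens.weight
    )

    return {
        "n_layer": len(layers),
        "heads_per_layer": [n_heads] * len(layers),
        "mlp_dims": [layer.mlp.gate_proj.out_features for layer in layers],
        "tying": {"lm_head.weight": "model.embed_tokens.weight"} if tied else {},
        "is_gqa": n_kv_heads != n_heads,
        "gqa_ratio": n_heads // n_kv_heads,
    }
```

Run against a real checkpoint, e.g. `describe_llama_structure(AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf"))`, this should yield the same `n_layer`/`heads_per_layer`/`mlp_dims`/`tying` values the adapter emitted, under the stated assumptions about the model layout.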
{invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE
File without changes

{invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt
File without changes