invarlock 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. invarlock/__init__.py +2 -2
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +35 -40
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_mixin.py +25 -4
  8. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  9. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  10. invarlock/cli/adapter_auto.py +31 -21
  11. invarlock/cli/app.py +73 -2
  12. invarlock/cli/commands/certify.py +600 -59
  13. invarlock/cli/commands/doctor.py +8 -10
  14. invarlock/cli/commands/plugins.py +13 -9
  15. invarlock/cli/commands/report.py +233 -69
  16. invarlock/cli/commands/run.py +907 -183
  17. invarlock/cli/commands/verify.py +76 -11
  18. invarlock/cli/config.py +1 -1
  19. invarlock/cli/doctor_helpers.py +4 -5
  20. invarlock/cli/output.py +193 -0
  21. invarlock/cli/provenance.py +1 -1
  22. invarlock/core/bootstrap.py +1 -1
  23. invarlock/core/registry.py +9 -11
  24. invarlock/core/runner.py +111 -25
  25. invarlock/edits/quant_rtn.py +65 -37
  26. invarlock/eval/bench.py +3 -3
  27. invarlock/eval/data.py +68 -23
  28. invarlock/eval/metrics.py +59 -1
  29. invarlock/eval/tasks/__init__.py +12 -0
  30. invarlock/eval/tasks/classification.py +48 -0
  31. invarlock/eval/tasks/qa.py +36 -0
  32. invarlock/eval/tasks/text_generation.py +102 -0
  33. invarlock/guards/invariants.py +19 -10
  34. invarlock/guards/rmt.py +2 -2
  35. invarlock/guards/variance.py +2 -2
  36. invarlock/model_profile.py +48 -27
  37. invarlock/observability/health.py +6 -6
  38. invarlock/observability/metrics.py +108 -0
  39. invarlock/reporting/certificate.py +159 -9
  40. invarlock/reporting/certificate_schema.py +1 -1
  41. invarlock/reporting/guards_analysis.py +154 -4
  42. invarlock/reporting/html.py +55 -5
  43. invarlock/reporting/normalizer.py +7 -0
  44. invarlock/reporting/render.py +791 -431
  45. invarlock/reporting/report.py +39 -3
  46. invarlock/reporting/report_types.py +6 -1
  47. invarlock/reporting/telemetry.py +86 -0
  48. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/METADATA +23 -9
  49. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/RECORD +53 -48
  50. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  51. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  52. invarlock/adapters/hf_gpt2.py +0 -404
  53. invarlock/adapters/hf_llama.py +0 -487
  54. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  55. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/adapters/hf_llama.py
@@ -1,487 +0,0 @@
-"""
-HuggingFace LLaMA Model Adapter
-===============================
-
-ModelAdapter implementation for HuggingFace LLaMA architecture models.
-
-This adapter provides LLaMA-specific integration including:
-- Support for LLaMA, LLaMA-2, Code Llama, and other LLaMA variants
-- Proper handling of RMSNorm layers and SwiGLU activation
-- RoPE (Rotary Position Embedding) support
-- Group Query Attention (GQA) handling for LLaMA-2
-- Proper device-aware state serialization
-"""
-
-from typing import Any
-
-import torch
-import torch.nn as nn
-
-from invarlock.core.api import ModelAdapter
-from invarlock.core.error_utils import wrap_errors
-from invarlock.core.exceptions import AdapterError, DependencyError, ModelLoadError
-
-from .hf_mixin import HFAdapterMixin
-
-TensorType = torch.Tensor
-ModuleType = nn.Module
-
-
-class HF_LLaMA_Adapter(HFAdapterMixin, ModelAdapter):
-    """
-    HuggingFace-specific ModelAdapter implementation for LLaMA models.
-
-    Supports LLaMA, LLaMA-2, Code Llama, and other LLaMA variants with:
-    - Enhanced LLaMA model detection and validation
-    - Support for Group Query Attention (GQA) in LLaMA-2
-    - RMSNorm layer handling
-    - RoPE position embedding support
-    - Device-aware state serialization
-    """
-
-    name = "hf_llama"
-
-    def load_model(
-        self, model_id: str, device: str = "auto", **kwargs: Any
-    ) -> ModuleType | Any:
-        """
-        Load a HuggingFace LLaMA model.
-
-        Args:
-            model_id: Model identifier (e.g. "meta-llama/Llama-2-7b-hf")
-            device: Target device ("auto", "cuda", "mps", "cpu")
-
-        Returns:
-            Loaded LLaMA model
-        """
-        # Lazy import to map missing dependency
-        with wrap_errors(
-            DependencyError,
-            "E203",
-            "DEPENDENCY-MISSING: transformers",
-            lambda e: {"dependency": "transformers"},
-        ):
-            from transformers import AutoModelForCausalLM  # type: ignore
-
-        with wrap_errors(
-            ModelLoadError,
-            "E201",
-            "MODEL-LOAD-FAILED: transformers AutoModelForCausalLM",
-            lambda e: {"model_id": model_id},
-        ):
-            model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
-
-        # Use safe device movement that respects quantization constraints
-        return self._safe_to_device(model, device)
-
-    def can_handle(self, model: ModuleType | Any) -> bool:
-        """
-        Check if this adapter can handle the given model.
-
-        Enhanced detection for HuggingFace LLaMA models with validation
-        of expected structure and configuration.
-
-        Args:
-            model: The model to check
-
-        Returns:
-            True if this is a HuggingFace LLaMA compatible model
-        """
-
-        # Helper to detect explicitly set attributes (avoid unittest.mock auto-creation)
-        def _has_set_attr(obj, name: str) -> bool:
-            # Only treat attributes as present if explicitly set to avoid Mock auto-creation
-            d = getattr(obj, "__dict__", None)
-            if isinstance(d, dict) and name in d:
-                return True
-            # For nn.Module, also consider registered submodules/params/buffers
-            if isinstance(obj, nn.Module):
-                if hasattr(obj, "_modules") and name in obj._modules:
-                    return True
-                if hasattr(obj, "_parameters") and name in obj._parameters:
-                    return True
-                if hasattr(obj, "_buffers") and name in obj._buffers:
-                    return True
-            return False
-
-        # Check for HuggingFace LLaMA class names
-        model_name = model.__class__.__name__
-        if model_name in ["LlamaModel", "LlamaForCausalLM"]:
-            # Verify it has HF config
-            if hasattr(model, "config") and hasattr(model.config, "model_type"):
-                return model.config.model_type == "llama"
-
-        # Early bare-structure acceptance (no wrapper), minimal checks for tests
-        if hasattr(model, "layers"):
-            layers_obj = model.layers
-            # Obtain first layer via index or iterator
-            first_layer = None
-            try:
-                if hasattr(layers_obj, "__len__") and len(layers_obj) > 0:
-                    first_layer = layers_obj[0]
-            except Exception:
-                first_layer = None
-            if first_layer is None:
-                try:
-                    first_layer = next(iter(layers_obj))
-                except Exception:
-                    first_layer = None
-            if first_layer is not None:
-                candidate_layer = first_layer
-                # Minimal structural check for bare models (satisfies test expectations)
-                if hasattr(candidate_layer, "self_attn") and hasattr(
-                    candidate_layer, "mlp"
-                ):
-                    return True
-
-        # Structural validation for LLaMA-like models
-        if hasattr(model, "config") and hasattr(model, "model"):
-            config = model.config
-            llama_model = model.model
-
-            # Check for LLaMA configuration attributes
-            if (
-                hasattr(config, "num_hidden_layers")
-                and hasattr(config, "num_attention_heads")
-                and hasattr(config, "hidden_size")
-                and hasattr(llama_model, "layers")
-            ):
-                # Validate LLaMA structure
-                try:
-                    layers = llama_model.layers
-                    layer = None
-                    # Length-based path with robust exception handling
-                    try:
-                        if hasattr(layers, "__len__") and len(layers) > 0:
-                            layer = layers[0]
-                    except Exception:
-                        layer = None
-                    # Iterator fallback
-                    if layer is None and hasattr(layers, "__iter__"):
-                        try:
-                            # Call mocked __iter__ directly to support unittest.mock patterns
-                            layer = next(layers.__iter__())
-                        except (StopIteration, TypeError, AttributeError):
-                            return False
-                    if layer is None:
-                        return False
-
-                    # Check for LLaMA layer structure (strict: only count explicitly set attributes)
-                    if (
-                        hasattr(layer, "self_attn")
-                        and hasattr(layer, "mlp")
-                        and _has_set_attr(layer.self_attn, "q_proj")
-                        and _has_set_attr(layer.self_attn, "k_proj")
-                        and _has_set_attr(layer.self_attn, "v_proj")
-                        and _has_set_attr(layer.self_attn, "o_proj")
-                        and _has_set_attr(layer.mlp, "gate_proj")
-                        and _has_set_attr(layer.mlp, "up_proj")
-                        and _has_set_attr(layer.mlp, "down_proj")
-                    ):
-                        # Check for RMSNorm (characteristic of LLaMA)
-                        if _has_set_attr(layer, "input_layernorm") and _has_set_attr(
-                            layer, "post_attention_layernorm"
-                        ):
-                            return True
-                        else:
-                            return False
-                    else:
-                        return False
-
-                except (AttributeError, TypeError):
-                    return False
-
-        # Check for bare LLaMA model structure (less common but possible)
-        # Accept list/tuple/ModuleList and iterator-only mocks
-        if hasattr(model, "layers") and hasattr(model, "config"):
-            try:
-                layers = model.layers
-                first_layer = None
-                # Length-based access
-                try:
-                    if hasattr(layers, "__len__") and len(layers) > 0:
-                        first_layer = layers[0]
-                except Exception:
-                    first_layer = None
-                # Iterator-based access
-                if first_layer is None and hasattr(layers, "__iter__"):
-                    try:
-                        # Call __iter__ directly to support unittest.mock patterns
-                        first_layer = (
-                            next(layers.__iter__())
-                            if hasattr(layers, "__iter__")
-                            else next(iter(layers))
-                        )
-                    except Exception:
-                        first_layer = None
-                if first_layer is not None:
-                    candidate_layer = first_layer
-                    if (
-                        hasattr(candidate_layer, "self_attn")
-                        and hasattr(candidate_layer, "mlp")
-                        and hasattr(candidate_layer.self_attn, "q_proj")
-                        and hasattr(candidate_layer.mlp, "gate_proj")
-                    ):
-                        return True
-            except Exception:
-                pass
-
-        return False
-
-    def describe(self, model: ModuleType | Any) -> dict[str, Any]:
-        """
-        Get structural description of the HuggingFace LLaMA model.
-
-        Returns the required format for validation gates:
-        - n_layer: int
-        - heads_per_layer: List[int]
-        - mlp_dims: List[int]
-        - tying: Dict[str, str] (weight tying map)
-
-        Args:
-            model: The HuggingFace LLaMA model to describe
-
-        Returns:
-            Dictionary with model structure info in required format
-        """
-        # Determine model structure
-        if hasattr(model, "model"):
-            # LlamaForCausalLM structure
-            llama_model = model.model
-            layers = llama_model.layers
-            config = model.config
-        elif hasattr(model, "layers"):
-            # Direct LlamaModel structure
-            layers = model.layers
-            config = model.config
-            llama_model = model
-        else:
-            raise AdapterError(
-                code="E202",
-                message=(
-                    "ADAPTER-STRUCTURE-INVALID: unrecognized HuggingFace LLaMA model structure"
-                ),
-                details={"model_class": model.__class__.__name__},
-            )
-
-        # Extract basic configuration
-        # Robust layer count with Mock/iterator support; allow empty layers
-        try:
-            n_layers = len(layers)
-        except Exception:
-            try:
-                # Fallback: count via iteration
-                n_layers = sum(1 for _ in iter(layers))
-            except Exception as err:
-                raise AdapterError(
-                    code="E202",
-                    message=(
-                        "ADAPTER-STRUCTURE-INVALID: unrecognized HuggingFace LLaMA model structure"
-                    ),
-                    details={"error": str(err)},
-                ) from err
-        n_heads = getattr(config, "num_attention_heads", None)
-        hidden_size = getattr(config, "hidden_size", None)
-        vocab_size = getattr(config, "vocab_size", None)
-
-        # LLaMA-2 specific: Group Query Attention support
-        num_key_value_heads = getattr(config, "num_key_value_heads", n_heads)
-
-        if n_heads is None or hidden_size is None:
-            raise AdapterError(
-                code="E202",
-                message=(
-                    "ADAPTER-STRUCTURE-INVALID: missing num_attention_heads or hidden_size"
-                ),
-                details={"model_class": model.__class__.__name__},
-            )
-
-        # Get device info
-        try:
-            device = next(model.parameters()).device
-        except StopIteration:
-            device = torch.device("cpu")
-
-        # Calculate total parameters
-        total_params = sum(p.numel() for p in model.parameters())
-
-        # Get MLP dimensions for each layer
-        mlp_dims = []
-        heads_per_layer = []
-
-        for layer_idx in range(n_layers):
-            layer = layers[layer_idx]
-
-            # For LLaMA, all layers have the same head count
-            heads_per_layer.append(n_heads)
-
-            # Get MLP intermediate dimension (gate_proj/up_proj output size)
-            if hasattr(layer.mlp.gate_proj, "weight"):
-                # Linear layer: (out_features, in_features)
-                mlp_dim = layer.mlp.gate_proj.weight.shape[0]
-            else:
-                # Fallback to config
-                mlp_dim = getattr(config, "intermediate_size", hidden_size * 4)
-
-            mlp_dims.append(mlp_dim)
-
-        # Detect weight tying (lm_head ↔ embed_tokens)
-        tying_map = {}
-        if hasattr(model, "lm_head") and hasattr(llama_model, "embed_tokens"):
-            # Check if the weights are the same tensor (tied)
-            if model.lm_head.weight is llama_model.embed_tokens.weight:
-                tying_map["lm_head.weight"] = "model.embed_tokens.weight"
-
-        # Build the required description format
-        description = {
-            # Required fields for validation gates
-            "n_layer": n_layers,
-            "heads_per_layer": heads_per_layer,
-            "mlp_dims": mlp_dims,
-            "tying": tying_map,
-            # Additional useful information
-            "model_type": "llama",
-            "model_class": model.__class__.__name__,
-            "n_heads": n_heads,
-            "num_key_value_heads": num_key_value_heads,  # GQA support
-            "hidden_size": hidden_size,
-            "vocab_size": vocab_size,
-            "total_params": total_params,
-            "device": str(device),
-            # HuggingFace specific info
-            "hf_model_type": getattr(config, "model_type", "llama"),
-            "hf_config_class": config.__class__.__name__
-            if hasattr(config, "__class__")
-            else "unknown",
-            # LLaMA specific architecture details
-            "architecture": {
-                "has_lm_head": hasattr(model, "lm_head"),
-                "has_model_wrapper": hasattr(model, "model"),
-                "layer_norm_type": "rms",  # LLaMA uses RMSNorm
-                "activation": "silu",  # LLaMA uses SwiGLU (SiLU activation)
-                "positional_encoding": "rope",  # LLaMA uses RoPE
-                "use_bias": getattr(
-                    config, "use_bias", False
-                ),  # LLaMA typically no bias
-                "rope_theta": getattr(config, "rope_theta", 10000.0),
-                "max_position_embeddings": getattr(
-                    config, "max_position_embeddings", 2048
-                ),
-                "is_gqa": num_key_value_heads != n_heads,  # Group Query Attention
-                "gqa_ratio": n_heads // num_key_value_heads
-                if num_key_value_heads != n_heads
-                else 1,
-                "pretraining_tp": getattr(
-                    config, "pretraining_tp", 1
-                ),  # Tensor parallelism
-                "rms_norm_eps": getattr(config, "rms_norm_eps", 1e-6),
-            },
-        }
-
-        return description
-
-    def _extract_weight_tying_info(self, model: ModuleType | Any) -> dict[str, str]:
-        """
-        Extract weight tying relationships from the model.
-
-        Args:
-            model: The model to analyze
-
-        Returns:
-            Dictionary mapping tied parameter names to their source parameter names
-        """
-        tying_info = {}
-
-        # Check for lm_head ↔ embed_tokens tying (common in LLaMA)
-        if hasattr(model, "lm_head") and hasattr(model, "model"):
-            if hasattr(model.model, "embed_tokens"):
-                if model.lm_head.weight is model.model.embed_tokens.weight:
-                    tying_info["lm_head.weight"] = "model.embed_tokens.weight"
-
-        return tying_info
-
-    def _restore_weight_tying(
-        self, model: ModuleType | Any, tied_param: str, source_param: str
-    ) -> None:
-        """
-        Restore a weight tying relationship between parameters.
-
-        Args:
-            model: The model to modify
-            tied_param: Name of the parameter that should be tied
-            source_param: Name of the source parameter to tie to
-        """
-        # This is a placeholder for weight tying restoration logic
-        print(
-            f"Warning: Weight tying relationship {tied_param} -> {source_param} may have been broken during restore"
-        )
-
-    def get_layer_modules(
-        self, model: ModuleType | Any, layer_idx: int
-    ) -> dict[str, ModuleType | Any]:
-        """
-        Get the modules for a specific layer (utility method).
-
-        Args:
-            model: The HuggingFace LLaMA model
-            layer_idx: Index of the layer to get modules for
-
-        Returns:
-            Dictionary mapping module names to modules
-        """
-        if hasattr(model, "model"):
-            layer = model.model.layers[layer_idx]
-        else:
-            layer = model.layers[layer_idx]
-
-        modules = {
-            "self_attn.q_proj": layer.self_attn.q_proj,  # Query projection
-            "self_attn.k_proj": layer.self_attn.k_proj,  # Key projection
-            "self_attn.v_proj": layer.self_attn.v_proj,  # Value projection
-            "self_attn.o_proj": layer.self_attn.o_proj,  # Output projection
-            "mlp.gate_proj": layer.mlp.gate_proj,  # Gate projection (SwiGLU)
-            "mlp.up_proj": layer.mlp.up_proj,  # Up projection (SwiGLU)
-            "mlp.down_proj": layer.mlp.down_proj,  # Down projection
-            "input_layernorm": layer.input_layernorm,  # RMSNorm before attention
-            "post_attention_layernorm": layer.post_attention_layernorm,  # RMSNorm before MLP
-        }
-
-        return modules
-
-    def get_attention_info(self, model: ModuleType | Any) -> dict[str, Any]:
-        """
-        Get attention-specific information for LLaMA models.
-
-        Args:
-            model: The HuggingFace LLaMA model
-
-        Returns:
-            Dictionary with attention configuration details
-        """
-        config = model.config
-
-        def _safe_int(val):
-            return val if isinstance(val, int) else None
-
-        num_heads = _safe_int(getattr(config, "num_attention_heads", None))
-        hidden_size = _safe_int(getattr(config, "hidden_size", None))
-        num_key_value_heads = (
-            _safe_int(getattr(config, "num_key_value_heads", None)) or num_heads
-        )
-
-        head_dim = None
-        if isinstance(hidden_size, int) and isinstance(num_heads, int) and num_heads:
-            head_dim = hidden_size // num_heads
-
-        return {
-            "num_attention_heads": num_heads,
-            "num_key_value_heads": num_key_value_heads,
-            "head_dim": head_dim,
-            "is_group_query_attention": num_key_value_heads != num_heads,
-            "gqa_groups": num_heads // num_key_value_heads
-            if num_key_value_heads != num_heads
-            else 1,
-            "rope_theta": getattr(config, "rope_theta", 10000.0),
-            "max_position_embeddings": getattr(config, "max_position_embeddings", 2048),
-            "attention_dropout": getattr(config, "attention_dropout", 0.0),
-        }
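
For reference, the removed class exposed a small adapter surface (load_model, can_handle, describe, get_layer_modules, get_attention_info). The sketch below shows how that surface would typically have been exercised under 0.3.6; it is based only on the signatures visible in the deleted file, and the bare constructor, model id, and CPU device are illustrative assumptions rather than anything stated in this diff.

# Hypothetical usage of the adapter removed in 0.3.7 (this import path exists only in <= 0.3.6).
from invarlock.adapters.hf_llama import HF_LLaMA_Adapter

adapter = HF_LLaMA_Adapter()  # assumes a no-argument constructor

# load_model(model_id, device="auto", **kwargs) wraps AutoModelForCausalLM.from_pretrained
model = adapter.load_model("meta-llama/Llama-2-7b-hf", device="cpu")

if adapter.can_handle(model):
    info = adapter.describe(model)
    # Fields required by the validation gates, per the describe() docstring
    print(info["n_layer"], info["mlp_dims"][0], info["tying"])
    # GQA detection reported by get_attention_info()
    print(adapter.get_attention_info(model)["is_group_query_attention"])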