invarlock 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +1 -1
- invarlock/adapters/__init__.py +13 -0
- invarlock/adapters/auto.py +149 -22
- invarlock/adapters/capabilities.py +421 -0
- invarlock/adapters/hf_llama.py +2 -2
- invarlock/adapters/hf_mixin.py +122 -1
- invarlock/core/registry.py +34 -6
- invarlock/guards/variance.py +41 -6
- invarlock/plugins/hf_awq_adapter.py +22 -1
- invarlock/plugins/hf_bnb_adapter.py +117 -22
- invarlock/plugins/hf_gptq_adapter.py +24 -1
- {invarlock-0.2.0.dist-info → invarlock-0.3.0.dist-info}/METADATA +2 -2
- {invarlock-0.2.0.dist-info → invarlock-0.3.0.dist-info}/RECORD +17 -16
- {invarlock-0.2.0.dist-info → invarlock-0.3.0.dist-info}/WHEEL +0 -0
- {invarlock-0.2.0.dist-info → invarlock-0.3.0.dist-info}/entry_points.txt +0 -0
- {invarlock-0.2.0.dist-info → invarlock-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.2.0.dist-info → invarlock-0.3.0.dist-info}/top_level.txt +0 -0
invarlock/adapters/hf_mixin.py
CHANGED
@@ -4,10 +4,12 @@ Shared HuggingFace adapter mixin.
 
 Provides reusable functionality for InvarLock's HuggingFace adapters:
 - Device resolution helpers
+- Safe device movement for quantized models
 - Snapshot/restore with device awareness
 - Chunked snapshot helpers to reduce peak memory usage
 - Lightweight config serialization
 - Weight-tying detection plumbing
+- Quantization detection and capabilities
 """
 
 from __future__ import annotations
@@ -17,12 +19,15 @@ import json
 import os
 import tempfile
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import torch
 
 from invarlock.security import is_secure_path
 
+if TYPE_CHECKING:
+    from .capabilities import ModelCapabilities, QuantizationConfig
+
 SCALAR_TYPES = (int, float, str, bool)
 
 
@@ -91,6 +96,122 @@ class HFAdapterMixin:
 
         return torch.device(device_str)
 
+    def _safe_to_device(
+        self,
+        model: torch.nn.Module,
+        device: str | torch.device | None = "auto",
+        capabilities: ModelCapabilities | None = None,
+    ) -> torch.nn.Module:
+        """
+        Safely move model to device, respecting quantization constraints.
+
+        For quantized models (BNB, AWQ, GPTQ), device movement may be
+        impossible or already handled by the loading mechanism. This
+        method checks the model's capabilities before attempting .to().
+
+        Args:
+            model: The model to move.
+            device: Target device ("auto", "cuda", "mps", "cpu").
+            capabilities: Pre-computed capabilities, or None to auto-detect.
+
+        Returns:
+            The model (possibly on the new device, or unchanged if not movable).
+        """
+        target_device = self._resolve_device(device)
+
+        # Auto-detect capabilities if not provided
+        if capabilities is None:
+            capabilities = self._detect_capabilities(model)
+
+        # Check if model can be moved
+        if capabilities is not None and not capabilities.device_movable:
+            # Model handles its own device placement (e.g., BNB, AWQ, GPTQ)
+            # Log this decision for debugging but don't attempt .to()
+            return model
+
+        # Safe to move
+        return model.to(target_device)
+
+    def _detect_capabilities(self, model: torch.nn.Module) -> ModelCapabilities | None:
+        """
+        Detect model capabilities from a loaded model instance.
+
+        Args:
+            model: Loaded model instance.
+
+        Returns:
+            ModelCapabilities if detection succeeds, None otherwise.
+        """
+        try:
+            from .capabilities import detect_capabilities_from_model
+
+            return detect_capabilities_from_model(model)
+        except ImportError:
+            return None
+
+    def _is_quantized_model(self, model: torch.nn.Module) -> bool:
+        """
+        Check if a model is quantized (BNB, AWQ, GPTQ).
+
+        This is a quick heuristic check that doesn't require full
+        capability detection.
+
+        Args:
+            model: Model to check.
+
+        Returns:
+            True if the model appears to be quantized.
+        """
+        config = getattr(model, "config", None)
+        if config is None:
+            return False
+
+        # Check for quantization_config attribute
+        quant_cfg = getattr(config, "quantization_config", None)
+        if quant_cfg is not None:
+            return True
+
+        # Check for BNB-specific attributes on the model
+        if hasattr(model, "is_loaded_in_8bit") and model.is_loaded_in_8bit:
+            return True
+        if hasattr(model, "is_loaded_in_4bit") and model.is_loaded_in_4bit:
+            return True
+
+        # Check for quantized module types in the model
+        for module in model.modules():
+            module_name = module.__class__.__name__.lower()
+            if any(
+                q in module_name
+                for q in ["linear8bit", "linear4bit", "quantlinear", "awqlinear"]
+            ):
+                return True
+
+        return False
+
+    def _detect_quantization_config(
+        self, model: torch.nn.Module
+    ) -> QuantizationConfig | None:
+        """
+        Detect quantization configuration from a model.
+
+        Args:
+            model: Model to inspect.
+
+        Returns:
+            QuantizationConfig if quantization detected, None otherwise.
+        """
+        try:
+            from .capabilities import detect_quantization_from_config
+
+            config = getattr(model, "config", None)
+            if config is not None:
+                quant_cfg = detect_quantization_from_config(config)
+                if quant_cfg.is_quantized():
+                    return quant_cfg
+        except ImportError:
+            pass
+        return None
+
     # ------------------------------------------------------------------
     # HF save/export helpers
     # ------------------------------------------------------------------
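The gist of `_safe_to_device` can be exercised outside the mixin. A minimal sketch, assuming only that `ModelCapabilities` exposes the boolean `device_movable` field the hunk above checks (the stub dataclass, free function, and toy model below are illustrative, not part of invarlock):

from dataclasses import dataclass

import torch


@dataclass
class StubCapabilities:
    """Illustrative stand-in for invarlock's ModelCapabilities."""

    device_movable: bool


def safe_to_device(model, target_device, capabilities):
    # Mirrors the branch in _safe_to_device: models that manage their own
    # placement (BNB/AWQ/GPTQ) are returned untouched, others get .to().
    if capabilities is not None and not capabilities.device_movable:
        return model
    return model.to(target_device)


model = torch.nn.Linear(4, 4)
# Movable model: .to() is applied as usual.
safe_to_device(model, torch.device("cpu"), StubCapabilities(device_movable=True))
# "Quantized" model: returned unchanged, no .to() attempted.
safe_to_device(model, torch.device("cpu"), StubCapabilities(device_movable=False))

The design point is that adapters can call one helper unconditionally; the quantization-specific "don't move me" knowledge lives in the capabilities object rather than in each adapter.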
invarlock/core/registry.py
CHANGED
@@ -117,14 +117,24 @@ class CoreRegistry:
             module: str,
             class_name: str,
             status: str = "Available (fallback)",
+            required_deps: list[str] | None = None,
         ) -> None:
             if name not in registry:
+                # Check runtime dependencies for optional plugins
+                actual_available = True
+                actual_status = status
+                if required_deps:
+                    missing = self._check_runtime_dependencies(required_deps)
+                    if missing:
+                        actual_available = False
+                        actual_status = f"Needs extra: {', '.join(missing)}"
+
                 registry[name] = PluginInfo(
                     name=name,
                     module=module,
                     class_name=class_name,
-                    available=True,
-                    status=status,
+                    available=actual_available,
+                    status=actual_status,
                     package="invarlock",
                     version=INVARLOCK_VERSION,
                 )
@@ -147,27 +157,30 @@ class CoreRegistry:
         _fallback(
             self._adapters, "hf_mlm_auto", "invarlock.adapters", "HF_MLM_Auto_Adapter"
         )
-        # Optional plugin adapters (
+        # Optional plugin adapters (verify runtime dependencies)
         _fallback(
             self._adapters,
             "hf_gptq",
             "invarlock.plugins.hf_gptq_adapter",
             "HF_GPTQ_Adapter",
-            status="Available (
+            status="Available (plugin)",
+            required_deps=["auto_gptq"],
         )
         _fallback(
             self._adapters,
             "hf_awq",
             "invarlock.plugins.hf_awq_adapter",
             "HF_AWQ_Adapter",
-            status="Available (
+            status="Available (plugin)",
+            required_deps=["autoawq"],
        )
         _fallback(
             self._adapters,
             "hf_bnb",
             "invarlock.plugins.hf_bnb_adapter",
             "HF_BNB_Adapter",
-            status="Available (
+            status="Available (plugin)",
+            required_deps=["bitsandbytes"],
         )
 
         # Register built-in edits (quant-only core) and internal no-op
@@ -181,6 +194,21 @@ class CoreRegistry:
         _fallback(self._guards, "rmt", "invarlock.guards", "RMTGuard")
         _fallback(self._guards, "hello_guard", "invarlock.plugins", "HelloGuard")
 
+    def _check_runtime_dependencies(self, deps: list[str]) -> list[str]:
+        """
+        Check if runtime dependencies are actually importable.
+
+        Returns:
+            List of missing dependency names.
+        """
+        missing = []
+        for dep in deps:
+            try:
+                importlib.import_module(dep)
+            except ImportError:
+                missing.append(dep)
+        return missing
+
     def _create_plugin_info(
         self, entry_point: EntryPoint, plugin_type: str
     ) -> PluginInfo:
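The new `required_deps` handling boils down to an importability probe plus a status string. A standalone sketch of the same logic (the free function here is ours; the package implements it as `CoreRegistry._check_runtime_dependencies`):

import importlib


def missing_deps(deps: list[str]) -> list[str]:
    """Return the subset of deps that cannot be imported."""
    missing = []
    for dep in deps:
        try:
            importlib.import_module(dep)
        except ImportError:
            missing.append(dep)
    return missing


# On a host without bitsandbytes, the hf_bnb fallback entry is now
# registered with available=False and an actionable status string:
miss = missing_deps(["bitsandbytes"])
status = f"Needs extra: {', '.join(miss)}" if miss else "Available (plugin)"
print(status)  # e.g. "Needs extra: bitsandbytes"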
invarlock/guards/variance.py
CHANGED
@@ -39,6 +39,30 @@ from .policies import VariancePolicyDict
 __all__ = ["equalise_residual_variance", "equalise_branch_variance", "VarianceGuard"]
 
 
+def _safe_mean(
+    samples: list[float] | np.ndarray, default: float | None = None
+) -> float | None:
+    """
+    Compute mean of samples, returning default if empty.
+
+    Avoids numpy RuntimeWarning "Mean of empty slice" when samples is empty
+    or contains no valid values.
+
+    Args:
+        samples: List or array of float values.
+        default: Value to return if samples is empty.
+
+    Returns:
+        Mean value or default if samples is empty.
+    """
+    if samples is None:
+        return default
+    arr = np.asarray(samples)
+    if arr.size == 0:
+        return default
+    return float(np.nanmean(arr))
+
+
 try: # Optional dependency: tqdm (progress bars)
     from tqdm.auto import tqdm as _tqdm
 except Exception: # pragma: no cover - exercised only when tqdm is absent
@@ -1472,7 +1496,14 @@ class VarianceGuard(Guard):
 
         if coverage >= min_coverage and not self._scales:
             ppl_no_ve_samples = ppl_no_ve_samples[:coverage]
-            ppl_no_ve_mean =
+            ppl_no_ve_mean = _safe_mean(ppl_no_ve_samples)
+            if ppl_no_ve_mean is None:
+                # No valid samples - cannot compute mean
+                self._ratio_ci = None
+                predictive_state["reason"] = "no_valid_samples"
+                self._predictive_gate_state = predictive_state
+                self._stats["predictive_gate"] = predictive_state.copy()
+                return
             self.set_ab_results(
                 ppl_no_ve=ppl_no_ve_mean,
                 ppl_with_ve=ppl_no_ve_mean,
@@ -1527,8 +1558,12 @@ class VarianceGuard(Guard):
                 n_bootstrap=500,
                 seed=calib_seed,
             )
-            ppl_no_ve_mean =
-            ppl_with_ve_mean =
+            ppl_no_ve_mean = _safe_mean(ppl_no_ve_samples)
+            ppl_with_ve_mean = _safe_mean(ppl_with_ve_samples)
+            if ppl_no_ve_mean is None or ppl_with_ve_mean is None:
+                # Fallback if means couldn't be computed
+                ppl_no_ve_mean = ppl_no_ve_mean or 0.0
+                ppl_with_ve_mean = ppl_with_ve_mean or 0.0
             self.set_ab_results(
                 ppl_no_ve=ppl_no_ve_mean,
                 ppl_with_ve=ppl_with_ve_mean,
@@ -2118,7 +2153,7 @@ class VarianceGuard(Guard):
 
         if coverage >= min_coverage and not self._scales:
             ppl_no_ve_samples = ppl_no_ve_samples[:coverage]
-            ppl_no_ve_mean =
+            ppl_no_ve_mean = _safe_mean(ppl_no_ve_samples, default=0.0)
             self.set_ab_results(
                 ppl_no_ve=ppl_no_ve_mean,
                 ppl_with_ve=ppl_no_ve_mean,
@@ -2158,8 +2193,8 @@ class VarianceGuard(Guard):
                 n_bootstrap=500,
                 seed=calib_seed,
             )
-            ppl_no_ve_mean =
-            ppl_with_ve_mean =
+            ppl_no_ve_mean = _safe_mean(ppl_no_ve_samples, default=0.0)
+            ppl_with_ve_mean = _safe_mean(ppl_with_ve_samples, default=0.0)
             self.set_ab_results(
                 ppl_no_ve=ppl_no_ve_mean,
                 ppl_with_ve=ppl_with_ve_mean,
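A quick standalone check (not from the package) of the failure mode `_safe_mean` guards against:

import warnings

import numpy as np

# Bare nanmean on an empty array warns ("Mean of empty slice") and yields
# nan, which would silently poison downstream perplexity ratios.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    result = np.nanmean(np.asarray([]))
print(result, [str(w.message) for w in caught])  # nan ['Mean of empty slice']

# _safe_mean's contract instead:
#   _safe_mean([])                 -> None (caller records "no_valid_samples")
#   _safe_mean([], default=0.0)    -> 0.0  (caller proceeds with a sentinel)
#   _safe_mean([2.0, np.nan, 4.0]) -> 3.0  (nanmean skips NaN entries)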
invarlock/plugins/hf_awq_adapter.py
CHANGED

@@ -4,12 +4,16 @@ HuggingFace AWQ Adapter (plugin)
 
 Optional adapter for loading AWQ-quantized causal LMs from the Hub.
 Requires the `autoawq` extra on supported platforms (typically Linux/CUDA).
+
+AWQ models are pre-quantized and typically handle device placement internally
+during loading. This adapter does NOT call .to() on the loaded model.
 """
 
 from __future__ import annotations
 
 from typing import Any
 
+from invarlock.adapters.capabilities import ModelCapabilities
 from invarlock.adapters.hf_mixin import HFAdapterMixin
 from invarlock.core.api import ModelAdapter
 from invarlock.core.error_utils import wrap_errors
@@ -56,7 +60,24 @@ class HF_AWQ_Adapter(HFAdapterMixin, ModelAdapter):
             trust_remote_code=True,
             **{k: v for k, v in kwargs.items() if k != "device"},
         )
-
+
+        # AWQ models are pre-quantized; use safe device movement
+        # which respects the model's device constraints
+        return self._safe_to_device(
+            model, device, capabilities=ModelCapabilities.for_awq()
+        )
+
+    def get_capabilities(self, model: Any) -> ModelCapabilities:
+        """Return capabilities for an AWQ-quantized model."""
+        config = getattr(model, "config", None)
+        group_size = 128  # Default AWQ group size
+        if config is not None:
+            quant_cfg = getattr(config, "quantization_config", None)
+            if isinstance(quant_cfg, dict):
+                group_size = quant_cfg.get("group_size", 128)
+            elif quant_cfg is not None:
+                group_size = getattr(quant_cfg, "group_size", 128)
+        return ModelCapabilities.for_awq(group_size=group_size)
 
     def can_handle(self, model: Any) -> bool:
         cfg = getattr(model, "config", None)
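Transformers can surface `quantization_config` either as a plain dict (config.json loaded verbatim) or as a parsed config object, which is why `get_capabilities` branches on `isinstance(quant_cfg, dict)`. A small illustration with fabricated stand-in configs:

from types import SimpleNamespace

# Dict-style config, as when config.json is read verbatim.
dict_style = SimpleNamespace(
    quantization_config={"quant_method": "awq", "group_size": 64}
)
# Attribute-style config, as with a parsed quantization config object.
attr_style = SimpleNamespace(
    quantization_config=SimpleNamespace(group_size=64)
)

for config in (dict_style, attr_style):
    quant_cfg = getattr(config, "quantization_config", None)
    if isinstance(quant_cfg, dict):
        group_size = quant_cfg.get("group_size", 128)
    else:
        group_size = getattr(quant_cfg, "group_size", 128)
    print(group_size)  # 64 both times; 128 is the AWQ default fallback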
invarlock/plugins/hf_bnb_adapter.py
CHANGED

@@ -5,18 +5,73 @@ HuggingFace BitsAndBytes Adapter (plugin)
 Optional adapter for loading 4/8-bit quantized causal LMs via bitsandbytes
 through Transformers. Requires GPU for practical use.
 Install with the `gpu` extra on supported platforms.
+
+This adapter handles both:
+1. Fresh quantization of FP16 models (load_in_8bit/load_in_4bit)
+2. Loading pre-quantized BNB checkpoints (auto-detected via quantization_config)
 """
 
 from __future__ import annotations
 
+from pathlib import Path
 from typing import Any
 
+from invarlock.adapters.capabilities import (
+    ModelCapabilities,
+    QuantizationMethod,
+    detect_quantization_from_config,
+)
 from invarlock.adapters.hf_mixin import HFAdapterMixin
 from invarlock.core.api import ModelAdapter
 from invarlock.core.error_utils import wrap_errors
 from invarlock.core.exceptions import DependencyError, ModelLoadError
 
 
+def _is_local_path(model_id: str) -> bool:
+    """Check if model_id is a local filesystem path."""
+    return Path(model_id).exists()
+
+
+def _detect_pre_quantized_bnb(model_id: str) -> tuple[bool, int]:
+    """
+    Detect if a local checkpoint is pre-quantized with BNB.
+
+    Returns:
+        Tuple of (is_pre_quantized, bits) where bits is 8 or 4.
+    """
+    if not _is_local_path(model_id):
+        return False, 0
+
+    config_path = Path(model_id) / "config.json"
+    if not config_path.exists():
+        return False, 0
+
+    try:
+        import json
+
+        config_data = json.loads(config_path.read_text())
+        quant_cfg = config_data.get("quantization_config", {})
+
+        if not quant_cfg:
+            return False, 0
+
+        # Check for BNB quantization
+        quant_method = quant_cfg.get("quant_method", "").lower()
+        if quant_method == "bitsandbytes" or "load_in_8bit" in quant_cfg:
+            if quant_cfg.get("load_in_8bit"):
+                return True, 8
+            if quant_cfg.get("load_in_4bit"):
+                return True, 4
+            # Fallback to bits field
+            bits = quant_cfg.get("bits", 8)
+            return True, bits
+
+    except Exception:
+        pass
+
+    return False, 0
+
+
 class HF_BNB_Adapter(HFAdapterMixin, ModelAdapter):
     name = "hf_bnb"
 
@@ -29,32 +84,72 @@ class HF_BNB_Adapter(HFAdapterMixin, ModelAdapter):
     ):
         from transformers import AutoModelForCausalLM
 
-        #
-
-        load_in_4bit = bool(kwargs.pop("load_in_4bit", False))
+        # Check if this is a pre-quantized checkpoint
+        is_pre_quantized, pre_quant_bits = _detect_pre_quantized_bnb(model_id)
 
-        if
-
+        if is_pre_quantized:
+            # Load pre-quantized checkpoint WITHOUT re-applying quantization
+            with wrap_errors(
+                ModelLoadError,
+                "E201",
+                "MODEL-LOAD-FAILED: bitsandbytes/transformers (pre-quantized)",
+                lambda e: {"model_id": model_id, "pre_quantized_bits": pre_quant_bits},
+            ):
+                model = AutoModelForCausalLM.from_pretrained(
+                    model_id,
+                    device_map="auto",
+                    trust_remote_code=True,
+                    # Do NOT pass load_in_8bit/load_in_4bit for pre-quantized
+                    **{
+                        k: v
+                        for k, v in kwargs.items()
+                        if k not in ("load_in_8bit", "load_in_4bit")
+                    },
+                )
+        else:
+            # Fresh quantization of FP16 model
+            load_in_8bit = bool(kwargs.pop("load_in_8bit", True))
+            load_in_4bit = bool(kwargs.pop("load_in_4bit", False))
 
-
-
-
-
-
-
-
-            model_id,
-
-
-
-
-
-
-
-
-
+            if load_in_4bit:
+                load_in_8bit = False
+
+            with wrap_errors(
+                ModelLoadError,
+                "E201",
+                "MODEL-LOAD-FAILED: bitsandbytes/transformers",
+                lambda e: {"model_id": model_id},
+            ):
+                model = AutoModelForCausalLM.from_pretrained(
+                    model_id,
+                    device_map="auto",
+                    load_in_8bit=load_in_8bit,
+                    load_in_4bit=load_in_4bit,
+                    trust_remote_code=True,
+                    **kwargs,
+                )
+
+        # BNB models handle their own device placement via device_map="auto"
+        # Do NOT call .to() on BNB models - it will raise an error
+        _ = self._resolve_device(device)  # Validate device string only
         return model
 
+    def get_capabilities(self, model: Any) -> ModelCapabilities:
+        """Return capabilities for a BNB-quantized model."""
+        config = getattr(model, "config", None)
+        if config is not None:
+            quant_cfg = detect_quantization_from_config(config)
+            if quant_cfg.method == QuantizationMethod.BNB_8BIT:
+                return ModelCapabilities.for_bnb_8bit(from_checkpoint=True)
+            elif quant_cfg.method == QuantizationMethod.BNB_4BIT:
+                return ModelCapabilities.for_bnb_4bit(
+                    from_checkpoint=True,
+                    double_quant=quant_cfg.double_quant,
+                )
+
+        # Default to 8-bit if we can't determine
+        return ModelCapabilities.for_bnb_8bit()
+
     def can_handle(self, model: Any) -> bool:
         cfg = getattr(model, "config", None)
         return hasattr(cfg, "n_layer") or hasattr(cfg, "num_hidden_layers")
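For reference, a sketch of a config.json payload that `_detect_pre_quantized_bnb` would classify as a pre-quantized 4-bit checkpoint (field values are illustrative; only the keys the detector reads matter):

import json

config_json = """
{
  "model_type": "llama",
  "quantization_config": {
    "quant_method": "bitsandbytes",
    "load_in_4bit": true
  }
}
"""

quant_cfg = json.loads(config_json).get("quantization_config", {})
assert quant_cfg.get("quant_method", "").lower() == "bitsandbytes"
assert quant_cfg.get("load_in_4bit")
# -> _detect_pre_quantized_bnb(...) would return (True, 4), so load() takes
#    the pre-quantized branch and strips load_in_8bit/load_in_4bit kwargs.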
invarlock/plugins/hf_gptq_adapter.py
CHANGED

@@ -4,12 +4,16 @@ HuggingFace GPTQ Adapter (plugin)
 
 Optional adapter for loading AutoGPTQ-quantized causal LMs from the Hub.
 Requires the `auto-gptq` extra on supported platforms (typically Linux/CUDA).
+
+GPTQ models are pre-quantized and typically handle device placement internally
+during loading. This adapter uses safe device movement to respect constraints.
 """
 
 from __future__ import annotations
 
 from typing import Any
 
+from invarlock.adapters.capabilities import ModelCapabilities
 from invarlock.adapters.hf_mixin import HFAdapterMixin
 from invarlock.core.api import ModelAdapter
 from invarlock.core.error_utils import wrap_errors
@@ -47,7 +51,26 @@ class HF_GPTQ_Adapter(HFAdapterMixin, ModelAdapter):
             **{k: v for k, v in kwargs.items() if k not in {"device"}},
         )
 
-
+        # GPTQ models are pre-quantized; use safe device movement
+        # which respects the model's device constraints
+        return self._safe_to_device(
+            model, device, capabilities=ModelCapabilities.for_gptq()
+        )
+
+    def get_capabilities(self, model: Any) -> ModelCapabilities:
+        """Return capabilities for a GPTQ-quantized model."""
+        config = getattr(model, "config", None)
+        bits = 4  # Default GPTQ bits
+        group_size = 128  # Default GPTQ group size
+        if config is not None:
+            quant_cfg = getattr(config, "quantization_config", None)
+            if isinstance(quant_cfg, dict):
+                bits = quant_cfg.get("bits", 4)
+                group_size = quant_cfg.get("group_size", 128)
+            elif quant_cfg is not None:
+                bits = getattr(quant_cfg, "bits", 4)
+                group_size = getattr(quant_cfg, "group_size", 128)
+        return ModelCapabilities.for_gptq(bits=bits, group_size=group_size)
 
     # ---- Introspection ----
     def can_handle(self, model: Any) -> bool:
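As with AWQ, the GPTQ capability probe only reads two fields. A sketch with an illustrative dict-style config showing the extraction and the defaults it falls back to:

# Illustrative dict-style GPTQ quantization_config (values made up).
quant_cfg = {"quant_method": "gptq", "bits": 8, "group_size": 32}

bits = quant_cfg.get("bits", 4)                # 8 here; 4 is the default
group_size = quant_cfg.get("group_size", 128)  # 32 here; 128 is the default
print(bits, group_size)
# With no quantization_config present, the adapter falls back to
# ModelCapabilities.for_gptq(bits=4, group_size=128).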
{invarlock-0.2.0.dist-info → invarlock-0.3.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: invarlock
-Version: 0.2.0
+Version: 0.3.0
 Summary: Edit‑agnostic robustness certificates for weight edits (InvarLock framework)
 Author-email: InvarLock Team <oss@invarlock.dev>
 Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
@@ -112,7 +112,7 @@ they don’t, roll back safely.
 Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
 variance) producing a machine‑readable Safety Certificate.
 
-> **Status:** 0.2.0 (pre‑1.0). Until 1.0, **minor** releases may be
+> **Status:** 0.3.0 (pre‑1.0). Until 1.0, **minor** releases may be
 > breaking. See CLI help and the CHANGELOG for updates.
 
 [](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
{invarlock-0.2.0.dist-info → invarlock-0.3.0.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-invarlock/__init__.py,sha256=
+invarlock/__init__.py,sha256=Oan5J3YATdRnh-9M-k6dQh6JAExLsg5SrU2gfoau9dE,1268
 invarlock/__main__.py,sha256=ffhoKctw89j-henmQXThbHDIdlvK9fBfsy8LpjhOEXc,146
 invarlock/config.py,sha256=7BUOl7EW258YnsgRipjOx6lmWou5jNDzimREd35ewsQ,1725
 invarlock/model_profile.py,sha256=sFHpK-1Q-1DjiZTWMolQBG4Dw9feLJgk3GnaD3ixgd8,12809
@@ -9,15 +9,16 @@ invarlock/sparsity_utils.py,sha256=30SC3osptca2GmzxezbfX31EE6sRUhmEz8u3jn4vB2c,8
 invarlock/_data/runtime/tiers.yaml,sha256=A0NMcz5o-TXJp1gZq2Sd7uYAQSsMQcG0ZV1FuLxl5jA,1700
 invarlock/_data/runtime/profiles/ci_cpu.yaml,sha256=N_ur2CkyL_g9-jwTwATniMhdrv0SL64lnHCQvjq-WWQ,324
 invarlock/_data/runtime/profiles/release.yaml,sha256=xF0Qb0OTm904U6L3wK674JMTcDPegYvpKgwUB9pfq_w,482
-invarlock/adapters/__init__.py,sha256=
+invarlock/adapters/__init__.py,sha256=Bwj8aKjhFxCzvcdxTIl-nG7IXyIE4L3Nd_fsIghbZxA,3418
 invarlock/adapters/_capabilities.py,sha256=FmzUR5BHsxWe92Z9W1As-G5_5wG1PvqF2sUpjZ2_CdY,1483
-invarlock/adapters/auto.py,sha256=
+invarlock/adapters/auto.py,sha256=XWENU5hi66AtMPlQDRuINk1SGW1Kym8Tot-2jXGFGcQ,7643
 invarlock/adapters/base.py,sha256=szSh1bECeDSDGQSr5oIWhs5RlI587gE4gzdt5cnOJ1s,16100
 invarlock/adapters/base_types.py,sha256=3IuHt63_RjGZqoTOdkMpfGPiZTGqcvXXDq1KU-8QemQ,1612
+invarlock/adapters/capabilities.py,sha256=oAK_zgCzAFmss8qAU2zgc8kcahtadGtbPTYR7StXiCo,15360
 invarlock/adapters/hf_bert.py,sha256=DkUXCile7ALlHVZvMkNLAl_YrhHpdmQmomzNyIAPBEo,35547
 invarlock/adapters/hf_gpt2.py,sha256=zuNapMDj4kdzlpGJY11tMOr9dh0V1C0qkOTRwi1xCnQ,14814
-invarlock/adapters/hf_llama.py,sha256=
-invarlock/adapters/hf_mixin.py,sha256=
+invarlock/adapters/hf_llama.py,sha256=TbE9wII1GAQG7gbtiZCZ4S92rKoWRM7VH5vCnqEHc-4,19102
+invarlock/adapters/hf_mixin.py,sha256=rhm0MbjzoHtEAl54tmW3T7tf1C_VxTtcPSdQ7mQ0CIU,18279
 invarlock/adapters/hf_onnx.py,sha256=kEqgQEEdGUeaXDF0EgaMHOQQMhc1xIvur0bQvdky-AY,4446
 invarlock/adapters/hf_t5.py,sha256=2SpDKPyERrrkTWXcDJDo0J5NNjFLePuC965e1zy2tus,4738
 invarlock/adapters/py.typed,sha256=LCPmZeE_vANVVJDNvuq9A07i7jg9Nxrq6f10UeuNfZc,37
@@ -55,7 +56,7 @@ invarlock/core/contracts.py,sha256=9j55WVwMrEsUqxWlzAdMsHtkzgfkSftdizhcLiJBauw,2
 invarlock/core/error_utils.py,sha256=T23-p5ONQ-SeVuMR4Ts0cWyupsSa-0DAgsejRTfxeCg,1782
 invarlock/core/events.py,sha256=8XBAi-9A7ys7QJQwqlz8PVlfxF0TM_TvLqjcPtDwZm4,9428
 invarlock/core/exceptions.py,sha256=b4OszJ0Fd0Ezy8s99AzprS7lAkqdZYGXaSj9fYaln4E,2077
-invarlock/core/registry.py,sha256=
+invarlock/core/registry.py,sha256=vgKmhekteJS2oRu8g3tBHm48z_Eda5oPKq2ajQnDDc4,18884
 invarlock/core/retry.py,sha256=KTVkrTnWs60jwATOZDHinERH56GnOGjsKR0lmohagEo,4503
 invarlock/core/runner.py,sha256=IWp6cL21IzP_7k26AqcCFHg772f3XHO_PK2PB_-DS0s,81448
 invarlock/core/types.py,sha256=nVLMP4yqlxwhE1moQU7FWVeGJqTuud-cvTZiutdBGKk,3585
@@ -89,7 +90,7 @@ invarlock/guards/py.typed,sha256=LCPmZeE_vANVVJDNvuq9A07i7jg9Nxrq6f10UeuNfZc,37
 invarlock/guards/rmt.py,sha256=f_fZEKZyp_xJcmc7wAMNSrzs702JCztfq3zcK4CSJgk,78710
 invarlock/guards/spectral.py,sha256=tOHBoAm23xkPKUYLdr41ZQYHc806pxrhVXwsmoDLiKE,51132
 invarlock/guards/tier_config.py,sha256=_WJIQ4qvIOc8EI1ygBhpBqbZPt072RT6H6Oh9-LqNWY,10706
-invarlock/guards/variance.py,sha256=
+invarlock/guards/variance.py,sha256=LziyAF5QewT-dtCbRJGb7IklVTIp1IfwTqZhE3yJtv4,133080
 invarlock/guards_ref/__init__.py,sha256=jLnyFqdqQaheG1qQMlU4Gx7R118rkkQHPqFVF3_1ih0,366
 invarlock/guards_ref/rmt_ref.py,sha256=md-aSzLCxPL3OXmrA5NtI9wK7cVSyd2xw8WtSodcGQY,1246
 invarlock/guards_ref/spectral_ref.py,sha256=FdwFfrs5hxEEUIfBV3CvAJvTX78gAM00mKLEXyZ0zJo,4386
@@ -104,9 +105,9 @@ invarlock/observability/py.typed,sha256=LCPmZeE_vANVVJDNvuq9A07i7jg9Nxrq6f10UeuN
 invarlock/observability/utils.py,sha256=1AC3ZEzbrDzTOvSIFxAtWlITy1BHEPZ032HKKzqK_10,16492
 invarlock/plugins/__init__.py,sha256=aZqkpuTT0K3y0SX4702k3YpHnM3kng9fz0qO3XE43hY,260
 invarlock/plugins/hello_guard.py,sha256=-peYJaZd52bbdw6y8cBxEd3CbtFWualDs8nX60CeXhI,896
-invarlock/plugins/hf_awq_adapter.py,sha256=
-invarlock/plugins/hf_bnb_adapter.py,sha256=
-invarlock/plugins/hf_gptq_adapter.py,sha256=
+invarlock/plugins/hf_awq_adapter.py,sha256=UGwzcqJyuqrYHWZ4F-vn-9LNfEDyolOgVDBi0jN35xc,3770
+invarlock/plugins/hf_bnb_adapter.py,sha256=g0ysWEi8dQzLtJy8iCszfTsYCOACuZMFYnTLMAEVxs0,6011
+invarlock/plugins/hf_gptq_adapter.py,sha256=ysugAcnjLqF5sqpijSNiim1xUpRmoIgBrG053X3S2hE,3743
 invarlock/plugins/py.typed,sha256=LCPmZeE_vANVVJDNvuq9A07i7jg9Nxrq6f10UeuNfZc,37
 invarlock/reporting/__init__.py,sha256=A0541EqxcdTpslNbZEWIO4q-LCqzCQcadev2IBKEBbM,232
 invarlock/reporting/certificate.py,sha256=3xlAryynUh31iGenN7us3L3VGYB8vW722uVYGQiXdrM,124085
@@ -124,9 +125,9 @@ invarlock/reporting/utils.py,sha256=1aLYgSUR4XvgmhDvU9YK9ICd7W5sjft1qdsZC9JJSRY,
 invarlock/reporting/validate.py,sha256=396Fe4SPII4zaD1adGtH2hsl5asQOCDqDL4W3YixEXU,22453
 invarlock/utils/__init__.py,sha256=DR2pBrgddLH2PW-6ninOE8CM7DNvlvgyYsCkckozbPU,4276
 invarlock/utils/digest.py,sha256=sfnqGFRiRf7l950MjSIrWO1XbUfXlcEfNLeWFbBUr8I,1290
-invarlock-0.
-invarlock-0.
-invarlock-0.
-invarlock-0.
-invarlock-0.
-invarlock-0.
+invarlock-0.3.0.dist-info/licenses/LICENSE,sha256=uFddaXYY02nEFdPpS7bam_bnm0st41BibzD0jHULPXw,10413
+invarlock-0.3.0.dist-info/METADATA,sha256=MtsIbRufSXZ3xxDzNbxjDrYH0oDhIb1xDQdlElAsdjw,21783
+invarlock-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+invarlock-0.3.0.dist-info/entry_points.txt,sha256=i0e4ZzmJNMBGG-69lbgP-muEcn1je2TUIWwl9SJERm0,670
+invarlock-0.3.0.dist-info/top_level.txt,sha256=GXfftc_YDHHcQC2vQgYbZ5cTO82YuWY3HusHMT3DuKs,10
+invarlock-0.3.0.dist-info/RECORD,,