compressed-tensors 0.11.1a20250904__py3-none-any.whl → 0.11.1a20250908__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/quantization/quant_scheme.py +10 -0
- compressed_tensors/transform/factory/base.py +11 -4
- compressed_tensors/transform/factory/hadamard.py +15 -12
- compressed_tensors/transform/factory/matrix_multiply.py +14 -11
- compressed_tensors/transform/factory/random_hadamard.py +2 -3
- compressed_tensors/utils/offload.py +4 -1
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.11.1a20250904.dist-info → compressed_tensors-0.11.1a20250908.dist-info}/METADATA +1 -1
- {compressed_tensors-0.11.1a20250904.dist-info → compressed_tensors-0.11.1a20250908.dist-info}/RECORD +12 -12
- {compressed_tensors-0.11.1a20250904.dist-info → compressed_tensors-0.11.1a20250908.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.11.1a20250904.dist-info → compressed_tensors-0.11.1a20250908.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.11.1a20250904.dist-info → compressed_tensors-0.11.1a20250908.dist-info}/top_level.txt +0 -0
@@ -60,6 +60,16 @@ class QuantizationScheme(BaseModel):
|
|
60
60
|
format = model.format
|
61
61
|
|
62
62
|
if inputs is not None:
|
63
|
+
if inputs.strategy not in (
|
64
|
+
QuantizationStrategy.TOKEN,
|
65
|
+
QuantizationStrategy.TENSOR,
|
66
|
+
QuantizationStrategy.TENSOR_GROUP,
|
67
|
+
):
|
68
|
+
raise ValueError(
|
69
|
+
f"Using {inputs.strategy} strategy is not supported for "
|
70
|
+
"activation quantization"
|
71
|
+
)
|
72
|
+
|
63
73
|
if inputs.actorder is not None:
|
64
74
|
raise ValueError("Cannot apply actorder to input activations")
|
65
75
|
|
@@ -18,6 +18,7 @@ from typing import List, Optional, Set, Tuple
|
|
18
18
|
|
19
19
|
import torch
|
20
20
|
import torch.nn.utils.parametrize as P
|
21
|
+
import tqdm
|
21
22
|
from compressed_tensors.registry.registry import RegistryMixin, T
|
22
23
|
from compressed_tensors.transform import (
|
23
24
|
TransformArgs,
|
@@ -84,15 +85,21 @@ class TransformFactory(RegistryMixin, ABC):
|
|
84
85
|
"""
|
85
86
|
raise NotImplementedError()
|
86
87
|
|
87
|
-
def apply_to_model(self, model: Module):
|
88
|
+
def apply_to_model(self, model: Module, use_tqdm=True):
|
88
89
|
"""
|
89
90
|
Create transforms and apply them to the model
|
90
91
|
|
91
92
|
:param model: module to apply transforms to
|
92
93
|
"""
|
93
|
-
|
94
|
-
|
95
|
-
|
94
|
+
modules_args = [
|
95
|
+
(module, arg)
|
96
|
+
for arg in self.scheme.apply
|
97
|
+
for _, module in match_named_modules(model, arg.targets, arg.ignore)
|
98
|
+
]
|
99
|
+
|
100
|
+
desc = f"Applying {self.name} transforms"
|
101
|
+
for module, arg in tqdm.tqdm(modules_args, desc=desc, disable=(not use_tqdm)):
|
102
|
+
self._apply_to_module(module, arg)
|
96
103
|
|
97
104
|
self._update_tied_weights()
|
98
105
|
|
@@ -53,24 +53,28 @@ class HadamardFactory(TransformFactory):
|
|
53
53
|
"""
|
54
54
|
assert hasattr(module, "weight")
|
55
55
|
size = get_transform_size(module, args.location, self.scheme.head_dim)
|
56
|
-
dtype = self.scheme.precision
|
57
|
-
device = get_offloaded_device(module)
|
58
56
|
exec_device = get_execution_device(module)
|
59
|
-
|
60
|
-
|
61
|
-
|
57
|
+
device = get_offloaded_device(module)
|
58
|
+
precision = self.scheme.precision if args.is_online() else torch.float64
|
59
|
+
|
60
|
+
factory_kwargs = {
|
61
|
+
"device": device,
|
62
|
+
"construct_device": exec_device,
|
63
|
+
"precision": precision,
|
64
|
+
}
|
65
|
+
weight = self.weights.get(size, factory_kwargs=factory_kwargs)
|
66
|
+
# TODO: permutations should be keyed by fused modules, not weight
|
62
67
|
perm = self.perms[weight] if self.scheme.randomize else None
|
63
68
|
return HadamardTransform(weight, perm, self.scheme, args, type(module))
|
64
69
|
|
65
70
|
def _create_weight(
|
66
71
|
self,
|
67
72
|
size: int,
|
68
|
-
dtype: dtype,
|
69
73
|
device: device,
|
70
74
|
construct_device: device,
|
75
|
+
precision: dtype,
|
71
76
|
) -> Parameter:
|
72
|
-
|
73
|
-
data = deterministic_hadamard_matrix(size, dtype, construct_device)
|
77
|
+
data = deterministic_hadamard_matrix(size, precision, construct_device)
|
74
78
|
data = data.to(device=device)
|
75
79
|
return Parameter(data, requires_grad=self.scheme.requires_grad)
|
76
80
|
|
@@ -94,8 +98,7 @@ class HadamardTransform(TransformBase):
|
|
94
98
|
self.scheme = scheme
|
95
99
|
self.args = args
|
96
100
|
self.module_type = module_type
|
97
|
-
self._scale = torch.tensor(weight.size(0), dtype=
|
98
|
-
self._precision = scheme.precision if args.is_online() else torch.float64
|
101
|
+
self._scale = torch.tensor(weight.size(0), dtype=torch.float64).sqrt()
|
99
102
|
|
100
103
|
def forward(self, value: Tensor) -> Tensor:
|
101
104
|
weight = self.weight
|
@@ -108,8 +111,8 @@ class HadamardTransform(TransformBase):
|
|
108
111
|
|
109
112
|
return (
|
110
113
|
apply_transform_weight(
|
111
|
-
weight.to(
|
112
|
-
value.to(
|
114
|
+
weight.to(device=value.device),
|
115
|
+
value.to(dtype=weight.dtype),
|
113
116
|
self.args.location,
|
114
117
|
self.module_type,
|
115
118
|
)
|
@@ -21,8 +21,8 @@ from compressed_tensors.transform.utils.matrix import (
|
|
21
21
|
apply_transform_weight,
|
22
22
|
get_transform_size,
|
23
23
|
)
|
24
|
-
from compressed_tensors.utils import get_offloaded_device
|
25
24
|
from compressed_tensors.utils.helpers import ParameterizedDefaultDict
|
25
|
+
from compressed_tensors.utils.offload import get_offloaded_device
|
26
26
|
from torch import Tensor, device, dtype
|
27
27
|
from torch.nn import Module, Parameter
|
28
28
|
|
@@ -52,19 +52,23 @@ class RandomMatrixFactory(TransformFactory):
|
|
52
52
|
"""
|
53
53
|
assert hasattr(module, "weight")
|
54
54
|
size = get_transform_size(module, args.location, self.scheme.head_dim)
|
55
|
-
dtype = self.scheme.precision
|
56
55
|
device = get_offloaded_device(module)
|
56
|
+
precision = self.scheme.precision if args.is_online() else torch.float64
|
57
57
|
|
58
|
-
|
58
|
+
factory_kwargs = {"device": device, "precision": precision}
|
59
|
+
weight = self.weights.get(size, factory_kwargs=factory_kwargs)
|
59
60
|
if args.inverse:
|
60
61
|
weight = self.inverses[weight]
|
61
62
|
|
62
63
|
return RandomMatrixTransform(weight, self.scheme, args, type(module))
|
63
64
|
|
64
|
-
def _create_weight(self, size: int,
|
65
|
-
# TODO:
|
65
|
+
def _create_weight(self, size: int, device: device, precision: dtype) -> Parameter:
|
66
|
+
# TODO: construct such that weight is invertible (has non-zero determinant)
|
66
67
|
data = torch.rand(
|
67
|
-
(size, size),
|
68
|
+
(size, size),
|
69
|
+
generator=self.generator,
|
70
|
+
dtype=precision,
|
71
|
+
device=device,
|
68
72
|
)
|
69
73
|
return Parameter(data, requires_grad=self.scheme.requires_grad)
|
70
74
|
|
@@ -87,12 +91,11 @@ class RandomMatrixTransform(TransformBase):
|
|
87
91
|
self.scheme = scheme
|
88
92
|
self.args = args
|
89
93
|
self.module_type = module_type
|
90
|
-
self._precision = scheme.precision if args.is_online() else torch.float64
|
91
94
|
|
92
95
|
def forward(self, value: Tensor) -> Parameter:
|
93
96
|
return apply_transform_weight(
|
94
|
-
self.weight.to(
|
95
|
-
value.to(self.
|
97
|
+
self.weight.to(device=value.device),
|
98
|
+
value.to(dtype=self.weight.dtype),
|
96
99
|
self.args.location,
|
97
100
|
self.module_type,
|
98
101
|
).to(value.dtype)
|
@@ -100,8 +103,8 @@ class RandomMatrixTransform(TransformBase):
|
|
100
103
|
def right_inverse(self, value: Tensor) -> Tensor:
|
101
104
|
inverse = high_precision_invert(self.weight)
|
102
105
|
return apply_transform_weight(
|
103
|
-
inverse.to(
|
104
|
-
value.to(
|
106
|
+
inverse.to(device=value.device),
|
107
|
+
value.to(dtype=inverse.dtype),
|
105
108
|
self.args.location,
|
106
109
|
self.module_type,
|
107
110
|
).to(value.dtype)
|
@@ -31,11 +31,10 @@ class RandomHadamardFactory(HadamardFactory):
|
|
31
31
|
def _create_weight(
|
32
32
|
self,
|
33
33
|
size: int,
|
34
|
-
dtype: dtype,
|
35
34
|
device: device,
|
36
35
|
construct_device: device,
|
36
|
+
precision: dtype,
|
37
37
|
) -> Parameter:
|
38
|
-
|
39
|
-
data = random_hadamard_matrix(size, dtype, construct_device, self.generator)
|
38
|
+
data = random_hadamard_matrix(size, precision, construct_device, self.generator)
|
40
39
|
data = data.to(device=device)
|
41
40
|
return Parameter(data, requires_grad=self.scheme.requires_grad)
|
@@ -131,7 +131,10 @@ def get_offloaded_device(module: torch.nn.Module) -> torch.device:
|
|
131
131
|
first_key = list(module._hf_hook.weights_map.keys())[0]
|
132
132
|
prefix_dataset = module._hf_hook.weights_map.dataset
|
133
133
|
return prefix_dataset[first_key].device
|
134
|
-
|
134
|
+
else:
|
135
|
+
# if the module is not offloaded, then any added weights
|
136
|
+
# should be placed on the module's execution device
|
137
|
+
return get_execution_device(module)
|
135
138
|
|
136
139
|
|
137
140
|
@check_accelerate(fallback=None)
|
compressed_tensors/version.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.11.
|
3
|
+
Version: 0.11.1a20250908
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
{compressed_tensors-0.11.1a20250904.dist-info → compressed_tensors-0.11.1a20250908.dist-info}/RECORD
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
|
2
2
|
compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
|
3
|
-
compressed_tensors/version.py,sha256=
|
3
|
+
compressed_tensors/version.py,sha256=8qf_B1P1NNbEDyEkRyxNNhdvTofGEV0EE02UMN3na5k,523
|
4
4
|
compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
|
5
5
|
compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
|
6
6
|
compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
|
@@ -28,7 +28,7 @@ compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn
|
|
28
28
|
compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
|
29
29
|
compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSUKvIm-SCQa-Bq_SF0,12916
|
30
30
|
compressed_tensors/quantization/quant_config.py,sha256=2NgDwKuQn0f-ojiHC8c6tXtYX_zQlk26Rj-bU71QKvA,10598
|
31
|
-
compressed_tensors/quantization/quant_scheme.py,sha256=
|
31
|
+
compressed_tensors/quantization/quant_scheme.py,sha256=2pV3tPNgo6ovi6FLxP4ZFznEmInlC1L90Pq9I1HI_Xk,9275
|
32
32
|
compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
|
33
33
|
compressed_tensors/quantization/lifecycle/apply.py,sha256=TuSjKomSk4N0My-UY9PWk2Nyuze6TilEGPsZELgotzk,14716
|
34
34
|
compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
|
@@ -45,10 +45,10 @@ compressed_tensors/transform/transform_args.py,sha256=rVgReFp7wMXcYugkfd325e2tTF
|
|
45
45
|
compressed_tensors/transform/transform_config.py,sha256=3YdtGcau3qkcapX9GMUiLuhQHFQZKFYT3eLgJGj1L6s,1204
|
46
46
|
compressed_tensors/transform/transform_scheme.py,sha256=S7vYLnuv7xZ_bwphkpCiGqZLjnnTnb4lj1T8a6WwnE0,2094
|
47
47
|
compressed_tensors/transform/factory/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
|
48
|
-
compressed_tensors/transform/factory/base.py,sha256=
|
49
|
-
compressed_tensors/transform/factory/hadamard.py,sha256=
|
50
|
-
compressed_tensors/transform/factory/matrix_multiply.py,sha256=
|
51
|
-
compressed_tensors/transform/factory/random_hadamard.py,sha256=
|
48
|
+
compressed_tensors/transform/factory/base.py,sha256=82fwlX4gVlN67H7P_T3pbvN5pB-XQnG-dZJ53evj-DA,7979
|
49
|
+
compressed_tensors/transform/factory/hadamard.py,sha256=0SS_gM7b2df8SbsCF9LNbLLBMcwly7bXND1KED8uxhc,4550
|
50
|
+
compressed_tensors/transform/factory/matrix_multiply.py,sha256=u-7V04EvEe9G3VEF--YwoVV-h5kmh6hXq8stY_EWmLY,4456
|
51
|
+
compressed_tensors/transform/factory/random_hadamard.py,sha256=ck-LF7sl7i9NW4fxLypgHgkw91lc_TpwHO8bXX-0fPU,1577
|
52
52
|
compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
|
53
53
|
compressed_tensors/transform/utils/hadamard.py,sha256=hDJZC0Gw2fKdxqa3f8TmFc5J0eJqxHtFRxswLU_yVJc,5548
|
54
54
|
compressed_tensors/transform/utils/hadamards.safetensors,sha256=mFd1GzNodGG-ifA1IoH-0nHYzfraCOvrq_dX2zFI1B4,1436901
|
@@ -57,14 +57,14 @@ compressed_tensors/utils/__init__.py,sha256=spzbjUO4-hZ2jXGST27r3MIt2yzIXsjdbEaY
|
|
57
57
|
compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
|
58
58
|
compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
|
59
59
|
compressed_tensors/utils/match.py,sha256=y03xJyWTXV8bjIPN5Z4S0_w797qMnh-Z4aiPEGQ4zNE,11239
|
60
|
-
compressed_tensors/utils/offload.py,sha256=
|
60
|
+
compressed_tensors/utils/offload.py,sha256=b0Q2P0hJLQBGEqdRwOh6SOK3_eJCqHNnIX38-wqeef0,24577
|
61
61
|
compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
|
62
62
|
compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
|
63
63
|
compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
|
64
64
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
65
65
|
compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
|
66
|
-
compressed_tensors-0.11.
|
67
|
-
compressed_tensors-0.11.
|
68
|
-
compressed_tensors-0.11.
|
69
|
-
compressed_tensors-0.11.
|
70
|
-
compressed_tensors-0.11.
|
66
|
+
compressed_tensors-0.11.1a20250908.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
67
|
+
compressed_tensors-0.11.1a20250908.dist-info/METADATA,sha256=lLokpvLLt0OOrfW9axfgwLmBhqCR7IqyjLCQFRdxCsU,7031
|
68
|
+
compressed_tensors-0.11.1a20250908.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
69
|
+
compressed_tensors-0.11.1a20250908.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
70
|
+
compressed_tensors-0.11.1a20250908.dist-info/RECORD,,
|
{compressed_tensors-0.11.1a20250904.dist-info → compressed_tensors-0.11.1a20250908.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|