compressed-tensors-nightly 0.9.1.20250204__py3-none-any.whl → 0.9.1.20250205__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/base.py +9 -0
- compressed_tensors/compressors/quantized_compressors/base.py +2 -2
- compressed_tensors/compressors/quantized_compressors/naive_quantized.py +12 -6
- compressed_tensors/compressors/quantized_compressors/pack_quantized.py +13 -7
- compressed_tensors/compressors/sparse_compressors/base.py +2 -3
- compressed_tensors/compressors/sparse_compressors/dense.py +8 -0
- compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +11 -5
- compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +7 -1
- compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +8 -2
- compressed_tensors/utils/safetensors_load.py +7 -5
- {compressed_tensors_nightly-0.9.1.20250204.dist-info → compressed_tensors_nightly-0.9.1.20250205.dist-info}/METADATA +13 -4
- {compressed_tensors_nightly-0.9.1.20250204.dist-info → compressed_tensors_nightly-0.9.1.20250205.dist-info}/RECORD +15 -15
- {compressed_tensors_nightly-0.9.1.20250204.dist-info → compressed_tensors_nightly-0.9.1.20250205.dist-info}/WHEEL +1 -1
- {compressed_tensors_nightly-0.9.1.20250204.dist-info → compressed_tensors_nightly-0.9.1.20250205.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.9.1.20250204.dist-info → compressed_tensors_nightly-0.9.1.20250205.dist-info}/top_level.txt +0 -0
@@ -77,6 +77,15 @@ class BaseCompressor(RegistryMixin, ABC):
|
|
77
77
|
"""
|
78
78
|
raise NotImplementedError()
|
79
79
|
|
80
|
+
@property
|
81
|
+
@abstractmethod
|
82
|
+
def compression_param_names(self) -> Tuple[str]:
|
83
|
+
"""
|
84
|
+
Returns a tuple of compression parameter names introduced by
|
85
|
+
the compressor during compression
|
86
|
+
"""
|
87
|
+
raise NotImplementedError()
|
88
|
+
|
80
89
|
@abstractmethod
|
81
90
|
def compress(
|
82
91
|
self,
|
@@ -144,7 +144,7 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
144
144
|
|
145
145
|
def _decompress_from_path(self, path_to_model, names_to_scheme, device):
|
146
146
|
weight_mappings = get_nested_weight_mappings(
|
147
|
-
path_to_model, self.COMPRESSION_PARAM_NAMES
|
147
|
+
path_to_model, self.compression_param_names
|
148
148
|
)
|
149
149
|
for weight_name in weight_mappings.keys():
|
150
150
|
weight_data = {}
|
@@ -161,7 +161,7 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
161
161
|
|
162
162
|
def _decompress_from_state_dict(self, state_dict, names_to_scheme):
|
163
163
|
weight_mappings = get_nested_mappings_from_state_dict(
|
164
|
-
state_dict, self.COMPRESSION_PARAM_NAMES
|
164
|
+
state_dict, self.compression_param_names
|
165
165
|
)
|
166
166
|
for weight_name in weight_mappings.keys():
|
167
167
|
weight_data = {}
|
@@ -41,12 +41,18 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
|
|
41
41
|
type to the type specified by the layer's QuantizationArgs.
|
42
42
|
"""
|
43
43
|
|
44
|
-
|
45
|
-
|
46
|
-
"
|
47
|
-
|
48
|
-
|
49
|
-
|
44
|
+
@property
|
45
|
+
def compression_param_names(self) -> Tuple[str]:
|
46
|
+
"""
|
47
|
+
Returns a tuple of compression parameter names introduced by
|
48
|
+
the compressor during compression
|
49
|
+
"""
|
50
|
+
return (
|
51
|
+
"weight",
|
52
|
+
"weight_scale",
|
53
|
+
"weight_zero_point",
|
54
|
+
"weight_g_idx",
|
55
|
+
)
|
50
56
|
|
51
57
|
def compression_param_info(
|
52
58
|
self,
|
@@ -36,13 +36,19 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
|
|
36
36
|
Compresses a quantized model by packing every eight 4-bit weights into an int32
|
37
37
|
"""
|
38
38
|
|
39
|
-
|
40
|
-
|
41
|
-
"
|
42
|
-
|
43
|
-
|
44
|
-
"
|
45
|
-
|
39
|
+
@property
|
40
|
+
def compression_param_names(self) -> Tuple[str]:
|
41
|
+
"""
|
42
|
+
Returns a tuple of compression parameter names introduced by
|
43
|
+
the compressor during compression
|
44
|
+
"""
|
45
|
+
return (
|
46
|
+
"weight_packed",
|
47
|
+
"weight_scale",
|
48
|
+
"weight_zero_point",
|
49
|
+
"weight_g_idx",
|
50
|
+
"weight_shape",
|
51
|
+
)
|
46
52
|
|
47
53
|
def compression_param_info(
|
48
54
|
self,
|
@@ -30,8 +30,7 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
|
|
30
30
|
class BaseSparseCompressor(BaseCompressor):
|
31
31
|
"""
|
32
32
|
Base class representing a sparse compression algorithm. Each child class should
|
33
|
-
implement
|
34
|
-
classes should also define COMPRESSION_PARAM_NAMES.
|
33
|
+
implement compression_param_names, compress_weight and decompress_weight;
|
35
34
|
|
36
35
|
Compressors support compressing/decompressing a full module state dict or a single
|
37
36
|
quantized PyTorch leaf module.
|
@@ -113,7 +112,7 @@ class BaseSparseCompressor(BaseCompressor):
|
|
113
112
|
"""
|
114
113
|
weight_mappings, ignored_params = get_nested_weight_mappings(
|
115
114
|
path_to_model_or_tensors,
|
116
|
-
self.COMPRESSION_PARAM_NAMES,
|
115
|
+
self.compression_param_names,
|
117
116
|
return_unmatched_params=True,
|
118
117
|
)
|
119
118
|
for weight_name in weight_mappings.keys():
|
@@ -25,6 +25,14 @@ class DenseCompressor(BaseCompressor):
|
|
25
25
|
Identity compressor for dense models, returns the original state_dict
|
26
26
|
"""
|
27
27
|
|
28
|
+
@property
|
29
|
+
def compression_param_names(self) -> Tuple[str]:
|
30
|
+
"""
|
31
|
+
Returns a tuple of compression parameter names introduced by
|
32
|
+
the compressor during compression
|
33
|
+
"""
|
34
|
+
return ()
|
35
|
+
|
28
36
|
def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
|
29
37
|
return model_state
|
30
38
|
|
@@ -40,11 +40,17 @@ class Sparse24BitMaskCompressor(BaseSparseCompressor):
|
|
40
40
|
values tensor, with their locations stored in a 2d bitmask
|
41
41
|
"""
|
42
42
|
|
43
|
-
|
44
|
-
|
45
|
-
"
|
46
|
-
|
47
|
-
|
43
|
+
@property
|
44
|
+
def compression_param_names(self) -> Tuple[str]:
|
45
|
+
"""
|
46
|
+
Returns a tuple of compression parameter names introduced by
|
47
|
+
the compressor during compression
|
48
|
+
"""
|
49
|
+
return (
|
50
|
+
"shape",
|
51
|
+
"compressed",
|
52
|
+
"bitmask",
|
53
|
+
)
|
48
54
|
|
49
55
|
def compress_weight(self, name, value):
|
50
56
|
bitmask_tensor = Sparse24BitMaskTensor.from_dense(
|
@@ -38,7 +38,13 @@ class BitmaskCompressor(BaseSparseCompressor):
|
|
38
38
|
values tensor, with their locations stored in a 2d bitmask
|
39
39
|
"""
|
40
40
|
|
41
|
-
|
41
|
+
@property
|
42
|
+
def compression_param_names(self) -> Tuple[str]:
|
43
|
+
"""
|
44
|
+
Returns a tuple of compression parameter names introduced by
|
45
|
+
the compressor during compression
|
46
|
+
"""
|
47
|
+
return ("shape", "compressed", "bitmask", "row_offsets")
|
42
48
|
|
43
49
|
def compress_weight(self, name, value):
|
44
50
|
bitmask_tensor = BitmaskTensor.from_dense(value)
|
@@ -42,8 +42,6 @@ class Marlin24Compressor(BaseCompressor):
|
|
42
42
|
Marlin24 kernel. Decompression is not implemented for this compressor.
|
43
43
|
"""
|
44
44
|
|
45
|
-
COMPRESSION_PARAM_NAMES = ["weight_packed", "scale_packed", "meta"]
|
46
|
-
|
47
45
|
@staticmethod
|
48
46
|
def validate_quant_compatability(
|
49
47
|
model_quant_args: Dict[str, QuantizationArgs]
|
@@ -105,6 +103,14 @@ class Marlin24Compressor(BaseCompressor):
|
|
105
103
|
|
106
104
|
return True
|
107
105
|
|
106
|
+
@property
|
107
|
+
def compression_param_names(self) -> Tuple[str]:
|
108
|
+
"""
|
109
|
+
Returns a tuple of compression parameter names introduced by
|
110
|
+
the compressor during compression
|
111
|
+
"""
|
112
|
+
return ("weight_packed", "scale_packed", "meta")
|
113
|
+
|
108
114
|
def compress(
|
109
115
|
self,
|
110
116
|
model_state: Dict[str, Tensor],
|
@@ -16,7 +16,7 @@ import json
|
|
16
16
|
import os
|
17
17
|
import re
|
18
18
|
import struct
|
19
|
-
from typing import Dict,
|
19
|
+
from typing import Dict, Iterable, Optional, Tuple, Union
|
20
20
|
|
21
21
|
from safetensors import safe_open
|
22
22
|
from torch import Tensor
|
@@ -180,7 +180,9 @@ def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
|
|
180
180
|
|
181
181
|
|
182
182
|
def get_nested_weight_mappings(
|
183
|
-
model_path: str,
|
183
|
+
model_path: str,
|
184
|
+
params_to_nest: Iterable[str],
|
185
|
+
return_unmatched_params: bool = False,
|
184
186
|
) -> Union[NestedWeightMappingType, Tuple[NestedWeightMappingType, WeightMappingType]]:
|
185
187
|
"""
|
186
188
|
Takes a path to a state dict saved in safetensors format and returns a nested
|
@@ -211,7 +213,7 @@ def get_nested_weight_mappings(
|
|
211
213
|
|
212
214
|
:param model_path: Path to the safetensors state dict, must contain either a
|
213
215
|
single safetensors file or multiple files with an index.
|
214
|
-
:param params_to_nest:
|
216
|
+
:param params_to_nest: Iterable of parameter names to nest.
|
215
217
|
:param return_unmatched_params: If True, return a second dictionary containing
|
216
218
|
the remaining parameters that were not matched to the params_to_nest.
|
217
219
|
:return:
|
@@ -247,7 +249,7 @@ def get_nested_weight_mappings(
|
|
247
249
|
|
248
250
|
|
249
251
|
def get_nested_mappings_from_state_dict(
|
250
|
-
state_dict, params_to_nest
|
252
|
+
state_dict, params_to_nest: Iterable[str]
|
251
253
|
) -> NestedWeightMappingType:
|
252
254
|
"""
|
253
255
|
Takes a state dict and returns a nested mapping from uncompressed
|
@@ -262,7 +264,7 @@ def get_nested_mappings_from_state_dict(
|
|
262
264
|
}
|
263
265
|
|
264
266
|
:param state_dict: state dict of the model
|
265
|
-
:param params_to_nest:
|
267
|
+
:param params_to_nest: Iterable of parameter names to nest.
|
266
268
|
:return: Nested mapping of parameterized layer names to the value of
|
267
269
|
each layer's compression parameters.
|
268
270
|
"""
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.9.1.20250204
|
3
|
+
Version: 0.9.1.20250205
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -11,8 +11,6 @@ License-File: LICENSE
|
|
11
11
|
Requires-Dist: torch>=1.7.0
|
12
12
|
Requires-Dist: transformers
|
13
13
|
Requires-Dist: pydantic>=2.0
|
14
|
-
Provides-Extra: accelerate
|
15
|
-
Requires-Dist: accelerate; extra == "accelerate"
|
16
14
|
Provides-Extra: dev
|
17
15
|
Requires-Dist: black==22.12.0; extra == "dev"
|
18
16
|
Requires-Dist: isort==5.8.0; extra == "dev"
|
@@ -20,6 +18,17 @@ Requires-Dist: wheel>=0.36.2; extra == "dev"
|
|
20
18
|
Requires-Dist: flake8>=3.8.3; extra == "dev"
|
21
19
|
Requires-Dist: pytest>=6.0.0; extra == "dev"
|
22
20
|
Requires-Dist: nbconvert>=7.16.3; extra == "dev"
|
21
|
+
Provides-Extra: accelerate
|
22
|
+
Requires-Dist: accelerate; extra == "accelerate"
|
23
|
+
Dynamic: author
|
24
|
+
Dynamic: author-email
|
25
|
+
Dynamic: description
|
26
|
+
Dynamic: description-content-type
|
27
|
+
Dynamic: home-page
|
28
|
+
Dynamic: license
|
29
|
+
Dynamic: provides-extra
|
30
|
+
Dynamic: requires-dist
|
31
|
+
Dynamic: summary
|
23
32
|
|
24
33
|
# compressed-tensors
|
25
34
|
|
@@ -2,21 +2,21 @@ compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLO
|
|
2
2
|
compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
|
3
3
|
compressed_tensors/version.py,sha256=4fhjdvtnsinY2nzy2jDu0CL4Tytgk6EVPNsww7-spZU,1586
|
4
4
|
compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
|
5
|
-
compressed_tensors/compressors/base.py,sha256=
|
5
|
+
compressed_tensors/compressors/base.py,sha256=x8dQrWVEurynXw03yHJZTaAmrRTOsdZJoHjmvs0IKwk,7002
|
6
6
|
compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
|
7
7
|
compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
|
8
8
|
compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=3WyzAW2Rm_uLprxwO2QH6FR76W6Mk4r2yedayaSZHhw,18396
|
9
9
|
compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=09UJq68Pht6Bf-4iP9xYl3tetKsncNPHD8IAGbePsr4,714
|
10
|
-
compressed_tensors/compressors/quantized_compressors/base.py,sha256=
|
11
|
-
compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=
|
12
|
-
compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=
|
10
|
+
compressed_tensors/compressors/quantized_compressors/base.py,sha256=cp8S1Kr3HhlMHIz7k4vGo-qxxdknEC3qP1QLIhNnwRA,7217
|
11
|
+
compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=fd0KlkSx6bvZ3xwIkK3jEUdPSUPs56Eua4dEDOtzKW0,5150
|
12
|
+
compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=zH2PocRe_T5yt1-3kLdZH9AUQWQyaVOi4U9nEJiYaWA,8509
|
13
13
|
compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
|
14
|
-
compressed_tensors/compressors/sparse_compressors/base.py,sha256=
|
15
|
-
compressed_tensors/compressors/sparse_compressors/dense.py,sha256=
|
16
|
-
compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py,sha256=
|
17
|
-
compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py,sha256=
|
14
|
+
compressed_tensors/compressors/sparse_compressors/base.py,sha256=CVWbs3sd7GKJEoWOIKImABQ01VOTX8dlF2AQaEVPotw,5883
|
15
|
+
compressed_tensors/compressors/sparse_compressors/dense.py,sha256=_uW_HISeDNz4yboSZWoh6GwrkUE6HFibzPQSKrHOCkg,1505
|
16
|
+
compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py,sha256=mEKSSgpXookqYSJw3mlyP6cYYKD-eaIvpQMvi4JO6TY,8807
|
17
|
+
compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py,sha256=S8vW0FI9ep_XtUQOxj0P5utJt3vKEYOHjWEPp-Xd9aY,5820
|
18
18
|
compressed_tensors/compressors/sparse_quantized_compressors/__init__.py,sha256=4f_cwcKXB1nVVMoiKgTFAc8jAPjPLElo-Df_EDm1_xw,675
|
19
|
-
compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=
|
19
|
+
compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=xY0CdHXAzVHeDeSCD_I-5UZKcntKzd3FiKSP-ZqcSBs,9614
|
20
20
|
compressed_tensors/config/__init__.py,sha256=8sOoZ6xvYSC79mBvEtO8l6xk4PC80d29AnnJiGMrY2M,737
|
21
21
|
compressed_tensors/config/base.py,sha256=R3iUmFf1MslEjin5LgwQbmfJHIsS7Uw0UIxfn780uqY,3479
|
22
22
|
compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
|
@@ -43,10 +43,10 @@ compressed_tensors/utils/helpers.py,sha256=RrNvzD08naEjEiXdU-FdZjQVda1nQywu1hA_G
|
|
43
43
|
compressed_tensors/utils/offload.py,sha256=H4aAg21zUvJM2uwE6QCNYazX_p_o41yQUAgLLWBqR0w,14079
|
44
44
|
compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
|
45
45
|
compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
|
46
|
-
compressed_tensors/utils/safetensors_load.py,sha256=
|
46
|
+
compressed_tensors/utils/safetensors_load.py,sha256=5SeM2hzLh77Ne8Vk7qR6-km7cf8bhov41ExpWITqX3A,11470
|
47
47
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
48
|
-
compressed_tensors_nightly-0.9.1.
|
49
|
-
compressed_tensors_nightly-0.9.1.
|
50
|
-
compressed_tensors_nightly-0.9.1.
|
51
|
-
compressed_tensors_nightly-0.9.1.
|
52
|
-
compressed_tensors_nightly-0.9.1.
|
48
|
+
compressed_tensors_nightly-0.9.1.20250205.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
49
|
+
compressed_tensors_nightly-0.9.1.20250205.dist-info/METADATA,sha256=121PgN0lvgzT_mMQs0wTOqwxVv8Wmcv_gFpf28Tktvg,6992
|
50
|
+
compressed_tensors_nightly-0.9.1.20250205.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
51
|
+
compressed_tensors_nightly-0.9.1.20250205.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
52
|
+
compressed_tensors_nightly-0.9.1.20250205.dist-info/RECORD,,
|
File without changes
|