compressed-tensors-nightly 0.9.1.20250203__tar.gz → 0.9.1.20250205__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed-tensors-nightly-0.9.1.20250203/src/compressed_tensors_nightly.egg-info → compressed_tensors_nightly-0.9.1.20250205}/PKG-INFO +22 -3
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/base.py +9 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/base.py +2 -2
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +12 -6
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +13 -7
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/base.py +2 -3
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/dense.py +8 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +11 -5
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +7 -1
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +8 -2
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/safetensors_load.py +7 -5
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205/src/compressed_tensors_nightly.egg-info}/PKG-INFO +22 -3
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors_nightly.egg-info/SOURCES.txt +3 -1
- compressed_tensors_nightly-0.9.1.20250205/tests/test_registry.py +53 -0
- compressed_tensors_nightly-0.9.1.20250205/tests/testing_utils.py +144 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/LICENSE +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/README.md +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/pyproject.toml +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/setup.cfg +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/setup.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/base.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/config/base.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/config/dense.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/quant_args.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/version.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors_nightly.egg-info/dependency_links.txt +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors_nightly.egg-info/requires.txt +0 -0
- {compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors_nightly.egg-info/top_level.txt +0 -0
@@ -1,15 +1,34 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.9.1.
|
3
|
+
Version: 0.9.1.20250205
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
7
7
|
Author-email: support@neuralmagic.com
|
8
8
|
License: Apache 2.0
|
9
9
|
Description-Content-Type: text/markdown
|
10
|
+
License-File: LICENSE
|
11
|
+
Requires-Dist: torch>=1.7.0
|
12
|
+
Requires-Dist: transformers
|
13
|
+
Requires-Dist: pydantic>=2.0
|
10
14
|
Provides-Extra: dev
|
15
|
+
Requires-Dist: black==22.12.0; extra == "dev"
|
16
|
+
Requires-Dist: isort==5.8.0; extra == "dev"
|
17
|
+
Requires-Dist: wheel>=0.36.2; extra == "dev"
|
18
|
+
Requires-Dist: flake8>=3.8.3; extra == "dev"
|
19
|
+
Requires-Dist: pytest>=6.0.0; extra == "dev"
|
20
|
+
Requires-Dist: nbconvert>=7.16.3; extra == "dev"
|
11
21
|
Provides-Extra: accelerate
|
12
|
-
|
22
|
+
Requires-Dist: accelerate; extra == "accelerate"
|
23
|
+
Dynamic: author
|
24
|
+
Dynamic: author-email
|
25
|
+
Dynamic: description
|
26
|
+
Dynamic: description-content-type
|
27
|
+
Dynamic: home-page
|
28
|
+
Dynamic: license
|
29
|
+
Dynamic: provides-extra
|
30
|
+
Dynamic: requires-dist
|
31
|
+
Dynamic: summary
|
13
32
|
|
14
33
|
# compressed-tensors
|
15
34
|
|
@@ -77,6 +77,15 @@ class BaseCompressor(RegistryMixin, ABC):
|
|
77
77
|
"""
|
78
78
|
raise NotImplementedError()
|
79
79
|
|
80
|
+
@property
|
81
|
+
@abstractmethod
|
82
|
+
def compression_param_names(self) -> Tuple[str]:
|
83
|
+
"""
|
84
|
+
Returns a tuple of compression parameter names introduced by
|
85
|
+
the compressor during compression
|
86
|
+
"""
|
87
|
+
raise NotImplementedError()
|
88
|
+
|
80
89
|
@abstractmethod
|
81
90
|
def compress(
|
82
91
|
self,
|
@@ -144,7 +144,7 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
144
144
|
|
145
145
|
def _decompress_from_path(self, path_to_model, names_to_scheme, device):
|
146
146
|
weight_mappings = get_nested_weight_mappings(
|
147
|
-
path_to_model, self.
|
147
|
+
path_to_model, self.compression_param_names
|
148
148
|
)
|
149
149
|
for weight_name in weight_mappings.keys():
|
150
150
|
weight_data = {}
|
@@ -161,7 +161,7 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
161
161
|
|
162
162
|
def _decompress_from_state_dict(self, state_dict, names_to_scheme):
|
163
163
|
weight_mappings = get_nested_mappings_from_state_dict(
|
164
|
-
state_dict, self.
|
164
|
+
state_dict, self.compression_param_names
|
165
165
|
)
|
166
166
|
for weight_name in weight_mappings.keys():
|
167
167
|
weight_data = {}
|
@@ -41,12 +41,18 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
|
|
41
41
|
type to the type specified by the layer's QuantizationArgs.
|
42
42
|
"""
|
43
43
|
|
44
|
-
|
45
|
-
|
46
|
-
"
|
47
|
-
|
48
|
-
|
49
|
-
|
44
|
+
@property
|
45
|
+
def compression_param_names(self) -> Tuple[str]:
|
46
|
+
"""
|
47
|
+
Returns a tuple of compression parameter names introduced by
|
48
|
+
the compressor during compression
|
49
|
+
"""
|
50
|
+
return (
|
51
|
+
"weight",
|
52
|
+
"weight_scale",
|
53
|
+
"weight_zero_point",
|
54
|
+
"weight_g_idx",
|
55
|
+
)
|
50
56
|
|
51
57
|
def compression_param_info(
|
52
58
|
self,
|
@@ -36,13 +36,19 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
|
|
36
36
|
Compresses a quantized model by packing every eight 4-bit weights into an int32
|
37
37
|
"""
|
38
38
|
|
39
|
-
|
40
|
-
|
41
|
-
"
|
42
|
-
|
43
|
-
|
44
|
-
"
|
45
|
-
|
39
|
+
@property
|
40
|
+
def compression_param_names(self) -> Tuple[str]:
|
41
|
+
"""
|
42
|
+
Returns a tuple of compression parameter names introduced by
|
43
|
+
the compressor during compression
|
44
|
+
"""
|
45
|
+
return (
|
46
|
+
"weight_packed",
|
47
|
+
"weight_scale",
|
48
|
+
"weight_zero_point",
|
49
|
+
"weight_g_idx",
|
50
|
+
"weight_shape",
|
51
|
+
)
|
46
52
|
|
47
53
|
def compression_param_info(
|
48
54
|
self,
|
@@ -30,8 +30,7 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
|
|
30
30
|
class BaseSparseCompressor(BaseCompressor):
|
31
31
|
"""
|
32
32
|
Base class representing a sparse compression algorithm. Each child class should
|
33
|
-
implement
|
34
|
-
classes should also define COMPRESSION_PARAM_NAMES.
|
33
|
+
implement compression_param_names, compress_weight and decompress_weight;
|
35
34
|
|
36
35
|
Compressors support compressing/decompressing a full module state dict or a single
|
37
36
|
quantized PyTorch leaf module.
|
@@ -113,7 +112,7 @@ class BaseSparseCompressor(BaseCompressor):
|
|
113
112
|
"""
|
114
113
|
weight_mappings, ignored_params = get_nested_weight_mappings(
|
115
114
|
path_to_model_or_tensors,
|
116
|
-
self.
|
115
|
+
self.compression_param_names,
|
117
116
|
return_unmatched_params=True,
|
118
117
|
)
|
119
118
|
for weight_name in weight_mappings.keys():
|
@@ -25,6 +25,14 @@ class DenseCompressor(BaseCompressor):
|
|
25
25
|
Identity compressor for dense models, returns the original state_dict
|
26
26
|
"""
|
27
27
|
|
28
|
+
@property
|
29
|
+
def compression_param_names(self) -> Tuple[str]:
|
30
|
+
"""
|
31
|
+
Returns a tuple of compression parameter names introduced by
|
32
|
+
the compressor during compression
|
33
|
+
"""
|
34
|
+
return ()
|
35
|
+
|
28
36
|
def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
|
29
37
|
return model_state
|
30
38
|
|
@@ -40,11 +40,17 @@ class Sparse24BitMaskCompressor(BaseSparseCompressor):
|
|
40
40
|
values tensor, with their locations stored in a 2d bitmask
|
41
41
|
"""
|
42
42
|
|
43
|
-
|
44
|
-
|
45
|
-
"
|
46
|
-
|
47
|
-
|
43
|
+
@property
|
44
|
+
def compression_param_names(self) -> Tuple[str]:
|
45
|
+
"""
|
46
|
+
Returns a tuple of compression parameter names introduced by
|
47
|
+
the compressor during compression
|
48
|
+
"""
|
49
|
+
return (
|
50
|
+
"shape",
|
51
|
+
"compressed",
|
52
|
+
"bitmask",
|
53
|
+
)
|
48
54
|
|
49
55
|
def compress_weight(self, name, value):
|
50
56
|
bitmask_tensor = Sparse24BitMaskTensor.from_dense(
|
@@ -38,7 +38,13 @@ class BitmaskCompressor(BaseSparseCompressor):
|
|
38
38
|
values tensor, with their locations stored in a 2d bitmask
|
39
39
|
"""
|
40
40
|
|
41
|
-
|
41
|
+
@property
|
42
|
+
def compression_param_names(self) -> Tuple[str]:
|
43
|
+
"""
|
44
|
+
Returns a tuple of compression parameter names introduced by
|
45
|
+
the compressor during compression
|
46
|
+
"""
|
47
|
+
return ("shape", "compressed", "bitmask", "row_offsets")
|
42
48
|
|
43
49
|
def compress_weight(self, name, value):
|
44
50
|
bitmask_tensor = BitmaskTensor.from_dense(value)
|
@@ -42,8 +42,6 @@ class Marlin24Compressor(BaseCompressor):
|
|
42
42
|
Marlin24 kernel. Decompression is not implemented for this compressor.
|
43
43
|
"""
|
44
44
|
|
45
|
-
COMPRESSION_PARAM_NAMES = ["weight_packed", "scale_packed", "meta"]
|
46
|
-
|
47
45
|
@staticmethod
|
48
46
|
def validate_quant_compatability(
|
49
47
|
model_quant_args: Dict[str, QuantizationArgs]
|
@@ -105,6 +103,14 @@ class Marlin24Compressor(BaseCompressor):
|
|
105
103
|
|
106
104
|
return True
|
107
105
|
|
106
|
+
@property
|
107
|
+
def compression_param_names(self) -> Tuple[str]:
|
108
|
+
"""
|
109
|
+
Returns a tuple of compression parameter names introduced by
|
110
|
+
the compressor during compression
|
111
|
+
"""
|
112
|
+
return ("weight_packed", "scale_packed", "meta")
|
113
|
+
|
108
114
|
def compress(
|
109
115
|
self,
|
110
116
|
model_state: Dict[str, Tensor],
|
@@ -16,7 +16,7 @@ import json
|
|
16
16
|
import os
|
17
17
|
import re
|
18
18
|
import struct
|
19
|
-
from typing import Dict,
|
19
|
+
from typing import Dict, Iterable, Optional, Tuple, Union
|
20
20
|
|
21
21
|
from safetensors import safe_open
|
22
22
|
from torch import Tensor
|
@@ -180,7 +180,9 @@ def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
|
|
180
180
|
|
181
181
|
|
182
182
|
def get_nested_weight_mappings(
|
183
|
-
model_path: str,
|
183
|
+
model_path: str,
|
184
|
+
params_to_nest: Iterable[str],
|
185
|
+
return_unmatched_params: bool = False,
|
184
186
|
) -> Union[NestedWeightMappingType, Tuple[NestedWeightMappingType, WeightMappingType]]:
|
185
187
|
"""
|
186
188
|
Takes a path to a state dict saved in safetensors format and returns a nested
|
@@ -211,7 +213,7 @@ def get_nested_weight_mappings(
|
|
211
213
|
|
212
214
|
:param model_path: Path to the safetensors state dict, must contain either a
|
213
215
|
single safetensors file or multiple files with an index.
|
214
|
-
:param params_to_nest:
|
216
|
+
:param params_to_nest: Iterable of parameter names to nest.
|
215
217
|
:param return_unmatched_params: If True, return a second dictionary containing
|
216
218
|
the remaining parameters that were not matched to the params_to_nest.
|
217
219
|
:return:
|
@@ -247,7 +249,7 @@ def get_nested_weight_mappings(
|
|
247
249
|
|
248
250
|
|
249
251
|
def get_nested_mappings_from_state_dict(
|
250
|
-
state_dict, params_to_nest
|
252
|
+
state_dict, params_to_nest: Iterable[str]
|
251
253
|
) -> NestedWeightMappingType:
|
252
254
|
"""
|
253
255
|
Takes a state dict and returns a nested mapping from uncompressed
|
@@ -262,7 +264,7 @@ def get_nested_mappings_from_state_dict(
|
|
262
264
|
}
|
263
265
|
|
264
266
|
:param state_dict: state dict of the model
|
265
|
-
:param params_to_nest:
|
267
|
+
:param params_to_nest: Iterable of parameter names to nest.
|
266
268
|
:return: Nested mapping of parameterized layer names to the value of
|
267
269
|
each layer's compression parameters.
|
268
270
|
"""
|
@@ -1,15 +1,34 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.9.1.
|
3
|
+
Version: 0.9.1.20250205
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
7
7
|
Author-email: support@neuralmagic.com
|
8
8
|
License: Apache 2.0
|
9
9
|
Description-Content-Type: text/markdown
|
10
|
+
License-File: LICENSE
|
11
|
+
Requires-Dist: torch>=1.7.0
|
12
|
+
Requires-Dist: transformers
|
13
|
+
Requires-Dist: pydantic>=2.0
|
10
14
|
Provides-Extra: dev
|
15
|
+
Requires-Dist: black==22.12.0; extra == "dev"
|
16
|
+
Requires-Dist: isort==5.8.0; extra == "dev"
|
17
|
+
Requires-Dist: wheel>=0.36.2; extra == "dev"
|
18
|
+
Requires-Dist: flake8>=3.8.3; extra == "dev"
|
19
|
+
Requires-Dist: pytest>=6.0.0; extra == "dev"
|
20
|
+
Requires-Dist: nbconvert>=7.16.3; extra == "dev"
|
11
21
|
Provides-Extra: accelerate
|
12
|
-
|
22
|
+
Requires-Dist: accelerate; extra == "accelerate"
|
23
|
+
Dynamic: author
|
24
|
+
Dynamic: author-email
|
25
|
+
Dynamic: description
|
26
|
+
Dynamic: description-content-type
|
27
|
+
Dynamic: home-page
|
28
|
+
Dynamic: license
|
29
|
+
Dynamic: provides-extra
|
30
|
+
Dynamic: requires-dist
|
31
|
+
Dynamic: summary
|
13
32
|
|
14
33
|
# compressed-tensors
|
15
34
|
|
@@ -54,4 +54,6 @@ src/compressed_tensors_nightly.egg-info/PKG-INFO
|
|
54
54
|
src/compressed_tensors_nightly.egg-info/SOURCES.txt
|
55
55
|
src/compressed_tensors_nightly.egg-info/dependency_links.txt
|
56
56
|
src/compressed_tensors_nightly.egg-info/requires.txt
|
57
|
-
src/compressed_tensors_nightly.egg-info/top_level.txt
|
57
|
+
src/compressed_tensors_nightly.egg-info/top_level.txt
|
58
|
+
tests/test_registry.py
|
59
|
+
tests/testing_utils.py
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
import pytest
|
16
|
+
from compressed_tensors import (
|
17
|
+
BaseCompressor,
|
18
|
+
BitmaskCompressor,
|
19
|
+
BitmaskConfig,
|
20
|
+
CompressionFormat,
|
21
|
+
DenseCompressor,
|
22
|
+
DenseSparsityConfig,
|
23
|
+
SparsityCompressionConfig,
|
24
|
+
)
|
25
|
+
|
26
|
+
|
27
|
+
@pytest.mark.parametrize(
|
28
|
+
"name,type",
|
29
|
+
[
|
30
|
+
[CompressionFormat.sparse_bitmask.value, BitmaskConfig],
|
31
|
+
[CompressionFormat.dense.value, DenseSparsityConfig],
|
32
|
+
],
|
33
|
+
)
|
34
|
+
def test_configs(name, type):
|
35
|
+
config = SparsityCompressionConfig.load_from_registry(name)
|
36
|
+
assert isinstance(config, type)
|
37
|
+
assert config.format == name
|
38
|
+
|
39
|
+
|
40
|
+
@pytest.mark.parametrize(
|
41
|
+
"name,type",
|
42
|
+
[
|
43
|
+
[CompressionFormat.sparse_bitmask.value, BitmaskCompressor],
|
44
|
+
[CompressionFormat.dense.value, DenseCompressor],
|
45
|
+
],
|
46
|
+
)
|
47
|
+
def test_compressors(name, type):
|
48
|
+
compressor = BaseCompressor.load_from_registry(
|
49
|
+
name, config=SparsityCompressionConfig(format="none")
|
50
|
+
)
|
51
|
+
assert isinstance(compressor, type)
|
52
|
+
assert isinstance(compressor.config, SparsityCompressionConfig)
|
53
|
+
assert compressor.config.format == "none"
|
@@ -0,0 +1,144 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
# flake8: noqa
|
15
|
+
import unittest
|
16
|
+
|
17
|
+
import pytest
|
18
|
+
|
19
|
+
|
20
|
+
def compressed_tensors_config_available():
|
21
|
+
try:
|
22
|
+
from transformers.utils.quantization_config import ( # noqa: F401
|
23
|
+
CompressedTensorsConfig,
|
24
|
+
)
|
25
|
+
|
26
|
+
return True
|
27
|
+
except ImportError:
|
28
|
+
return False
|
29
|
+
|
30
|
+
|
31
|
+
def accelerate_availabe():
|
32
|
+
try:
|
33
|
+
import accelerate # noqa: F401
|
34
|
+
|
35
|
+
return True
|
36
|
+
|
37
|
+
except ImportError:
|
38
|
+
return False
|
39
|
+
|
40
|
+
|
41
|
+
_is_compressed_tensors_config_available = compressed_tensors_config_available()
|
42
|
+
_is_accelerate_available = accelerate_availabe()
|
43
|
+
|
44
|
+
|
45
|
+
def requires_hf_quantizer():
|
46
|
+
return pytest.mark.skipif(
|
47
|
+
not _is_compressed_tensors_config_available,
|
48
|
+
reason="requires transformers>=4.45 to support CompressedTensorsHfQuantizer",
|
49
|
+
)
|
50
|
+
|
51
|
+
|
52
|
+
def requires_accelerate():
|
53
|
+
return pytest.mark.skipif(
|
54
|
+
not _is_accelerate_available,
|
55
|
+
reason="requires accelerate",
|
56
|
+
)
|
57
|
+
|
58
|
+
|
59
|
+
def get_random_mat(M, K, dtype) -> "torch.Tensor":
|
60
|
+
"""
|
61
|
+
:param M: number of rows
|
62
|
+
:param K: number of columns
|
63
|
+
:param dtype: data type of the matrix
|
64
|
+
:return: random matrix of shape (M, K) with non-zero values
|
65
|
+
"""
|
66
|
+
import torch
|
67
|
+
from compressed_tensors.quantization import FP8_DTYPE
|
68
|
+
|
69
|
+
rand_tensor_dtype = dtype
|
70
|
+
if dtype in [torch.int8, FP8_DTYPE]:
|
71
|
+
rand_tensor_dtype = torch.float16
|
72
|
+
mat = torch.rand(M, K, dtype=rand_tensor_dtype).cuda()
|
73
|
+
mat = mat.masked_fill_(mat == 0, 1)
|
74
|
+
return mat.to(dtype)
|
75
|
+
|
76
|
+
|
77
|
+
def generate_pruned_semi_structured_mat(M, K, dtype) -> "torch.Tensor":
|
78
|
+
"""
|
79
|
+
:param M: number of rows
|
80
|
+
:param K: number of columns
|
81
|
+
:param dtype: data type of the matrix
|
82
|
+
:return: random matrix of shape (M, K) with 2:4 sparsity pattern
|
83
|
+
"""
|
84
|
+
import torch
|
85
|
+
from compressed_tensors.quantization import FP8_DTYPE
|
86
|
+
|
87
|
+
mask = torch.Tensor([0, 0, 1, 1]).tile((M, K // 4)).bool()
|
88
|
+
rand_tensor_dtype = dtype
|
89
|
+
if dtype in [torch.int8, FP8_DTYPE]:
|
90
|
+
rand_tensor_dtype = torch.float16
|
91
|
+
mat = torch.rand(M, K, dtype=rand_tensor_dtype)
|
92
|
+
mat = mat.masked_fill_(mat == 0, 1)
|
93
|
+
if dtype == FP8_DTYPE:
|
94
|
+
# some float8_e4m3fn operations are not supported on CPU
|
95
|
+
mat = mat.cuda()
|
96
|
+
mask = mask.cuda()
|
97
|
+
mat = mat * mask
|
98
|
+
return mat.to(dtype)
|
99
|
+
|
100
|
+
|
101
|
+
def induce_sparsity(tensor, sparsity_ratio) -> "torch.Tensor":
|
102
|
+
"""
|
103
|
+
Makes a tensor sparse by zeroing out a given fraction
|
104
|
+
of its smallest absolute values.
|
105
|
+
|
106
|
+
:param: weight_tensor (torch.Tensor): The input weight tensor.
|
107
|
+
:param: sparsity_ratio (float): Fraction of weights to be zeroed
|
108
|
+
(0 <= sparsity_ratio <= 1).
|
109
|
+
:returns: torch.Tensor: Sparse version of the input tensor.
|
110
|
+
"""
|
111
|
+
import torch
|
112
|
+
|
113
|
+
if not (0 <= sparsity_ratio <= 1):
|
114
|
+
raise ValueError("Sparsity ratio must be between 0 and 1.")
|
115
|
+
|
116
|
+
# Flatten the tensor and compute the threshold for sparsity
|
117
|
+
flattened = tensor.view(-1)
|
118
|
+
k = int(sparsity_ratio * flattened.numel())
|
119
|
+
|
120
|
+
if k > 0:
|
121
|
+
threshold = torch.topk(flattened.abs(), k, largest=False).values.max()
|
122
|
+
sparse_tensor = torch.where(
|
123
|
+
tensor.abs() > threshold, tensor, torch.zeros_like(tensor)
|
124
|
+
)
|
125
|
+
else:
|
126
|
+
sparse_tensor = tensor
|
127
|
+
|
128
|
+
return sparse_tensor
|
129
|
+
|
130
|
+
|
131
|
+
def is_gpu_available():
|
132
|
+
"""
|
133
|
+
:return: True if a GPU is available, False otherwise
|
134
|
+
"""
|
135
|
+
try:
|
136
|
+
import torch # noqa: F401
|
137
|
+
|
138
|
+
return torch.cuda.device_count() > 0
|
139
|
+
except ImportError:
|
140
|
+
return False
|
141
|
+
|
142
|
+
|
143
|
+
def requires_gpu(test_case):
|
144
|
+
return unittest.skipUnless(is_gpu_available(), "test requires GPU")(test_case)
|
{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/LICENSE
RENAMED
File without changes
|
{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/README.md
RENAMED
File without changes
|
File without changes
|
{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/setup.cfg
RENAMED
File without changes
|
{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/setup.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|