compressed-tensors-nightly 0.9.1.20250204__tar.gz → 0.9.1.20250205__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {compressed-tensors-nightly-0.9.1.20250204/src/compressed_tensors_nightly.egg-info → compressed_tensors_nightly-0.9.1.20250205}/PKG-INFO +22 -3
  2. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/base.py +9 -0
  3. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/base.py +2 -2
  4. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +12 -6
  5. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +13 -7
  6. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/base.py +2 -3
  7. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/dense.py +8 -0
  8. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +11 -5
  9. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +7 -1
  10. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +8 -2
  11. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/safetensors_load.py +7 -5
  12. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205/src/compressed_tensors_nightly.egg-info}/PKG-INFO +22 -3
  13. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors_nightly.egg-info/SOURCES.txt +3 -1
  14. compressed_tensors_nightly-0.9.1.20250205/tests/test_registry.py +53 -0
  15. compressed_tensors_nightly-0.9.1.20250205/tests/testing_utils.py +144 -0
  16. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/LICENSE +0 -0
  17. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/README.md +0 -0
  18. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/pyproject.toml +0 -0
  19. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/setup.cfg +0 -0
  20. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/setup.py +0 -0
  21. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/__init__.py +0 -0
  22. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/base.py +0 -0
  23. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/__init__.py +0 -0
  24. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/helpers.py +0 -0
  25. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  26. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  27. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  28. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  29. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  30. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/config/__init__.py +0 -0
  31. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/config/base.py +0 -0
  32. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/config/dense.py +0 -0
  33. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  34. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  35. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/linear/__init__.py +0 -0
  36. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  37. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/__init__.py +0 -0
  38. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  39. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  40. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  41. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  42. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  43. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  44. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/quant_args.py +0 -0
  45. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/quant_config.py +0 -0
  46. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  47. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  48. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  49. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/registry/__init__.py +0 -0
  50. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/registry/registry.py +0 -0
  51. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/__init__.py +0 -0
  52. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/helpers.py +0 -0
  53. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/offload.py +0 -0
  54. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/permutations_24.py +0 -0
  55. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/permute.py +0 -0
  56. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  57. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/version.py +0 -0
  58. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors_nightly.egg-info/dependency_links.txt +0 -0
  59. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors_nightly.egg-info/requires.txt +0 -0
  60. {compressed-tensors-nightly-0.9.1.20250204 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors_nightly.egg-info/top_level.txt +0 -0
@@ -1,15 +1,34 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: compressed-tensors-nightly
3
- Version: 0.9.1.20250204
3
+ Version: 0.9.1.20250205
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
7
7
  Author-email: support@neuralmagic.com
8
8
  License: Apache 2.0
9
9
  Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: torch>=1.7.0
12
+ Requires-Dist: transformers
13
+ Requires-Dist: pydantic>=2.0
10
14
  Provides-Extra: dev
15
+ Requires-Dist: black==22.12.0; extra == "dev"
16
+ Requires-Dist: isort==5.8.0; extra == "dev"
17
+ Requires-Dist: wheel>=0.36.2; extra == "dev"
18
+ Requires-Dist: flake8>=3.8.3; extra == "dev"
19
+ Requires-Dist: pytest>=6.0.0; extra == "dev"
20
+ Requires-Dist: nbconvert>=7.16.3; extra == "dev"
11
21
  Provides-Extra: accelerate
12
- License-File: LICENSE
22
+ Requires-Dist: accelerate; extra == "accelerate"
23
+ Dynamic: author
24
+ Dynamic: author-email
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: license
29
+ Dynamic: provides-extra
30
+ Dynamic: requires-dist
31
+ Dynamic: summary
13
32
 
14
33
  # compressed-tensors
15
34
 
@@ -77,6 +77,15 @@ class BaseCompressor(RegistryMixin, ABC):
77
77
  """
78
78
  raise NotImplementedError()
79
79
 
80
+ @property
81
+ @abstractmethod
82
+ def compression_param_names(self) -> Tuple[str]:
83
+ """
84
+ Returns a tuple of compression parameter names introduced by
85
+ the compressor during compression
86
+ """
87
+ raise NotImplementedError()
88
+
80
89
  @abstractmethod
81
90
  def compress(
82
91
  self,
@@ -144,7 +144,7 @@ class BaseQuantizationCompressor(BaseCompressor):
144
144
 
145
145
  def _decompress_from_path(self, path_to_model, names_to_scheme, device):
146
146
  weight_mappings = get_nested_weight_mappings(
147
- path_to_model, self.COMPRESSION_PARAM_NAMES
147
+ path_to_model, self.compression_param_names
148
148
  )
149
149
  for weight_name in weight_mappings.keys():
150
150
  weight_data = {}
@@ -161,7 +161,7 @@ class BaseQuantizationCompressor(BaseCompressor):
161
161
 
162
162
  def _decompress_from_state_dict(self, state_dict, names_to_scheme):
163
163
  weight_mappings = get_nested_mappings_from_state_dict(
164
- state_dict, self.COMPRESSION_PARAM_NAMES
164
+ state_dict, self.compression_param_names
165
165
  )
166
166
  for weight_name in weight_mappings.keys():
167
167
  weight_data = {}
@@ -41,12 +41,18 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
41
41
  type to the type specified by the layer's QuantizationArgs.
42
42
  """
43
43
 
44
- COMPRESSION_PARAM_NAMES = [
45
- "weight",
46
- "weight_scale",
47
- "weight_zero_point",
48
- "weight_g_idx",
49
- ]
44
+ @property
45
+ def compression_param_names(self) -> Tuple[str]:
46
+ """
47
+ Returns a tuple of compression parameter names introduced by
48
+ the compressor during compression
49
+ """
50
+ return (
51
+ "weight",
52
+ "weight_scale",
53
+ "weight_zero_point",
54
+ "weight_g_idx",
55
+ )
50
56
 
51
57
  def compression_param_info(
52
58
  self,
@@ -36,13 +36,19 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
36
36
  Compresses a quantized model by packing every eight 4-bit weights into an int32
37
37
  """
38
38
 
39
- COMPRESSION_PARAM_NAMES = [
40
- "weight_packed",
41
- "weight_scale",
42
- "weight_zero_point",
43
- "weight_g_idx",
44
- "weight_shape",
45
- ]
39
+ @property
40
+ def compression_param_names(self) -> Tuple[str]:
41
+ """
42
+ Returns a tuple of compression parameter names introduced by
43
+ the compressor during compression
44
+ """
45
+ return (
46
+ "weight_packed",
47
+ "weight_scale",
48
+ "weight_zero_point",
49
+ "weight_g_idx",
50
+ "weight_shape",
51
+ )
46
52
 
47
53
  def compression_param_info(
48
54
  self,
@@ -30,8 +30,7 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
30
30
  class BaseSparseCompressor(BaseCompressor):
31
31
  """
32
32
  Base class representing a sparse compression algorithm. Each child class should
33
- implement compression_param_info, compress_weight and decompress_weight; child
34
- classes should also define COMPRESSION_PARAM_NAMES.
33
+ implement compression_param_names, compress_weight and decompress_weight;
35
34
 
36
35
  Compressors support compressing/decompressing a full module state dict or a single
37
36
  quantized PyTorch leaf module.
@@ -113,7 +112,7 @@ class BaseSparseCompressor(BaseCompressor):
113
112
  """
114
113
  weight_mappings, ignored_params = get_nested_weight_mappings(
115
114
  path_to_model_or_tensors,
116
- self.COMPRESSION_PARAM_NAMES,
115
+ self.compression_param_names,
117
116
  return_unmatched_params=True,
118
117
  )
119
118
  for weight_name in weight_mappings.keys():
@@ -25,6 +25,14 @@ class DenseCompressor(BaseCompressor):
25
25
  Identity compressor for dense models, returns the original state_dict
26
26
  """
27
27
 
28
+ @property
29
+ def compression_param_names(self) -> Tuple[str]:
30
+ """
31
+ Returns a tuple of compression parameter names introduced by
32
+ the compressor during compression
33
+ """
34
+ return ()
35
+
28
36
  def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
29
37
  return model_state
30
38
 
@@ -40,11 +40,17 @@ class Sparse24BitMaskCompressor(BaseSparseCompressor):
40
40
  values tensor, with their locations stored in a 2d bitmask
41
41
  """
42
42
 
43
- COMPRESSION_PARAM_NAMES = [
44
- "shape",
45
- "compressed",
46
- "bitmask",
47
- ]
43
+ @property
44
+ def compression_param_names(self) -> Tuple[str]:
45
+ """
46
+ Returns a tuple of compression parameter names introduced by
47
+ the compressor during compression
48
+ """
49
+ return (
50
+ "shape",
51
+ "compressed",
52
+ "bitmask",
53
+ )
48
54
 
49
55
  def compress_weight(self, name, value):
50
56
  bitmask_tensor = Sparse24BitMaskTensor.from_dense(
@@ -38,7 +38,13 @@ class BitmaskCompressor(BaseSparseCompressor):
38
38
  values tensor, with their locations stored in a 2d bitmask
39
39
  """
40
40
 
41
- COMPRESSION_PARAM_NAMES = ["shape", "compressed", "bitmask", "row_offsets"]
41
+ @property
42
+ def compression_param_names(self) -> Tuple[str]:
43
+ """
44
+ Returns a tuple of compression parameter names introduced by
45
+ the compressor during compression
46
+ """
47
+ return ("shape", "compressed", "bitmask", "row_offsets")
42
48
 
43
49
  def compress_weight(self, name, value):
44
50
  bitmask_tensor = BitmaskTensor.from_dense(value)
@@ -42,8 +42,6 @@ class Marlin24Compressor(BaseCompressor):
42
42
  Marlin24 kernel. Decompression is not implemented for this compressor.
43
43
  """
44
44
 
45
- COMPRESSION_PARAM_NAMES = ["weight_packed", "scale_packed", "meta"]
46
-
47
45
  @staticmethod
48
46
  def validate_quant_compatability(
49
47
  model_quant_args: Dict[str, QuantizationArgs]
@@ -105,6 +103,14 @@ class Marlin24Compressor(BaseCompressor):
105
103
 
106
104
  return True
107
105
 
106
+ @property
107
+ def compression_param_names(self) -> Tuple[str]:
108
+ """
109
+ Returns a tuple of compression parameter names introduced by
110
+ the compressor during compression
111
+ """
112
+ return ("weight_packed", "scale_packed", "meta")
113
+
108
114
  def compress(
109
115
  self,
110
116
  model_state: Dict[str, Tensor],
@@ -16,7 +16,7 @@ import json
16
16
  import os
17
17
  import re
18
18
  import struct
19
- from typing import Dict, List, Optional, Tuple, Union
19
+ from typing import Dict, Iterable, Optional, Tuple, Union
20
20
 
21
21
  from safetensors import safe_open
22
22
  from torch import Tensor
@@ -180,7 +180,9 @@ def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
180
180
 
181
181
 
182
182
  def get_nested_weight_mappings(
183
- model_path: str, params_to_nest: List[str], return_unmatched_params: bool = False
183
+ model_path: str,
184
+ params_to_nest: Iterable[str],
185
+ return_unmatched_params: bool = False,
184
186
  ) -> Union[NestedWeightMappingType, Tuple[NestedWeightMappingType, WeightMappingType]]:
185
187
  """
186
188
  Takes a path to a state dict saved in safetensors format and returns a nested
@@ -211,7 +213,7 @@ def get_nested_weight_mappings(
211
213
 
212
214
  :param model_path: Path to the safetensors state dict, must contain either a
213
215
  single safetensors file or multiple files with an index.
214
- :param params_to_nest: List of parameter names to nest.
216
+ :param params_to_nest: Iterable of parameter names to nest.
215
217
  :param return_unmatched_params: If True, return a second dictionary containing
216
218
  the remaining parameters that were not matched to the params_to_nest.
217
219
  :return:
@@ -247,7 +249,7 @@ def get_nested_weight_mappings(
247
249
 
248
250
 
249
251
  def get_nested_mappings_from_state_dict(
250
- state_dict, params_to_nest
252
+ state_dict, params_to_nest: Iterable[str]
251
253
  ) -> NestedWeightMappingType:
252
254
  """
253
255
  Takes a state dict and returns a nested mapping from uncompressed
@@ -262,7 +264,7 @@ def get_nested_mappings_from_state_dict(
262
264
  }
263
265
 
264
266
  :param state_dict: state dict of the model
265
- :param params_to_nest: List of parameter names to nest.
267
+ :param params_to_nest: Iterable of parameter names to nest.
266
268
  :return: Nested mapping of parameterized layer names to the value of
267
269
  each layer's compression parameters.
268
270
  """
@@ -1,15 +1,34 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: compressed-tensors-nightly
3
- Version: 0.9.1.20250204
3
+ Version: 0.9.1.20250205
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
7
7
  Author-email: support@neuralmagic.com
8
8
  License: Apache 2.0
9
9
  Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: torch>=1.7.0
12
+ Requires-Dist: transformers
13
+ Requires-Dist: pydantic>=2.0
10
14
  Provides-Extra: dev
15
+ Requires-Dist: black==22.12.0; extra == "dev"
16
+ Requires-Dist: isort==5.8.0; extra == "dev"
17
+ Requires-Dist: wheel>=0.36.2; extra == "dev"
18
+ Requires-Dist: flake8>=3.8.3; extra == "dev"
19
+ Requires-Dist: pytest>=6.0.0; extra == "dev"
20
+ Requires-Dist: nbconvert>=7.16.3; extra == "dev"
11
21
  Provides-Extra: accelerate
12
- License-File: LICENSE
22
+ Requires-Dist: accelerate; extra == "accelerate"
23
+ Dynamic: author
24
+ Dynamic: author-email
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: license
29
+ Dynamic: provides-extra
30
+ Dynamic: requires-dist
31
+ Dynamic: summary
13
32
 
14
33
  # compressed-tensors
15
34
 
@@ -54,4 +54,6 @@ src/compressed_tensors_nightly.egg-info/PKG-INFO
54
54
  src/compressed_tensors_nightly.egg-info/SOURCES.txt
55
55
  src/compressed_tensors_nightly.egg-info/dependency_links.txt
56
56
  src/compressed_tensors_nightly.egg-info/requires.txt
57
- src/compressed_tensors_nightly.egg-info/top_level.txt
57
+ src/compressed_tensors_nightly.egg-info/top_level.txt
58
+ tests/test_registry.py
59
+ tests/testing_utils.py
@@ -0,0 +1,53 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pytest
16
+ from compressed_tensors import (
17
+ BaseCompressor,
18
+ BitmaskCompressor,
19
+ BitmaskConfig,
20
+ CompressionFormat,
21
+ DenseCompressor,
22
+ DenseSparsityConfig,
23
+ SparsityCompressionConfig,
24
+ )
25
+
26
+
27
+ @pytest.mark.parametrize(
28
+ "name,type",
29
+ [
30
+ [CompressionFormat.sparse_bitmask.value, BitmaskConfig],
31
+ [CompressionFormat.dense.value, DenseSparsityConfig],
32
+ ],
33
+ )
34
+ def test_configs(name, type):
35
+ config = SparsityCompressionConfig.load_from_registry(name)
36
+ assert isinstance(config, type)
37
+ assert config.format == name
38
+
39
+
40
+ @pytest.mark.parametrize(
41
+ "name,type",
42
+ [
43
+ [CompressionFormat.sparse_bitmask.value, BitmaskCompressor],
44
+ [CompressionFormat.dense.value, DenseCompressor],
45
+ ],
46
+ )
47
+ def test_compressors(name, type):
48
+ compressor = BaseCompressor.load_from_registry(
49
+ name, config=SparsityCompressionConfig(format="none")
50
+ )
51
+ assert isinstance(compressor, type)
52
+ assert isinstance(compressor.config, SparsityCompressionConfig)
53
+ assert compressor.config.format == "none"
@@ -0,0 +1,144 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # flake8: noqa
15
+ import unittest
16
+
17
+ import pytest
18
+
19
+
20
+ def compressed_tensors_config_available():
21
+ try:
22
+ from transformers.utils.quantization_config import ( # noqa: F401
23
+ CompressedTensorsConfig,
24
+ )
25
+
26
+ return True
27
+ except ImportError:
28
+ return False
29
+
30
+
31
+ def accelerate_availabe():
32
+ try:
33
+ import accelerate # noqa: F401
34
+
35
+ return True
36
+
37
+ except ImportError:
38
+ return False
39
+
40
+
41
+ _is_compressed_tensors_config_available = compressed_tensors_config_available()
42
+ _is_accelerate_available = accelerate_availabe()
43
+
44
+
45
+ def requires_hf_quantizer():
46
+ return pytest.mark.skipif(
47
+ not _is_compressed_tensors_config_available,
48
+ reason="requires transformers>=4.45 to support CompressedTensorsHfQuantizer",
49
+ )
50
+
51
+
52
+ def requires_accelerate():
53
+ return pytest.mark.skipif(
54
+ not _is_accelerate_available,
55
+ reason="requires accelerate",
56
+ )
57
+
58
+
59
+ def get_random_mat(M, K, dtype) -> "torch.Tensor":
60
+ """
61
+ :param M: number of rows
62
+ :param K: number of columns
63
+ :param dtype: data type of the matrix
64
+ :return: random matrix of shape (M, K) with non-zero values
65
+ """
66
+ import torch
67
+ from compressed_tensors.quantization import FP8_DTYPE
68
+
69
+ rand_tensor_dtype = dtype
70
+ if dtype in [torch.int8, FP8_DTYPE]:
71
+ rand_tensor_dtype = torch.float16
72
+ mat = torch.rand(M, K, dtype=rand_tensor_dtype).cuda()
73
+ mat = mat.masked_fill_(mat == 0, 1)
74
+ return mat.to(dtype)
75
+
76
+
77
+ def generate_pruned_semi_structured_mat(M, K, dtype) -> "torch.Tensor":
78
+ """
79
+ :param M: number of rows
80
+ :param K: number of columns
81
+ :param dtype: data type of the matrix
82
+ :return: random matrix of shape (M, K) with 2:4 sparsity pattern
83
+ """
84
+ import torch
85
+ from compressed_tensors.quantization import FP8_DTYPE
86
+
87
+ mask = torch.Tensor([0, 0, 1, 1]).tile((M, K // 4)).bool()
88
+ rand_tensor_dtype = dtype
89
+ if dtype in [torch.int8, FP8_DTYPE]:
90
+ rand_tensor_dtype = torch.float16
91
+ mat = torch.rand(M, K, dtype=rand_tensor_dtype)
92
+ mat = mat.masked_fill_(mat == 0, 1)
93
+ if dtype == FP8_DTYPE:
94
+ # some float8_e4m3fn operations are not supported on CPU
95
+ mat = mat.cuda()
96
+ mask = mask.cuda()
97
+ mat = mat * mask
98
+ return mat.to(dtype)
99
+
100
+
101
+ def induce_sparsity(tensor, sparsity_ratio) -> "torch.Tensor":
102
+ """
103
+ Makes a tensor sparse by zeroing out a given fraction
104
+ of its smallest absolute values.
105
+
106
+ :param: weight_tensor (torch.Tensor): The input weight tensor.
107
+ :param: sparsity_ratio (float): Fraction of weights to be zeroed
108
+ (0 <= sparsity_ratio <= 1).
109
+ :returns: torch.Tensor: Sparse version of the input tensor.
110
+ """
111
+ import torch
112
+
113
+ if not (0 <= sparsity_ratio <= 1):
114
+ raise ValueError("Sparsity ratio must be between 0 and 1.")
115
+
116
+ # Flatten the tensor and compute the threshold for sparsity
117
+ flattened = tensor.view(-1)
118
+ k = int(sparsity_ratio * flattened.numel())
119
+
120
+ if k > 0:
121
+ threshold = torch.topk(flattened.abs(), k, largest=False).values.max()
122
+ sparse_tensor = torch.where(
123
+ tensor.abs() > threshold, tensor, torch.zeros_like(tensor)
124
+ )
125
+ else:
126
+ sparse_tensor = tensor
127
+
128
+ return sparse_tensor
129
+
130
+
131
+ def is_gpu_available():
132
+ """
133
+ :return: True if a GPU is available, False otherwise
134
+ """
135
+ try:
136
+ import torch # noqa: F401
137
+
138
+ return torch.cuda.device_count() > 0
139
+ except ImportError:
140
+ return False
141
+
142
+
143
+ def requires_gpu(test_case):
144
+ return unittest.skipUnless(is_gpu_available(), "test requires GPU")(test_case)