compressed-tensors 0.9.0__tar.gz → 0.9.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/PKG-INFO +22 -3
  2. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/base.py +9 -0
  3. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +109 -4
  4. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/quantized_compressors/base.py +2 -2
  5. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +12 -6
  6. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +36 -13
  7. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/sparse_compressors/base.py +2 -3
  8. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/sparse_compressors/dense.py +8 -0
  9. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +15 -7
  10. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +7 -1
  11. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +8 -2
  12. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/lifecycle/apply.py +17 -12
  13. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/quant_args.py +8 -9
  14. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/utils/helpers.py +8 -4
  15. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/utils/offload.py +7 -1
  16. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/utils/safetensors_load.py +7 -5
  17. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/version.py +1 -1
  18. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors.egg-info/PKG-INFO +22 -3
  19. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors.egg-info/SOURCES.txt +3 -1
  20. compressed_tensors-0.9.2/tests/test_registry.py +53 -0
  21. compressed_tensors-0.9.2/tests/testing_utils.py +144 -0
  22. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/LICENSE +0 -0
  23. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/README.md +0 -0
  24. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/pyproject.toml +0 -0
  25. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/setup.cfg +0 -0
  26. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/setup.py +0 -0
  27. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/__init__.py +0 -0
  28. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/base.py +0 -0
  29. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/__init__.py +0 -0
  30. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/helpers.py +0 -0
  31. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  32. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  33. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  34. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  35. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/config/__init__.py +0 -0
  36. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/config/base.py +0 -0
  37. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/config/dense.py +0 -0
  38. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  39. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  40. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/linear/__init__.py +0 -0
  41. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  42. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/__init__.py +0 -0
  43. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  44. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  45. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  46. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  47. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  48. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/quant_config.py +0 -0
  49. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  50. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  51. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  52. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/registry/__init__.py +0 -0
  53. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/registry/registry.py +0 -0
  54. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/utils/__init__.py +0 -0
  55. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/utils/permutations_24.py +0 -0
  56. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/utils/permute.py +0 -0
  57. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  58. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  59. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors.egg-info/requires.txt +0 -0
  60. {compressed-tensors-0.9.0 → compressed_tensors-0.9.2}/src/compressed_tensors.egg-info/top_level.txt +0 -0
@@ -1,15 +1,34 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: compressed-tensors
- Version: 0.9.0
+ Version: 0.9.2
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
  Author-email: support@neuralmagic.com
  License: Apache 2.0
  Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: torch>=1.7.0
+ Requires-Dist: transformers
+ Requires-Dist: pydantic>=2.0
  Provides-Extra: dev
+ Requires-Dist: black==22.12.0; extra == "dev"
+ Requires-Dist: isort==5.8.0; extra == "dev"
+ Requires-Dist: wheel>=0.36.2; extra == "dev"
+ Requires-Dist: flake8>=3.8.3; extra == "dev"
+ Requires-Dist: pytest>=6.0.0; extra == "dev"
+ Requires-Dist: nbconvert>=7.16.3; extra == "dev"
  Provides-Extra: accelerate
- License-File: LICENSE
+ Requires-Dist: accelerate; extra == "accelerate"
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: summary
 
  # compressed-tensors
 
@@ -77,6 +77,15 @@ class BaseCompressor(RegistryMixin, ABC):
      """
      raise NotImplementedError()
 
+     @property
+     @abstractmethod
+     def compression_param_names(self) -> Tuple[str]:
+         """
+         Returns a tuple of compression parameter names introduced by
+         the compressor during compression
+         """
+         raise NotImplementedError()
+
      @abstractmethod
      def compress(
          self,
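
The hunks below migrate each concrete compressor from the old `COMPRESSION_PARAM_NAMES` class attribute to this abstract property. A minimal sketch of what a downstream subclass now provides (the class name is hypothetical, and the other abstract methods of `BaseCompressor` are elided):

```python
from typing import Tuple

from compressed_tensors.compressors import BaseCompressor


class MyCompressor(BaseCompressor):  # hypothetical example subclass
    @property
    def compression_param_names(self) -> Tuple[str]:
        # names of the tensors this compressor emits per compressed weight
        return ("weight_packed", "weight_shape")
```
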
@@ -19,7 +19,7 @@ import os
  import re
  from contextlib import contextmanager
  from copy import deepcopy
- from typing import TYPE_CHECKING, Any, Dict, Optional, Set, TypeVar, Union
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, TypeVar, Union
 
  import compressed_tensors
  import torch
@@ -39,13 +39,17 @@ from compressed_tensors.quantization import (
      apply_quantization_config,
      load_pretrained_quantization,
  )
- from compressed_tensors.quantization.lifecycle import expand_sparse_target_names
+ from compressed_tensors.quantization.lifecycle import expand_target_names
  from compressed_tensors.quantization.quant_args import QuantizationArgs
  from compressed_tensors.quantization.utils import (
      is_module_quantized,
      iter_named_leaf_modules,
  )
- from compressed_tensors.utils import get_safetensors_folder, update_parameter_data
+ from compressed_tensors.utils import (
+     get_safetensors_folder,
+     merge_names,
+     update_parameter_data,
+ )
  from compressed_tensors.utils.helpers import (
      fix_fsdp_module_name,
      is_compressed_tensors_config,
@@ -254,6 +258,107 @@ class ModelCompressor:
          quantization_config.format, config=quantization_config
      )
 
+     def get_missing_module_keys(self, model: Module) -> List[str]:
+         """
+         Identifies the expected missing weight keys in the compressed state_dict.
+
+         When a model undergoes sparsity or quantization compression, certain
+         weight tensors may be absent from the checkpoint by virtue of compression.
+         This function determines which weight keys are missing based on the
+         applied compression techniques.
+
+
+         :param model: The PyTorch model to check for missing keys.
+         :return: A list of missing keys expected in the compressed state_dict.
+         """
+         missing_keys = set()
+
+         # Determine missing keys due to sparsity compression
+         if (
+             self.sparsity_compressor
+             and self.sparsity_config.format != CompressionFormat.dense.value
+         ):
+             sparse_targets = expand_target_names(
+                 model=model,
+                 targets=self.sparsity_config.targets,
+                 ignore=self.sparsity_config.ignore,
+             )
+             missing_keys.update(
+                 merge_names(target, "weight") for target in sparse_targets
+             )
+
+         # Determine missing keys due to pack quantization
+         if (
+             self.quantization_compressor
+             and self.quantization_config.format
+             == CompressionFormat.pack_quantized.value
+         ):
+             for scheme in self.quantization_config.config_groups.values():
+                 quant_targets = expand_target_names(
+                     model=model,
+                     targets=scheme.targets,
+                     ignore=self.quantization_config.ignore,
+                 )
+                 missing_keys.update(
+                     merge_names(target, "weight") for target in quant_targets
+                 )
+
+         return list(missing_keys)
+
+     def get_unexpected_file_keys(self, model: Module) -> List[str]:
+         """
+         Identifies extra keys introduced by the compression process in the
+         compressed state_dict that are not expected by the model graph.
+
+         During sparsity or quantization compression, additional metadata or
+         auxiliary parameters may be stored in the checkpoint, which do not
+         correspond to any parameter in the original model. These keys are
+         typically introduced to support the reconstruction of compressed weights.
+
+         For example, Sparse24Bitmask compression may introduce keys such as
+         'compressed', 'bitmask', and 'shape' in the checkpoint, which are
+         not part of the original model parameters.
+
+         :param model: The PyTorch model to check for unexpected keys.
+         :return: A list of extra keys introduced by the compression process
+             that are not expected by the model.
+         """
+
+         unexpected_keys = set()
+
+         # Identify unexpected keys from sparsity compression
+         if (
+             self.sparsity_compressor
+             and self.sparsity_config.format != CompressionFormat.dense.value
+         ):
+             sparse_targets: Set[str] = expand_target_names(
+                 model=model,
+                 targets=self.sparsity_config.targets,
+                 ignore=self.sparsity_config.ignore,
+             )
+             unexpected_keys.update(
+                 merge_names(target, param)
+                 for target in sparse_targets
+                 for param in self.sparsity_compressor.compression_param_names
+             )
+
+         # Identify unexpected keys from quantization compression
+         if self.quantization_compressor:
+             for scheme in self.quantization_config.config_groups.values():
+                 quant_targets: Set[str] = expand_target_names(
+                     model=model,
+                     targets=scheme.targets,
+                     ignore=self.quantization_config.ignore,
+                 )
+                 unexpected_keys.update(
+                     merge_names(target, param)
+                     for target in quant_targets
+                     for param in self.quantization_compressor.compression_param_names
+                     if param != "weight"
+                 )
+
+         return list(unexpected_keys)
+
      def compress(
          self, model: Module, state_dict: Optional[Dict[str, Tensor]] = None
      ) -> Dict[str, Tensor]:
@@ -283,7 +388,7 @@ class ModelCompressor:
          )
 
          if self.sparsity_compressor is not None:
-             sparse_compression_targets: Set[str] = expand_sparse_target_names(
+             sparse_compression_targets: Set[str] = expand_target_names(
                  model=model,
                  targets=self.sparsity_config.targets,
                  ignore=self.sparsity_config.ignore,
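
A short sketch (not part of the diff) of how these two helpers can be called when validating a checkpoint load, assuming a loaded `model: torch.nn.Module`; `ModelCompressor.from_pretrained` is the existing loader, and the model path is illustrative:

```python
from compressed_tensors.compressors import ModelCompressor

compressor = ModelCompressor.from_pretrained("./my-compressed-model")  # illustrative path
if compressor is not None:
    # weight keys the checkpoint may legitimately lack (replaced by compressed tensors)
    missing = compressor.get_missing_module_keys(model)
    # extra keys such as "bitmask" or "weight_shape" that the model graph does not declare
    unexpected = compressor.get_unexpected_file_keys(model)
```
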
@@ -144,7 +144,7 @@ class BaseQuantizationCompressor(BaseCompressor):
 
      def _decompress_from_path(self, path_to_model, names_to_scheme, device):
          weight_mappings = get_nested_weight_mappings(
-             path_to_model, self.COMPRESSION_PARAM_NAMES
+             path_to_model, self.compression_param_names
          )
          for weight_name in weight_mappings.keys():
              weight_data = {}
@@ -161,7 +161,7 @@ class BaseQuantizationCompressor(BaseCompressor):
 
      def _decompress_from_state_dict(self, state_dict, names_to_scheme):
          weight_mappings = get_nested_mappings_from_state_dict(
-             state_dict, self.COMPRESSION_PARAM_NAMES
+             state_dict, self.compression_param_names
          )
          for weight_name in weight_mappings.keys():
              weight_data = {}
@@ -41,12 +41,18 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
      type to the type specified by the layer's QuantizationArgs.
      """
 
-     COMPRESSION_PARAM_NAMES = [
-         "weight",
-         "weight_scale",
-         "weight_zero_point",
-         "weight_g_idx",
-     ]
+     @property
+     def compression_param_names(self) -> Tuple[str]:
+         """
+         Returns a tuple of compression parameter names introduced by
+         the compressor during compression
+         """
+         return (
+             "weight",
+             "weight_scale",
+             "weight_zero_point",
+             "weight_g_idx",
+         )
 
      def compression_param_info(
          self,
@@ -36,13 +36,19 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
      Compresses a quantized model by packing every eight 4-bit weights into an int32
      """
 
-     COMPRESSION_PARAM_NAMES = [
-         "weight_packed",
-         "weight_scale",
-         "weight_zero_point",
-         "weight_g_idx",
-         "weight_shape",
-     ]
+     @property
+     def compression_param_names(self) -> Tuple[str]:
+         """
+         Returns a tuple of compression parameter names introduced by
+         the compressor during compression
+         """
+         return (
+             "weight_packed",
+             "weight_scale",
+             "weight_zero_point",
+             "weight_g_idx",
+             "weight_shape",
+         )
 
      def compression_param_info(
          self,
@@ -138,8 +144,20 @@ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
      """
      Packs a tensor of quantized weights stored in int8 into int32s with padding
 
+     Pseudocode:
+      1. Shift wrt num_bits to convert to unsigned. num_bits=8
+         [1,2] -> [129, 130]
+      2. Pad to fill in 32 bits
+         [129, 130] -> [129, 130, 0, 0]
+      3. convert to binary align in order
+         [129, 130, 0, 0] -> 00000000 00000000 10000010 10000001
+      4. convert aligned binary to number
+         00000000000000001000001010000001 -> 33409
+      5. covert back to uint32
+         33409 -> 33409
+
      :param value: tensor to pack
-     :param num_bits: number of bits used to store underlying data
+     :param num_bits: number of bits used to store underlying data, must be at least 1
      :returns: packed int32 tensor
      """
      if value.dtype is not torch.int8:
@@ -148,19 +166,22 @@ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
      if num_bits > 8:
          raise ValueError("Packing is only supported for less than 8 bits")
 
+     if num_bits < 1:
+         raise ValueError(f"num_bits must be at least 1, got {num_bits}")
+
      # convert to unsigned for packing
-     offset = pow(2, num_bits) // 2
+     offset = 1 << (num_bits - 1)
      value = (value + offset).to(torch.uint8)
      value = value.cpu().numpy().astype(np.uint32)
      pack_factor = 32 // num_bits
 
      # pad input tensor and initialize packed output
      packed_size = math.ceil(value.shape[1] / pack_factor)
-     packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
-     padding = packed.shape[1] * pack_factor - value.shape[1]
+     padding = packed_size * pack_factor - value.shape[1]
      value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
 
      # pack values
+     packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
      for i in range(pack_factor):
          packed |= value[:, i::pack_factor] << num_bits * i
 
@@ -174,7 +195,9 @@ def unpack_from_int32(
  ) -> torch.Tensor:
      """
      Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
-     original their bit range
+     original bit range.
+
+     Return tensors in int8
 
      :param value: tensor to upack
      :param num_bits: number of bits to unpack each data point into
@@ -192,7 +215,7 @@ def unpack_from_int32(
      pack_factor = 32 // num_bits
 
      # unpack
-     mask = pow(2, num_bits) - 1
+     mask = (1 << num_bits) - 1
      unpacked = torch.zeros(
          (value.shape[0], value.shape[1] * pack_factor),
          device=value.device,
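
The docstring's pseudocode can be checked in a few lines of NumPy. This is an independent sketch of the same packing scheme (not the library function itself), using num_bits=4 so that eight values fit in one uint32:

```python
import numpy as np

num_bits = 4
offset = 1 << (num_bits - 1)     # 8: shifts the signed int4 range [-8, 7] to [0, 15]
pack_factor = 32 // num_bits     # 8 values per uint32

values = np.array([[-8, -1, 0, 1, 2, 3, 4, 7]], dtype=np.int8)
unsigned = (values.astype(np.int32) + offset).astype(np.uint32)

# pack: value i lands in bit field [i*num_bits, (i+1)*num_bits)
packed = np.zeros((1, 1), dtype=np.uint32)
for i in range(pack_factor):
    packed |= unsigned[:, i::pack_factor] << (num_bits * i)

# unpack: mask out each 4-bit field, then undo the offset
mask = (1 << num_bits) - 1
unpacked = np.stack(
    [(packed >> (num_bits * i)) & mask for i in range(pack_factor)], axis=-1
).reshape(1, -1).astype(np.int32) - offset

assert np.array_equal(unpacked, values.astype(np.int32))  # lossless round trip
```
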
@@ -30,8 +30,7 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
  class BaseSparseCompressor(BaseCompressor):
      """
      Base class representing a sparse compression algorithm. Each child class should
-     implement compression_param_info, compress_weight and decompress_weight; child
-     classes should also define COMPRESSION_PARAM_NAMES.
+     implement compression_param_names, compress_weight and decompress_weight;
 
      Compressors support compressing/decompressing a full module state dict or a single
      quantized PyTorch leaf module.
@@ -113,7 +112,7 @@ class BaseSparseCompressor(BaseCompressor):
      """
      weight_mappings, ignored_params = get_nested_weight_mappings(
          path_to_model_or_tensors,
-         self.COMPRESSION_PARAM_NAMES,
+         self.compression_param_names,
          return_unmatched_params=True,
      )
      for weight_name in weight_mappings.keys():
@@ -25,6 +25,14 @@ class DenseCompressor(BaseCompressor):
      Identity compressor for dense models, returns the original state_dict
      """
 
+     @property
+     def compression_param_names(self) -> Tuple[str]:
+         """
+         Returns a tuple of compression parameter names introduced by
+         the compressor during compression
+         """
+         return ()
+
      def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
          return model_state
 
@@ -40,11 +40,17 @@ class Sparse24BitMaskCompressor(BaseSparseCompressor):
      values tensor, with their locations stored in a 2d bitmask
      """
 
-     COMPRESSION_PARAM_NAMES = [
-         "shape",
-         "compressed",
-         "bitmask",
-     ]
+     @property
+     def compression_param_names(self) -> Tuple[str]:
+         """
+         Returns a tuple of compression parameter names introduced by
+         the compressor during compression
+         """
+         return (
+             "shape",
+             "compressed",
+             "bitmask",
+         )
 
      def compress_weight(self, name, value):
          bitmask_tensor = Sparse24BitMaskTensor.from_dense(
@@ -103,8 +109,10 @@ class Sparse24BitMaskTensor:
      :param bitmask: 2d bitmask of non-zero values
      :return: instantiated Sparse24BitMaskTensor
      """
-     if isinstance(shape, Tensor):
-         shape = shape.tolist()
+     if isinstance(shape, list):
+         shape = torch.tensor(shape)
+     if isinstance(shape, torch.Tensor):
+         shape = shape.flatten().tolist()
      return Sparse24BitMaskTensor(
          shape=shape, compressed=compressed, bitmask=bitmask
      )
@@ -38,7 +38,13 @@ class BitmaskCompressor(BaseSparseCompressor):
      values tensor, with their locations stored in a 2d bitmask
      """
 
-     COMPRESSION_PARAM_NAMES = ["shape", "compressed", "bitmask", "row_offsets"]
+     @property
+     def compression_param_names(self) -> Tuple[str]:
+         """
+         Returns a tuple of compression parameter names introduced by
+         the compressor during compression
+         """
+         return ("shape", "compressed", "bitmask", "row_offsets")
 
      def compress_weight(self, name, value):
          bitmask_tensor = BitmaskTensor.from_dense(value)
@@ -42,8 +42,6 @@ class Marlin24Compressor(BaseCompressor):
      Marlin24 kernel. Decompression is not implemented for this compressor.
      """
 
-     COMPRESSION_PARAM_NAMES = ["weight_packed", "scale_packed", "meta"]
-
      @staticmethod
      def validate_quant_compatability(
          model_quant_args: Dict[str, QuantizationArgs]
@@ -105,6 +103,14 @@ class Marlin24Compressor(BaseCompressor):
 
          return True
 
+     @property
+     def compression_param_names(self) -> Tuple[str]:
+         """
+         Returns a tuple of compression parameter names introduced by
+         the compressor during compression
+         """
+         return ("weight_packed", "scale_packed", "meta")
+
      def compress(
          self,
          model_state: Dict[str, Tensor],
@@ -52,8 +52,8 @@ __all__ = [
      "apply_quantization_config",
      "apply_quantization_status",
      "find_name_or_class_matches",
-     "expand_sparse_target_names",
-     "is_sparse_target",
+     "expand_target_names",
+     "is_target",
  ]
 
  from compressed_tensors.quantization.utils.helpers import is_module_quantized
@@ -247,8 +247,10 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
          model.apply(compress_quantized_weights)
 
 
- def expand_sparse_target_names(
-     model: Module, targets: Iterable[str], ignore: Iterable[str]
+ def expand_target_names(
+     model: Module,
+     targets: Optional[Iterable[str]] = None,
+     ignore: Optional[Iterable[str]] = None,
  ) -> Set[str]:
      """
      Finds all unique module names in the model that match the given
@@ -257,20 +259,23 @@ def expand_sparse_target_names(
      Note: Targets must be regexes, layer types, or full layer names.
 
      :param model: model to search for targets in
-     :param targets: list of targets to search for
-     :param ignore: list of targets to ignore
+     :param targets: Iterable of targets to search for
+     :param ignore: Iterable of targets to ignore
      :return: set of all targets that match the given targets and should
          not be ignored
      """
      return {
          name
          for name, module in iter_named_leaf_modules(model)
-         if is_sparse_target(name, module, targets, ignore)
+         if is_target(name, module, targets, ignore)
      }
 
 
- def is_sparse_target(
-     name: str, module: Module, targets: Iterable[str], ignore: Iterable[str]
+ def is_target(
+     name: str,
+     module: Module,
+     targets: Optional[Iterable[str]] = None,
+     ignore: Optional[Iterable[str]] = None,
  ) -> bool:
      """
      Determines if a module should be included in the targets based on the
@@ -280,12 +285,12 @@ def is_sparse_target(
 
      :param name: name of the module
      :param module: the module itself
-     :param targets: list of targets to search for
-     :param ignore: list of targets to ignore
+     :param targets: Iterable of targets to search for
+     :param ignore: Iterable of targets to ignore
      :return: True if the module is a target and not ignored, False otherwise
      """
      return bool(
-         find_name_or_class_matches(name, module, targets)
+         find_name_or_class_matches(name, module, targets or [])
          and not find_name_or_class_matches(name, module, ignore or [])
      )
 
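
A brief illustration of the renamed helper on a toy model (a sketch; the exact matching semantics are those of `find_name_or_class_matches`, which accepts layer types, regexes, or full module names):

```python
import torch.nn as nn
from compressed_tensors.quantization.lifecycle import expand_target_names

model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 4))

# match every Linear leaf module by class name, but ignore submodule "2" by name
targets = expand_target_names(model, targets=["Linear"], ignore=["2"])
# expected: {"0"} -- "1" is a ReLU and never matches, "2" is explicitly ignored
```
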
@@ -18,6 +18,7 @@ from typing import Any, Dict, Optional, Union
 
  import torch
  from compressed_tensors.utils import Aliasable
+ from compressed_tensors.utils.helpers import deprecated
  from pydantic import BaseModel, Field, field_validator, model_validator
 
 
@@ -109,10 +110,10 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
      dynamic: bool = False
      actorder: Union[ActivationOrdering, bool, None] = None
      observer: Optional[str] = Field(
-         default="minmax",
+         default=None,
          description=(
-             "The class to use to compute the quantization param - "
-             "scale and zero-point'"
+             "Determines the method of computing quantization parameters (scales and "
+             "zero-points). Defaults to min-max when not using dynamic quantization"
          ),
      )
      observer_kwargs: Dict[str, Any] = Field(
@@ -123,12 +124,6 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
          ),
      )
 
-     def get_observer(self):
-         """
-         :return: torch quantization FakeQuantize built based on these QuantizationArgs
-         """
-         return self.observer
-
      @field_validator("type", mode="before")
      def validate_type(cls, value) -> QuantizationType:
          if isinstance(value, str):
@@ -250,6 +245,10 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
          else:
              raise ValueError(f"Invalid quantization type {self.type}")
 
+     @deprecated("QuantizationArgs.observer")
+     def get_observer(self) -> str:
+         return self.observer
+
 
  def round_to_quantized_type(
      tensor: torch.Tensor, args: QuantizationArgs
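
Downstream code should now read the pydantic field directly; the accessor survives only as a deprecated shim. A sketch, assuming otherwise-default arguments:

```python
from compressed_tensors.quantization import QuantizationArgs

args = QuantizationArgs(num_bits=4)
observer = args.observer       # preferred: plain field access (None unless configured)
legacy = args.get_observer()   # still works, but is marked deprecated in favor of the field
```
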
@@ -14,13 +14,17 @@
 
  import warnings
  from functools import wraps
- from typing import Any, Callable, Dict, List, Optional
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
 
  import numpy
  import torch
  from transformers import AutoConfig
 
 
+ if TYPE_CHECKING:
+     from compressed_tensors.compressors import ModelCompressor
+
+
  __all__ = [
      "infer_compressor_from_model_config",
      "fix_fsdp_module_name",
@@ -166,8 +170,8 @@ def deprecated(future_name: Optional[str] = None, message: Optional[str] = None)
      """
      Decorator to mark functions as deprecated
 
-     :param new_function: Function called in place of depreciated function
-     :param message: Depreciation message, replaces default depreciation message
+     :param new_function: Function called in place of deprecated function
+     :param message: Deprecation message, replaces default deprecation message
      """
 
      def decorator(func: Callable[[Any], Any]):
@@ -301,7 +305,7 @@ def pack_bitmasks(bytemasks: torch.Tensor) -> torch.Tensor:
 
 
  def unpack_bitmasks(
-     packed_bitmasks: torch.Tensor, original_shape: torch.Size
+     packed_bitmasks: torch.Tensor, original_shape: List[int]
  ) -> torch.Tensor:
      """
      Converts a bitmask tensor back to a bytemask tensor for use during decompression
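
Beyond the typo fix, this is the same decorator the `get_observer` shim above relies on; a small sketch of decorating one's own function (function names here are hypothetical):

```python
from compressed_tensors.utils.helpers import deprecated


@deprecated(future_name="new_helper")  # hypothetical names, for illustration only
def old_helper():
    return 42


old_helper()  # runs normally, after warning that new_helper should be used instead
```
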
@@ -26,6 +26,7 @@ Utilities associated with offloading functionality provided by `accelerate`.
  """
 
  import contextlib
+ import warnings
  from functools import wraps
  from typing import Any, Callable, Dict, Literal, Optional, Union
 
@@ -200,9 +201,14 @@ def update_offload_parameter(
      """
      param = getattr(module, name)
      data = data.to(param.dtype)
+     if param.data.shape != data.shape:
+         warnings.warn(
+             f"Shape of parameter being updated {param.data.shape} does not match shape "
+             f"of update data {data.shape}"
+         )
 
      # copy data into onloaded parameter if applicable
-     if param.device != "meta":
+     if param.device != torch.device("meta"):
          param.data.copy_(data)
 
      # update offload dict
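
Note the new check warns rather than raises, since `Tensor.copy_` can still broadcast some mismatched shapes; a sketch of the observable behavior, assuming a plain (non-offloaded) module:

```python
import torch
import torch.nn as nn
from compressed_tensors.utils.offload import update_offload_parameter

layer = nn.Linear(4, 4)
# (1, 4) broadcasts into the (4, 4) weight, but the shapes differ,
# so this now emits a UserWarning before copying
update_offload_parameter(layer, "weight", torch.zeros(1, 4))
```
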
@@ -16,7 +16,7 @@ import json
  import os
  import re
  import struct
- from typing import Dict, List, Optional, Tuple, Union
+ from typing import Dict, Iterable, Optional, Tuple, Union
 
  from safetensors import safe_open
  from torch import Tensor
 
@@ -180,7 +180,9 @@ def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
 
 
  def get_nested_weight_mappings(
-     model_path: str, params_to_nest: List[str], return_unmatched_params: bool = False
+     model_path: str,
+     params_to_nest: Iterable[str],
+     return_unmatched_params: bool = False,
  ) -> Union[NestedWeightMappingType, Tuple[NestedWeightMappingType, WeightMappingType]]:
      """
      Takes a path to a state dict saved in safetensors format and returns a nested
@@ -211,7 +213,7 @@ def get_nested_weight_mappings(
 
      :param model_path: Path to the safetensors state dict, must contain either a
          single safetensors file or multiple files with an index.
-     :param params_to_nest: List of parameter names to nest.
+     :param params_to_nest: Iterable of parameter names to nest.
      :param return_unmatched_params: If True, return a second dictionary containing
          the remaining parameters that were not matched to the params_to_nest.
      :return:
@@ -247,7 +249,7 @@ def get_nested_weight_mappings(
 
 
  def get_nested_mappings_from_state_dict(
-     state_dict, params_to_nest
+     state_dict, params_to_nest: Iterable[str]
  ) -> NestedWeightMappingType:
      """
      Takes a state dict and returns a nested mapping from uncompressed
@@ -262,7 +264,7 @@ def get_nested_mappings_from_state_dict(
      }
 
      :param state_dict: state dict of the model
-     :param params_to_nest: List of parameter names to nest.
+     :param params_to_nest: Iterable of parameter names to nest.
      :return: Nested mapping of parameterized layer names to the value of
          each layer's compression parameters.
      """
@@ -17,7 +17,7 @@ Functionality for storing and setting the version info for SparseML
  """
 
 
- version_base = "0.9.0"
+ version_base = "0.9.2"
  is_release = True  # change to True to set the generated version as a release version
 
 
@@ -1,15 +1,34 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: compressed-tensors
- Version: 0.9.0
+ Version: 0.9.2
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
  Author-email: support@neuralmagic.com
  License: Apache 2.0
  Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: torch>=1.7.0
+ Requires-Dist: transformers
+ Requires-Dist: pydantic>=2.0
  Provides-Extra: dev
+ Requires-Dist: black==22.12.0; extra == "dev"
+ Requires-Dist: isort==5.8.0; extra == "dev"
+ Requires-Dist: wheel>=0.36.2; extra == "dev"
+ Requires-Dist: flake8>=3.8.3; extra == "dev"
+ Requires-Dist: pytest>=6.0.0; extra == "dev"
+ Requires-Dist: nbconvert>=7.16.3; extra == "dev"
  Provides-Extra: accelerate
- License-File: LICENSE
+ Requires-Dist: accelerate; extra == "accelerate"
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: summary
 
  # compressed-tensors
 
@@ -54,4 +54,6 @@ src/compressed_tensors/utils/offload.py
  src/compressed_tensors/utils/permutations_24.py
  src/compressed_tensors/utils/permute.py
  src/compressed_tensors/utils/safetensors_load.py
- src/compressed_tensors/utils/semi_structured_conversions.py
+ src/compressed_tensors/utils/semi_structured_conversions.py
+ tests/test_registry.py
+ tests/testing_utils.py
@@ -0,0 +1,53 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import pytest
+ from compressed_tensors import (
+     BaseCompressor,
+     BitmaskCompressor,
+     BitmaskConfig,
+     CompressionFormat,
+     DenseCompressor,
+     DenseSparsityConfig,
+     SparsityCompressionConfig,
+ )
+
+
+ @pytest.mark.parametrize(
+     "name,type",
+     [
+         [CompressionFormat.sparse_bitmask.value, BitmaskConfig],
+         [CompressionFormat.dense.value, DenseSparsityConfig],
+     ],
+ )
+ def test_configs(name, type):
+     config = SparsityCompressionConfig.load_from_registry(name)
+     assert isinstance(config, type)
+     assert config.format == name
+
+
+ @pytest.mark.parametrize(
+     "name,type",
+     [
+         [CompressionFormat.sparse_bitmask.value, BitmaskCompressor],
+         [CompressionFormat.dense.value, DenseCompressor],
+     ],
+ )
+ def test_compressors(name, type):
+     compressor = BaseCompressor.load_from_registry(
+         name, config=SparsityCompressionConfig(format="none")
+     )
+     assert isinstance(compressor, type)
+     assert isinstance(compressor.config, SparsityCompressionConfig)
+     assert compressor.config.format == "none"
@@ -0,0 +1,144 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # flake8: noqa
+ import unittest
+
+ import pytest
+
+
+ def compressed_tensors_config_available():
+     try:
+         from transformers.utils.quantization_config import (  # noqa: F401
+             CompressedTensorsConfig,
+         )
+
+         return True
+     except ImportError:
+         return False
+
+
+ def accelerate_availabe():
+     try:
+         import accelerate  # noqa: F401
+
+         return True
+
+     except ImportError:
+         return False
+
+
+ _is_compressed_tensors_config_available = compressed_tensors_config_available()
+ _is_accelerate_available = accelerate_availabe()
+
+
+ def requires_hf_quantizer():
+     return pytest.mark.skipif(
+         not _is_compressed_tensors_config_available,
+         reason="requires transformers>=4.45 to support CompressedTensorsHfQuantizer",
+     )
+
+
+ def requires_accelerate():
+     return pytest.mark.skipif(
+         not _is_accelerate_available,
+         reason="requires accelerate",
+     )
+
+
+ def get_random_mat(M, K, dtype) -> "torch.Tensor":
+     """
+     :param M: number of rows
+     :param K: number of columns
+     :param dtype: data type of the matrix
+     :return: random matrix of shape (M, K) with non-zero values
+     """
+     import torch
+     from compressed_tensors.quantization import FP8_DTYPE
+
+     rand_tensor_dtype = dtype
+     if dtype in [torch.int8, FP8_DTYPE]:
+         rand_tensor_dtype = torch.float16
+     mat = torch.rand(M, K, dtype=rand_tensor_dtype).cuda()
+     mat = mat.masked_fill_(mat == 0, 1)
+     return mat.to(dtype)
+
+
+ def generate_pruned_semi_structured_mat(M, K, dtype) -> "torch.Tensor":
+     """
+     :param M: number of rows
+     :param K: number of columns
+     :param dtype: data type of the matrix
+     :return: random matrix of shape (M, K) with 2:4 sparsity pattern
+     """
+     import torch
+     from compressed_tensors.quantization import FP8_DTYPE
+
+     mask = torch.Tensor([0, 0, 1, 1]).tile((M, K // 4)).bool()
+     rand_tensor_dtype = dtype
+     if dtype in [torch.int8, FP8_DTYPE]:
+         rand_tensor_dtype = torch.float16
+     mat = torch.rand(M, K, dtype=rand_tensor_dtype)
+     mat = mat.masked_fill_(mat == 0, 1)
+     if dtype == FP8_DTYPE:
+         # some float8_e4m3fn operations are not supported on CPU
+         mat = mat.cuda()
+         mask = mask.cuda()
+     mat = mat * mask
+     return mat.to(dtype)
+
+
+ def induce_sparsity(tensor, sparsity_ratio) -> "torch.Tensor":
+     """
+     Makes a tensor sparse by zeroing out a given fraction
+     of its smallest absolute values.
+
+     :param: weight_tensor (torch.Tensor): The input weight tensor.
+     :param: sparsity_ratio (float): Fraction of weights to be zeroed
+         (0 <= sparsity_ratio <= 1).
+     :returns: torch.Tensor: Sparse version of the input tensor.
+     """
+     import torch
+
+     if not (0 <= sparsity_ratio <= 1):
+         raise ValueError("Sparsity ratio must be between 0 and 1.")
+
+     # Flatten the tensor and compute the threshold for sparsity
+     flattened = tensor.view(-1)
+     k = int(sparsity_ratio * flattened.numel())
+
+     if k > 0:
+         threshold = torch.topk(flattened.abs(), k, largest=False).values.max()
+         sparse_tensor = torch.where(
+             tensor.abs() > threshold, tensor, torch.zeros_like(tensor)
+         )
+     else:
+         sparse_tensor = tensor
+
+     return sparse_tensor
+
+
+ def is_gpu_available():
+     """
+     :return: True if a GPU is available, False otherwise
+     """
+     try:
+         import torch  # noqa: F401
+
+         return torch.cuda.device_count() > 0
+     except ImportError:
+         return False
+
+
+ def requires_gpu(test_case):
+     return unittest.skipUnless(is_gpu_available(), "test requires GPU")(test_case)