compressed-tensors-nightly 0.7.1.20241023__py3-none-any.whl → 0.7.1.20241025__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,16 +12,17 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from enum import Enum
15
+ from enum import Enum, unique
16
16
  from typing import List, Optional
17
17
 
18
18
  from compressed_tensors.registry import RegistryMixin
19
19
  from pydantic import BaseModel
20
20
 
21
21
 
22
- __all__ = ["SparsityCompressionConfig", "CompressionFormat"]
22
+ __all__ = ["SparsityCompressionConfig", "CompressionFormat", "SparsityStructure"]
23
23
 
24
24
 
25
+ @unique
25
26
  class CompressionFormat(Enum):
26
27
  dense = "dense"
27
28
  sparse_bitmask = "sparse-bitmask"
@@ -32,6 +33,63 @@ class CompressionFormat(Enum):
32
33
  marlin_24 = "marlin-24"
33
34
 
34
35
 
36
+ @unique
37
+ class SparsityStructure(Enum):
38
+ """
39
+ An enumeration to represent different sparsity structures.
40
+
41
+ Attributes
42
+ ----------
43
+ TWO_FOUR : str
44
+ Represents a 2:4 sparsity structure.
45
+ ZERO_ZERO : str
46
+ Represents a 0:0 sparsity structure.
47
+ UNSTRUCTURED : str
48
+ Represents an unstructured sparsity structure.
49
+
50
+ Examples
51
+ --------
52
+ >>> SparsityStructure('2:4')
53
+ <SparsityStructure.TWO_FOUR: '2:4'>
54
+
55
+ >>> SparsityStructure('unstructured')
56
+ <SparsityStructure.UNSTRUCTURED: 'unstructured'>
57
+
58
+ >>> SparsityStructure('2:4') == SparsityStructure.TWO_FOUR
59
+ True
60
+
61
+ >>> SparsityStructure('UNSTRUCTURED') == SparsityStructure.UNSTRUCTURED
62
+ True
63
+
64
+ >>> SparsityStructure(None) == SparsityStructure.UNSTRUCTURED
65
+ True
66
+
67
+ >>> SparsityStructure('invalid')
68
+ Traceback (most recent call last):
69
+ ...
70
+ ValueError: invalid is not a valid SparsityStructure
71
+ """
72
+
73
+ TWO_FOUR = "2:4"
74
+ UNSTRUCTURED = "unstructured"
75
+ ZERO_ZERO = "0:0"
76
+
77
+ def __new__(cls, value):
78
+ obj = object.__new__(cls)
79
+ obj._value_ = value.lower() if value is not None else value
80
+ return obj
81
+
82
+ @classmethod
83
+ def _missing_(cls, value):
84
+ # Handle None and case-insensitive values
85
+ if value is None:
86
+ return cls.UNSTRUCTURED
87
+ for member in cls:
88
+ if member.value == value.lower():
89
+ return member
90
+ raise ValueError(f"{value} is not a valid {cls.__name__}")
91
+
92
+
35
93
  class SparsityCompressionConfig(RegistryMixin, BaseModel):
36
94
  """
37
95
  Base data class for storing sparsity compression parameters
@@ -64,14 +64,12 @@ def set_module_for_calibration(module: Module, quantize_weights_upfront: bool =
64
64
  quantization_args=module.quantization_scheme.weights,
65
65
  )
66
66
 
67
- observer = module.weight_observer
68
-
69
- g_idx = getattr(module, "weight_g_idx", None)
70
-
71
67
  offloaded = is_module_offloaded(module)
72
68
  if offloaded:
73
69
  module._hf_hook.pre_forward(module)
74
70
 
71
+ observer = module.weight_observer
72
+ g_idx = getattr(module, "weight_g_idx", None)
75
73
  scale, zero_point = observer(module.weight, g_idx=g_idx)
76
74
  update_parameter_data(module, scale, "weight_scale")
77
75
  update_parameter_data(module, zero_point, "weight_zero_point")
@@ -14,7 +14,6 @@
14
14
 
15
15
 
16
16
  from compressed_tensors.quantization.quant_config import QuantizationStatus
17
- from compressed_tensors.quantization.utils import is_kv_cache_quant_scheme
18
17
  from torch.nn import Module
19
18
 
20
19
 
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from collections import Counter
16
- from typing import Optional, Tuple
16
+ from typing import Tuple
17
17
 
18
18
  import torch
19
19
  from compressed_tensors.quantization.quant_args import (
@@ -70,7 +70,9 @@ class MovingAverageMSEObserver(Observer):
70
70
  absolute_min_val = torch.amin(observed, dim=reduce_dims, keepdims=True)
71
71
  absolute_max_val = torch.amax(observed, dim=reduce_dims, keepdims=True)
72
72
 
73
- best = torch.full_like(absolute_min_val, torch.finfo(absolute_min_val.dtype).max)
73
+ best = torch.full_like(
74
+ absolute_min_val, torch.finfo(absolute_min_val.dtype).max
75
+ )
74
76
  min_val = torch.ones_like(absolute_min_val)
75
77
  max_val = torch.zeros_like(absolute_max_val)
76
78
  for i in range(int(self.maxshrink * self.grid)):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: compressed-tensors-nightly
3
- Version: 0.7.1.20241023
3
+ Version: 0.7.1.20241025
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -17,7 +17,7 @@ compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py,sha256=4fKwC
17
17
  compressed_tensors/compressors/sparse_quantized_compressors/__init__.py,sha256=4f_cwcKXB1nVVMoiKgTFAc8jAPjPLElo-Df_EDm1_xw,675
18
18
  compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=akqE7eW8CLTslpWRxERaZ8R0TSm1lS7D1bgZXKL0xi8,9427
19
19
  compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
20
- compressed_tensors/config/base.py,sha256=BNTFKy12isY7qblwxdi_R1f00EzgrNOXLrfxqLCPT8w,1903
20
+ compressed_tensors/config/base.py,sha256=3bFAdwDZjOt-U3fneOeL8dRci-PS8DqstnXuQVtkfiQ,3435
21
21
  compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
22
22
  compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
23
23
  compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
@@ -29,17 +29,17 @@ compressed_tensors/quantization/quant_config.py,sha256=NCiMvUMnnz5kTyAkDylxjtEGQ
29
29
  compressed_tensors/quantization/quant_scheme.py,sha256=5ggPz5sqEfTUgvJJeiPIINA74QtO-08hb3szsm7UHGE,6000
30
30
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
31
31
  compressed_tensors/quantization/lifecycle/apply.py,sha256=czaayvpeUYyWRJhO_klffw6esptOgA9sBKL5TWQcRdw,15805
32
- compressed_tensors/quantization/lifecycle/calibration.py,sha256=gPSD3kiH4VuU6nq-OLbOmhBGaMXsebEwLm4PkEnUhf0,3043
32
+ compressed_tensors/quantization/lifecycle/calibration.py,sha256=fJ2RDL3E4hmWR8v8nYhq_tv31K8WV00o_4Y3xr7c37Y,3041
33
33
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
34
34
  compressed_tensors/quantization/lifecycle/forward.py,sha256=8GjOnx4rwOZZqSDTdnejNOY2DVTjNDzH0DfY_rQam6k,16575
35
- compressed_tensors/quantization/lifecycle/frozen.py,sha256=8myzxsz5h5Odh5cIB2lDHb7xLRYBYnAhA1PO8YGuCtM,1839
35
+ compressed_tensors/quantization/lifecycle/frozen.py,sha256=71TsgS0Uxku0NomdWOBJsVfXCGTne-Gx9zUEMsCmw5Q,1764
36
36
  compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
37
37
  compressed_tensors/quantization/lifecycle/initialize.py,sha256=lKoFy18PjbSklyum7f4hoLuWtHShBKax7JDTBzPlCqM,8839
38
38
  compressed_tensors/quantization/observers/__init__.py,sha256=DYrttzq-8MHLZUzpX-xzzm4hrw6HcXkMkux82KBKb1M,738
39
39
  compressed_tensors/quantization/observers/base.py,sha256=5ovQicWPYHjIxr6-EkQ4lgOX0PpI9g23iSzKpxjM1Zg,8420
40
- compressed_tensors/quantization/observers/helpers.py,sha256=o9hg4E9b5cCb5PaEAj6jHiUWkNrKtYtv0b1pGg-T9B4,5516
40
+ compressed_tensors/quantization/observers/helpers.py,sha256=nUFdNEIACiPBfFwNYDGCXOvw6tf7j6jfTvDwImHKMPg,5506
41
41
  compressed_tensors/quantization/observers/min_max.py,sha256=sQXqU3z-voxIDfR_9mQzwQUflZj2sASm_G8CYaXntFw,3865
42
- compressed_tensors/quantization/observers/mse.py,sha256=9JRbvXo0VKLrgsTNuVlQ7AV87wwjRUuQludG0v7IJbI,6058
42
+ compressed_tensors/quantization/observers/mse.py,sha256=G5Y9v4MqXUVcKxBSmCFFW3p_7rlu-6scqLIN88ng-sE,6080
43
43
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
44
44
  compressed_tensors/quantization/utils/helpers.py,sha256=y4LEyC2oUd876ZMdALWKGH3Ct5EgBJZV4id_NUjTGH8,9531
45
45
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
@@ -51,8 +51,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
51
51
  compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
52
52
  compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
53
53
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
54
- compressed_tensors_nightly-0.7.1.20241023.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
55
- compressed_tensors_nightly-0.7.1.20241023.dist-info/METADATA,sha256=VTd017y76OKUEeK4DGjn_qbgympazs5s85NMVClrTUM,6799
56
- compressed_tensors_nightly-0.7.1.20241023.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
57
- compressed_tensors_nightly-0.7.1.20241023.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
58
- compressed_tensors_nightly-0.7.1.20241023.dist-info/RECORD,,
54
+ compressed_tensors_nightly-0.7.1.20241025.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
55
+ compressed_tensors_nightly-0.7.1.20241025.dist-info/METADATA,sha256=c5Ij0aprxvjscvNlBWuoUIUkNE0Fd2sakpFBRaoZI2A,6799
56
+ compressed_tensors_nightly-0.7.1.20241025.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
57
+ compressed_tensors_nightly-0.7.1.20241025.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
58
+ compressed_tensors_nightly-0.7.1.20241025.dist-info/RECORD,,