compressed-tensors 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
```diff
--- compressed_tensors/compressors/model_compressors/model_compressor.py
+++ compressed_tensors/compressors/model_compressors/model_compressor.py
@@ -24,7 +24,6 @@ import compressed_tensors
 import torch
 import transformers
 from compressed_tensors.base import (
-    COMPRESSION_CONFIG_NAME,
     COMPRESSION_VERSION_NAME,
     QUANTIZATION_CONFIG_NAME,
     QUANTIZATION_METHOD_NAME,
@@ -39,6 +38,7 @@ from compressed_tensors.quantization import (
     apply_quantization_config,
     load_pretrained_quantization,
 )
+from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.utils import (
     is_module_quantized,
     iter_named_leaf_modules,
```
```diff
@@ -103,12 +103,14 @@ class ModelCompressor:
         :return: compressor for the configs, or None if model is not compressed
         """
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
-        compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
+        compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
+
         return cls.from_compression_config(compression_config)

     @classmethod
     def from_compression_config(
-        cls, compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
+        cls,
+        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
     ):
         """
         :param compression_config:
```
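With this change, `from_pretrained` reads the compression config from the attribute named by `QUANTIZATION_CONFIG_NAME` (presumably `"quantization_config"`, the key Hugging Face configs use) rather than the removed `COMPRESSION_CONFIG_NAME`. A minimal usage sketch, assuming a checkpoint whose config.json carries such an entry; the path is hypothetical:

```python
from compressed_tensors.compressors import ModelCompressor

# from_pretrained returns None when the loaded config has no
# quantization-config attribute, i.e. the model is not compressed
compressor = ModelCompressor.from_pretrained("path/to/quantized-model")
if compressor is None:
    print("model is not compressed")
```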
```diff
@@ -265,7 +267,11 @@ class ModelCompressor:
         state_dict = model.state_dict()

         compressed_state_dict = state_dict
-        quantized_modules_to_args = map_modules_to_quant_args(model)
+
+        quantized_modules_to_args: Dict[
+            str, QuantizationArgs
+        ] = map_modules_to_quant_args(model)
+
         if self.quantization_compressor is not None:
             compressed_state_dict = self.quantization_compressor.compress(
                 state_dict, names_to_scheme=quantized_modules_to_args
@@ -369,7 +375,13 @@ class ModelCompressor:
             update_parameter_data(module, data, param_name)


-def map_modules_to_quant_args(model: Module) -> Dict:
+def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
+    """
+    Given a pytorch model, map out the submodule name (usually linear layers)
+    to the QuantizationArgs
+
+    :param model: pytorch model
+    """
     quantized_modules_to_args = {}
     for name, submodule in iter_named_leaf_modules(model):
         if is_module_quantized(submodule):
```
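Per the new annotation, the returned mapping pairs each quantized submodule's name with the `QuantizationArgs` governing its weights. An illustrative sketch of the shape of that mapping; the module names and argument values below are hypothetical:

```python
from compressed_tensors.quantization.quant_args import QuantizationArgs

# illustrative only: what a Dict[str, QuantizationArgs] result might contain
quantized_modules_to_args = {
    "model.layers.0.self_attn.q_proj": QuantizationArgs(num_bits=4, group_size=128),
    "model.layers.0.mlp.down_proj": QuantizationArgs(num_bits=4, group_size=128),
}
```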
```diff
--- compressed_tensors/compressors/quantized_compressors/naive_quantized.py
+++ compressed_tensors/compressors/quantized_compressors/naive_quantized.py
@@ -93,9 +93,11 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
                 args=quantization_args,
                 dtype=quantization_args.pytorch_dtype(),
             )
+        else:
+            quantized_weight = weight

-            if device is not None:
-                quantized_weight = quantized_weight.to(device)
+        if device is not None:
+            quantized_weight = quantized_weight.to(device)

         return {"weight": quantized_weight}

```
```diff
--- compressed_tensors/compressors/quantized_compressors/pack_quantized.py
+++ compressed_tensors/compressors/quantized_compressors/pack_quantized.py
@@ -94,6 +94,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
                 args=quantization_args,
                 dtype=torch.int8,
             )
+        else:
+            quantized_weight = weight

         packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)
         weight_shape = torch.tensor(weight.shape)
```
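In both compressors, `quantized_weight` was previously assigned only inside the `if` branch, so a weight that could not be quantized left the name unbound when control reached the code below. A standalone sketch of the fixed control flow; the helper logic is stubbed for illustration:

```python
import torch

def compress_weight_sketch(weight: torch.Tensor, quantizable: bool) -> dict:
    # `quantizable` stands in for the library's can_quantize(...) check
    if quantizable:
        quantized_weight = weight.to(torch.int8)  # stands in for quantize(...)
    else:
        # the new fallback branch: without it, quantized_weight stayed
        # unbound here and the return below raised UnboundLocalError
        quantized_weight = weight
    return {"weight": quantized_weight}
```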
```diff
--- compressed_tensors/linear/compressed_linear.py
+++ compressed_tensors/linear/compressed_linear.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Dict, Tuple
+
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.quantization import (
@@ -53,7 +55,7 @@ class CompressedLinear(Linear):
         )

         # get the shape and dtype of compressed parameters
-        compression_params = module.compressor.compression_param_info(
+        compression_params: Dict[str, Tuple] = module.compressor.compression_param_info(
             module.weight.shape, quantization_scheme.weights
         )

```
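The new `Dict[str, Tuple]` annotation says `compression_param_info` maps each compressed parameter name to a tuple describing it. Assuming that tuple is (shape, dtype), a packed low-bit weight might look roughly like this; the names and values are illustrative, not taken verbatim from the library:

```python
import torch

# illustrative only: parameter names and tuple layout assumed from the
# Dict[str, Tuple] annotation above
compression_params = {
    "weight_packed": (torch.Size([4096, 512]), torch.int32),
    "weight_shape": (torch.Size([2]), torch.int32),
}
```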
```diff
--- compressed_tensors/quantization/lifecycle/apply.py
+++ compressed_tensors/quantization/lifecycle/apply.py
@@ -106,7 +106,8 @@ def apply_quantization_config(
     model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
 ) -> OrderedDict:
     """
-    Initializes the model for quantization in-place based on the given config
+    Initializes the model for quantization in-place based on the given config.
+    Optionally coverts quantizable modules to compressed_linear modules

     :param model: model to apply quantization config to
     :param config: quantization config
```
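A minimal sketch of calling this function, under the assumption that dict entries in `config_groups` validate into `QuantizationScheme` objects; the 8-bit scheme below is hypothetical:

```python
import torch
from compressed_tensors.quantization import (
    QuantizationConfig,
    apply_quantization_config,
)

# hypothetical config: 8-bit symmetric int weights for all Linear layers
config = QuantizationConfig(
    config_groups={
        "group_0": {
            "targets": ["Linear"],
            "weights": {"num_bits": 8, "type": "int", "symmetric": True},
        }
    },
)

model = torch.nn.Sequential(torch.nn.Linear(64, 64))
# initializes quantization parameters on matching modules in-place;
# per the docstring above, run_compressed=True may additionally swap
# quantizable modules for CompressedLinear
apply_quantization_config(model, config)
```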
```diff
--- compressed_tensors/quantization/quant_args.py
+++ compressed_tensors/quantization/quant_args.py
@@ -17,6 +17,7 @@ from enum import Enum
 from typing import Any, Dict, Optional, Union

 import torch
+from compressed_tensors.utils import Aliasable
 from pydantic import BaseModel, Field, field_validator, model_validator


@@ -53,17 +54,29 @@ class QuantizationStrategy(str, Enum):
     TOKEN = "token"


-class ActivationOrdering(str, Enum):
+class ActivationOrdering(Aliasable, str, Enum):
     """
     Enum storing strategies for activation ordering

     Group: reorder groups and weight\n
-    Weight: only reorder weight, not groups. Slightly lower latency and
-        accuracy compared to group actorder\n
+    Weight: only reorder weight, not groups. Slightly lower accuracy but also lower
+        latency when compared to group actorder\n
+    Dynamic: alias for Group\n
+    Static: alias for Weight\n
     """

     GROUP = "group"
     WEIGHT = "weight"
+    # aliases
+    DYNAMIC = "dynamic"
+    STATIC = "static"
+
+    @staticmethod
+    def get_aliases() -> Dict[str, str]:
+        return {
+            "dynamic": "group",
+            "static": "weight",
+        }


 class QuantizationArgs(BaseModel, use_enum_values=True):
```
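Given the alias table above, the new members are meant to compare equal to their canonical counterparts through the `Aliasable` mixin (added to `utils/helpers.py` later in this diff). A small sketch of the intended behavior:

```python
from compressed_tensors.quantization.quant_args import ActivationOrdering

# "dynamic" canonicalizes to "group" and "static" to "weight"
# under Aliasable.__eq__
assert ActivationOrdering("dynamic") == ActivationOrdering.GROUP
assert ActivationOrdering("static") == ActivationOrdering.WEIGHT
assert ActivationOrdering.GROUP == "dynamic"  # raw strings canonicalize too
```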
```diff
--- compressed_tensors/quantization/quant_config.py
+++ compressed_tensors/quantization/quant_config.py
@@ -132,9 +132,9 @@ class QuantizationConfig(BaseModel):
         `k_proj` and `v_proj` in their names. If this is not the case
         and kv_cache_scheme != None, the quantization of kv cache will fail
     :global_compression_ratio: optional informational config to report the model
-    compression ratio acheived by the quantization config
+        compression ratio acheived by the quantization config
     :ignore: optional list of layers to ignore from config_groups. Layers in this list
-    are not quantized even if they match up with a target in config_groups
+        are not quantized even if they match up with a target in config_groups
     """

     config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
```
```diff
--- compressed_tensors/quantization/quant_scheme.py
+++ compressed_tensors/quantization/quant_scheme.py
@@ -13,14 +13,14 @@
 # limitations under the License.

 from copy import deepcopy
-from typing import List, Optional
+from typing import Any, Dict, List, Optional

 from compressed_tensors.quantization.quant_args import (
     QuantizationArgs,
     QuantizationStrategy,
     QuantizationType,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, model_validator


 __all__ = [
@@ -36,7 +36,7 @@ class QuantizationScheme(BaseModel):
     of modules should be quantized

     :param targets: list of modules to apply the QuantizationArgs to, can be layer
-        names, layer types or a regular expression
+        names, layer types or a regular expression, typically ["Linear"]
     :param weights: quantization config for layer weights
     :param input_activations: quantization config for layer inputs
     :param output_activations: quantization config for layer outputs
@@ -47,27 +47,20 @@ class QuantizationScheme(BaseModel):
     input_activations: Optional[QuantizationArgs] = None
     output_activations: Optional[QuantizationArgs] = None

-    @classmethod
-    def default_scheme(
-        cls,
-        targets: Optional[List[str]] = None,
-    ):
-
-        if targets is None:
-            # default to quantizing all Linear layers
-            targets = ["Linear"]
-
-        # by default, activations and weights are left unquantized
-        weights = None
-        input_activations = None
-        output_activations = None
-
-        return cls(
-            targets=targets,
-            weights=weights,
-            input_activations=input_activations,
-            output_activations=output_activations,
-        )
+    @model_validator(mode="after")
+    def validate_model_after(model: "QuantizationArgs") -> Dict[str, Any]:
+        inputs = model.input_activations
+        outputs = model.output_activations
+
+        if inputs is not None:
+            if inputs.actorder is not None:
+                raise ValueError("Cannot apply actorder to input activations")
+
+        if outputs is not None:
+            if outputs.actorder is not None:
+                raise ValueError("Cannot apply actorder to output activations")
+
+        return model


 """
```
```diff
--- compressed_tensors/utils/helpers.py
+++ compressed_tensors/utils/helpers.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Any, Optional
+from typing import Any, Dict, Optional

 import torch
 from transformers import AutoConfig
@@ -24,6 +24,7 @@ __all__ = [
     "tensor_follows_mask_structure",
     "replace_module",
     "is_compressed_tensors_config",
+    "Aliasable",
 ]

 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -119,3 +120,34 @@ def is_compressed_tensors_config(compression_config: Any) -> bool:
         return isinstance(compression_config, CompressedTensorsConfig)
     except ImportError:
         return False
+
+
+class Aliasable:
+    """
+    A mixin for enums to allow aliasing of enum members
+
+    Example:
+    >>> class MyClass(Aliasable, int, Enum):
+    >>>     ...
+    """
+
+    @staticmethod
+    def get_aliases() -> Dict[str, str]:
+        raise NotImplementedError()
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            aliases = self.get_aliases()
+            return self.value == other.value or (
+                aliases.get(self.value, self.value)
+                == aliases.get(other.value, other.value)
+            )
+        else:
+            aliases = self.get_aliases()
+            self_value = aliases.get(self.value, self.value)
+            other_value = aliases.get(other, other)
+            return self_value == other_value
+
+    def __hash__(self):
+        canonical_value = self.aliases.get(self.value, self.value)
+        return hash(canonical_value)
```
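Mirroring the docstring's example, a self-contained sketch of how the mixin is intended to be used; the `Color` enum is hypothetical. (Note that `__hash__` above consults `self.aliases` rather than `get_aliases()`, so hashing members of an `Aliasable` enum may not work as written in this release; the sketch sticks to equality.)

```python
from enum import Enum
from typing import Dict

from compressed_tensors.utils import Aliasable

class Color(Aliasable, str, Enum):
    RED = "red"
    CRIMSON = "crimson"  # alias of RED

    @staticmethod
    def get_aliases() -> Dict[str, str]:
        return {"crimson": "red"}

assert Color.CRIMSON == Color.RED  # members compare via canonical values
assert Color.RED == "crimson"      # raw strings are canonicalized as well
```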
```diff
--- compressed_tensors/version.py
+++ compressed_tensors/version.py
@@ -17,7 +17,7 @@ Functionality for storing and setting the version info for SparseML
 """


-version_base = "0.8.0"
+version_base = "0.8.1"
 is_release = True  # change to True to set the generated version as a release version


```
```diff
--- compressed_tensors-0.8.0.dist-info/METADATA
+++ compressed_tensors-0.8.1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors
-Version: 0.8.0
+Version: 0.8.1
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
```
```diff
--- compressed_tensors-0.8.0.dist-info/RECORD
+++ compressed_tensors-0.8.1.dist-info/RECORD
@@ -1,15 +1,15 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=Z9w80ldLHldBZrnrRolznhe-AZsAg5ftvHw17kgPs10,1585
+compressed_tensors/version.py,sha256=U6bppqc5inOxvcJDHWhDoSXvBrvbH425oJM2WG7TECY,1585
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=D9TNwQcjanDiAHODPbg8JUqc66e3j50rctY7A708NEs,6743
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=XJgPsq8KiDfiR4e8bSI38lmoOd2ApqRk1aPcXS2obqY,15600
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=sxh1TvW1Bp9YJE41hW0XZfd0kYYB85nhJvBLVRTDcV0,15886
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=09UJq68Pht6Bf-4iP9xYl3tetKsncNPHD8IAGbePsr4,714
 compressed_tensors/compressors/quantized_compressors/base.py,sha256=K1KOnS6Y8nUA1-HN7VhyfsDc01nilW0WfXMUhuD-l8w,5954
-compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=Mmfr-hap-4zw7CzE1mXi0UirknqGidNxw38GGWVgTqM,4916
-compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=9H8UrG5v1GRtslLjOEiUM2dnyxJnR-HJmlsFezQs_r0,7706
+compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=MMUya3Iwarm0BkeYXqKTUnEDPiBw98GKF09QiNST45k,4960
+compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=1CLwvBlu4AtGkuo3IisD1-rQzwLiA6hE1bCc-pF_XGo,7758
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=i2TESH27l7KXeOhJ6hShIoI904XX96l-cRQiMR6MAaU,704
 compressed_tensors/compressors/sparse_compressors/base.py,sha256=Ua4rUSGyucEs-YJI5z3oIUF-zqQLrFsQ9f-qKasEdUM,4410
 compressed_tensors/compressors/sparse_compressors/dense.py,sha256=lSKNWRx6H7aUqaJj1j4qbXk8Gkm1UohbnvW1Rvq6Ra4,1284
@@ -21,13 +21,13 @@ compressed_tensors/config/base.py,sha256=3bFAdwDZjOt-U3fneOeL8dRci-PS8DqstnXuQVt
 compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
 compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
-compressed_tensors/linear/compressed_linear.py,sha256=0jTTf6XxOAjAYs3tvFtgiNMAO4W10sSeR-pdH2M413g,3218
+compressed_tensors/linear/compressed_linear.py,sha256=MJa-UfoKhIkdUWRD1shrXXri2cOwR5GK0a4t4bNYosM,3268
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=osjNwCSB6tcyH9Qeg5sHEiB-bHyi3XJ8TzkGVJuGTc4,8711
-compressed_tensors/quantization/quant_config.py,sha256=NCiMvUMnnz5kTyAkDylxjtEGQnjgsIYIeNR2zyHEdTQ,10371
-compressed_tensors/quantization/quant_scheme.py,sha256=5ggPz5sqEfTUgvJJeiPIINA74QtO-08hb3szsm7UHGE,6000
+compressed_tensors/quantization/quant_args.py,sha256=jwC__lSmuiJ2qSJYYZGgWgQNbZu6YhhS0e-qugrTNXE,9058
+compressed_tensors/quantization/quant_config.py,sha256=K6kOZ6LDXpFlqsVzR4NEATV6y6Ea83rJWnNyVlvw-pI,10379
+compressed_tensors/quantization/quant_scheme.py,sha256=eQ0JrRZ80GX69fpwW87VzPzzhajhk4mUaJScjk82OY4,6010
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
-compressed_tensors/quantization/lifecycle/apply.py,sha256=pdCqxXnVw7HoDDanaOtek13g8x_nb54CBUlfuMdhFG4,14993
+compressed_tensors/quantization/lifecycle/apply.py,sha256=jCUSgeOBtagE5IhgIbyYMZ4kv8Rm20VGJ4IxXZ5HAnw,15066
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
 compressed_tensors/quantization/lifecycle/forward.py,sha256=QPL6-vKOFuKdKIEsVqMhsw4x552Jpm2sqO0oeChbnrM,12941
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
@@ -37,14 +37,14 @@ compressed_tensors/quantization/utils/helpers.py,sha256=DBP-sGRpGAY01K0LFE7qqonN
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
 compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
-compressed_tensors/utils/helpers.py,sha256=hWGIR0W7ENHwdC7wW2SQJJiCF9-xOu_u3fY2RzLyYg4,4101
+compressed_tensors/utils/helpers.py,sha256=T3p0TbhWbQIRjL6Up2Z7UhZO5jpR6WxBhYPPvrhE6lE,5018
 compressed_tensors/utils/offload.py,sha256=d9q8LNe8HyF8tOjgjA7QGLD3HRysmNp0d8eBbdqBgIM,4089
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.8.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.8.0.dist-info/METADATA,sha256=lRjH5wempREQ2lTFNqzMusIW95YHN4rF8yd73MVvOe0,6782
-compressed_tensors-0.8.0.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
-compressed_tensors-0.8.0.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.8.0.dist-info/RECORD,,
+compressed_tensors-0.8.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.8.1.dist-info/METADATA,sha256=rDPAoGePUI_yRN7LRP23t3vKWhDfxPbeNR1TX6vpPPI,6782
+compressed_tensors-0.8.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+compressed_tensors-0.8.1.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.8.1.dist-info/RECORD,,
```
```diff
--- compressed_tensors-0.8.0.dist-info/WHEEL
+++ compressed_tensors-0.8.1.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.45.0)
+Generator: bdist_wheel (0.45.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

```