compressed-tensors-nightly 0.8.0.20241122__py3-none-any.whl → 0.8.0.20241124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,7 +24,6 @@ import compressed_tensors
24
24
  import torch
25
25
  import transformers
26
26
  from compressed_tensors.base import (
27
- COMPRESSION_CONFIG_NAME,
28
27
  COMPRESSION_VERSION_NAME,
29
28
  QUANTIZATION_CONFIG_NAME,
30
29
  QUANTIZATION_METHOD_NAME,
@@ -39,6 +38,7 @@ from compressed_tensors.quantization import (
39
38
  apply_quantization_config,
40
39
  load_pretrained_quantization,
41
40
  )
41
+ from compressed_tensors.quantization.quant_args import QuantizationArgs
42
42
  from compressed_tensors.quantization.utils import (
43
43
  is_module_quantized,
44
44
  iter_named_leaf_modules,
@@ -103,12 +103,14 @@ class ModelCompressor:
103
103
  :return: compressor for the configs, or None if model is not compressed
104
104
  """
105
105
  config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
106
- compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
106
+ compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
107
+
107
108
  return cls.from_compression_config(compression_config)
108
109
 
109
110
  @classmethod
110
111
  def from_compression_config(
111
- cls, compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
112
+ cls,
113
+ compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
112
114
  ):
113
115
  """
114
116
  :param compression_config:
@@ -265,7 +267,11 @@ class ModelCompressor:
265
267
  state_dict = model.state_dict()
266
268
 
267
269
  compressed_state_dict = state_dict
268
- quantized_modules_to_args = map_modules_to_quant_args(model)
270
+
271
+ quantized_modules_to_args: Dict[
272
+ str, QuantizationArgs
273
+ ] = map_modules_to_quant_args(model)
274
+
269
275
  if self.quantization_compressor is not None:
270
276
  compressed_state_dict = self.quantization_compressor.compress(
271
277
  state_dict, names_to_scheme=quantized_modules_to_args
@@ -369,7 +375,13 @@ class ModelCompressor:
369
375
  update_parameter_data(module, data, param_name)
370
376
 
371
377
 
372
- def map_modules_to_quant_args(model: Module) -> Dict:
378
+ def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
379
+ """
380
+ Given a pytorch model, map out the submodule name (usually linear layers)
381
+ to the QuantizationArgs
382
+
383
+ :param model: pytorch model
384
+ """
373
385
  quantized_modules_to_args = {}
374
386
  for name, submodule in iter_named_leaf_modules(model):
375
387
  if is_module_quantized(submodule):
@@ -12,6 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import Dict, Tuple
16
+
15
17
  import torch
16
18
  from compressed_tensors.compressors.base import BaseCompressor
17
19
  from compressed_tensors.quantization import (
@@ -53,7 +55,7 @@ class CompressedLinear(Linear):
53
55
  )
54
56
 
55
57
  # get the shape and dtype of compressed parameters
56
- compression_params = module.compressor.compression_param_info(
58
+ compression_params: Dict[str, Tuple] = module.compressor.compression_param_info(
57
59
  module.weight.shape, quantization_scheme.weights
58
60
  )
59
61
 
@@ -106,7 +106,8 @@ def apply_quantization_config(
106
106
  model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
107
107
  ) -> OrderedDict:
108
108
  """
109
- Initializes the model for quantization in-place based on the given config
109
+ Initializes the model for quantization in-place based on the given config.
110
+ Optionally converts quantizable modules to compressed_linear modules
110
111
 
111
112
  :param model: model to apply quantization config to
112
113
  :param config: quantization config
@@ -132,9 +132,9 @@ class QuantizationConfig(BaseModel):
132
132
  `k_proj` and `v_proj` in their names. If this is not the case
133
133
  and kv_cache_scheme != None, the quantization of kv cache will fail
134
134
  :global_compression_ratio: optional informational config to report the model
135
- compression ratio achieved by the quantization config
135
+ compression ratio achieved by the quantization config
136
136
  :ignore: optional list of layers to ignore from config_groups. Layers in this list
137
- are not quantized even if they match up with a target in config_groups
137
+ are not quantized even if they match up with a target in config_groups
138
138
  """
139
139
 
140
140
  config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
@@ -36,7 +36,7 @@ class QuantizationScheme(BaseModel):
36
36
  of modules should be quantized
37
37
 
38
38
  :param targets: list of modules to apply the QuantizationArgs to, can be layer
39
- names, layer types or a regular expression
39
+ names, layer types or a regular expression, typically ["Linear"]
40
40
  :param weights: quantization config for layer weights
41
41
  :param input_activations: quantization config for layer inputs
42
42
  :param output_activations: quantization config for layer outputs
@@ -47,28 +47,6 @@ class QuantizationScheme(BaseModel):
47
47
  input_activations: Optional[QuantizationArgs] = None
48
48
  output_activations: Optional[QuantizationArgs] = None
49
49
 
50
- @classmethod
51
- def default_scheme(
52
- cls,
53
- targets: Optional[List[str]] = None,
54
- ):
55
-
56
- if targets is None:
57
- # default to quantizing all Linear layers
58
- targets = ["Linear"]
59
-
60
- # by default, activations and weights are left unquantized
61
- weights = None
62
- input_activations = None
63
- output_activations = None
64
-
65
- return cls(
66
- targets=targets,
67
- weights=weights,
68
- input_activations=input_activations,
69
- output_activations=output_activations,
70
- )
71
-
72
50
 
73
51
  """
74
52
  Pre-Set Quantization Scheme Args
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: compressed-tensors-nightly
3
- Version: 0.8.0.20241122
3
+ Version: 0.8.0.20241124
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -5,7 +5,7 @@ compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1
5
5
  compressed_tensors/compressors/base.py,sha256=D9TNwQcjanDiAHODPbg8JUqc66e3j50rctY7A708NEs,6743
6
6
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
7
7
  compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
8
- compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=XJgPsq8KiDfiR4e8bSI38lmoOd2ApqRk1aPcXS2obqY,15600
8
+ compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=sxh1TvW1Bp9YJE41hW0XZfd0kYYB85nhJvBLVRTDcV0,15886
9
9
  compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=09UJq68Pht6Bf-4iP9xYl3tetKsncNPHD8IAGbePsr4,714
10
10
  compressed_tensors/compressors/quantized_compressors/base.py,sha256=K1KOnS6Y8nUA1-HN7VhyfsDc01nilW0WfXMUhuD-l8w,5954
11
11
  compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=Mmfr-hap-4zw7CzE1mXi0UirknqGidNxw38GGWVgTqM,4916
@@ -21,13 +21,13 @@ compressed_tensors/config/base.py,sha256=3bFAdwDZjOt-U3fneOeL8dRci-PS8DqstnXuQVt
21
21
  compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
22
22
  compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
23
23
  compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
24
- compressed_tensors/linear/compressed_linear.py,sha256=0jTTf6XxOAjAYs3tvFtgiNMAO4W10sSeR-pdH2M413g,3218
24
+ compressed_tensors/linear/compressed_linear.py,sha256=MJa-UfoKhIkdUWRD1shrXXri2cOwR5GK0a4t4bNYosM,3268
25
25
  compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
26
26
  compressed_tensors/quantization/quant_args.py,sha256=osjNwCSB6tcyH9Qeg5sHEiB-bHyi3XJ8TzkGVJuGTc4,8711
27
- compressed_tensors/quantization/quant_config.py,sha256=NCiMvUMnnz5kTyAkDylxjtEGQnjgsIYIeNR2zyHEdTQ,10371
28
- compressed_tensors/quantization/quant_scheme.py,sha256=5ggPz5sqEfTUgvJJeiPIINA74QtO-08hb3szsm7UHGE,6000
27
+ compressed_tensors/quantization/quant_config.py,sha256=K6kOZ6LDXpFlqsVzR4NEATV6y6Ea83rJWnNyVlvw-pI,10379
28
+ compressed_tensors/quantization/quant_scheme.py,sha256=o3SaeNKWFVqbOtk0cLlwHh_YenU4hnteXFtH5ey9zLk,5452
29
29
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
30
- compressed_tensors/quantization/lifecycle/apply.py,sha256=pdCqxXnVw7HoDDanaOtek13g8x_nb54CBUlfuMdhFG4,14993
30
+ compressed_tensors/quantization/lifecycle/apply.py,sha256=jCUSgeOBtagE5IhgIbyYMZ4kv8Rm20VGJ4IxXZ5HAnw,15066
31
31
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
32
32
  compressed_tensors/quantization/lifecycle/forward.py,sha256=QPL6-vKOFuKdKIEsVqMhsw4x552Jpm2sqO0oeChbnrM,12941
33
33
  compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
@@ -43,8 +43,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
43
43
  compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
44
44
  compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
45
45
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
46
- compressed_tensors_nightly-0.8.0.20241122.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
47
- compressed_tensors_nightly-0.8.0.20241122.dist-info/METADATA,sha256=4kWPSxMdTxocUah2jo1PG8Y_hO-rCf60bj_sVukBsDw,6799
48
- compressed_tensors_nightly-0.8.0.20241122.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
49
- compressed_tensors_nightly-0.8.0.20241122.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
50
- compressed_tensors_nightly-0.8.0.20241122.dist-info/RECORD,,
46
+ compressed_tensors_nightly-0.8.0.20241124.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
47
+ compressed_tensors_nightly-0.8.0.20241124.dist-info/METADATA,sha256=WfFSOxQOHVkPwaftGCghUA9za3NTcO7HeWRyBPbXGxk,6799
48
+ compressed_tensors_nightly-0.8.0.20241124.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
49
+ compressed_tensors_nightly-0.8.0.20241124.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
50
+ compressed_tensors_nightly-0.8.0.20241124.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.0)
2
+ Generator: bdist_wheel (0.45.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5