compressed-tensors-nightly 0.9.2.20250326__py3-none-any.whl → 0.9.2.20250328__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -19,7 +19,7 @@ import os
19
19
  import re
20
20
  from contextlib import contextmanager
21
21
  from copy import deepcopy
22
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, TypeVar, Union
22
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, TypeVar, Union
23
23
 
24
24
  import compressed_tensors
25
25
  import torch
@@ -522,10 +522,13 @@ class ModelCompressor:
522
522
  update_parameter_data(module, data, param_name)
523
523
 
524
524
 
525
- def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
525
+ def map_modules_to_quant_args(
526
+ model: Module,
527
+ ) -> Dict[str, Union[QuantizationArgs, Tuple[QuantizationArgs, QuantizationArgs]]]:
526
528
  """
527
529
  Given a pytorch model, map out the submodule name (usually linear layers)
528
- to the QuantizationArgs
530
+ to the weight QuantizationArgs. If running input activation quantization, will also
531
+ map to the input QuantizationArgs in a tuple.
529
532
 
530
533
  :param model: pytorch model
531
534
  """
@@ -535,6 +538,12 @@ def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
535
538
  if submodule.quantization_scheme.weights is not None:
536
539
  name = fix_fsdp_module_name(name)
537
540
  quantized_modules_to_args[name] = submodule.quantization_scheme.weights
541
+ if submodule.quantization_scheme.input_activations is not None:
542
+ weight_args = quantized_modules_to_args.get(name)
543
+ quantized_modules_to_args[name] = (
544
+ weight_args,
545
+ submodule.quantization_scheme.input_activations,
546
+ )
538
547
 
539
548
  return quantized_modules_to_args
540
549
 
@@ -82,11 +82,32 @@ class BaseQuantizationCompressor(BaseCompressor):
82
82
  """
83
83
  compressed_dict = {}
84
84
  weight_suffix = ".weight"
85
+ input_zp_suffix = ".input_zero_point"
86
+ weight_zp_suffix = ".weight_zero_point"
85
87
  _LOGGER.debug(
86
88
  f"Compressing model with {len(model_state)} parameterized layers..."
87
89
  )
88
90
 
89
91
  for name, value in tqdm(model_state.items(), desc="Quantized Compression"):
92
+ # check if the parameter we're compressing is the weight zp
93
+ # or the input zp
94
+ is_weight_zp = name.endswith(weight_zp_suffix)
95
+ is_input_zp = name.endswith(input_zp_suffix)
96
+
97
+ # if we're saving the weight zp, fetch weight quant args
98
+ if is_weight_zp:
99
+ quant_args_zp = names_to_scheme.get(name[: -(len(weight_zp_suffix))])
100
+ if isinstance(quant_args_zp, tuple):
101
+ # If tuple, first value is weight args, second is input args
102
+ quant_args_zp = quant_args_zp[0]
103
+
104
+ # if we're saving the input zp, fetch input quant args
105
+ if is_input_zp:
106
+ input_args_zp = names_to_scheme.get(name[: -(len(input_zp_suffix))])
107
+ if isinstance(input_args_zp, tuple):
108
+ # If tuple, first value is weight args, second is input args
109
+ input_args_zp = input_args_zp[-1]
110
+
90
111
  if name.endswith(weight_suffix):
91
112
  prefix = name[: -(len(weight_suffix))]
92
113
  scale = model_state.get(merge_names(prefix, "weight_scale"), None)
@@ -94,7 +115,11 @@ class BaseQuantizationCompressor(BaseCompressor):
94
115
  g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None)
95
116
  if scale is not None:
96
117
  # weight is quantized, compress it
97
- quant_args = names_to_scheme[prefix]
118
+ if isinstance(names_to_scheme[prefix], tuple):
119
+ quant_args = names_to_scheme[prefix][0]
120
+ else:
121
+ quant_args = names_to_scheme[prefix]
122
+
98
123
  compressed_data = self.compress_weight(
99
124
  weight=value,
100
125
  scale=scale,
@@ -107,7 +132,11 @@ class BaseQuantizationCompressor(BaseCompressor):
107
132
  compressed_dict[merge_names(prefix, key)] = value
108
133
  else:
109
134
  compressed_dict[name] = value.to("cpu")
110
- elif name.endswith("zero_point") and torch.all(value == 0):
135
+ # only save if asym
136
+ elif is_weight_zp and quant_args_zp.symmetric:
137
+ continue
138
+ # only save if asym
139
+ elif is_input_zp and input_args_zp.symmetric:
111
140
  continue
112
141
  elif name.endswith("g_idx") and torch.any(value <= -1):
113
142
  continue
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors-nightly
3
- Version: 0.9.2.20250326
3
+ Version: 0.9.2.20250328
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -5,9 +5,9 @@ compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1
5
5
  compressed_tensors/compressors/base.py,sha256=x8dQrWVEurynXw03yHJZTaAmrRTOsdZJoHjmvs0IKwk,7002
6
6
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
7
7
  compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
8
- compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=AmIE1SoNRH1fNgQALfNkdQo8y5tePVpdWUgLIOtf5rg,22569
8
+ compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=n0gcrKwefJuO6b4LNjCynJQf7NNqNHDcoLlzZgTCPGc,23080
9
9
  compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=09UJq68Pht6Bf-4iP9xYl3tetKsncNPHD8IAGbePsr4,714
10
- compressed_tensors/compressors/quantized_compressors/base.py,sha256=cp8S1Kr3HhlMHIz7k4vGo-qxxdknEC3qP1QLIhNnwRA,7217
10
+ compressed_tensors/compressors/quantized_compressors/base.py,sha256=GXTSWgFAhksbno94Ulpth9-YM4a7NsDlx4oQGGB0swQ,8567
11
11
  compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=fd0KlkSx6bvZ3xwIkK3jEUdPSUPs56Eua4dEDOtzKW0,5150
12
12
  compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=zH2PocRe_T5yt1-3kLdZH9AUQWQyaVOi4U9nEJiYaWA,8509
13
13
  compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
@@ -45,8 +45,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
45
45
  compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
46
46
  compressed_tensors/utils/safetensors_load.py,sha256=5SeM2hzLh77Ne8Vk7qR6-km7cf8bhov41ExpWITqX3A,11470
47
47
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
48
- compressed_tensors_nightly-0.9.2.20250326.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
- compressed_tensors_nightly-0.9.2.20250326.dist-info/METADATA,sha256=8mSl6Eo2-XJT8E-61HvVBpm0MeEycgqlV7xd-EDSzV8,7014
50
- compressed_tensors_nightly-0.9.2.20250326.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
51
- compressed_tensors_nightly-0.9.2.20250326.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
52
- compressed_tensors_nightly-0.9.2.20250326.dist-info/RECORD,,
48
+ compressed_tensors_nightly-0.9.2.20250328.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
+ compressed_tensors_nightly-0.9.2.20250328.dist-info/METADATA,sha256=a8qKM1jfhBRUQQjTZxApJbiogfTMhCVgoUfjSWDLyrQ,7014
50
+ compressed_tensors_nightly-0.9.2.20250328.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
51
+ compressed_tensors_nightly-0.9.2.20250328.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
52
+ compressed_tensors_nightly-0.9.2.20250328.dist-info/RECORD,,