compressed-tensors 0.13.0__py3-none-any.whl → 0.13.1a20260108__py3-none-any.whl

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -19,7 +19,7 @@ import torch
19
19
  from compressed_tensors.config import SparsityCompressionConfig
20
20
  from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
21
21
  from compressed_tensors.registry import RegistryMixin
22
- from compressed_tensors.utils import has_offloaded_params
22
+ from compressed_tensors.utils import has_offloaded_params, register_offload_parameter
23
23
  from torch import Tensor
24
24
  from torch.nn import Module
25
25
 
@@ -185,10 +185,16 @@ class BaseCompressor(RegistryMixin, ABC):
185
185
  for name, parameter in module.named_parameters():
186
186
  compressed_data[name] = parameter
187
187
 
188
- return self.decompress_weight(
188
+ decompressed_weight = self.decompress_weight(
189
189
  compressed_data=compressed_data, quantization_args=quantization_args
190
190
  ).to(device)
191
191
 
192
+ for name in ("weight_scale", "weight_zero_point"):
193
+ if hasattr(module, name):
194
+ register_offload_parameter(module, name, compressed_data[name])
195
+
196
+ return decompressed_weight
197
+
192
198
  def decompress_weight(
193
199
  self, compressed_data: Dict[str, Tensor], **kwargs
194
200
  ) -> torch.Tensor:
@@ -36,6 +36,7 @@ from compressed_tensors.config import CompressionFormat, SparsityCompressionConf
36
36
  from compressed_tensors.config.format import (
37
37
  infer_and_set_per_module_quantization_format,
38
38
  )
39
+ from compressed_tensors.linear.compressed_linear import CompressedLinear
39
40
  from compressed_tensors.quantization import (
40
41
  DEFAULT_QUANTIZATION_METHOD,
41
42
  QuantizationConfig,
@@ -474,6 +475,9 @@ class ModelCompressor:
474
475
  ),
475
476
  desc="Compressing model",
476
477
  ):
478
+ if isinstance(module, CompressedLinear):
479
+ continue # already compressed
480
+
477
481
  module_device = get_execution_device(module)
478
482
  is_meta = module_device.type == "meta"
479
483
 
@@ -127,6 +127,11 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
127
127
  m, n = weight.shape
128
128
  # TODO: use a user provided dequant dtype
129
129
  unpacked = unpack_fp4_from_uint8(weight, m, n * 2)
130
+
131
+ # decompress scale
132
+ scale = scale.to(unpacked.dtype)
133
+ compressed_data["weight_scale"] = torch.nn.Parameter(scale, requires_grad=False)
134
+
130
135
  decompressed_weight = dequantize(
131
136
  x_q=unpacked, scale=scale, global_scale=global_scale, dtype=unpacked.dtype
132
137
  )
@@ -175,6 +175,10 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
175
175
  zero_point = unpack_from_int32(
176
176
  zero_point, num_bits, original_zp_shape, packed_dim=0
177
177
  )
178
+ # Update the compressed_data dict with unpacked zero_point
179
+ compressed_data["weight_zero_point"] = torch.nn.Parameter(
180
+ zero_point, requires_grad=False
181
+ )
178
182
 
179
183
  decompressed_weight = dequantize(
180
184
  x_q=unpacked, scale=scale, zero_point=zero_point, g_idx=g_idx
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.13.0'
21
- __version_tuple__ = version_tuple = (0, 13, 0)
20
+ __version__ = version = '0.13.1.a20260108'
21
+ __version_tuple__ = version_tuple = (0, 13, 1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.13.0
3
+ Version: 0.13.1a20260108
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/vllm-project/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -1,17 +1,17 @@
1
1
  compressed_tensors/__init__.py,sha256=SRqNYFVvxAaLa4SImhoiIBKfoOSj7EUdx0CxXjGC2PA,884
2
2
  compressed_tensors/base.py,sha256=dKAVgQAp9GPH6YspvF_cbGXCrbiqAeLEIPydYAO40WE,859
3
3
  compressed_tensors/logger.py,sha256=sTm1Od1cV0aDxBm3YN-PPvsOATxY_2tBV62TQE4HiPw,4032
4
- compressed_tensors/version.py,sha256=irlt-ETr2vtyV_bhp1DlVRDx0Kx9LTwnA5UW4gdE8ZA,513
4
+ compressed_tensors/version.py,sha256=D9Tr1A6Ar4IfaCyjHY6AXHVcNJ-8knOr1SKszH6r8ZM,523
5
5
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
6
- compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
6
+ compressed_tensors/compressors/base.py,sha256=Ou_BVtGlCq9hSs1WxkRXXSnNkaqLECWptT8xyqs1EBY,7461
7
7
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
8
8
  compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
9
- compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=zPe3T0hyHuvIzSXHBWHPiqJ3sQcdVY3tgwF1aBeG7oo,38044
9
+ compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=Xh_3e4GO_ByuCH-FzHmSknLLaKqJSZSCN-mFpQnBcB8,38218
10
10
  compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=Yzje89SRQ6VdlNFhob2sG8afXbSzAHaJyTwRNAzP7g8,743
11
11
  compressed_tensors/compressors/quantized_compressors/base.py,sha256=eO-9HSzrI1yOsP462BzOlg71CTPLIi60qobOyEocHxY,10347
12
- compressed_tensors/compressors/quantized_compressors/fp4_quantized.py,sha256=eApXKF0CoUOSyo5HtQZTelnNjGZEwLXsqN0i1tPKESA,8332
12
+ compressed_tensors/compressors/quantized_compressors/fp4_quantized.py,sha256=jaG3_xL55YjpT_uh9-xSKLVChykQt4WXezIyrNXvEHg,8491
13
13
  compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
14
- compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=lvEA403q3mzpJLz-YFP94MHZzQCH0nNkgA0u8GzodXs,10978
14
+ compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=SljqFdcqchwX_N2sJmxvb59KSYW6OZXyVRk0g0sewjc,11182
15
15
  compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
16
16
  compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
17
17
  compressed_tensors/compressors/sparse_compressors/dense.py,sha256=-OujJ1e0iXBvxYVULrIGvAZ9l-IC0mXczZRnimQdgo4,2314
@@ -69,8 +69,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
69
69
  compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
70
70
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
71
71
  compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
72
- compressed_tensors-0.13.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
73
- compressed_tensors-0.13.0.dist-info/METADATA,sha256=UYfXtfT2KmVRBDA_Wa_OWk9H6es1P2M3_gTrEeoxn_E,7018
74
- compressed_tensors-0.13.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
75
- compressed_tensors-0.13.0.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
76
- compressed_tensors-0.13.0.dist-info/RECORD,,
72
+ compressed_tensors-0.13.1a20260108.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
73
+ compressed_tensors-0.13.1a20260108.dist-info/METADATA,sha256=xb0w2YRYhgiXU9NrSMgsVgQMN7Tapw3WZ2oFBtt-QnA,7027
74
+ compressed_tensors-0.13.1a20260108.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
75
+ compressed_tensors-0.13.1a20260108.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
76
+ compressed_tensors-0.13.1a20260108.dist-info/RECORD,,