compressed-tensors 0.9.3__py3-none-any.whl → 0.9.4a20250410__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -230,10 +230,6 @@ class QuantizationConfig(BaseModel):
230
230
  group_name = "group_" + str(idx)
231
231
  config_groups[group_name] = scheme
232
232
 
233
- # TODO: this is incorrect in compressed mode, since we are overwriting the
234
- # original weight we lose the uncompressed bit_depth indo
235
- compression_ratio = calculate_compression_ratio(model)
236
-
237
233
  if format is None:
238
234
  if quantization_status == QuantizationStatus.COMPRESSED:
239
235
  format = CompressionFormat.int_quantized.value
@@ -244,7 +240,7 @@ class QuantizationConfig(BaseModel):
244
240
  config_groups=config_groups,
245
241
  quantization_status=quantization_status,
246
242
  kv_cache_scheme=kv_cache_scheme,
247
- global_compression_ratio=compression_ratio,
243
+ global_compression_ratio=None,
248
244
  format=format,
249
245
  ignore=consolidated_ignore,
250
246
  )
@@ -142,6 +142,18 @@ W4A16 = dict(
142
142
  ),
143
143
  )
144
144
 
145
+ # 4 bit integer weights only asymmetric quantization
146
+ W4A16_ASYM = dict(
147
+ weights=QuantizationArgs(
148
+ num_bits=4,
149
+ type=QuantizationType.INT,
150
+ strategy=QuantizationStrategy.GROUP,
151
+ group_size=128,
152
+ symmetric=False,
153
+ dynamic=False,
154
+ ),
155
+ )
156
+
145
157
  # 4 bit integer weights and 8 bit activations quantization
146
158
  INT8_W4A8 = dict(
147
159
  weights=QuantizationArgs(
@@ -205,6 +217,7 @@ PRESET_SCHEMES = {
205
217
  # Integer weight only schemes
206
218
  "W8A16": W8A16,
207
219
  "W4A16": W4A16,
220
+ "W4A16_ASYM": W4A16_ASYM,
208
221
  # Integer weight and activation schemes
209
222
  "W8A8": INT8_W8A8,
210
223
  "INT8": INT8_W8A8, # alias for W8A8
@@ -64,8 +64,11 @@ def calculate_qparams(
64
64
  :param quantization_args: settings to quantization
65
65
  :return: tuple of the calculated scale(s) and zero point(s)
66
66
  """
67
+ # based on the implementations for consuming quantized values,
68
+ # 0.0 must always be representable within the quantized range
67
69
  min_vals = torch.min(min_vals, torch.zeros_like(min_vals))
68
70
  max_vals = torch.max(max_vals, torch.zeros_like(max_vals))
71
+
69
72
  device = min_vals.device
70
73
 
71
74
  bit_min, bit_max = calculate_range(quantization_args, device)
@@ -84,6 +87,9 @@ def calculate_qparams(
84
87
  zero_points = torch.clamp(zero_points, bit_min, bit_max)
85
88
 
86
89
  # match zero-points to quantized type
90
+ # if casting to int, use round instead of truncate
91
+ if quantization_args.type == QuantizationType.INT:
92
+ zero_points = torch.round(zero_points)
87
93
  zero_points = zero_points.to(zp_dtype)
88
94
 
89
95
  if scales.ndim == 0:
@@ -96,7 +102,7 @@ def calculate_qparams(
96
102
  def compute_dynamic_scales_and_zp(value: Tensor, args: QuantizationArgs):
97
103
  """
98
104
  Returns the computed scales and zero points for dynamic activation
99
- qunatization.
105
+ quantization.
100
106
 
101
107
  :param value: tensor to calculate quantization parameters for
102
108
  :param args: quantization args
@@ -1,53 +1,21 @@
1
- # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing,
10
- # software distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
14
3
 
15
- """
16
- Functionality for storing and setting the version info for SparseML
17
- """
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
18
5
 
6
+ TYPE_CHECKING = False
7
+ if TYPE_CHECKING:
8
+ from typing import Tuple
9
+ from typing import Union
19
10
 
20
- version_base = "0.9.3"
21
- is_release = True # change to True to set the generated version as a release version
11
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
12
+ else:
13
+ VERSION_TUPLE = object
22
14
 
15
+ version: str
16
+ __version__: str
17
+ __version_tuple__: VERSION_TUPLE
18
+ version_tuple: VERSION_TUPLE
23
19
 
24
- def _generate_version(
25
- is_release: bool,
26
- version_base: str,
27
- ):
28
- from datetime import date
29
-
30
- if is_release:
31
- return version_base
32
- else:
33
- return f"{version_base}.{date.today().strftime('%Y%m%d')}"
34
-
35
-
36
- __all__ = [
37
- "__version__",
38
- "version_base",
39
- "is_release",
40
- "version",
41
- "version_major",
42
- "version_minor",
43
- "version_patch",
44
- "version_build",
45
- "version_major_minor",
46
- ]
47
- __version__ = _generate_version(is_release, version_base)
48
-
49
- version = __version__
50
- version_major, version_minor, version_patch, version_build = version.split(".") + (
51
- [None] if len(version.split(".")) < 4 else []
52
- ) # handle conditional for version being 3 parts or 4 (4 containing build date)
53
- version_major_minor = f"{version_major}.{version_minor}"
20
+ __version__ = version = '0.9.4a20250410'
21
+ __version_tuple__ = version_tuple = (0, 9, 4)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.9.3
3
+ Version: 0.9.4a20250410
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -64,7 +64,7 @@ pip install compressed-tensors
64
64
 
65
65
  Nightly release:
66
66
  ```bash
67
- pip install compressed-tensors-nightly
67
+ pip install --pre compressed-tensors
68
68
  ```
69
69
 
70
70
  ### From Source
@@ -1,6 +1,6 @@
1
1
  compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
2
2
  compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
3
- compressed_tensors/version.py,sha256=X4y5lqlF1QFUgl25iumzagpg3dzyVoLP6i82HZEhCJA,1585
3
+ compressed_tensors/version.py,sha256=ttCtltINyBJn0un6r4KDJzCF49mLYcu-E_khCO2nku8,520
4
4
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
5
5
  compressed_tensors/compressors/base.py,sha256=x8dQrWVEurynXw03yHJZTaAmrRTOsdZJoHjmvs0IKwk,7002
6
6
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -26,8 +26,8 @@ compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajC
26
26
  compressed_tensors/linear/compressed_linear.py,sha256=_m6XpNcI53eeSHO8VdiuAM6UBTdpDhn5Ivd8iRMwEKc,3980
27
27
  compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
28
28
  compressed_tensors/quantization/quant_args.py,sha256=sKpb8DcNObidjXjNol1Tn_Iih3ZXBycSp-fyz68TGhY,9117
29
- compressed_tensors/quantization/quant_config.py,sha256=vx06wBo91p4LCb3Vzd-2eCTUeIf_Sz2ZXRP263eQyjQ,10385
30
- compressed_tensors/quantization/quant_scheme.py,sha256=eQ0JrRZ80GX69fpwW87VzPzzhajhk4mUaJScjk82OY4,6010
29
+ compressed_tensors/quantization/quant_config.py,sha256=MxSUcb5dOqMN6LFyD5K2h8X0TvEtcWIAoiUJqD2dHGE,10159
30
+ compressed_tensors/quantization/quant_scheme.py,sha256=yz0oMbbwp7QZXXd2k5KIJu-Q6aTqg2929VdUzZ7vysM,6324
31
31
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
32
32
  compressed_tensors/quantization/lifecycle/apply.py,sha256=lZmCCSm1_o79iUAy460w6Bv9FaOvntVisMdS-dN9fnk,16594
33
33
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
@@ -35,7 +35,7 @@ compressed_tensors/quantization/lifecycle/forward.py,sha256=DOWouUqfaLA4Qhg-ojVV
35
35
  compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
36
36
  compressed_tensors/quantization/lifecycle/initialize.py,sha256=sK3PLm69N91QepBuq-83Qd2Br6XcOmRDpD5qo_WWNJo,7469
37
37
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
38
- compressed_tensors/quantization/utils/helpers.py,sha256=DBP-sGRpGAY01K0LFE7qqonNj4hkTYL_mXrMs2LtAD8,14100
38
+ compressed_tensors/quantization/utils/helpers.py,sha256=-wX0H7zVysJ67jRRCGbx6BfxbMU_1sqffTf5YUIpPiU,14391
39
39
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
40
40
  compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
41
41
  compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
@@ -45,8 +45,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
45
45
  compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
46
46
  compressed_tensors/utils/safetensors_load.py,sha256=5SeM2hzLh77Ne8Vk7qR6-km7cf8bhov41ExpWITqX3A,11470
47
47
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
48
- compressed_tensors-0.9.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
- compressed_tensors-0.9.3.dist-info/METADATA,sha256=zs3aFaG-BGV9hqJbW9Zwzex0TVcM5sPZhiaeVx2qjR0,6997
50
- compressed_tensors-0.9.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
51
- compressed_tensors-0.9.3.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
52
- compressed_tensors-0.9.3.dist-info/RECORD,,
48
+ compressed_tensors-0.9.4a20250410.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
+ compressed_tensors-0.9.4a20250410.dist-info/METADATA,sha256=EKD2EHV9dh7GHjeO25QvapLLR3JabT4WC04IU6K-cLg,7004
50
+ compressed_tensors-0.9.4a20250410.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
51
+ compressed_tensors-0.9.4a20250410.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
52
+ compressed_tensors-0.9.4a20250410.dist-info/RECORD,,