compressed-tensors 0.9.4a20250408__py3-none-any.whl → 0.9.4a20250410__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -142,6 +142,18 @@ W4A16 = dict(
142
142
  ),
143
143
  )
144
144
 
145
+ # 4 bit integer weights only asymmetric quantization
146
+ W4A16_ASYM = dict(
147
+ weights=QuantizationArgs(
148
+ num_bits=4,
149
+ type=QuantizationType.INT,
150
+ strategy=QuantizationStrategy.GROUP,
151
+ group_size=128,
152
+ symmetric=False,
153
+ dynamic=False,
154
+ ),
155
+ )
156
+
145
157
  # 4 bit integer weights and 8 bit activations quantization
146
158
  INT8_W4A8 = dict(
147
159
  weights=QuantizationArgs(
@@ -205,6 +217,7 @@ PRESET_SCHEMES = {
205
217
  # Integer weight only schemes
206
218
  "W8A16": W8A16,
207
219
  "W4A16": W4A16,
220
+ "W4A16_ASYM": W4A16_ASYM,
208
221
  # Integer weight and activation schemes
209
222
  "W8A8": INT8_W8A8,
210
223
  "INT8": INT8_W8A8, # alias for W8A8
@@ -64,8 +64,11 @@ def calculate_qparams(
64
64
  :param quantization_args: settings to quantization
65
65
  :return: tuple of the calculated scale(s) and zero point(s)
66
66
  """
67
+ # based on the implementations for consuming quantized values,
68
+ # 0.0 must always be representable within the quantized range
67
69
  min_vals = torch.min(min_vals, torch.zeros_like(min_vals))
68
70
  max_vals = torch.max(max_vals, torch.zeros_like(max_vals))
71
+
69
72
  device = min_vals.device
70
73
 
71
74
  bit_min, bit_max = calculate_range(quantization_args, device)
@@ -84,6 +87,9 @@ def calculate_qparams(
84
87
  zero_points = torch.clamp(zero_points, bit_min, bit_max)
85
88
 
86
89
  # match zero-points to quantized type
90
+ # if casting to int, use round instead of truncate
91
+ if quantization_args.type == QuantizationType.INT:
92
+ zero_points = torch.round(zero_points)
87
93
  zero_points = zero_points.to(zp_dtype)
88
94
 
89
95
  if scales.ndim == 0:
@@ -96,7 +102,7 @@ def calculate_qparams(
96
102
  def compute_dynamic_scales_and_zp(value: Tensor, args: QuantizationArgs):
97
103
  """
98
104
  Returns the computed scales and zero points for dynamic activation
99
- qunatization.
105
+ quantization.
100
106
 
101
107
  :param value: tensor to calculate quantization parameters for
102
108
  :param args: quantization args
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.9.4a20250408'
20
+ __version__ = version = '0.9.4a20250410'
21
21
  __version_tuple__ = version_tuple = (0, 9, 4)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.9.4a20250408
3
+ Version: 0.9.4a20250410
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -1,6 +1,6 @@
1
1
  compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
2
2
  compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
3
- compressed_tensors/version.py,sha256=dU0WxLg_un23vp7nx7GfLU01yj3Z9Aru2yP4cp8c0-c,520
3
+ compressed_tensors/version.py,sha256=ttCtltINyBJn0un6r4KDJzCF49mLYcu-E_khCO2nku8,520
4
4
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
5
5
  compressed_tensors/compressors/base.py,sha256=x8dQrWVEurynXw03yHJZTaAmrRTOsdZJoHjmvs0IKwk,7002
6
6
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -27,7 +27,7 @@ compressed_tensors/linear/compressed_linear.py,sha256=_m6XpNcI53eeSHO8VdiuAM6UBT
27
27
  compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
28
28
  compressed_tensors/quantization/quant_args.py,sha256=sKpb8DcNObidjXjNol1Tn_Iih3ZXBycSp-fyz68TGhY,9117
29
29
  compressed_tensors/quantization/quant_config.py,sha256=MxSUcb5dOqMN6LFyD5K2h8X0TvEtcWIAoiUJqD2dHGE,10159
30
- compressed_tensors/quantization/quant_scheme.py,sha256=eQ0JrRZ80GX69fpwW87VzPzzhajhk4mUaJScjk82OY4,6010
30
+ compressed_tensors/quantization/quant_scheme.py,sha256=yz0oMbbwp7QZXXd2k5KIJu-Q6aTqg2929VdUzZ7vysM,6324
31
31
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
32
32
  compressed_tensors/quantization/lifecycle/apply.py,sha256=lZmCCSm1_o79iUAy460w6Bv9FaOvntVisMdS-dN9fnk,16594
33
33
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
@@ -35,7 +35,7 @@ compressed_tensors/quantization/lifecycle/forward.py,sha256=DOWouUqfaLA4Qhg-ojVV
35
35
  compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
36
36
  compressed_tensors/quantization/lifecycle/initialize.py,sha256=sK3PLm69N91QepBuq-83Qd2Br6XcOmRDpD5qo_WWNJo,7469
37
37
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
38
- compressed_tensors/quantization/utils/helpers.py,sha256=DBP-sGRpGAY01K0LFE7qqonNj4hkTYL_mXrMs2LtAD8,14100
38
+ compressed_tensors/quantization/utils/helpers.py,sha256=-wX0H7zVysJ67jRRCGbx6BfxbMU_1sqffTf5YUIpPiU,14391
39
39
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
40
40
  compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
41
41
  compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
@@ -45,8 +45,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
45
45
  compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
46
46
  compressed_tensors/utils/safetensors_load.py,sha256=5SeM2hzLh77Ne8Vk7qR6-km7cf8bhov41ExpWITqX3A,11470
47
47
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
48
- compressed_tensors-0.9.4a20250408.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
- compressed_tensors-0.9.4a20250408.dist-info/METADATA,sha256=CXl80o7QymLek4-pwpCHF9L3-OgIowJ2KDmfi8r-YBs,7004
50
- compressed_tensors-0.9.4a20250408.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
51
- compressed_tensors-0.9.4a20250408.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
52
- compressed_tensors-0.9.4a20250408.dist-info/RECORD,,
48
+ compressed_tensors-0.9.4a20250410.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
+ compressed_tensors-0.9.4a20250410.dist-info/METADATA,sha256=EKD2EHV9dh7GHjeO25QvapLLR3JabT4WC04IU6K-cLg,7004
50
+ compressed_tensors-0.9.4a20250410.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
51
+ compressed_tensors-0.9.4a20250410.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
52
+ compressed_tensors-0.9.4a20250410.dist-info/RECORD,,