PyPI - compressed-tensors - Versions diffs - 0.9.4a20250408__py3-none-any.whl → 0.9.4a20250412__py3-none-any.whl - Mend

compressed-tensors 0.9.4a20250408__py3-none-any.whl → 0.9.4a20250412__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

compressed_tensors/quantization/quant_scheme.py CHANGED Viewed

@@ -142,6 +142,18 @@ W4A16 = dict(
     ),
 )
+# 4 bit integer weights only asymmetric quantization
+W4A16_ASYM = dict(
+    weights=QuantizationArgs(
+        num_bits=4,
+        type=QuantizationType.INT,
+        strategy=QuantizationStrategy.GROUP,
+        group_size=128,
+        symmetric=False,
+        dynamic=False,
+    ),
+)
 # 4 bit integer weights and 8 bit activations quantization
 INT8_W4A8 = dict(
     weights=QuantizationArgs(
@@ -205,6 +217,7 @@ PRESET_SCHEMES = {
     # Integer weight only schemes
     "W8A16": W8A16,
     "W4A16": W4A16,
+    "W4A16_ASYM": W4A16_ASYM,
     # Integer weight and activation schemes
     "W8A8": INT8_W8A8,
     "INT8": INT8_W8A8,  # alias for W8A8

compressed_tensors/quantization/utils/helpers.py CHANGED Viewed

@@ -64,8 +64,11 @@ def calculate_qparams(
     :param quantization_args: settings to quantization
     :return: tuple of the calculated scale(s) and zero point(s)
     """
+    # based on the implementations for consuming quantized values,
+    # 0.0 must always be representable within the quantized range
     min_vals = torch.min(min_vals, torch.zeros_like(min_vals))
     max_vals = torch.max(max_vals, torch.zeros_like(max_vals))
     device = min_vals.device
     bit_min, bit_max = calculate_range(quantization_args, device)
@@ -84,6 +87,9 @@ def calculate_qparams(
         zero_points = torch.clamp(zero_points, bit_min, bit_max)
     # match zero-points to quantized type
+    # if casting to int, use round instead of truncate
+    if quantization_args.type == QuantizationType.INT:
+        zero_points = torch.round(zero_points)
     zero_points = zero_points.to(zp_dtype)
     if scales.ndim == 0:
@@ -96,7 +102,7 @@ def calculate_qparams(
 def compute_dynamic_scales_and_zp(value: Tensor, args: QuantizationArgs):
     """
     Returns the computed scales and zero points for dynamic activation
-    qunatization.
+    quantization.
     :param value: tensor to calculate quantization parameters for
     :param args: quantization args

compressed_tensors/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.9.4a20250408'
+__version__ = version = '0.9.4a20250412'
 __version_tuple__ = version_tuple = (0, 9, 4)

{compressed_tensors-0.9.4a20250408.dist-info → compressed_tensors-0.9.4a20250412.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.4a20250408
+Version: 0.9.4a20250412
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.9.4a20250408.dist-info → compressed_tensors-0.9.4a20250412.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=dU0WxLg_un23vp7nx7GfLU01yj3Z9Aru2yP4cp8c0-c,520
+compressed_tensors/version.py,sha256=jx-1bgwpeUbFDFsDP16lBU9G8O_p6RDC_WNXPUkZkOM,520
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=x8dQrWVEurynXw03yHJZTaAmrRTOsdZJoHjmvs0IKwk,7002
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -27,7 +27,7 @@ compressed_tensors/linear/compressed_linear.py,sha256=_m6XpNcI53eeSHO8VdiuAM6UBT
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=sKpb8DcNObidjXjNol1Tn_Iih3ZXBycSp-fyz68TGhY,9117
 compressed_tensors/quantization/quant_config.py,sha256=MxSUcb5dOqMN6LFyD5K2h8X0TvEtcWIAoiUJqD2dHGE,10159
-compressed_tensors/quantization/quant_scheme.py,sha256=eQ0JrRZ80GX69fpwW87VzPzzhajhk4mUaJScjk82OY4,6010
+compressed_tensors/quantization/quant_scheme.py,sha256=yz0oMbbwp7QZXXd2k5KIJu-Q6aTqg2929VdUzZ7vysM,6324
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=lZmCCSm1_o79iUAy460w6Bv9FaOvntVisMdS-dN9fnk,16594
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
@@ -35,7 +35,7 @@ compressed_tensors/quantization/lifecycle/forward.py,sha256=DOWouUqfaLA4Qhg-ojVV
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=sK3PLm69N91QepBuq-83Qd2Br6XcOmRDpD5qo_WWNJo,7469
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=DBP-sGRpGAY01K0LFE7qqonNj4hkTYL_mXrMs2LtAD8,14100
+compressed_tensors/quantization/utils/helpers.py,sha256=-wX0H7zVysJ67jRRCGbx6BfxbMU_1sqffTf5YUIpPiU,14391
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
 compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
@@ -45,8 +45,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=5SeM2hzLh77Ne8Vk7qR6-km7cf8bhov41ExpWITqX3A,11470
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.9.4a20250408.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.9.4a20250408.dist-info/METADATA,sha256=CXl80o7QymLek4-pwpCHF9L3-OgIowJ2KDmfi8r-YBs,7004
-compressed_tensors-0.9.4a20250408.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-compressed_tensors-0.9.4a20250408.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.9.4a20250408.dist-info/RECORD,,
+compressed_tensors-0.9.4a20250412.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.9.4a20250412.dist-info/METADATA,sha256=WQouxo47DknCnw5X2jTcfFUlrrsxRSQmMp_QS-P3mVQ,7004
+compressed_tensors-0.9.4a20250412.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+compressed_tensors-0.9.4a20250412.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.9.4a20250412.dist-info/RECORD,,

{compressed_tensors-0.9.4a20250408.dist-info → compressed_tensors-0.9.4a20250412.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors-0.9.4a20250408.dist-info → compressed_tensors-0.9.4a20250412.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{compressed_tensors-0.9.4a20250408.dist-info → compressed_tensors-0.9.4a20250412.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors 0.9.4a20250408__py3-none-any.whl → 0.9.4a20250412__py3-none-any.whl

compressed-tensors 0.9.4a20250408__py3-none-any.whl → 0.9.4a20250412__py3-none-any.whl