PyPI - compressed-tensors - Versions diffs - 0.9.5a20250530__py3-none-any.whl → 0.9.5a20250602__py3-none-any.whl - Mend

compressed-tensors 0.9.5a20250530__py3-none-any.whl → 0.9.5a20250602__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

compressed_tensors/quantization/lifecycle/forward.py CHANGED Viewed

@@ -227,31 +227,42 @@ def _process_quantization(
             perm = torch.argsort(g_idx)
             x = safe_permute(x, perm, dim=1)
-        # TODO: experiment with vectorizing for loop for performance
-        end = 0
-        for index, group_count in enumerate(group_sizes):
-            sc = scale[:, index].view(-1, 1)
-            zp = zero_point[:, index].view(-1, 1) if zero_point is not None else None
-            start = end
-            end = start + group_count
-            if do_quantize:
-                output[:, start:end] = _quantize(
-                    x=x[:, start:end],
-                    scale=sc,
-                    zero_point=zp,
-                    q_min=q_min,
-                    q_max=q_max,
-                    args=args,
-                    dtype=dtype,
-                    global_scale=global_scale,
-                )
+        x = torch.reshape(
+            x,
+            (
+                x.shape[0],
+                ceil(x.shape[1] / group_size),
+                group_size,
+            ),
+        )
-            if do_dequantize:
-                input = output[:, start:end] if do_quantize else x[:, start:end]
-                output[:, start:end] = _dequantize(
-                    x_q=input, scale=sc, zero_point=zp, global_scale=global_scale
-                )
+        if do_quantize:
+            output = _quantize(
+                x=x,
+                scale=scale.unsqueeze(-1),
+                zero_point=zero_point.unsqueeze(-1) if zero_point is not None else None,
+                dtype=dtype,
+                global_scale=global_scale,
+                q_min=q_min,
+                q_max=q_max,
+                args=args,
+            )
+        if do_dequantize:
+            input = output if do_quantize else x
+            output = _dequantize(
+                x_q=input,
+                scale=scale.unsqueeze(-1),
+                zero_point=zero_point.unsqueeze(-1) if zero_point is not None else None,
+                global_scale=global_scale,
+            )
+        output = torch.reshape(
+            output,
+            (output.shape[0], output.shape[1] * output.shape[2]),
+        )
+        output = output.to(output_dtype)
         if not is_column_order:
             output = safe_permute(output, torch.argsort(perm), dim=1)

compressed_tensors/quantization/quant_args.py CHANGED Viewed

@@ -53,6 +53,7 @@ class FP4_E2M1_DATA(FloatArgs):
     min = -6.0
     @staticmethod
+    @torch.compile
     def cast_to_fp4(x):
         sign = torch.sign(x)
         x = torch.abs(x)

compressed_tensors/quantization/utils/helpers.py CHANGED Viewed

@@ -81,7 +81,7 @@ def calculate_qparams(
         currently only applied/supported for Fp4
     :return: tuple of the calculated scale(s) and zero point(s). For FP4, the calculated
-        scale if of dtype FP8
+        scale is of dtype FP8
     """
     # based on the implementations for consuming quantized values,
     # 0.0 must always be representable within the quantized range
@@ -490,7 +490,6 @@ def generate_global_scale(
     attempts to use the entire FP8 dtype range while mapping a per-group max
     to the FP4 max.
     """
-    scale_dtype = scale_data.dtype
     tensor_amax = torch.abs(input_tensor.data).max().to(dtype)
     global_scale = scale_data.max * quant_data.max / tensor_amax
     return global_scale.to(dtype)

compressed_tensors/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.9.5.a20250530'
+__version__ = version = '0.9.5.a20250602'
 __version_tuple__ = version_tuple = (0, 9, 5)

{compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250530
+Version: 0.9.5a20250602
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=BwDcUUpFaOn_-cMqdBWktPf89WCzFmESpx94d8qAUZM,521
+compressed_tensors/version.py,sha256=HRt5ki0v2pIyEYRWy0pzJwoSKwmkkF4pTx8ox29EyvU,521
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -26,17 +26,17 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=huROC8fbY899EYa2MnEmujvcBeHYLpn-e8ZEViEFASo,11804
+compressed_tensors/quantization/quant_args.py,sha256=2OpiiSdl4KidzNmjx7J8UlQoAYmt5k5GdXv_73ELw0A,11823
 compressed_tensors/quantization/quant_config.py,sha256=aFi6PKqmEX9iP9O8GVn3mEUjRDEwk_hOCbmmiq-j9oU,10198
 compressed_tensors/quantization/quant_scheme.py,sha256=IDWa1GWUbUdWCo8j78Jz6svYF5hLz89J2PVYWBBnXRc,7102
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=-OKZ-FFFfIIoeGTrho8lXx6HVWZQp3Xkn3Q-G0hU-CM,18294
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=65USJEtsp_n8X36L5y4g4ftMnhrQyRWbwKJ8RZMMiBo,14797
+compressed_tensors/quantization/lifecycle/forward.py,sha256=WFwvNebxXNUlpX5p1xG80oa8W9fz4-Xd6LCH_B_nptg,14881
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=976sZ45ywGVzH1n4pyVhG7hnUBP1wKEWoo9cHrmKHxU,12522
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=I-bJcMdBFXjIUQEpnxMMN_FfQyXjojpe5w7ZIKSZ5UU,17588
+compressed_tensors/quantization/utils/helpers.py,sha256=UXs7mTItMdNyGh9NcsztO7qqxG1pmtNQqaAj_bsa7m8,17553
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
 compressed_tensors/transform/__init__.py,sha256=oa5VdrE-GtDYYceXNSwj5X_ropoXLLukm6Aufcc9WhY,747
@@ -50,8 +50,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.9.5a20250530.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.9.5a20250530.dist-info/METADATA,sha256=avjHgMxk1vnX09YKjerSCov-X8mTckulmJV1xQyLk5I,7004
-compressed_tensors-0.9.5a20250530.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.9.5a20250530.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.9.5a20250530.dist-info/RECORD,,
+compressed_tensors-0.9.5a20250602.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.9.5a20250602.dist-info/METADATA,sha256=U1CBsVaqy32z2gjdWS4xVTMmlIAxk9BweHUhk51l338,7004
+compressed_tensors-0.9.5a20250602.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.9.5a20250602.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.9.5a20250602.dist-info/RECORD,,

{compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors 0.9.5a20250530__py3-none-any.whl → 0.9.5a20250602__py3-none-any.whl

compressed-tensors 0.9.5a20250530__py3-none-any.whl → 0.9.5a20250602__py3-none-any.whl