PyPI - compressed-tensors - Versions diffs - 0.10.3a20250728__py3-none-any.whl → 0.10.3a20250731__py3-none-any.whl - Mend

compressed-tensors 0.10.3a20250728__py3-none-any.whl → 0.10.3a20250731__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py CHANGED Viewed

@@ -61,6 +61,27 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
             "weight_global_scale",
         )
+    def compression_param_info(
+        self,
+        weight_shape: torch.Size,
+        quantization_args: Optional[QuantizationArgs] = None,
+    ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
+        """
+        Creates a dictionary of expected shapes and dtypes for each compression
+            parameter used by the compressor
+        :param weight_shape: uncompressed weight shape
+        :param quantization_args: quantization parameters for the weight
+        :return: dictionary mapping compressed parameter names to shape and dtype
+        """
+        output = {
+            "weight_packed": (
+                torch.Size((weight_shape[0], weight_shape[1] // 2)),
+                torch.uint8,
+            ),
+        }
+        return output
     def compress_weight(
         self,
         weight: Tensor,

compressed_tensors/quantization/lifecycle/forward.py CHANGED Viewed

@@ -257,13 +257,10 @@ def _process_quantization(
         QuantizationStrategy.GROUP,
         QuantizationStrategy.TENSOR_GROUP,
     ):
-        n_dims = x.shape
-        if len(n_dims) > 2:
-            x = x.squeeze(0)
         output_dtype = dtype if dtype is not None else x.dtype
         output = torch.zeros_like(x).to(output_dtype)
-        columns = output.shape[1]
+        columns = output.shape[-1]
         # TODO: make validation step for inputs
@@ -293,14 +290,12 @@ def _process_quantization(
             perm = torch.argsort(g_idx)
             x = safe_permute(x, perm, dim=1)
-        x = torch.reshape(
-            x,
-            (
-                x.shape[0],
-                ceil(x.shape[1] / group_size),
-                group_size,
-            ),
+        # Maintain all dimensions apart from the last dim, which is divided by the group_size
+        reshaped_dims = (
+            ceil(x.shape[-1] / group_size),
+            group_size,
         )
+        x = x.unflatten(-1, reshaped_dims)
         if do_quantize:
             output = _quantize(
@@ -323,19 +318,12 @@ def _process_quantization(
                 global_scale=global_scale,
             )
-        output = torch.reshape(
-            output,
-            (output.shape[0], output.shape[1] * output.shape[2]),
-        )
+        output = output.flatten(start_dim=-2)
         output = output.to(output_dtype)
         if not is_column_order:
             output = safe_permute(output, torch.argsort(perm), dim=1)
-        if len(n_dims) > 2:
-            output = output.unsqueeze(0)
     else:  # covers channel, token and tensor strategies
         if do_quantize:
             output = _quantize(

compressed_tensors/quantization/utils/helpers.py CHANGED Viewed

@@ -175,20 +175,16 @@ def compute_dynamic_scales_and_zp(
         QuantizationStrategy.TENSOR_GROUP,
         QuantizationStrategy.GROUP,
     ):
-        if len(value.shape) > 2:
-            value = value.squeeze(0)
-        dim = {0, 1}
-        reduce_dims = tuple(idx for idx in range(3) if idx not in dim)
+        reduce_dims = -1
         keep_dims = False
-        value = torch.reshape(
-            value,
-            (
-                value.shape[0],
-                math.ceil(value.shape[1] / args.group_size),
-                args.group_size,
-            ),
+        reshaped_dims = (
+            math.ceil(value.shape[-1] / args.group_size),
+            args.group_size,
         )
+        value = value.unflatten(-1, reshaped_dims)
     else:
         supported_strategies = (
             QuantizationStrategy.TOKEN,

compressed_tensors/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.10.3.a20250728'
+__version__ = version = '0.10.3.a20250731'
 __version_tuple__ = version_tuple = (0, 10, 3)

{compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250731.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.10.3a20250728
+Version: 0.10.3a20250731
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250731.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=EY3NpvLIsm31BPA-e32djbQIUYdm3sP8W28lHH72d0Y,523
+compressed_tensors/version.py,sha256=cuOuj6FL5GE-iPKjLVFuRjlwW0_6uDC3tDxFkkHyXFg,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -9,7 +9,7 @@ compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=e-2n
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
 compressed_tensors/compressors/quantized_compressors/base.py,sha256=YGUMzbxekj_36ChgQnVZN6T8uDjXtGG1zfMIBGBLWco,10354
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
-compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Gw-lVzk5jrKUlM5UTCiJBmhM5gHzB9mn8r298MVUbDI,6395
+compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=tKEaYom4SdMwZWg4MDMMMLNGTLgcVT20lPzewboVpMM,7145
 compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=47W1hFTi5YHVNKEWptzztsSutwI1kxy2Troh-NW1y14,11244
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
 compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
@@ -32,11 +32,11 @@ compressed_tensors/quantization/quant_scheme.py,sha256=xk2LPn18tjS1PEOyf0WKvavBq
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=wM8mVcbKvZjBo18pSXMp28i30YWwUXJPSS7_HCakH9U,17892
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=V98jWzb3rfV91EC6kfzAyXtmnbLjNF01Rd_EHU2bLo8,17506
+compressed_tensors/quantization/lifecycle/forward.py,sha256=lQwibkDGroJqONhP9ATZWwaZF9suPmCZMQEagFlFc94,17329
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=BM7bR_uNa-Ex4T-roHonWiRaxCi5sFysXyl0cFh1ZVs,10257
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=Je96Wai9SOizbdE5ph0nsJ86zS96lE4fkf_9q9o2tpA,17212
+compressed_tensors/quantization/utils/helpers.py,sha256=7a89X0kg6xDGplw6trOrkRQzMRPu-txY_qvEt07Vcgc,17036
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
 compressed_tensors/transform/__init__.py,sha256=v2wfl4CMfA6KbD7Hxx_MbRev63y_6QLDlccZq-WTtdw,907
@@ -62,8 +62,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.10.3a20250728.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.10.3a20250728.dist-info/METADATA,sha256=rQbbrFahVspKPEfY86EpebdjgoYAtSyyH7JLOPTPcrg,7031
-compressed_tensors-0.10.3a20250728.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.10.3a20250728.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.10.3a20250728.dist-info/RECORD,,
+compressed_tensors-0.10.3a20250731.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.10.3a20250731.dist-info/METADATA,sha256=1NCpfVbLTf6aGJ38rJz3Lmu9DptHpuYm5vTRxIB9PB8,7031
+compressed_tensors-0.10.3a20250731.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.10.3a20250731.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.10.3a20250731.dist-info/RECORD,,

{compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250731.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250731.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250731.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors 0.10.3a20250728__py3-none-any.whl → 0.10.3a20250731__py3-none-any.whl

compressed-tensors 0.10.3a20250728__py3-none-any.whl → 0.10.3a20250731__py3-none-any.whl