compressed-tensors 0.10.3a20250724__py3-none-any.whl → 0.10.3a20250731__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -61,6 +61,27 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         "weight_global_scale",
     )
 
+    def compression_param_info(
+        self,
+        weight_shape: torch.Size,
+        quantization_args: Optional[QuantizationArgs] = None,
+    ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
+        """
+        Creates a dictionary of expected shapes and dtypes for each compression
+        parameter used by the compressor
+
+        :param weight_shape: uncompressed weight shape
+        :param quantization_args: quantization parameters for the weight
+        :return: dictionary mapping compressed parameter names to shape and dtype
+        """
+        output = {
+            "weight_packed": (
+                torch.Size((weight_shape[0], weight_shape[1] // 2)),
+                torch.uint8,
+            ),
+        }
+        return output
+
     def compress_weight(
         self,
         weight: Tensor,
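
The new compression_param_info advertises the packed layout: NVFP4 stores two 4-bit values per uint8 byte, so the packed weight keeps the row count and halves the column count. A minimal sketch of that shape contract (variable names here are illustrative, not part of the library):

import torch

# Hypothetical uncompressed weight shape; any even column count works.
weight_shape = torch.Size((4096, 4096))
packed_shape = torch.Size((weight_shape[0], weight_shape[1] // 2))
assert packed_shape == torch.Size((4096, 2048))  # two FP4 values per uint8 byte
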
@@ -112,17 +112,21 @@ def dequantize(
         if scale.shape[1] == 1:
             args = QuantizationArgs(strategy=QuantizationStrategy.CHANNEL)
         # Scale height matches input or is 1 -> group quantization across columns
-        #
+        #
         # Example 1: scale.shape[0] == 1
         # x_q: (4, 8), scale: (1, 4) -> 2 columns per group
         #
-        # Example 2: scale.shape[0] == x_q.shape[0]
+        # Example 2: scale.shape[0] == x_q.shape[0]
         # x_q: (4, 8), scale: (4, 4) -> 2 elements per group (per row)
         elif (scale.shape[0] == 1) or (scale.shape[0] == x_q.shape[0]):
             group_size = int(x_q.shape[1] / scale.shape[1])
-            args = QuantizationArgs(strategy=QuantizationStrategy.GROUP, group_size=group_size)
+            args = QuantizationArgs(
+                strategy=QuantizationStrategy.GROUP, group_size=group_size
+            )
         else:
-            args = QuantizationArgs(strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape)
+            args = QuantizationArgs(
+                strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape
+            )
     else:
         raise ValueError(
             f"Could not infer a quantization strategy from scale with {scale.ndim} "
@@ -253,13 +257,10 @@ def _process_quantization(
         QuantizationStrategy.GROUP,
         QuantizationStrategy.TENSOR_GROUP,
     ):
-        n_dims = x.shape
-        if len(n_dims) > 2:
-            x = x.squeeze(0)
 
         output_dtype = dtype if dtype is not None else x.dtype
         output = torch.zeros_like(x).to(output_dtype)
-        columns = output.shape[1]
+        columns = output.shape[-1]
 
         # TODO: make validation step for inputs
 
@@ -289,14 +290,12 @@ def _process_quantization(
             perm = torch.argsort(g_idx)
             x = safe_permute(x, perm, dim=1)
 
-        x = torch.reshape(
-            x,
-            (
-                x.shape[0],
-                ceil(x.shape[1] / group_size),
-                group_size,
-            ),
+        # Maintain all dimensions apart from the last dim, which is divided by the group_size
+        reshaped_dims = (
+            ceil(x.shape[-1] / group_size),
+            group_size,
         )
+        x = x.unflatten(-1, reshaped_dims)
 
         if do_quantize:
             output = _quantize(
@@ -319,19 +318,12 @@ def _process_quantization(
                 global_scale=global_scale,
             )
 
-        output = torch.reshape(
-            output,
-            (output.shape[0], output.shape[1] * output.shape[2]),
-        )
-
+        output = output.flatten(start_dim=-2)
         output = output.to(output_dtype)
 
         if not is_column_order:
             output = safe_permute(output, torch.argsort(perm), dim=1)
 
-        if len(n_dims) > 2:
-            output = output.unsqueeze(0)
-
     else:  # covers channel, token and tensor strategies
         if do_quantize:
             output = _quantize(
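
These two hunks replace the explicit squeeze(0)/reshape/unsqueeze(0) bookkeeping with unflatten and flatten, which only touch the last dimension and therefore work for any number of leading dimensions. A small round-trip sketch (shapes are illustrative):

from math import ceil

import torch

x = torch.randn(2, 4, 128)  # a 3-D input; no squeeze(0) needed anymore
group_size = 32
grouped = x.unflatten(-1, (ceil(x.shape[-1] / group_size), group_size))
assert grouped.shape == (2, 4, 4, 32)   # leading dims preserved
restored = grouped.flatten(start_dim=-2)
assert restored.shape == x.shape        # inverse of the unflatten above
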
@@ -185,27 +185,29 @@ def _initialize_scale_zero_point(
     elif quantization_args.strategy == QuantizationStrategy.BLOCK:
         # For block quantization, scale shape should match number of blocks - only for weights
         if quantization_args.block_structure is None:
-            raise ValueError("Block quantization requires block_structure to be specified")
+            raise ValueError(
+                "Block quantization requires block_structure to be specified"
+            )
         block_height, block_width = quantization_args.block_structure
         rows, cols = weight_shape[-2], weight_shape[-1]
         num_rows_blocks = math.ceil(rows / block_height)
         num_cols_blocks = math.ceil(cols / block_width)
-
+
         # Warn if dimensions don't divide evenly
         if rows % block_height != 0 or cols % block_width != 0:
             warnings.warn(
                 f"Block quantization: tensor shape {weight_shape} does not divide evenly "
                 f"by block structure {quantization_args.block_structure}. "
                 f"Some blocks will be incomplete which may affect quantization quality.",
-                UserWarning
+                UserWarning,
             )
-
+
         expected_shape = (num_rows_blocks, num_cols_blocks)
     elif quantization_args.strategy == QuantizationStrategy.BLOCK:
         warnings.warn(
             f"BLOCK quantization not supported for {base_name} activations. "
             f"Falling back to tensor-level quantization.",
-            UserWarning
+            UserWarning,
         )
         expected_shape = 1
 
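
For reference, the block-count arithmetic above rounds up, so a tensor that does not divide evenly still gets a full scale grid and triggers the warning. A standalone check (shapes chosen for illustration):

import math

rows, cols = 4096, 11000                 # illustrative weight shape
block_height, block_width = 128, 128
num_rows_blocks = math.ceil(rows / block_height)   # 32, divides evenly
num_cols_blocks = math.ceil(cols / block_width)    # 86, last block incomplete
assert (num_rows_blocks, num_cols_blocks) == (32, 86)
assert cols % block_width != 0           # this is the case that warns
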
@@ -64,8 +64,9 @@ class QuantizationScheme(BaseModel):
             raise ValueError("Cannot apply actorder to output activations")
 
         if (
-            inputs and weights
-            and weights.strategy == QuantizationStrategy.GROUP
+            inputs
+            and weights
+            and weights.strategy == QuantizationStrategy.GROUP
             and inputs.strategy == QuantizationStrategy.GROUP
             and weights.group_size != inputs.group_size
         ):
@@ -75,7 +76,7 @@ class QuantizationScheme(BaseModel):
                 "may complicate fused kernel implementations. Consider using "
                 "TENSOR_GROUP strategy for both or matching group sizes.",
                 UserWarning,
-                stacklevel=2
+                stacklevel=2,
             )
 
         return model
@@ -175,20 +175,16 @@ def compute_dynamic_scales_and_zp(
         QuantizationStrategy.TENSOR_GROUP,
         QuantizationStrategy.GROUP,
     ):
-        if len(value.shape) > 2:
-            value = value.squeeze(0)
 
-        dim = {0, 1}
-        reduce_dims = tuple(idx for idx in range(3) if idx not in dim)
+        reduce_dims = -1
         keep_dims = False
-        value = torch.reshape(
-            value,
-            (
-                value.shape[0],
-                math.ceil(value.shape[1] / args.group_size),
-                args.group_size,
-            ),
+
+        reshaped_dims = (
+            math.ceil(value.shape[-1] / args.group_size),
+            args.group_size,
         )
+        value = value.unflatten(-1, reshaped_dims)
+
     else:
         supported_strategies = (
             QuantizationStrategy.TOKEN,
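
compute_dynamic_scales_and_zp now uses the same unflatten pattern, and reduce_dims = -1 means the subsequent reduction collapses each group individually while keeping every leading dimension. A hedged sketch of the reduction geometry (the helper's actual reduction call is outside this hunk; amin/amax are assumed here for illustration):

import math

import torch

value = torch.randn(2, 8, 64)            # e.g. activations with a batch dim
group_size = 16
value = value.unflatten(-1, (math.ceil(value.shape[-1] / group_size), group_size))
min_vals = value.amin(dim=-1)            # reduce within each group of 16
max_vals = value.amax(dim=-1)
assert min_vals.shape == (2, 8, 4)       # one min/max per group, per leading dim
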
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import math
 from typing import Optional, Union
 
-import math
 import torch
 from compressed_tensors.transform import TransformArgs, TransformScheme
 from compressed_tensors.transform.factory.base import TransformBase, TransformFactory
@@ -103,7 +103,8 @@ class HadamardTransform(TransformBase):
 
         if self.args.inverse:
             weight = weight.T
-
-        return apply_transform_weight(
-            weight, value, self.args.location, self.module_type
-        ) / self._scale
+
+        return (
+            apply_transform_weight(weight, value, self.args.location, self.module_type)
+            / self._scale
+        )
@@ -18,6 +18,7 @@ from collections.abc import Generator
 from typing import Iterable, Tuple
 
 import torch
+from compressed_tensors.utils.internal import InternalModule
 
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -28,8 +29,6 @@ __all__ = [
     "match_named_parameters",
     "match_modules_set",
     "is_match",
-    "match_name",
-    "match_class",
 ]
 
 
@@ -83,13 +82,16 @@ def match_named_parameters(
     """
     unmatched_targets = set(targets)
     for module_name, module in model.named_modules():
+        if isinstance(module, InternalModule):
+            continue
+
         for param_name, param in module.named_parameters(recurse=False):
             param_fqn = f"{module_name}.{param_name}"
             for target in targets:
-                if match_name(param_fqn, target):
+                if _match_name(param_fqn, target):
                     unmatched_targets -= {target}
 
-            if not any(match_name(param_fqn, ign) for ign in ignore):
+            if not any(_match_name(param_fqn, ign) for ign in ignore):
                 yield param_fqn, module, param
 
     if warn_on_fail:
@@ -165,11 +167,14 @@ def match_modules_set(
 def is_match(name: str, module: torch.nn.Module, target: str) -> bool:
     """
     Returns true if either module name or module parent classes match against target
+    and the module is not an internal module
     """
-    return match_name(name, target) or match_class(module, target)
+    return not isinstance(module, InternalModule) and (
+        _match_name(name, target) or _match_class(module, target)
+    )
 
 
-def match_name(name: str, target: str) -> bool:
+def _match_name(name: str, target: str) -> bool:
     """
     Returns true if target string begins with "re:" and
     regex matches or if target string exactly matches name
@@ -180,7 +185,7 @@ def match_name(name: str, target: str) -> bool:
         return target == name
 
 
-def match_class(module: torch.nn.Module, target: str) -> bool:
+def _match_class(module: torch.nn.Module, target: str) -> bool:
     """
     Returns true if any torch parent class names match the target string exactly
     """
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.10.3.a20250724'
+__version__ = version = '0.10.3.a20250731'
 __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.10.3a20250724
+Version: 0.10.3a20250731
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=LKiXh8O_XB2unUsk0HmC-_PgpfbOswj5PZqtDHOPnRg,523
+compressed_tensors/version.py,sha256=cuOuj6FL5GE-iPKjLVFuRjlwW0_6uDC3tDxFkkHyXFg,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -9,7 +9,7 @@ compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=e-2n
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
 compressed_tensors/compressors/quantized_compressors/base.py,sha256=YGUMzbxekj_36ChgQnVZN6T8uDjXtGG1zfMIBGBLWco,10354
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
-compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Gw-lVzk5jrKUlM5UTCiJBmhM5gHzB9mn8r298MVUbDI,6395
+compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=tKEaYom4SdMwZWg4MDMMMLNGTLgcVT20lPzewboVpMM,7145
 compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=47W1hFTi5YHVNKEWptzztsSutwI1kxy2Troh-NW1y14,11244
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
 compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
@@ -28,15 +28,15 @@ compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=yKTj_4lAy_pnXeTCyUADpyz2qAzJXYJU2P03NF_TP68,12835
 compressed_tensors/quantization/quant_config.py,sha256=w6sEEZGVGIF0Ub2r_cqRfZwbkBT8WzfY3ug52olmjGY,10049
-compressed_tensors/quantization/quant_scheme.py,sha256=qApRLsPxELe5S2qFv8OVyAZ5TpRL7gT35i4U3c9PAwI,8461
+compressed_tensors/quantization/quant_scheme.py,sha256=xk2LPn18tjS1PEOyf0WKvavBq3rzAVHFLB3H2mQQWnc,8473
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=wM8mVcbKvZjBo18pSXMp28i30YWwUXJPSS7_HCakH9U,17892
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=jT70Mbbu9pH10vu5ALVD7VWGoFdMEUpxmihGrf4frjM,17432
+compressed_tensors/quantization/lifecycle/forward.py,sha256=lQwibkDGroJqONhP9ATZWwaZF9suPmCZMQEagFlFc94,17329
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=3Vuj1a-Y7f_7QXagG7BAeAPnDGtbWGFJXBATg6eT-O0,10241
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=BM7bR_uNa-Ex4T-roHonWiRaxCi5sFysXyl0cFh1ZVs,10257
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=Je96Wai9SOizbdE5ph0nsJ86zS96lE4fkf_9q9o2tpA,17212
+compressed_tensors/quantization/utils/helpers.py,sha256=7a89X0kg6xDGplw6trOrkRQzMRPu-txY_qvEt07Vcgc,17036
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
 compressed_tensors/transform/__init__.py,sha256=v2wfl4CMfA6KbD7Hxx_MbRev63y_6QLDlccZq-WTtdw,907
@@ -46,7 +46,7 @@ compressed_tensors/transform/transform_config.py,sha256=A3RuLNDqBNEByQNeu40Kg7sI
 compressed_tensors/transform/transform_scheme.py,sha256=uGLC4avdbhrVqNC3-Eo0p7WzNRQK92Fpg0N9hWiuCRQ,1752
 compressed_tensors/transform/factory/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/transform/factory/base.py,sha256=Zplf8QO-mFqGwDEhLdYL_afSu7v4nMa79oNhidRNPvY,5880
-compressed_tensors/transform/factory/hadamard.py,sha256=iJ2OyKitR2Duw0z5Jqj69GTih2C1WtHRXQCTtATaTtw,4180
+compressed_tensors/transform/factory/hadamard.py,sha256=B0BVjbF3y707MO6L2XfEoZJTQU965vU9dUPLOiUSXII,4193
 compressed_tensors/transform/factory/matrix_multiply.py,sha256=LdoV2E12HTucmUWcw7UKOpRNnL8QhOOIUnNVlpOpGiI,3925
 compressed_tensors/transform/factory/random_hadamard.py,sha256=nUhTlFa4ikSpcl4Umme71pnjMPgwYoGlwjKlU27UHZ4,1634
 compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
@@ -56,14 +56,14 @@ compressed_tensors/transform/utils/matrix.py,sha256=FIHCUlpWVIIhdr3c6EbQec41JeiP
 compressed_tensors/utils/__init__.py,sha256=KZctuotCmX4byXhwDvSeXgp-Ny_awpziAX-WUkZfodI,853
 compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
 compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
-compressed_tensors/utils/match.py,sha256=DjqTH-J9-E7ULVXPLV-HBRhdi07JhK-H90PbFK-DRAY,7017
+compressed_tensors/utils/match.py,sha256=ZVBPzrGYExq7-6RRUlU5XeCjl0ooLaNUoDO6Cgnn9cY,7220
 compressed_tensors/utils/offload.py,sha256=3XiBuWbUkBAt8v1t5i57qDcbB3VJQs_FDeayi-JzIWg,23896
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.10.3a20250724.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.10.3a20250724.dist-info/METADATA,sha256=ZH66sWeKBfvuLUe-ArnII1LYXG3UAEYUt6D6YPQ_W-M,7031
-compressed_tensors-0.10.3a20250724.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.10.3a20250724.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.10.3a20250724.dist-info/RECORD,,
+compressed_tensors-0.10.3a20250731.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.10.3a20250731.dist-info/METADATA,sha256=1NCpfVbLTf6aGJ38rJz3Lmu9DptHpuYm5vTRxIB9PB8,7031
+compressed_tensors-0.10.3a20250731.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.10.3a20250731.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.10.3a20250731.dist-info/RECORD,,