compressed-tensors 0.10.3a20250728__py3-none-any.whl → 0.10.3a20250805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +21 -0
- compressed_tensors/quantization/lifecycle/forward.py +13 -20
- compressed_tensors/quantization/utils/helpers.py +7 -11
- compressed_tensors/utils/match.py +67 -13
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250805.dist-info}/METADATA +1 -1
- {compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250805.dist-info}/RECORD +10 -10
- {compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250805.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250805.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250805.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py
CHANGED
@@ -61,6 +61,27 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         "weight_global_scale",
     )
 
+    def compression_param_info(
+        self,
+        weight_shape: torch.Size,
+        quantization_args: Optional[QuantizationArgs] = None,
+    ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
+        """
+        Creates a dictionary of expected shapes and dtypes for each compression
+        parameter used by the compressor
+
+        :param weight_shape: uncompressed weight shape
+        :param quantization_args: quantization parameters for the weight
+        :return: dictionary mapping compressed parameter names to shape and dtype
+        """
+        output = {
+            "weight_packed": (
+                torch.Size((weight_shape[0], weight_shape[1] // 2)),
+                torch.uint8,
+            ),
+        }
+        return output
+
     def compress_weight(
         self,
         weight: Tensor,
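Note: the new `compression_param_info` tells callers that NVFP4 packs two 4-bit values into each `uint8` byte, so the packed weight keeps the row count but halves the column count. A minimal sketch of the shape arithmetic (the example weight shape is an assumption, not taken from the package):

```python
import torch

weight_shape = torch.Size((4096, 4096))  # hypothetical uncompressed FP4 weight

# Mirrors the new compression_param_info logic: two FP4 values per uint8 byte,
# so only the last dimension shrinks.
param_info = {
    "weight_packed": (
        torch.Size((weight_shape[0], weight_shape[1] // 2)),
        torch.uint8,
    ),
}

packed_shape, packed_dtype = param_info["weight_packed"]
packed = torch.empty(*packed_shape, dtype=packed_dtype)
print(packed.shape, packed.dtype)  # torch.Size([4096, 2048]) torch.uint8
```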
compressed_tensors/quantization/lifecycle/forward.py
CHANGED
@@ -124,8 +124,13 @@ def dequantize(
                 strategy=QuantizationStrategy.GROUP, group_size=group_size
             )
         else:
+            rows, cols = x_q.shape[-2], x_q.shape[-1]
+            block_height = rows // scale.shape[0]  # Rows per block
+            block_width = cols // scale.shape[1]  # Columns per block
+
             args = QuantizationArgs(
-                strategy=QuantizationStrategy.BLOCK,
+                strategy=QuantizationStrategy.BLOCK,
+                block_structure=[block_height, block_width],
             )
     else:
         raise ValueError(
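Note: `dequantize` now infers the 2-D `block_structure` from the ratio between the quantized tensor's shape and the scale's shape, instead of constructing BLOCK args without one. A small illustration of the inference under assumed shapes:

```python
import torch

x_q = torch.zeros(256, 512)  # assumed block-quantized weight
scale = torch.zeros(2, 4)    # assumed per-block scales (a 2 x 4 grid of blocks)

rows, cols = x_q.shape[-2], x_q.shape[-1]
block_height = rows // scale.shape[0]  # 256 // 2 = 128 rows per block
block_width = cols // scale.shape[1]   # 512 // 4 = 128 columns per block

print([block_height, block_width])  # [128, 128] -> passed as block_structure
```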
@@ -257,13 +262,10 @@ def _process_quantization(
         QuantizationStrategy.GROUP,
         QuantizationStrategy.TENSOR_GROUP,
     ):
-        n_dims = x.shape
-        if len(n_dims) > 2:
-            x = x.squeeze(0)
 
         output_dtype = dtype if dtype is not None else x.dtype
         output = torch.zeros_like(x).to(output_dtype)
-        columns = output.shape[1]
+        columns = output.shape[-1]
 
         # TODO: make validation step for inputs
 
@@ -293,14 +295,12 @@ def _process_quantization(
             perm = torch.argsort(g_idx)
             x = safe_permute(x, perm, dim=1)
 
-        x = torch.reshape(
-            x,
-            (
-                x.shape[0],
-                ceil(x.shape[1] / group_size),
-                group_size,
-            ),
+        # Maintain all dimensions apart from the last dim, which is divided by the group_size
+        reshaped_dims = (
+            ceil(x.shape[-1] / group_size),
+            group_size,
         )
+        x = x.unflatten(-1, reshaped_dims)
 
         if do_quantize:
             output = _quantize(
@@ -323,19 +323,12 @@ def _process_quantization(
                 global_scale=global_scale,
             )
 
-        output = torch.reshape(
-            output,
-            (output.shape[0], output.shape[1] * output.shape[2]),
-        )
-
+        output = output.flatten(start_dim=-2)
         output = output.to(output_dtype)
 
         if not is_column_order:
             output = safe_permute(output, torch.argsort(perm), dim=1)
 
-        if len(n_dims) > 2:
-            output = output.unsqueeze(0)
-
     else:  # covers channel, token and tensor strategies
         if do_quantize:
             output = _quantize(
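Note: taken together, the `_process_quantization` hunks drop the reshape/squeeze bookkeeping that assumed 2-D inputs and instead split or merge only the last dimension with `Tensor.unflatten`/`Tensor.flatten`, so any leading batch dimensions pass through unchanged. A quick shape round-trip with an assumed 3-D activation:

```python
from math import ceil
import torch

group_size = 128
x = torch.randn(2, 16, 512)  # assumed (batch, tokens, hidden) input

# Split only the last dim into (num_groups, group_size); leading dims are preserved.
reshaped_dims = (ceil(x.shape[-1] / group_size), group_size)
x_grouped = x.unflatten(-1, reshaped_dims)
print(x_grouped.shape)  # torch.Size([2, 16, 4, 128])

# After per-group (de)quantization, collapse the two trailing dims back.
x_restored = x_grouped.flatten(start_dim=-2)
print(x_restored.shape)  # torch.Size([2, 16, 512])
```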
compressed_tensors/quantization/utils/helpers.py
CHANGED
@@ -175,20 +175,16 @@ def compute_dynamic_scales_and_zp(
         QuantizationStrategy.TENSOR_GROUP,
         QuantizationStrategy.GROUP,
     ):
-        if len(value.shape) > 2:
-            value = value.squeeze(0)
 
-        dim = {0, 1}
-        reduce_dims = tuple(idx for idx in range(3) if idx not in dim)
+        reduce_dims = -1
         keep_dims = False
-        value = torch.reshape(
-            value,
-            (
-                value.shape[0],
-                math.ceil(value.shape[1] / args.group_size),
-                args.group_size,
-            ),
+
+        reshaped_dims = (
+            math.ceil(value.shape[-1] / args.group_size),
+            args.group_size,
         )
+        value = value.unflatten(-1, reshaped_dims)
+
     else:
         supported_strategies = (
             QuantizationStrategy.TOKEN,
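Note: `compute_dynamic_scales_and_zp` follows the same pattern: the group dimension is split off the last axis and the observed reduction runs over `reduce_dims = -1` with `keep_dims = False`, so no leading dimension has to be squeezed away. A sketch of per-group absolute maxima computed this way (shapes are illustrative assumptions):

```python
import math
import torch

group_size = 64
value = torch.randn(2, 8, 256)  # assumed (batch, tokens, hidden) input

reshaped_dims = (math.ceil(value.shape[-1] / group_size), group_size)
value = value.unflatten(-1, reshaped_dims)  # (2, 8, 4, 64)

# One observed statistic per group; leading dims are kept without any squeeze/unsqueeze.
group_absmax = value.abs().amax(dim=-1, keepdim=False)
print(group_absmax.shape)  # torch.Size([2, 8, 4])
```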
compressed_tensors/utils/match.py
CHANGED
@@ -15,7 +15,7 @@
 import logging
 import re
 from collections.abc import Generator
-from typing import Iterable, Tuple
+from typing import Iterable, Mapping, Optional, Tuple
 
 import torch
 from compressed_tensors.utils.internal import InternalModule
@@ -32,10 +32,14 @@ __all__ = [
 ]
 
 
+FusedMappping = Mapping[str, Iterable[str]]
+
+
 def match_named_modules(
     model: torch.nn.Module,
     targets: Iterable[str],
     ignore: Iterable[str] = tuple(),
+    fused: Optional[FusedMappping] = None,
     warn_on_fail: bool = False,
 ) -> Generator[Tuple[str, torch.nn.Module]]:
     """
@@ -45,16 +49,18 @@ def match_named_modules(
     :param model: model containing submodules to match against
     :param targets: target strings, potentially containing "re:" prefixes
     :param ignore: targets to ignore, potentially containing "re:" prefixes
+    :fused: optional mapping from suffixes of fused modules to the suffixes of their
+        corresponding shards. See `compressed_tensors.utils.match.is_match`
     :param warn_on_fail: if True, warns if any targets do not match any modules in model
     :return: generator of module names and modules
     """
     unmatched_targets = set(targets)
     for name, module in model.named_modules():
         for target in targets:
-            if is_match(name, module, target):
+            if is_match(name, module, target, fused):
                 unmatched_targets -= {target}
 
-        if not any(is_match(name, module, ign) for ign in ignore):
+        if not any(is_match(name, module, ign, fused) for ign in ignore):
             yield name, module
 
     if warn_on_fail:
@@ -68,6 +74,7 @@ def match_named_parameters(
     model: torch.nn.Module,
     targets: Iterable[str],
     ignore: Iterable[str] = tuple(),
+    fused: Optional[FusedMappping] = None,
     warn_on_fail: bool = False,
 ) -> Generator[Tuple[str, torch.nn.Module, torch.nn.Parameter]]:
     """
@@ -77,6 +84,8 @@ def match_named_parameters(
     :param model: model containing params to match against
     :param targets: target strings, potentially containing "re:" prefixes
     :param ignore: targets to ignore, potentially containing "re:" prefixes
+    :fused: optional mapping from suffixes of fused modules to the suffixes of their
+        corresponding shards. See `compressed_tensors.utils.match.is_match`
     :param warn_on_fail: if True, warns if any targets do not match any params in model
     :return: generator of fully-qualified param names, parent modules, and params
     """
@@ -88,10 +97,10 @@ def match_named_parameters(
         for param_name, param in module.named_parameters(recurse=False):
             param_fqn = f"{module_name}.{param_name}"
             for target in targets:
-                if _match_name(param_fqn, target):
+                if _match_name(param_fqn, target, fused):
                     unmatched_targets -= {target}
 
-            if not any(_match_name(param_fqn, ign) for ign in ignore):
+            if not any(_match_name(param_fqn, ign, fused) for ign in ignore):
                 yield param_fqn, module, param
 
     if warn_on_fail:
@@ -164,21 +173,56 @@ def match_modules_set(
         raise ValueError(f"Unable to match targets into set: {unmatched_keys}")
 
 
-def is_match(name: str, module: torch.nn.Module, target: str) -> bool:
+def is_match(
+    name: str,
+    module: torch.nn.Module,
+    target: str,
+    fused: Optional[FusedMappping] = None,
+) -> bool:
     """
     Returns true if either module name or module parent classes match against target
-    and the module is not an internal module
+    and the module is not an internal module. The name and module may refer to a fused
+    module defined by vLLM. In these cases, a `fused` mapping must be provided.
+
+    For example, in `vllm/model_executor/models/llama.py`:
+    ```python
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"]
+    }
+    ```
+
+    :param name: name of module
+    :param module: module to match
+    :param target: target which matches name or module, potentially contains regex
+    :fused: optional mapping from suffixes of fused modules to the suffixes of their
+        corresponding shards
     """
     return not isinstance(module, InternalModule) and (
-        _match_name(name, target) or _match_class(module, target)
+        _match_name(name, target, fused) or _match_class(module, target)
     )
 
 
-def _match_name(name: str, target: str) -> bool:
+def _match_name(name: str, target: str, fused: Optional[FusedMappping] = None) -> bool:
     """
-    Returns true if target string begins with "re:" and regex matches or if target
-    string exactly matches name
+    Returns true if target string begins with "re:" and regex matches or if target
+    string exactly matches name. If the name refers to a fused module defined by vLLM,
+    a `fused` mapping must be provided.
+
+    :param name: name of module
+    :param target: target name, potentially contains regex
+    :fused: optional mapping from suffixes of fused modules to the suffixes of their
+        corresponding shards
     """
+    if fused is not None:
+        for fused_suffix in fused:
+            if name.endswith(fused_suffix):
+                name_stripped = name.removesuffix(fused_suffix)
+                return any(
+                    _match_name(name_stripped + shard_suffix, target)
+                    for shard_suffix in fused[fused_suffix]
+                )
+
     if target.startswith("re:"):
         return re.match(target.removeprefix("re:"), name) is not None
     else:
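Note: with a `fused` mapping, a target written against unfused checkpoint names (e.g. `q_proj`) can match vLLM's fused module names (e.g. `qkv_proj`): the fused suffix is stripped from the module name and each shard suffix is substituted before matching. A small standalone sketch of that expansion, using the mapping from the docstring (the module names are illustrative):

```python
import re

packed_modules_mapping = {
    "qkv_proj": ["q_proj", "k_proj", "v_proj"],
    "gate_up_proj": ["gate_proj", "up_proj"],
}

def match_name(name: str, target: str, fused=None) -> bool:
    # Simplified stand-in for the patched _match_name, for illustration only
    if fused is not None:
        for fused_suffix in fused:
            if name.endswith(fused_suffix):
                stripped = name.removesuffix(fused_suffix)
                return any(
                    match_name(stripped + shard_suffix, target)
                    for shard_suffix in fused[fused_suffix]
                )
    if target.startswith("re:"):
        return re.match(target.removeprefix("re:"), name) is not None
    return target == name

print(match_name("model.layers.0.self_attn.qkv_proj", "re:.*k_proj", packed_modules_mapping))   # True
print(match_name("model.layers.0.mlp.gate_up_proj", "re:.*down_proj", packed_modules_mapping))  # False
```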
@@ -187,10 +231,20 @@ def _match_name(name: str, target: str) -> bool:
 
 def _match_class(module: torch.nn.Module, target: str) -> bool:
     """
-    Returns true if any torch parent class names match the target string exactly
+    Returns true if any torch parent class names match the target string exactly.
+    A special exception is made for vllm's `LinearBase` class which matches `Linear`
+
+    :param module: module to match
+    :param target: target which matches name or module
     """
     # will never match against a regex pattern since `:` is not allowed in class names
     return any(
-        cls.__name__ == target
+        (
+            issubclass(cls, torch.nn.Module)
+            and (
+                cls.__name__ == target
+                or (cls.__name__ == "LinearBase" and target == "Linear")
+            )
+        )
         for cls in module.__class__.__mro__
     )
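Note: `_match_class` now restricts the MRO walk to `torch.nn.Module` subclasses and treats vLLM's `LinearBase` as satisfying a `Linear` target, so schemes targeting `Linear` still apply to vLLM's parallel linear layers. A minimal sketch with a stand-in hierarchy (the `LinearBase`/`ColumnParallelLinear` stubs below only imitate vLLM's classes):

```python
import torch

class LinearBase(torch.nn.Module):
    """Stand-in for vLLM's LinearBase."""

class ColumnParallelLinear(LinearBase):
    """Stand-in for a vLLM linear shard."""

def match_class(module: torch.nn.Module, target: str) -> bool:
    # Simplified stand-in for the patched _match_class, for illustration only
    return any(
        issubclass(cls, torch.nn.Module)
        and (cls.__name__ == target or (cls.__name__ == "LinearBase" and target == "Linear"))
        for cls in module.__class__.__mro__
    )

module = ColumnParallelLinear()
print(match_class(module, "Linear"))     # True, via the LinearBase exception
print(match_class(module, "LayerNorm"))  # False
```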
{compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250805.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.10.3a20250728
+Version: 0.10.3a20250805
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250805.dist-info}/RECORD
RENAMED
@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=
+compressed_tensors/version.py,sha256=UcH3DkUtSV6xgd1l5QTWXLV_iWa7GzNrCWIOpZvkzkE,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -9,7 +9,7 @@ compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=e-2n
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
 compressed_tensors/compressors/quantized_compressors/base.py,sha256=YGUMzbxekj_36ChgQnVZN6T8uDjXtGG1zfMIBGBLWco,10354
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
-compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=
+compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=tKEaYom4SdMwZWg4MDMMMLNGTLgcVT20lPzewboVpMM,7145
 compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=47W1hFTi5YHVNKEWptzztsSutwI1kxy2Troh-NW1y14,11244
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
 compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
@@ -32,11 +32,11 @@ compressed_tensors/quantization/quant_scheme.py,sha256=xk2LPn18tjS1PEOyf0WKvavBq
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=wM8mVcbKvZjBo18pSXMp28i30YWwUXJPSS7_HCakH9U,17892
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=
+compressed_tensors/quantization/lifecycle/forward.py,sha256=HzfoRkK3CkEHuCqRWatq0kyu5sFx8ULZHNmmjRNIpWI,17571
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=BM7bR_uNa-Ex4T-roHonWiRaxCi5sFysXyl0cFh1ZVs,10257
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=
+compressed_tensors/quantization/utils/helpers.py,sha256=7a89X0kg6xDGplw6trOrkRQzMRPu-txY_qvEt07Vcgc,17036
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
 compressed_tensors/transform/__init__.py,sha256=v2wfl4CMfA6KbD7Hxx_MbRev63y_6QLDlccZq-WTtdw,907
@@ -56,14 +56,14 @@ compressed_tensors/transform/utils/matrix.py,sha256=FIHCUlpWVIIhdr3c6EbQec41JeiP
 compressed_tensors/utils/__init__.py,sha256=KZctuotCmX4byXhwDvSeXgp-Ny_awpziAX-WUkZfodI,853
 compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
 compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
-compressed_tensors/utils/match.py,sha256=
+compressed_tensors/utils/match.py,sha256=9x-yZIlq7ndSLf2aQwNT7IpBQDe-8H6utiJkji8wPrQ,9397
 compressed_tensors/utils/offload.py,sha256=3XiBuWbUkBAt8v1t5i57qDcbB3VJQs_FDeayi-JzIWg,23896
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.10.
-compressed_tensors-0.10.
-compressed_tensors-0.10.
-compressed_tensors-0.10.
-compressed_tensors-0.10.
+compressed_tensors-0.10.3a20250805.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.10.3a20250805.dist-info/METADATA,sha256=8SpvZ9SNB_DGL6L4I8QrtLczHtxI17ezOlwf6Ew_4R8,7031
+compressed_tensors-0.10.3a20250805.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.10.3a20250805.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.10.3a20250805.dist-info/RECORD,,
{compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250805.dist-info}/WHEEL
RENAMED
File without changes
{compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250805.dist-info}/licenses/LICENSE
RENAMED
File without changes
{compressed_tensors-0.10.3a20250728.dist-info → compressed_tensors-0.10.3a20250805.dist-info}/top_level.txt
RENAMED
File without changes