compressed-tensors 0.10.3a20250731__py3-none-any.whl → 0.10.3a20250806__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -124,8 +124,13 @@ def dequantize(
                     strategy=QuantizationStrategy.GROUP, group_size=group_size
                 )
             else:
+                rows, cols = x_q.shape[-2], x_q.shape[-1]
+                block_height = rows // scale.shape[0]  # Rows per block
+                block_width = cols // scale.shape[1]  # Columns per block
+
                 args = QuantizationArgs(
-                    strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape
+                    strategy=QuantizationStrategy.BLOCK,
+                    block_structure=[block_height, block_width],
                 )
         else:
             raise ValueError(
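The hunk above changes how `block_structure` is inferred during dequantization: previously `scale.shape` (the number of blocks along each dimension) was passed where block dimensions are expected; the new code derives rows and columns per block from the quantized tensor and the scale grid. A minimal sketch of that arithmetic, using made-up shapes rather than anything taken from the package:

```python
import torch

# hypothetical shapes, for illustration only
x_q = torch.empty(128, 256)   # quantized weight
scale = torch.empty(4, 2)     # one scale per block -> a 4 x 2 grid of blocks

rows, cols = x_q.shape[-2], x_q.shape[-1]
block_height = rows // scale.shape[0]   # 128 // 4 = 32 rows per block
block_width = cols // scale.shape[1]    # 256 // 2 = 128 columns per block

assert [block_height, block_width] == [32, 128]
```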
@@ -13,7 +13,8 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from typing import Optional
+from collections import defaultdict
+from typing import List, Optional, Tuple, Set
 
 import torch
 import torch.nn.utils.parametrize as P
@@ -49,10 +50,13 @@ class TransformFactory(RegistryMixin, ABC):
     :param seed: random seed used to transform weight randomization
     """
 
+    transforms: List["TransformBase"]
+
     def __init__(self, name: str, scheme: TransformScheme, seed: Optional[int] = None):
         self.name = name
         self.scheme = scheme
         self.generator = torch.Generator()
+        self.transforms = list()
         if seed is not None:
             self.generator.manual_seed(seed)
 
@@ -90,6 +94,8 @@ class TransformFactory(RegistryMixin, ABC):
         for _, module in match_named_modules(model, arg.targets, arg.ignore):
             self._apply_to_module(module, arg)
 
+        self._update_tied_weights()
+
     def _apply_to_module(self, module: Module, args: TransformArgs):
         """
         Create transforms and apply them to the module
@@ -97,9 +103,17 @@ class TransformFactory(RegistryMixin, ABC):
         :param module: target module to apply transforms to
         :param args: defines how the transform will be applied to the target module
         """
+        if has_offloaded_params(module):
+            if module._hf_hook.place_submodules:
+                raise NotImplementedError(
+                    "Applying transforms to offloaded submodules with "
+                    "`place_submodules=True` is not supported"
+                )
+
         # create transform as submodule
         transform_name = f"{self.name}_{args.location}"
         transform = self.create_transform(module, args)
+        self.transforms.append(transform)
         register_offload_module(module, transform_name, transform)
 
         # register input transformation hook
@@ -128,8 +142,9 @@ class TransformFactory(RegistryMixin, ABC):
                     raise ValueError("Offloaded training is not supported")
                 P.register_parametrization(module, "weight", transform)
 
-            # transform is no longer needed (unfusing is not supported)
-            delete_offload_module(module, transform_name)
+            else:
+                # transform is no longer needed (unfusing is not supported)
+                delete_offload_module(module, transform_name)
 
         # register output transformation hook
         elif args.location == TransformLocation.OUTPUT:
@@ -143,6 +158,31 @@ class TransformFactory(RegistryMixin, ABC):
         else:
             raise NotImplementedError()
 
+    def _update_tied_weights(self):
+        """
+        Populate the `_dynamic_tied_weights_keys` attribute of transforms,
+        which is used by transformers to detect and remove shared pointers
+        during saving
+        """
+        # map from data_ptrs to keys
+        ptr_to_keys: dict[int, List[Tuple[TransformBase, str]]] = defaultdict(list)
+        for transform in self.transforms:
+            for name, param in transform.named_parameters(recurse=False):
+                # NOTE: previously asserted that parent._hf_hook.place_submodules=False
+                if has_offloaded_params(transform):
+                    param = transform._hf_hook.weights_map[name]
+                ptr_to_keys[param.data_ptr()].append((transform, name))
+
+        # populate `_dynamic_tied_weights_keys` if there is more than one key
+        # and ensure that they share tensors
+        for shared_keys in ptr_to_keys.values():
+            if len(shared_keys) > 1:
+                tensor = getattr(shared_keys[0][0], shared_keys[0][1])
+
+                for transform, name in shared_keys:
+                    transform._dynamic_tied_weights_keys.add(name)
+                    setattr(transform, name, tensor)
+
 
 class TransformBase(InternalModule, ABC):
     """
@@ -151,6 +191,11 @@ class TransformBase(InternalModule, ABC):
 
     args: TransformArgs
     weight: Parameter
+    _dynamic_tied_weights_keys: Set[str]
+
+    def __init__(self):
+        super().__init__()
+        self._dynamic_tied_weights_keys = set()
 
     @abstractmethod
     def forward(self, value: Tensor) -> Tensor:
@@ -70,6 +70,7 @@ class RandomMatrixFactory(TransformFactory):
 
     def _create_inverse(self, weight: Parameter) -> Parameter:
         data = high_precision_invert(weight.data)
+        data = data.contiguous()  # ensure proper serialization
         return Parameter(data, requires_grad=False)
 
 
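The added `.contiguous()` call guards against returning an inverse with non-standard strides, which downstream serialization (for example safetensors-style saving) generally expects to be contiguous. An illustration of the distinction, using a transposed view as a common source of non-contiguity (not the package's inversion code):

```python
import torch

data = torch.randn(8, 8).t()    # a transposed view: same storage, swapped strides
print(data.is_contiguous())     # False
data = data.contiguous()        # copy into standard row-major layout before saving
print(data.is_contiguous())     # True
```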
@@ -15,7 +15,7 @@
 import logging
 import re
 from collections.abc import Generator
-from typing import Iterable, Tuple
+from typing import Iterable, Mapping, Optional, Tuple
 
 import torch
 from compressed_tensors.utils.internal import InternalModule
@@ -32,10 +32,14 @@ __all__ = [
 ]
 
 
+FusedMappping = Mapping[str, Iterable[str]]
+
+
 def match_named_modules(
     model: torch.nn.Module,
     targets: Iterable[str],
     ignore: Iterable[str] = tuple(),
+    fused: Optional[FusedMappping] = None,
     warn_on_fail: bool = False,
 ) -> Generator[Tuple[str, torch.nn.Module]]:
     """
@@ -45,16 +49,18 @@ def match_named_modules(
     :param model: model containing submodules to match against
     :param targets: target strings, potentially containing "re:" prefixes
     :param ignore: targets to ignore, potentially containing "re:" prefixes
+    :fused: optional mapping from suffixes of fused modules to the suffixes of their
+        corresponding shards. See `compressed_tensors.utils.match.is_match`
     :param warn_on_fail: if True, warns if any targets do not match any modules in model
     :return: generator of module names and modules
     """
     unmatched_targets = set(targets)
     for name, module in model.named_modules():
         for target in targets:
-            if is_match(name, module, target):
+            if is_match(name, module, target, fused):
                 unmatched_targets -= {target}
 
-                if not any(is_match(name, module, ign) for ign in ignore):
+                if not any(is_match(name, module, ign, fused) for ign in ignore):
                     yield name, module
 
     if warn_on_fail:
@@ -68,6 +74,7 @@ def match_named_parameters(
     model: torch.nn.Module,
     targets: Iterable[str],
     ignore: Iterable[str] = tuple(),
+    fused: Optional[FusedMappping] = None,
     warn_on_fail: bool = False,
 ) -> Generator[Tuple[str, torch.nn.Module, torch.nn.Parameter]]:
     """
@@ -77,6 +84,8 @@ def match_named_parameters(
     :param model: model containing params to match against
     :param targets: target strings, potentially containing "re:" prefixes
     :param ignore: targets to ignore, potentially containing "re:" prefixes
+    :fused: optional mapping from suffixes of fused modules to the suffixes of their
+        corresponding shards. See `compressed_tensors.utils.match.is_match`
     :param warn_on_fail: if True, warns if any targets do not match any params in model
     :return: generator of fully-qualified param names, parent modules, and params
     """
@@ -88,10 +97,10 @@ def match_named_parameters(
         for param_name, param in module.named_parameters(recurse=False):
             param_fqn = f"{module_name}.{param_name}"
             for target in targets:
-                if _match_name(param_fqn, target):
+                if _match_name(param_fqn, target, fused):
                     unmatched_targets -= {target}
 
-                    if not any(_match_name(param_fqn, ign) for ign in ignore):
+                    if not any(_match_name(param_fqn, ign, fused) for ign in ignore):
                         yield param_fqn, module, param
 
     if warn_on_fail:
@@ -164,21 +173,56 @@ def match_modules_set(
         raise ValueError(f"Unable to match targets into set: {unmatched_keys}")
 
 
-def is_match(name: str, module: torch.nn.Module, target: str) -> bool:
+def is_match(
+    name: str,
+    module: torch.nn.Module,
+    target: str,
+    fused: Optional[FusedMappping] = None,
+) -> bool:
     """
     Returns true if either module name or module parent classes match against target
-    and the module is not an internal module
+    and the module is not an internal module. The name and module may refer to a fused
+    module defined by vLLM. In these cases, a `fused` mapping must be provided.
+
+    For example, in `vllm/model_executor/models/llama.py`:
+    ```python
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"]
+    }
+    ```
+
+    :param name: name of module
+    :param module: module to match
+    :param target: target which matches name or module, potentially contains regex
+    :fused: optional mapping from suffixes of fused modules to the suffixes of their
+        corresponding shards
     """
     return not isinstance(module, InternalModule) and (
-        _match_name(name, target) or _match_class(module, target)
+        _match_name(name, target, fused) or _match_class(module, target)
     )
 
 
-def _match_name(name: str, target: str) -> bool:
+def _match_name(name: str, target: str, fused: Optional[FusedMappping] = None) -> bool:
     """
-    Returns true if target string begins with "re:" and
-    regex matches or if target string exactly matches name
+    Returns true if target string begins with "re:" and regex matches or if target
+    string exactly matches name. If the name refers to a fused module defined by vLLM,
+    a `fused` mapping must be provided.
+
+    :param name: name of module
+    :param target: target name, potentially contains regex
+    :fused: optional mapping from suffixes of fused modules to the suffixes of their
+        corresponding shards
     """
+    if fused is not None:
+        for fused_suffix in fused:
+            if name.endswith(fused_suffix):
+                name_stripped = name.removesuffix(fused_suffix)
+                return any(
+                    _match_name(name_stripped + shard_suffix, target)
+                    for shard_suffix in fused[fused_suffix]
+                )
+
     if target.startswith("re:"):
         return re.match(target.removeprefix("re:"), name) is not None
     else:
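The new `fused` handling rewrites a fused module name into its shard names and accepts the match if any shard name satisfies the target. A runnable, self-contained version of that rewrite is sketched below; `match_name` here is a local illustration of the logic in the hunk above, not the compressed-tensors API:

```python
import re
from typing import Iterable, Mapping, Optional


def match_name(name: str, target: str,
               fused: Optional[Mapping[str, Iterable[str]]] = None) -> bool:
    # rewrite a fused-module name into its shard names and match any of them
    if fused is not None:
        for fused_suffix, shard_suffixes in fused.items():
            if name.endswith(fused_suffix):
                stem = name.removesuffix(fused_suffix)
                return any(
                    match_name(stem + shard_suffix, target)
                    for shard_suffix in shard_suffixes
                )

    if target.startswith("re:"):
        return re.match(target.removeprefix("re:"), name) is not None
    return target == name


fused = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
print(match_name("model.layers.0.self_attn.qkv_proj", "re:.*q_proj", fused))  # True
print(match_name("model.layers.0.self_attn.qkv_proj", "re:.*o_proj", fused))  # False
```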
@@ -187,10 +231,20 @@ def _match_name(name: str, target: str) -> bool:
 
 def _match_class(module: torch.nn.Module, target: str) -> bool:
     """
-    Returns true if any torch parent class names match the target string exactly
+    Returns true if any torch parent class names match the target string exactly.
+    A special exception is made for vllm's `LinearBase` class which matches `Linear`
+
+    :param module: module to match
+    :param target: target which matches name or module
     """
     # will never match against a regex pattern since `:` is not allowed in class names
     return any(
-        issubclass(cls, torch.nn.Module) and cls.__name__ == target
+        (
+            issubclass(cls, torch.nn.Module)
+            and (
+                cls.__name__ == target
+                or (cls.__name__ == "LinearBase" and target == "Linear")
+            )
+        )
         for cls in module.__class__.__mro__
     )
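With this change, a module whose MRO contains a class literally named `LinearBase` (as vLLM's tensor-parallel linear layers do) now satisfies a `"Linear"` target. A minimal sketch, with dummy classes standing in for the vLLM ones:

```python
import torch


class LinearBase(torch.nn.Module):
    """Stand-in for vLLM's LinearBase; only the class name matters here."""


class RowParallelLinear(LinearBase):
    """Stand-in for a vLLM tensor-parallel linear layer."""


def matches(module: torch.nn.Module, target: str) -> bool:
    # mirrors the updated _match_class: exact class-name match, plus the
    # LinearBase -> "Linear" exception
    return any(
        issubclass(cls, torch.nn.Module)
        and (cls.__name__ == target
             or (cls.__name__ == "LinearBase" and target == "Linear"))
        for cls in module.__class__.__mro__
    )


print(matches(torch.nn.Linear(2, 2), "Linear"))  # True: exact class-name match
print(matches(RowParallelLinear(), "Linear"))    # True: via the LinearBase exception
```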
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.10.3.a20250731'
+__version__ = version = '0.10.3.a20250806'
 __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.10.3a20250731
+Version: 0.10.3a20250806
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=cuOuj6FL5GE-iPKjLVFuRjlwW0_6uDC3tDxFkkHyXFg,523
+compressed_tensors/version.py,sha256=AuoKIjSgjjAcZIPZe3HN5zhNJ7enhDAjwQrqUHPg76o,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -32,7 +32,7 @@ compressed_tensors/quantization/quant_scheme.py,sha256=xk2LPn18tjS1PEOyf0WKvavBq
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=wM8mVcbKvZjBo18pSXMp28i30YWwUXJPSS7_HCakH9U,17892
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=lQwibkDGroJqONhP9ATZWwaZF9suPmCZMQEagFlFc94,17329
+compressed_tensors/quantization/lifecycle/forward.py,sha256=HzfoRkK3CkEHuCqRWatq0kyu5sFx8ULZHNmmjRNIpWI,17571
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=BM7bR_uNa-Ex4T-roHonWiRaxCi5sFysXyl0cFh1ZVs,10257
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
@@ -45,9 +45,9 @@ compressed_tensors/transform/transform_args.py,sha256=jJY-Qt996w45LWQ10AHd7tUtNr
 compressed_tensors/transform/transform_config.py,sha256=A3RuLNDqBNEByQNeu40Kg7sItwE6kWgnX18Umg1uONI,2128
 compressed_tensors/transform/transform_scheme.py,sha256=uGLC4avdbhrVqNC3-Eo0p7WzNRQK92Fpg0N9hWiuCRQ,1752
 compressed_tensors/transform/factory/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
-compressed_tensors/transform/factory/base.py,sha256=Zplf8QO-mFqGwDEhLdYL_afSu7v4nMa79oNhidRNPvY,5880
+compressed_tensors/transform/factory/base.py,sha256=NJ3lI95tJk6gHOeZEVheQ_Ae7NHhhUG_9FHXu613x30,7740
 compressed_tensors/transform/factory/hadamard.py,sha256=B0BVjbF3y707MO6L2XfEoZJTQU965vU9dUPLOiUSXII,4193
-compressed_tensors/transform/factory/matrix_multiply.py,sha256=LdoV2E12HTucmUWcw7UKOpRNnL8QhOOIUnNVlpOpGiI,3925
+compressed_tensors/transform/factory/matrix_multiply.py,sha256=kCB7cfM_PCgJDyyhg2d1rKTEiyuscwzhprXY7VfIx6E,3989
 compressed_tensors/transform/factory/random_hadamard.py,sha256=nUhTlFa4ikSpcl4Umme71pnjMPgwYoGlwjKlU27UHZ4,1634
 compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/transform/utils/hadamard.py,sha256=hDJZC0Gw2fKdxqa3f8TmFc5J0eJqxHtFRxswLU_yVJc,5548
@@ -56,14 +56,14 @@ compressed_tensors/transform/utils/matrix.py,sha256=FIHCUlpWVIIhdr3c6EbQec41JeiP
 compressed_tensors/utils/__init__.py,sha256=KZctuotCmX4byXhwDvSeXgp-Ny_awpziAX-WUkZfodI,853
 compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
 compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
-compressed_tensors/utils/match.py,sha256=ZVBPzrGYExq7-6RRUlU5XeCjl0ooLaNUoDO6Cgnn9cY,7220
+compressed_tensors/utils/match.py,sha256=9x-yZIlq7ndSLf2aQwNT7IpBQDe-8H6utiJkji8wPrQ,9397
 compressed_tensors/utils/offload.py,sha256=3XiBuWbUkBAt8v1t5i57qDcbB3VJQs_FDeayi-JzIWg,23896
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.10.3a20250731.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.10.3a20250731.dist-info/METADATA,sha256=1NCpfVbLTf6aGJ38rJz3Lmu9DptHpuYm5vTRxIB9PB8,7031
-compressed_tensors-0.10.3a20250731.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.10.3a20250731.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.10.3a20250731.dist-info/RECORD,,
+compressed_tensors-0.10.3a20250806.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.10.3a20250806.dist-info/METADATA,sha256=e8DIx-6UDn2Wj7fGLEBgVru2k9Tme9dOPgxS_ciZDcw,7031
+compressed_tensors-0.10.3a20250806.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.10.3a20250806.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.10.3a20250806.dist-info/RECORD,,