PyPI - compressed-tensors - Versions diffs - 0.12.3a20251114__py3-none-any.whl → 0.12.3a20251203__py3-none-any.whl - Mend

compressed-tensors 0.12.3a20251114__py3-none-any.whl → 0.12.3a20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

compressed_tensors/utils/match.py CHANGED Viewed

@@ -13,7 +13,9 @@
 # limitations under the License.
 import logging
+import os
 import re
+from collections import defaultdict
 from collections.abc import Generator
 from typing import Iterable, List, Mapping, Optional, Tuple, Union
@@ -29,6 +31,7 @@ __all__ = [
     "match_named_parameters",
     "match_targets",
     "match_modules_set",
+    "get_lowest_common_ancestor_name",
     "is_match",
     "is_narrow_match",
 ]
@@ -157,34 +160,68 @@ def match_targets(
     return matched_targets
+def get_lowest_common_ancestor_name(names: list[str | None]) -> str:
+    """
+    Given a list of names, returns the lowest-scope common name ignoring Nones.
+    Implementation is a small alteration of os.path.commonprefix
+    https://docs.python.org/3/library/os.path.html#os.path.commonprefix
+    ([s1, s2]->prefix->result)
+    # case 0: multiple modules: [.abc.a., .abc.b.] -> .abc. -> abc
+    # case 1: single module: [.abc.] -> .abc. -> abc
+    # case 2: substring modules: [.abc., .ab.] -> .ab -> ""
+    # case 3: parent & child: [.ab., .ab.a.] -> .ab. -> ab
+    """
+    names = [name for name in names if name is not None]
+    if len(names) == 0:
+        return ""
+    # 1) find longest shared prefix
+    s1 = "." + min(names) + "."
+    s2 = "." + max(names) + "."
+    common_prefix = os.path.commonprefix([s1, s2])
+    # 2) throw away right most dot and name fragment, throw away leftmost char
+    # ".keep.thro" -> "keep", "." -> ""
+    return common_prefix[1 : common_prefix.rfind(".")]
 def match_modules_set(
     model: torch.nn.Module,
     targets: Optional[Iterable[str]],
     ignore: Optional[Iterable[str]] = None,
-) -> Generator[Iterable[torch.nn.Module]]:
+    error_on_module_rematch: bool = True,
+) -> Generator[List[List[torch.nn.Module]]]:
     """
-    Yields modules grouped with the same order and size as `targets`.
-    Values are returned in order of `model.named_modules()`
+    Yields modules grouped by parent context.
+    We group by parent context so that we can return ALL matches of a
+    specific target that can be paired with another target. This is most
+    relevant in the case of MoE modules with multiple modules for each
+    expert i.e. post_attention_layernorm <-> mlp.expert.N.gate_proj,
+    mlp.expert.N.up_proj for all N. The parent context will differ from
+    one layer to another while being the same for one expert to another.
-    E.g. the following targets would yield module belonging to the following layers:
+    Each returned group is a list (of lists) with the same size
+    and order as `targets` while all matches for each target and
+    the overall order of the groups are ordered in the same way
+    as `model.named_modules`
+    E.g. the following targets would yield modules belonging to the following layers:
     ```python3
     match_modules_set(model, ["q_proj", "k_proj", "v_proj"]) == (
-        (
-            `model.layers.0.self_attn.q_proj`,
-            `model.layers.0.self_attn.k_proj`,
-            `model.layers.0.self_attn.v_proj`,
-        ),
-        (
-            `model.layers.1.self_attn.q_proj`,
-            `model.layers.1.self_attn.k_proj`,
-            `model.layers.1.self_attn.v_proj`,
-        ),
+        [
+            [`layers.0.self_attn.q_proj`],
+            [`layers.0.self_attn.k_proj`],
+            [`layers.0.self_attn.v_proj`],
+        ],
+        [
+            [`layers.1.self_attn.q_proj`],
+            [`layers.1.self_attn.k_proj`],
+            [`layers.1.self_attn.v_proj`],
+        ],
         ...
-        (
-            `model.layers.32.self_attn.q_proj`,
-            `model.layers.32.self_attn.k_proj`,
-            `model.layers.32.self_attn.v_proj`,
-        ),
     )
     ```
@@ -192,33 +229,125 @@ def match_modules_set(
     For example, matching layer norms to their subsequent linear layers
     ```python3
     for norm, q, k, v in match_modules_set(model, (norm_tgt, q_tgt, k_tgt, v_tgt)):
-        fuse_norm_linears(norm, [q, k, v])
+        fuse_norm_linears(*norm, [*q, *k, *v])
+    ```
+    Alternatively for MoE you would get multiple matches
+    per target per group, E.g.
+    ```python3
+    targets = [
+        "post_attention_layernorm",
+        "up_proj",
+        "down_proj"
+    ]
+    match_modules_set(model, targets) == (
+        [
+            [layers.0.post_attention_layernorm],
+            [
+                `layers.0.mlp.experts.0.up_proj`,
+                `layers.0.mlp.experts.1.up_proj`,
+                ...
+            ],
+            [
+                `layers.0.mlp.experts.0.down_proj`,
+                `layers.0.mlp.experts.1.down_proj`,
+                ...
+            ]
+        ], # <- first yield
+        [
+            [layers.1.post_attention_layernorm],
+            [
+                `layers.1.mlp.experts.0.up_proj`,
+                `layers.1.mlp.experts.1.up_proj`,
+                ...
+            ],
+            [
+                `layers.1.mlp.experts.0.down_proj`,
+                `layers.1.mlp.experts.1.down_proj`,
+                ...
+            ]
+        ],
+        ...
+    )
+    ```
     :param model: model containing modules to match against
     :param targets: target strings, potentially containing "re:" prefixes
     :param ignore: targets to ignore, potentially containing "re:" prefixes
+    :param error_on_module_rematch: if True, errors when a module gets
+      matched to multiple targets, if False, no error. (Defaults to True)
     """
     targets = targets or []
     ignore = ignore or []
-    matches = dict.fromkeys(targets, None)
+    # as we iterate through modules and try to match them with targets,
+    # the algorithm can be in 2 possible states:
+    # 0) unmatched_targets > 0, i.e. some of the targets haven't been matched.
+    #   Keep matching until all targets have at least one match
+    # 1) unmatched_targets == 0 i.e. we have at least one match for each target.
+    #   At this point we are unsure if we have a full set or if we need to add
+    #   more matches.
+    # There are 3 things that can happen once we're in state 1:
+    # A) found a new match with same parent_context,
+    #   (add it to matches and keep going)
+    # B) found a new match with different parent_context, i.e. we found a match
+    #   that requires a deeper parent context, this indicates that this match
+    #   should be part of a new set.
+    #   (yield current set [not including newest match] and go back to state 0)
+    # C) ran out of modules, we will always yield the final remaining set when
+    #   we've iterated through all the modules in the model.
+    #   (yield final set then exit.)
+    # Note: it's possible to iterate through all the modules in the model while
+    #   not having a full matched set if the user specified a bad matching, in
+    #   that case something has gone wrong and we error
+    matches = defaultdict(list)
+    parent_context = None
+    unmatched_targets = set(targets)
     for name, module in model.named_modules():
-        # match until we get a full set
+        matched_targets_for_cur_module = set()
         for target in targets:
             if is_match(name, module, target, ignore):
-                if matches[target] is not None:
-                    raise ValueError(f"Matched a {target} twice before completing set")
-                matches[target] = module
-        # once we have a full set, yield and reset
-        if targets and all((matches[target] is not None for target in targets)):
-            yield [matches[target] for target in targets]  # ensure correct ordering
-            matches = dict.fromkeys(targets, None)
-    # check that none are left over
-    unmatched_keys = [match for match, value in matches.items() if value is not None]
-    if len(unmatched_keys):
-        raise ValueError(f"Unable to match targets into set: {unmatched_keys}")
+                new_parent_context = get_lowest_common_ancestor_name(
+                    [name, parent_context]
+                )
+                # code for (B)
+                if not unmatched_targets and new_parent_context != parent_context:
+                    yield [matches[target] for target in targets]
+                    matches = defaultdict(list)
+                    new_parent_context = name
+                    unmatched_targets = set(targets)
+                matches[target].append(module)
+                parent_context = new_parent_context
+                unmatched_targets -= {target}
+                matched_targets_for_cur_module |= {target}
+        if len(matched_targets_for_cur_module) > 1 and error_on_module_rematch:
+            raise ValueError(
+                f"module: {name} was matched with multiple targets: "
+                f"{matched_targets_for_cur_module} which is unexpected "
+                "disable this check by setting `error_on_module_rematch = False`"
+            )
+    # never found anything
+    if unmatched_targets == set(targets):
+        return
+    # code for (C)
+    if not unmatched_targets:  # have a full matching
+        yield [matches[target] for target in targets]
+        return
+    raise ValueError(
+        f"Found a final incomplete set with matches found for keys: "
+        f"{set(targets) - unmatched_targets} "
+        f"but no matches found for keys: {unmatched_targets}"
+    )
 def is_match(

compressed_tensors/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.12.3.a20251114'
+__version__ = version = '0.12.3.a20251203'
 __version_tuple__ = version_tuple = (0, 12, 3)

{compressed_tensors-0.12.3a20251114.dist-info → compressed_tensors-0.12.3a20251203.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.12.3a20251114
+Version: 0.12.3a20251203
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/vllm-project/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.12.3a20251114.dist-info → compressed_tensors-0.12.3a20251203.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 compressed_tensors/__init__.py,sha256=SRqNYFVvxAaLa4SImhoiIBKfoOSj7EUdx0CxXjGC2PA,884
 compressed_tensors/base.py,sha256=dKAVgQAp9GPH6YspvF_cbGXCrbiqAeLEIPydYAO40WE,859
 compressed_tensors/logger.py,sha256=sTm1Od1cV0aDxBm3YN-PPvsOATxY_2tBV62TQE4HiPw,4032
-compressed_tensors/version.py,sha256=_76JjfEalYtLnwlx_1vHVRHYO4_7nPpez11U9pkUbyk,523
+compressed_tensors/version.py,sha256=muVSrnf9zuwEcHxyznnbC_TivRxUkdpFuvlk05CdEcA,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -63,14 +63,14 @@ compressed_tensors/transform/utils/matrix.py,sha256=BapkVu1763cN1VPP0ukvSzmG0dHo
 compressed_tensors/utils/__init__.py,sha256=eXvtlJEUiV4XPcfsxVrOwL7DyY8r-L0XG_Rr5qmZrmU,850
 compressed_tensors/utils/helpers.py,sha256=WHYh8yxMsmG2HxcfNVzcMLC-dtgWroRZaLcreADmUYE,15562
 compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
-compressed_tensors/utils/match.py,sha256=g1K6x56LoCGVR_sA25MOpja_U_V6_MZ-6cSY_Q_IauY,12320
+compressed_tensors/utils/match.py,sha256=wlNvl_x8QYpd8pyiAsRrJITHAKihRpi_pC_1iNi5UgI,17120
 compressed_tensors/utils/offload.py,sha256=eXqLzl8kUkVDlNtcO5sn_4QoDcbAaxbCAS3tyZ-aGr8,23538
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
 compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
-compressed_tensors-0.12.3a20251114.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.12.3a20251114.dist-info/METADATA,sha256=_GmxiVbPvm29hVkbhLwcHbFVKEnTq8RJPLVRIuFKyQQ,7027
-compressed_tensors-0.12.3a20251114.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.12.3a20251114.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.12.3a20251114.dist-info/RECORD,,
+compressed_tensors-0.12.3a20251203.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.12.3a20251203.dist-info/METADATA,sha256=SH2vzxy3ZkciavF49vLmX-6dMl07CIKvkljmba5ArcQ,7027
+compressed_tensors-0.12.3a20251203.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.12.3a20251203.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.12.3a20251203.dist-info/RECORD,,

{compressed_tensors-0.12.3a20251114.dist-info → compressed_tensors-0.12.3a20251203.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors-0.12.3a20251114.dist-info → compressed_tensors-0.12.3a20251203.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{compressed_tensors-0.12.3a20251114.dist-info → compressed_tensors-0.12.3a20251203.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors 0.12.3a20251114__py3-none-any.whl → 0.12.3a20251203__py3-none-any.whl

compressed-tensors 0.12.3a20251114__py3-none-any.whl → 0.12.3a20251203__py3-none-any.whl