compressed-tensors 0.10.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registries. It is provided for informational purposes only.
compressed_tensors/transform/transform_args.py

@@ -13,15 +13,31 @@
  # limitations under the License.
 
  from enum import Enum
- from typing import Any, List
+ from typing import List
 
  from pydantic import BaseModel, Field, field_validator
 
 
- __all__ = ["TransformArgs"]
+ __all__ = ["TransformArgs", "TransformLocation"]
 
 
  class TransformLocation(str, Enum):
+     """
+     Enum representing which parameters/activations of a given module a transform
+     weight should be applied to
+
+     | ---------------------------------------------------------------------------------------------------------- |  # noqa: E501
+     | Name            | Runtime     | Values        | Locations Where Inverse Could Be Applied                  |  # noqa: E501
+     | --------------- | ----------- | ------------- | ---------------------------------------------------------- |  # noqa: E501
+     | `INPUT`         | online      | activations   | `prev.WEIGHT_OUTPUT`, `prev.OUTPUT`, `this.WEIGHT_INPUT`  |  # noqa: E501
+     | `WEIGHT_INPUT`  | offline     | weight        | `prev.WEIGHT_OUTPUT`, `prev.OUTPUT`, `this.INPUT`         |  # noqa: E501
+     | `WEIGHT_OUTPUT` | offline     | weight        | `this.OUTPUT`, `next.INPUT`, `next.WEIGHT_INPUT`          |  # noqa: E501
+     | `OUTPUT`        | online      | activations   | `this.WEIGHT_OUTPUT`, `next.INPUT`, `next.WEIGHT_INPUT`   |  # noqa: E501
+     | `K_CACHE`       | online      | key_values    | `q_proj.Q_ATTN`                                           |  # noqa: E501
+     | `Q_ATTN`        | online      | query_values  | `k_proj.K_CACHE`                                          |  # noqa: E501
+     | ---------------------------------------------------------------------------------------------------------- |  # noqa: E501
+     """
+
      INPUT = "input"
      WEIGHT_INPUT = "weight_input"
      WEIGHT_OUTPUT = "weight_output"
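Because `TransformLocation` subclasses both `str` and `Enum`, its members compare equal to their raw string values, which is what lets helpers later in this diff match against either form. A minimal sketch of that behavior (the import path is the one this diff itself uses):

```python
from compressed_tensors.transform import TransformLocation

# str-Enum members compare equal to their plain-string values
assert TransformLocation.WEIGHT_INPUT == "weight_input"

# and the string form round-trips back to the member
assert TransformLocation("output") is TransformLocation.OUTPUT
```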
compressed_tensors/transform/utils/__init__.py (new file)

@@ -0,0 +1,13 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
compressed_tensors/transform/utils/hadamard.py (new file)

@@ -0,0 +1,161 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import math
+ from typing import Optional, Tuple
+
+ import numpy
+ import torch
+
+
+ __all__ = ["random_hadamard_matrix", "deterministic_hadamard_matrix"]
+
+ # adapted from:
+ # https://github.com/scipy/scipy/blob/v1.15.2/scipy/linalg/_special_matrices.py
+ def deterministic_hadamard_matrix(size: int) -> torch.Tensor:
+     """
+     Construct an n-by-n Hadamard matrix using Sylvester's construction.
+     `n` must be a power of 2.
+
+     :param size: order of the matrix, must be a power of 2
+     :return: hadamard matrix of size `size`
+     """
+     if size <= 0:
+         raise ValueError("Cannot construct deterministic hadamard of size <= 0")
+
+     log2 = int(math.log(size, 2))
+     if size != 2**log2:
+         raise ValueError("Cannot construct deterministic hadamard of size != 2^n")
+
+     H = numpy.array([[1]], dtype=int)
+
+     # Sylvester's construction
+     for i in range(0, log2):
+         H = numpy.vstack((numpy.hstack((H, H)), numpy.hstack((H, -H))))
+
+     return torch.from_numpy(H / math.sqrt(size))
+
+
+ # adapted from:
+ # https://github.com/facebookresearch/SpinQuant/blob/main/utils/hadamard_utils.py
+
+ # TODO: the following library exists for online rotations and should be considered
+ # in the future:
+ # https://github.com/Dao-AILab/fast-hadamard-transform/tree/master
+
+
+ def random_hadamard_matrix(
+     size: int, gen: Optional[torch.Generator] = None
+ ) -> torch.Tensor:
+     """
+     Produces a randomly generated Hadamard matrix.
+     See https://cornell-relaxml.github.io/quip-sharp/,
+     Section "Randomized Hadamard Transformation"
+
+     :param size: the dimension of the hadamard matrix
+     :param gen: optional generator for random values
+     :return: randomly generated hadamard matrix
+     """
+     # Benefits: support other shapes / non powers of 2, support randomization
+     Q = torch.randint(low=0, high=2, size=(size,), generator=gen, dtype=torch.float64)
+     Q = Q * 2 - 1
+     Q = torch.diag(Q)
+     return _matmul_hadU(Q) / math.sqrt(size)
+
+
+ def _get_hadK(n: int, transpose: bool = False) -> Tuple[torch.Tensor, int]:
+     # NOTE: we can easily extend the list of supported shapes/sizes
+     # by adding to these methods
+     hadK, K = None, None
+     if n % 20 == 0:
+         assert _is_pow2(n // 20)
+         K = 20
+         hadK = _get_had20().T if transpose else _get_had20()
+     elif n % 12 == 0:
+         assert _is_pow2(n // 12)
+         K = 12
+         hadK = _get_had12().T if transpose else _get_had12()
+     else:
+         assert _is_pow2(n)
+         K = 1
+
+     return hadK, K
+
+
+ def _matmul_hadU(X, transpose=False) -> torch.Tensor:
+     n = X.shape[-1]
+     # Check if we have the determined hadamard matrix
+     hadK, K = _get_hadK(n, transpose)
+     # Reshape diag matrix with randomized -1/+1
+     input = X.clone().view(-1, n, 1)
+     output = input.clone()
+
+     # for cases when hadK is not predetermined, determine hadamard matrix
+     while input.shape[1] > K:
+         input = input.view(input.shape[0], input.shape[1] // 2, 2, input.shape[2])
+         output = output.view(input.shape)
+         output[:, :, 0, :] = input[:, :, 0, :] + input[:, :, 1, :]
+         output[:, :, 1, :] = input[:, :, 0, :] - input[:, :, 1, :]
+         output = output.view(input.shape[0], input.shape[1], -1)
+         (input, output) = (output, input)
+     del output
+
+     # K == 1 when hadK is None; this happens when the size dim (n)
+     # is not compatible with any of the maintained hadamard matrices
+
+     if K > 1:
+         # Do not explicitly repeat - OOM
+         # input = torch.bmm(
+         #     hadK.repeat(len(input), 1, 1).to(input.device).to(input.dtype), input)
+         # Use bcast instead
+
+         # for cases when hadK is pre-determined
+         input = hadK.view(1, K, K).to(input) @ input
+
+     # normalize
+     return input.view(X.shape)
+
+
+ def _is_pow2(n: int) -> bool:
+     return (n & (n - 1) == 0) and (n > 0)
+
+
+ def _reshape_bits(packed_bits: numpy.ndarray, original_size: int) -> numpy.ndarray:
+     had_unpacked = numpy.unpackbits(packed_bits)
+     had_unpacked = [1 if x == 1 else -1 for x in had_unpacked]
+     had_unpacked = numpy.array(had_unpacked).reshape((original_size, original_size))
+     return had_unpacked
+
+
+ # http://www.neilsloane.com/hadamard/index.html
+ def _get_had12() -> torch.Tensor:
+     # fmt: off
+     had_12 = numpy.array([128, 13, 29, 232, 235, 71, 218,
+                           62, 209, 246, 139, 180, 157, 168, 237, 199, 106, 59], dtype=numpy.uint8)
+     # fmt: on
+     # TODO: just unpack during apply
+     had_12_unpacked = _reshape_bits(had_12, original_size=12)
+     return torch.tensor(had_12_unpacked)
+
+
+ def _get_had20() -> torch.Tensor:
+     # fmt: off
+     had_20 = numpy.array([128, 0, 13, 133, 121, 236, 43, 203, 97, 94, 155, 10, 252,
+                           216, 87, 230, 194, 191, 54, 21, 249, 176, 171, 205, 133, 222, 108, 42, 243,
+                           97, 215, 155, 10, 188, 216, 149, 230, 200, 175, 54, 133, 121, 188, 43,
+                           205, 225, 94, 107, 10, 243], dtype=numpy.uint8)
+     # fmt: on
+     # TODO: just unpack during apply
+     had_20_unpacked = _reshape_bits(had_20, original_size=20)
+     return torch.tensor(had_20_unpacked)
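Both constructors scale by 1 / sqrt(n), so the returned matrices should be orthogonal (H @ H.T == I). A quick sanity check, assuming the module path listed in the RECORD below (`compressed_tensors/transform/utils/hadamard.py`):

```python
import torch
from compressed_tensors.transform.utils.hadamard import (
    deterministic_hadamard_matrix,
    random_hadamard_matrix,
)

# Sylvester construction: size must be a power of 2
H = deterministic_hadamard_matrix(64)
assert torch.allclose(H @ H.T, torch.eye(64, dtype=H.dtype), atol=1e-6)

# randomized construction: 768 = 12 * 64, so the hard-coded had12 kernel is used
gen = torch.Generator().manual_seed(0)  # reproducible random sign flips
R = random_hadamard_matrix(768, gen=gen)
assert torch.allclose(R @ R.T, torch.eye(768, dtype=R.dtype), atol=1e-6)
```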
compressed_tensors/transform/utils/utils.py (new file)

@@ -0,0 +1,91 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import torch
+ from compressed_tensors.transform import TransformLocation
+
+
+ __all__ = ["get_matrix_size", "apply_transform_weight"]
+
+
+ def get_matrix_size(module: torch.nn.Module, location: TransformLocation) -> int:
+     """
+     Determine the size of a matrix given its location on the module
+
+     :param module: module that matrix will be applied to
+     :param location: location on module
+     :return: size of matrix
+     """
+     assert isinstance(module, torch.nn.Linear)
+     if location in ("input", TransformLocation.WEIGHT_INPUT):
+         return module.in_features
+     else:
+         return module.out_features
+
+
+ def apply_transform_weight(
+     weight: torch.Tensor,
+     value: torch.Tensor,
+     location: TransformLocation,
+ ) -> torch.Tensor:
+     """
+     Using the transform location, determine how to apply the transform weight to the
+     given value. For more info on input and output transforms, see `TransformLocation`
+
+     The following explains how weights should be applied to values according to location:
+
+     let x be the input activation,
+         W be the weight,
+         yh, xh, Wh be the transformed output, input, and weight
+
+     note that
+         y = (x W.T)  // torch.nn.Linear
+
+     Choose values for yh, xh, and Wh which incorporate matrix transforms:
+
+     let V, Vi be transform matrices on the input side,
+         U, Ui be transform matrices on the output side
+
+     pick xh = (x V)
+          Wh = (U.T W Vi.T)
+          yh = (y U)
+
+     The following shows that `yh = (xh) (Wh).T` for the chosen values of yh, xh, and Wh:
+
+     (xh) (Wh).T = (x V) (U.T W Vi.T).T
+                 = (x V) (Vi W.T U)  // transpose of a matrix product
+                 = (x W.T) U         // V Vi = I, since Vi is the inverse of V
+                 = y U
+                 = yh
+
+     :param weight: transform weight to apply
+     :param value: value to apply weight to
+     :param location: determines how weight should be applied
+     :return: value after transform weight has been applied
+     """
+
+     if location == TransformLocation.INPUT:
+         return value @ weight
+
+     elif location == TransformLocation.WEIGHT_INPUT:
+         return value @ weight.T
+
+     elif location == TransformLocation.WEIGHT_OUTPUT:
+         return weight.T @ value
+
+     elif location == TransformLocation.OUTPUT:
+         return value @ weight
+
+     else:
+         raise NotImplementedError(f"{location} has not been implemented yet")
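To make the docstring's derivation concrete, here is a small numerical check of `yh = (xh) (Wh).T` with `U = I` and an orthogonal Hadamard `V` (so `Vi = V.T`); the deep import paths are read off the RECORD section below and are an assumption of this sketch:

```python
import torch
from compressed_tensors.transform import TransformLocation
from compressed_tensors.transform.utils.hadamard import random_hadamard_matrix
from compressed_tensors.transform.utils.utils import apply_transform_weight

torch.manual_seed(0)
x = torch.randn(4, 64, dtype=torch.float64)   # batch of input activations
W = torch.randn(32, 64, dtype=torch.float64)  # Linear weight, shape (out, in)

V = random_hadamard_matrix(64)  # orthogonal, so its inverse is V.T
Vi = V.T

xh = apply_transform_weight(V, x, TransformLocation.INPUT)          # x @ V
Wh = apply_transform_weight(Vi, W, TransformLocation.WEIGHT_INPUT)  # W @ Vi.T

# the input-side transforms cancel: yh == y == x @ W.T
assert torch.allclose(xh @ Wh.T, x @ W.T, atol=1e-8)
```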
compressed_tensors/utils/helpers.py

@@ -12,6 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
+ import contextlib
  import warnings
  from functools import wraps
  from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
@@ -38,6 +39,8 @@ __all__ = [
      "shard_tensor",
      "pack_bitmasks",
      "unpack_bitmasks",
+     "patch_attr",
+     "ParameterizedDefaultDict",
  ]
 
  FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -328,3 +331,53 @@ def unpack_bitmasks(
      )
 
      return unpacked_bitmasks_torch
+
+
+ @contextlib.contextmanager
+ def patch_attr(base: object, attr: str, value: Any):
+     """
+     Patch the value of an object attribute. The original value is restored upon exit
+
+     :param base: object which has the attribute to patch
+     :param attr: name of the attribute to patch
+     :param value: used to replace the original value
+
+     Usage:
+     >>> from types import SimpleNamespace
+     >>> obj = SimpleNamespace()
+     >>> with patch_attr(obj, "attribute", "value"):
+     ...     assert obj.attribute == "value"
+     >>> assert not hasattr(obj, "attribute")
+     """
+     _sentinel = object()
+     original_value = getattr(base, attr, _sentinel)
+
+     setattr(base, attr, value)
+     try:
+         yield
+     finally:
+         if original_value is not _sentinel:
+             setattr(base, attr, original_value)
+         else:
+             delattr(base, attr)
+
+
+ class ParameterizedDefaultDict(dict):
+     """
+     Similar to `collections.defaultdict`, but upon fetching a key which is missing,
+     the key is passed as arguments to the `default_factory`
+
+     :param default_factory: function which takes a key as input and returns the
+         corresponding default value
+     """
+
+     def __init__(self, default_factory: Callable[[Any], Any]):
+         self.default_factory = default_factory
+
+     def __missing__(self, key):
+         if isinstance(key, tuple):
+             value = self.default_factory(*key)
+         else:
+             value = self.default_factory(key)
+         self[key] = value
+         return value
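`patch_attr` carries its own doctest; for `ParameterizedDefaultDict`, here is a small usage sketch of my own (the import assumes the `compressed_tensors.utils` re-export implied by the `__all__` change above):

```python
from compressed_tensors.utils import ParameterizedDefaultDict

# the missing key itself is passed to the factory, and the result is cached
squares = ParameterizedDefaultDict(lambda n: n * n)
assert squares[4] == 16
assert 4 in squares  # cached after the first access

# tuple keys are splatted into the factory as positional arguments
pairs = ParameterizedDefaultDict(lambda a, b: f"{a}-{b}")
assert pairs[("x", "y")] == "x-y"
```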
compressed_tensors/utils/offload.py

@@ -87,13 +87,15 @@ def check_accelerate(fallback: Any):
          if not _has_accelerate:
 
              if fallback == "error":
-                 raise ValueError(
-                     "Please install `accelerate` in order to use this function"
-                 )
-
-             @wraps(func)
-             def fallback_fn(*args, **kwargs):
-                 return fallback
+                 @wraps(func)
+                 def fallback_fn(*args, **kwargs):
+                     raise ValueError(
+                         "Please install `accelerate` in order to use this function"
+                     )
+             else:
+                 @wraps(func)
+                 def fallback_fn(*args, **kwargs):
+                     return fallback
 
              return fallback_fn
 
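The net effect of this hunk: with `fallback="error"`, the `ValueError` now fires when the decorated function is called rather than when it is decorated, so a module can define accelerate-dependent helpers and still import cleanly. A self-contained sketch of the pattern (simplified and standalone, not the library's exact code — the surrounding `decorator(func)` nesting is assumed):

```python
from functools import wraps
from typing import Any, Callable

_has_accelerate = False  # pretend accelerate is not installed


def check_accelerate(fallback: Any):
    def decorator(func: Callable):
        if not _has_accelerate:
            if fallback == "error":
                # defer the failure to call time
                @wraps(func)
                def fallback_fn(*args, **kwargs):
                    raise ValueError(
                        "Please install `accelerate` in order to use this function"
                    )
            else:
                # silently return the fallback value instead
                @wraps(func)
                def fallback_fn(*args, **kwargs):
                    return fallback
            return fallback_fn
        return func
    return decorator


@check_accelerate(fallback="error")
def offload_to_cpu(module):  # defining this no longer raises at import
    ...


try:
    offload_to_cpu(None)  # the error surfaces only here, at call time
except ValueError as err:
    print(err)
```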
compressed_tensors/version.py

@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '0.10.0'
- __version_tuple__ = version_tuple = (0, 10, 0)
+ __version__ = version = '0.10.1'
+ __version_tuple__ = version_tuple = (0, 10, 1)
compressed_tensors-0.10.1.dist-info/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: compressed-tensors
- Version: 0.10.0
+ Version: 0.10.1
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
compressed_tensors-0.10.1.dist-info/RECORD

@@ -1,6 +1,6 @@
  compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
  compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
- compressed_tensors/version.py,sha256=P22A-D7Hg0yC0IOerZDcj2-6YOrcxCN9Sq5s06MywPA,513
+ compressed_tensors/version.py,sha256=StiR6uxiq6hqMzT3MUIl_ZooIq2cetH9oWrHUI_qWFU,513
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
  compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431

@@ -40,18 +40,21 @@ compressed_tensors/quantization/utils/helpers.py,sha256=bqxNL2NU1XVsSxNzmDVZE3zd
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
  compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
  compressed_tensors/transform/__init__.py,sha256=oa5VdrE-GtDYYceXNSwj5X_ropoXLLukm6Aufcc9WhY,747
- compressed_tensors/transform/transform_args.py,sha256=Sazu_4kXL7IvIEgTaimgo8dV-qacXf_t1NLEfDvPJEU,1759
+ compressed_tensors/transform/transform_args.py,sha256=8-Ab5_dFfdObfwVCgrWrEWcoVRzXmMBSDSUxjftI-Ss,3177
  compressed_tensors/transform/transform_config.py,sha256=6JA8VFcoz4EGHOev6thj51OuB7K2gKUUazWjrVPYDLc,2144
  compressed_tensors/transform/transform_scheme.py,sha256=c7NAuLDL0itFgUfBMNShegMI9bzKL7s4LR3QJTHsXLs,1733
+ compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
+ compressed_tensors/transform/utils/hadamard.py,sha256=SmPZmnHtc5N36gJA5EbM1T65uf4w1_flgl7SWBeg_W8,5642
+ compressed_tensors/transform/utils/utils.py,sha256=PRPTYwPs2nnNaQMq2GEbC4QYKHFKlZwaRyPgdDhl66g,2992
  compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
- compressed_tensors/utils/helpers.py,sha256=RrNvzD08naEjEiXdU-FdZjQVda1nQywu1hA_GCDj0vg,10415
- compressed_tensors/utils/offload.py,sha256=hAGjp9aS0HpFVhjYMGf-WTm76WMY6cS-YXhVEn80qPE,20196
+ compressed_tensors/utils/helpers.py,sha256=cPg-ikdeA92aIGwBONg8GmPNvcGlFhozyJVwsRiXBTA,11981
+ compressed_tensors/utils/offload.py,sha256=fT7WiUQmRmJ2Reb3I5kNcsHy4YdmZJHSOTNdS0tbKQo,20316
  compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
  compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
  compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
- compressed_tensors-0.10.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- compressed_tensors-0.10.0.dist-info/METADATA,sha256=LCNJhwDW8s0vzcb1XkGUzuKz2NTFKN1sbc5-xTx9pP4,6996
- compressed_tensors-0.10.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- compressed_tensors-0.10.0.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
- compressed_tensors-0.10.0.dist-info/RECORD,,
+ compressed_tensors-0.10.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ compressed_tensors-0.10.1.dist-info/METADATA,sha256=2y4RJsufdvf5Bap5PKk73UA3STedxdzbD0yRuZF21uc,6996
+ compressed_tensors-0.10.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ compressed_tensors-0.10.1.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+ compressed_tensors-0.10.1.dist-info/RECORD,,