compressed-tensors 0.10.3a20250715__tar.gz → 0.10.3a20250716__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. {compressed_tensors-0.10.3a20250715/src/compressed_tensors.egg-info → compressed_tensors-0.10.3a20250716}/PKG-INFO +1 -1
  2. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/factory/base.py +1 -3
  3. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/factory/hadamard.py +17 -8
  4. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/factory/matrix_multiply.py +18 -8
  5. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/transform_scheme.py +2 -1
  6. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/utils/hadamard.py +2 -2
  7. compressed_tensors-0.10.3a20250716/src/compressed_tensors/transform/utils/matrix.py +179 -0
  8. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/version.py +1 -1
  9. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  10. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors.egg-info/SOURCES.txt +1 -1
  11. compressed_tensors-0.10.3a20250716/tests/test_transform/conftest.py +115 -0
  12. compressed_tensors-0.10.3a20250716/tests/test_transform/factory/test_correctness.py +168 -0
  13. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_transform/utils/test_hadamard.py +2 -2
  14. compressed_tensors-0.10.3a20250715/src/compressed_tensors/transform/utils/utils.py +0 -91
  15. compressed_tensors-0.10.3a20250715/tests/test_transform/conftest.py +0 -54
  16. compressed_tensors-0.10.3a20250715/tests/test_transform/factory/test_correctness.py +0 -89
  17. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.github/.gitkeep +0 -0
  18. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.github/actions/test/action.yml +0 -0
  19. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.github/scripts/step-status +0 -0
  20. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.github/workflows/build-test.yml +0 -0
  21. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.github/workflows/build.yml +0 -0
  22. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.github/workflows/report.yml +0 -0
  23. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.github/workflows/test-check.yaml +0 -0
  24. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.github/workflows/test.yml +0 -0
  25. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.github/workflows/trigger-all.yml +0 -0
  26. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.github/workflows/upload.yml +0 -0
  27. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/.gitignore +0 -0
  28. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/LICENSE +0 -0
  29. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/Makefile +0 -0
  30. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/README.md +0 -0
  31. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  32. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/examples/bit_packing/int4_config.json +0 -0
  33. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/examples/bitmask_compression.ipynb +0 -0
  34. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  35. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  36. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/examples/llama_1.1b/example_quant_config.json +0 -0
  37. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  38. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/examples/quantize_and_pack_int4.ipynb +0 -0
  39. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/pyproject.toml +0 -0
  40. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/setup.cfg +0 -0
  41. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/setup.py +0 -0
  42. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/__init__.py +0 -0
  43. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/README.md +0 -0
  44. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/__init__.py +0 -0
  45. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/base.py +0 -0
  46. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/__init__.py +0 -0
  47. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/base.py +0 -0
  48. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/helpers.py +0 -0
  49. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  50. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  51. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  52. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  53. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  54. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  55. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  56. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  57. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  58. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  59. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  60. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  61. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  62. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  63. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/config/__init__.py +0 -0
  64. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/config/base.py +0 -0
  65. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/config/dense.py +0 -0
  66. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  67. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  68. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/linear/__init__.py +0 -0
  69. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  70. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/__init__.py +0 -0
  71. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  72. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  73. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  74. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  75. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  76. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  77. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/quant_args.py +0 -0
  78. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/quant_config.py +0 -0
  79. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  80. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  81. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  82. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/registry/__init__.py +0 -0
  83. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/registry/registry.py +0 -0
  84. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/__init__.py +0 -0
  85. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/apply.py +0 -0
  86. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  87. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  88. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/transform_args.py +0 -0
  89. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/transform_config.py +0 -0
  90. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  91. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  92. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/utils/__init__.py +0 -0
  93. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/utils/helpers.py +0 -0
  94. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/utils/internal.py +0 -0
  95. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/utils/offload.py +0 -0
  96. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/utils/permutations_24.py +0 -0
  97. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/utils/permute.py +0 -0
  98. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  99. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  100. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  101. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors.egg-info/requires.txt +0 -0
  102. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  103. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/__init__.py +0 -0
  104. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/conftest.py +0 -0
  105. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/__init__.py +0 -0
  106. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/model_compressors/__init__.py +0 -0
  107. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  108. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  109. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  110. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  111. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  112. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  113. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  114. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  115. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  116. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  117. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  118. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_configs/__init__.py +0 -0
  119. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_configs/test_base.py +0 -0
  120. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  121. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_linear/__init__.py +0 -0
  122. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_linear/test_compressed_linear.py +0 -0
  123. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/__init__.py +0 -0
  124. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/lifecycle/__init__.py +0 -0
  125. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/lifecycle/conftest.py +0 -0
  126. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  127. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  128. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  129. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  130. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  131. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  132. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  133. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/test_configs/__init__.py +0 -0
  134. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  135. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  136. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/test_quant_args.py +0 -0
  137. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/test_quant_config.py +0 -0
  138. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/test_quant_scheme.py +0 -0
  139. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  140. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_registry.py +0 -0
  141. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_transform/factory/test_memory.py +0 -0
  142. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_transform/test_transform_args.py +0 -0
  143. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_transform/test_transform_config.py +0 -0
  144. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_transform/test_transform_scheme.py +0 -0
  145. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_utils/__init__.py +0 -0
  146. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_utils/test_helpers.py +0 -0
  147. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_utils/test_offload.py +0 -0
  148. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/test_utils/test_safetensors_load.py +0 -0
  149. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/tests/testing_utils.py +0 -0
  150. {compressed_tensors-0.10.3a20250715 → compressed_tensors-0.10.3a20250716}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250715
3
+ Version: 0.10.3a20250716
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -117,10 +117,8 @@ class TransformFactory(RegistryMixin, ABC):
117
117
  TransformLocation.WEIGHT_INPUT,
118
118
  TransformLocation.WEIGHT_OUTPUT,
119
119
  ):
120
- assert isinstance(module, torch.nn.Linear)
121
- assert module.bias is None
122
-
123
120
  # fuse transform into weight
121
+ assert hasattr(module, "weight")
124
122
  with torch.no_grad(), align_module_device(module):
125
123
  update_offload_parameter(module, "weight", transform(module.weight))
126
124
 
@@ -14,13 +14,14 @@
14
14
 
15
15
  from typing import Optional, Union
16
16
 
17
+ import math
17
18
  import torch
18
19
  from compressed_tensors.transform import TransformArgs, TransformScheme
19
20
  from compressed_tensors.transform.factory.base import TransformBase, TransformFactory
20
21
  from compressed_tensors.transform.utils.hadamard import deterministic_hadamard_matrix
21
- from compressed_tensors.transform.utils.utils import (
22
+ from compressed_tensors.transform.utils.matrix import (
22
23
  apply_transform_weight,
23
- get_matrix_size,
24
+ get_transform_size,
24
25
  )
25
26
  from compressed_tensors.utils import get_execution_device, get_offloaded_device
26
27
  from compressed_tensors.utils.helpers import ParameterizedDefaultDict
@@ -51,8 +52,8 @@ class HadamardFactory(TransformFactory):
51
52
  :param module: parent module that transform will be applied to
52
53
  :param args: defines how the transform will be applied to the module
53
54
  """
54
- assert isinstance(module, Linear)
55
- size = get_matrix_size(module, args.location)
55
+ assert hasattr(module, "weight")
56
+ size = get_transform_size(module, args.location, self.scheme.head_dim)
56
57
  dtype = module.weight.dtype
57
58
  device = get_offloaded_device(module)
58
59
  exec_device = get_execution_device(module)
@@ -60,7 +61,7 @@ class HadamardFactory(TransformFactory):
60
61
  factory_kwargs = {"construct_device": exec_device}
61
62
  weight = self.weights.get(size, dtype, device, factory_kwargs=factory_kwargs)
62
63
  perm = self.perms[weight] if self.scheme.randomize else None
63
- return HadamardTransform(weight, perm, args)
64
+ return HadamardTransform(weight, perm, args, type(module))
64
65
 
65
66
  def _create_weight(
66
67
  self,
@@ -81,12 +82,18 @@ class HadamardFactory(TransformFactory):
81
82
 
82
83
  class HadamardTransform(TransformBase):
83
84
  def __init__(
84
- self, weight: Parameter, perm: Union[Parameter, None], args: TransformArgs
85
+ self,
86
+ weight: Parameter,
87
+ perm: Optional[Parameter],
88
+ args: TransformArgs,
89
+ module_type: type[torch.nn.Module],
85
90
  ):
86
91
  super().__init__()
87
92
  self.weight = weight
88
93
  self.perm = perm
89
94
  self.args = args
95
+ self.module_type = module_type
96
+ self._scale = math.sqrt(weight.size(0))
90
97
 
91
98
  def forward(self, value: Tensor) -> Tensor:
92
99
  weight = self.weight
@@ -96,5 +103,7 @@ class HadamardTransform(TransformBase):
96
103
 
97
104
  if self.args.inverse:
98
105
  weight = weight.T
99
-
100
- return apply_transform_weight(weight, value, self.args.location)
106
+
107
+ return apply_transform_weight(
108
+ weight, value, self.args.location, self.module_type
109
+ ) / self._scale
@@ -17,9 +17,9 @@ from typing import Optional
17
17
  import torch
18
18
  from compressed_tensors.transform import TransformArgs, TransformScheme
19
19
  from compressed_tensors.transform.factory.base import TransformBase, TransformFactory
20
- from compressed_tensors.transform.utils.utils import (
20
+ from compressed_tensors.transform.utils.matrix import (
21
21
  apply_transform_weight,
22
- get_matrix_size,
22
+ get_transform_size,
23
23
  )
24
24
  from compressed_tensors.utils import get_offloaded_device
25
25
  from compressed_tensors.utils.helpers import ParameterizedDefaultDict
@@ -50,8 +50,8 @@ class RandomMatrixFactory(TransformFactory):
50
50
  :param module: parent module that transform will be applied to
51
51
  :param args: defines how the transform will be applied to the module
52
52
  """
53
- assert isinstance(module, Linear)
54
- size = get_matrix_size(module, args.location)
53
+ assert hasattr(module, "weight")
54
+ size = get_transform_size(module, args.location, self.scheme.head_dim)
55
55
  dtype = module.weight.dtype
56
56
  device = get_offloaded_device(module)
57
57
 
@@ -59,7 +59,7 @@ class RandomMatrixFactory(TransformFactory):
59
59
  if args.inverse:
60
60
  weight = self.inverses[weight]
61
61
 
62
- return RandomMatrixTransform(weight, args)
62
+ return RandomMatrixTransform(weight, args, type(module))
63
63
 
64
64
  def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
65
65
  # TODO: verify that weight is invertible (has non-zero determinant)
@@ -74,17 +74,27 @@ class RandomMatrixFactory(TransformFactory):
74
74
 
75
75
 
76
76
  class RandomMatrixTransform(TransformBase):
77
- def __init__(self, weight: Tensor, args: TransformArgs):
77
+ def __init__(
78
+ self,
79
+ weight: Tensor,
80
+ args: TransformArgs,
81
+ module_type: type[torch.nn.Module],
82
+ ):
78
83
  super().__init__()
79
84
  self.weight = weight # is an inverse if args.inverse
80
85
  self.args = args
86
+ self.module_type = module_type
81
87
 
82
88
  def forward(self, value: Tensor) -> Parameter:
83
- return apply_transform_weight(self.weight, value, self.args.location)
89
+ return apply_transform_weight(
90
+ self.weight, value, self.args.location, self.module_type
91
+ )
84
92
 
85
93
  def right_inverse(self, value: Tensor) -> Tensor:
86
94
  inverse = high_precision_invert(self.weight)
87
- return apply_transform_weight(inverse, value, self.args.location)
95
+ return apply_transform_weight(
96
+ inverse, value, self.args.location, self.module_type
97
+ )
88
98
 
89
99
 
90
100
  def high_precision_invert(weight: Tensor) -> Tensor:
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import List
15
+ from typing import List, Optional
16
16
 
17
17
  from compressed_tensors.transform import TransformArgs
18
18
  from pydantic import BaseModel, Field
@@ -40,3 +40,4 @@ class TransformScheme(BaseModel):
40
40
  apply: List[TransformArgs] = Field(default_factory=list)
41
41
  randomize: bool = Field(default=False)
42
42
  requires_grad: bool = Field(default=False)
43
+ head_dim: Optional[int] = Field(default=None)
@@ -59,7 +59,7 @@ def deterministic_hadamard_matrix(
59
59
  for _ in range(log2):
60
60
  H = torch.vstack((torch.hstack((H, H)), torch.hstack((H, -H))))
61
61
 
62
- return H / math.sqrt(size)
62
+ return H
63
63
 
64
64
 
65
65
  def random_hadamard_matrix(
@@ -86,7 +86,7 @@ def random_hadamard_matrix(
86
86
  Q = Q.to(device=device)
87
87
  Q = Q * 2 - 1
88
88
  Q = torch.diag(Q)
89
- return _matmul_hadU(Q) / math.sqrt(size)
89
+ return _matmul_hadU(Q)
90
90
 
91
91
 
92
92
  def is_pow2(n: int) -> bool:
@@ -0,0 +1,179 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Callable, Optional, Tuple
16
+
17
+ import torch
18
+ from compressed_tensors.transform import TransformLocation
19
+
20
+
21
+ __all__ = ["get_transform_size", "apply_transform_weight"]
22
+
23
+
24
+ def get_transform_size(
25
+ module: torch.nn.Module,
26
+ location: TransformLocation,
27
+ head_dim: Optional[int] = None,
28
+ ) -> int:
29
+ """
30
+ Determine the size of a transform matrix given its location on the module
31
+
32
+ :param module: module that matrix will be applied to
33
+ :param location: location on module
34
+ :param head_dim: size of head when transform is applied to mha
35
+ :return: size of matrix
36
+ """
37
+ if isinstance(module, torch.nn.Linear):
38
+ if location in (TransformLocation.INPUT, TransformLocation.WEIGHT_INPUT):
39
+ size = module.in_features
40
+ else:
41
+ size = module.out_features
42
+ elif isinstance(module, torch.nn.Embedding):
43
+ if location in (TransformLocation.INPUT, TransformLocation.WEIGHT_INPUT):
44
+ size = module.num_embeddings
45
+ else:
46
+ size = module.embedding_dim
47
+ else:
48
+ raise NotImplementedError(f"Transforms on {type(module)} are not supported")
49
+
50
+ if head_dim is not None:
51
+ if size % head_dim != 0:
52
+ raise ValueError(
53
+ f"{head_dim} must divide {size} for {type(module)} at {location}"
54
+ )
55
+
56
+ size = head_dim
57
+
58
+ return size
59
+
60
+
61
+ def apply_transform_weight(
62
+ transform_weight: torch.Tensor,
63
+ value: torch.Tensor,
64
+ location: TransformLocation,
65
+ module_type: type[torch.nn.Module],
66
+ ) -> torch.Tensor:
67
+ """
68
+ Using the transform location, apply the transform_weight to the
69
+ given value wrt linear weights. For more info on input and output transforms,
70
+ see `TransformLocation`
71
+
72
+ The following explains how weights should be applied to values according to location
73
+
74
+ let x be input activation
75
+ W be weight,
76
+ yh, xh, Wh be transformed output, input, weight
77
+
78
+ note that
79
+ y = (x W.T) // torch.nn.Linear
80
+
81
+ Choose values for yh, xh, and Wh which incorporate matrix transforms
82
+
83
+ let V, Vi be transform matrices on input side
84
+ U, Ui be transform matrices on output side
85
+
86
+ pick xh = (x V)
87
+ Wh = (U.T W Vi.T)
88
+ yh = (y U)
89
+
90
+ The following shows that `yh = (xh) (Wh).T` for the chosen values of yh, xh, and Wh
91
+
92
+ (xh) (Wh).T = (x V) (U.T W Vi.T).T
93
+ = (x V) (Vi W.T U) // transpose matrix product identity
94
+ = (x W.T) U
95
+ = y U
96
+ = yh
97
+
98
+ :param transform_weight: transform weight to apply
99
+ :param value: value to apply transform_weight to
100
+ :param location: determines how weight should be applied
101
+ :param model_type: result of type(module), passed in to determine application of
102
+ weight transform
103
+ :return: value after transform_weight has been applied
104
+ """
105
+
106
+ assert transform_weight.shape[0] == transform_weight.shape[1]
107
+
108
+ if module_type == torch.nn.Linear:
109
+ if location == TransformLocation.INPUT:
110
+ return _multihead_matmul(value, transform_weight)
111
+
112
+ elif location == TransformLocation.WEIGHT_INPUT:
113
+ # equivalent to (transform_weight @ value.T).T
114
+ return _multihead_matmul(value, transform_weight.T)
115
+
116
+ elif location == TransformLocation.WEIGHT_OUTPUT:
117
+ # equivalent to (value.T @ transform_weight).T
118
+ return _multihead_matmul(transform_weight.T, value)
119
+
120
+ elif location == TransformLocation.OUTPUT:
121
+ return _multihead_matmul(value, transform_weight)
122
+
123
+ # similar derivation to torch.nn.Linear, but `y = (x W)`
124
+ elif module_type == torch.nn.Embedding:
125
+ if location == TransformLocation.INPUT:
126
+ return _multihead_matmul(value, transform_weight)
127
+
128
+ elif location == TransformLocation.WEIGHT_INPUT:
129
+ return _multihead_matmul(
130
+ transform_weight,
131
+ value,
132
+ )
133
+
134
+ elif location == TransformLocation.WEIGHT_OUTPUT:
135
+ return _multihead_matmul(value, transform_weight)
136
+
137
+ elif location == TransformLocation.OUTPUT:
138
+ return _multihead_matmul(value, transform_weight)
139
+
140
+ raise NotImplementedError(
141
+ f"Applying transforms to {module_type} {location} is not supported"
142
+ )
143
+
144
+
145
+ def _multihead_matmul(A: torch.Tensor, B: torch.Tensor) -> torch.Tensor:
146
+ """
147
+ Performs A @ B for last two dims of two matrices A and B that possibly
148
+ have different shapes, as is the case in multi-headed dimension. If
149
+ shapes are different, this is equivalent to converting the last two dims
150
+ of the smaller matrix into a block-diagonal matrix with the same shape as
151
+ the last two dims of the larger matrix.
152
+
153
+ E.g. if A is half the size of B, this function will perform
154
+ [[A ] @ B
155
+ [ A]]
156
+
157
+ If B is a third of the size of A, this function will perform
158
+ A @ [[B ]
159
+ [ B ]
160
+ [ B]]
161
+
162
+ This function will error out if the shapes are not evenly divisble
163
+
164
+ :param A: left-hand tensor
165
+ :param B: right-hand tensor
166
+ :return: result
167
+ """
168
+ if A.shape[-1] > B.shape[-2]:
169
+ head_dim = B.shape[-2]
170
+ num_heads = A.shape[-1] // head_dim
171
+ A = A.unflatten(-1, (num_heads, head_dim))
172
+ return (A @ B).flatten(-2, -1)
173
+ elif A.shape[-1] < B.shape[-2]:
174
+ head_dim = A.shape[-1]
175
+ num_heads = B.shape[-2] // head_dim
176
+ B = B.unflatten(-2, (num_heads, head_dim))
177
+ return (A @ B).flatten(-3, -2)
178
+ else:
179
+ return A @ B
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.10.3.a20250715'
20
+ __version__ = version = '0.10.3.a20250716'
21
21
  __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250715
3
+ Version: 0.10.3a20250716
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -84,7 +84,7 @@ src/compressed_tensors/transform/factory/random_hadamard.py
84
84
  src/compressed_tensors/transform/utils/__init__.py
85
85
  src/compressed_tensors/transform/utils/hadamard.py
86
86
  src/compressed_tensors/transform/utils/hadamards.safetensors
87
- src/compressed_tensors/transform/utils/utils.py
87
+ src/compressed_tensors/transform/utils/matrix.py
88
88
  src/compressed_tensors/utils/__init__.py
89
89
  src/compressed_tensors/utils/helpers.py
90
90
  src/compressed_tensors/utils/internal.py
@@ -0,0 +1,115 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pytest
16
+ import torch
17
+ from compressed_tensors.transform import TransformArgs
18
+
19
+
20
class TransformableModel(torch.nn.Module):
    """Bias-free MLP used as a simple target for transform tests.

    Given sizes (s0, s1, ..., sn), builds n linear layers mapping
    s0 -> s1 -> ... -> sn.
    """

    def __init__(self, *sizes):
        super().__init__()
        self.fcs = torch.nn.ModuleList(
            [
                torch.nn.Linear(in_size, out_size, bias=False)
                for in_size, out_size in zip(sizes, sizes[1:])
            ]
        )

    def forward(self, x):
        # feed the activation through each linear layer in turn
        for fc in self.fcs:
            x = fc(x)
        return x
+ return x
34
+
35
+
36
class MockAttention(torch.nn.Module):
    """Minimal multi-head attention (with grouped KV heads) for transform tests.

    Mirrors the projection layout of transformer attention: separate q/k/v
    projections, KV-head repetition for grouped-query attention, scaled
    dot-product weights, and an output projection. No masking or dropout.
    """

    def __init__(
        self, hidden_size: int, num_attention_heads: int, num_key_value_heads: int
    ):
        super().__init__()
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads

        # how many times each KV head is shared across attention heads
        self.num_key_value_groups = num_attention_heads // num_key_value_heads
        self.head_dim = hidden_size // num_attention_heads
        self.scaling = self.head_dim**-0.5
        assert hidden_size >= num_attention_heads * self.head_dim

        self.q_proj = torch.nn.Linear(
            hidden_size, num_attention_heads * self.head_dim, bias=False
        )
        self.k_proj = torch.nn.Linear(
            hidden_size, num_key_value_heads * self.head_dim, bias=False
        )
        self.v_proj = torch.nn.Linear(
            hidden_size, num_key_value_heads * self.head_dim, bias=False
        )
        self.o_proj = torch.nn.Linear(
            num_attention_heads * self.head_dim, hidden_size, bias=False
        )

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        batch_size, seq_len, _ = hidden_states.shape
        # (batch, seq, heads, head_dim) before moving heads ahead of seq
        split_shape = (batch_size, seq_len, -1, self.head_dim)

        queries = self.q_proj(hidden_states).view(split_shape).transpose(1, 2)
        keys = self.k_proj(hidden_states).view(split_shape).transpose(1, 2)
        values = self.v_proj(hidden_states).view(split_shape).transpose(1, 2)

        # expand grouped KV heads to match the number of attention heads
        keys = self.repeat_kv(keys, self.num_key_value_groups)
        values = self.repeat_kv(values, self.num_key_value_groups)

        scores = torch.matmul(queries, keys.transpose(2, 3)) * self.scaling

        # softmax in float32 for stability, then cast back to activation dtype
        probs = torch.nn.functional.softmax(scores, dim=-1, dtype=torch.float32).to(
            queries.dtype
        )
        context = torch.matmul(probs, values)
        context = context.transpose(1, 2).contiguous()

        context = context.reshape((batch_size, seq_len, -1)).contiguous()

        return self.o_proj(context)

    def repeat_kv(self, hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
        # replicate each KV head n_rep times along the head dimension
        if n_rep == 1:
            return hidden_states
        batch, num_key_value_heads, slen, head_dim = hidden_states.shape
        expanded = hidden_states[:, :, None, :, :].expand(
            batch, num_key_value_heads, n_rep, slen, head_dim
        )
        return expanded.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
+
96
+
97
+ @pytest.fixture(scope="function")
98
+ def model_apply():
99
+ model = TransformableModel(2, 4, 8, 16, 32, 64)
100
+ apply = [
101
+ # weight output -> input
102
+ TransformArgs(targets="fcs.0", location="weight_output"),
103
+ TransformArgs(targets="fcs.1", location="input", inverse=True),
104
+ # output -> weight input
105
+ TransformArgs(targets="fcs.1", location="output"),
106
+ TransformArgs(targets="fcs.2", location="weight_input", inverse=True),
107
+ # output -> input
108
+ TransformArgs(targets="fcs.2", location="output"),
109
+ TransformArgs(targets="fcs.3", location="input", inverse=True),
110
+ # weight output -> weight input
111
+ TransformArgs(targets="fcs.3", location="weight_output"),
112
+ TransformArgs(targets="fcs.4", location="weight_input", inverse=True),
113
+ ]
114
+
115
+ return model, apply
@@ -0,0 +1,168 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pytest
16
+ import torch
17
+ from compressed_tensors.transform import (
18
+ TransformArgs,
19
+ TransformConfig,
20
+ TransformFactory,
21
+ TransformScheme,
22
+ apply_transform_config,
23
+ )
24
+ from compressed_tensors.utils import offloaded_dispatch
25
+ from tests.test_transform.conftest import MockAttention
26
+ from tests.testing_utils import requires_accelerate, requires_gpu
27
+
28
+
29
+ @pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
30
+ @pytest.mark.parametrize("randomized", (True, False))
31
+ @pytest.mark.parametrize("head_dim", (None, 2, 4))
32
+ @pytest.mark.parametrize("input_batch_size", (1, 5, 17))
33
+ def test_correctness_linear(type, randomized, head_dim, input_batch_size):
34
+ size = (4, 8)
35
+ module = torch.nn.Linear(*size, bias=False)
36
+ scheme = TransformScheme(type=type, randomized=randomized, head_dim=head_dim)
37
+ factory = TransformFactory.from_scheme(scheme, name="")
38
+
39
+ input_tfm = factory.create_transform(
40
+ module, TransformArgs(targets="Linear", location="input", inverse=True)
41
+ )
42
+ w_in_tfm = factory.create_transform(
43
+ module, TransformArgs(targets="Linear", location="weight_input")
44
+ )
45
+ w_out_tfm = factory.create_transform(
46
+ module, TransformArgs(targets="Linear", location="weight_output")
47
+ )
48
+ output_tfm = factory.create_transform(
49
+ module, TransformArgs(targets="Linear", location="output", inverse=True)
50
+ )
51
+
52
+ input = torch.rand((input_batch_size, 5, size[0]))
53
+ true_output = input @ module.weight.T
54
+ input_transformed = input_tfm(input)
55
+ weight_transformed = w_out_tfm(w_in_tfm(module.weight))
56
+ output = output_tfm(input_transformed @ weight_transformed.T)
57
+ assert torch.allclose(true_output, output, atol=1e-5, rtol=0.0)
58
+
59
+
60
+ @pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
61
+ @pytest.mark.parametrize("randomized", (True, False))
62
+ @pytest.mark.parametrize("embed_loc", ("weight_output", "output"))
63
+ @pytest.mark.parametrize("linear_loc", ("input", "weight_input"))
64
+ def test_correctness_embedding(type, randomized, embed_loc, linear_loc):
65
+ model = torch.nn.Sequential(
66
+ torch.nn.Embedding(2, 4),
67
+ torch.nn.Linear(4, 8, bias=False),
68
+ )
69
+
70
+ input = torch.randint(high=1, low=0, size=(17, 5, 2))
71
+ true_output = model(input)
72
+
73
+ config = TransformConfig(
74
+ config_groups={
75
+ "": TransformScheme(
76
+ type=type,
77
+ randomized=randomized,
78
+ apply=[
79
+ TransformArgs(targets="Embedding", location=embed_loc),
80
+ TransformArgs(targets="Linear", location=linear_loc, inverse=True),
81
+ ],
82
+ )
83
+ }
84
+ )
85
+ apply_transform_config(model, config)
86
+
87
+ # compare outputs
88
+ output = model(input)
89
+ assert torch.allclose(true_output, output, atol=1e-5, rtol=0.0)
90
+
91
+
92
+ @pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
93
+ @pytest.mark.parametrize("randomized", (True, False))
94
+ @pytest.mark.parametrize("input_batch_size", (1, 5, 17))
95
+ def test_correctness_model(
96
+ type, randomized, input_batch_size, model_apply, offload=False
97
+ ):
98
+ # load model
99
+ model = model_apply[0]
100
+ if offload:
101
+ model = offloaded_dispatch(model, torch.device("cuda"))
102
+
103
+ # get output
104
+ input = torch.rand((input_batch_size, 5, model.fcs[0].in_features))
105
+ if offload:
106
+ input = input.to(torch.device("cuda"))
107
+ true_output = model(input)
108
+
109
+ # apply transforms
110
+ config = TransformConfig(
111
+ config_groups={
112
+ "": TransformScheme(type=type, randomized=randomized, apply=model_apply[1])
113
+ }
114
+ )
115
+ apply_transform_config(model, config)
116
+
117
+ # compare outputs
118
+ output = model(input)
119
+ assert torch.allclose(true_output, output, atol=1e-5, rtol=0.0)
120
+
121
+
122
@requires_gpu
@requires_accelerate()
@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
@pytest.mark.parametrize("input_batch_size", (1, 5, 17))
def test_correctness_model_offload(type, randomized, input_batch_size, model_apply):
    """Re-run the model correctness check with the model offloaded to GPU."""
    test_correctness_model(
        type, randomized, input_batch_size, model_apply, offload=True
    )
131
+
132
+
133
+ @pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
134
+ @pytest.mark.parametrize("randomized", (True, False))
135
+ @pytest.mark.parametrize("head_dim", (4, 8))
136
+ @pytest.mark.parametrize("input_batch_size", (1, 5, 17))
137
+ def test_correctness_attention_heads(type, randomized, head_dim, input_batch_size):
138
+ hidden_size = 64
139
+ num_attention_heads = 8
140
+
141
+ attention = MockAttention(
142
+ hidden_size=hidden_size,
143
+ num_attention_heads=num_attention_heads,
144
+ num_key_value_heads=head_dim,
145
+ )
146
+
147
+ input = torch.rand(input_batch_size, 5, hidden_size)
148
+ true_output = attention(input)
149
+
150
+ config = TransformConfig(
151
+ config_groups={
152
+ "": TransformScheme(
153
+ type=type,
154
+ randomized=randomized,
155
+ head_dim=head_dim,
156
+ apply=[
157
+ TransformArgs(targets="v_proj", location="weight_output"),
158
+ TransformArgs(
159
+ targets="o_proj", location="weight_input", inverse=True
160
+ ),
161
+ ],
162
+ )
163
+ }
164
+ )
165
+ apply_transform_config(attention, config)
166
+
167
+ output = attention(input)
168
+ assert torch.allclose(true_output, output, atol=1e-5, rtol=0.0)