compressed-tensors 0.13.1a20260123__tar.gz → 0.13.1a20260127__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/actions/test/action.yml +1 -1
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/workflows/test-check.yaml +1 -1
- {compressed_tensors-0.13.1a20260123/src/compressed_tensors.egg-info → compressed_tensors-0.13.1a20260127}/PKG-INFO +4 -2
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/setup.py +2 -2
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/linear/compressed_linear.py +0 -6
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/__init__.py +7 -6
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/dispatch.py +1 -1
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/forward.py +18 -19
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/initialize.py +2 -2
- compressed_tensors-0.13.1a20260127/src/compressed_tensors/transform/apply.py +36 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/base.py +3 -11
- compressed_tensors-0.13.1a20260127/src/compressed_tensors/utils/offload.py +195 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127/src/compressed_tensors.egg-info}/PKG-INFO +4 -2
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/SOURCES.txt +0 -1
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/requires.txt +3 -1
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_apply.py +2 -12
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_initialize.py +4 -5
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/factory/test_correctness.py +6 -15
- compressed_tensors-0.13.1a20260127/tests/test_transform/factory/test_memory.py +74 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/factory/test_serialization.py +8 -16
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_match.py +28 -34
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/testing_utils.py +0 -18
- compressed_tensors-0.13.1a20260123/src/compressed_tensors/transform/apply.py +0 -71
- compressed_tensors-0.13.1a20260123/src/compressed_tensors/utils/offload.py +0 -672
- compressed_tensors-0.13.1a20260123/tests/test_transform/factory/test_memory.py +0 -92
- compressed_tensors-0.13.1a20260123/tests/test_utils/test_offload.py +0 -540
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/.gitkeep +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/mergify.yml +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/workflows/quality-check.yaml +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/workflows/stale.yml +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.gitignore +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/LICENSE +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/Makefile +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/README.md +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/pyproject.toml +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/setup.cfg +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/format.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/logger.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/modeling/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/modeling/attention.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/modeling/kvcache.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/cache/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/cache/base.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/cache/cpu.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/cache/device.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/module.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/utils.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_args.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/utils/mxfp4_utils.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/transform_args.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/transform_config.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/transform_scheme.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/matrix.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/binary_search.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/internal.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/match.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/type.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/conftest.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/mock_observer.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_packed_asym_decompression.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_configs/test_infer_quant.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_modeling/test_attention_and_cache.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_offload/cache/test_cpu.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_offload/test_dispatch.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_offload/test_interface.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_offload/test_module.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_utils/test_mxfp4_utils.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_registry.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/conftest.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/test_transform_config.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/utils/test_hadamard.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_type.py +0 -0
- {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/utils/copyright.py +0 -0

.github/workflows/test-check.yaml
@@ -30,7 +30,7 @@ jobs:
       - name: Set Env
         run: pip3 install --upgrade pip setuptools
       - name: "⚙️ Install dependencies"
-        run: pip3 install .[dev
+        run: pip3 install .[dev]
       - name: clean up
         run: |
           echo "cleaning up disk space as GHA runner has limited disk size."

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.13.1a20260123
+Version: 0.13.1a20260127
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/vllm-project/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -9,7 +9,7 @@ License: Apache 2.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: torch<=2.9.1,>=1.7.0
-Requires-Dist: transformers
+Requires-Dist: transformers<5.0.0
 Requires-Dist: pydantic>=2.0
 Requires-Dist: loguru
 Provides-Extra: dev
@@ -19,6 +19,8 @@ Requires-Dist: wheel>=0.36.2; extra == "dev"
 Requires-Dist: flake8>=3.8.3; extra == "dev"
 Requires-Dist: pytest>=6.0.0; extra == "dev"
 Requires-Dist: nbconvert>=7.16.3; extra == "dev"
+Requires-Dist: transformers<5.0; extra == "dev"
+Requires-Dist: accelerate; extra == "dev"
 Provides-Extra: accelerate
 Requires-Dist: accelerate; extra == "accelerate"
 Dynamic: author

setup.py
@@ -88,11 +88,11 @@ def _setup_packages() -> List:
     )
 
 def _setup_install_requires() -> List:
-    return ["torch>=1.7.0,<=2.9.1", "transformers", "pydantic>=2.0", "loguru"]
+    return ["torch>=1.7.0,<=2.9.1", "transformers<5.0.0", "pydantic>=2.0", "loguru"]
 
 def _setup_extras() -> Dict:
     return {
-        "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3"],
+        "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3", "transformers<5.0", "accelerate"],
         "accelerate": ["accelerate"]
     }
 

src/compressed_tensors/linear/compressed_linear.py
@@ -87,12 +87,6 @@ class CompressedLinear(Linear):
         # mark module as compressed
         module.quantization_status = QuantizationStatus.COMPRESSED
 
-        # handles case where forward is wrapped in new_forward by accelerate hooks
-        if hasattr(module, "_old_forward"):
-            module._old_forward = CompressedLinear.forward.__get__(
-                module, CompressedLinear
-            )
-
         return module
 
     def forward(self, input: Tensor) -> Tensor:

src/compressed_tensors/offload/__init__.py
@@ -135,9 +135,7 @@ def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.M
     """
     cache = base._parameters
     if isinstance(cache, OffloadCache):
-        offload_module(
-            module, cache.onload_device, cache.offload_device, no_split=False
-        )
+        offload_module(module, cache.onload_device, cache.offload_device)
 
     base.register_module(name, module)
 
@@ -178,9 +176,12 @@ def align_module_device(
    if isinstance(module._parameters, OffloadCache):
        assert isinstance(module._buffers, OffloadCache)
        with module._parameters.disable_offloading():
-
-
-
+           if execution_device is not None:
+               with patch_attr(
+                   module._parameters, "onload_device", execution_device
+               ), patch_attr(module._buffers, "onload_device", execution_device):
+                   yield
+           else:
                yield
 
    else:
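
Note on the `align_module_device` change above: an `execution_device` argument now temporarily repoints the cache's `onload_device` via `patch_attr` rather than falling through to a bare `yield`. A minimal usage sketch, assuming the function remains a context manager with an optional `execution_device` keyword (the module here is illustrative and not necessarily offloaded):

```python
import torch
from compressed_tensors.offload import align_module_device

module = torch.nn.Linear(16, 16)  # illustrative; may or may not be offloaded

# while inside the context, parameters are onloaded; with the patch above,
# execution_device is now honored for OffloadCache-backed modules
with align_module_device(module, execution_device=torch.device("cpu")):
    out = module(torch.randn(1, 16))
```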

src/compressed_tensors/offload/dispatch.py
@@ -39,7 +39,7 @@ ModelType = TypeVar("ModelType", bound=torch.nn.Module)
 def offload_model(
     model: ModelType,
     onload_device: torch.device | str,
-    offload_device:
+    offload_device: torch.device | str | Literal["disk"] = torch.device("cpu"),
 ) -> ModelType:
     """
     Offload a model to the `offload_device`. During forward passes, model weights will
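
The net effect of the signature change is that `offload_device` gains an explicit CPU default. A short usage sketch, assuming a CUDA device is available (the toy model is illustrative):

```python
import torch
from compressed_tensors.offload import offload_model

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())

# weights rest on the CPU (the new default) and are onloaded to
# "cuda:0" for the duration of each forward pass
model = offload_model(model, onload_device="cuda:0")

# equivalent call with the default spelled out:
# offload_model(model, "cuda:0", offload_device=torch.device("cpu"))
```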

src/compressed_tensors/quantization/lifecycle/forward.py
@@ -14,7 +14,6 @@
 
 from functools import wraps
 from math import ceil
-from typing import Optional
 
 import torch
 from compressed_tensors.quantization.quant_args import (
@@ -47,9 +46,9 @@ def quantize(
     scale: torch.Tensor,
     zero_point: torch.Tensor,
     args: QuantizationArgs,
-    dtype:
-    g_idx:
-    global_scale:
+    dtype: torch.dtype | None = None,
+    g_idx: torch.Tensor | None = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
     """
     Quantize the input tensor x using the QuantizationStrategy specified in args.
@@ -85,11 +84,11 @@ def quantize(
 def dequantize(
     x_q: torch.Tensor,
     scale: torch.Tensor,
-    zero_point:
-    args:
-    dtype:
-    g_idx:
-    global_scale:
+    zero_point: torch.Tensor | None = None,
+    args: QuantizationArgs | None = None,
+    dtype: torch.dtype | None = None,
+    g_idx: torch.Tensor | None = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
     """
     Dequantize a quantized input tensor x_q based on the strategy specified in args. If
@@ -159,8 +158,8 @@ def fake_quantize(
     scale: torch.Tensor,
     zero_point: torch.Tensor,
     args: QuantizationArgs,
-    g_idx:
-    global_scale:
+    g_idx: torch.Tensor | None = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
     """
     Fake quantize the input tensor x by quantizing then dequantizing with
@@ -195,11 +194,11 @@ def _process_quantization(
     scale: torch.Tensor,
     zero_point: torch.Tensor,
     args: QuantizationArgs,
-    g_idx:
-    dtype:
+    g_idx: torch.Tensor | None = None,
+    dtype: torch.dtype | None = None,
     do_quantize: bool = True,
     do_dequantize: bool = True,
-    global_scale:
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
     q_min, q_max = calculate_range(args, x.device)
     group_size = args.group_size
@@ -457,8 +456,8 @@ def _quantize(
     q_min: torch.Tensor,
     q_max: torch.Tensor,
     args: QuantizationArgs,
-    dtype:
-    global_scale:
+    dtype: torch.dtype | None = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
 
     # if a global scale is optionally provided, use it
@@ -486,9 +485,9 @@ def _quantize(
 def _dequantize(
     x_q: torch.Tensor,
     scale: torch.Tensor,
-    zero_point: torch.Tensor = None,
-    dtype:
-    global_scale:
+    zero_point: torch.Tensor | None = None,
+    dtype: torch.dtype | None = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
 
     # if a global scale is optionally provided, use it
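
The `forward.py` hunks above replace `Optional`-style and untyped defaults with PEP 604 `X | None` unions (which require Python 3.10+) without changing behavior. A hedged round-trip sketch of the public `quantize`/`dequantize` pair; `num_bits=8` mirrors the tests below, while `symmetric=True` and the per-tensor scale shape are assumptions:

```python
import torch
from compressed_tensors.quantization import QuantizationArgs
from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize

x = torch.randn(4, 8)
scale = torch.tensor(0.1)
zero_point = torch.tensor(0)
args = QuantizationArgs(num_bits=8, symmetric=True)  # symmetric is assumed

# dtype, g_idx, and global_scale are optional and can simply be omitted
x_q = quantize(x, scale, zero_point, args)
x_dq = dequantize(x_q, scale, zero_point, args)
```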

src/compressed_tensors/quantization/lifecycle/initialize.py
@@ -23,6 +23,7 @@ from compressed_tensors.modeling import (
     QuantizedAttentionImpl,
     QuantizedKVCache,
 )
+from compressed_tensors.offload import unwrap_offload_forward
 from compressed_tensors.quantization import (
     ActivationOrdering,
     DynamicType,
@@ -37,7 +38,6 @@ from compressed_tensors.quantization.lifecycle.forward import (
 )
 from compressed_tensors.quantization.utils import strategy_cdiv
 from compressed_tensors.utils import (
-    disable_hf_hook,
     get_execution_device,
     get_head_dim,
     get_num_attn_heads,
@@ -134,7 +134,7 @@ def initialize_module_for_quantization(
         force_zero_point=force_zero_point,
     )
 
-    with
+    with unwrap_offload_forward(module):
        # wrap forward call of module to perform
        # quantized actions based on calltime status
        wrap_module_forward_quantized(module, scheme)

src/compressed_tensors/transform/apply.py (new file)
@@ -0,0 +1,36 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from compressed_tensors import TRANSFORM_CONFIG_NAME
+from compressed_tensors.transform import TransformConfig, TransformFactory
+
+
+__all__ = ["apply_transform_config"]
+
+
+def apply_transform_config(model: torch.nn.Module, config: TransformConfig):
+    """
+    Apply a transform config to a model. Weight transforms are fused into weights, while
+    activation transforms are attached as submodules and trigger via pytorch hooks
+
+    :param model: model to apply config to
+    :param config: transform config to apply
+    """
+    for name, scheme in config.config_groups.items():
+        factory = TransformFactory.from_scheme(scheme, name=name)
+        factory.apply_to_model(model)
+
+    # attach config to model for compression/serialization
+    setattr(model, TRANSFORM_CONFIG_NAME, config)
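
A usage sketch for the re-added `apply_transform_config`, assuming it is re-exported from `compressed_tensors.transform` and that `TransformScheme`/`TransformArgs` accept the fields shown (the scheme type and location strings are assumptions; the checkpoint name mirrors the tests below):

```python
from compressed_tensors.transform import (
    TransformArgs,
    TransformConfig,
    TransformScheme,
    apply_transform_config,
)
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")

# hypothetical config: one Hadamard scheme applied to Linear weight inputs
config = TransformConfig(
    config_groups={
        "u": TransformScheme(
            type="hadamard",
            apply=[TransformArgs(targets=["Linear"], location="weight_input")],
        )
    }
)

# weight transforms are fused into the weights; activation transforms
# are attached as submodules and fire via pytorch hooks
apply_transform_config(model, config)
```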

src/compressed_tensors/transform/factory/base.py
@@ -26,6 +26,7 @@ from compressed_tensors.modeling.kvcache import (
     initialize_hooked_kv_cache,
     register_key_hook,
 )
+from compressed_tensors.offload import OffloadCache
 from compressed_tensors.registry.registry import RegistryMixin, T
 from compressed_tensors.transform import (
     TransformArgs,
@@ -34,8 +35,6 @@ from compressed_tensors.transform import (
 )
 from compressed_tensors.utils import (
     align_module_device,
-    delete_offload_module,
-    has_offloaded_params,
     match_named_modules,
     patch_attr,
     register_offload_module,
@@ -116,13 +115,6 @@ class TransformFactory(RegistryMixin, ABC):
         :param module: target module to apply transforms to
         :param args: defines how the transform will be applied to the target module
         """
-        if has_offloaded_params(module):
-            if module._hf_hook.place_submodules:
-                raise NotImplementedError(
-                    "Applying transforms to offloaded submodules with "
-                    "`place_submodules=True` is not supported"
-                )
-
         # create transform as submodule
         transform_name = f"{self.name}_{args.location}"
         transform = self.create_transform(module, args)
@@ -150,13 +142,13 @@ class TransformFactory(RegistryMixin, ABC):
            if self.scheme.requires_grad:
                # for training, the weight changes with every forward pass
                # so we can leverage parametrization to propagate the gradient
-               if
+               if isinstance(module._parameters, OffloadCache):
                    raise ValueError("Offloaded training is not supported")
                P.register_parametrization(module, "weight", transform)
 
            else:
                # transform is no longer needed (unfusing is not supported)
-
+               delattr(module, transform_name)
 
        # register output transformation hook
        elif args.location == TransformLocation.OUTPUT:
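
For context on the `requires_grad` branch above: `P` here is `torch.nn.utils.parametrize` (per the `P.register_parametrization` call), so during training the transform is re-applied on every access to `weight` and gradients flow through it. A standalone sketch of that mechanism with a toy transform, unrelated to this package's transform classes:

```python
import torch
from torch.nn.utils import parametrize

class Scale(torch.nn.Module):
    """Toy parametrization: recomputed on every access to `weight`."""

    def forward(self, weight: torch.Tensor) -> torch.Tensor:
        return 2.0 * weight

linear = torch.nn.Linear(4, 4)
parametrize.register_parametrization(linear, "weight", Scale())

# linear.weight is now computed through Scale.forward, so the underlying
# parameter receives gradients while the transform stays applied
linear(torch.randn(2, 4)).sum().backward()
```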

src/compressed_tensors/utils/offload.py (new file)
@@ -0,0 +1,195 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Utilities associated with offloading functionality
+
+| ------------------------------------------------------------------------------------------------------ | # noqa: E501
+| Operation | Without offloading support | With offloading support | # noqa: E501
+| ---------- | -------------------------------------- | ------------------------------------------------ | # noqa: E501
+| Update | module.name.data.copy_(new_data) | update_offload_parameter(module, name, new_data) | # noqa: E501
+| ------------------------------------------------------------------------------------------------------ | # noqa: E501
+"""
+
+import contextlib
+from typing import Literal, Optional
+
+import torch
+from compressed_tensors.offload import (
+    align_module_device,
+    align_modules,
+    disable_offloading,
+    get_execution_device,
+    get_offloaded_device,
+    offload_model,
+    register_offload_module,
+    remove_dispatch,
+    update_offload_parameter,
+)
+from compressed_tensors.utils.helpers import deprecated
+
+
+__all__ = [
+    "get_execution_device",
+    "get_offloaded_device",
+    "update_parameter_data",
+    "register_offload_parameter",
+    "update_offload_parameter",
+    "delete_offload_parameter",
+    "has_offloaded_params",
+    "disable_hf_hook",
+    "disable_offload",
+    "align_modules",
+    "align_module_device",
+    "register_offload_module",
+    "delete_offload_module",
+    "offloaded_dispatch",
+    "disable_offloading",
+    "remove_dispatch",
+    "cast_to_device",
+    "offload_to_weights_map",
+    "delete_from_weights_map",
+]
+
+
+def update_parameter_data(
+    module: torch.nn.Module, new_param_data: torch.Tensor, param_name: str
+):
+    """
+    Update the data of an existing parameter and its offload dict. Supports both
+    parameters of offloaded modules and non-offloaded modules
+
+    :param module: module containing the parameter to update
+    :param new_param_data: tensor to update parameter with
+    :param param_name: name of module parameter to update
+    """
+    update_offload_parameter(module, param_name, new_param_data)
+
+
+""" Candidates for Upstreaming """
+
+
+@deprecated()
+def cast_to_device(device_spec: int | torch.device) -> torch.device:
+    """
+    Convert an integer device index or torch.device into a torch.device object.
+
+    :param device_spec: Device index (int) or torch.device object.
+        Negative integers map to CPU.
+    :return: torch.device corresponding to the given device specification.
+    """
+    if isinstance(device_spec, int):
+        return torch.device(f"cuda:{device_spec}" if device_spec >= 0 else "cpu")
+    return device_spec
+
+
+@deprecated("module.register_parameter(name, parameter)")
+def register_offload_parameter(
+    module: torch.nn.Module,
+    name: str,
+    parameter: torch.nn.Parameter,
+    offload_device: Optional[torch.device | Literal["disk"]] = None,
+):
+    """
+    Register a parameter to the given module which may be offloaded
+
+    :param module: maybe offloaded module
+    :param name: name of newly registered parameter
+    :param parameter: parameter being registered
+    :param offload_device: device on which weight will be offloaded to. If None is
+        provided, then infer device from parameters on module
+    """
+    if offload_device == "disk":
+        raise NotImplementedError("Disk offloading is not currently supported")
+
+    module.register_parameter(name, parameter)
+
+
+@deprecated("delattr(module, name)")
+def delete_offload_parameter(module: torch.nn.Module, name: str):
+    """
+    Delete a parameter from a module which may be offloaded,
+    including any metadata in _hf_hook
+
+    :param module: maybe offloaded module
+    :param name: name of parameter being deleted
+    """
+    delattr(module, name)
+
+
+@deprecated("compressed_tensors.offload::unwrap_offload")
+@contextlib.contextmanager
+def disable_hf_hook(module: torch.nn.Module):
+    raise ValueError()
+
+
+@deprecated("delattr(base, name)")
+def delete_offload_module(base: torch.nn.Module, name: str):
+    """
+    Delete a submodule from a model which may contain offloading
+    :param base: parent module to delete submodule from
+    :param name: name of submodule on parent
+    """
+    delattr(base, name)
+
+
+@deprecated("compressed_tensors.offload::offload_model")
+def offloaded_dispatch(
+    module: torch.nn.Module,
+    execution_device: torch.device,
+    offload_device: Optional[torch.device | Literal["disk"]] = None,
+) -> torch.nn.Module:
+    """
+    Dispatch a model, keeping device parameters offloaded on their current device
+
+    :param module: module containing parameters to offload
+    :param execution_device: device that modules will onload and execute on
+    :param offload_device: device that module parameters will offload to
+    :return: module with offloading device hooks
+    """
+    if offload_device is not None:
+        raise ValueError(
+            "Passing offload_device to offloaded_dispatch is no longer supported"
+        )
+    offload_model(module, execution_device)
+
+
+@deprecated("compressed_tensors.offload::align_module_device")
+def disable_offload(module: torch.nn.Module):
+    raise ValueError()
+
+
+@deprecated()
+def offload_to_weights_map(*args, **kwargs):
+    raise ValueError()
+
+
+@deprecated()
+def delete_from_weights_map(*args, **kwargs):
+    raise ValueError()
+
+
+@deprecated()
+def has_offloaded_params(module: torch.nn.Module) -> bool:
+    """
+    Checks if a module has offloaded parameters by checking if the given module has a
+    AlignDevicesHook attached with offloading enabled
+
+    Args:
+        module (`torch.nn.Module`): The module to check for an offload hook.
+
+    Returns:
+        bool: `True` if the module has an offload hook and offloading is enabled,
+            `False` otherwise.
+    """
+    return False
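
Taken together, the new `utils/offload.py` replaces the previous 672-line implementation with thin, mostly-deprecated shims that forward to `compressed_tensors.offload`. A migration sketch showing deprecated spellings next to their documented replacements (module and parameter names are illustrative):

```python
import torch
from compressed_tensors.utils.offload import (
    delete_offload_parameter,
    register_offload_parameter,
    update_parameter_data,
)

module = torch.nn.Linear(4, 4)
scale = torch.nn.Parameter(torch.ones(1), requires_grad=False)

# deprecated: equivalent to module.register_parameter(name, parameter)
register_offload_parameter(module, "weight_scale", scale)

# still supported: forwards to update_offload_parameter(module, name, data)
update_parameter_data(module, torch.full((1,), 0.5), "weight_scale")

# deprecated: equivalent to delattr(module, name)
delete_offload_parameter(module, "weight_scale")
```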

src/compressed_tensors.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.13.1a20260123
+Version: 0.13.1a20260127
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/vllm-project/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -9,7 +9,7 @@ License: Apache 2.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: torch<=2.9.1,>=1.7.0
-Requires-Dist: transformers
+Requires-Dist: transformers<5.0.0
 Requires-Dist: pydantic>=2.0
 Requires-Dist: loguru
 Provides-Extra: dev
@@ -19,6 +19,8 @@ Requires-Dist: wheel>=0.36.2; extra == "dev"
 Requires-Dist: flake8>=3.8.3; extra == "dev"
 Requires-Dist: pytest>=6.0.0; extra == "dev"
 Requires-Dist: nbconvert>=7.16.3; extra == "dev"
+Requires-Dist: transformers<5.0; extra == "dev"
+Requires-Dist: accelerate; extra == "dev"
 Provides-Extra: accelerate
 Requires-Dist: accelerate; extra == "accelerate"
 Dynamic: author

src/compressed_tensors.egg-info/SOURCES.txt
@@ -166,7 +166,6 @@ tests/test_transform/utils/test_hadamard.py
 tests/test_utils/__init__.py
 tests/test_utils/test_helpers.py
 tests/test_utils/test_match.py
-tests/test_utils/test_offload.py
 tests/test_utils/test_safetensors_load.py
 tests/test_utils/test_type.py
 utils/copyright.py

tests/test_quantization/lifecycle/test_apply.py
@@ -32,7 +32,6 @@ from compressed_tensors.quantization import (
 )
 from compressed_tensors.quantization.lifecycle import apply_quantization_config
 from compressed_tensors.utils import is_match, match_named_modules
-from tests.testing_utils import requires_accelerate
 from transformers import AutoModelForCausalLM
 
 
@@ -322,7 +321,6 @@ def get_sample_tinyllama_quant_config(
     return QuantizationConfig.model_validate(config_dict)
 
 
-@requires_accelerate()
 @pytest.mark.parametrize(
     "target,should_raise_warning",
     [
@@ -462,12 +460,8 @@ def test_multi_apply_quantization_config():
     )
 
 
-@requires_accelerate()
 def test_apply_kv_cache():
-
-
-    with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
+    model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
 
     args = QuantizationArgs(
         num_bits=8,
@@ -486,12 +480,8 @@ def test_apply_kv_cache():
     assert hasattr(layer.self_attn, "v_scale")
 
 
-@requires_accelerate()
 def test_apply_attention():
-
-
-    with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
+    model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
 
     scheme = QuantizationScheme(
         targets=["LlamaAttention"],

tests/test_quantization/lifecycle/test_initialize.py
@@ -17,6 +17,7 @@ import math
 
 import pytest
 import torch
+from compressed_tensors.offload import offload_model
 from compressed_tensors.quantization import (
     FP8_E4M3_DATA,
     ActivationOrdering,
@@ -28,7 +29,7 @@ from compressed_tensors.quantization import (
 from compressed_tensors.quantization.lifecycle.initialize import (
     initialize_module_for_quantization,
 )
-from tests.testing_utils import
+from tests.testing_utils import requires_gpu
 from torch.nn import Linear
 
 
@@ -98,7 +99,7 @@ def test_initialize_module_for_quantization(
     assert layer.quantization_status == QuantizationStatus.INITIALIZED
 
 
-@
+@requires_gpu
 @pytest.mark.parametrize(
     "weights,input_activations",
     [
@@ -119,9 +120,7 @@ def test_initialize_module_for_quantization(
 def test_initialize_module_for_quantization_offloaded(
     create_quantization_scheme, weights, input_activations, layer
 ):
-
-
-    attach_align_device_hook(layer, offload=True)
+    offload_model(layer, "cuda:0")
 
     test_initialize_module_for_quantization(
         create_quantization_scheme,