PyPI - compressed-tensors - Versions diffs - 0.13.1a20260116__tar.gz → 0.13.1a20260127__tar.gz - Mend

compressed-tensors 0.13.1a20260116__tar.gz → 0.13.1a20260127__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (176) hide show

{compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/.github/actions/test/action.yml RENAMED Viewed

@@ -23,7 +23,7 @@ runs:
       with:
           venv: ${{ inputs.venv }}
           name: compressed
-          extra: "[dev,accelerate]"
+          extra: "[dev]"
     - name: clean up
       run: |

compressed_tensors-0.13.1a20260127/.github/mergify.yml ADDED Viewed

@@ -0,0 +1,64 @@
+pull_request_rules:
+  - name: label-documentation
+    description: Automatically apply documentation label
+    conditions:
+      - label != stale
+      - -closed
+      - or:
+          - files~=^[^/]+\.md$
+          - files~=^docs/
+          - files~=^examples/
+    actions:
+      label:
+        add:
+          - documentation
+  - name: ping author on conflicts and add 'needs-rebase' label
+    conditions:
+      - label != stale
+      - conflict
+      - -closed
+    actions:
+      label:
+        add:
+          - needs-rebase
+      comment:
+        message: |
+          This pull request has merge conflicts that must be resolved before it can be
+          merged. Please rebase the PR, @{{author}}.
+          https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork
+  - name: remove 'needs-rebase' label when conflict is resolved
+    conditions:
+      - -conflict
+      - -closed
+    actions:
+      label:
+        remove:
+          - needs-rebase
+  - name: add quality-failed label
+    conditions:
+      - label != stale
+      - check-failure = quality-check
+      - -closed
+    actions:
+      label:
+        add:
+          - quality-failed
+      comment:
+        message: |
+          The quality checks have failed. Please run `make style` and `make quality` under
+          the root directory to address the lint failures. You will need to install the
+          dev optional install to get the required linting packages.
+  - name: remove quality-failed label
+    conditions:
+      - label != stale
+      - -check-failure = quality-check
+      - -closed
+    actions:
+      label:
+        remove:
+          - quality-failed

compressed_tensors-0.13.1a20260127/.github/workflows/stale.yml ADDED Viewed

@@ -0,0 +1,44 @@
+name: 'Close inactive PRs'
+on:
+  schedule:
+    - cron: '0 17 * * *'
+jobs:
+  close-pull-requests:
+    if: github.repository == 'vllm-project/compressed-tensors'
+    permissions:
+      issues: write
+      pull-requests: write
+      actions: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d
+        with:
+          operations-per-run: 1000
+          exempt-draft-pr: true
+          exempt-issue-labels: 'keep-open'
+          exempt-pr-labels: 'keep-open'
+          days-before-issue-stale: 90
+          days-before-issue-close: 30
+          stale-issue-label: 'stale'
+          stale-issue-message: >
+            This issue has been automatically marked as stale because it has not
+            had any activity within 90 days. It will be automatically closed if no
+            further activity occurs within 30 days. Leave a comment if
+            you feel this issue should remain open. Thank you!
+          close-issue-message: >
+            This issue has been automatically closed due to inactivity. Please
+            feel free to reopen if you feel it is still relevant. Thank you!
+          days-before-pr-stale: 90
+          days-before-pr-close: 30
+          stale-pr-label: 'stale'
+          stale-pr-message: >
+            This pull request has been automatically marked as stale because it
+            has not had any activity within 90 days. It will be automatically
+            closed if no further activity occurs within 30 days.
+          close-pr-message: >
+            This pull request has been automatically closed due to inactivity.
+            Please feel free to reopen if you intend to continue working on it.

{compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/.github/workflows/test-check.yaml RENAMED Viewed

@@ -30,7 +30,7 @@ jobs:
         - name: Set Env
           run: pip3 install --upgrade pip setuptools
         - name: "⚙️ Install dependencies"
-          run: pip3 install .[dev,accelerate]
+          run: pip3 install .[dev]
         - name: clean up
           run: |
             echo "cleaning up disk space as GHA runner has limited disk size."

{compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/Makefile RENAMED Viewed

@@ -8,7 +8,7 @@ quality:
 	@echo "Running copyright checks";
 	python utils/copyright.py quality $(PYCHECKGLOBS)
 	@echo "Running python quality checks";
-	black --check $(PYCHECKDIRS);
+	black --target-version py310 --check $(PYCHECKDIRS);
 	isort --check-only $(PYCHECKDIRS);
 	flake8 $(PYCHECKDIRS);
@@ -17,7 +17,7 @@ style:
 	@echo "Running copyright style";
 	python utils/copyright.py style $(PYCHECKGLOBS)
 	@echo "Running python styling";
-	black $(PYCHECKDIRS);
+	black --target-version py310 $(PYCHECKDIRS);
 	isort $(PYCHECKDIRS);
 # run tests for the repo

{compressed_tensors-0.13.1a20260116/src/compressed_tensors.egg-info → compressed_tensors-0.13.1a20260127}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.13.1a20260116
+Version: 0.13.1a20260127
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/vllm-project/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -8,8 +8,8 @@ Author-email: support@neuralmagic.com
 License: Apache 2.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: torch>=1.7.0
-Requires-Dist: transformers
+Requires-Dist: torch<=2.9.1,>=1.7.0
+Requires-Dist: transformers<5.0.0
 Requires-Dist: pydantic>=2.0
 Requires-Dist: loguru
 Provides-Extra: dev
@@ -19,6 +19,8 @@ Requires-Dist: wheel>=0.36.2; extra == "dev"
 Requires-Dist: flake8>=3.8.3; extra == "dev"
 Requires-Dist: pytest>=6.0.0; extra == "dev"
 Requires-Dist: nbconvert>=7.16.3; extra == "dev"
+Requires-Dist: transformers<5.0; extra == "dev"
+Requires-Dist: accelerate; extra == "dev"
 Provides-Extra: accelerate
 Requires-Dist: accelerate; extra == "accelerate"
 Dynamic: author

{compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/setup.py RENAMED Viewed

@@ -88,11 +88,11 @@ def _setup_packages() -> List:
     )
 def _setup_install_requires() -> List:
-    return ["torch>=1.7.0", "transformers", "pydantic>=2.0", "loguru"]
+    return ["torch>=1.7.0,<=2.9.1", "transformers<5.0.0", "pydantic>=2.0", "loguru"]
 def _setup_extras() -> Dict:
     return {
-        "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3"],
+        "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3", "transformers<5.0", "accelerate"],
         "accelerate": ["accelerate"]
     }

{compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/__init__.py RENAMED Viewed

@@ -20,5 +20,14 @@ from .base import *
 from .compressors import *
 from .config import *
 from .quantization import QuantizationConfig, QuantizationStatus
-from .utils import *
+# avoid resolving compressed_tensors.offload as compressed_tensors.utils.offload
+from .utils.offload import *
+from .utils.helpers import *
+from .utils.internal import *
+from .utils.match import *
+from .utils.permutations_24 import *
+from .utils.safetensors_load import *
+from .utils.semi_structured_conversions import *
+from .utils.type import *
 from .version import *

{compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py RENAMED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 import logging
+import warnings
 from typing import Dict, Generator, Tuple
 import numpy as np
@@ -138,6 +139,12 @@ class Marlin24Compressor(BaseCompressor):
         :param show_progress: whether to show tqdm progress
         :return: compressed state dict
         """
+        warnings.warn(
+            "The marlin24 format is deprecated and will be removed in a "
+            "future release. vLLM no longer supports marlin24 models.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self.validate_quant_compatability(names_to_scheme)
         compressed_dict = {}

{compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/format.py RENAMED Viewed

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import warnings
 from typing import List, Optional
 import torch
@@ -68,6 +69,12 @@ def _get_quant_compression_format(
         ):
             # marlin24 kernel only applicable for channel/group quantization
             # Note: vLLM may only support group quant for marlin24
+            warnings.warn(
+                "The marlin24 format is deprecated and will be removed in a "
+                "future release. vLLM no longer supports marlin24 models.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
             return CompressionFormat.marlin_24
         return CompressionFormat.pack_quantized

{compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/linear/compressed_linear.py RENAMED Viewed

@@ -87,12 +87,6 @@ class CompressedLinear(Linear):
         # mark module as compressed
         module.quantization_status = QuantizationStatus.COMPRESSED
-        # handles case where forward is wrapped in new_forward by accelerate hooks
-        if hasattr(module, "_old_forward"):
-            module._old_forward = CompressedLinear.forward.__get__(
-                module, CompressedLinear
-            )
         return module
     def forward(self, input: Tensor) -> Tensor:

compressed_tensors-0.13.1a20260127/src/compressed_tensors/offload/__init__.py ADDED Viewed

@@ -0,0 +1,198 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import contextlib
+from typing import Iterable, Optional
+import torch
+from compressed_tensors.offload.cache import OffloadCache
+from compressed_tensors.offload.dispatch import (  # noqa: F401
+    dispatch_model,
+    offload_model,
+    remove_dispatch,
+)
+from compressed_tensors.offload.module import offload_module, unwrap_offload_forward
+from compressed_tensors.offload.utils import get_module_device, move_module_tensor
+from compressed_tensors.utils.helpers import patch_attr
+__all__ = [
+    # dispatch models
+    "offload_model",
+    "dispatch_model",
+    "remove_dispatch",
+    # control movement
+    "disable_onloading",
+    "disable_offloading",
+    # manipulate parameters
+    "update_offload_parameter",
+    "get_execution_device",
+    "get_offloaded_device",
+    "register_offload_module",
+    # manipulate forward
+    "unwrap_offload_forward",
+    # backwards compatibility: should be deprecated
+    "align_modules",
+    "align_module_device",
+]
+@contextlib.contextmanager
+def disable_offloading():
+    """
+    When offloading is disabled, onloaded tensors remain onloaded in memory until exit
+    ```
+    with OffloadCache.disable_offloading():
+        ... = cache["weight"]
+        ... = cache["weight"]  # cache hit
+        ... = cache["weight"]  # cache hit
+    # upon exit, all onloaded weights are released
+    ```
+    """
+    with OffloadCache.disable_offloading():
+        yield
+@contextlib.contextmanager
+def disable_onloading():
+    """
+    When onloading is disabled, tensors are not offloaded on access, and assignments do
+    not trigger offloading. This is mostly used to disable device movement for debugging
+    ```
+    with OffloadCache.disable_onloading():
+        tensor = ...
+        cache["weight"] = tensor   # assignments do not trigger onloading
+        cache["weight"] is tensor  # tensor remains offloaded
+    ```
+    """
+    with OffloadCache.disable_onloading():
+        yield
+def update_offload_parameter(module: torch.nn.Module, name: str, data: torch.Tensor):
+    """
+    Update the data of an existing parameter and its offload dict. Supports both
+    parameters of offloaded modules and non-offloaded modules
+    :param module: module containing the parameter to update
+    :param name: name of module parameter to update
+    :param data: tensor to update parameter with
+    """
+    if isinstance(module._parameters, OffloadCache):
+        with module._parameters.disable_onloading():
+            value = getattr(module, name)
+            value.copy_(module._parameters.offload(data))
+            setattr(module, name, value)
+    else:
+        getattr(module, name).copy_(data)
+def get_execution_device(module: torch.nn.Module) -> torch.device | str:
+    """
+    Get the device which inputs should be moved to before module execution.
+    :param module: module to check, may be offloaded
+    :return: onload device of module
+    """
+    if isinstance(module._parameters, OffloadCache):
+        return module._parameters.onload_device
+    else:
+        return get_module_device(module)
+def get_offloaded_device(module: torch.nn.Module) -> torch.device:
+    """
+    :param module: module to check
+    :return: device module is offloaded to onto after forward pass
+    """
+    with disable_onloading():
+        return get_module_device(module)
+def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.Module):
+    """
+    Register a submodule with offloading if the parent module is offloaded
+    :param base: module to attach submodule to
+    :param name: name of submodule
+    :param module: submodule to attach
+    """
+    cache = base._parameters
+    if isinstance(cache, OffloadCache):
+        offload_module(module, cache.onload_device, cache.offload_device)
+    base.register_module(name, module)
+""" Implemented for backwards compatibility """
+@contextlib.contextmanager
+def align_modules(
+    modules: torch.nn.Module | Iterable[torch.nn.Module],
+    execution_device: Optional[torch.device] = None,
+):
+    """
+    Context manager for onloading modules to a device, and disabling onload and offload
+    attempts triggered by forward calls. Used for sequential onloading of layers
+    :param modules: `torch.nn.Module` or iterable of `torch.nn.Module`s to onload
+    :param execution_device: device to onload to
+    """
+    with contextlib.ExitStack() as stack:
+        for module in modules:
+            stack.enter_context(align_module_device(module, execution_device))
+        yield
+@contextlib.contextmanager
+def align_module_device(
+    module: torch.nn.Module, execution_device: Optional[torch.device] = None
+):
+    """
+    Context manager that moves a module's parameters to the specified execution device.
+    :param module: Module with parameters to align
+    :param execution_device: If provided, overrides the module's execution device
+        within the context. Otherwise, use hook execution device or pass
+    """
+    if isinstance(module._parameters, OffloadCache):
+        assert isinstance(module._buffers, OffloadCache)
+        with module._parameters.disable_offloading():
+            if execution_device is not None:
+                with patch_attr(
+                    module._parameters, "onload_device", execution_device
+                ), patch_attr(module._buffers, "onload_device", execution_device):
+                    yield
+            else:
+                yield
+    else:
+        original_device = {}
+        for name, param in module.named_parameters(recurse=False):
+            original_device[name] = param.device
+            move_module_tensor(module, name, execution_device)
+        try:
+            yield
+        finally:
+            for name, param in module.named_parameters(recurse=False):
+                device = original_device[name]
+                move_module_tensor(module, name, device)

compressed_tensors-0.13.1a20260127/src/compressed_tensors/offload/cache/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+from .base import OffloadCache
+from .cpu import CPUCache

compressed-tensors 0.13.1a20260116__tar.gz → 0.13.1a20260127__tar.gz

compressed-tensors 0.13.1a20260116__tar.gz → 0.13.1a20260127__tar.gz