PyPI - compressed-tensors - Versions diffs - 0.9.5a20250425__py3-none-any.whl → 0.9.5a20250428__py3-none-any.whl - Mend

compressed-tensors 0.9.5a20250425__py3-none-any.whl → 0.9.5a20250428__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

compressed_tensors/quantization/lifecycle/initialize.py CHANGED Viewed

@@ -31,7 +31,7 @@ from compressed_tensors.quantization.quant_scheme import QuantizationScheme
 from compressed_tensors.quantization.utils import is_kv_cache_quant_scheme
 from compressed_tensors.utils import (
     disable_hf_hook,
-    has_offloaded_params,
+    get_execution_device,
     register_offload_parameter,
 )
 from torch.nn import Module, Parameter
@@ -148,11 +148,8 @@ def _initialize_scale_zero_point(
     if quantization_args.dynamic:
         return
-    # begin on the same device as other parameters or cpu if offloaded.
-    # in the offloaded case, there's no point moving tensors to the execution device
-    # if they're going to be immediately offloaded by `register_offload_parameter`
-    params_device = next(module.parameters()).device
-    device = "cpu" if has_offloaded_params(module) else params_device
+    # initialize on execution device to avoid performing quantized ops on cpu
+    device = get_execution_device(module)
     # infer expected scale/zero point shape
     if quantization_args.strategy == QuantizationStrategy.TOKEN:

compressed_tensors/utils/offload.py CHANGED Viewed

@@ -28,7 +28,7 @@ Utilities associated with offloading functionality provided by `accelerate`.
 import contextlib
 import warnings
 from functools import wraps
-from typing import Any, Callable, Dict, Literal, Optional, Union
+from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
 import torch
@@ -67,6 +67,8 @@ __all__ = [
     "delete_offload_parameter",
     "has_offloaded_params",
     "disable_hf_hook",
+    "disable_offload",
+    "align_modules",
     "align_module_device",
 ]
@@ -344,6 +346,43 @@ def delete_from_weights_map(
         )
+@contextlib.contextmanager
+def disable_offload(module: torch.nn.Module):
+    """
+    Context manager to disable module onloading and offloading. Parameters will stay on
+    their current device
+    :param module: module to disable offloading for
+    """
+    if has_offloaded_params(module):
+        module._hf_hook.offload = False
+        yield
+        module._hf_hook.offload = True
+    else:
+        yield
+@contextlib.contextmanager
+def align_modules(
+    modules: Union[torch.nn.Module, Iterable[torch.nn.Module]],
+    execution_device: Optional[torch.device] = None,
+):
+    """
+    Context manager for onloading modules to a device, and disabling onload and offload
+    attempts triggered by forward calls. Used for sequential onloading of layers
+    :param modules: `torch.nn.Module` or iterable of `torch.nn.Module`s to onload
+    :param execution_device: device to onload to
+    """
+    modules = (modules,) if isinstance(modules, torch.nn.Module) else modules
+    with contextlib.ExitStack() as stack:
+        for module in modules:
+            stack.enter_context(align_module_device(module, execution_device))
+            stack.enter_context(disable_offload(module))  # disable redundant onloading
+        yield
 """ Upstreamed Functions """

compressed_tensors/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.9.5.a20250425'
+__version__ = version = '0.9.5.a20250428'
 __version_tuple__ = version_tuple = (0, 9, 5)

{compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250425
+Version: 0.9.5a20250428
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=gTsh59eInRNC2C-PhuijwOqmtbLPd40f4pGQquK-Hqk,521
+compressed_tensors/version.py,sha256=KzQHMtL_lC-Yc3CaKzKPE8g_ZpnqQDe-a-Boi82UqKY,521
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -33,20 +33,20 @@ compressed_tensors/quantization/lifecycle/apply.py,sha256=OR-6QmN9pFRGteYMBAatu2
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
 compressed_tensors/quantization/lifecycle/forward.py,sha256=DOWouUqfaLA4Qhg-ojVVBdhhSAlgZqFC26vZARxE0ko,12961
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=SY4-FJWpVSupQjuvy7rrIc0pFYU9cRL5Lo1KyfUSvoU,8010
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=7bqFwozBdw5hkJ7pwavJUweJ4nqsaZGtXYMQTb6WJcQ,7767
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
 compressed_tensors/quantization/utils/helpers.py,sha256=-wX0H7zVysJ67jRRCGbx6BfxbMU_1sqffTf5YUIpPiU,14391
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
 compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
 compressed_tensors/utils/helpers.py,sha256=RrNvzD08naEjEiXdU-FdZjQVda1nQywu1hA_GCDj0vg,10415
-compressed_tensors/utils/offload.py,sha256=Fmb4jBJhH5OdSQFaecFSHK_UreSyZdynEkadZ_oKcvM,14153
+compressed_tensors/utils/offload.py,sha256=JNQ66_6vhSsizhlUaMgyEdBuFolYxbgUuT1mAZrCfKY,15436
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=rwj0ufU5561ScWDoCG7tzLBRDtiykNno2Iq4PM_JA7E,11499
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.9.5a20250425.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.9.5a20250425.dist-info/METADATA,sha256=7YSisZwnrlyu0uq9yJUWdtGmOCOzkVTWIfnCNPaX7sc,7004
-compressed_tensors-0.9.5a20250425.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
-compressed_tensors-0.9.5a20250425.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.9.5a20250425.dist-info/RECORD,,
+compressed_tensors-0.9.5a20250428.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.9.5a20250428.dist-info/METADATA,sha256=eLy89co_VEi2up7wSW_5HntVVqtokiWBAoXa1rJ2TGQ,7004
+compressed_tensors-0.9.5a20250428.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
+compressed_tensors-0.9.5a20250428.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.9.5a20250428.dist-info/RECORD,,

{compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{compressed_tensors-0.9.5a20250425.dist-info → compressed_tensors-0.9.5a20250428.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors 0.9.5a20250425__py3-none-any.whl → 0.9.5a20250428__py3-none-any.whl

compressed-tensors 0.9.5a20250425__py3-none-any.whl → 0.9.5a20250428__py3-none-any.whl