compressed-tensors 0.9.5a20250602__tar.gz → 0.9.5a20250604__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.9.5a20250602/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250604}/PKG-INFO +1 -1
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +16 -18
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/quantized_compressors/base.py +30 -3
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/lifecycle/apply.py +1 -10
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/lifecycle/forward.py +2 -3
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/lifecycle/initialize.py +7 -113
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/utils/helpers.py +10 -6
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/utils/offload.py +134 -1
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/test_utils/test_helpers.py +4 -6
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_utils/test_offload.py +95 -5
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.github/.gitkeep +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.github/workflows/build-test.yml +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.github/workflows/build.yml +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.github/workflows/report.yml +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.github/workflows/test.yml +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.github/workflows/trigger-all.yml +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.github/workflows/upload.yml +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/.gitignore +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/LICENSE +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/Makefile +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/README.md +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/pyproject.toml +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/setup.cfg +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/setup.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/quant_args.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/transform/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/transform/transform_args.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/transform/transform_config.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/transform/transform_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/conftest.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/lifecycle/test_apply.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_registry.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_transform/test_transform_config.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/utils/copyright.py +0 -0
{compressed_tensors-0.9.5a20250602/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250604}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250602
+Version: 0.9.5a20250604
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/model_compressors/model_compressor.py

@@ -50,6 +50,7 @@ from compressed_tensors.utils import (
     align_module_device,
     delete_offload_parameter,
     get_execution_device,
+    get_offloaded_device,
     get_safetensors_folder,
     has_offloaded_params,
     merge_names,

@@ -408,16 +409,17 @@ class ModelCompressor:
             )

             # remove any existing parameters
-
+            exec_device = get_execution_device(module)
+            offload_device = get_offloaded_device(module)
             for name, _ in list(module.named_parameters()):
-
+                delete_offload_parameter(module, name)

             # replace with compressed parameters
             for name, value in state_dict.items():
                 name = name.removeprefix(f"{prefix}.")
-                value = value.to(
+                value = value.to(exec_device)
                 param = torch.nn.Parameter(value, requires_grad=False)
-                register_offload_parameter(module, name, param)
+                register_offload_parameter(module, name, param, offload_device)

             module.quantization_status = QuantizationStatus.COMPRESSED

@@ -460,30 +462,26 @@ class ModelCompressor:

             # quantization second
             if prefix in module_to_scheme:
-
-
-
+                state_dict = (
+                    self.quantization_compressor.decompress_module_from_state_dict(
+                        prefix,
+                        state_dict,
+                        scheme=module_to_scheme[prefix],
+                    )
                 )
-                # generates (mod_path, {param_name, param_val})
-                # of compressed params and used params, but not unused params
-                # some used params are removed by get_unexpected_file_keys
-                state_dict = {
-                    merge_names(module_path, param_name): param_value
-                    for module_path, compressed_data in generator
-                    for param_name, param_value in compressed_data.items()
-                }

             # remove any existing parameters
-
+            exec_device = get_execution_device(module)
+            offload_device = get_offloaded_device(module)
             for name, _ in list(module.named_parameters()):
                 delete_offload_parameter(module, name)

             # replace with decompressed parameters
             for name, value in state_dict.items():
                 name = name.removeprefix(f"{prefix}.")
-                value = value.to(
+                value = value.to(exec_device)
                 param = torch.nn.Parameter(value, requires_grad=False)
-                register_offload_parameter(module, name, param)
+                register_offload_parameter(module, name, param, offload_device)

             module.quantization_status = QuantizationStatus.FROZEN

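Note: the compress/decompress paths above now capture both the module's execution device and its offload device before swapping parameters, so a model offloaded with accelerate stays offloaded after (de)compression instead of being materialized on the execution device. A minimal sketch of the same pattern, using a hypothetical torch.nn.Linear stand-in rather than code from this package:

import torch
from compressed_tensors.utils import (
    delete_offload_parameter,
    get_execution_device,
    get_offloaded_device,
    register_offload_parameter,
)

# hypothetical stand-in module; in the compressor this is an (possibly offloaded) submodule
module = torch.nn.Linear(4, 4)

exec_device = get_execution_device(module)     # device compute runs on
offload_device = get_offloaded_device(module)  # device weights rest on when offloaded

# replace the weight: build it on the execution device, then let
# compressed-tensors place/offload it back onto the original offload device
new_weight = torch.zeros(4, 4)
delete_offload_parameter(module, "weight")
param = torch.nn.Parameter(new_weight.to(exec_device), requires_grad=False)
register_offload_parameter(module, "weight", param, offload_device)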
{compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/compressors/quantized_compressors/base.py

@@ -24,6 +24,7 @@ from compressed_tensors.utils import (
     get_nested_weight_mappings,
     merge_names,
 )
+from compressed_tensors.utils.safetensors_load import match_param_name
 from safetensors import safe_open
 from torch import Tensor
 from tqdm import tqdm

@@ -223,9 +224,7 @@ class BaseQuantizationCompressor(BaseCompressor):
             state_dict, self.compression_param_names
         )
         for module_path in weight_mappings.keys():
-            weight_data =
-            for param_name, param_value in weight_mappings[module_path].items():
-                weight_data[param_name] = param_value
+            weight_data = weight_mappings[module_path].copy()

             if "weight_scale" in weight_data:
                 quant_args = names_to_scheme[module_path].weights

@@ -234,3 +233,31 @@ class BaseQuantizationCompressor(BaseCompressor):
                 )
                 weight_data["weight"] = decompressed
             yield module_path, weight_data
+
+    def decompress_module_from_state_dict(
+        self,
+        prefix: str,
+        state_dict: Dict[str, torch.Tensor],
+        scheme: QuantizationScheme,
+    ) -> Dict[str, torch.Tensor]:
+        """
+        Only used by in-memory decompression pathways to decompress the parameters of
+        one module
+
+        :param prefix: prefix of state_dict, typically the path to the module
+        :param state_dict: state dict containing module parameter values
+        :param scheme: quantization scheme of module to decompress
+        :return: state dict with weight decompressed if applicable
+        """
+        state_dict = {
+            key.removeprefix(f"{prefix}."): value for key, value in state_dict.items()
+        }
+
+        if "weight_scale" in state_dict:
+            state_dict["weight"] = self.decompress_weight(
+                compressed_data=state_dict, quantization_args=scheme.weights
+            )
+
+        state_dict = {f"{prefix}.{key}": value for key, value in state_dict.items()}
+
+        return state_dict
{compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/lifecycle/apply.py

@@ -27,14 +27,8 @@ from compressed_tensors.quantization.lifecycle.compressed import (
 )
 from compressed_tensors.quantization.lifecycle.initialize import (
     initialize_module_for_quantization,
-    update_fused_layer_weight_global_scales,
-)
-from compressed_tensors.quantization.quant_args import (
-    FP4_E2M1_DATA,
-    FP8_E4M3_DATA,
-    QuantizationArgs,
-    QuantizationType,
 )
+from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.quant_config import (
     QuantizationConfig,
     QuantizationStatus,

@@ -272,9 +266,6 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
             )
         )

-    if status == QuantizationStatus.INITIALIZED:
-        update_fused_layer_weight_global_scales(model)
-
     if current_status < status >= QuantizationStatus.COMPRESSED > current_status:
         model.apply(compress_quantized_weights)

{compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/lifecycle/forward.py

@@ -21,7 +21,6 @@ from compressed_tensors.quantization.quant_args import (
     DynamicType,
     QuantizationArgs,
     QuantizationStrategy,
-    QuantizationType,
     round_to_quantized_type,
 )
 from compressed_tensors.quantization.quant_config import QuantizationStatus

@@ -405,7 +404,7 @@ def _quantize(

     # if a global scale is optionally provided, use it
     # to further scale the local `scale` parameter
-    if global_scale:
+    if global_scale is not None:
         scale = scale.to(global_scale.dtype) / global_scale

     scaled = x / scale

@@ -438,7 +437,7 @@ def _dequantize(

     # if a global scale is optionally provided, use it
     # to further scale the local `scale` parameter
-    if global_scale:
+    if global_scale is not None:
         scale = scale.to(global_scale.dtype) / global_scale

     dequant_value = x_q.to(scale.dtype)
{compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/lifecycle/initialize.py

@@ -23,26 +23,18 @@ from compressed_tensors.quantization.lifecycle.forward import (
     wrap_module_forward_quantized,
 )
 from compressed_tensors.quantization.quant_args import (
-    FP4_E2M1_DATA,
     FP8_E4M3_DATA,
     ActivationOrdering,
     QuantizationArgs,
     QuantizationStrategy,
-    QuantizationType,
 )
 from compressed_tensors.quantization.quant_config import QuantizationStatus
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
-from compressed_tensors.quantization.utils import
-    generate_global_scale,
-    is_fp4,
-    is_kv_cache_quant_scheme,
-    iter_named_quantizable_modules,
-)
+from compressed_tensors.quantization.utils import is_fp4, is_kv_cache_quant_scheme
 from compressed_tensors.utils import (
     disable_hf_hook,
     get_execution_device,
     register_offload_parameter,
-    update_parameter_data,
 )
 from torch.nn import Module, Parameter

@@ -51,7 +43,6 @@ __all__ = [
     "initialize_module_for_quantization",
     "is_attention_module",
     "KVCacheScaleType",
-    "update_fused_layer_weight_global_scales",
 ]

@@ -162,22 +153,13 @@ def _initialize_scale_zero_point(
     # initialize on execution device to avoid performing quantized ops on cpu
     device = get_execution_device(module)

-    # 1. Create global_scales for tensor_group
+    # 1. Create global_scales for tensor_group - generates
+    # a per tensor scale
     if quantization_args.strategy == QuantizationStrategy.TENSOR_GROUP:
-
-
-
-
-        # local scale falls properly within the FP8 range (i.e max value is FP8_max)
-        # which is the expected dtype of NVFP4A16 scales
-        value = generate_global_scale(input_tensor=module.weight)
-        value = value.to(device)
-        init_global_scale = Parameter(value, requires_grad=False)
-    else:
-        init_global_scale = Parameter(
-            torch.empty(1, dtype=torch.float32, device=device),
-            requires_grad=False,
-        )
+        init_global_scale = Parameter(
+            torch.empty(1, dtype=torch.float32, device=device),
+            requires_grad=False,
+        )
     register_offload_parameter(
         module, f"{base_name}_global_scale", init_global_scale
     )

@@ -258,91 +240,3 @@ def _initialize_attn_scales(module: Module) -> None:
         requires_grad=False,
     )
     register_offload_parameter(module, KVCacheScaleType.VALUE.value, init_scale)
-
-
-# TODO: Potentially introduce an argument to turn this off
-# Only relevant for NVFP4A16 currently
-def update_fused_layer_weight_global_scales(model: torch.nn.Module):
-    """
-    When running NVFP4A16 quantization, update the global scale
-    such that q,k,v layers are treated as one tensor with the same
-    global_scale and gate_proj/up_proj layers are treated as one tensor
-    with the same global scale. This is requirement currently being set
-    by vLLM and may be removed in the future OR potentially make it
-    an optional step.
-
-    :param model: model to quantize
-    """
-
-    def _is_attention_module(module: Module):
-        return "attention" in module.__class__.__name__.lower() and (
-            hasattr(module, "k_proj")
-            or hasattr(module, "v_proj")
-            or hasattr(module, "qkv_proj")
-        )
-
-    def _is_mlp_module(module: Module):
-        return "mlp" in module.__class__.__name__.lower() and (
-            hasattr(module, "gate_proj") or hasattr(module, "up_proj")
-        )
-
-    def _valid_fp4_quant(layer_list: List[torch.nn.Linear]):
-        """
-        Return True if all the linear layers in the layer_list are
-        NVFP4A16 quantized.
-        """
-        for layer in layer_list:
-            scheme = getattr(layer, "quantization_scheme", None)
-            if scheme is None:
-                return False
-
-            weight_quant_args = scheme.weights
-
-            if weight_quant_args is None:
-                return False
-
-            if not is_fp4(quantization_args=weight_quant_args):
-                return False
-        return True
-
-    for name, submodule in iter_named_quantizable_modules(
-        model,
-        include_attn=True,
-        include_mlp=True,
-    ):
-
-        if _is_attention_module(submodule):
-            # already fused/treated as one layer
-            if hasattr(submodule, "qkv_proj"):
-                continue
-
-            if not _valid_fp4_quant(
-                [submodule.q_proj, submodule.v_proj, submodule.k_proj]
-            ):
-                continue
-
-            q_weight = submodule.q_proj.weight.data
-            v_weight = submodule.v_proj.weight.data
-            k_weight = submodule.k_proj.weight.data
-
-            value = generate_global_scale(
-                input_tensor=torch.cat((q_weight, v_weight, k_weight), dim=0)
-            )
-
-            update_parameter_data(submodule.q_proj, value, "weight_global_scale")
-            update_parameter_data(submodule.k_proj, value, "weight_global_scale")
-            update_parameter_data(submodule.v_proj, value, "weight_global_scale")
-
-        if _is_mlp_module(submodule):
-            if not _valid_fp4_quant([submodule.gate_proj, submodule.up_proj]):
-                continue
-
-            gate_data = submodule.gate_proj.weight.data
-            up_data = submodule.up_proj.weight.data
-
-            value = generate_global_scale(
-                input_tensor=torch.cat((gate_data, up_data), dim=0)
-            )
-
-            update_parameter_data(submodule.gate_proj, value, "weight_global_scale")
-            update_parameter_data(submodule.up_proj, value, "weight_global_scale")
{compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/quantization/utils/helpers.py

@@ -47,7 +47,7 @@ __all__ = [
     "compute_dynamic_scales_and_zp",
     "calculate_range",
     "calculate_qparams",
-    "
+    "generate_gparam",
     "is_fp4",
 ]

@@ -110,6 +110,7 @@ def calculate_qparams(
     else:
         scales = max_val_pos / (float(bit_range) / 2)

+    # TODO: in the case of MoEs, the global_scale may also be 0/need to be clamped
     if scales.dtype == FP8_E4M3_DATA.dtype:
         # torch.clamp not supported for FP8
         # use the next largest fp8 value from 0

@@ -475,8 +476,9 @@ def parse_out_kv_cache_args(
     return kv_cache_args, quant_scheme_to_layers


-def
-
+def generate_gparam(
+    updated_min_val: torch.Tensor,
+    updated_max_val: torch.Tensor,
     scale_data: Optional[FloatArgs] = FP8_E4M3_DATA,
     quant_data: Optional[FloatArgs] = FP4_E2M1_DATA,
     dtype: Optional[torch.dtype] = torch.float32,

@@ -490,6 +492,8 @@ def generate_global_scale(
     attempts to use the entire FP8 dtype range while mapping a per-group max
     to the FP4 max.
     """
-
-
-
+    min_vals = torch.min(updated_min_val, torch.zeros_like(updated_min_val))
+    max_vals = torch.max(updated_max_val, torch.zeros_like(updated_max_val))
+    max_val_pos = torch.max(torch.abs(min_vals), torch.abs(max_vals))
+    global_scale = scale_data.max * quant_data.max / max_val_pos
+    return global_scale.to(dtype).reshape([1])
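Note: generate_gparam derives the per-tensor global scale from observed min/max values rather than from a raw weight tensor as the removed generate_global_scale did. A hedged usage sketch, mirroring the updated unit test later in this diff (448 and 6 are the FP8-E4M3 and FP4-E2M1 maxima referenced there):

import torch
from compressed_tensors.quantization.utils import generate_gparam

weight = torch.nn.Linear(7, 8).weight

# observe the tensor's min/max, then derive a global scale that maps its
# absolute max onto the combined FP8 * FP4 representable range (defaults above)
min_val, max_val = torch.aminmax(weight)
global_scale = generate_gparam(min_val.data, max_val.data)

# by construction, abs-max of the tensor is approximately (448 * 6) / global_scale
recovered_max = 448.0 * 6.0 / global_scale
print(global_scale, recovered_max, weight.abs().max())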
{compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/src/compressed_tensors/utils/offload.py

@@ -28,15 +28,18 @@ Utilities associated with offloading functionality provided by `accelerate`.
 import contextlib
 import warnings
 from functools import wraps
-from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
+from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Union

 import torch


 try:
+    from accelerate import dispatch_model
     from accelerate.hooks import (
         AlignDevicesHook,
         add_hook_to_module,
+        attach_align_device_hook,
+        named_module_tensors,
         remove_hook_from_module,
     )
     from accelerate.utils import (

@@ -54,6 +57,9 @@ except ImportError:
     OffloadedWeightsLoader = None
     PrefixedDataset = None
     set_module_tensor_to_device = None
+    named_module_tensors = None
+    dispatch_model = None
+    attach_align_device_hook = None


 __all__ = [

@@ -70,6 +76,9 @@ __all__ = [
     "disable_offload",
     "align_modules",
     "align_module_device",
+    "register_offload_module",
+    "delete_offload_module",
+    "force_cpu_offload",
 ]

@@ -77,6 +86,11 @@ def check_accelerate(fallback: Any):
     def decorator(func: Callable[[Any], Any]):
         if not _has_accelerate:

+            if fallback == "error":
+                raise ValueError(
+                    "Please install `accelerate` in order to use this function"
+                )
+
             @wraps(func)
             def fallback_fn(*args, **kwargs):
                 return fallback

@@ -346,6 +360,7 @@ def delete_from_weights_map(
         )


+@check_accelerate(fallback=contextlib.nullcontext())
 @contextlib.contextmanager
 def disable_offload(module: torch.nn.Module):
     """

@@ -362,6 +377,7 @@ def disable_offload(module: torch.nn.Module):
         yield


+@check_accelerate(fallback=contextlib.nullcontext())
 @contextlib.contextmanager
 def align_modules(
     modules: Union[torch.nn.Module, Iterable[torch.nn.Module]],

@@ -383,6 +399,123 @@ def align_modules(
         yield


+def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.Module):
+    """
+    Register a submodule with offloading if the parent module is offloaded
+
+    :param base: module to attach submodule to
+    :param name: name of submodule
+    :param module: submodule to attach
+    """
+
+    if has_offloaded_params(base):
+        hook: AlignDevicesHook = base._hf_hook
+        assert hook.offload
+        assert hook.weights_map is not None
+        assert hook.tied_params_map is not None
+
+        # offloading kwargs for submodule
+        place_submodules = False
+        offload_buffers = True
+
+        # copy device offloading arguments from parent
+        current_device = next(base.parameters()).device  # assume base has parameters
+        offload_device = get_offloaded_device(base)
+
+        # offload parameters to weights map
+        for param_name, param in named_module_tensors(
+            module, include_buffers=offload_buffers, recurse=place_submodules
+        ):
+            offloaded = param.to(offload_device)
+            hook.tied_params_map[offloaded.data_ptr()] = {}  # (1)
+            offload_to_weights_map(hook.weights_map, f"{name}.{param_name}", offloaded)
+
+            # if the parent places submodules, offload here
+            if hook.place_submodules:
+                set_module_tensor_to_device(module, param_name, current_device)
+
+        # if the parent does not place submodules, then add a hook
+        # parameters are offloaded by `add_hook_to_module`
+        if not hook.place_submodules:
+            weights_map = PrefixedDataset(
+                hook.weights_map.dataset, prefix=f"{hook.weights_map.prefix}{name}."
+            )
+
+            submodule_hook = AlignDevicesHook(
+                execution_device=hook.execution_device,
+                offload=hook.offload,
+                io_same_device=False,
+                weights_map=weights_map,
+                offload_buffers=offload_buffers,
+                place_submodules=place_submodules,
+                skip_keys=None,
+                tied_params_map=hook.tied_params_map,
+            )
+            add_hook_to_module(module, submodule_hook)
+
+    base.register_module(name, module)
+
+    # (1): Since we cannot know which pointers are shared when we add parameters in an
+    # online way, assume that all pointers are shared. This comes at no runtime cost
+
+
+def delete_offload_module(base: torch.nn.Module, name: str):
+    """
+    Delete a submodule from a model which may contain offloading
+    :param base: parent module to delete submodule from
+    :param name: name of submodule on parent
+    """
+    module: torch.nn.Module = getattr(base, name)
+
+    for param_name, _ in list(module.named_parameters()):
+        delete_offload_parameter(module, param_name)
+
+    delattr(base, name)
+
+
+@check_accelerate(fallback="error")
+def force_cpu_offload(
+    module: torch.nn.Module, execution_device: torch.device
+) -> torch.nn.Module:
+    """
+    Force cpu offloading a module, primarily used for testing
+
+    :param module: module containing parameters to offload
+    :param execution_device: execution device submodules
+    :return: module with hooks to perform cpu offloading
+    """
+    # edge case: there is a bug in `dispatch_model` which causes
+    # the function to only work if the model contains submodules
+    if next(module.children(), None) is None:
+        attach_align_device_hook(
+            module,
+            execution_device=execution_device,
+            offload=True,
+            weights_map=module.state_dict(),
+            tied_params_map={},
+        )
+        return module
+
+    device_map = {}
+
+    def collect_device_map(name: List[str], module: torch.nn.Module):
+        if next(module.parameters(recurse=False), None) is not None:
+            device_map[".".join(name)] = "cpu"
+            return
+
+        else:
+            for submodule_name, submodule in module.named_children():
+                name.append(submodule_name)
+                collect_device_map(name, submodule)
+                name.pop()
+
+    collect_device_map([], module)
+
+    return dispatch_model(
+        module, device_map, main_device=execution_device, force_hooks=True
+    )
+
+
 """ Upstreamed Functions """

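Note: the new register_offload_module, delete_offload_module, and force_cpu_offload helpers are exercised by the tests added at the end of this diff. A condensed sketch of how they fit together, assuming accelerate is installed and using a hypothetical TinyModel in place of the test suite's ExampleModel (CPU execution device only; the tests also parametrize CUDA):

import torch
from compressed_tensors.utils import (
    delete_offload_module,
    force_cpu_offload,
    has_offloaded_params,
    register_offload_module,
)

class TinyModel(torch.nn.Module):          # hypothetical stand-in for ExampleModel
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1, 2)

    def forward(self, x):
        return self.linear(x)

model = TinyModel()
model = force_cpu_offload(model, execution_device=torch.device("cpu"))
assert has_offloaded_params(model.linear)  # weights now live in the hook's weights_map

# attach a new submodule; its parameters are offloaded alongside the parent's
register_offload_module(model.linear, "child", torch.nn.Linear(2, 3))
model(torch.empty(1))                       # forward still runs through the hooks

# and remove it again, cleaning up its offloaded parameters
delete_offload_module(model.linear, "child")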
{compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604/src/compressed_tensors.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250602
+Version: 0.9.5a20250604
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_quantization/test_utils/test_helpers.py

@@ -20,10 +20,7 @@ from compressed_tensors.quantization import (
     QuantizationArgs,
     QuantizationStrategy,
 )
-from compressed_tensors.quantization.utils import
-    calculate_qparams,
-    generate_global_scale,
-)
+from compressed_tensors.quantization.utils import calculate_qparams, generate_gparam


 @pytest.mark.parametrize(

@@ -70,8 +67,9 @@ def test_fused_global_scales():
     layer = torch.nn.Linear(7, 8)
     max_tensor_value = torch.abs(layer.weight.data).max()
     # use defaults
-
+    min_val, max_val = torch.aminmax(layer.weight)
+    global_scale = generate_gparam(min_val.data, max_val.data)
     # max value should be = (448 * 6) / global_scale
-    assert max_tensor_value == pytest.approx(
+    assert max_tensor_value.item() == pytest.approx(
         FP4_E2M1_DATA.max * FP8_E4M3_DATA.max / global_scale, abs=0.001
     )
{compressed_tensors-0.9.5a20250602 → compressed_tensors-0.9.5a20250604}/tests/test_utils/test_offload.py

@@ -16,10 +16,13 @@ import torch
 from compressed_tensors.utils import (
     align_module_device,
     align_modules,
+    delete_offload_module,
     delete_offload_parameter,
     disable_hf_hook,
+    force_cpu_offload,
     get_execution_device,
     has_offloaded_params,
+    register_offload_module,
     register_offload_parameter,
     update_offload_parameter,
 )

@@ -37,9 +40,17 @@ class ExampleModule(torch.nn.Module):
         return x * self.a + self.b


+class ExampleModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.linear = torch.nn.Linear(1, 2)
+
+    def forward(self, x):
+        return self.linear(x)
+
+
 @requires_accelerate()
 def test_has_offloaded_params():
-    from accelerate.big_modeling import cpu_offload_with_hook
     from accelerate.hooks import attach_align_device_hook, remove_hook_from_module

     module = ExampleModule()

@@ -48,10 +59,6 @@ def test_has_offloaded_params():
     attach_align_device_hook(module, offload=False)
     assert not has_offloaded_params(module)

-    remove_hook_from_module(module)
-    module, _ = cpu_offload_with_hook(module)
-    assert not has_offloaded_params(module)
-
     remove_hook_from_module(module)
     attach_align_device_hook(module, offload=True, weights_map=module.state_dict())
     assert has_offloaded_params(module)

@@ -334,3 +341,86 @@ def test_offload_to_weights_map():
     weights_map = PrefixedDataset(OffloadedWeightsLoader({name: old_value}), prefix)
     offload_to_weights_map(weights_map, name, new_value)
     assert weights_map[name] == new_value
+
+
+@requires_gpu
+@requires_accelerate()
+@pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
+def test_register_offload_module(exec_device):
+    # no offloading
+    model = ExampleModel()
+    child = torch.nn.Linear(2, 3)
+    register_offload_module(model, "child", child)
+    register_offload_module(model.linear, "child", child)
+    assert child in model.children()
+    assert child in model.linear.children()
+
+    # with offloading
+    model = ExampleModel()
+    child = torch.nn.Linear(2, 3)
+    force_cpu_offload(model, exec_device)
+    register_offload_module(model, "child", child)
+    register_offload_module(model.linear, "child", child)
+    assert child in model.children()
+    assert child in model.linear.children()
+
+    # can run modules
+    model(torch.empty(1))
+    child(torch.empty(2, device=exec_device))
+
+
+@requires_gpu
+@requires_accelerate()
+@pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
+def test_delete_offload_module(exec_device):
+    # no offloading
+    model = ExampleModel()
+    child = torch.nn.Linear(2, 3)
+    register_offload_module(model, "child", child)
+    register_offload_module(model.linear, "child", child)
+    delete_offload_module(model, "child")
+    delete_offload_module(model.linear, "child")
+    assert not child in model.children()
+    assert not child in model.linear.children()
+
+    # with offloading
+    model = ExampleModel()
+    child = torch.nn.Linear(2, 3)
+    force_cpu_offload(model, exec_device)
+    register_offload_module(model, "child", child)
+    register_offload_module(model.linear, "child", child)
+    delete_offload_module(model, "child")
+    delete_offload_module(model.linear, "child")
+    assert not child in model.children()
+    assert not child in model.linear.children()
+
+
+@requires_gpu
+@requires_accelerate()
+@pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
+def test_force_cpu_offload(exec_device):
+    # single module
+    module = torch.nn.Linear(1, 2)
+    module = force_cpu_offload(module, exec_device)
+    assert has_offloaded_params(module)
+    assert module._hf_hook.offload
+    assert module.weight.device == torch.device("meta")
+    assert "weight" in module._hf_hook.weights_map
+    assert module._hf_hook.tied_params_map is not None
+
+    # can run
+    module(torch.empty(1, device=exec_device))
+
+    # model
+    model = ExampleModel()
+    model = force_cpu_offload(model, exec_device)
+    assert not has_offloaded_params(model)
+
+    assert has_offloaded_params(model.linear)
+    assert model.linear._hf_hook.offload
+    assert model.linear.weight.device == torch.device("meta")
+    assert "weight" in model.linear._hf_hook.weights_map
+    assert model.linear._hf_hook.tied_params_map is not None
+
+    # can run
+    model(torch.empty(1, device=exec_device))