compressed-tensors 0.8.1__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. compressed_tensors/compressors/model_compressors/model_compressor.py +76 -14
  2. compressed_tensors/compressors/quantized_compressors/base.py +35 -5
  3. compressed_tensors/compressors/quantized_compressors/naive_quantized.py +2 -2
  4. compressed_tensors/compressors/quantized_compressors/pack_quantized.py +2 -2
  5. compressed_tensors/compressors/sparse_compressors/__init__.py +1 -0
  6. compressed_tensors/compressors/sparse_compressors/base.py +45 -7
  7. compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +240 -0
  8. compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +9 -40
  9. compressed_tensors/config/__init__.py +1 -0
  10. compressed_tensors/config/base.py +1 -0
  11. compressed_tensors/config/sparse_24_bitmask.py +40 -0
  12. compressed_tensors/quantization/lifecycle/apply.py +46 -1
  13. compressed_tensors/quantization/lifecycle/forward.py +2 -2
  14. compressed_tensors/quantization/lifecycle/initialize.py +21 -45
  15. compressed_tensors/quantization/quant_config.py +1 -1
  16. compressed_tensors/utils/helpers.py +174 -1
  17. compressed_tensors/utils/offload.py +332 -44
  18. compressed_tensors/utils/safetensors_load.py +83 -17
  19. compressed_tensors/version.py +1 -1
  20. {compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/METADATA +1 -1
  21. {compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/RECORD +24 -22
  22. {compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/LICENSE +0 -0
  23. {compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/WHEEL +0 -0
  24. {compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/top_level.txt +0 -0
compressed_tensors/utils/offload.py

@@ -11,9 +11,48 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ """
+ Utilities associated with offloading functionality provided by `accelerate`.
+
+ | ------------------------------------------------------------------------------------------------------ | # noqa: E501
+ | Operation | Without offloading support              | With offloading support                          | # noqa: E501
+ | --------- | --------------------------------------- | ------------------------------------------------ | # noqa: E501
+ | Add       | module.register_parameter(name, param)  | register_offload_parameter(module, name, param)  | # noqa: E501
+ | Check     | N/A                                     | has_offloaded_params(module)                     | # noqa: E501
+ | Onload    | N/A                                     | with align_module_device(module)                 | # noqa: E501
+ | Update    | module.name.data.copy_(new_data)        | update_offload_parameter(module, name, new_data) | # noqa: E501
+ | Delete    | del module.name                         | delete_offload_parameter(module, name)           | # noqa: E501
+ | ------------------------------------------------------------------------------------------------------ | # noqa: E501
+ """
+
+ import contextlib
+ from functools import wraps
+ from typing import Any, Callable, Dict, Literal, Optional, Union
 
  import torch
- from torch.nn import Module
+
+
+ try:
+     from accelerate.hooks import (
+         AlignDevicesHook,
+         add_hook_to_module,
+         remove_hook_from_module,
+     )
+     from accelerate.utils import (
+         OffloadedWeightsLoader,
+         PrefixedDataset,
+         set_module_tensor_to_device,
+     )
+
+     _has_accelerate = True
+ except ImportError:
+     _has_accelerate = False
+     AlignDevicesHook = None
+     add_hook_to_module = None
+     remove_hook_from_module = None
+     OffloadedWeightsLoader = None
+     PrefixedDataset = None
+     set_module_tensor_to_device = None
 
 
  __all__ = [
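The table in the new module docstring pairs each plain torch idiom with its offloading-aware replacement. A hedged sketch of the intended call pattern, assuming the compressed_tensors.utils.offload import path and a toy module (nothing here is taken verbatim from library tests):

import torch
from compressed_tensors.utils.offload import (
    delete_offload_parameter,
    has_offloaded_params,
    register_offload_parameter,
    update_offload_parameter,
)

linear = torch.nn.Linear(16, 16)  # toy module; works with or without accelerate hooks

# Add: registers on the module and, if offloaded, also writes the weights map
register_offload_parameter(linear, "weight_scale", torch.nn.Parameter(torch.ones(16)))

# Check: True only when an AlignDevicesHook with offloading enabled is attached
print(has_offloaded_params(linear))  # False for this plain module

# Update: copies into the onloaded tensor and refreshes any offloaded copy
update_offload_parameter(linear, "weight_scale", torch.full((16,), 2.0))

# Delete: removes the parameter and its offloaded entry together
delete_offload_parameter(linear, "weight_scale")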
@@ -22,23 +61,44 @@ __all__ = [
      "get_offloaded_device",
      "update_prefix_dict",
      "update_parameter_data",
+     "register_offload_parameter",
+     "update_offload_parameter",
+     "delete_offload_parameter",
+     "has_offloaded_params",
+     "disable_hf_hook",
+     "align_module_device",
  ]
 
 
- def is_module_offloaded(module: Module) -> bool:
-     """
-     :param module: layer to check
-     :return: True if layer is offloaded from GPU, False otherwise
-     """
-     return hasattr(module, "_hf_hook") and module._hf_hook.offload
+ def check_accelerate(fallback: Any):
+     def decorator(func: Callable[[Any], Any]):
+         if not _has_accelerate:
+
+             @wraps(func)
+             def fallback_fn(*args, **kwargs):
+                 return fallback
+
+             return fallback_fn
+
+         return func
 
+     return decorator
 
- def get_execution_device(module: Module) -> torch.device:
+
+ """ Candidates for Deprecation """
+
+
+ @check_accelerate(fallback=False)
+ def is_module_offloaded(module: torch.nn.Module) -> bool:
+     return has_offloaded_params(module)
+
+
+ def get_execution_device(module: torch.nn.Module) -> torch.device:
      """
-     :param module: layer to check
-     :return: device layer is loaded onto during forward pass
+     :param module: module to check
+     :return: device module is loaded onto during forward pass
      """
-     if is_module_offloaded(module):
+     if has_offloaded_params(module):
          return module._hf_hook.execution_device
      device = next(module.parameters()).device
 
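check_accelerate gives every helper in this module a graceful degradation path: when accelerate is missing, the decorated function is replaced at import time by a stub that returns a fixed fallback value instead of raising. A standalone sketch of the pattern, mirroring the decorator added above:

from functools import wraps
from typing import Any, Callable

_has_accelerate = False  # simulate an environment without accelerate


def check_accelerate(fallback: Any):
    def decorator(func: Callable[[Any], Any]):
        if not _has_accelerate:

            @wraps(func)
            def fallback_fn(*args, **kwargs):
                return fallback

            return fallback_fn
        return func

    return decorator


@check_accelerate(fallback=False)
def is_module_offloaded(module) -> bool:
    return module._hf_hook.offload  # never reached when accelerate is absent


print(is_module_offloaded(object()))  # prints False: the fallback is returned

Because the substitution happens when the decorator runs, the fallback adds no per-call overhead.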
@@ -49,68 +109,296 @@ def get_execution_device(module: Module) -> torch.device:
      return device
 
 
- def get_offloaded_device(module: Module) -> torch.device:
+ def get_offloaded_device(module: torch.nn.Module) -> torch.device:
      """
-     :param module: layer to check
-     :return: device layer is offloaded to onto after forward pass
+     :param module: module to check
+     :return: device module is offloaded onto after forward pass
      """
-     if is_module_offloaded(module):
+     if has_offloaded_params(module):
          first_key = list(module._hf_hook.weights_map.keys())[0]
          prefix_dataset = module._hf_hook.weights_map.dataset
          return prefix_dataset[first_key].device
      return next(module.parameters()).device
 
 
- def update_prefix_dict(module: Module, key: str, data: torch.Tensor):
+ @check_accelerate(fallback=None)
+ def update_prefix_dict(module: torch.nn.Module, key: str, data: torch.Tensor):
      """
      Updates the offloaded state dict for a given module. Parameter named key is replaced
      by data. This is necessary because parameter updates for offloaded modules do not
      persist automatically between loads. This function only affects the offloaded
      state dict and not the current state of the loaded module.
 
-     :param module: layer containing the parameter to update
+     :param module: module containing the parameter to update
      :param key: name of parameter to update
      :param data: tensor to update parameter with in the offloaded state dict
      """
-     if not is_module_offloaded(module):
+     if not has_offloaded_params(module):
          raise ValueError("Prefix dict is only applicable to offloaded modules")
-     prefix_dict = module._hf_hook.weights_map
-     prefix_dict.dataset[f"{prefix_dict.prefix}{key}"] = data
+
+     weights_map = module._hf_hook.weights_map
+     offload_to_weights_map(weights_map, key, data)
 
 
  def update_parameter_data(
-     module: Module, new_param_data: torch.Tensor, param_name: str
+     module: torch.nn.Module, new_param_data: torch.Tensor, param_name: str
  ):
      """
-     Updates the paramter value named param_name for a given module. This function
-     updates both the current loaded module state and the offloaded state dict if
-     the module is offloaded. This is neccesary because parameter updates for offloaded
-     modules do not persist automatically between loads.
+     Update the data of an existing parameter and its offload dict. Supports both
+     parameters of offloaded modules and non-offloaded modules
 
-     :param module: layer containing the parameter to update
+     :param module: module containing the parameter to update
      :param new_param_data: tensor to update parameter with
-     :param param_name: name of layer parameter to update
+     :param param_name: name of module parameter to update
      """
-     if not hasattr(module, param_name):
-         return
+     update_offload_parameter(module, param_name, new_param_data)
+
+
+ """ Candidates for Upstreaming """
+
+
+ def register_offload_parameter(
+     module: torch.nn.Module,
+     name: str,
+     parameter: torch.nn.Parameter,
+     offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
+ ):
+     """
+     Register a parameter to the given module which may be offloaded
+
+     :param module: maybe offloaded module
+     :param name: name of newly registered parameter
+     :param parameter: parameter being registered
+     :param offload_device: device on which weight will be offloaded to. If None is
+         provided, then infer device from parameters on module
+     """
+     has_onload = any(p.device != torch.device("meta") for p in module.parameters())
+     module.register_parameter(name, parameter)
+
+     if has_offloaded_params(module):
+         weights_map = module._hf_hook.weights_map
+         offload_to_weights_map(weights_map, name, parameter.data, offload_device)
+         if not has_onload:
+             set_module_tensor_to_device(module, name, "meta")
+
+
+ def update_offload_parameter(
+     module: torch.nn.Module,
+     name: str,
+     data: Optional[torch.Tensor],
+     offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
+ ):
+     """
+     Update the data of an existing parameter and its offload dict. Supports both
+     parameters of offloaded modules and non-offloaded modules
+
+     :param module: module containing the parameter to update
+     :param name: name of module parameter to update
+     :param data: tensor to update parameter with
+     :param offload_device: device on which weight will be offloaded to. If None is
+         provided, then infer device from parameters on module
+     """
+     param = getattr(module, name)
+     data = data.to(param.dtype)
+
+     # copy data into onloaded parameter if applicable
+     if param.device != "meta":
+         param.data.copy_(data)
+
+     # update offload dict
+     if has_offloaded_params(module):
+         weights_map = module._hf_hook.weights_map
+         offload_to_weights_map(weights_map, name, data, offload_device)
+
+
+ def delete_offload_parameter(module: torch.nn.Module, name: str):
+     """
+     Delete a parameter from a module which may be offloaded
+
+     :param module: maybe offloaded module
+     :param name: name of parameter being deleted
+     """
+     delattr(module, name)
+
+     if has_offloaded_params(module):
+         weights_map = module._hf_hook.weights_map
+         delete_from_weights_map(weights_map, name)
 
-     device = next(module.parameters()).device
 
-     offloaded = False
-     if is_module_offloaded(module):
-         offload_device = get_offloaded_device(module)
-         offloaded = True
+ @check_accelerate(fallback=contextlib.nullcontext())
+ @contextlib.contextmanager
+ def disable_hf_hook(module: torch.nn.Module):
+     hooks = {}
 
-     parameter = getattr(module, param_name, None)
-     if parameter is None:
-         raise ValueError("Attempted to update uninitialized parameter")
+     def collect_hooks(module):
+         nonlocal hooks
+         if hasattr(module, "_hf_hook"):
+             hooks[module] = module._hf_hook
+             remove_hook_from_module(module)
 
-     dtype = parameter.dtype
-     parameter.data = new_param_data.to(device).to(dtype)
+     module.apply(collect_hooks)
 
-     if offloaded:
-         prefix_dict = module._hf_hook.weights_map.dataset
-         prefix = module._hf_hook.weights_map.prefix
-         prefix_dict[f"{prefix}{param_name}"] = new_param_data.to(offload_device).to(
-             dtype
+     yield
+
+     for submodule, hook in hooks.items():
+         add_hook_to_module(submodule, hook)
+
+
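disable_hf_hook temporarily detaches accelerate hooks from a module and its submodules, then restores them when the context exits; this is useful when module surgery must run without device-alignment side effects. A hedged usage sketch (the module here is a plain Linear; hooks are only present if accelerate attached them):

import torch
from compressed_tensors.utils.offload import disable_hf_hook

module = torch.nn.Linear(8, 8)

with disable_hf_hook(module):
    # any _hf_hook on module or its children has been removed here,
    # so the forward pass runs without onload/offload bookkeeping
    _ = module(torch.randn(1, 8))
# collected hooks are re-attached on exit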
+ @check_accelerate(fallback=None)
+ def offload_to_weights_map(
+     weights_map: Union[PrefixedDataset, Dict, OffloadedWeightsLoader],
+     key: str,
+     value: torch.Tensor,
+     offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
+ ):
+     """
+     Helper function which implements offloaded item assignment for PrefixedDataset,
+     OffloadedWeightsLoader, and Dict types.
+
+     :param weights_map: weight map to be updated with offload information
+     :param key: key used to identify weight location
+     :param value: weight being offloaded
+     :param offload_device: device on which weight will be offloaded to. If None is
+         provided, then infer device from parameters in weights_map
+     """
+     if isinstance(weights_map, PrefixedDataset):
+         if offload_device == "disk":
+             raise ValueError(f"Cannot offload to disk with type {type(weights_map)}")
+
+         dataset = weights_map.dataset
+         key = f"{weights_map.prefix}{key}"
+         offload_to_weights_map(dataset, key, value, offload_device)
+
+     elif isinstance(weights_map, OffloadedWeightsLoader):
+         if key not in weights_map.all_keys:
+             weights_map.all_keys.append(key)
+
+         if len(weights_map.index) <= 0 and offload_device != "disk":
+             offload_to_weights_map(weights_map.state_dict, key, value, offload_device)
+
+         else:
+             raise NotImplementedError(
+                 "Updating weights_map with disk offloading is not implemented yet"
+             )
+
+     elif isinstance(weights_map, dict):
+         if offload_device == "disk":
+             raise ValueError(f"Cannot offload to disk with type {type(weights_map)}")
+
+         # infer offload device
+         if offload_device is None:
+             if key in weights_map:
+                 offload_device = weights_map[key].device
+             else:
+                 tens = next(iter(weights_map.values()), None)
+                 if tens is None:
+                     raise ValueError(
+                         "Cannot infer offload device from empty weights_map"
+                     )
+                 offload_device = tens.device
+
+         weights_map[key] = value.to(device=offload_device)
+
+
+     else:
+         raise NotImplementedError(
+             "Updating offload data not implemented for weights_map of type "
+             f"{type(weights_map)}"
+         )
+
+
+ @check_accelerate(fallback=None)
+ def delete_from_weights_map(
+     weights_map: Union[PrefixedDataset, Dict, OffloadedWeightsLoader],
+     key: str,
+ ):
+     if isinstance(weights_map, PrefixedDataset):
+         dataset = weights_map.dataset
+         key = f"{weights_map.prefix}{key}"
+         delete_from_weights_map(dataset, key)
+
+     elif isinstance(weights_map, OffloadedWeightsLoader):
+         if len(weights_map.index) <= 0:
+             delete_from_weights_map(weights_map.state_dict, key)
+
+         else:
+             raise NotImplementedError(
+                 "Delete from weights_map with disk offloading is not implemented yet"
+             )
+
+     elif isinstance(weights_map, dict):
+         del weights_map[key]
+
+     else:
+         raise NotImplementedError(
+             "Updating offload data not implemented for weights_map of type "
+             f"{type(weights_map)}"
          )
+
+
+ """ Upstreamed Functions """
+
+
+ # introduced in accelerate v1.1.0
+ @check_accelerate(fallback=False)
+ def has_offloaded_params(module: torch.nn.Module) -> bool:
+     """
+     Checks if a module has offloaded parameters by checking if the given module has
+     an AlignDevicesHook attached with offloading enabled
+
+     Args:
+         module (`torch.nn.Module`): The module to check for an offload hook.
+
+     Returns:
+         bool: `True` if the module has an offload hook and offloading is enabled,
+         `False` otherwise.
+     """
+     return (
+         hasattr(module, "_hf_hook")
+         and isinstance(module._hf_hook, AlignDevicesHook)
+         and module._hf_hook.offload
+     )
+
+
+ # introduced in accelerate v1.1.0
+ @check_accelerate(fallback=contextlib.nullcontext())
+ @contextlib.contextmanager
+ def align_module_device(
+     module: torch.nn.Module, execution_device: Optional[torch.device] = None
+ ):
+     """
+     Context manager that moves a module's parameters to the specified execution device.
+
+     Args:
+         module (`torch.nn.Module`):
+             Module with parameters to align.
+         execution_device (`torch.device`, *optional*):
+             If provided, overrides the module's execution device within the context.
+             Otherwise, uses the hook execution device or passes through.
+     """
+     if has_offloaded_params(module):
+         if execution_device is not None:
+             original_device = module._hf_hook.execution_device
+             module._hf_hook.execution_device = execution_device
+
+         try:
+             module._hf_hook.pre_forward(module)
+             yield
+         finally:
+             module._hf_hook.post_forward(module, None)
+             if execution_device is not None:
+                 module._hf_hook.execution_device = original_device
+
+     elif execution_device is not None:
+         devices = {
+             name: param.device for name, param in module.named_parameters(recurse=False)
+         }
+         try:
+             for name in devices:
+                 set_module_tensor_to_device(module, name, execution_device)
+             yield
+         finally:
+             for name, device in devices.items():
+                 set_module_tensor_to_device(module, name, device)
+
+     else:
+         yield
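align_module_device mirrors the context manager upstreamed to accelerate v1.1.0: for offloaded modules it onloads parameters via the hook's pre_forward and offloads them again on exit; for regular modules it is a no-op unless an execution_device override is given. A hedged sketch (the module and device here are placeholders):

import torch
from compressed_tensors.utils.offload import align_module_device

module = torch.nn.Linear(32, 32)  # placeholder; may have been dispatched by accelerate

with align_module_device(module):
    # inside the block, parameters are materialized on the execution device,
    # so they can be read or cloned safely
    weight_copy = module.weight.clone()
# on exit, offloaded parameters (if any) return to their offload device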
compressed_tensors/utils/safetensors_load.py

@@ -16,7 +16,7 @@ import json
  import os
  import re
  import struct
- from typing import Dict, List, Optional
+ from typing import Dict, List, Optional, Tuple, Union
 
  from safetensors import safe_open
  from torch import Tensor
@@ -30,10 +30,14 @@ __all__ = [
      "merge_names",
      "get_weight_mappings",
      "get_nested_weight_mappings",
+     "get_nested_mappings_from_state_dict",
      "get_quantization_state_dict",
      "is_quantization_param",
  ]
 
+ WeightMappingType = Dict[str, str]
+ NestedWeightMappingType = Dict[str, WeightMappingType]
+
 
  def get_safetensors_folder(
      pretrained_model_name_or_path: str, cache_dir: Optional[str] = None
@@ -92,7 +96,7 @@ def get_safetensors_header(safetensors_path: str) -> Dict[str, str]:
      return header
 
 
- def match_param_name(full_name: str, param_name: str) -> str:
+ def match_param_name(full_name: str, param_name: str) -> Optional[str]:
      """
      Helper function extracting the uncompressed parameterized layer name from a
      compressed name. Assumes the compressed name was merged using merge_names.
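The tightened return annotation makes the miss case explicit: the helper yields the dense layer name on a match and None otherwise. A hedged illustration of the naming convention it decodes (key names invented; behavior inferred from the docstring and its callers below):

from compressed_tensors.utils.safetensors_load import match_param_name

# a key merged from ("model.layer1", "bitmask") should decode back to the layer name
assert match_param_name("model.layer1.bitmask", "bitmask") == "model.layer1"

# keys that do not end in the requested parameter name produce no match
assert match_param_name("model.layer1.weight", "bitmask") is None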
@@ -176,38 +180,100 @@ def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
 
 
  def get_nested_weight_mappings(
-     model_path: str, params_to_nest: List[str]
- ) -> Dict[str, Dict[str, str]]:
+     model_path: str, params_to_nest: List[str], return_unmatched_params: bool = False
+ ) -> Union[NestedWeightMappingType, Tuple[NestedWeightMappingType, WeightMappingType]]:
      """
      Takes a path to a state dict saved in safetensors format and returns a nested
-     mapping from uncompressed parameterized layer names to the file locations of each
-     of the layers compression parameters.
+     mapping from uncompressed parameterized layer names to the file locations of
+     each layer's compression parameters.
 
-     layer.weight: {
+     Example of the nested mapping:
+     layer: {
          bitmask: file_location,
          row_offsets: file_location,
          shape: file_location,
          compressed: file_location
      }
 
-     This generalizes to cases where the model is split into multiple safetensors files
+     If other parameters are found that do not match the nested parameters, they will
+     be returned in a separate dictionary only if return_unmatched_params is True.
+     This dictionary may be needed for cases where compressors are stacked (e.g.,
+     quantization compression followed by sparse compression).
+
+     Example of the unmatched params mapping:
+     {
+         layer.weight_scale: file_location,
+         layer.input_scale: file_location
+     }
 
-     :param model_path: path to safetensors state dict, must contain either a single
-         safetensors file or multiple files with an index
-     :return: nested mapping of parameterized layer name to file location
+     This generalizes to cases where the model is split into multiple safetensors
+     files.
+
+     :param model_path: Path to the safetensors state dict, must contain either a
+         single safetensors file or multiple files with an index.
+     :param params_to_nest: List of parameter names to nest.
+     :param return_unmatched_params: If True, return a second dictionary containing
+         the remaining parameters that were not matched to the params_to_nest.
+     :return:
+         - If return_unmatched_params is False:
+             NestedWeightMappingType: A nested mapping of parameterized layer names to
+             file locations of each layer's compression parameters.
+         - If return_unmatched_params is True:
+             Tuple[NestedWeightMappingType, WeightMappingType]: A tuple containing:
+                 - NestedWeightMappingType: A nested mapping of parameterized layer
+                     names to file locations of each layer's compression parameters.
+                 - WeightMappingType: A mapping of the remaining parameter names to
+                     their file locations that were not matched to the params_to_nest.
      """
      weight_mappings = get_weight_mappings(model_path)
-
      nested_weight_mappings = {}
-     for key in weight_mappings.keys():
+     unmatched_params = {}
+
+     for key, file_location in weight_mappings.items():
+         matched = False
          for param_name in params_to_nest:
-             maybe_match = match_param_name(key, param_name)
-             if maybe_match is not None:
-                 dense_param = maybe_match
+             dense_param = match_param_name(key, param_name)
+             if dense_param:
                  if dense_param not in nested_weight_mappings:
                      nested_weight_mappings[dense_param] = {}
-                 nested_weight_mappings[dense_param][param_name] = weight_mappings[key]
+                 nested_weight_mappings[dense_param][param_name] = file_location
+                 matched = True
+         if return_unmatched_params and not matched:
+             unmatched_params[key] = file_location
+
+     if return_unmatched_params:
+         return nested_weight_mappings, unmatched_params
+     return nested_weight_mappings
 
+
+ def get_nested_mappings_from_state_dict(
+     state_dict, params_to_nest
+ ) -> NestedWeightMappingType:
+     """
+     Takes a state dict and returns a nested mapping from uncompressed
+     parameterized layer names to the value of each layer's compression parameters.
+
+     Example of the nested mapping:
+     layer: {
+         weight_scale: ...,
+         weight: ...,
+         zero_point: ...,
+     }
+
+     :param state_dict: state dict of the model
+     :param params_to_nest: List of parameter names to nest.
+     :return: Nested mapping of parameterized layer names to the value of
+         each layer's compression parameters.
+     """
+     nested_weight_mappings = {}
+     for key in state_dict.keys():
+         for param_name in params_to_nest:
+             dense_param = match_param_name(key, param_name)
+             if dense_param:
+                 if dense_param not in nested_weight_mappings:
+                     nested_weight_mappings[dense_param] = {}
+                 nested_weight_mappings[dense_param][param_name] = state_dict[key]
      return nested_weight_mappings
 
 
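The two helpers expose the same nesting for the on-disk and in-memory cases. A hedged sketch using the in-memory variant on a toy state dict (tensor names are illustrative):

import torch
from compressed_tensors.utils.safetensors_load import get_nested_mappings_from_state_dict

state_dict = {
    "layer.weight": torch.zeros(4, 4),
    "layer.weight_scale": torch.ones(1),
    "layer.zero_point": torch.zeros(1),
}

nested = get_nested_mappings_from_state_dict(
    state_dict, params_to_nest=["weight", "weight_scale", "zero_point"]
)
print(sorted(nested["layer"]))  # ['weight', 'weight_scale', 'zero_point']

# the on-disk variant returns file locations instead of tensors, and can also
# report parameters that matched nothing (useful when compressors are stacked):
# nested, unmatched = get_nested_weight_mappings(
#     "/path/to/model", ["bitmask", "row_offsets"], return_unmatched_params=True
# )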
compressed_tensors/version.py

@@ -17,7 +17,7 @@ Functionality for storing and setting the version info for SparseML
  """
 
 
- version_base = "0.8.1"
+ version_base = "0.9.1"
  is_release = True  # change to True to set the generated version as a release version
 
{compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: compressed-tensors
- Version: 0.8.1
+ Version: 0.9.1
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.