PyPI - fusion-bench - Versions diffs - 0.2.22__py3-none-any.whl → 0.2.23__py3-none-any.whl - Mend

fusion-bench 0.2.22__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

fusion_bench/__init__.py +4 -0
fusion_bench/compat/method/__init__.py +5 -2
fusion_bench/compat/method/base_algorithm.py +3 -2
fusion_bench/compat/modelpool/base_pool.py +3 -3
fusion_bench/compat/taskpool/clip_image_classification.py +1 -1
fusion_bench/dataset/gpt2_glue.py +1 -1
fusion_bench/method/__init__.py +4 -2
fusion_bench/method/analysis/task_vector_cos_similarity.py +95 -12
fusion_bench/method/analysis/task_vector_violin_plot.py +160 -52
fusion_bench/method/bitdelta/bitdelta.py +7 -23
fusion_bench/method/expert_sparsity/mixtral/dynamic_skipping.py +2 -0
fusion_bench/method/expert_sparsity/mixtral/layer_wise_pruning.py +2 -0
fusion_bench/method/expert_sparsity/mixtral/progressive_pruning.py +2 -0
fusion_bench/method/model_stock/__init__.py +1 -0
fusion_bench/method/model_stock/model_stock.py +309 -0
fusion_bench/method/regmean/clip_regmean.py +3 -6
fusion_bench/method/regmean/regmean.py +27 -56
fusion_bench/method/regmean/utils.py +56 -0
fusion_bench/method/regmean_plusplus/regmean_plusplus.py +21 -60
fusion_bench/method/slerp/__init__.py +1 -1
fusion_bench/method/slerp/slerp.py +110 -14
fusion_bench/method/we_moe/flan_t5_we_moe.py +9 -20
fusion_bench/mixins/clip_classification.py +26 -6
fusion_bench/mixins/serialization.py +25 -15
fusion_bench/modelpool/base_pool.py +1 -1
fusion_bench/modelpool/causal_lm/causal_lm.py +262 -43
fusion_bench/modelpool/seq2seq_lm/modelpool.py +146 -0
fusion_bench/models/hf_utils.py +9 -4
fusion_bench/models/linearized/vision_model.py +6 -6
fusion_bench/models/modeling_smile_mistral/__init__.py +1 -0
fusion_bench/models/we_moe.py +8 -8
fusion_bench/taskpool/base_pool.py +99 -17
fusion_bench/taskpool/clip_vision/taskpool.py +1 -1
fusion_bench/taskpool/dummy.py +101 -13
fusion_bench/taskpool/lm_eval_harness/taskpool.py +80 -0
fusion_bench/taskpool/nyuv2_taskpool.py +28 -0
fusion_bench/utils/__init__.py +1 -0
fusion_bench/utils/data.py +6 -4
fusion_bench/utils/devices.py +7 -4
fusion_bench/utils/dtype.py +3 -2
fusion_bench/utils/lazy_state_dict.py +82 -19
fusion_bench/utils/packages.py +3 -3
fusion_bench/utils/parameters.py +0 -2
fusion_bench/utils/timer.py +92 -10
{fusion_bench-0.2.22.dist-info → fusion_bench-0.2.23.dist-info}/METADATA +1 -1
{fusion_bench-0.2.22.dist-info → fusion_bench-0.2.23.dist-info}/RECORD +53 -47
fusion_bench_config/_get_started/llm_slerp.yaml +12 -0
fusion_bench_config/method/model_stock/model_stock.yaml +12 -0
fusion_bench_config/method/slerp/slerp_lm.yaml +4 -0
{fusion_bench-0.2.22.dist-info → fusion_bench-0.2.23.dist-info}/WHEEL +0 -0
{fusion_bench-0.2.22.dist-info → fusion_bench-0.2.23.dist-info}/entry_points.txt +0 -0
{fusion_bench-0.2.22.dist-info → fusion_bench-0.2.23.dist-info}/licenses/LICENSE +0 -0
{fusion_bench-0.2.22.dist-info → fusion_bench-0.2.23.dist-info}/top_level.txt +0 -0

fusion_bench/utils/lazy_state_dict.py CHANGED Viewed

@@ -2,7 +2,18 @@ import json
 import logging
 import os
 from copy import deepcopy
-from typing import TYPE_CHECKING, Dict, Iterator, List, Mapping, Optional, Tuple, Type
+from typing import (
+    TYPE_CHECKING,
+    Dict,
+    Generic,
+    Iterator,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+)
 import torch
 from accelerate import init_empty_weights
@@ -11,10 +22,12 @@ from huggingface_hub import snapshot_download
 from safetensors import safe_open
 from safetensors.torch import load_file
 from torch import nn
+from torch.nn.modules.module import _IncompatibleKeys
 from transformers import AutoConfig
 from fusion_bench.utils.dtype import parse_dtype
 from fusion_bench.utils.packages import import_object
+from fusion_bench.utils.type import TorchModelType
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
@@ -49,7 +62,7 @@ def resolve_checkpoint_path(
         )
-class LazyStateDict(Mapping[str, torch.Tensor]):
+class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
     """
     Dictionary-like object that lazily loads a state dict from a checkpoint path.
     """
@@ -66,8 +79,8 @@ class LazyStateDict(Mapping[str, torch.Tensor]):
     def __init__(
         self,
         checkpoint: str,
-        meta_module_class: Optional[Type[nn.Module]] = None,
-        meta_module: Optional[nn.Module] = None,
+        meta_module_class: Optional[Type[TorchModelType]] = None,
+        meta_module: Optional[TorchModelType] = None,
         cache_state_dict: bool = False,
         torch_dtype: Optional[torch.dtype] = None,
         device: str = "cpu",
@@ -88,15 +101,19 @@ class LazyStateDict(Mapping[str, torch.Tensor]):
             hf_proxies (Dict, optional): Proxies to use for downloading from Hugging Face Hub.
         """
         self.cache_state_dict = cache_state_dict
+        # Validate that both meta_module_class and meta_module are not provided
+        if meta_module_class is not None and meta_module is not None:
+            raise ValueError(
+                "Cannot provide both meta_module_class and meta_module, please provide only one."
+            )
         self.meta_module_class = meta_module_class
         if isinstance(self.meta_module_class, str):
             self.meta_module_class = import_object(self.meta_module_class)
         self.meta_module = meta_module
         if self.meta_module_class is not None:
-            if self.meta_module is not None:
-                raise ValueError(
-                    "Cannot provide both meta_module_class and meta_module, please provide only one."
-                )
             with init_empty_weights():
                 self.meta_module = self.meta_module_class.from_pretrained(
                     checkpoint,
@@ -173,9 +190,13 @@ class LazyStateDict(Mapping[str, torch.Tensor]):
         """
         `torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype).
         """
+        if hasattr(self, "_cached_dtype"):
+            return self._cached_dtype
         first_key = next(iter(self.keys()))
         first_param = self[first_key]
-        return first_param.dtype
+        self._cached_dtype = first_param.dtype
+        return self._cached_dtype
     def state_dict(self, keep_vars: bool = False) -> "LazyStateDict":
         """
@@ -321,9 +342,7 @@ class LazyStateDict(Mapping[str, torch.Tensor]):
         if self._state_dict_cache is not None:
             self._state_dict_cache[key] = value
         else:
-            log.warning(
-                "State dict cache is disabled, setting a tensor will not update the cache."
-            )
+            log.warning("State dict cache is disabled, initializing the cache.")
             self._state_dict_cache = {key: value}
     def __contains__(self, key: str) -> bool:
@@ -339,7 +358,7 @@ class LazyStateDict(Mapping[str, torch.Tensor]):
                     self._checkpoint_files[0], key, update_cache=False
                 )
                 return tensor is not None
-            except Exception:
+            except (KeyError, FileNotFoundError, RuntimeError, EOFError):
                 return False
         return False
@@ -409,8 +428,8 @@ class LazyStateDict(Mapping[str, torch.Tensor]):
             )
     def load_state_dict(
-        self, state_dict: Dict[str, torch.Tensor], strict: bool = True
-    ) -> None:
+        self, state_dict: Mapping[str, torch.Tensor], strict: bool = True
+    ) -> _IncompatibleKeys:
         """
         Load a state dict into this LazyStateDict.
         This method is only for compatibility with nn.Module and it overrides the cache of LazyStateDict.
@@ -419,16 +438,60 @@ class LazyStateDict(Mapping[str, torch.Tensor]):
             state_dict (Dict[str, torch.Tensor]): The state dict to load.
             strict (bool): Whether to enforce that all keys in the state dict are present in this LazyStateDict.
         """
+        if not isinstance(state_dict, Mapping):
+            raise TypeError(
+                f"Expected state_dict to be dict-like, got {type(state_dict)}."
+            )
+        missing_keys: list[str] = []
+        unexpected_keys: list[str] = []
+        error_msgs: list[str] = []
         log.warning(
             "Loading state dict into LazyStateDict is not recommended, as it may lead to unexpected behavior. "
             "Use with caution."
         )
+        # Check for unexpected keys in the provided state_dict
+        for key in state_dict:
+            if key not in self:
+                unexpected_keys.append(key)
+        # Check for missing keys that are expected in this LazyStateDict
+        for key in self.keys():
+            if key not in state_dict:
+                missing_keys.append(key)
+        # Handle strict mode
         if strict:
-            for key in state_dict:
-                if key not in self:
-                    raise KeyError(f"Key {key} not found in LazyStateDict.")
+            if len(unexpected_keys) > 0:
+                error_msgs.insert(
+                    0,
+                    "Unexpected key(s) in state_dict: {}. ".format(
+                        ", ".join(f'"{k}"' for k in unexpected_keys)
+                    ),
+                )
+            if len(missing_keys) > 0:
+                error_msgs.insert(
+                    0,
+                    "Missing key(s) in state_dict: {}. ".format(
+                        ", ".join(f'"{k}"' for k in missing_keys)
+                    ),
+                )
+        if len(error_msgs) > 0:
+            raise RuntimeError(
+                "Error(s) in loading state_dict for {}:\n\t{}".format(
+                    self.__class__.__name__, "\n\t".join(error_msgs)
+                )
+            )
+        # Load the state dict values
         for key, value in state_dict.items():
-            self[key] = value
+            if key in self:  # Only set keys that exist in this LazyStateDict
+                self[key] = value
+        return _IncompatibleKeys(missing_keys, unexpected_keys)
     def __getattr__(self, name: str):
         if "meta_module" in self.__dict__:

fusion_bench/utils/packages.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import importlib.metadata
 import importlib.util
 from functools import lru_cache
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 from packaging import version
@@ -69,7 +69,7 @@ def is_vllm_available():
     return _is_package_available("vllm")
-def import_object(abs_obj_name: str):
+def import_object(abs_obj_name: str) -> Any:
     """
     Imports a class from a module given the absolute class name.
@@ -84,7 +84,7 @@ def import_object(abs_obj_name: str):
     return getattr(module, obj_name)
-def compare_versions(v1, v2):
+def compare_versions(v1: str, v2: str) -> int:
     """Compare two version strings.
     Returns -1 if v1 < v2, 0 if v1 == v2, 1 if v1 > v2"""

fusion_bench/utils/parameters.py CHANGED Viewed

@@ -129,7 +129,6 @@ def human_readable(num: int) -> str:
     Converts a number into a human-readable string with appropriate magnitude suffix.
     Examples:
         ```python
         print(human_readable(1500))
         # Output: '1.50K'
@@ -201,7 +200,6 @@ def count_parameters(module: nn.Module, non_zero_only: bool = False) -> tuple[in
         tuple: A tuple containing the number of trainable parameters and the total number of parameters.
     Examples:
         ```python
         # Count the parameters
         trainable_params, all_params = count_parameters(model)

fusion_bench/utils/timer.py CHANGED Viewed

@@ -6,38 +6,120 @@ log = logging.getLogger(__name__)
 class timeit_context:
     """
-    Usage:
+    A context manager for measuring and logging execution time of code blocks.
-    ```python
-    with timeit_context() as timer:
-        ... # code block to be measured
-    ```
+    This context manager provides precise timing measurements with automatic logging
+    of elapsed time. It supports nested timing contexts with proper indentation
+    for hierarchical timing analysis, making it ideal for profiling complex
+    operations with multiple sub-components.
+    Args:
+        msg (str, optional): Custom message to identify the timed code block.
+            If provided, logs "[BEGIN] {msg}" at start and includes context
+            in the final timing report. Defaults to None.
+        loglevel (int, optional): Python logging level for output messages.
+            Uses standard logging levels (DEBUG=10, INFO=20, WARNING=30, etc.).
+            Defaults to logging.INFO.
+    Example:
+        Basic usage:
+        ```python
+        with timeit_context("data loading"):
+            data = load_large_dataset()
+        # Logs: [BEGIN] data loading
+        # Logs: [END]   Elapsed time: 2.34s
+        ```
+        Nested timing:
+        ```python
+        with timeit_context("model training"):
+            with timeit_context("data preprocessing"):
+                preprocess_data()
+            with timeit_context("forward pass"):
+                model(data)
+        # Output shows nested structure:
+        # [BEGIN] model training
+        #   [BEGIN] data preprocessing
+        #   [END]   Elapsed time: 0.15s
+        #   [BEGIN] forward pass
+        #   [END]   Elapsed time: 0.89s
+        # [END]   Elapsed time: 1.04s
+        ```
+        Custom log level:
+        ```python
+        with timeit_context("debug operation", loglevel=logging.DEBUG):
+            debug_function()
+        ```
     """
     nest_level = -1
     def _log(self, msg):
+        """
+        Internal method for logging messages with appropriate stack level.
+        This helper method ensures that log messages appear to originate from
+        the caller's code rather than from internal timer methods, providing
+        more useful debugging information.
+        Args:
+            msg (str): The message to log at the configured log level.
+        """
         log.log(self.loglevel, msg, stacklevel=3)
     def __init__(self, msg: str = None, loglevel=logging.INFO) -> None:
+        """
+        Initialize a new timing context with optional message and log level.
+        Args:
+            msg (str, optional): Descriptive message for the timed operation.
+                If provided, will be included in the begin/end log messages
+                to help identify what is being timed. Defaults to None.
+            loglevel (int, optional): Python logging level for timer output.
+                Common values include:
+                - logging.DEBUG (10): Detailed debugging information
+                - logging.INFO (20): General information (default)
+                - logging.WARNING (30): Warning messages
+                - logging.ERROR (40): Error messages
+                Defaults to logging.INFO.
+        """
         self.loglevel = loglevel
         self.msg = msg
     def __enter__(self) -> None:
         """
-        Sets the start time and logs an optional message indicating the start of the code block execution.
+        Enter the timing context and start the timer.
-        Args:
-            msg: str, optional message to log
+        This method is automatically called when entering the 'with' statement.
+        It records the current timestamp, increments the nesting level for
+        proper log indentation, and optionally logs a begin message.
+        Returns:
+            None: This context manager doesn't return a value to the 'as' clause.
+                  All timing information is handled internally and logged automatically.
         """
         self.start_time = time.time()
         timeit_context.nest_level += 1
         if self.msg is not None:
             self._log("  " * timeit_context.nest_level + "[BEGIN] " + str(self.msg))
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
         """
-        Calculates the elapsed time and logs it, along with an optional message indicating the end of the code block execution.
+        Exit the timing context and log the elapsed time.
+        This method is automatically called when exiting the 'with' statement,
+        whether through normal completion or exception. It calculates the total
+        elapsed time and logs the results with proper nesting indentation.
+        Args:
+            exc_type (type): Exception type if an exception occurred, None otherwise.
+            exc_val (Exception): Exception instance if an exception occurred, None otherwise.
+            exc_tb (traceback): Exception traceback if an exception occurred, None otherwise.
+        Returns:
+            None: Does not suppress exceptions (returns None/False implicitly).
+                  Any exceptions that occurred in the timed block will propagate normally.
         """
         end_time = time.time()
         elapsed_time = end_time - self.start_time

{fusion_bench-0.2.22.dist-info → fusion_bench-0.2.23.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.22
+Version: 0.2.23
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 Project-URL: Repository, https://github.com/tanganke/fusion_bench

fusion-bench 0.2.22__py3-none-any.whl → 0.2.23__py3-none-any.whl

fusion-bench 0.2.22__py3-none-any.whl → 0.2.23__py3-none-any.whl