PyPI - fusion-bench - Versions diffs - 0.2.24__py3-none-any.whl → 0.2.25__py3-none-any.whl - Mend

fusion-bench 0.2.24__py3-none-any.whl → 0.2.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

fusion_bench/__init__.py +152 -42
fusion_bench/dataset/__init__.py +27 -4
fusion_bench/dataset/clip_dataset.py +2 -2
fusion_bench/method/__init__.py +10 -1
fusion_bench/method/classification/__init__.py +27 -2
fusion_bench/method/classification/image_classification_finetune.py +214 -0
fusion_bench/method/opcm/opcm.py +1 -0
fusion_bench/method/pwe_moe/module.py +0 -2
fusion_bench/method/tall_mask/task_arithmetic.py +2 -2
fusion_bench/mixins/__init__.py +2 -0
fusion_bench/mixins/pyinstrument.py +174 -0
fusion_bench/mixins/simple_profiler.py +106 -23
fusion_bench/modelpool/__init__.py +2 -0
fusion_bench/modelpool/base_pool.py +77 -14
fusion_bench/modelpool/clip_vision/modelpool.py +56 -19
fusion_bench/modelpool/resnet_for_image_classification.py +208 -0
fusion_bench/models/__init__.py +35 -9
fusion_bench/optim/__init__.py +40 -2
fusion_bench/optim/lr_scheduler/__init__.py +27 -1
fusion_bench/optim/muon.py +339 -0
fusion_bench/programs/__init__.py +2 -0
fusion_bench/programs/fabric_fusion_program.py +2 -2
fusion_bench/programs/fusion_program.py +271 -0
fusion_bench/tasks/clip_classification/__init__.py +15 -0
fusion_bench/utils/__init__.py +167 -21
fusion_bench/utils/lazy_imports.py +91 -12
fusion_bench/utils/lazy_state_dict.py +55 -5
fusion_bench/utils/misc.py +104 -13
fusion_bench/utils/packages.py +4 -0
fusion_bench/utils/path.py +7 -0
fusion_bench/utils/pylogger.py +6 -0
fusion_bench/utils/rich_utils.py +1 -0
fusion_bench/utils/state_dict_arithmetic.py +935 -162
{fusion_bench-0.2.24.dist-info → fusion_bench-0.2.25.dist-info}/METADATA +1 -1
{fusion_bench-0.2.24.dist-info → fusion_bench-0.2.25.dist-info}/RECORD +48 -34
fusion_bench_config/method/classification/image_classification_finetune.yaml +16 -0
fusion_bench_config/method/classification/image_classification_finetune_test.yaml +6 -0
fusion_bench_config/model_fusion.yaml +45 -0
fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet152_cifar10.yaml +14 -0
fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet152_cifar100.yaml +14 -0
fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet18_cifar10.yaml +14 -0
fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet18_cifar100.yaml +14 -0
fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet50_cifar10.yaml +14 -0
fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet50_cifar100.yaml +14 -0
{fusion_bench-0.2.24.dist-info → fusion_bench-0.2.25.dist-info}/WHEEL +0 -0
{fusion_bench-0.2.24.dist-info → fusion_bench-0.2.25.dist-info}/entry_points.txt +0 -0
{fusion_bench-0.2.24.dist-info → fusion_bench-0.2.25.dist-info}/licenses/LICENSE +0 -0
{fusion_bench-0.2.24.dist-info → fusion_bench-0.2.25.dist-info}/top_level.txt +0 -0

fusion_bench/utils/lazy_state_dict.py CHANGED Viewed

@@ -1,3 +1,11 @@
+"""
+Utilities for handling model checkpoints and state dictionaries.
+This module provides classes and functions for lazily loading state dictionaries
+from various checkpoint formats, including PyTorch .bin files, SafeTensors files,
+and sharded checkpoints.
+"""
 import json
 import logging
 import os
@@ -43,6 +51,21 @@ def resolve_checkpoint_path(
     hf_cache_dir: Optional[str] = None,
     hf_proxies: Optional[Dict] = None,
 ):
+    """
+    Resolve a checkpoint path, downloading from Hugging Face Hub if necessary.
+    Args:
+        checkpoint: Path to local checkpoint or Hugging Face model ID.
+        hf_revision: Specific revision to download from HF Hub.
+        hf_cache_dir: Local cache directory for HF downloads.
+        hf_proxies: Proxy settings for HF downloads.
+    Returns:
+        Local path to the checkpoint.
+    Raises:
+        FileNotFoundError: If the checkpoint cannot be resolved.
+    """
     # If it's a local file or directory, return as is
     if os.path.exists(checkpoint):
         return checkpoint
@@ -64,11 +87,11 @@ def resolve_checkpoint_path(
 class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
     """
-    Dictionary-like object that lazily loads a state dict from a checkpoint path.
+    A dictionary-like object that lazily loads tensors from model checkpoints.
     """
     _local_path: str
-    """local path to the checkpoint."""
+    """Local path to the checkpoint."""
     _state_dict_cache: Optional[Dict]
     """Cache for the state dict, if enabled."""
     _index_filename: Optional[str]
@@ -92,6 +115,8 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
         hf_proxies: Optional[Dict] = None,
     ):
         """
+        Initialize LazyStateDict with a checkpoint path.
         Args:
             checkpoint (str): Path to the checkpoint file or directory.
             meta_module_class (Type[nn.Module], optional): Class of the meta module to instantiate.
@@ -116,6 +141,7 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
             self.meta_module_class = import_object(self.meta_module_class)
         self.meta_module = meta_module
+        # Instantiate meta module if class provided
         if self.meta_module_class is not None:
             with init_empty_weights():
                 self.meta_module = self.meta_module_class.from_pretrained(
@@ -126,6 +152,7 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
                     proxies=hf_proxies,
                 )
+        # Store original checkpoint path and resolve to local path
         self._checkpoint = checkpoint
         self._local_path = resolve_checkpoint_path(
             checkpoint,
@@ -134,10 +161,12 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
             hf_proxies=hf_proxies,
         )
+        # Detect checkpoint file type and set up indexing
         self._index, self._index_filename, self._checkpoint_files = (
             self._resolve_checkpoint_files(self._local_path)
         )
+        # Set up based on checkpoint type
         if self._index is not None:
             # if meta_module is provided, remove the keys that are not in the meta_module
             if self.meta_module is not None:
@@ -152,7 +181,7 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
         elif len(self._checkpoint_files) == 1 and self._checkpoint_files[0].endswith(
             SAFE_WEIGHTS_NAME
         ):
-            # let the keys of self._index be the keys of the state dict, the values are the checkpoint file
+            # SafeTensors file: create index mapping all keys to this file
             with safe_open(
                 self._checkpoint_files[0], framework="pt", device=device
             ) as f:
@@ -164,6 +193,7 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
         elif len(self._checkpoint_files) == 1 and self._checkpoint_files[0].endswith(
             WEIGHTS_NAME
         ):
+            # PyTorch .bin file: load entire state dict immediately
             log.info(f"Loading full state dict from {WEIGHTS_NAME}")
             self._state_dict_cache = torch.load(self._checkpoint_files[0])
             # if meta_module is provided, remove the keys that are not in the meta_module
@@ -173,6 +203,7 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
                     if key not in meta_module_state_dict:
                         self._state_dict_cache.pop(key)
         else:
+            # Unsupported checkpoint format
             raise ValueError(
                 f"Cannot determine the type of checkpoint, please provide a checkpoint path to a file containing a whole state dict with file name {WEIGHTS_NAME} or {SAFE_WEIGHTS_NAME}, or the index of a sharded checkpoint ending with `.index.json`."
             )
@@ -209,10 +240,19 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
         return deepcopy(self)
     def _resolve_checkpoint_files(self, checkpoint: str):
-        # reference: https://huggingface.co/docs/accelerate/v0.17.1/en/usage_guides/big_modeling
+        """
+        Detect and resolve checkpoint files based on the checkpoint path.
+        Handles single files, directories with state dict files, and sharded checkpoints.
+        Returns:
+            Tuple of (index_dict, index_filename, checkpoint_files)
+        """
+        # Reference: https://huggingface.co/docs/accelerate/v0.17.1/en/usage_guides/big_modeling
         checkpoint_files = None
         index_filename = None
         if os.path.isfile(checkpoint):
+            # Single file: check if it's an index or a state dict
             if str(checkpoint).endswith(".json"):
                 index_filename = checkpoint
             else:
@@ -232,7 +272,7 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
                     os.path.join(checkpoint, potential_state_safetensor[0])
                 ]
             else:
-                # otherwise check for sharded checkpoints
+                # Check for sharded checkpoints
                 potential_index = [
                     f for f in os.listdir(checkpoint) if f.endswith(".index.json")
                 ]
@@ -247,18 +287,22 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
                         f"{checkpoint} containing more than one `.index.json` file, delete the irrelevant ones."
                     )
         else:
+            # Invalid checkpoint path
             raise ValueError(
                 "`checkpoint` should be the path to a file containing a whole state dict, or the index of a sharded "
                 f"checkpoint, or a folder containing a sharded checkpoint or the whole state dict, but got {checkpoint}."
             )
+        # Load index file if present
         if index_filename is not None:
             checkpoint_folder = os.path.split(index_filename)[0]
             with open(index_filename) as f:
                 index = json.loads(f.read())
+            # Extract weight_map if present (standard format)
             if "weight_map" in index:
                 index = index["weight_map"]
+            # Get list of unique checkpoint files
             checkpoint_files = sorted(list(set(index.values())))
             checkpoint_files = [
                 os.path.join(checkpoint_folder, f) for f in checkpoint_files
@@ -270,6 +314,11 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
     def _load_tensor_from_checkpoint_file(
         self, checkpoint_file: str, key: str, update_cache: bool = True
     ) -> torch.Tensor:
+        """
+        Load a tensor from the checkpoint file.
+        For safetensors, loads only the requested tensor.
+        For PyTorch files, loads the entire state dict on first access.
+        """
         if checkpoint_file.endswith(".safetensors"):
             with safe_open(checkpoint_file, framework="pt", device=self._device) as f:
                 tensor = f.get_tensor(key)
@@ -279,6 +328,7 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
                     self._state_dict_cache[key] = tensor
                 return tensor
         else:
+            # PyTorch .bin file: load entire state dict
             state_dict = torch.load(checkpoint_file, map_location=self._device)
             if update_cache:
                 if self._state_dict_cache is not None:

fusion_bench/utils/misc.py CHANGED Viewed

@@ -1,35 +1,126 @@
 from difflib import get_close_matches
-from typing import Any, Iterable, List, Optional
+from typing import Any, Iterable, List, Optional, TypeVar, Union
+T = TypeVar("T")
 __all__ = [
     "first",
     "has_length",
-    "join_list",
+    "join_lists",
     "attr_equal",
     "validate_and_suggest_corrections",
 ]
-def first(iterable: Iterable):
-    return next(iter(iterable))
+def first(iterable: Iterable[T], default: Optional[T] = None) -> Optional[T]:
+    """
+    Return the first element of an iterable.
+    Args:
+        iterable: The iterable to get the first element from.
+        default: The value to return if the iterable is empty. If None and
+                the iterable is empty, raises StopIteration.
-def has_length(dataset):
+    Returns:
+        The first element of the iterable, or the default value if empty.
+    Raises:
+        StopIteration: If the iterable is empty and no default is provided.
+        TypeError: If the object is not iterable.
     """
-    Checks if the dataset implements __len__() and it doesn't raise an error
+    try:
+        iterator = iter(iterable)
+        return next(iterator)
+    except StopIteration:
+        if default is not None:
+            return default
+        raise
+    except TypeError as e:
+        raise TypeError(
+            f"Object of type {type(iterable).__name__} is not iterable"
+        ) from e
+def has_length(obj: Any) -> bool:
     """
+    Check if an object has a length (implements __len__) and len() works correctly.
+    Args:
+        obj: The object to check for length support.
+    Returns:
+        bool: True if the object supports len() and doesn't raise an error,
+              False otherwise.
+    """
+    if obj is None:
+        return False
     try:
-        return len(dataset) is not None
-    except TypeError:
+        # Check if __len__ method exists
+        if not hasattr(obj, "__len__"):
+            return False
+        # Try to get the length - this will raise TypeError for unsized objects
+        length = len(obj)
+        # Verify the length is a non-negative integer
+        return isinstance(length, int) and length >= 0
+    except (TypeError, AttributeError):
         # TypeError: len() of unsized object
+        # AttributeError: if __len__ is not callable somehow
         return False
+    except Exception:
+        # Any other unexpected error
+        return False
+def join_lists(list_of_lists: Iterable[Iterable[T]]) -> List[T]:
+    """
+    Flatten a collection of iterables into a single list.
+    Args:
+        list_of_lists: An iterable containing iterables to be flattened.
+    Returns:
+        List[T]: A new list containing all elements from the input iterables
+                in order.
-def join_list(list_of_list: List[List]):
-    ans = []
-    for item in list_of_list:
-        ans.extend(item)
-    return ans
+    Raises:
+        TypeError: If any item in list_of_lists is not iterable.
+    Examples:
+        >>> join_lists([[1, 2], [3, 4], [5]])
+        [1, 2, 3, 4, 5]
+        >>> join_lists([])
+        []
+        >>> join_lists([[], [1], [], [2, 3]])
+        [1, 2, 3]
+    """
+    if not list_of_lists:
+        return []
+    result = []
+    for i, item in enumerate(list_of_lists):
+        try:
+            # Check if item is iterable (but not string, which is iterable but
+            # usually not what we want to flatten character by character)
+            if isinstance(item, (str, bytes)):
+                raise TypeError(
+                    f"Item at index {i} is a string/bytes, not a list-like iterable"
+                )
+            # Try to extend with the item
+            result.extend(item)
+        except TypeError as e:
+            if "not iterable" in str(e):
+                raise TypeError(
+                    f"Item at index {i} (type: {type(item).__name__}) is not iterable"
+                ) from e
+            else:
+                # Re-raise our custom error or other TypeError
+                raise
+    return result
 def attr_equal(obj, attr: str, value):

fusion_bench/utils/packages.py CHANGED Viewed

@@ -40,6 +40,10 @@ def is_matplotlib_available():
     return _is_package_available("matplotlib")
+def is_open_clip_available():
+    return _is_package_available("open_clip")
 def is_pillow_available():
     return _is_package_available("PIL")

fusion_bench/utils/path.py CHANGED Viewed

@@ -2,6 +2,8 @@ import logging
 import os
 from typing import List
+from lightning_utilities.core.rank_zero import rank_zero_only
 log = logging.getLogger(__name__)
@@ -25,6 +27,7 @@ def listdir_fullpath(dir: str) -> List[str]:
     return [os.path.join(dir, name) for name in names]
+@rank_zero_only
 def create_symlink(src_dir: str, dst_dir: str, link_name: str = None):
     """
     Creates a symbolic link from src_dir to dst_dir.
@@ -59,6 +62,10 @@ def create_symlink(src_dir: str, dst_dir: str, link_name: str = None):
         link_name = os.path.basename(src_dir)
     link_path = os.path.join(dst_dir, link_name)
+    # if the link already exists, skip
+    if os.path.exists(link_path):
+        log.warning(f"Symbolic link already exists, skipping: {link_path}")
+        return
     try:
         # if the system is windows, use the `mklink` command in "CMD" to create the symlink

fusion_bench/utils/pylogger.py CHANGED Viewed

@@ -3,6 +3,12 @@ from typing import Mapping, Optional
 from lightning_utilities.core.rank_zero import rank_prefixed_message, rank_zero_only
+__all__ = [
+    "RankedLogger",
+    "RankZeroLogger",
+    "get_rankzero_logger",
+]
 class RankedLogger(logging.LoggerAdapter):
     """A multi-GPU-friendly python command line logger."""

fusion_bench/utils/rich_utils.py CHANGED Viewed

@@ -16,6 +16,7 @@ from rich.panel import Panel
 from rich.prompt import Prompt
 from rich.syntax import Syntax
 from rich.text import Text
+from rich.traceback import install as install_rich_traceback
 from fusion_bench.utils import pylogger

fusion-bench 0.2.24__py3-none-any.whl → 0.2.25__py3-none-any.whl

fusion-bench 0.2.24__py3-none-any.whl → 0.2.25__py3-none-any.whl