fusion-bench 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/__init__.py +1 -0
- fusion_bench/_get_started/__init__.py +3 -0
- fusion_bench/_get_started/greeting_program.py +49 -0
- fusion_bench/compat/method/base_algorithm.py +14 -0
- fusion_bench/constants/__init__.py +5 -0
- fusion_bench/constants/clip_vision.py +26 -2
- fusion_bench/constants/paths.py +4 -0
- fusion_bench/dataset/clip_dataset.py +2 -1
- fusion_bench/dataset/gpt2_glue.py +9 -9
- fusion_bench/dataset/image_corruption/__init__.py +0 -0
- fusion_bench/dataset/image_corruption/make_corruption.py +179 -0
- fusion_bench/dataset/image_dataset.py +1 -1
- fusion_bench/dataset/nyuv2.py +2 -2
- fusion_bench/method/__init__.py +16 -1
- fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
- fusion_bench/method/adamerging/clip_task_wise_adamerging.py +11 -7
- fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -5
- fusion_bench/method/base_algorithm.py +195 -12
- fusion_bench/method/bitdelta/__init__.py +4 -0
- fusion_bench/method/bitdelta/bitdelta.py +156 -0
- fusion_bench/method/bitdelta/bitdelta_utils/__init__.py +0 -0
- fusion_bench/method/bitdelta/bitdelta_utils/binary_gemm_kernel.py +462 -0
- fusion_bench/method/bitdelta/bitdelta_utils/data.py +35 -0
- fusion_bench/method/bitdelta/bitdelta_utils/diff.py +129 -0
- fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +0 -1
- fusion_bench/method/depth_upscaling/depth_upscaling.py +4 -9
- fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py +4 -5
- fusion_bench/method/doge_ta/doge_ta.py +1 -1
- fusion_bench/method/ensemble.py +12 -12
- fusion_bench/method/expert_sparsity/utils/calibration_data.py +1 -1
- fusion_bench/method/fisher_merging/clip_fisher_merging.py +2 -2
- fusion_bench/method/fisher_merging/fisher_merging.py +6 -15
- fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +3 -10
- fusion_bench/method/fw_merging/fw_hard.py +1 -1
- fusion_bench/method/fw_merging/fw_soft.py +1 -1
- fusion_bench/method/gossip/clip_layer_wise_gossip.py +4 -5
- fusion_bench/method/linear/expo.py +2 -1
- fusion_bench/method/linear/linear_interpolation.py +6 -4
- fusion_bench/method/linear/simple_average_for_llama.py +16 -6
- fusion_bench/method/lm_finetune/bradley_terry_rm.py +2 -2
- fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +9 -26
- fusion_bench/method/model_recombination.py +2 -5
- fusion_bench/method/moe_pruner/hooks/__init__.py +1 -2
- fusion_bench/method/moe_pruner/utils/data.py +2 -1
- fusion_bench/method/moe_pruner/utils/prune.py +6 -1
- fusion_bench/method/pruning/llama_magnitude_prune.py +1 -1
- fusion_bench/method/pruning/wanda_utils/data.py +1 -2
- fusion_bench/method/pwe_moe/clip_pwe_moe.py +12 -34
- fusion_bench/method/randes/modelsoup.py +1 -3
- fusion_bench/method/regmean/clip_regmean.py +2 -2
- fusion_bench/method/regmean/gpt2_regmean.py +3 -10
- fusion_bench/method/regmean/regmean.py +2 -11
- fusion_bench/method/regmean_plusplus/__init__.py +3 -0
- fusion_bench/method/regmean_plusplus/clip_regmean_plusplus.py +199 -0
- fusion_bench/method/regmean_plusplus/regmean_plusplus.py +383 -0
- fusion_bench/method/simple_average.py +16 -4
- fusion_bench/method/slerp/slerp.py +5 -2
- fusion_bench/method/smile_upscaling/error_accumulation.py +177 -0
- fusion_bench/method/smile_upscaling/projected_energy.py +145 -0
- fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +39 -28
- fusion_bench/method/smile_upscaling/smile_upscaling.py +12 -5
- fusion_bench/method/tall_mask/task_arithmetic.py +3 -11
- fusion_bench/method/task_arithmetic/task_arithmetic.py +6 -10
- fusion_bench/method/ties_merging/ties_merging.py +13 -26
- fusion_bench/method/we_moe/clip_we_moe.py +5 -4
- fusion_bench/method/we_moe/we_moe.py +6 -6
- fusion_bench/method/weighted_average/llama.py +4 -16
- fusion_bench/metrics/continual_learning/__init__.py +1 -0
- fusion_bench/metrics/continual_learning/backward_transfer.py +1 -1
- fusion_bench/metrics/nyuv2/__init__.py +2 -2
- fusion_bench/metrics/nyuv2/segmentation.py +1 -1
- fusion_bench/mixins/__init__.py +10 -2
- fusion_bench/mixins/clip_classification.py +4 -3
- fusion_bench/mixins/hydra_config.py +105 -7
- fusion_bench/mixins/lightning_fabric.py +2 -0
- fusion_bench/mixins/serialization.py +265 -48
- fusion_bench/modelpool/__init__.py +2 -2
- fusion_bench/modelpool/base_pool.py +29 -9
- fusion_bench/modelpool/causal_lm/causal_lm.py +9 -0
- fusion_bench/modelpool/clip_vision/modelpool.py +43 -12
- fusion_bench/modelpool/seq_classification_lm/__init__.py +1 -1
- fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +1 -1
- fusion_bench/models/__init__.py +2 -1
- fusion_bench/models/expert_sparsity/mixtral/__init__.py +1 -1
- fusion_bench/models/hf_utils.py +182 -0
- fusion_bench/models/linearized/linearized_model_utils.py +4 -4
- fusion_bench/models/linearized/vision_model.py +1 -1
- fusion_bench/models/modeling_deepseek_v2/__init__.py +1 -1
- fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +4 -4
- fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +0 -1
- fusion_bench/models/modeling_smile_gemma2/__init__.py +9 -0
- fusion_bench/models/modeling_smile_gemma2/configuration_smile_gemma2.py +20 -0
- fusion_bench/models/modeling_smile_gemma2/modeling_smile_gemma2.py +986 -0
- fusion_bench/models/modeling_smile_gemma2/register.py +26 -0
- fusion_bench/models/modeling_smile_llama/__init__.py +0 -0
- fusion_bench/models/modeling_smile_llama/configuration_smile_llama.py +20 -0
- fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +705 -0
- fusion_bench/models/modeling_smile_llama/register.py +8 -0
- fusion_bench/models/modeling_smile_mistral/__init__.py +5 -47
- fusion_bench/models/modeling_smile_qwen2/__init__.py +1 -1
- fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +6 -7
- fusion_bench/models/modeling_smile_qwen2/register.py +1 -4
- fusion_bench/models/parameter_dict.py +1 -1
- fusion_bench/models/sparse_we_moe.py +1 -53
- fusion_bench/models/utils.py +26 -0
- fusion_bench/models/we_moe.py +1 -53
- fusion_bench/models/wrappers/ensemble.py +6 -4
- fusion_bench/models/wrappers/layer_wise_fusion.py +1 -1
- fusion_bench/models/wrappers/task_wise_fusion.py +250 -72
- fusion_bench/programs/base_program.py +81 -2
- fusion_bench/programs/fabric_fusion_program.py +24 -8
- fusion_bench/scripts/cli.py +6 -6
- fusion_bench/taskpool/base_pool.py +4 -3
- fusion_bench/taskpool/clip_vision/taskpool.py +34 -18
- fusion_bench/taskpool/dummy.py +1 -1
- fusion_bench/taskpool/lm_eval_harness/taskpool.py +1 -2
- fusion_bench/tasks/clip_classification/__init__.py +6 -4
- fusion_bench/utils/__init__.py +6 -1
- fusion_bench/utils/devices.py +14 -4
- fusion_bench/utils/instantiate_utils.py +3 -1
- fusion_bench/utils/misc.py +48 -2
- fusion_bench/utils/modelscope.py +265 -0
- fusion_bench/utils/parameters.py +2 -2
- fusion_bench/utils/rich_utils.py +3 -0
- fusion_bench/utils/state_dict_arithmetic.py +34 -27
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/METADATA +31 -24
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/RECORD +189 -153
- fusion_bench_config/_get_started/clip_evaluate_single_model.yaml +21 -0
- fusion_bench_config/_get_started/clip_simple_average.yaml +23 -0
- fusion_bench_config/_get_started/clip_task_arithmetic.yaml +24 -0
- fusion_bench_config/_get_started/greeting_program.yaml +4 -0
- fusion_bench_config/fabric/loggers/csv_logger.yaml +3 -3
- fusion_bench_config/fabric/loggers/tensorboard_logger.yaml +3 -3
- fusion_bench_config/fabric_model_fusion.yaml +45 -17
- fusion_bench_config/hydra/default.yaml +6 -2
- fusion_bench_config/llama_full_finetune.yaml +1 -0
- fusion_bench_config/method/adamerging/clip.yaml +1 -1
- fusion_bench_config/method/bitdelta/bitdelta.yaml +12 -0
- fusion_bench_config/method/depth_upscaling.yaml +4 -1
- fusion_bench_config/method/regmean/clip_regmean.yaml +1 -1
- fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml +11 -0
- fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/projected_energy.yaml +2 -0
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +1 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +1 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml +73 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml +27 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8.yaml +34 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +14 -17
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only.yaml +14 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml +39 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml +49 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml +55 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml +21 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL16.yaml +61 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL18.yaml +67 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml +73 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml +26 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +4 -9
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +7 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +6 -10
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_cars.yaml +6 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_dtd.yaml +6 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_cars_and_dtd.yaml +7 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +8 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +4 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +32 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +14 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml +73 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml +27 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +6 -10
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +2 -2
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml +9 -0
- fusion_bench_config/modelpool/CausalLMPool/mistral-7b.yaml +6 -0
- fusion_bench_config/modelpool/CausalLMPool/mixtral_moe_merging.yaml +10 -0
- fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +4 -12
- fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +6 -16
- fusion_bench_config/modelpool/CausalLMPool/vicuna-7b-v1.5.yaml +8 -0
- fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/llama_preference700k.yaml +1 -1
- fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/single_reward_model.yaml +1 -1
- fusion_bench_config/nyuv2_config.yaml +3 -1
- fusion_bench_config/nyuv2_mtl_train.yaml +1 -0
- fusion_bench_config/path/default.yaml +28 -0
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_svhn_and_mnist.yaml +24 -0
- fusion_bench_config/method/adamerging.yaml +0 -23
- fusion_bench_config/modelpool/mixtral_moe_merging.yaml +0 -14
- fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +0 -6
- fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -22
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/top_level.txt +0 -0
- /fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/roberta-base_glue.yaml +0 -0
|
@@ -56,6 +56,8 @@ class LayerWiseFeatureSaver:
|
|
|
56
56
|
first_token_only: bool = True,
|
|
57
57
|
max_num: Optional[int] = None,
|
|
58
58
|
):
|
|
59
|
+
if isinstance(save_path, str):
|
|
60
|
+
save_path = Path(save_path)
|
|
59
61
|
self.save_path = save_path
|
|
60
62
|
self.first_token_only = first_token_only
|
|
61
63
|
self.max_num = max_num
|
|
@@ -122,9 +124,9 @@ class CLIPVisionModelTaskPool(
|
|
|
122
124
|
self,
|
|
123
125
|
test_datasets: Union[DictConfig, Dict[str, Dataset]],
|
|
124
126
|
*,
|
|
125
|
-
processor: Union[DictConfig, CLIPProcessor],
|
|
126
|
-
|
|
127
|
-
|
|
127
|
+
processor: Union[str, DictConfig, CLIPProcessor],
|
|
128
|
+
clip_model: Union[str, DictConfig, CLIPModel],
|
|
129
|
+
data_processor: Union[DictConfig, CLIPProcessor] = None,
|
|
128
130
|
dataloader_kwargs: DictConfig = None,
|
|
129
131
|
layer_wise_feature_save_path: Optional[str] = None,
|
|
130
132
|
layer_wise_feature_first_token_only: bool = True,
|
|
@@ -159,21 +161,35 @@ class CLIPVisionModelTaskPool(
|
|
|
159
161
|
Set up the processor, data processor, CLIP model, test datasets, and data loaders.
|
|
160
162
|
"""
|
|
161
163
|
# setup processor and clip model
|
|
162
|
-
self.
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
instantiate(self.
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
self.
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
164
|
+
if isinstance(self._processor, str):
|
|
165
|
+
self.processor = CLIPProcessor.from_pretrained(self._processor)
|
|
166
|
+
elif (
|
|
167
|
+
isinstance(self._processor, (dict, DictConfig))
|
|
168
|
+
and "_target_" in self._processor
|
|
169
|
+
):
|
|
170
|
+
self.processor = instantiate(self._processor)
|
|
171
|
+
else:
|
|
172
|
+
self.processor = self._processor
|
|
173
|
+
|
|
174
|
+
if self._data_processor is None:
|
|
175
|
+
self.data_processor = self.processor
|
|
176
|
+
else:
|
|
177
|
+
self.data_processor = (
|
|
178
|
+
instantiate(self._data_processor)
|
|
179
|
+
if isinstance(self._data_processor, DictConfig)
|
|
180
|
+
else self._data_processor
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
if isinstance(self._clip_model, str):
|
|
184
|
+
self.clip_model = CLIPModel.from_pretrained(self._clip_model)
|
|
185
|
+
elif (
|
|
186
|
+
isinstance(self._clip_model, (dict, DictConfig))
|
|
187
|
+
and "_target_" in self._clip_model
|
|
188
|
+
):
|
|
189
|
+
self.clip_model = instantiate(self._clip_model)
|
|
190
|
+
else:
|
|
191
|
+
self.clip_model = self._clip_model
|
|
192
|
+
|
|
177
193
|
self.clip_model = self.fabric.to_device(self.clip_model)
|
|
178
194
|
self.clip_model.requires_grad_(False)
|
|
179
195
|
self.clip_model.eval()
|
fusion_bench/taskpool/dummy.py
CHANGED
|
@@ -4,13 +4,13 @@ This is the dummy task pool that is used for debugging purposes.
|
|
|
4
4
|
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
+
from lightning.pytorch.utilities import rank_zero_only
|
|
7
8
|
from torch import nn
|
|
8
9
|
|
|
9
10
|
from fusion_bench.models.separate_io import separate_save
|
|
10
11
|
from fusion_bench.taskpool.base_pool import BaseTaskPool
|
|
11
12
|
from fusion_bench.utils import timeit_context
|
|
12
13
|
from fusion_bench.utils.parameters import count_parameters, print_parameters
|
|
13
|
-
from lightning.pytorch.utilities import rank_zero_only
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def get_model_summary(model: nn.Module) -> dict:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
|
-
from typing import List, Literal, Optional, Union
|
|
3
|
+
from typing import TYPE_CHECKING, List, Literal, Optional, Union
|
|
4
4
|
|
|
5
5
|
import lightning.fabric
|
|
6
6
|
import lm_eval
|
|
@@ -12,7 +12,6 @@ from fusion_bench import BaseTaskPool
|
|
|
12
12
|
from fusion_bench.mixins import LightningFabricMixin
|
|
13
13
|
from fusion_bench.utils.strenum import _version
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
log = logging.getLogger(__name__)
|
|
17
16
|
|
|
18
17
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import importlib
|
|
2
2
|
import warnings
|
|
3
|
-
from typing import Any, Callable, Dict, List
|
|
3
|
+
from typing import Any, Callable, Dict, List, Tuple
|
|
4
4
|
|
|
5
5
|
from datasets import load_dataset
|
|
6
6
|
|
|
@@ -79,7 +79,9 @@ class CLIPTemplateFactory:
|
|
|
79
79
|
}
|
|
80
80
|
|
|
81
81
|
@staticmethod
|
|
82
|
-
def get_classnames_and_templates(
|
|
82
|
+
def get_classnames_and_templates(
|
|
83
|
+
dataset_name: str,
|
|
84
|
+
) -> Tuple[List[str], List[Callable]]:
|
|
83
85
|
"""
|
|
84
86
|
Retrieves class names and templates for the specified dataset.
|
|
85
87
|
|
|
@@ -169,7 +171,7 @@ class CLIPTemplateFactory:
|
|
|
169
171
|
CLIPTemplateFactory._dataset_mapping[dataset_name] = dataset_info
|
|
170
172
|
|
|
171
173
|
@staticmethod
|
|
172
|
-
def get_available_datasets():
|
|
174
|
+
def get_available_datasets() -> List[str]:
|
|
173
175
|
"""
|
|
174
176
|
Get a list of all available dataset names.
|
|
175
177
|
|
|
@@ -179,5 +181,5 @@ class CLIPTemplateFactory:
|
|
|
179
181
|
return list(CLIPTemplateFactory._dataset_mapping.keys())
|
|
180
182
|
|
|
181
183
|
|
|
182
|
-
def get_classnames_and_templates(dataset_name: str):
|
|
184
|
+
def get_classnames_and_templates(dataset_name: str) -> Tuple[List[str], List[Callable]]:
|
|
183
185
|
return CLIPTemplateFactory.get_classnames_and_templates(dataset_name)
|
fusion_bench/utils/__init__.py
CHANGED
|
@@ -7,7 +7,12 @@ from .cache_utils import *
|
|
|
7
7
|
from .devices import *
|
|
8
8
|
from .dtype import parse_dtype
|
|
9
9
|
from .fabric import seed_everything_by_time
|
|
10
|
-
from .instantiate_utils import
|
|
10
|
+
from .instantiate_utils import (
|
|
11
|
+
instantiate,
|
|
12
|
+
is_instantiable,
|
|
13
|
+
set_print_function_call,
|
|
14
|
+
set_print_function_call_permeanent,
|
|
15
|
+
)
|
|
11
16
|
from .json import load_from_json, save_to_json
|
|
12
17
|
from .lazy_state_dict import LazyStateDict
|
|
13
18
|
from .misc import *
|
fusion_bench/utils/devices.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import gc
|
|
2
|
+
import logging
|
|
2
3
|
import os
|
|
3
4
|
from typing import List, Optional, Union
|
|
4
5
|
|
|
@@ -12,7 +13,7 @@ from transformers.utils import (
|
|
|
12
13
|
)
|
|
13
14
|
|
|
14
15
|
__all__ = [
|
|
15
|
-
"
|
|
16
|
+
"clear_cuda_cache",
|
|
16
17
|
"to_device",
|
|
17
18
|
"num_devices",
|
|
18
19
|
"get_device",
|
|
@@ -21,10 +22,19 @@ __all__ = [
|
|
|
21
22
|
"get_device_capabilities",
|
|
22
23
|
]
|
|
23
24
|
|
|
25
|
+
log = logging.getLogger(__name__)
|
|
24
26
|
|
|
25
|
-
|
|
27
|
+
|
|
28
|
+
def clear_cuda_cache():
|
|
29
|
+
"""
|
|
30
|
+
Clears the CUDA memory cache to free up GPU memory.
|
|
31
|
+
Works only if CUDA is available.
|
|
32
|
+
"""
|
|
26
33
|
gc.collect()
|
|
27
|
-
torch.cuda.
|
|
34
|
+
if torch.cuda.is_available():
|
|
35
|
+
torch.cuda.empty_cache()
|
|
36
|
+
else:
|
|
37
|
+
log.warning("CUDA is not available. No cache to clear.")
|
|
28
38
|
|
|
29
39
|
|
|
30
40
|
def to_device(obj, device: Optional[torch.device], **kwargs):
|
|
@@ -75,7 +85,7 @@ def num_devices(devices: Union[int, List[int], str]) -> int:
|
|
|
75
85
|
Return the number of devices.
|
|
76
86
|
|
|
77
87
|
Args:
|
|
78
|
-
devices: `devices` can be a single int to specify the number of devices, or a list of device ids, e.g. [0, 1, 2, 3]
|
|
88
|
+
devices: `devices` can be a single int to specify the number of devices, or a list of device ids, e.g. [0, 1, 2, 3], or a str of device ids, e.g. "0,1,2,3" and "[0, 1, 2]".
|
|
79
89
|
|
|
80
90
|
Returns:
|
|
81
91
|
The number of devices.
|
|
@@ -28,7 +28,7 @@ PRINT_FUNCTION_CALL_FUNC = print
|
|
|
28
28
|
Function to be used for printing function calls.
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
|
-
CATCH_EXCEPTION =
|
|
31
|
+
CATCH_EXCEPTION = False
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
@contextmanager
|
|
@@ -41,10 +41,12 @@ def set_print_function_call(value: bool):
|
|
|
41
41
|
finally:
|
|
42
42
|
PRINT_FUNCTION_CALL = old_value
|
|
43
43
|
|
|
44
|
+
|
|
44
45
|
def set_print_function_call_permeanent(value: bool):
|
|
45
46
|
global PRINT_FUNCTION_CALL
|
|
46
47
|
PRINT_FUNCTION_CALL = value
|
|
47
48
|
|
|
49
|
+
|
|
48
50
|
def is_instantiable(config: Union[DictConfig, Any]) -> bool:
|
|
49
51
|
if OmegaConf.is_dict(config):
|
|
50
52
|
return "_target_" in config
|
fusion_bench/utils/misc.py
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
|
-
from
|
|
1
|
+
from difflib import get_close_matches
|
|
2
|
+
from typing import Any, Iterable, List, Optional
|
|
2
3
|
|
|
3
|
-
__all__ = [
|
|
4
|
+
__all__ = [
|
|
5
|
+
"first",
|
|
6
|
+
"has_length",
|
|
7
|
+
"join_list",
|
|
8
|
+
"attr_equal",
|
|
9
|
+
"validate_and_suggest_corrections",
|
|
10
|
+
]
|
|
4
11
|
|
|
5
12
|
|
|
6
13
|
def first(iterable: Iterable):
|
|
@@ -41,3 +48,42 @@ def attr_equal(obj, attr: str, value):
|
|
|
41
48
|
if not hasattr(obj, attr):
|
|
42
49
|
return False
|
|
43
50
|
return getattr(obj, attr) == value
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def validate_and_suggest_corrections(
|
|
54
|
+
obj: Any, values: Iterable[Any], *, max_suggestions: int = 3, cutoff: float = 0.6
|
|
55
|
+
) -> Any:
|
|
56
|
+
"""
|
|
57
|
+
Return *obj* if it is contained in *values*.
|
|
58
|
+
Otherwise raise a helpful ``ValueError`` that lists the closest matches.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
obj : Any
|
|
62
|
+
The value to validate.
|
|
63
|
+
values : Iterable[Any]
|
|
64
|
+
The set of allowed values.
|
|
65
|
+
max_suggestions : int, optional
|
|
66
|
+
How many typo-hints to include at most (default 3).
|
|
67
|
+
cutoff : float, optional
|
|
68
|
+
Similarity threshold for suggestions (0.0-1.0, default 0.6).
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
The original *obj* if it is valid.
|
|
72
|
+
|
|
73
|
+
Raises:
|
|
74
|
+
ValueError: With a friendly message that points out possible typos.
|
|
75
|
+
"""
|
|
76
|
+
# Normalise to a list so we can reuse it
|
|
77
|
+
value_list = list(values)
|
|
78
|
+
|
|
79
|
+
if obj in value_list:
|
|
80
|
+
return obj
|
|
81
|
+
|
|
82
|
+
# Build suggestions
|
|
83
|
+
str_values = list(map(str, value_list))
|
|
84
|
+
matches = get_close_matches(str(obj), str_values, n=max_suggestions, cutoff=cutoff)
|
|
85
|
+
|
|
86
|
+
msg = f"Invalid value {obj!r}. Allowed values: {value_list}"
|
|
87
|
+
if matches:
|
|
88
|
+
msg += f". Did you mean {', '.join(repr(m) for m in matches)}?"
|
|
89
|
+
raise ValueError(msg)
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Literal, Optional
|
|
3
|
+
|
|
4
|
+
from datasets import load_dataset as datasets_load_dataset
|
|
5
|
+
|
|
6
|
+
from fusion_bench.utils import validate_and_suggest_corrections
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from modelscope import dataset_file_download as modelscope_dataset_file_download
|
|
10
|
+
from modelscope import model_file_download as modelscope_model_file_download
|
|
11
|
+
from modelscope import snapshot_download as modelscope_snapshot_download
|
|
12
|
+
|
|
13
|
+
except ImportError:
|
|
14
|
+
|
|
15
|
+
def _raise_modelscope_not_installed_error(*args, **kwargs):
|
|
16
|
+
raise ImportError(
|
|
17
|
+
"ModelScope is not installed. Please install it using `pip install modelscope` to use ModelScope models."
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
modelscope_snapshot_download = _raise_modelscope_not_installed_error
|
|
21
|
+
modelscope_model_file_download = _raise_modelscope_not_installed_error
|
|
22
|
+
modelscope_dataset_file_download = _raise_modelscope_not_installed_error
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from huggingface_hub import hf_hub_download
|
|
26
|
+
from huggingface_hub import snapshot_download as huggingface_snapshot_download
|
|
27
|
+
except ImportError:
|
|
28
|
+
|
|
29
|
+
def _raise_hugggingface_not_installed_error(*args, **kwargs):
|
|
30
|
+
raise ImportError(
|
|
31
|
+
"Hugging Face Hub is not installed. Please install it using `pip install huggingface_hub` to use Hugging Face models."
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
huggingface_snapshot_download = _raise_hugggingface_not_installed_error
|
|
35
|
+
hf_hub_download = _raise_hugggingface_not_installed_error
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
"load_dataset",
|
|
39
|
+
"resolve_repo_path",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
AVAILABLE_PLATFORMS = ["hf", "huggingface", "modelscope"]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _raise_unknown_platform_error(platform: str):
|
|
46
|
+
raise ValueError(
|
|
47
|
+
f"Unsupported platform: {platform}. Supported platforms are 'hf', 'huggingface', and 'modelscope'."
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def load_dataset(
|
|
52
|
+
name: str,
|
|
53
|
+
split: str = "train",
|
|
54
|
+
platform: Literal["hf", "huggingface", "modelscope"] = "hf",
|
|
55
|
+
):
|
|
56
|
+
"""
|
|
57
|
+
Load a dataset from Hugging Face or ModelScope.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
platform (Literal['hf', 'modelscope']): The platform to load the dataset from.
|
|
61
|
+
name (str): The name of the dataset.
|
|
62
|
+
split (str): The split of the dataset to load (default is "train").
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
Dataset: The loaded dataset.
|
|
66
|
+
"""
|
|
67
|
+
validate_and_suggest_corrections(platform, AVAILABLE_PLATFORMS)
|
|
68
|
+
if platform == "hf" or platform == "huggingface":
|
|
69
|
+
return datasets_load_dataset(name, split=split)
|
|
70
|
+
elif platform == "modelscope":
|
|
71
|
+
dataset_dir = modelscope_snapshot_download(name, repo_type="dataset")
|
|
72
|
+
return datasets_load_dataset(dataset_dir, split=split)
|
|
73
|
+
else:
|
|
74
|
+
_raise_unknown_platform_error(platform)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def resolve_repo_path(
|
|
78
|
+
repo_id: str,
|
|
79
|
+
repo_type: Optional[str] = "model",
|
|
80
|
+
platform: Literal["hf", "huggingface", "modelscope"] = "hf",
|
|
81
|
+
**kwargs,
|
|
82
|
+
):
|
|
83
|
+
"""
|
|
84
|
+
Resolve and download a repository from various platforms to a local path.
|
|
85
|
+
|
|
86
|
+
This function handles multiple repository sources including local paths, Hugging Face,
|
|
87
|
+
and ModelScope. It automatically downloads remote repositories to local cache and
|
|
88
|
+
returns the local path for further use.
|
|
89
|
+
|
|
90
|
+
Args:
|
|
91
|
+
repo_id (str): Repository identifier. Can be:
|
|
92
|
+
- Local file/directory path (returned as-is if exists)
|
|
93
|
+
- Hugging Face model/dataset ID (e.g., "bert-base-uncased")
|
|
94
|
+
- ModelScope model/dataset ID
|
|
95
|
+
- URL-prefixed ID (e.g., "hf://model-name", "modelscope://model-name").
|
|
96
|
+
The prefix will override the platform argument.
|
|
97
|
+
repo_type (str, optional): Type of repository to download. Defaults to "model".
|
|
98
|
+
Common values include "model" and "dataset".
|
|
99
|
+
platform (Literal["hf", "huggingface", "modelscope"], optional):
|
|
100
|
+
Platform to download from. Defaults to "hf". Options:
|
|
101
|
+
- "hf" or "huggingface": Hugging Face Hub
|
|
102
|
+
- "modelscope": ModelScope platform
|
|
103
|
+
**kwargs: Additional arguments passed to the underlying download functions.
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
str: Local path to the repository (either existing local path or downloaded cache path).
|
|
107
|
+
|
|
108
|
+
Raises:
|
|
109
|
+
FileNotFoundError: If the repository cannot be found or downloaded from any platform.
|
|
110
|
+
ValueError: If an unsupported platform is specified.
|
|
111
|
+
ImportError: If required dependencies for the specified platform are not installed.
|
|
112
|
+
|
|
113
|
+
Examples:
|
|
114
|
+
>>> # Local path (returned as-is)
|
|
115
|
+
>>> resolve_repo_path("/path/to/local/model")
|
|
116
|
+
"/path/to/local/model"
|
|
117
|
+
|
|
118
|
+
>>> # Hugging Face model
|
|
119
|
+
>>> resolve_repo_path("bert-base-uncased")
|
|
120
|
+
"/home/user/.cache/huggingface/hub/models--bert-base-uncased/..."
|
|
121
|
+
|
|
122
|
+
>>> # ModelScope model with explicit platform
|
|
123
|
+
>>> resolve_repo_path("damo/nlp_bert_backbone_base_std", platform="modelscope")
|
|
124
|
+
"/home/user/.cache/modelscope/hub/damo/nlp_bert_backbone_base_std/..."
|
|
125
|
+
|
|
126
|
+
>>> # URL-prefixed repository ID
|
|
127
|
+
>>> resolve_repo_path("hf://microsoft/DialoGPT-medium")
|
|
128
|
+
"/home/user/.cache/huggingface/hub/models--microsoft--DialoGPT-medium/..."
|
|
129
|
+
"""
|
|
130
|
+
# If it's a HuggingFace Hub model id, download snapshot
|
|
131
|
+
if repo_id.startswith("hf://") or repo_id.startswith("huggingface://"):
|
|
132
|
+
repo_id = repo_id.replace("hf://", "").replace("huggingface://", "")
|
|
133
|
+
platform = "hf"
|
|
134
|
+
# If it's a ModelScope model id, download snapshot
|
|
135
|
+
elif repo_id.startswith("modelscope://"):
|
|
136
|
+
repo_id = repo_id.replace("modelscope://", "")
|
|
137
|
+
platform = "modelscope"
|
|
138
|
+
|
|
139
|
+
# If it's a local file or directory, return as is
|
|
140
|
+
if os.path.exists(repo_id):
|
|
141
|
+
return repo_id
|
|
142
|
+
|
|
143
|
+
try:
|
|
144
|
+
validate_and_suggest_corrections(platform, AVAILABLE_PLATFORMS)
|
|
145
|
+
# This will download the model to the cache and return the local path
|
|
146
|
+
if platform in ["hf", "huggingface"]:
|
|
147
|
+
local_path = huggingface_snapshot_download(
|
|
148
|
+
repo_id=repo_id, repo_type=repo_type, **kwargs
|
|
149
|
+
)
|
|
150
|
+
elif platform == "modelscope":
|
|
151
|
+
local_path = modelscope_snapshot_download(
|
|
152
|
+
repo_id=repo_id, repo_type=repo_type, **kwargs
|
|
153
|
+
)
|
|
154
|
+
else:
|
|
155
|
+
_raise_unknown_platform_error(platform)
|
|
156
|
+
return local_path
|
|
157
|
+
except Exception as e:
|
|
158
|
+
raise FileNotFoundError(f"Could not resolve checkpoint: {repo_id}. Error: {e}")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def resolve_file_path(
    repo_id: str,
    filename: str,
    repo_type: Literal["model", "dataset"] = "model",
    platform: Literal["hf", "huggingface", "modelscope"] = "hf",
    **kwargs,
) -> str:
    """
    Resolve and download a specific file from a repository across multiple platforms.

    Downloads a single file from a repository hosted on Hugging Face Hub or
    ModelScope, or resolves it against a local directory. Platform-specific URL
    prefixes on ``repo_id`` (``hf://``, ``huggingface://``, ``modelscope://``)
    override the ``platform`` argument.

    Args:
        repo_id (str): Repository identifier. Can be:

            - Local directory path (``filename`` is joined onto it if the path exists)
            - Hugging Face model/dataset ID (e.g., "bert-base-uncased")
            - ModelScope model/dataset ID
            - URL-prefixed ID (e.g., "hf://model-name", "modelscope://model-name").
              The prefix will override the platform argument.
        filename (str): The specific file to download from the repository.
        repo_type (Literal["model", "dataset"], optional): Type of repository.
            Defaults to "model". Used for ModelScope platform to determine the
            correct download function.
        platform (Literal["hf", "huggingface", "modelscope"], optional):
            Platform to download from. Defaults to "hf". Options:

            - "hf" or "huggingface": Hugging Face Hub
            - "modelscope": ModelScope platform
        **kwargs: Additional arguments passed to the underlying download functions
            (e.g., cache_dir, force_download, use_auth_token).

    Returns:
        str: Local path to the downloaded (or locally resolved) file.

    Raises:
        ValueError: If an unsupported repo_type is specified for ModelScope platform.
        ImportError: If required dependencies for the specified platform are not installed.
        FileNotFoundError: If the file cannot be found or downloaded.

    Examples:
        >>> resolve_file_path("bert-base-uncased", "config.json")
        "/home/user/.cache/huggingface/hub/models--bert-base-uncased/.../config.json"

        >>> resolve_file_path("/path/to/local/model", "config.json")
        "/path/to/local/model/config.json"

        >>> resolve_file_path("hf://microsoft/DialoGPT-medium", "config.json")
        "/home/user/.cache/huggingface/hub/.../config.json"

        >>> resolve_file_path(
        ...     "DAMO_NLP/jd", "train.json",
        ...     repo_type="dataset", platform="modelscope",
        ... )
        "/home/user/.cache/modelscope/datasets/.../train.json"
    """
    # A URL scheme on the repo id takes precedence over the `platform` argument.
    # Strip only the leading "<scheme>://" part; using str.replace here would
    # also remove the substring anywhere else in the id or path.
    if repo_id.startswith(("hf://", "huggingface://")):
        repo_id = repo_id.split("://", 1)[1]
        platform = "hf"
    elif repo_id.startswith("modelscope://"):
        repo_id = repo_id.split("://", 1)[1]
        platform = "modelscope"

    # Local directory (or file prefix): resolve the file against it directly,
    # skipping any remote download.
    if os.path.exists(repo_id):
        return os.path.join(repo_id, filename)

    if platform in ("hf", "huggingface"):
        return hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type=repo_type,
            **kwargs,
        )
    elif platform == "modelscope":
        # ModelScope exposes separate download entry points per repo type.
        if repo_type == "model":
            return modelscope_model_file_download(
                model_id=repo_id, file_path=filename, **kwargs
            )
        elif repo_type == "dataset":
            return modelscope_dataset_file_download(
                dataset_id=repo_id, file_path=filename, **kwargs
            )
        else:
            raise ValueError(
                f"Unsupported repo_type: {repo_type}. Supported types are 'model' and 'dataset'."
            )
    else:
        _raise_unknown_platform_error(platform)
|
fusion_bench/utils/parameters.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
from collections import OrderedDict
|
|
3
|
-
from typing import List, Mapping, Optional, Union
|
|
3
|
+
from typing import Dict, List, Mapping, Optional, Union
|
|
4
4
|
|
|
5
5
|
import torch
|
|
6
6
|
from torch import nn
|
|
@@ -83,7 +83,7 @@ def vector_to_state_dict(
|
|
|
83
83
|
vector: torch.Tensor,
|
|
84
84
|
state_dict: Union[StateDictType, nn.Module],
|
|
85
85
|
remove_keys: Optional[List[str]] = None,
|
|
86
|
-
):
|
|
86
|
+
) -> Dict[str, torch.Tensor]:
|
|
87
87
|
"""
|
|
88
88
|
Convert a vector to a state dictionary.
|
|
89
89
|
|
fusion_bench/utils/rich_utils.py
CHANGED