fusion-bench 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/__init__.py +25 -2
- fusion_bench/compat/method/__init__.py +5 -2
- fusion_bench/compat/method/base_algorithm.py +3 -2
- fusion_bench/compat/modelpool/base_pool.py +3 -3
- fusion_bench/compat/taskpool/clip_image_classification.py +1 -1
- fusion_bench/constants/__init__.py +1 -0
- fusion_bench/constants/runtime.py +57 -0
- fusion_bench/dataset/gpt2_glue.py +1 -1
- fusion_bench/method/__init__.py +12 -4
- fusion_bench/method/analysis/task_vector_cos_similarity.py +95 -12
- fusion_bench/method/analysis/task_vector_violin_plot.py +160 -52
- fusion_bench/method/bitdelta/__init__.py +1 -0
- fusion_bench/method/bitdelta/bitdelta.py +7 -23
- fusion_bench/method/classification/clip_finetune.py +1 -1
- fusion_bench/method/expert_sparsity/mixtral/dynamic_skipping.py +2 -0
- fusion_bench/method/expert_sparsity/mixtral/layer_wise_pruning.py +2 -0
- fusion_bench/method/expert_sparsity/mixtral/progressive_pruning.py +2 -0
- fusion_bench/method/fisher_merging/clip_fisher_merging.py +0 -4
- fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +2 -2
- fusion_bench/method/linear/simple_average_for_llama.py +16 -11
- fusion_bench/method/model_stock/__init__.py +1 -0
- fusion_bench/method/model_stock/model_stock.py +309 -0
- fusion_bench/method/regmean/clip_regmean.py +3 -6
- fusion_bench/method/regmean/regmean.py +27 -56
- fusion_bench/method/regmean/utils.py +56 -0
- fusion_bench/method/regmean_plusplus/regmean_plusplus.py +21 -60
- fusion_bench/method/simple_average.py +7 -7
- fusion_bench/method/slerp/__init__.py +1 -1
- fusion_bench/method/slerp/slerp.py +110 -14
- fusion_bench/method/smile_upscaling/causal_lm_upscaling.py +371 -0
- fusion_bench/method/smile_upscaling/projected_energy.py +1 -2
- fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +5 -1
- fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +40 -31
- fusion_bench/method/smile_upscaling/smile_upscaling.py +1 -1
- fusion_bench/method/we_moe/__init__.py +1 -0
- fusion_bench/method/we_moe/entropy_loss.py +25 -0
- fusion_bench/method/we_moe/flan_t5_we_moe.py +320 -0
- fusion_bench/method/we_moe/utils.py +15 -0
- fusion_bench/method/weighted_average/llama.py +1 -1
- fusion_bench/mixins/clip_classification.py +37 -48
- fusion_bench/mixins/serialization.py +30 -10
- fusion_bench/modelpool/base_pool.py +1 -1
- fusion_bench/modelpool/causal_lm/causal_lm.py +293 -75
- fusion_bench/modelpool/seq2seq_lm/modelpool.py +146 -0
- fusion_bench/models/__init__.py +5 -0
- fusion_bench/models/hf_utils.py +69 -86
- fusion_bench/models/linearized/vision_model.py +6 -6
- fusion_bench/models/model_card_templates/default.md +46 -0
- fusion_bench/models/modeling_smile_llama/__init__.py +7 -0
- fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +1 -8
- fusion_bench/models/modeling_smile_mistral/__init__.py +2 -1
- fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +1 -5
- fusion_bench/models/we_moe.py +8 -8
- fusion_bench/programs/fabric_fusion_program.py +29 -60
- fusion_bench/scripts/cli.py +34 -1
- fusion_bench/taskpool/base_pool.py +99 -17
- fusion_bench/taskpool/clip_vision/taskpool.py +10 -5
- fusion_bench/taskpool/dummy.py +101 -13
- fusion_bench/taskpool/lm_eval_harness/taskpool.py +80 -0
- fusion_bench/taskpool/nyuv2_taskpool.py +28 -0
- fusion_bench/utils/__init__.py +2 -0
- fusion_bench/utils/cache_utils.py +101 -1
- fusion_bench/utils/data.py +6 -4
- fusion_bench/utils/devices.py +7 -4
- fusion_bench/utils/dtype.py +3 -2
- fusion_bench/utils/fabric.py +2 -2
- fusion_bench/utils/lazy_imports.py +23 -0
- fusion_bench/utils/lazy_state_dict.py +117 -19
- fusion_bench/utils/modelscope.py +3 -3
- fusion_bench/utils/packages.py +3 -3
- fusion_bench/utils/parameters.py +0 -2
- fusion_bench/utils/path.py +56 -0
- fusion_bench/utils/pylogger.py +1 -1
- fusion_bench/utils/timer.py +92 -10
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/METADATA +1 -23
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/RECORD +89 -75
- fusion_bench_config/_get_started/llm_slerp.yaml +12 -0
- fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml +0 -1
- fusion_bench_config/method/linear/simple_average_for_llama.yaml +3 -2
- fusion_bench_config/method/model_stock/model_stock.yaml +12 -0
- fusion_bench_config/method/slerp/slerp_lm.yaml +4 -0
- fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml +21 -0
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +1 -1
- fusion_bench_config/method/wemoe/flan_t5_weight_ensembling_moe.yaml +20 -0
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +1 -1
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/top_level.txt +0 -0
fusion_bench/mixins/clip_classification.py:

@@ -22,6 +22,7 @@ from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel
 
+from fusion_bench import cache_with_joblib
 from fusion_bench.dataset.clip_dataset import CLIPDataset
 from fusion_bench.mixins import LightningFabricMixin
 from fusion_bench.modelpool import CLIPVisionModelPool

@@ -46,7 +47,6 @@ class CLIPClassificationMixin(LightningFabricMixin):
 
     - `_dataloader_kwargs` (Dict[str, Any]): Keyword arguments for the dataloader.
    - `modelpool` (CLIPVisionModelPool): The model pool containing the CLIP models.
-    - `zeroshot_weights_cache_dir` (Optional[str]): The directory to cache the zero-shot weights.
     """
 
     dataloader_kwargs: Dict[str, Any] = {}

@@ -54,7 +54,6 @@ class CLIPClassificationMixin(LightningFabricMixin):
     modelpool: CLIPVisionModelPool = None
     _clip_processor: CLIPProcessor = None
     # a dict of zeroshot weights for each task, each key is the task name
-    zeroshot_weights_cache_dir: str = "outputs/cache/clip_zeroshot_weights"
     zeroshot_weights: Dict[str, torch.Tensor] = {}
     whether_setup_zero_shot_classification_head = False
 

@@ -114,11 +113,27 @@ class CLIPClassificationMixin(LightningFabricMixin):
         clip_model: Optional[CLIPModel] = None,
         task_names: Optional[List[str]] = None,
     ):
+        """
+        Initializes a zero-shot classification head.
+
+        This method constructs a zero-shot classification head by generating text embeddings for each class name using a set of templates.
+        These embeddings function as the weights of the classification layer. The method also extracts the `visual_projection` and `logit_scale`
+        from the provided CLIP model, which are necessary for calculating the final logits.
+
+        Args:
+            clip_processor (Optional[CLIPProcessor]): The processor for the CLIP model. If not provided, it is loaded from the model pool.
+            clip_model (Optional[CLIPModel]): The CLIP model to use. If not provided, a pretrained model is loaded from the model pool.
+            task_names (Optional[List[str]]): A list of task names to set up the classification head for. If not provided, all models in the model pool will be used.
+        """
         self.whether_setup_zero_shot_classification_head = True
+        # load clip model if not provided
         if clip_model is None:
             if self.modelpool.has_pretrained:
                 clip_model = self.modelpool.load_clip_model("_pretrained_")
             else:
+                log.warning(
+                    f"No pretrained CLIP model found, using the model from the model pool: {self.modelpool.model_names[0]}."
+                )
                 clip_model = self.modelpool.load_clip_model(
                     self.modelpool.model_names[0]
                 )
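The new docstring describes the standard CLIP zero-shot recipe: each class name is rendered through a set of prompt templates, encoded by the text tower, and the resulting embeddings become the rows of the classification head. Below is a minimal sketch of that recipe with Hugging Face CLIP; the checkpoint, class names, and templates are illustrative placeholders, and in fusion_bench this work is done by the `clip_classifier` object (`set_classification_task` / `zeroshot_weights` in the hunks that follow), not by this code.

```python
# Illustrative sketch, not fusion_bench internals.
import torch
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

classnames = ["cat", "dog"]
templates = ["a photo of a {}.", "a blurry photo of a {}."]

with torch.no_grad():
    rows = []
    for classname in classnames:
        # Encode every template filled with this class name.
        texts = [t.format(classname) for t in templates]
        inputs = processor(text=texts, return_tensors="pt", padding=True)
        embeds = model.get_text_features(**inputs)
        # L2-normalize, average over templates, and re-normalize.
        embeds = embeds / embeds.norm(dim=-1, keepdim=True)
        class_embed = embeds.mean(dim=0)
        rows.append(class_embed / class_embed.norm())
    zeroshot_weights = torch.stack(rows)  # (num_classes, projection_dim)
```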
@@ -131,26 +146,16 @@ class CLIPClassificationMixin(LightningFabricMixin):
         self.visual_projection = self.fabric.to_device(self.visual_projection)
         self.logit_scale_exp = self.fabric.to_device(self.logit_scale_exp)
 
-
-
-
-
-
-
-
-
-
-        cache_dir = os.path.join(
-            self.zeroshot_weights_cache_dir,
-            os.path.normpath(model_name.split("/")[-1]),
-        )
-        if not os.path.exists(cache_dir):
-            log.info(
-                f"Creating cache directory for zero-shot classification head at {cache_dir}"
-            )
-            os.makedirs(cache_dir)
+        @cache_with_joblib()
+        def construct_classification_head(task: str):
+            nonlocal clip_classifier
+
+            classnames, templates = get_classnames_and_templates(task)
+            clip_classifier.set_classification_task(classnames, templates)
+            zeroshot_weights = clip_classifier.zeroshot_weights.detach().clone()
+
+            return zeroshot_weights
 
-        log.info(f"cache directory for zero-shot classification head: {cache_dir}")
         for task in tqdm(
             self.modelpool.model_names if task_names is None else task_names,
             "Setting up zero-shot classification head",
@@ -158,27 +163,7 @@ class CLIPClassificationMixin(LightningFabricMixin):
         ):
             zeroshot_weights = None
             if self.fabric.is_global_zero:
-                cache_file = os.path.join(
-                    cache_dir, os.path.normpath(f"{task}_zeroshot_weights.pt")
-                )
-                if os.path.exists(cache_file):
-                    zeroshot_weights = torch.load(
-                        cache_file,
-                        map_location="cpu",
-                        weights_only=True,
-                    ).detach()
-                    log.info(
-                        f"Loadded cached zeroshot weights for task: {task}, shape: {zeroshot_weights.shape}"
-                    )
-                else:
-                    log.info(
-                        f"Construct zero shot classification head for task: {task}"
-                    )
-                    classnames, templates = get_classnames_and_templates(task)
-                    clip_classifier.set_classification_task(classnames, templates)
-                    zeroshot_weights = clip_classifier.zeroshot_weights.detach().clone()
-                    log.info(f"save zeroshot weights to {cache_file}")
-                    torch.save(zeroshot_weights, cache_file)
+                zeroshot_weights = construct_classification_head(task)
 
             self.fabric.barrier()
             self.zeroshot_weights[task] = self.fabric.broadcast(zeroshot_weights, src=0)
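The two hunks above replace the hand-rolled cache (an explicit `cache_dir` plus `torch.save`/`torch.load` of per-task `.pt` files) with a `cache_with_joblib`-decorated closure, so cache keys and on-disk storage are handled by joblib. A rough sketch of how such a decorator can be built on `joblib.Memory` follows; the signature and cache location here are assumptions, not the actual fusion_bench implementation.

```python
# Sketch only: the real decorator's signature and cache location may differ.
from joblib import Memory

def cache_with_joblib(cache_dir: str = "outputs/cache"):
    memory = Memory(cache_dir, verbose=0)

    def decorator(func):
        # joblib hashes the arguments and memoizes results on disk,
        # so repeated calls with the same `task` skip the computation.
        return memory.cache(func)

    return decorator

@cache_with_joblib()
def construct_classification_head(task: str):
    return f"expensive-result-for-{task}"  # stand-in for the real computation
```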
@@ -197,16 +182,20 @@ class CLIPClassificationMixin(LightningFabricMixin):
         image_embeds: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         """
-
+        Computes the classification logits for a batch of images for a specific task.
+
+        This method performs zero-shot classification by calculating the cosine similarity between image and text embeddings.
+        The image embeddings are obtained from the provided vision model, and the text embeddings (zero-shot weights) are pre-computed for the task.
+        The similarity scores are then scaled by the CLIP model's `logit_scale` to produce the final logits.
 
         Args:
-            module (Union[nn.Module, CLIPVisionModel, "CLIPVisionTransformer"]): The
-            images (torch.Tensor):
-            task (str): The
-            image_embeds (Optional[torch.Tensor]):
+            module (Union[nn.Module, CLIPVisionModel, "CLIPVisionTransformer"]): The vision encoder part of the CLIP model.
+            images (torch.Tensor): A batch of images to classify.
+            task (str): The name of the classification task.
+            image_embeds (Optional[torch.Tensor]): Pre-computed image embeddings. If provided, the method skips the image encoding step.
 
         Returns:
-            torch.Tensor:
+            torch.Tensor: A tensor of logits for each image, with shape (batch_size, num_classes).
         """
         text_embeds = self.zeroshot_weights[task]
 
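The rewritten docstring spells out the computation: cosine similarity between normalized image and text embeddings, scaled by `logit_scale`. As a sketch (illustrative names, not the method's exact body):

```python
import torch

def zero_shot_logits(
    image_embeds: torch.Tensor,     # (batch_size, dim), after visual_projection
    text_embeds: torch.Tensor,      # (num_classes, dim), the zero-shot weights
    logit_scale_exp: torch.Tensor,  # exp() of CLIP's learned logit_scale
) -> torch.Tensor:
    # Cosine similarity is the dot product of L2-normalized embeddings.
    image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
    text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True)
    # Scale the similarities to obtain logits of shape (batch_size, num_classes).
    return logit_scale_exp * image_embeds @ text_embeds.T
```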
fusion_bench/mixins/serialization.py:

@@ -4,7 +4,7 @@ from copy import deepcopy
 from functools import wraps
 from inspect import Parameter, _ParameterKind
 from pathlib import Path
-from typing import Dict, Optional, Union
+from typing import Dict, Mapping, Optional, Union
 
 from omegaconf import DictConfig, OmegaConf
 
@@ -21,6 +21,20 @@ __all__ = [
 ]
 
 
+def _get_attr_name(config_mapping: Mapping[str, str], param_name):
+    for attr_name, p in config_mapping.items():
+        if p == param_name:
+            return attr_name
+    else:
+        raise ValueError(f"Parameter {param_name} not found in config mapping.")
+
+
+def _set_attr(self, param_name: str, value):
+    attr_name = _get_attr_name(self._config_mapping, param_name)
+    log.debug(f"set {attr_name} to {value}. Parameter name: {param_name}")
+    setattr(self, attr_name, value)
+
+
 def auto_register_config(cls):
     """
     Decorator to automatically register __init__ parameters in _config_mapping.
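The new helpers invert `_config_mapping`, which maps attribute names to `__init__` parameter names: `_get_attr_name` finds the attribute that stores a given parameter (note the `for`/`else`: the `ValueError` is raised only when the loop finishes without returning), and `_set_attr` writes through that mapping. A hypothetical example, assuming the two helpers above are in scope:

```python
# Hypothetical class, invented for illustration.
class Example:
    # attribute name -> __init__ parameter name
    _config_mapping = {"_model_path": "model_path", "device": "device"}

obj = Example()
_set_attr(obj, "model_path", "models/llama")  # writes obj._model_path
assert obj._model_path == "models/llama"
assert _get_attr_name(Example._config_mapping, "device") == "device"
_get_attr_name(Example._config_mapping, "unknown")  # raises ValueError
```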
@@ -56,8 +70,8 @@ def auto_register_config(cls):
     ```python
     @auto_register_config
     class MyAlgorithm(BaseYAMLSerializable):
-        def __init__(self, learning_rate: float = 0.001, batch_size: int = 32, model_name: str = "default"):
-            super().__init__()
+        def __init__(self, learning_rate: float = 0.001, batch_size: int = 32, model_name: str = "default", **kwargs):
+            super().__init__(**kwargs)
 
     # All instantiation methods work automatically:
     algo1 = MyAlgorithm(0.01, 64)  # positional args
@@ -90,14 +104,20 @@ def auto_register_config(cls):
     # Auto-register parameters in _config_mapping
     if not "_config_mapping" in cls.__dict__:
         cls._config_mapping = deepcopy(getattr(cls, "_config_mapping", {}))
+    registered_parameters = tuple(cls._config_mapping.values())
+
     for param_name in list(sig.parameters.keys())[1:]:  # Skip 'self'
-        if sig.parameters[param_name].kind not in [
-            _ParameterKind.VAR_POSITIONAL,
-            _ParameterKind.VAR_KEYWORD,
-        ]:
+        if (
+            sig.parameters[param_name].kind
+            not in [
+                _ParameterKind.VAR_POSITIONAL,
+                _ParameterKind.VAR_KEYWORD,
+            ]
+        ) and (param_name not in registered_parameters):
             cls._config_mapping[param_name] = param_name
 
     def __init__(self, *args, **kwargs):
+        log.debug(f"set attributes for {self.__class__.__name__} in {cls.__name__}")
         # auto-register the attributes based on the signature
         sig = inspect.signature(original_init)
         param_names = list(sig.parameters.keys())[1:]  # Skip 'self'
@@ -110,7 +130,7 @@ def auto_register_config(cls):
                     _ParameterKind.VAR_POSITIONAL,
                     _ParameterKind.VAR_KEYWORD,
                 ]:
-                    setattr(self, param_name, arg_value)
+                    _set_attr(self, param_name, arg_value)
 
         # Handle keyword arguments and defaults
         for param_name in param_names:
@@ -124,12 +144,12 @@ def auto_register_config(cls):
                 continue
 
             if param_name in kwargs:
-                _set_attr := setattr(self, param_name, kwargs[param_name])
+                _set_attr(self, param_name, kwargs[param_name])
             else:
                 # Set default value if available and attribute doesn't exist
                 default_value = sig.parameters[param_name].default
                 if default_value is not Parameter.empty:
-                    setattr(self, param_name, default_value)
+                    _set_attr(self, param_name, default_value)
 
         # Call the original __init__
         result = original_init(self, *args, **kwargs)
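With the three hunks above, every attribute assignment in the generated `__init__` goes through `_set_attr`, so positional arguments, keyword arguments, and defaults all land on the attribute named in `_config_mapping`. The sketch below mirrors the docstring example (hypothetical class; import paths are assumptions):

```python
from fusion_bench.mixins import BaseYAMLSerializable
from fusion_bench.mixins.serialization import auto_register_config

@auto_register_config
class MyAlgorithm(BaseYAMLSerializable):
    def __init__(self, learning_rate: float = 0.001, batch_size: int = 32, **kwargs):
        super().__init__(**kwargs)

algo = MyAlgorithm(0.01)           # positional argument
assert algo.learning_rate == 0.01  # set through _set_attr before __init__ runs
assert algo.batch_size == 32       # default value, also set through _set_attr
```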
fusion_bench/modelpool/base_pool.py:

@@ -277,7 +277,7 @@ class BaseModelPool(
         for dataset_name in self.test_dataset_names:
             yield self.load_test_dataset(dataset_name)
 
-    def save_model(self, model: nn.Module, path: str):
+    def save_model(self, model: nn.Module, path: str, *args, **kwargs):
         """
         Save the state dictionary of the model to the specified path.
 
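Widening `save_model` to accept `*args`/`**kwargs` lets subclasses take extra saving options while staying compatible with callers that only pass `(model, path)`. A hypothetical subclass (the `save_dtype` option and import path are invented for illustration):

```python
from torch import nn
from fusion_bench.modelpool import BaseModelPool

class MyModelPool(BaseModelPool):
    def save_model(self, model: nn.Module, path: str, *args, save_dtype=None, **kwargs):
        # Consume the extra option here; remaining args flow through to the base class.
        if save_dtype is not None:
            model = model.to(dtype=save_dtype)
        super().save_model(model, path, *args, **kwargs)
```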