fusion-bench 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/compat/method/__init__.py +2 -0
- fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +4 -1
- fusion_bench/constants/clip_vision.py +22 -0
- fusion_bench/dataset/clip_dataset.py +10 -2
- fusion_bench/dataset/fer2013.py +1 -0
- fusion_bench/dataset/gsm8k.py +2 -2
- fusion_bench/method/__init__.py +10 -0
- fusion_bench/method/ada_svd/clip_vision.py +4 -1
- fusion_bench/method/adamerging/clip_task_wise_adamerging.py +1 -29
- fusion_bench/method/fisher_merging/fisher_merging.py +29 -17
- fusion_bench/method/gossip/__init__.py +3 -0
- fusion_bench/method/gossip/clip_layer_wise_gossip.py +43 -0
- fusion_bench/method/gossip/clip_task_wise_gossip.py +190 -0
- fusion_bench/method/gossip/entropy_loss.py +25 -0
- fusion_bench/method/gossip/flan_t5_layer_wise_gossip.py +388 -0
- fusion_bench/method/gossip/layer_wise_gossip.py +434 -0
- fusion_bench/method/gossip/min_norm_solvers.py +227 -0
- fusion_bench/method/gossip/task_wise_gossip.py +265 -0
- fusion_bench/method/gossip/utils.py +74 -0
- fusion_bench/method/isotropic_merging/__init__.py +1 -1
- fusion_bench/method/opcm/opcm.py +16 -7
- fusion_bench/method/pwe_moe/module.py +1 -1
- fusion_bench/method/pwe_moe/openclip_pwe_moe.py +476 -0
- fusion_bench/method/regmean/regmean.py +25 -17
- fusion_bench/method/smile_upscaling/__init__.py +1 -1
- fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +46 -145
- fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +229 -0
- fusion_bench/method/smile_upscaling/smile_upscaling.py +19 -346
- fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +7 -0
- fusion_bench/method/task_arithmetic/task_arithmetic.py +8 -6
- fusion_bench/method/ties_merging/ties_merging.py +36 -31
- fusion_bench/method/we_moe/we_moe.py +14 -15
- fusion_bench/mixins/__init__.py +6 -3
- fusion_bench/mixins/hydra_config.py +49 -0
- fusion_bench/mixins/openclip_classification.py +11 -0
- fusion_bench/mixins/simple_profiler.py +4 -2
- fusion_bench/modelpool/__init__.py +3 -1
- fusion_bench/modelpool/base_pool.py +2 -2
- fusion_bench/modelpool/openclip_vision/__init__.py +1 -0
- fusion_bench/modelpool/openclip_vision/modelpool.py +255 -0
- fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py +2 -203
- fusion_bench/models/modeling_smile_qwen2/__init__.py +8 -0
- fusion_bench/models/modeling_smile_qwen2/configuration_smile_qwen2.py +21 -0
- fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +922 -0
- fusion_bench/models/modeling_smile_qwen2/register.py +11 -0
- fusion_bench/models/open_clip/__init__.py +6 -0
- fusion_bench/models/open_clip/modeling.py +176 -0
- fusion_bench/models/open_clip/utils.py +311 -0
- fusion_bench/models/open_clip/variables_and_paths.py +56 -0
- fusion_bench/models/parameter_dict.py +54 -13
- fusion_bench/models/rankone_moe.py +2 -88
- fusion_bench/models/smile_moe/linear_from_hf_config.py +373 -0
- fusion_bench/models/smile_moe/{linear.py → linear_from_module.py} +103 -33
- fusion_bench/models/smile_moe/utils/__init__.py +24 -0
- fusion_bench/models/smile_moe/utils/svd_utils.py +46 -0
- fusion_bench/scripts/nyuv2_mtl_train.py +1 -1
- fusion_bench/taskpool/__init__.py +7 -3
- fusion_bench/taskpool/clip_vision/__init__.py +1 -0
- fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +2 -30
- fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py +102 -0
- fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +2 -30
- fusion_bench/taskpool/clip_vision/taskpool.py +1 -2
- fusion_bench/taskpool/clip_vision/utils/__init__.py +0 -0
- fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py +65 -0
- fusion_bench/taskpool/gpt2_text_classification.py +30 -1
- fusion_bench/taskpool/lm_eval_harness/__init__.py +3 -0
- fusion_bench/taskpool/lm_eval_harness/taskpool.py +87 -0
- fusion_bench/taskpool/openclip_vision/__init__.py +1 -0
- fusion_bench/taskpool/openclip_vision/openclip_taskpool.py +196 -0
- fusion_bench/utils/data.py +12 -0
- fusion_bench/utils/devices.py +14 -0
- fusion_bench/utils/instantiate.py +12 -0
- fusion_bench/utils/misc.py +9 -2
- fusion_bench/utils/packages.py +14 -0
- fusion_bench/utils/parameters.py +1 -1
- fusion_bench/utils/tensorboard.py +1 -1
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/METADATA +22 -2
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/RECORD +209 -157
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/WHEEL +1 -1
- fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -2
- fusion_bench_config/dataset/image_classification/test/TALL20.yaml +0 -1
- fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml +0 -1
- fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml +1 -1
- fusion_bench_config/dataset/image_classification/train/TALL20.yaml +0 -1
- fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml +1 -1
- fusion_bench_config/fabric/auto.yaml +0 -1
- fusion_bench_config/fabric/llama_ddp.yaml +0 -1
- fusion_bench_config/fabric/llama_fsdp.yaml +0 -1
- fusion_bench_config/fabric/llama_peft_fsdp.yaml +0 -1
- fusion_bench_config/fabric/strategy/deepspeed.yaml +0 -1
- fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +0 -1
- fusion_bench_config/fabric_model_fusion.yaml +0 -1
- fusion_bench_config/llama_full_finetune.yaml +0 -2
- fusion_bench_config/llama_model_fusion.yaml +0 -2
- fusion_bench_config/method/ada_svd/clip_vision.yaml +0 -1
- fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml +0 -5
- fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml +0 -5
- fusion_bench_config/method/adamerging/llama_sft.yaml +0 -2
- fusion_bench_config/method/adamerging.yaml +2 -2
- fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +0 -1
- fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +0 -1
- fusion_bench_config/method/classification/clip_continual_finetune.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml +1 -12
- fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml +1 -12
- fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml +1 -10
- fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml +1 -14
- fusion_bench_config/method/dare/simple_average.yaml +0 -1
- fusion_bench_config/method/dare/task_arithmetic.yaml +0 -1
- fusion_bench_config/method/dare/ties_merging.yaml +0 -2
- fusion_bench_config/method/dawe/dawe_for_clip.yaml +0 -3
- fusion_bench_config/method/doge_ta/doge_ta.yaml +1 -1
- fusion_bench_config/method/ensemble/max_model_predictor.yaml +1 -1
- fusion_bench_config/method/ensemble/simple_ensemble.yaml +0 -1
- fusion_bench_config/method/ensemble/weighted_ensemble.yaml +0 -1
- fusion_bench_config/method/gossip/layer_wise_clip.yaml +30 -0
- fusion_bench_config/method/gossip/layer_wise_flan_t5.yaml +25 -0
- fusion_bench_config/method/isotropic_merging/iso_c.yaml +0 -1
- fusion_bench_config/method/isotropic_merging/iso_cts.yaml +0 -1
- fusion_bench_config/method/linear/linear_interpolation.yaml +0 -1
- fusion_bench_config/method/linear/llama_expo.yaml +0 -3
- fusion_bench_config/method/linear/llama_expo_with_dare.yaml +0 -5
- fusion_bench_config/method/linear/weighted_average.yaml +0 -1
- fusion_bench_config/method/linear/weighted_average_for_llama.yaml +0 -1
- fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +0 -4
- fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +0 -4
- fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +0 -6
- fusion_bench_config/method/mixtral_moe_upscaling.yaml +1 -2
- fusion_bench_config/method/model_recombination.yaml +0 -1
- fusion_bench_config/method/opcm/opcm.yaml +0 -1
- fusion_bench_config/method/opcm/task_arithmetic.yaml +0 -2
- fusion_bench_config/method/opcm/ties_merging.yaml +0 -2
- fusion_bench_config/method/opcm/weight_average.yaml +0 -1
- fusion_bench_config/method/pwe_moe/epo_for_openclip.yaml +30 -0
- fusion_bench_config/method/pwe_moe/ls_for_openclip.yaml +30 -0
- fusion_bench_config/method/{pwe_moe_ls_for_clip.yaml → pwe_moe/pwe_moe_ls_for_clip.yaml} +7 -6
- fusion_bench_config/method/rankone_moe/rankone_moe.yaml +1 -3
- fusion_bench_config/method/regmean/gpt2_regmean.yaml +0 -1
- fusion_bench_config/method/slerp/slerp.yaml +0 -2
- fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +5 -2
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +13 -0
- fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
- fusion_bench_config/method/surgery/adamerging_surgery.yaml +1 -2
- fusion_bench_config/method/task_arithmetic.yaml +1 -1
- fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +0 -1
- fusion_bench_config/method/ties_merging.yaml +1 -1
- fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +0 -1
- fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +0 -8
- fusion_bench_config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_pcam.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stl10.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +1 -1
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_lora.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual_lora.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +0 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +0 -3
- fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +0 -1
- fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +17 -0
- fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +0 -1
- fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +0 -3
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md +90 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-16_TA8.yaml +27 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8.yaml +45 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_cars_dtd.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_cars.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_dtd.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_individual.yaml +7 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-L-14_TA8.yaml +26 -0
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +0 -1
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +0 -2
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml +0 -2
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_tta.yaml +1 -3
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +0 -1
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +0 -3
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +0 -4
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +0 -3
- fusion_bench_config/modelpool/gpt-2_glue.yaml +0 -3
- fusion_bench_config/nyuv2_config.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +0 -3
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +0 -2
- fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml +12 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml +24 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml +24 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml +24 -0
- fusion_bench_config/taskpool/gpt-2_glue.yaml +0 -1
- fusion_bench_config/taskpool/reward_model_evaluation.yaml +0 -4
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/top_level.txt +0 -0
fusion_bench/taskpool/openclip_vision/openclip_taskpool.py
ADDED

```diff
@@ -0,0 +1,196 @@
+import itertools
+import json
+import logging
+import os
+from typing import TYPE_CHECKING, Callable, Dict, Optional, Union
+
+import lightning.fabric
+import open_clip
+import torch
+from omegaconf import DictConfig
+from torch.nn import functional as F
+from torch.utils.data import DataLoader, Dataset
+from torchmetrics import Accuracy, MeanMetric
+from torchmetrics.classification.accuracy import MulticlassAccuracy
+from tqdm.auto import tqdm
+
+from fusion_bench import BaseTaskPool
+from fusion_bench.dataset.clip_dataset import CLIPDataset
+from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.modelpool.openclip_vision.modelpool import load_classifier_head
+from fusion_bench.models.open_clip import (
+    ClassificationHead,
+    ImageClassifier,
+    ImageEncoder,
+)
+from fusion_bench.models.open_clip.variables_and_paths import OPENCLIP_CACHEDIR
+from fusion_bench.utils import count_parameters, instantiate
+
+if TYPE_CHECKING:
+    from fusion_bench.modelpool import OpenCLIPVisionModelPool
+    from fusion_bench.programs import FabricModelFusionProgram
+
+log = logging.getLogger(__name__)
+
+
+class OpenCLIPVisionModelTaskPool(
+    BaseTaskPool,
+    LightningFabricMixin,
+):
+    _is_setup = False
+
+    _program: "FabricModelFusionProgram"
+
+    processor: Optional[Callable] = None
+    test_datasets: Dict[str, CLIPDataset]
+
+    def __init__(
+        self,
+        test_datasets: Union[DictConfig, Dict[str, Dataset]],
+        classification_heads: Union[DictConfig, Dict[str, ClassificationHead]],
+        dataloader_kwargs: DictConfig,
+        model_name: Optional[str] = None,
+        fast_dev_run: bool = False,
+        **kwargs,
+    ):
+        self._test_datasets = test_datasets
+        self._classifier_heads = classification_heads
+        self._dataloader_kwargs = dataloader_kwargs
+        self._model_name = model_name
+        self.fast_dev_run = fast_dev_run
+        super().__init__(**kwargs)
+
+    def setup(self):
+        # setup the processor
+        if self._program is not None and self._program.modelpool is not None:
+            modelpool: "OpenCLIPVisionModelPool" = self._program.modelpool
+            self.processor = modelpool.test_processor
+        elif self._model_name is not None:
+            _, _, self.processor = open_clip.create_model_and_transforms(
+                self._model_name,
+                pretrained="openai",
+                cache_dir=OPENCLIP_CACHEDIR,
+            )
+        else:
+            raise ValueError("Modelpool or model_name is not set")
+
+        # setup the test datasets
+        self.test_datasets = {
+            name: instantiate(dataset) if isinstance(dataset, DictConfig) else dataset
+            for name, dataset in self._test_datasets.items()
+        }
+        self.test_datasets = {
+            name: CLIPDataset(dataset, self.processor)
+            for name, dataset in self.test_datasets.items()
+        }
+        self.test_dataloaders = {
+            name: self.fabric.setup_dataloaders(
+                DataLoader(dataset, **self._dataloader_kwargs)
+            )
+            for name, dataset in self.test_datasets.items()
+        }
+
+        # setup classifier heads
+        self.classifier_heads = {
+            name: load_classifier_head(head).to(self.fabric.device)
+            for name, head in self._classifier_heads.items()
+        }
+        self._is_setup = True
+
+    @torch.no_grad()
+    def _evaluate(
+        self,
+        classifier: ImageClassifier,
+        test_loader: DataLoader,
+        num_classes: int,
+        task_name: str,
+    ):
+        accuracy: MulticlassAccuracy = Accuracy(
+            task="multiclass", num_classes=num_classes
+        )
+        classifier.eval()
+        loss_metric = MeanMetric()
+        # if fast_dev_run is set, we only evaluate on a batch of the data
+        if self.fast_dev_run:
+            log.info("Running under fast_dev_run mode, evaluating on a single batch.")
+            test_loader = itertools.islice(test_loader, 1)
+        else:
+            test_loader = test_loader
+
+        pbar = tqdm(
+            test_loader,
+            desc=f"Evaluating {task_name}",
+            leave=False,
+            dynamic_ncols=True,
+        )
+        for batch in pbar:
+            inputs, targets = batch
+            logits = classifier(inputs)
+            loss = F.cross_entropy(logits, targets)
+            loss_metric.update(loss.detach().cpu())
+            acc = accuracy(logits.detach().cpu(), targets.detach().cpu())
+            pbar.set_postfix(
+                {
+                    "accuracy": accuracy.compute().item(),
+                    "loss": loss_metric.compute().item(),
+                }
+            )
+
+        acc = accuracy.compute().item()
+        loss = loss_metric.compute().item()
+        results = {"accuracy": acc, "loss": loss}
+        return results
+
+    def evaluate(self, model: ImageEncoder, **kwargs):
+        if not self._is_setup:
+            self.setup()
+
+        report = {}
+        # collect basic model information
+        training_params, all_params = count_parameters(model)
+        report["model_info"] = {
+            "trainable_params": training_params,
+            "all_params": all_params,
+            "trainable_percentage": training_params / all_params,
+        }
+
+        if not lightning.fabric.is_wrapped(model):
+            model = self.fabric.setup_module(model)
+
+        pbar = tqdm(
+            self.test_dataloaders.items(),
+            desc="Evaluating tasks",
+            total=len(self.test_dataloaders),
+        )
+        for task_name, test_dataloader in pbar:
+            classifier = ImageClassifier(model, self.classifier_heads[task_name])
+            num_classes = self.classifier_heads[task_name].weight.size(0)
+            result = self._evaluate(
+                classifier,
+                test_dataloader,
+                num_classes=num_classes,
+                task_name=task_name,
+            )
+            report[task_name] = result
+
+        # calculate the average accuracy and loss
+        if "average" not in report:
+            report["average"] = {}
+        accuracies = [
+            value["accuracy"]
+            for key, value in report.items()
+            if "accuracy" in value
+        ]
+        if len(accuracies) > 0:
+            average_accuracy = sum(accuracies) / len(accuracies)
+            report["average"]["accuracy"] = average_accuracy
+        losses = [value["loss"] for key, value in report.items() if "loss" in value]
+        if len(losses) > 0:
+            average_loss = sum(losses) / len(losses)
+            report["average"]["loss"] = average_loss
+
+        log.info(f"Evaluation Result: {report}")
+        if self.fabric.is_global_zero and len(self.fabric._loggers) > 0:
+            with open(os.path.join(self.log_dir, "report.json"), "w") as fp:
+                json.dump(report, fp)
+        return report
```
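The `_evaluate` loop above streams batch statistics through torchmetrics, so accuracy and loss accumulate across batches and are finalized with `compute()`. A self-contained sketch of that pattern, with random tensors standing in for a real classifier and dataloader:

```python
import torch
from torch.nn import functional as F
from torchmetrics import Accuracy, MeanMetric

accuracy = Accuracy(task="multiclass", num_classes=10)
loss_metric = MeanMetric()
for _ in range(3):  # stand-in for iterating a test dataloader
    logits = torch.randn(8, 10)           # stand-in for classifier(inputs)
    targets = torch.randint(0, 10, (8,))
    loss_metric.update(F.cross_entropy(logits, targets))
    accuracy(logits, targets)             # updates state, returns batch accuracy
print({"accuracy": accuracy.compute().item(), "loss": loss_metric.compute().item()})
```

The dataset-level values returned by `compute()` after the loop are what populate each task's `results` entry in the report.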
fusion_bench/utils/data.py
CHANGED

```diff
@@ -9,6 +9,18 @@ from torch.utils.data import DataLoader, Dataset
 
 
 class InfiniteDataLoader:
+    """
+    A wrapper class for DataLoader to create an infinite data loader.
+    This is useful in case we are only interested in the number of steps and not the number of epochs.
+
+    This class wraps a DataLoader and provides an iterator that resets
+    when the end of the dataset is reached, creating an infinite loop.
+
+    Attributes:
+        data_loader (DataLoader): The DataLoader to wrap.
+        data_iter (iterator): An iterator over the DataLoader.
+    """
+
     def __init__(self, data_loader: DataLoader):
         self.data_loader = data_loader
         self.data_iter = iter(data_loader)
```
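A brief usage sketch of the step-driven pattern this docstring describes. It assumes `InfiniteDataLoader` implements the iterator protocol (rewinding the underlying iterator when exhausted), which the docstring states but this hunk does not show:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

from fusion_bench.utils.data import InfiniteDataLoader

dataset = TensorDataset(torch.randn(10, 3), torch.randint(0, 2, (10,)))
loader = InfiniteDataLoader(DataLoader(dataset, batch_size=4))
for step in range(100):  # budget in optimization steps; epochs are implicit
    x, y = next(loader)  # assumed to wrap around once the dataset is exhausted
```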
fusion_bench/utils/devices.py
CHANGED

```diff
@@ -229,3 +229,17 @@ def cleanup_cuda():
     gc.collect()
     torch.cuda.empty_cache()
     torch.cuda.reset_peak_memory_stats()
+
+
+def print_memory_usage(print_fn=print):
+    """
+    Print the current GPU memory usage.
+
+    Returns:
+        str: A string containing the allocated and cached memory in MB.
+    """
+    allocated = torch.cuda.memory_allocated() / 1024**2  # convert to MB
+    cached = torch.cuda.memory_reserved() / 1024**2  # convert to MB
+    print_str = f"Allocated Memory: {allocated:.2f} MB\nCached Memory: {cached:.2f} MB"
+    print_fn(print_str)
+    return print_str
```
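A short usage sketch: the helper both prints through the injectable `print_fn` and returns the formatted string, and it requires a CUDA-enabled PyTorch runtime since it queries `torch.cuda` directly:

```python
import logging

from fusion_bench.utils.devices import print_memory_usage

log = logging.getLogger(__name__)

print_memory_usage()                    # default: writes to stdout via print
summary = print_memory_usage(log.info)  # or route the same string to a logger
```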
fusion_bench/utils/instantiate.py
CHANGED

```diff
@@ -2,6 +2,7 @@
 # Modified from Hydra
 import copy
 import functools
+from contextlib import contextmanager
 from enum import Enum
 from textwrap import dedent
 from typing import Any, Callable, Dict, List, Sequence, Tuple, Union
@@ -30,6 +31,17 @@ Function to be used for printing function calls.
 CATCH_EXCEPTION = True
 
 
+@contextmanager
+def set_print_function_call(value: bool):
+    global PRINT_FUNCTION_CALL
+    old_value = PRINT_FUNCTION_CALL
+    PRINT_FUNCTION_CALL = value
+    try:
+        yield
+    finally:
+        PRINT_FUNCTION_CALL = old_value
+
+
 def is_instantiable(config: Union[DictConfig, Any]) -> bool:
     if OmegaConf.is_dict(config):
         return "_target_" in config
```
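Because the new `set_print_function_call` is a `@contextmanager` that restores the previous flag in its `finally` block, it can temporarily toggle call tracing around instantiation. A hedged sketch, assuming the module's `instantiate` function accepts a `_target_`-style config as `is_instantiable` suggests; `collections.Counter` is an arbitrary stdlib target used only for illustration:

```python
from omegaconf import DictConfig

from fusion_bench.utils.instantiate import instantiate, set_print_function_call

cfg = DictConfig({"_target_": "collections.Counter"})
with set_print_function_call(False):  # suppress function-call tracing in this block
    counter = instantiate(cfg)
# the previous PRINT_FUNCTION_CALL value is restored here, even if an error was raised
```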
fusion_bench/utils/misc.py
CHANGED

```diff
@@ -1,6 +1,6 @@
-from typing import Iterable
+from typing import Iterable, List
 
-__all__ = ["first", "has_length"]
+__all__ = ["first", "has_length", "join_list"]
 
 
 def first(iterable: Iterable):
@@ -16,3 +16,10 @@ def has_length(dataset):
     except TypeError:
         # TypeError: len() of unsized object
         return False
+
+
+def join_list(list_of_list: List[List]):
+    ans = []
+    for item in list_of_list:
+        ans.extend(item)
+    return ans
```
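The new `join_list` flattens exactly one level of nesting, behaving like `itertools.chain.from_iterable` collected into a list; a quick equivalence check:

```python
from itertools import chain

from fusion_bench.utils.misc import join_list

nested = [[1, 2], [3], []]
assert join_list(nested) == [1, 2, 3]
assert list(chain.from_iterable(nested)) == join_list(nested)
```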
fusion_bench/utils/packages.py
CHANGED

```diff
@@ -82,3 +82,17 @@ def import_object(abs_obj_name: str):
     module_name, obj_name = abs_obj_name.rsplit(".", 1)
     module = importlib.import_module(module_name)
     return getattr(module, obj_name)
+
+
+def compare_versions(v1, v2):
+    """Compare two version strings.
+    Returns -1 if v1 < v2, 0 if v1 == v2, 1 if v1 > v2"""
+
+    v1 = version.parse(v1)
+    v2 = version.parse(v2)
+    if v1 < v2:
+        return -1
+    elif v1 > v2:
+        return 1
+    else:
+        return 0
```
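Since `compare_versions` goes through `version.parse` (presumably `packaging.version`, imported elsewhere in the module outside this hunk), the ordering is PEP 440-aware rather than lexicographic:

```python
from fusion_bench.utils.packages import compare_versions

assert compare_versions("0.2.12", "0.2.14") == -1
assert compare_versions("0.2.2", "0.2.14") == -1  # numeric ordering: "0.2.2" < "0.2.14"
assert compare_versions("0.2.14", "0.2.14") == 0
```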
fusion_bench/utils/parameters.py
CHANGED

```diff
@@ -252,7 +252,7 @@ def print_parameters(
 
 
 def check_parameters_all_equal(
-    list_of_param_names: List[Union[StateDictType, nn.Module, List[str]]]
+    list_of_param_names: List[Union[StateDictType, nn.Module, List[str]]],
 ) -> None:
     """
     Checks if all models have the same parameters.
```
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fusion_bench
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.14
|
|
4
4
|
Summary: A Comprehensive Benchmark of Deep Model Fusion
|
|
5
5
|
Author-email: Anke Tang <tang.anke@foxmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -45,6 +45,8 @@ Requires-Dist: rich
|
|
|
45
45
|
Requires-Dist: scipy
|
|
46
46
|
Requires-Dist: h5py
|
|
47
47
|
Requires-Dist: pytest
|
|
48
|
+
Provides-Extra: lm-eval-harness
|
|
49
|
+
Requires-Dist: lm-eval; extra == "lm-eval-harness"
|
|
48
50
|
Dynamic: license-file
|
|
49
51
|
|
|
50
52
|
<div align='center'>
|
|
@@ -122,7 +124,7 @@ Merging multiple expert models offers a promising approach for performing multi-
|
|
|
122
124
|
|
|
123
125
|
## Installation
|
|
124
126
|
|
|
125
|
-
|
|
127
|
+
Install from PyPI:
|
|
126
128
|
|
|
127
129
|
```bash
|
|
128
130
|
pip install fusion-bench
|
|
@@ -137,6 +139,24 @@ cd fusion_bench
|
|
|
137
139
|
pip install -e . # install the package in editable mode
|
|
138
140
|
```
|
|
139
141
|
|
|
142
|
+
### Install with [Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness)
|
|
143
|
+
|
|
144
|
+
[](https://doi.org/10.5281/zenodo.10256836)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
pip install "fusion-bench[lm-eval-harness]"
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
or install from local directory
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
pip install -e ".[lm-eval-harness]"
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
This will install the latest version of fusion-bench and the dependencies required for LM-Eval Harness.
|
|
158
|
+
Documentation for using LM-Eval Harness within FusionBench framework can be found at [this online documentation](https://tanganke.github.io/fusion_bench/taskpool/lm_eval_harness) or in the [`docs/taskpool/lm_eval_harness.md`](docs/taskpool/lm_eval_harness.md) markdown file.
|
|
159
|
+
|
|
140
160
|
## Introduction to Deep Model Fusion
|
|
141
161
|
|
|
142
162
|
Deep model fusion is a technique that merges, ensemble, or fuse multiple deep neural networks to obtain a unified model.
|