PyPI - fusion-bench - Versions diffs - 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl - Mend

fusion-bench 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

fusion_bench/__main__.py +4 -0
fusion_bench/dataset/fer2013.py +1 -0
fusion_bench/method/__init__.py +26 -4
fusion_bench/method/classification/__init__.py +1 -0
fusion_bench/method/classification/clip_finetune.py +1 -3
fusion_bench/method/classification/continual_clip_finetune.py +297 -0
fusion_bench/method/dare/__init__.py +1 -0
fusion_bench/method/dare/task_arithmetic.py +14 -7
fusion_bench/method/dare/ties_merging.py +100 -0
fusion_bench/method/isotropic_merging/__init__.py +15 -0
fusion_bench/method/isotropic_merging/iso.py +114 -0
fusion_bench/method/isotropic_merging/iso_utils.py +176 -0
fusion_bench/method/opcm/__init__.py +4 -0
fusion_bench/method/opcm/opcm.py +277 -0
fusion_bench/method/opcm/task_arithmetic.py +115 -0
fusion_bench/method/opcm/ties_merging.py +156 -0
fusion_bench/method/opcm/utils.py +73 -0
fusion_bench/method/opcm/weight_average.py +120 -0
fusion_bench/method/slerp/slerp.py +1 -1
fusion_bench/method/task_singular_vector/TSVM.py +22 -2
fusion_bench/method/task_singular_vector/utils/TSVM_utils.py +91 -93
fusion_bench/method/ties_merging/ties_merging.py +10 -0
fusion_bench/metrics/continual_learning/backward_transfer.py +22 -0
fusion_bench/mixins/clip_classification.py +4 -1
fusion_bench/programs/fabric_fusion_program.py +22 -11
fusion_bench/scripts/cli.py +1 -0
fusion_bench/taskpool/base_pool.py +1 -1
fusion_bench/taskpool/clip_vision/taskpool.py +12 -7
fusion_bench/utils/__init__.py +2 -1
fusion_bench/utils/dict.py +43 -0
fusion_bench/utils/expr.py +90 -0
fusion_bench/utils/fabric.py +17 -0
fusion_bench/utils/instantiate.py +7 -1
fusion_bench/utils/json.py +30 -0
fusion_bench/utils/parameters.py +27 -7
fusion_bench/utils/path.py +15 -0
fusion_bench/utils/plot/color_data.py +1726 -0
fusion_bench/utils/rich_utils.py +15 -0
fusion_bench/utils/set.py +8 -0
fusion_bench/utils/tensorboard.py +51 -0
{fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/METADATA +17 -18
{fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/RECORD +58 -29
{fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/WHEEL +1 -1
fusion_bench_config/method/classification/clip_continual_finetune.yaml +28 -0
fusion_bench_config/method/classification/clip_finetune.yaml +26 -0
fusion_bench_config/method/clip_finetune.yaml +2 -2
fusion_bench_config/method/dare/ties_merging.yaml +15 -0
fusion_bench_config/method/isotropic_merging/iso_c.yaml +4 -0
fusion_bench_config/method/isotropic_merging/iso_cts.yaml +5 -0
fusion_bench_config/method/opcm/opcm.yaml +12 -0
fusion_bench_config/method/opcm/task_arithmetic.yaml +12 -0
fusion_bench_config/method/opcm/ties_merging.yaml +18 -0
fusion_bench_config/method/opcm/weight_average.yaml +10 -0
fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +6 -0
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +18 -0
{fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/LICENSE +0 -0
{fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/entry_points.txt +0 -0
{fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/top_level.txt +0 -0

fusion_bench/method/task_singular_vector/utils/TSVM_utils.py CHANGED Viewed

@@ -116,7 +116,10 @@ def sum_svd_dict(svd_dict, config):
 ###############
 ##### LOSSLESS Orthogonalization
-def compute_and_sum_svd_mem_reduction_lossless(task_vectors, config):
+def compute_and_sum_svd_mem_reduction_lossless(
+    task_vectors: List[StateDictType],
+    accelerator: torch.device = "cuda" if torch.cuda.is_available() else "cpu",
+):
     """
     Computes the Singular Value Decomposition (SVD) for each task vector and merge the results.
@@ -129,40 +132,38 @@ def compute_and_sum_svd_mem_reduction_lossless(task_vectors, config):
     Args:
         task_vectors (list): A list of task vectors, where each task vector is a dictionary containing the vectors for each task.
-        config (object): A configuration object containing the device and dataset information.
+        accelerator (torch.device): The device to use for the computation.
     Returns:
         dict: A dictionary containing the new vectors after summing the SVD components.
     """
     # becareful wit vit-l on 20 task it does not fit in GPU or in 64 GB RAM (try without last layer)
-    device = config.device
     print("Computing SVD...")
     with torch.no_grad():
         new_vector = {}
-        for key in task_vectors[0].vector:
+        for key in task_vectors[0]:
+            original_device = task_vectors[0][key].device
             new_vector[key] = {}
-            for i, (task_vector, dataset) in enumerate(
-                zip(task_vectors, config.DATASETS)
-            ):
-                vec = task_vector.vector[key].to(device)
+            for i, task_vector in enumerate(task_vectors):
+                vec = task_vector[key].to(accelerator)
-                if (
-                    len(task_vector.vector[key].shape) == 2
-                    and "text_projection" not in key
-                ):
+                if len(task_vector[key].shape) == 2 and "text_projection" not in key:
                     u, s, v = torch.linalg.svd(vec, full_matrices=False)
                     if i == 0:
                         print(f"Computed SVD for {key}...")
                         sum_u = torch.zeros(
-                            u.shape[0], u.shape[1] * config.num_tasks, device=device
+                            u.shape[0],
+                            u.shape[1] * len(task_vectors),
+                            device=accelerator,
                         )
                         sum_s = torch.zeros(
-                            s.shape[0] * config.num_tasks, device=device
+                            s.shape[0] * len(task_vectors), device=accelerator
                         )
                         sum_v = torch.zeros(
-                            v.shape[0] * config.num_tasks, v.shape[1], device=device
+                            v.shape[0] * len(task_vectors),
+                            v.shape[1],
+                            device=accelerator,
                         )
                     reduced_index_s = s.shape[0]
@@ -184,7 +185,7 @@ def compute_and_sum_svd_mem_reduction_lossless(task_vectors, config):
                     else:
                         new_vector[key] += (vec - new_vector[key]) / (i + 1)
-            if len(task_vector.vector[key].shape) == 2 and "text_projection" not in key:
+            if len(task_vector[key].shape) == 2 and "text_projection" not in key:
                 u_u, s_u, v_u = torch.linalg.svd(sum_u, full_matrices=False)
                 u_v, s_v, v_v = torch.linalg.svd(sum_v, full_matrices=False)
@@ -197,13 +198,16 @@ def compute_and_sum_svd_mem_reduction_lossless(task_vectors, config):
                         v_v,
                     )
                 )
+            new_vector[key] = new_vector[key].to(original_device, non_blocking=True)
     return new_vector
 ###############
 ##### LOSSLESS EIGENDECOMP
-def compute_and_sum_svd_mem_reduction_lossless_eigen(task_vectors, config):
+def compute_and_sum_svd_mem_reduction_lossless_eigen(
+    task_vectors: List[StateDictType],
+    accelerator: torch.device = "cuda" if torch.cuda.is_available() else "cpu",
+):
     """
     Computes the Singular Value Decomposition (SVD) for each task vector and merge the results.
@@ -216,40 +220,39 @@ def compute_and_sum_svd_mem_reduction_lossless_eigen(task_vectors, config):
     Args:
         task_vectors (list): A list of task vectors, where each task vector is a dictionary containing the vectors for each task.
-        config (object): A configuration object containing the device and dataset information.
+        accelerator (torch.device): The device to use for the computation.
     Returns:
         dict: A dictionary containing the new vectors after merging the SVD components.
     """
     # becareful wit vit-l on 20 task it does not fit in GPU or in 64 GB RAM (try without last layer)
-    device = config.device
     print("Computing SVD...")
     with torch.no_grad():
         new_vector = {}
-        for key in task_vectors[0].vector:
+        for key in task_vectors[0]:
+            original_device = task_vectors[0][key].device
             new_vector[key] = {}
-            for i, (task_vector, dataset) in enumerate(
-                zip(task_vectors, config.DATASETS)
-            ):
-                vec = task_vector.vector[key].to(device)
+            for i, task_vector in enumerate(task_vectors):
+                vec = task_vector[key].to(accelerator)
-                if (
-                    len(task_vector.vector[key].shape) == 2
-                    and "text_projection" not in key
-                ):
+                if len(task_vector[key].shape) == 2 and "text_projection" not in key:
                     u, s, v = torch.linalg.svd(vec, full_matrices=False)
                     if i == 0:
                         print(f"Computed SVD for {key}...")
                         sum_u = torch.zeros(
-                            u.shape[0], u.shape[1] * config.num_tasks, device=device
+                            u.shape[0],
+                            u.shape[1] * len(task_vectors),
+                            device=accelerator,
                         )
                         sum_s = torch.zeros(
-                            s.shape[0] * config.num_tasks, device=device
+                            s.shape[0] * len(task_vectors), device=accelerator
                         )
                         sum_v = torch.zeros(
-                            v.shape[0] * config.num_tasks, v.shape[1], device=device
+                            v.shape[0] * len(task_vectors),
+                            v.shape[1],
+                            device=accelerator,
                         )
                     reduced_index_s = s.shape[0]
@@ -271,7 +274,7 @@ def compute_and_sum_svd_mem_reduction_lossless_eigen(task_vectors, config):
                     else:
                         new_vector[key] += (vec - new_vector[key]) / (i + 1)
-            if len(task_vector.vector[key].shape) == 2 and "text_projection" not in key:
+            if len(task_vector[key].shape) == 2 and "text_projection" not in key:
                 sum_s, indices = torch.sort(sum_s, stable=True)
                 sum_u = torch.index_select(sum_u, 1, indices)
@@ -293,12 +296,14 @@ def compute_and_sum_svd_mem_reduction_lossless_eigen(task_vectors, config):
                 new_vector[key] = torch.linalg.multi_dot(  # bool_mask *
                     (
+                        sum_u,
                         u_orth,
                         torch.diag(sum_s),
                         v_orth,
+                        sum_v,
                     )
                 )
+            new_vector[key] = new_vector[key].to(original_device, non_blocking=True)
     return new_vector
@@ -394,7 +399,10 @@ def compute_and_sum_svd_mem_reduction(
 ###############
 #### TSV Merge Eigendecomp
-def compute_and_sum_svd_mem_reduction_2(task_vectors, config):
+def compute_and_sum_svd_mem_reduction_2(
+    task_vectors: List[StateDictType],
+    accelerator: torch.device = "cuda" if torch.cuda.is_available() else "cpu",
+):
     """
     Computes the Singular Value Decomposition (SVD) for each vector in the task_vectors,
     reduces the dimensionality of the vectors based on the sv_reduction factor, and concatenate
@@ -404,36 +412,30 @@ def compute_and_sum_svd_mem_reduction_2(task_vectors, config):
     Args:
         task_vectors (list): A list of task vector objects, where each object contains a
                              dictionary of vectors.
-        config (object): Configuration object containing the following attributes:
-                         - DATASETS (list): List of datasets.
-                         - device (torch.device): The device to perform computations on.
+        accelerator (torch.device): The device to use for the computation.
     Returns:
         dict: A dictionary containing the new vectors after SVD computation and merging.
     """
-    sv_reduction = 1 / len(config.DATASETS)
-    device = config.device
+    sv_reduction = 1 / len(task_vectors)
     print("Computing SVD...")
     with torch.no_grad():
         new_vector = {}
-        for key in task_vectors[0].vector:
+        for key in task_vectors[0]:
+            original_device = task_vectors[0][key].device
             new_vector[key] = {}
-            for i, (task_vector, dataset) in enumerate(
-                zip(task_vectors, config.DATASETS)
-            ):
-                vec = task_vector.vector[key].to(device)
+            for i, task_vector in enumerate(task_vectors):
+                vec = task_vector[key].to(accelerator)
-                if (
-                    len(task_vector.vector[key].shape) == 2
-                    and "text_projection" not in key
-                ):
+                if len(task_vector[key].shape) == 2 and "text_projection" not in key:
                     u, s, v = torch.linalg.svd(vec, full_matrices=False)
                     if i == 0:
                         print(f"Computed SVD for {key}...")
-                        sum_u = torch.zeros_like(u, device=device)
-                        sum_s = torch.zeros_like(s, device=device)
-                        sum_v = torch.zeros_like(v, device=device)
+                        sum_u = torch.zeros_like(u, device=accelerator)
+                        sum_s = torch.zeros_like(s, device=accelerator)
+                        sum_v = torch.zeros_like(v, device=accelerator)
                     reduced_index_s = int(s.shape[0] * sv_reduction)
                     # select only the first reduced_index_s columns of u and place them
@@ -454,7 +456,7 @@ def compute_and_sum_svd_mem_reduction_2(task_vectors, config):
                     else:
                         new_vector[key] += (vec - new_vector[key]) / (i + 1)
-            if len(task_vector.vector[key].shape) == 2 and "text_projection" not in key:
+            if len(task_vector[key].shape) == 2 and "text_projection" not in key:
                 sum_s, indices = torch.sort(sum_s, stable=True)
                 sum_u = torch.index_select(sum_u, 1, indices)
@@ -483,13 +485,17 @@ def compute_and_sum_svd_mem_reduction_2(task_vectors, config):
                         sum_v,
                     )
                 )
+            new_vector[key] = new_vector[key].to(original_device, non_blocking=True)
     return new_vector
 ###############
 #### Rank Reduction TV
-def compute_and_sum_svd_mem_reduction_rank_reduction(task_vectors, config):
+def compute_and_sum_svd_mem_reduction_rank_reduction(
+    task_vectors: List[StateDictType],
+    accelerator: torch.device = "cuda" if torch.cuda.is_available() else "cpu",
+):
     """
     Compute and sum the Singular Value Decomposition (SVD) of task vectors with rank reduction.
@@ -499,36 +505,29 @@ def compute_and_sum_svd_mem_reduction_rank_reduction(task_vectors, config):
     Args:
         task_vectors (list): A list of task vector objects. Each object should have a `vector` attribute
                              which is a dictionary where keys are vector names and values are tensors.
-        config (object): Configuration object containing the following attributes:
-                         - DATASETS (list): List of datasets.
-                         - device (torch.device): The device to perform computations on.
+        accelerator (torch.device): The device to use for the computation.
     Returns:
         dict: A dictionary containing the new vectors after SVD computation and summation.
     """
-    sv_reduction = 1 / len(config.DATASETS)
-    device = config.device
+    sv_reduction = 1 / len(task_vectors)
     print("Computing SVD...")
     with torch.no_grad():
         new_vector = {}
-        for key in task_vectors[0].vector:
+        for key in task_vectors[0]:
+            original_device = task_vectors[0][key].device
             new_vector[key] = {}
-            for i, (task_vector, dataset) in enumerate(
-                zip(task_vectors, config.DATASETS)
-            ):
-                vec = task_vector.vector[key].to(device)
+            for i, task_vector in enumerate(task_vectors):
+                vec = task_vector[key].to(accelerator)
-                if (
-                    len(task_vector.vector[key].shape) == 2
-                    and "text_projection" not in key
-                ):
+                if len(task_vector[key].shape) == 2 and "text_projection" not in key:
                     u, s, v = torch.linalg.svd(vec, full_matrices=False)
                     if i == 0:
                         print(f"Computed SVD for {key}...")
-                        sum_u = torch.zeros_like(u, device=device)
-                        sum_s = torch.zeros_like(s, device=device)
-                        sum_v = torch.zeros_like(v, device=device)
+                        sum_u = torch.zeros_like(u, device=accelerator)
+                        sum_s = torch.zeros_like(s, device=accelerator)
+                        sum_v = torch.zeros_like(v, device=accelerator)
                     reduced_index_s = int(s.shape[0] * sv_reduction)
                     # select only the first reduced_index_s columns of u and place them
@@ -549,7 +548,7 @@ def compute_and_sum_svd_mem_reduction_rank_reduction(task_vectors, config):
                     else:
                         new_vector[key] += (vec - new_vector[key]) / (i + 1)
-            if len(task_vector.vector[key].shape) == 2 and "text_projection" not in key:
+            if len(task_vector[key].shape) == 2 and "text_projection" not in key:
                 new_vector[key] = torch.linalg.multi_dot(
                     (
                         sum_u,
@@ -557,26 +556,29 @@ def compute_and_sum_svd_mem_reduction_rank_reduction(task_vectors, config):
                         sum_v,
                     )
                 )
+            new_vector[key] = new_vector[key].to(original_device, non_blocking=True)
     return new_vector
-def compute_and_sum_svd_mem_reduction_dummy(task_vectors, config):
+def compute_and_sum_svd_mem_reduction_dummy(
+    task_vectors: List[StateDictType],
+    accelerator: torch.device = "cuda" if torch.cuda.is_available() else "cpu",
+):
     """To perform dummy operations."""
-    sv_reduction = 1 / 8
+    sv_reduction = 1 / len(task_vectors)
     print("Computing SVD...")
     with torch.no_grad():
         new_vector = {}
-        for key in task_vectors[0].vector:
+        for key in task_vectors[0]:
+            original_device = task_vectors[0][key].device
             new_vector[key] = {}
-            for i in range(0, 8):
-                if (
-                    len(task_vectors[0].vector[key].shape) == 2
-                    and "text_projection" not in key
-                ):
+            for i, task_vector in enumerate(task_vectors):
+                vec = task_vector[key].to(accelerator)
+                if len(task_vector[key].shape) == 2 and "text_projection" not in key:
                     if i == 0:
-                        u, s, v = torch.linalg.svd(
-                            task_vectors[0].vector[key], full_matrices=False
-                        )
+                        u, s, v = torch.linalg.svd(vec, full_matrices=False)
                         reduced_index_s = int(s.shape[0] * sv_reduction)
                         print(f"Computed SVD for {key}...")
@@ -620,16 +622,11 @@ def compute_and_sum_svd_mem_reduction_dummy(task_vectors, config):
                 else:
                     if i == 0:
-                        new_vector[key] = task_vectors[0].vector[key]
-                    # else:
-                    #     new_vector[key] += (
-                    #         task_vector.vector[key] - new_vector[key]
-                    #     ) / (i + 1)
+                        new_vector[key] = vec.clone()
+                    else:
+                        new_vector[key] += (vec - new_vector[key]) / (i + 1)
-            if (
-                len(task_vectors[0].vector[key].shape) == 2
-                and "text_projection" not in key
-            ):
+            if len(task_vector[key].shape) == 2 and "text_projection" not in key:
                 new_vector[key] = torch.linalg.multi_dot(
                     (
@@ -639,4 +636,5 @@ def compute_and_sum_svd_mem_reduction_dummy(task_vectors, config):
                     )
                 )
+            new_vector[key] = new_vector[key].to(original_device, non_blocking=True)
     return new_vector

fusion_bench/method/ties_merging/ties_merging.py CHANGED Viewed

@@ -1,3 +1,13 @@
+R"""
+Overview of Ties-Merging:
+1. Trim: For each task t, we trim the redundant parameters from the task vector $\tau_t$ to create $\hat{\tau}_t$ by keeping the top-k% values according to their magnitude and trimming the bottom $(100 - k)\%$ of the redundant parameters by resetting them to 0. This can be decomposed further as $\hat{\tau}_t = \hat{\gamma}_t \odot \hat{\mu}_t$.
+2. Elect: Next, we create an aggregate elected sign vector $\gamma_m$ for the merged model that resolves the disagreements in the sign for each parameter p across different models. To create the elected sign vector, we choose the sign with the highest total magnitude across all relevant models. For each parameter $p \in \{1, 2, \ldots, d\}$, we separate the values $\{\hat{\tau}_t^p\}_{t=1}^n$ based on their sign $(+1$ or $-1)$ and take their sum to calculate the total mass (i.e., total magnitude) in the positive and the negative direction. We then assign $\gamma_m^p$ as the sign with greater total movement. This can be efficiently computed using $\gamma_m^p = \text{sgn}(\sum_{t=1}^n \hat{\tau}_t^p)$.
+3. Disjoint Merge: Then, for each parameter p, we compute a disjoint mean by only keeping the parameter values from the models whose signs are the same as the aggregated elected sign and calculate their mean. Formally, let $A_p = \{t \in [n] \mid \hat{\gamma}_t^p = \gamma_m^p\}$, then $\tau_m^p = \frac{1}{|A_p|}\sum_{t\in A_p} \hat{\tau}_t^p$. Note that the disjoint mean always ignores the zero values.
+"""
 import logging
 from typing import Dict, List, Literal, Mapping, Union  # noqa: F401

fusion_bench/metrics/continual_learning/backward_transfer.py ADDED Viewed

@@ -0,0 +1,22 @@
+from typing import Dict
+import numpy as np
+def compute_backward_transfer(
+    acc_Ti: Dict[str, float], acc_ii: Dict[str, float]
+) -> float:
+    R"""
+    Compute the backward transfer (BWT) of a model on a set of tasks.
+
+    The result is the arithmetic mean, over all task names, of
+    ``acc_Ti[task] - acc_ii[task]``.
+
+    Equation:
+        BWT = \frac{1}{n} \sum_{k=1}^{n} (acc_{Ti}[k] - acc_{ii}[k])
+
+    Args:
+        acc_Ti (Dict[str, float]): Per-task score keyed by task name.
+            NOTE(review): presumably the accuracy on each task after the
+            final task T has been learned -- confirm against callers.
+        acc_ii (Dict[str, float]): Per-task score keyed by task name.
+            NOTE(review): presumably the accuracy on each task measured right
+            after that task was learned -- confirm against callers.
+
+    Returns:
+        float: The backward transfer of the model.
+
+    Raises:
+        AssertionError: If the two dictionaries do not have the same key set.
+            This check is an `assert`, so it is skipped when Python runs with `-O`.
+        ZeroDivisionError: If both dictionaries are empty (division by `len(acc_ii)`).
+    """
+    # both mappings must describe exactly the same set of tasks
+    assert set(acc_ii.keys()) == set(acc_Ti.keys())
+    # accumulate the per-task differences, then average over the number of tasks
+    bwt = 0
+    for task_name in acc_ii:
+        bwt += acc_Ti[task_name] - acc_ii[task_name]
+    return bwt / len(acc_ii)

fusion_bench/mixins/clip_classification.py CHANGED Viewed

@@ -161,12 +161,14 @@ class CLIPClassificationMixin(LightningFabricMixin):
                     cache_dir, os.path.normpath(f"{task}_zeroshot_weights.pt")
                 )
                 if os.path.exists(cache_file):
-                    log.info(f"Loading cached zeroshot weights for task: {task}")
                     zeroshot_weights = torch.load(
                         cache_file,
                         map_location="cpu",
                         weights_only=True,
                     ).detach()
+                    log.info(
+                        f"Loadded cached zeroshot weights for task: {task}, shape: {zeroshot_weights.shape}"
+                    )
                 else:
                     log.info(
                         f"Construct zero shot classification head for task: {task}"
@@ -180,6 +182,7 @@ class CLIPClassificationMixin(LightningFabricMixin):
             self.fabric.barrier()
             self.zeroshot_weights[task] = self.fabric.broadcast(zeroshot_weights, src=0)
             self.zeroshot_weights[task] = self.to_device(self.zeroshot_weights[task])
+            self.fabric.barrier()
         del clip_classifier
         if torch.cuda.is_available():

fusion_bench/programs/fabric_fusion_program.py CHANGED Viewed

@@ -103,12 +103,13 @@ class FabricModelFusionProgram(
             )
             if compat_load_fn is not None:
                 compat_load_fn = import_object(compat_load_fn)
-                print_bordered(
-                    OmegaConf.to_yaml(config),
-                    title="instantiate compat object",
-                    style="magenta",
-                    code_style="yaml",
-                )
+                if rank_zero_only.rank == 0:
+                    print_bordered(
+                        OmegaConf.to_yaml(config),
+                        title="instantiate compat object",
+                        style="magenta",
+                        code_style="yaml",
+                    )
                 obj = compat_load_fn(config)
             else:
                 raise ValueError(
@@ -159,7 +160,11 @@ class FabricModelFusionProgram(
             print("No save path specified for the merged model. Skipping saving.")
     def evaluate_merged_model(
-        self, taskpool: BaseTaskPool, merged_model: Union[nn.Module, Dict, Iterable]
+        self,
+        taskpool: BaseTaskPool,
+        merged_model: Union[nn.Module, Dict, Iterable],
+        *args,
+        **kwargs,
     ):
         """
         Evaluates the merged model using the provided task pool.
@@ -174,6 +179,8 @@ class FabricModelFusionProgram(
         Args:
             taskpool: The task pool used for evaluating the merged model.
             merged_model: The merged model to be evaluated. It can be an instance of `nn.Module`, a dictionary, or an iterable.
+            *args: Additional positional arguments to be passed to the `evaluate` method of the taskpool.
+            **kwargs: Additional keyword arguments to be passed to the `evaluate` method of the taskpool.
         Returns:
             The evaluation report. The type of the report depends on the type of the merged model:
@@ -182,20 +189,20 @@ class FabricModelFusionProgram(
             - If the merged model is an iterable, the report is a list of evaluation reports.
         """
         if isinstance(merged_model, nn.Module):
-            report = taskpool.evaluate(merged_model)
+            report = taskpool.evaluate(merged_model, *args, **kwargs)
             return report
         elif isinstance(merged_model, Dict):
             report = {}
             for key, item in merged_model.items():
                 if isinstance(item, nn.Module):
-                    report[key] = taskpool.evaluate(item)
+                    report[key] = taskpool.evaluate(item, *args, **kwargs)
                 else:
                     # metadata
                     report[key] = item
             return report
         elif isinstance(merged_model, Iterable):
             return [
-                self.evaluate_merged_model(taskpool, m)
+                self.evaluate_merged_model(taskpool, m, *args, **kwargs)
                 for m in tqdm(merged_model, desc="Evaluating models")
             ]
         else:
@@ -272,7 +279,11 @@ class FabricModelFusionProgram(
         """
         if self.log_dir is not None:
             # make symlink to the hydra output directory
-            hydra_output_dir = get_hydra_output_dir()
+            try:
+                hydra_output_dir = get_hydra_output_dir()
+            except Exception as e:
+                hydra_output_dir = None
             if hydra_output_dir is not None:
                 os.makedirs(self.log_dir, exist_ok=True)
                 try:

fusion_bench/scripts/cli.py CHANGED Viewed

@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 """
 This is the CLI script that is executed when the user runs the `fusion-bench` command.
 The script is responsible for parsing the command-line arguments, loading the configuration file, and running the fusion algorithm.

fusion_bench/taskpool/base_pool.py CHANGED Viewed

@@ -7,7 +7,7 @@ class BaseTaskPool(BaseYAMLSerializableModel):
     _program = None
     @abstractmethod
-    def evaluate(self, model):
+    def evaluate(self, model, *args, **kwargs):
         """
         Evaluate the model on all tasks in the task pool, and return a report.

fusion_bench/taskpool/clip_vision/taskpool.py CHANGED Viewed

@@ -238,11 +238,13 @@ class CLIPVisionModelTaskPool(
         else:
             test_loader = test_loader
-        for batch in (
-            pbar := tqdm(
-                test_loader, desc="Evaluating", leave=False, dynamic_ncols=True
-            )
-        ):
+        pbar = tqdm(
+            test_loader,
+            desc=f"Evaluating {task_name}",
+            leave=False,
+            dynamic_ncols=True,
+        )
+        for batch in pbar:
             inputs, targets = batch
             outputs = classifier(
                 inputs,
@@ -309,11 +311,14 @@ class CLIPVisionModelTaskPool(
         }
         if name is not None:
             report["model_info"]["name"] = name
-        for task_name, test_dataloader in tqdm(
+        # evaluate on each task
+        pbar = tqdm(
             self.test_dataloaders.items(),
             desc="Evaluating tasks",
             total=len(self.test_dataloaders),
-        ):
+        )
+        for task_name, test_dataloader in pbar:
             classnames, templates = get_classnames_and_templates(task_name)
             self.on_task_evaluation_begin(classifier, task_name)
             classifier.set_classification_task(classnames, templates)

fusion_bench/utils/__init__.py CHANGED Viewed

@@ -6,7 +6,8 @@ from . import data, functools, path
 from .cache_utils import *
 from .devices import *
 from .dtype import parse_dtype
-from .instantiate import instantiate
+from .fabric import seed_everything_by_time
+from .instantiate import instantiate, is_instantiable
 from .misc import *
 from .packages import import_object
 from .parameters import *

fusion_bench/utils/dict.py ADDED Viewed

@@ -0,0 +1,43 @@
+from copy import deepcopy
+from typing import Iterable, List, Tuple, Union
+def dict_get(d: dict, keys: Iterable[str], default=None):
+    """Look up several keys in `d` at once.
+
+    Args:
+        d (dict): dictionary to read from.
+        keys (Iterable[str]): keys to look up, in the order the results are wanted.
+        default (optional): value used for every key that is missing from `d`. Defaults to None.
+
+    Returns:
+        list: one entry per key in `keys`, in iteration order; each entry is `d.get(key, default)`.
+    """
+    return [d.get(k, default) for k in keys]
+def dict_map(f, d: dict, *, max_level: int = -1, skip_levels=0, inplace=False):
+    """Apply function f to the non-dict values of a (possibly nested) dictionary d.
+
+    Nested dictionaries are traversed recursively; the top-level dictionary is level 0.
+    Values that are themselves dictionaries are never passed to `f`, they are only descended into.
+
+    Args:
+        f (callable): function to apply
+        d (dict): input dictionary
+        max_level (int, optional): traversal stops at the dictionary whose level equals `max_level`
+            (that dictionary and everything below it is left untransformed), -1 means unlimited.
+            Note that `max_level=0` therefore transforms nothing. Defaults to -1.
+        skip_levels (int, optional): number of levels to skip; non-dict values found at a level
+            smaller than `skip_levels` are left unchanged. Defaults to 0.
+        inplace (bool, optional): whether to modify input dictionary in place. If False, the
+            result is built on a deep copy of `d`. Defaults to False.
+    Returns:
+        dict: transformed dictionary (`d` itself when `inplace` is True, otherwise the deep copy)
+    Raises:
+        TypeError: if `d` is not a dict.
+    """
+    if not isinstance(d, dict):
+        raise TypeError("dict_map: d must be a dict")
+    # `ans` is the dictionary that receives the transformed values
+    if inplace:
+        ans = d
+    else:
+        ans = deepcopy(d)
+    def dict_map_impl(from_dict, to_dict, level):
+        # walk `from_dict` and write results into the structurally identical `to_dict`
+        if level == max_level:
+            # depth limit reached: leave this sub-dictionary as it is
+            return
+        for k in from_dict.keys():
+            if isinstance(from_dict[k], dict):
+                dict_map_impl(from_dict[k], to_dict[k], level + 1)
+            else:
+                if level < skip_levels:
+                    # leaf is above the first level to transform: keep the existing value
+                    continue
+                else:
+                    # only existing keys are reassigned, so the dict size is unchanged
+                    # even when `to_dict` is the dictionary being iterated (inplace=True)
+                    to_dict[k] = f(from_dict[k])
+    dict_map_impl(d, ans, 0)
+    return ans

fusion-bench 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl

fusion-bench 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl