fusion-bench 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- fusion_bench/compat/method/__init__.py +2 -0
- fusion_bench/compat/method/base_algorithm.py +7 -2
- fusion_bench/compat/modelpool/__init__.py +3 -2
- fusion_bench/compat/taskpool/__init__.py +1 -1
- fusion_bench/dataset/arc_agi/__init__.py +6 -1
- fusion_bench/dataset/arc_agi/arc.py +26 -7
- fusion_bench/dataset/arc_agi/arc_agi.py +156 -25
- fusion_bench/dataset/arc_agi/np_cache.py +0 -1
- fusion_bench/dataset/arc_agi/preprocess.py +51 -9
- fusion_bench/dataset/llama/__init__.py +1 -0
- fusion_bench/dataset/llama/alpaca.py +93 -3
- fusion_bench/dataset/llama/collate.py +72 -5
- fusion_bench/dataset/llama/metamathqa.py +50 -0
- fusion_bench/dataset/llama/preference_700k.py +70 -0
- fusion_bench/dataset/llama/stanford_shp.py +90 -0
- fusion_bench/dataset/llama/ultrachat.py +58 -0
- fusion_bench/dataset/llama/utils/__init__.py +0 -0
- fusion_bench/method/__init__.py +4 -1
- fusion_bench/method/adamerging/__init__.py +1 -1
- fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -4
- fusion_bench/method/adamerging/min_norm_solvers.py +4 -4
- fusion_bench/method/linear/expo.py +39 -0
- fusion_bench/method/lm_finetune/__init__.py +1 -0
- fusion_bench/method/lm_finetune/bradley_terry_rm.py +432 -0
- fusion_bench/method/lm_finetune/fullfinetune_sft.py +122 -150
- fusion_bench/method/lm_finetune/peftfinetune_sft.py +102 -157
- fusion_bench/method/pruning/llama_magnitude_prune.py +2 -2
- fusion_bench/method/pruning/llama_random_prune.py +2 -2
- fusion_bench/method/pruning/magnitude_diff_pruning.py +2 -1
- fusion_bench/method/rankone_moe/__init__.py +3 -0
- fusion_bench/method/rankone_moe/clip_rankone_moe.py +160 -0
- fusion_bench/method/rankone_moe/rankone_moe.py +249 -0
- fusion_bench/method/simple_average.py +1 -1
- fusion_bench/method/surgery/__init__.py +3 -0
- fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +157 -0
- fusion_bench/mixins/__init__.py +2 -0
- fusion_bench/mixins/clip_classification.py +60 -12
- fusion_bench/mixins/fabric_training.py +320 -0
- fusion_bench/mixins/lightning_fabric.py +11 -2
- fusion_bench/modelpool/__init__.py +2 -0
- fusion_bench/modelpool/causal_lm/__init__.py +1 -1
- fusion_bench/modelpool/causal_lm/causal_lm.py +21 -22
- fusion_bench/modelpool/seq_classification_lm/__init__.py +2 -0
- fusion_bench/modelpool/seq_classification_lm/reward_model.py +15 -0
- fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +98 -0
- fusion_bench/models/chat_templates/__init__.py +1 -0
- fusion_bench/models/chat_templates/llama_3_Instruct.py +1 -0
- fusion_bench/models/chat_templates/load_tokenizer.py +43 -0
- fusion_bench/models/hf_clip.py +50 -9
- fusion_bench/models/rankone_moe.py +410 -0
- fusion_bench/models/surgery/surgerymodelwrapper.py +157 -0
- fusion_bench/models/utils.py +8 -0
- fusion_bench/models/wrappers/layer_wise_fusion.py +14 -5
- fusion_bench/models/wrappers/task_wise_fusion.py +5 -5
- fusion_bench/optim/__init__.py +2 -0
- fusion_bench/optim/exception.py +47 -0
- fusion_bench/optim/lr_scheduler/__init__.py +1 -0
- fusion_bench/optim/lr_scheduler/linear_warmup.py +222 -0
- fusion_bench/optim/lr_scheduler/utils/__init__.py +1 -0
- fusion_bench/optim/lr_scheduler/utils/visualization.py +119 -0
- fusion_bench/optim/mezo.py +0 -2
- fusion_bench/programs/fabric_fusion_program.py +5 -1
- fusion_bench/taskpool/__init__.py +10 -2
- fusion_bench/taskpool/clip_vision/__init__.py +1 -0
- fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +112 -0
- fusion_bench/taskpool/clip_vision/taskpool.py +43 -6
- fusion_bench/taskpool/llama/reward_model.py +157 -0
- fusion_bench/taskpool/nyuv2_taskpool.py +2 -0
- fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py +2 -1
- fusion_bench/utils/hydra_utils.py +22 -0
- fusion_bench/utils/plot/__init__.py +0 -0
- fusion_bench/utils/plot/token.py +52 -0
- fusion_bench/utils/plot/token_notebook.py +127 -0
- fusion_bench/utils/type.py +5 -3
- {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/METADATA +1 -1
- {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/RECORD +104 -57
- fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -1
- fusion_bench_config/dataset/llm_sft/alpaca_cleaned.yaml +6 -0
- fusion_bench_config/dataset/llm_sft/ultrachat_200k.yaml +3 -0
- fusion_bench_config/fabric/llama_peft_fsdp.yaml +16 -0
- fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
- fusion_bench_config/fabric/strategy/deepspeed.yaml +10 -0
- fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +9 -0
- fusion_bench_config/fabric_model_fusion.yaml +1 -1
- fusion_bench_config/llama_full_finetune.yaml +19 -0
- fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +47 -0
- fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +13 -6
- fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +17 -9
- fusion_bench_config/method/rankone_moe/rankone_moe.yaml +26 -0
- fusion_bench_config/method/regmean/clip_regmean.yaml +1 -0
- fusion_bench_config/method/surgery/adamerging_surgery.yaml +27 -0
- fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +21 -0
- fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +21 -0
- fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +19 -0
- fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +18 -0
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +23 -0
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +14 -0
- fusion_bench_config/nyuv2_config.yaml +5 -1
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +18 -0
- fusion_bench_config/taskpool/reward_model_evaluation.yaml +18 -0
- fusion_bench_config/llama_weighted_average.yaml +0 -26
- {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/LICENSE +0 -0
- {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/top_level.txt +0 -0
fusion_bench/method/lm_finetune/peftfinetune_sft.py:

```diff
@@ -1,3 +1,4 @@
+import functools
 import itertools
 import logging
 import os
@@ -10,16 +11,16 @@ import peft
 import torch
 from lightning.fabric.strategies.fsdp import FSDPStrategy
 from lightning.fabric.utilities import rank_zero_only
-from omegaconf import DictConfig
-from peft import PeftModel, get_peft_config, get_peft_model
+from omegaconf import DictConfig, OmegaConf
+from peft import LoraConfig, PeftModel, get_peft_config, get_peft_model
 from torch import nn
-from torch.utils.data import
+from torch.utils.data import ConcatDataset, DataLoader
 from tqdm.auto import tqdm
 from typing_extensions import TYPE_CHECKING, override

 from fusion_bench import BaseAlgorithm, BaseModelPool
 from fusion_bench.dataset.llama.collate import padded_collate_sft
-from fusion_bench.mixins import
+from fusion_bench.mixins import FabricTrainingMixin
 from fusion_bench.modelpool import CausalLMPool
 from fusion_bench.utils import instantiate
 from fusion_bench.utils.dtype import get_dtype
@@ -35,7 +36,7 @@ if TYPE_CHECKING:
 log = logging.getLogger(__name__)


-class PeftFinetuneSFT(BaseAlgorithm,
+class PeftFinetuneSFT(BaseAlgorithm, FabricTrainingMixin):

     model: Union[
         nn.Module, "_FabricModule", "LlamaForCausalLM", PeftModel, peft.LoraModel
@@ -65,7 +66,9 @@ class PeftFinetuneSFT(BaseAlgorithm, LightningFabricMixin):
         gradient_clip_algorithm: Literal["value", "norm"] = "norm",
         save_optimizer_state: bool = False,
         save_full_model: bool = False,
+        save_ckpt_type: Literal["lightning", "peft"] = "peft",
         ckpt_path: Optional[str] = None,
+        max_length: int = 6144,
         **kwargs,
     ):
         """
@@ -90,6 +93,7 @@ class PeftFinetuneSFT(BaseAlgorithm, LightningFabricMixin):
             gradient_clip_algorithm(str): Algorithm to use for gradient clipping. Available options: 'value', 'norm'. If set to 'value', the gradients will be clipped to the specified value. If set to 'norm', the gradients will be clipped to the specified norm.
             save_optimizer_state(bool): Whether to save the optimizer and lr_scheduler state along with the model checkpoint.
             save_full_model(bool): Whether to save the full model or only the trainable parameters in the model checkpoint.
+            save_ckpt_type(str): Type of checkpoint to save. Available options: 'lightning', 'peft'. If set to 'lightning', the model will be saved using the Lightning checkpointing mechanism. If set to 'peft', the model will be saved using the PEFT checkpointing mechanism.
             ckpt_path(str): Path to the checkpoint to load before training. If set to None, no checkpoint will be loaded.
         """
         self._optimizer = optimizer
@@ -110,23 +114,31 @@ class PeftFinetuneSFT(BaseAlgorithm, LightningFabricMixin):
         self.gradient_clip_algorithm = gradient_clip_algorithm
         self.save_optimizer_state = save_optimizer_state
         self.save_full_model = save_full_model
+        self.save_ckpt_type = save_ckpt_type
         self.ckpt_path = ckpt_path
+        self.max_length = max_length
         super().__init__(**kwargs)

     def run(self, modelpool: CausalLMPool):
         self.modelpool = modelpool
         self.setup()
-        self.train()
+        self.train(self.model, self.optimizer, self.lr_scheduler)

         if self.merge_and_unload:
             self.model = self.model.merge_and_unload()
         return self.model

     def setup_model(self):
+        # https://github.com/Lightning-AI/litgpt/blob/main/litgpt/finetune/lora.py
+        self.tokenizer = self.modelpool.load_tokenizer()
+        if self.tokenizer.pad_token_id is None:
+            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+
         model = self.modelpool.load_pretrained_model()

         # get the PEFT model
-        peft_config =
+        peft_config = instantiate(self._peft_config, _convert_="all")
+        peft_config.save_pretrained(os.path.join(self.log_dir, "peft_config"))
         peft_model = get_peft_model(model, peft_config, self.adapter_name)
         peft_model.print_trainable_parameters()

@@ -139,21 +151,16 @@ class PeftFinetuneSFT(BaseAlgorithm, LightningFabricMixin):
             self.model.gradient_checkpointing_enable(
                 gradient_checkpointing_kwargs={"use_reentrant": True}
             )
+            self.use_cache = False
+        else:
+            self.use_cache = True
+
         self.model_dtype = get_dtype(self.model)
+        self.model = self.model.to(dtype=self.model_dtype)

     def configure_optimizer(self):
         # compute expected total steps
-        self.
-        if self.max_steps > 0:
-            self.expected_total_steps.append(self.max_steps)
-        if self.max_steps_per_epoch > 0 and self.max_epochs > 0:
-            self.expected_total_steps.append(self.max_steps_per_epoch * self.max_epochs)
-        if self.max_epochs > 0:
-            self.expected_total_steps.append(
-                len(self.train_dataloader) * self.max_epochs
-            )
-        self.expected_total_steps = min(self.expected_total_steps)
-        log.info(f"Expected total steps: {self.expected_total_steps}")
+        self.compute_expected_total_steps(self.train_dataloader)

         optimizer = instantiate(self._optimizer, self.model.parameters())
         if self._lr_scheduler is not None:
@@ -192,7 +199,9 @@ class PeftFinetuneSFT(BaseAlgorithm, LightningFabricMixin):
             train_dataset,
             **self.dataloader_kwargs,
             shuffle=True,
-            collate_fn=
+            collate_fn=functools.partial(
+                padded_collate_sft, pad_token_id=self.tokenizer.pad_token_id
+            ),
         )
         self.train_dataloader = fabric.setup_dataloaders(self.train_dataloader)

@@ -205,28 +214,19 @@ class PeftFinetuneSFT(BaseAlgorithm, LightningFabricMixin):
         optimizer = self.configure_optimizer()
         optimizer, lr_scheduler = optimizer["optimizer"], optimizer["lr_scheduler"]

-        self.model
+        self.model = self.fabric.setup_module(self.model)
+        self.optimizer = self.fabric.setup_optimizers(optimizer)
         self.lr_scheduler = lr_scheduler

-
+    @override
+    def train_epoch(self, *args, **kwargs):
         fabric = self.fabric

-
-        if self.gradient_clip_algorithm == "value":
-            fabric.clip_gradients(self.model, clip_val=self.gradient_clip_val)
-        elif self.gradient_clip_algorithm == "norm":
-            fabric.clip_gradients(self.model, max_norm=self.gradient_clip_val)
-        else:
-            raise ValueError(
-                f"Unknown gradient clip algorithm: {self.gradient_clip_algorithm}. Available options: 'value', 'norm'"
-            )
-
-    def train_epoch(self):
-        fabric = self.fabric
+        accumulated_loss = 0
         for step_idx, batch in enumerate(
             pbar := tqdm(
                 self.train_dataloader,
-                desc="Training
+                desc="Training Batches",
                 dynamic_ncols=True,
                 leave=False,
                 disable=not fabric.is_global_zero,
@@ -234,24 +234,30 @@ class PeftFinetuneSFT(BaseAlgorithm, LightningFabricMixin):
         ):
             is_accumulating = (step_idx + 1) % self.accumulate_grad_batches != 0

+            if self.max_length > 0 and batch["input_ids"].shape[1] > self.max_length:
+                log.warning(
+                    f"Input length exceeds max_length: {batch['input_ids'].shape[1]} > {self.max_length}. Truncating input."
+                )
+                batch["input_ids"] = batch["input_ids"][:, : self.max_length]
+                batch["attention_mask"] = batch["attention_mask"][:, : self.max_length]
+                batch["labels"] = batch["labels"][:, : self.max_length]
+
             # disable gradient synchronization if accumulating gradients across steps for improved performance
             with fabric.no_backward_sync(self.model, enabled=is_accumulating):
                 # use_cache=True is not compatible with gradient checkpointing, so we disable it here
-                output = self.model(
-
+                output = self.model(
+                    input_ids=batch["input_ids"],
+                    attention_mask=batch["attention_mask"],
+                    labels=batch["labels"],
+                    use_cache=self.use_cache,
+                )
+                loss = output["loss"] / self.accumulate_grad_batches

                 fabric.backward(loss)
-
-            metrics = {
-                "train/loss": loss.item(),
-                "train/epoch_idx": self.epoch_idx,
-                "train/lr": self.optimizer.param_groups[0]["lr"],
-            }
-            fabric.log_dict(metrics, step=self.global_step_idx)
-            pbar.set_postfix(metrics)
+            accumulated_loss += loss.item()

             if not is_accumulating:
-                self.
+                self.clip_gradients_if_needed(self.model, self.optimizer)

                 # run lr_scheduler at the end of the step if interval is set to "step"
                 if (
@@ -264,104 +270,30 @@ class PeftFinetuneSFT(BaseAlgorithm, LightningFabricMixin):
                 self.optimizer.step()
                 self.optimizer.zero_grad()

-
-
+                metrics = {
+                    "train/loss": accumulated_loss,
+                    "train/epoch_idx": self.epoch_idx,
+                    "train/lr": self.optimizer.param_groups[0]["lr"],
+                }
+                fabric.log_dict(metrics, step=self.global_step_idx)
+                pbar.set_postfix(metrics)

-
-
-                    self.max_steps_per_epoch > 0
-                    and step_idx + 1 >= self.max_steps_per_epoch
-                ):
-                    break
-                # break if max_steps is set, and exit training
-                if self.max_steps > 0 and self.global_step_idx >= self.max_steps:
-                    self.is_training = False
-                    break
+                # save the model at the end of the step if interval is set to "step" and frequency is met
+                self.conditional_checkpoint_save(stage="end_of_step")

-
+                # break if max_steps_per_epoch is set, and exit epoch
+                if (
+                    self.max_steps_per_epoch > 0
+                    and step_idx + 1 >= self.max_steps_per_epoch
+                ):
+                    break
+                # break if max_steps is set, and exit training
+                if self.max_steps > 0 and self.global_step_idx >= self.max_steps - 1:
+                    self.is_training = False
+                    break

-
-
-        self.is_training = True
-        self.global_step_idx = 0
-        self.model.train()
-        for epoch_idx in tqdm(
-            range(self.max_epochs) if self.max_epochs > 0 else itertools.count(0),
-            "Training Epoch",
-            dynamic_ncols=True,
-            leave=False,
-            disable=not fabric.is_global_zero,
-        ):
-            self.epoch_idx = epoch_idx
-            self.train_epoch()
-            # run lr_scheduler at the end of the epoch if interval is set to "epoch"
-            if (
-                self.lr_scheduler_interval == "epoch"
-                and (epoch_idx + 1) % self.lr_scheduler_frequency == 0
-            ):
-                self.lr_scheduler.step()
-
-            # save the model at the end of the epoch if interval is set to "epoch" and frequency is met
-            self._try_save_checkpoint(stage="end_of_epoch")
-
-            if not self.is_training:
-                break
-
-        # save the model at the end of training
-        self._try_save_checkpoint(stage="end_of_training")
-
-    def _try_save_checkpoint(
-        self, stage: Literal["end_of_step", "end_of_epoch", "end_of_training"]
-    ):
-        if stage == "end_of_step":
-            if (
-                self.checkpoint_save_interval == "step"
-                and (self.global_step_idx + 1) % self.checkpoint_save_frequency == 0
-            ):
-                self.save_checkpoint(
-                    os.path.join(
-                        self.log_dir, "checkpoints", f"step={self.global_step_idx}.ckpt"
-                    )
-                )
-        elif stage == "end_of_epoch":
-            if (
-                self.checkpoint_save_interval == "epoch"
-                and (self.epoch_idx + 1) % self.checkpoint_save_frequency == 0
-            ):
-                self.save_checkpoint(
-                    os.path.join(
-                        self.log_dir, "checkpoints", f"epoch={self.epoch_idx}.ckpt"
-                    )
-                )
-        elif stage == "end_of_training":
-            # if the checkpoint has not been saved yet, save it
-            if self.global_step_idx > self._latest_saved_checkpoint_global_step:
-                self.save_checkpoint(
-                    os.path.join(
-                        self.log_dir,
-                        "checkpoints",
-                        f"epoch={self.epoch_idx}_step={self.global_step_idx}.ckpt",
-                    )
-                )
-            try:
-                os.symlink(
-                    os.path.join(
-                        self.log_dir,
-                        "checkpoints",
-                        "latest_model.ckpt",
-                    ),
-                    os.path.join(
-                        self.log_dir,
-                        "checkpoints",
-                        f"epoch={self.epoch_idx}_step={self.global_step_idx}.ckpt",
-                    ),
-                )
-            except Exception as e:
-                pass
-        else:
-            raise ValueError(
-                f"Unknown stage: {stage}. Available options: 'end_of_step', 'end_of_epoch', 'end_of_training'"
-            )
+                self.global_step_idx += 1
+                accumulated_loss = 0

     def save_checkpoint(
         self,
@@ -373,24 +305,37 @@ class PeftFinetuneSFT(BaseAlgorithm, LightningFabricMixin):
             return log.warning(f"Checkpoint already exists at {path}. Skipping save.")

         fabric = self.fabric
-
-
-
-
-
-
-
-
-
-
-
+        if self.save_ckpt_type == "lightning":
+            state = {"model": self.model}
+
+            # save the optimizer and lr_scheduler state if needed
+            if self.save_optimizer_state and save_optimizer_state is not False:
+                state.update(
+                    {
+                        "optimizer": self.optimizer,
+                        "lr_scheduler": self.lr_scheduler,
+                        "global_step_idx": self.global_step_idx,
+                        "epoch_idx": self.epoch_idx,
+                    }
+                )
+            trainable_param_names = set(
+                name
+                for name, param in self.model.state_dict(keep_vars=True).items()
+                if param.requires_grad
+            )
+            filter = (
+                None
+                if self.save_full_model
+                else {"model": lambda k, p: k in trainable_param_names}
+            )
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+            fabric.save(path, state=state, filter=filter)
+        elif self.save_ckpt_type == "peft":
+            self.model.save_pretrained(path, is_main_process=fabric.is_global_zero)
+        else:
+            raise ValueError(
+                f"Unknown save_ckpt_type: {self.save_ckpt_type}. Available options: 'lightning', 'peft'"
             )
-
-        filter = (
-            None if self.save_full_model else {"model": lambda k, p: p.requires_grad}
-        )
-
-        fabric.save(path, state=state, filter=filter)
         self._latest_saved_checkpoint_global_step = self.global_step_idx

     def load_checkpoint(self, path: Union[str, Path]):
```
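Two small patterns recur in the hunks above: the collate function is bound to the tokenizer's pad token with `functools.partial`, and over-long batches are clipped to `max_length` before the forward pass. The following is a minimal, self-contained sketch of those two patterns using toy helpers (`pad_collate`, `truncate_batch`); it is illustrative only and does not use fusion_bench's `padded_collate_sft`.

```python
import functools

import torch
from torch.utils.data import DataLoader


def pad_collate(samples, pad_token_id):
    """Right-pad variable-length 1-D token tensors to the longest sample in the batch."""
    max_len = max(s.shape[0] for s in samples)
    input_ids = torch.full((len(samples), max_len), pad_token_id, dtype=torch.long)
    attention_mask = torch.zeros(len(samples), max_len, dtype=torch.long)
    for i, s in enumerate(samples):
        input_ids[i, : s.shape[0]] = s
        attention_mask[i, : s.shape[0]] = 1
    return {"input_ids": input_ids, "attention_mask": attention_mask}


def truncate_batch(batch, max_length):
    """Clip every (batch, seq_len) tensor in the batch to at most max_length tokens."""
    if max_length > 0 and batch["input_ids"].shape[1] > max_length:
        batch = {k: v[:, :max_length] for k, v in batch.items()}
    return batch


# Toy dataset of variable-length "token id" tensors.
dataset = [torch.arange(1, n + 1) for n in (5, 9, 3)]
loader = DataLoader(
    dataset,
    batch_size=3,
    # Bind the pad token id once, so the DataLoader only sees a one-argument collate_fn.
    collate_fn=functools.partial(pad_collate, pad_token_id=0),
)
for batch in loader:
    batch = truncate_batch(batch, max_length=8)
    print(batch["input_ids"].shape)  # torch.Size([3, 8])
```

In the diff itself the same binding is done with `padded_collate_sft`, and the truncation is applied to `input_ids`, `attention_mask`, and `labels` together so the three tensors stay aligned.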
fusion_bench/method/pruning/ (one of the llama pruning modules, llama_magnitude_prune.py or llama_random_prune.py; both show +2 -2):

```diff
@@ -1,7 +1,7 @@
-from typing import Literal, Optional, Union  # noqa: F401
+from typing import Dict, Literal, Optional, Union  # noqa: F401

 import torch
-from torch import
+from torch import nn
 from tqdm.auto import tqdm
 from transformers import LlamaForCausalLM, LlamaModel

```
fusion_bench/method/pruning/magnitude_diff_pruning.py:

```diff
@@ -1,3 +1,4 @@
+import functools
 import logging
 import re
 from copy import deepcopy
@@ -10,7 +11,7 @@ from tqdm.auto import tqdm
 from fusion_bench.method import BaseAlgorithm
 from fusion_bench.mixins.simple_profiler import SimpleProfilerMixin
 from fusion_bench.modelpool import BaseModelPool
-
+
 from .prune_utils import unstructured_magnitude_prune_

 log = logging.getLogger(__name__)
```
fusion_bench/method/rankone_moe/clip_rankone_moe.py (new file):

```diff
@@ -0,0 +1,160 @@
+import functools
+import logging
+import os
+from copy import deepcopy
+
+import torch
+from torch import Tensor
+from torch.utils.data import DataLoader
+from transformers.models.clip.modeling_clip import CLIPEncoder
+
+from fusion_bench.dataset import CLIPDataset
+from fusion_bench.method.task_arithmetic.task_arithmetic import task_arithmetic_merge
+from fusion_bench.mixins import CLIPClassificationMixin
+from fusion_bench.modelpool import CLIPVisionModelPool
+from fusion_bench.models.rankone_moe import RankOneMoE
+from fusion_bench.utils.data import InfiniteDataLoader
+
+from .rankone_moe import RankOneMoEAlgorithm
+
+log = logging.getLogger(__name__)
+
+
+class CLIPRankOneMoEAlgorithm(
+    RankOneMoEAlgorithm,
+    CLIPClassificationMixin,
+):
+    """
+    CLIPRankOneMoEAlgorithm is a class that implements the RankOneMoEAlgorithm (https://github.com/EnnengYang/RankOne-MoE)
+    for CLIP models. It extends the RankOneMoEAlgorithm and CLIPClassificationMixin classes.
+
+    Attributes:
+        modelpool (CLIPVisionModelPool): The model pool containing the CLIP models.
+    """
+
+    modelpool: CLIPVisionModelPool = None
+
+    def load_checkpoint(self, model, checkpoint):
+        """
+        Load the checkpoint file.
+
+        Args:
+            model: The model to load the checkpoint into.
+            checkpoint: The path to the checkpoint file.
+        """
+        state = {"model": model}
+        self._fabric.load(checkpoint, state)
+
+    def save_checkpoint(self, model, checkpoint):
+        """
+        Save the checkpoint file.
+
+        Args:
+            model: The model to save the checkpoint from.
+            checkpoint: The path to the checkpoint file.
+        """
+        self._fabric.save(checkpoint, {"model": model})
+
+    def construct_moe_model(self) -> RankOneMoE:
+        """
+        Construct the RankOne-MoE model using the models in the model pool.
+
+        Returns:
+            RankOne-MoE: The constructed MoE model.
+        """
+        base_model = self.modelpool.load_model("_pretrained_")
+        expert_models = [
+            self.modelpool.load_model(m) for m in self.modelpool.model_names
+        ]
+
+        # Merge the models using task arithmetic
+        moe_model = task_arithmetic_merge(
+            # This function modifies the model in place, so we need to pass a deepcopy
+            deepcopy(base_model),
+            expert_models,
+            scaling_factor=self.config.init_lambda,
+        ).requires_grad_(False)
+
+        # Up-scale MLP modules
+        base_encoder: CLIPEncoder = base_model.vision_model.encoder
+        moe_encoder: CLIPEncoder = moe_model.vision_model.encoder
+        expert_encoders = [m.vision_model.encoder for m in expert_models]
+
+        num_layers = len(base_encoder.layers)
+        for layer_idx in range(num_layers):
+            base_mlp = base_encoder.layers[layer_idx].mlp
+            expert_mlps = [e.layers[layer_idx].mlp for e in expert_encoders]
+
+            moe_encoder.layers[layer_idx].mlp = RankOneMoE(
+                hidden_size=base_encoder.config.hidden_size,
+                base_model=base_mlp,
+                expert_models=expert_mlps,
+                init_lambda=self.config.init_lambda,
+                batch_first=True,  # For open_clip models this is False
+                router_hidden_layers=self.config.router_hidden_layers,
+                batch_reduce=self.config.batch_reduce,
+                svd_accelerator=self.config.svd_accelerator,
+                rank_k=self.config.rank_k,
+                select_k=self.config.select_k,
+            )
+
+        return moe_model
+
+    @functools.cache
+    def get_shuffled_test_loader_iter(self, tta_dataset: str):
+        """
+        Get an iterator for the shuffled test data loader.
+
+        Args:
+            tta_dataset (str): The name of the test-time adaptation dataset.
+
+        Returns:
+            Iterator: An iterator for the shuffled test data loader.
+        """
+        dataset = self.modelpool.load_test_dataset(tta_dataset)
+        dataset = CLIPDataset(dataset, processor=self.clip_processor)
+        log.info("get_shuffled_test_loader_iter")
+        loader = DataLoader(
+            dataset,
+            batch_size=self.config.batch_size,
+            shuffle=True,
+            num_workers=self.config.num_workers,
+            pin_memory=True,
+        )
+        loader = self.fabric.setup_dataloaders(loader)
+        return iter(InfiniteDataLoader(loader))
+
+    def on_test_time_adaptation_start(self):
+        """
+        Load the CLIP processor and construct the zero-shot classification head for each task.
+        """
+        self.setup_zero_shot_classification_head()
+
+    def compute_logits(self, module, batch, task) -> Tensor:
+        """
+        Compute the logits for the given batch and task.
+
+        Args:
+            module: The model module.
+            batch: The input batch.
+            task: The task name.
+
+        Returns:
+            Tensor: The computed logits.
+        """
+        images, _ = batch
+        text_embeds = self.zeroshot_weights[task]
+
+        image_embeds = module(images)[1]
+        image_embeds = self.visual_projection(image_embeds)
+
+        # Normalize embeddings
+        image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
+
+        # Cosine similarity
+        logits_per_text = (
+            torch.matmul(text_embeds, image_embeds.t()) * self.logit_scale_exp
+        )
+        logits_per_image = logits_per_text.t()
+
+        return logits_per_image
```