PyPI - fusion-bench - Versions diffs - 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl - Mend

fusion-bench 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

fusion_bench/compat/method/__init__.py +2 -0
fusion_bench/compat/method/base_algorithm.py +7 -2
fusion_bench/compat/modelpool/__init__.py +3 -2
fusion_bench/compat/taskpool/__init__.py +1 -1
fusion_bench/dataset/arc_agi/__init__.py +6 -1
fusion_bench/dataset/arc_agi/arc.py +26 -7
fusion_bench/dataset/arc_agi/arc_agi.py +156 -25
fusion_bench/dataset/arc_agi/np_cache.py +0 -1
fusion_bench/dataset/arc_agi/preprocess.py +51 -9
fusion_bench/dataset/llama/__init__.py +1 -0
fusion_bench/dataset/llama/alpaca.py +93 -3
fusion_bench/dataset/llama/collate.py +72 -5
fusion_bench/dataset/llama/metamathqa.py +50 -0
fusion_bench/dataset/llama/preference_700k.py +70 -0
fusion_bench/dataset/llama/stanford_shp.py +90 -0
fusion_bench/dataset/llama/ultrachat.py +58 -0
fusion_bench/dataset/llama/utils/__init__.py +0 -0
fusion_bench/method/__init__.py +4 -1
fusion_bench/method/adamerging/__init__.py +1 -1
fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -4
fusion_bench/method/adamerging/min_norm_solvers.py +4 -4
fusion_bench/method/linear/expo.py +39 -0
fusion_bench/method/lm_finetune/__init__.py +1 -0
fusion_bench/method/lm_finetune/bradley_terry_rm.py +432 -0
fusion_bench/method/lm_finetune/fullfinetune_sft.py +122 -150
fusion_bench/method/lm_finetune/peftfinetune_sft.py +102 -157
fusion_bench/method/pruning/llama_magnitude_prune.py +2 -2
fusion_bench/method/pruning/llama_random_prune.py +2 -2
fusion_bench/method/pruning/magnitude_diff_pruning.py +2 -1
fusion_bench/method/rankone_moe/__init__.py +3 -0
fusion_bench/method/rankone_moe/clip_rankone_moe.py +160 -0
fusion_bench/method/rankone_moe/rankone_moe.py +249 -0
fusion_bench/method/simple_average.py +1 -1
fusion_bench/method/surgery/__init__.py +3 -0
fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +157 -0
fusion_bench/mixins/__init__.py +2 -0
fusion_bench/mixins/clip_classification.py +60 -12
fusion_bench/mixins/fabric_training.py +320 -0
fusion_bench/mixins/lightning_fabric.py +11 -2
fusion_bench/modelpool/__init__.py +2 -0
fusion_bench/modelpool/causal_lm/__init__.py +1 -1
fusion_bench/modelpool/causal_lm/causal_lm.py +21 -22
fusion_bench/modelpool/seq_classification_lm/__init__.py +2 -0
fusion_bench/modelpool/seq_classification_lm/reward_model.py +15 -0
fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +98 -0
fusion_bench/models/chat_templates/__init__.py +1 -0
fusion_bench/models/chat_templates/llama_3_Instruct.py +1 -0
fusion_bench/models/chat_templates/load_tokenizer.py +43 -0
fusion_bench/models/hf_clip.py +50 -9
fusion_bench/models/rankone_moe.py +410 -0
fusion_bench/models/surgery/surgerymodelwrapper.py +157 -0
fusion_bench/models/utils.py +8 -0
fusion_bench/models/wrappers/layer_wise_fusion.py +14 -5
fusion_bench/models/wrappers/task_wise_fusion.py +5 -5
fusion_bench/optim/__init__.py +2 -0
fusion_bench/optim/exception.py +47 -0
fusion_bench/optim/lr_scheduler/__init__.py +1 -0
fusion_bench/optim/lr_scheduler/linear_warmup.py +222 -0
fusion_bench/optim/lr_scheduler/utils/__init__.py +1 -0
fusion_bench/optim/lr_scheduler/utils/visualization.py +119 -0
fusion_bench/optim/mezo.py +0 -2
fusion_bench/programs/fabric_fusion_program.py +5 -1
fusion_bench/taskpool/__init__.py +10 -2
fusion_bench/taskpool/clip_vision/__init__.py +1 -0
fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +112 -0
fusion_bench/taskpool/clip_vision/taskpool.py +43 -6
fusion_bench/taskpool/llama/reward_model.py +157 -0
fusion_bench/taskpool/nyuv2_taskpool.py +2 -0
fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py +2 -1
fusion_bench/utils/hydra_utils.py +22 -0
fusion_bench/utils/plot/__init__.py +0 -0
fusion_bench/utils/plot/token.py +52 -0
fusion_bench/utils/plot/token_notebook.py +127 -0
fusion_bench/utils/type.py +5 -3
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/METADATA +1 -1
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/RECORD +104 -57
fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -1
fusion_bench_config/dataset/llm_sft/alpaca_cleaned.yaml +6 -0
fusion_bench_config/dataset/llm_sft/ultrachat_200k.yaml +3 -0
fusion_bench_config/fabric/llama_peft_fsdp.yaml +16 -0
fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
fusion_bench_config/fabric/strategy/deepspeed.yaml +10 -0
fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +9 -0
fusion_bench_config/fabric_model_fusion.yaml +1 -1
fusion_bench_config/llama_full_finetune.yaml +19 -0
fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +47 -0
fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +13 -6
fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +17 -9
fusion_bench_config/method/rankone_moe/rankone_moe.yaml +26 -0
fusion_bench_config/method/regmean/clip_regmean.yaml +1 -0
fusion_bench_config/method/surgery/adamerging_surgery.yaml +27 -0
fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +21 -0
fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +21 -0
fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +19 -0
fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +18 -0
fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +23 -0
fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +14 -0
fusion_bench_config/nyuv2_config.yaml +5 -1
fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +18 -0
fusion_bench_config/taskpool/reward_model_evaluation.yaml +18 -0
fusion_bench_config/llama_weighted_average.yaml +0 -26
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/LICENSE +0 -0
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/WHEEL +0 -0
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/entry_points.txt +0 -0
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/top_level.txt +0 -0

fusion_bench_config/method/rankone_moe/rankone_moe.yaml ADDED Viewed

@@ -0,0 +1,26 @@
+name: ??? # this can be
+# the path for loading the model weights, if specified, skip the test-time adaptation training
+checkpoint: False
+# the path for saving the model weights.
+save_checkpoint: False
+router_hidden_layers: 1
+init_lambda: 0.3
+batch_reduce: true
+# device to compute svd
+svd_accelerator: cuda
+rank_k: 32 # How many experts are added to the pool per task?
+select_k: -1  # How many experts are selected from the pool to merge? Range is (1, rank_k*task_num). The special value -1 selects all the experts in the pool
+# learning rate
+lr: 1e-4
+optimizer: adam
+# this is overridden by `fabric.devices` if launched from the `fusion_bench` CLI.
+devices: 1
+batch_size: 16
+num_workers: 16
+max_steps: 1000 # default: 1000
+# if true, we will use the gradient accumulation across tasks to save memory
+use_grad_accumulate: true
+cache_dir: outputs
+fast_dev_run: ${fast_dev_run}

fusion_bench_config/method/regmean/clip_regmean.yaml CHANGED Viewed

@@ -3,6 +3,7 @@ _target_: fusion_bench.method.RegMeanAlgorithmForCLIP
 exclude_param_names_regex: []
 # numbers of examples to compute regmean weights
 num_regmean_examples: 256
+weight_transpose: true
 # float, reduce non-diagonal elements in regmean weights by multiplying this scalar
 reduce_non_diagonal_ratio: 0.6
 dataloader_kwargs:

fusion_bench_config/method/surgery/adamerging_surgery.yaml ADDED Viewed

@@ -0,0 +1,27 @@
+# this option can be "clip_task_wise_adamerging"
+name: clip_layer_wise_adamerging_surgery
+# these weights can be a list of floats, or a string that points to a *.np or *.pt file containing the weights
+# if weights is specified, skip the test-time adaptation training
+weights: null
+# learning rate
+optimizer: adam
+lr: 1e-3
+init_values: 0.3
+# if `clamp_weights` is true, the weights will be clamped to [0, 1]
+clamp_weights: false
+# arguments of `functional_call`
+tie_weights: true
+strict: false
+# this is overridden by `fabric.devices` if launched from the `fusion_bench` CLI.
+devices: 1
+batch_size: 16
+num_workers: 8
+max_steps: 1000
+fast_dev_run: ${fast_dev_run}
+# the path for saving the merging weights
+save_merging_weights: 'merging_weights.pt'
+cache_dir: outputs
+# parameters of Surgery
+eval_iterations: 200
+surgery_steps: 1000

fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml ADDED Viewed

@@ -0,0 +1,21 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+train_datasets:
+  alpaca-cleaned:
+    _target_: fusion_bench.dataset.llama.alpaca.load_tokenized_alpaca_dataset
+    tokenizer: ${...tokenizer}
+    path: "yahma/alpaca-cleaned"
+    split: train
+    cache_path: null

fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml ADDED Viewed

@@ -0,0 +1,21 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+train_datasets:
+  codealpaca:
+    _target_: fusion_bench.dataset.llama.alpaca.load_tokenized_alpaca_dataset
+    tokenizer: ${...tokenizer}
+    path: sahil2801/CodeAlpaca-20k
+    split: train
+    cache_path: null

fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml ADDED Viewed

@@ -0,0 +1,19 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+train_datasets:
+  metamathqa:
+    _target_: fusion_bench.dataset.llama.metamathqa.load_tokenized_metamathqa
+    tokenizer: ${...tokenizer}
+    cache_path: null

fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml ADDED Viewed

@@ -0,0 +1,18 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+pretrained_model_name_or_path: meta-llama/Llama-3-1B-Instruct
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+train_datasets:
+  ultrachat-200k:
+    _target_: fusion_bench.dataset.llama.ultrachat.load_tokenized_ultrachat_200k
+    tokenizer: ${...tokenizer}

fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml ADDED Viewed

@@ -0,0 +1,23 @@
+_target_: fusion_bench.modelpool.SeqenceClassificationModelPool
+pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
+models:
+  _pretrained_:
+    _target_: fusion_bench.modelpool.seq_classification_lm.create_reward_model_from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+    use_flash_attention_2: true
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+  pad_token: <|end_of_text|> # do not use eos token (<|eos_id|>) as padding token because it is used as the end of each content
+train_datasets:
+  preference_700k:
+    _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
+    tokenizer: ${...tokenizer}
+    path: hendrydong/preference_700K
+    split: train
+    cache_path: null

fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml ADDED Viewed

@@ -0,0 +1,14 @@
+_target_: fusion_bench.modelpool.SeqenceClassificationModelPool
+pretrained_model_name_or_path: fusion-bench/Llama-3.2-1B-Instruct_Bradly-Terry-RM_Preference-700k
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForSequenceClassification.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+  pad_token: <|end_of_text|> # do not use eos token (<|eos_id|>) as padding token because it is used as the end of each content

fusion_bench_config/nyuv2_config.yaml CHANGED Viewed

@@ -1,13 +1,17 @@
 defaults:
   - hydra: default
+  - fabric: auto
   - modelpool: nyuv2_modelpool
   - method: simple_average
   - taskpool: nyuv2_taskpool
   - _self_
+_target_: fusion_bench.programs.FabricModelFusionProgram
+_recursive_: false
 fast_dev_run: false # Run a single batch of data to test the model or method
 use_lightning: true # Use the fabric to run the experiment
 print_config: true # Print the configuration to the console
 save_report: false # path to save the result report
-fabric: null
 trainer:
   devices: 1

fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml ADDED Viewed

@@ -0,0 +1,18 @@
+defaults:
+  - CLIPVisionModelTaskPool@: _template
+  - /dataset/image_classification/test@test_datasets:
+      - sun397
+      - stanford-cars
+      - resisc45
+      - eurosat
+      - svhn
+      - gtsrb
+      - mnist
+      - dtd
+  - _self_
+_target_: fusion_bench.taskpool.RankoneWEMoECLIPVisionModelTaskPool
+# === layer-wise routing weights saving ===
+layer_wise_routing_weights_save_path: null
+layer_wise_routing_weights_max_num: 1000

fusion_bench_config/taskpool/reward_model_evaluation.yaml ADDED Viewed

@@ -0,0 +1,18 @@
+_target_: fusion_bench.taskpool.llama.reward_model.RewardModelEvaluationTaskPool
+test_datasets:
+  preference_700k:
+    _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
+    tokenizer: ${...tokenizer}
+    path: hendrydong/preference_700K
+    split: train
+    cache_path: null
+dataloader_kwargs:
+  shuffle: False
+  batch_size: 16
+tokenizer: ${..modelpool.tokenizer}
+max_num_samples: 1000
+seed: 42

fusion_bench_config/llama_weighted_average.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-defaults:
-  - example_config
-  - override method: weighted_average_for_llama
-  - override modelpool: llama_for_causallm
-  - _self_
-modelpool:
-  models:
-    # the pre-trained model (base model) is optional
-    # if not provided, the first model will be used as the base model
-    - name: _pretrained_
-      path: meta-llama/Meta-Llama-3-8B
-    - name: expert_1
-      path: meta-llama/Meta-Llama-3-8B
-    - name: expert_2
-      path: meta-llama/Meta-Llama-3-8B-Instruct
-method:
-  normalize: true # if true, the weights will be normalized before merging
-  weights: # List of weights for each model
-    - 0.5
-    - 0.5
-  # if true, only the backbone of the model will be merged and the head will be kept as the pre-trained model (if the pre-trained model is provided, otherwise the head of the first model will be used)
-  # if false, the whole model will be merged
-  backbone_only: true
-  merged_model_save_path: null
-  save_tokenizer: true
-  push_to_hub: false

{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/LICENSE RENAMED Viewed

File without changes

{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

fusion-bench 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

fusion-bench 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl