fusion-bench 0.2.6-py3-none-any.whl → 0.2.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/compat/method/__init__.py +1 -0
- fusion_bench/compat/method/base_algorithm.py +7 -1
- fusion_bench/compat/modelpool/__init__.py +1 -1
- fusion_bench/compat/taskpool/__init__.py +1 -1
- fusion_bench/dataset/arc_agi/arc.py +5 -0
- fusion_bench/dataset/arc_agi/preprocess.py +1 -1
- fusion_bench/dataset/llama/__init__.py +1 -0
- fusion_bench/dataset/llama/alpaca.py +93 -3
- fusion_bench/dataset/llama/collate.py +62 -2
- fusion_bench/dataset/llama/metamathqa.py +50 -0
- fusion_bench/dataset/llama/preference_700k.py +70 -0
- fusion_bench/dataset/llama/stanford_shp.py +90 -0
- fusion_bench/dataset/llama/ultrachat.py +58 -0
- fusion_bench/dataset/llama/utils/__init__.py +0 -0
- fusion_bench/method/__init__.py +1 -1
- fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -4
- fusion_bench/method/adamerging/min_norm_solvers.py +4 -4
- fusion_bench/method/linear/expo.py +39 -0
- fusion_bench/method/lm_finetune/__init__.py +1 -0
- fusion_bench/method/lm_finetune/bradley_terry_rm.py +432 -0
- fusion_bench/method/lm_finetune/fullfinetune_sft.py +90 -160
- fusion_bench/method/lm_finetune/peftfinetune_sft.py +49 -139
- fusion_bench/method/pruning/llama_magnitude_prune.py +2 -2
- fusion_bench/method/pruning/llama_random_prune.py +2 -2
- fusion_bench/method/surgery/__init__.py +3 -0
- fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +157 -0
- fusion_bench/mixins/__init__.py +2 -0
- fusion_bench/mixins/clip_classification.py +58 -5
- fusion_bench/mixins/fabric_training.py +320 -0
- fusion_bench/mixins/lightning_fabric.py +9 -0
- fusion_bench/modelpool/__init__.py +2 -0
- fusion_bench/modelpool/causal_lm/__init__.py +1 -1
- fusion_bench/modelpool/causal_lm/causal_lm.py +21 -22
- fusion_bench/modelpool/seq_classification_lm/__init__.py +2 -0
- fusion_bench/modelpool/seq_classification_lm/reward_model.py +15 -0
- fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +98 -0
- fusion_bench/models/chat_templates/__init__.py +1 -0
- fusion_bench/models/chat_templates/llama_3_Instruct.py +1 -0
- fusion_bench/models/chat_templates/load_tokenizer.py +43 -0
- fusion_bench/models/hf_clip.py +50 -9
- fusion_bench/models/surgery/surgerymodelwrapper.py +157 -0
- fusion_bench/models/utils.py +8 -0
- fusion_bench/models/wrappers/layer_wise_fusion.py +14 -5
- fusion_bench/models/wrappers/task_wise_fusion.py +5 -5
- fusion_bench/optim/__init__.py +2 -0
- fusion_bench/optim/exception.py +47 -0
- fusion_bench/optim/lr_scheduler/__init__.py +1 -0
- fusion_bench/optim/lr_scheduler/linear_warmup.py +222 -0
- fusion_bench/optim/lr_scheduler/utils/__init__.py +1 -0
- fusion_bench/optim/lr_scheduler/utils/visualization.py +119 -0
- fusion_bench/optim/mezo.py +0 -2
- fusion_bench/programs/fabric_fusion_program.py +5 -1
- fusion_bench/taskpool/clip_vision/taskpool.py +43 -6
- fusion_bench/taskpool/llama/reward_model.py +157 -0
- fusion_bench/taskpool/nyuv2_taskpool.py +2 -0
- fusion_bench/utils/hydra_utils.py +22 -0
- fusion_bench/utils/plot/__init__.py +0 -0
- fusion_bench/utils/plot/token.py +52 -0
- fusion_bench/utils/plot/token_notebook.py +127 -0
- fusion_bench/utils/type.py +5 -3
- {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/METADATA +1 -1
- {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/RECORD +87 -47
- fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -1
- fusion_bench_config/dataset/llm_sft/alpaca_cleaned.yaml +6 -0
- fusion_bench_config/dataset/llm_sft/ultrachat_200k.yaml +3 -0
- fusion_bench_config/fabric/llama_peft_fsdp.yaml +16 -0
- fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
- fusion_bench_config/fabric/strategy/deepspeed.yaml +10 -0
- fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +9 -0
- fusion_bench_config/fabric_model_fusion.yaml +1 -1
- fusion_bench_config/llama_full_finetune.yaml +19 -0
- fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +47 -0
- fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +11 -4
- fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +4 -2
- fusion_bench_config/method/surgery/adamerging_surgery.yaml +27 -0
- fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +21 -0
- fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +21 -0
- fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +19 -0
- fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +18 -0
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +23 -0
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +14 -0
- fusion_bench_config/nyuv2_config.yaml +5 -1
- fusion_bench_config/taskpool/reward_model_evaluation.yaml +18 -0
- fusion_bench_config/llama_weighted_average.yaml +0 -26
- {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/LICENSE +0 -0
- {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/top_level.txt +0 -0
fusion_bench_config/llama_full_finetune.yaml (new file)
@@ -0,0 +1,19 @@
+defaults:
+  - hydra: default
+  - fabric: llama_fsdp
+  # --- Model, Method, Task ---
+  - method: lm_finetune/fullfinetune_sft.yaml
+  - modelpool: CausalLMPool/llama_alpaca_cleaned.yaml
+  - taskpool: dummy
+  - _self_
+
+_target_: fusion_bench.programs.FabricModelFusionProgram
+_recursive_: false
+
+fast_dev_run: false # Run a single batch of data to test the model or method
+# Run the script without actually running the experiment, use with `print_config=true`.
+# You can also use `--cfg` or `-c` to show the configuration instead of running.
+dry_run: false
+print_config: true # Print the configuration to the console
+report_save_path: null # path to save the result report
+print_function_call: true # set to false if you don't want to print the details of instantiate calls
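For orientation, a minimal sketch (not shipped in the package) of how a top-level config like this could be composed with Hydra's compose API. The relative `config_path`, the assumption of Hydra ≥ 1.2, and the override shown are illustrative only.

    # Sketch: compose the new llama_full_finetune config and print the merged result.
    # Assumes this script sits next to the packaged `fusion_bench_config` directory.
    from hydra import compose, initialize
    from omegaconf import OmegaConf

    with initialize(version_base=None, config_path="fusion_bench_config"):
        cfg = compose(
            config_name="llama_full_finetune",
            overrides=["method.optimizer.lr=2e-5"],  # hypothetical override
        )
    print(OmegaConf.to_yaml(cfg))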
fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml (new file)
@@ -0,0 +1,47 @@
+_target_: fusion_bench.method.BradleyTerryRewardModeling
+_recursive_: False
+
+optimizer:
+  _target_: torch.optim.AdamW
+  lr: 1e-5
+  weight_decay: 0.01
+  fused: null
+
+lr_scheduler:
+  _target_: fusion_bench.optim.lr_scheduler.CosineDecayWithWarmup
+  T_max: _T_max_ # this will be replaced by the expected number of training steps
+  init_lr: 0
+  warmup_steps: 100
+  max_lr: ${..optimizer.lr}
+  min_lr: 1e-6
+
+dataloader_kwargs:
+  # per-gpu batch size
+  batch_size: 1
+  num_workers: 0
+  pin_memory: True
+
+# Training hyperparameters
+# if max_epochs=-1, max_steps will be used to determine the number of training steps
+max_epochs: 3
+max_steps: -1
+max_steps_per_epoch: -1
+accumulate_grad_batches: 1
+lr_scheduler_interval: step
+lr_scheduler_frequency: 1
+# Checkpointing may be done by epoch or step, and at the end of training
+# `checkpoint_save_interval` can be 'epoch' or 'step'
+checkpoint_save_interval: epoch
+checkpoint_save_frequency: 1
+# Whether to use gradient clipping, and if so, the value and algorithm
+gradient_clip_val: null
+gradient_clip_algorithm: norm
+save_optimizer_state: false
+# save_full_model must be true when using shared FSDP
+save_full_model: true
+# save_ckpt_type can be 'hf' or 'lightning'
+save_ckpt_type: lightning
+# Path to checkpoint to load from, used for resuming training
+ckpt_path: null
+max_length: 4096
+fix_token_embedding: true
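The method name refers to standard Bradley-Terry reward modeling. As an illustration only (not a copy of the package's implementation), the pairwise objective such a method minimizes is the negative log-likelihood that the chosen response outranks the rejected one:

    # Illustrative Bradley-Terry loss: P(chosen > rejected) = sigmoid(r_chosen - r_rejected).
    import torch
    import torch.nn.functional as F

    def bradley_terry_loss(chosen_rewards: torch.Tensor, rejected_rewards: torch.Tensor) -> torch.Tensor:
        # negative log-likelihood of the chosen response being preferred
        return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()

    loss = bradley_terry_loss(torch.tensor([1.2, 0.3]), torch.tensor([0.4, 0.9]))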
fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml
@@ -3,14 +3,17 @@ _recursive_: False
 
 optimizer:
   _target_: torch.optim.AdamW
-
+  lr: 1e-5
   weight_decay: 0.01
-
+  fused: null
 
 lr_scheduler:
-  _target_:
+  _target_: fusion_bench.optim.lr_scheduler.CosineDecayWithWarmup
   T_max: _T_max_ # this will be replaced by the expected number of training steps
-
+  init_lr: 0
+  warmup_steps: 100
+  max_lr: ${..optimizer.lr}
+  min_lr: 1e-6
 
 dataloader_kwargs:
   # per-gpu batch size
@@ -36,5 +39,9 @@ gradient_clip_algorithm: norm
 save_optimizer_state: false
 # save_full_model must be true when using shared FSDP
 save_full_model: true
+# save_ckpt_type can be 'hf' or 'lightning'
+save_ckpt_type: lightning
 # Path to checkpoint to load from, used for resuming training
 ckpt_path: null
+max_length: 4096
+fix_token_embedding: true
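The scheduler swapped in above (`CosineDecayWithWarmup` with `init_lr`, `warmup_steps`, `max_lr`, `min_lr`) implies a warmup-then-cosine learning-rate curve. The sketch below only approximates that shape; the packaged class may differ in details.

    # Approximate linear-warmup + cosine-decay schedule (illustration only).
    import math

    def lr_at_step(step, warmup_steps=100, t_max=10_000, init_lr=0.0, max_lr=1e-5, min_lr=1e-6):
        if step < warmup_steps:
            return init_lr + (max_lr - init_lr) * step / warmup_steps  # linear warmup
        progress = (step - warmup_steps) / max(1, t_max - warmup_steps)
        return min_lr + 0.5 * (max_lr - min_lr) * (1.0 + math.cos(math.pi * progress))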
fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml
@@ -3,9 +3,9 @@ _recursive_: False
 
 optimizer:
   _target_: torch.optim.AdamW
-
+  lr: 1e-4
   weight_decay: 0.01
-
+  fused: null
 
 lr_scheduler:
   _target_: torch.optim.lr_scheduler.CosineAnnealingLR
@@ -56,6 +56,8 @@ gradient_clip_algorithm: norm
 save_optimizer_state: false
 # save_full_model must be true when using shared FSDP
 save_full_model: false
+# save_ckpt_type can be 'peft' or 'lightning'
+save_ckpt_type: lightning
 # Path to checkpoint to load from, used for resuming training
 ckpt_path: null
 max_length: 4096
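A 'peft' checkpoint type conventionally means persisting only the adapter weights rather than a full Lightning state dict; a hedged sketch of what that usually looks like with the PEFT API (not the package's saving code):

    # Sketch: save only the LoRA/PEFT adapter for a PEFT-wrapped model.
    from peft import PeftModel

    def save_adapter(model: PeftModel, output_dir: str) -> None:
        # writes the adapter weights plus adapter_config.json into output_dir,
        # leaving the base model untouched
        model.save_pretrained(output_dir)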
fusion_bench_config/method/surgery/adamerging_surgery.yaml (new file)
@@ -0,0 +1,27 @@
+# this option can be "clip_task_wise_adamerging"
+name: clip_layer_wise_adamerging_surgery
+# this weights can be a list of float, or a string that points to a *.np, *.pt file containing the weights
+# if weights is specified, skip the test-time adaptation training
+weights: null
+# learning rate
+optimizer: adam
+lr: 1e-3
+init_values: 0.3
+# if `clamp_weights` is true, the weights will be clamped to [0, 1]
+clamp_weights: false
+# arguments of `functional_call`
+tie_weights: true
+strict: false
+# this is overrided by `fabric.devices` if launched from the `fusion_bench` CLI.
+devices: 1
+batch_size: 16
+num_workers: 8
+max_steps: 1000
+fast_dev_run: ${fast_dev_run}
+# the path for saving the merging weights
+save_merging_weights: 'merging_weights.pt'
+cache_dir: outputs
+
+# parameters of Surgery
+eval_iterations: 200
+surgery_steps: 1000
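For context, layer-wise AdaMerging learns one merging coefficient per layer and per model. A conceptual sketch under that reading of `init_values` and `clamp_weights` follows; it is not the package's wrapper.

    # Conceptual layer-wise merge: merged = pretrained + sum_i lambda_i * (finetuned_i - pretrained).
    import torch

    def merge_layer(pretrained, finetuned_list, lambdas, clamp_weights=False):
        if clamp_weights:
            lambdas = lambdas.clamp(0.0, 1.0)  # mirrors the `clamp_weights` option
        task_vectors = torch.stack([ft - pretrained for ft in finetuned_list])
        lambdas = lambdas.view(-1, *([1] * pretrained.dim()))
        return pretrained + (lambdas * task_vectors).sum(dim=0)

    # lambdas would start at `init_values` (0.3 here) and be adapted at test time.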
fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml (new file)
@@ -0,0 +1,21 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+
+pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
+
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+
+train_datasets:
+  alpaca-cleaned:
+    _target_: fusion_bench.dataset.llama.alpaca.load_tokenized_alpaca_dataset
+    tokenizer: ${...tokenizer}
+    path: "yahma/alpaca-cleaned"
+    split: train
+    cache_path: null
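The `${..}` / `${...}` references are OmegaConf relative interpolations that walk up the config tree (one dot per level above the containing node). A small self-contained check of how they resolve, with the structure trimmed from the file above:

    # Demonstrates the relative interpolation pattern used by these modelpool configs.
    from omegaconf import OmegaConf

    cfg = OmegaConf.create({
        "pretrained_model_name_or_path": "meta-llama/Llama-3.2-1B-Instruct",
        "models": {"_pretrained_": {
            "pretrained_model_name_or_path": "${...pretrained_model_name_or_path}"}},
        "tokenizer": {"pretrained_model_name_or_path": "${..pretrained_model_name_or_path}"},
    })
    assert cfg["models"]["_pretrained_"]["pretrained_model_name_or_path"] == "meta-llama/Llama-3.2-1B-Instruct"
    assert cfg["tokenizer"]["pretrained_model_name_or_path"] == "meta-llama/Llama-3.2-1B-Instruct"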
fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml (new file)
@@ -0,0 +1,21 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+
+pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
+
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+
+train_datasets:
+  codealpaca:
+    _target_: fusion_bench.dataset.llama.alpaca.load_tokenized_alpaca_dataset
+    tokenizer: ${...tokenizer}
+    path: sahil2801/CodeAlpaca-20k
+    split: train
+    cache_path: null
fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml (new file)
@@ -0,0 +1,19 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+
+pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
+
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+
+train_datasets:
+  metamathqa:
+    _target_: fusion_bench.dataset.llama.metamathqa.load_tokenized_metamathqa
+    tokenizer: ${...tokenizer}
+    cache_path: null
fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml (new file)
@@ -0,0 +1,18 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+
+pretrained_model_name_or_path: meta-llama/Llama-3-1B-Instruct
+
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+
+train_datasets:
+  ultrachat-200k:
+    _target_: fusion_bench.dataset.llama.ultrachat.load_tokenized_ultrachat_200k
+    tokenizer: ${...tokenizer}
fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml (new file)
@@ -0,0 +1,23 @@
+_target_: fusion_bench.modelpool.SeqenceClassificationModelPool
+
+pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
+
+models:
+  _pretrained_:
+    _target_: fusion_bench.modelpool.seq_classification_lm.create_reward_model_from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+    use_flash_attention_2: true
+
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+  pad_token: <|end_of_text|> # do not use eos token (<|eos_id|>) as padding token because it is used as the end of each content
+
+train_datasets:
+  preference_700k:
+    _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
+    tokenizer: ${...tokenizer}
+    path: hendrydong/preference_700K
+    split: train
+    cache_path: null
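A hedged sketch of what a "reward model" in this pool amounts to: a causal LM backbone with a single-logit sequence-classification head, plus a padding token that is distinct from the end-of-turn token (per the config comment above). This uses the standard Hugging Face API and is not the package's `create_reward_model_from_pretrained` helper.

    # Sketch: scalar-reward head on a Llama backbone via sequence classification.
    import torch
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    name = "meta-llama/Llama-3.2-1B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(name)
    tokenizer.pad_token = "<|end_of_text|>"  # padding token, as in the config above
    model = AutoModelForSequenceClassification.from_pretrained(
        name, num_labels=1, torch_dtype=torch.bfloat16
    )
    model.config.pad_token_id = tokenizer.pad_token_id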
fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml (new file)
@@ -0,0 +1,14 @@
+_target_: fusion_bench.modelpool.SeqenceClassificationModelPool
+
+pretrained_model_name_or_path: fusion-bench/Llama-3.2-1B-Instruct_Bradly-Terry-RM_Preference-700k
+
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForSequenceClassification.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+    torch_dtype: bfloat16
+
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
+  pad_token: <|end_of_text|> # do not use eos token (<|eos_id|>) as padding token because it is used as the end of each content
fusion_bench_config/nyuv2_config.yaml
@@ -1,13 +1,17 @@
 defaults:
   - hydra: default
+  - fabric: auto
   - modelpool: nyuv2_modelpool
   - method: simple_average
   - taskpool: nyuv2_taskpool
   - _self_
+
+_target_: fusion_bench.programs.FabricModelFusionProgram
+_recursive_: false
+
 fast_dev_run: false # Run a single batch of data to test the model or method
 use_lightning: true # Use the fabric to run the experiment
 print_config: true # Print the configuration to the console
 save_report: false # path to save the result report
-fabric: null
 trainer:
   devices: 1
fusion_bench_config/taskpool/reward_model_evaluation.yaml (new file)
@@ -0,0 +1,18 @@
+_target_: fusion_bench.taskpool.llama.reward_model.RewardModelEvaluationTaskPool
+
+test_datasets:
+  preference_700k:
+    _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
+    tokenizer: ${...tokenizer}
+    path: hendrydong/preference_700K
+    split: train
+    cache_path: null
+
+dataloader_kwargs:
+  shuffle: False
+  batch_size: 16
+
+tokenizer: ${..modelpool.tokenizer}
+
+max_num_samples: 1000
+seed: 42
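The metric such a preference-based taskpool typically reports is pairwise accuracy: how often the reward model scores the chosen response above the rejected one. Illustrative only, not the package code:

    # Pairwise accuracy over chosen/rejected reward scores.
    import torch

    def pairwise_accuracy(chosen_rewards: torch.Tensor, rejected_rewards: torch.Tensor) -> float:
        return (chosen_rewards > rejected_rewards).float().mean().item()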
fusion_bench_config/llama_weighted_average.yaml (deleted)
@@ -1,26 +0,0 @@
-defaults:
-  - example_config
-  - override method: weighted_average_for_llama
-  - override modelpool: llama_for_causallm
-  - _self_
-modelpool:
-  models:
-    # the pre-trained model (base model) is optional
-    # if not provided, the first model will be used as the base model
-    - name: _pretrained_
-      path: meta-llama/Meta-Llama-3-8B
-    - name: expert_1
-      path: meta-llama/Meta-Llama-3-8B
-    - name: expert_2
-      path: meta-llama/Meta-Llama-3-8B-Instruct
-method:
-  normalize: true # if true, the weights will be normalized before merging
-  weights: # List of weights for each model
-    - 0.5
-    - 0.5
-  # if true, only the backbone of the model will be merged and the head will be keeped as the pre-trained model (if the pre-trained model is provided, otherwise the head of the first model will be used)
-  # if false, the whole model will be merged
-  backbone_only: true
-  merged_model_save_path: null
-  save_tokenizer: true
-  push_to_hub: false
{fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/LICENSE: File without changes
{fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/WHEEL: File without changes
{fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/entry_points.txt: File without changes
{fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/top_level.txt: File without changes