PyPI - fusion-bench - Versions diffs - 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl - Mend

fusion-bench 0.2.14py3-none-any.whl → 0.2.16py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

fusion_bench_config/method/randes/superposed_model_soup.yaml ADDED Viewed

@@ -0,0 +1,18 @@
+_target_: fusion_bench.method.randes.SuperposedModelSoupAlgorithm
+#* === base randes options ===
+mode: manual_absorption
+# weights for all mlp and attn layers
+target_layer:
+  - mlp_w
+  - attn_w
+random_seed: 42 # for random_binary_diagonal_matrix
+different_across_layers: True
+joint_matrix_mode: flatten_hstack
+rank: 1 # for columnwise svd
+random_components: False
+shift_layers: 0
+absorber: None  # NOTE(review): a bare `None` loads as the string "None" in YAML, not null — confirm the consumer expects the string
+debug: 0
+ms_mode: average
+verbose: 0 # level of verbosity
+dropout_rate: 1 # take the target layer per n target layers

fusion_bench_config/method/randes/superposed_task_arithmetic.yaml ADDED Viewed

@@ -0,0 +1,20 @@
+_target_: fusion_bench.method.randes.SuperposedTaskArithmeticAlgorithm  # Hydra-style target, matching the sibling randes method configs
+#* === base randes options ===
+mode: random_binary_diagonal_matrix
+# weights for all mlp and attn layers
+target_layer:
+  - mlp_w
+  - attn_w
+random_seed: 42 # for random_binary_diagonal_matrix
+different_across_layers: True
+joint_matrix_mode: flatten_hstack
+rank: 1 # for columnwise svd
+random_components: False
+shift_layers: 0
+debug: 0
+verbose: 0
+dropout_rate: 1
+#* === task arithmetic options ===
+scaling_factor: 0.5
+# path to save/load the model
+model_path: null

fusion_bench_config/method/randes/superposed_task_arithmetic_lora.yaml ADDED Viewed

@@ -0,0 +1,20 @@
+_target_: fusion_bench.method.randes.SuperposedTaskArithmeticLoRAAlgorithm
+#* === base randes options ===
+mode: random_binary_diagonal_matrix
+# weights for all mlp and attn layers
+target_layer:
+  - mlp_w
+  - attn_w
+random_seed: 42 # for random_binary_diagonal_matrix
+different_across_layers: True
+joint_matrix_mode: flatten_hstack
+rank: 1 # for columnwise svd
+random_components: False
+shift_layers: 0
+debug: 0
+verbose: 0
+dropout_rate: 1
+#* === task arithmetic options ===
+scaling_factor: 0.5
+# path to save/load the model
+model_path: null

fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml CHANGED Viewed

@@ -1,10 +1,11 @@
-_target_: fusion_bench.method.losparse.sparselo.IterativeSparseLoForLlama
+_target_: fusion_bench.method.sparselo.sparselo.IterativeSparseLoForLlama
 _recursive_: false
 nsamples: 128
 seed: 0
 rank: 128
 num_iterations: 10
 variant: wanda
+use_reference_model: false
 # `prune_type` can be either `unstructured` or `semistructured`
 prune_type: unstructured
 # device and dtype to compute the pruning mask

fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml CHANGED Viewed

@@ -1,4 +1,4 @@
-_target_: fusion_bench.method.losparse.sparselo.PCPSparseLoForLlama
+_target_: fusion_bench.method.sparselo.sparselo.PCPSparseLoForLlama
 _recursive_: false
 nsamples: 128
 seed: 0

fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml CHANGED Viewed

@@ -1,4 +1,4 @@
-_target_: fusion_bench.method.losparse.sparselo.SparseLoForLlama
+_target_: fusion_bench.method.sparselo.sparselo.SparseLoForLlama
 _recursive_: false
 nsamples: 128
 seed: 0

fusion_bench_config/method/tall_mask/task_arithmetic.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+_target_: fusion_bench.method.tall_mask.TallMaskTaskArithmeticAlgorithm
+tall_mask_lambda: 0.6
+debug: 0
+verbose: 0

fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL10.yaml ADDED Viewed

@@ -0,0 +1,29 @@
+# The 20 tasks used in the paper:
+# Wang et al. Localizing Task Information for Improved Model Merging and Compression
+# http://arxiv.org/abs/2405.07813
+defaults:
+  # pre-trained model
+  - clip-vit-base-patch32
+  # eight tasks in the task arithmetic paper
+  - clip-vit-base-patch32_sun397
+  - clip-vit-base-patch32_stanford-cars
+  - clip-vit-base-patch32_resisc45
+  - clip-vit-base-patch32_eurosat
+  - clip-vit-base-patch32_svhn
+  - clip-vit-base-patch32_gtsrb
+  - clip-vit-base-patch32_mnist
+  - clip-vit-base-patch32_dtd
+  # additional 6 tasks in the TALL mask paper (TALL 14)
+  - clip-vit-base-patch32_oxford_flowers102
+  - clip-vit-base-patch32_pcam
+  # - clip-vit-base-patch32_fer2013
+  # - clip-vit-base-patch32_oxford-iiit-pet
+  # - clip-vit-base-patch32_stl10
+  # - clip-vit-base-patch32_cifar100
+  # additional 6 tasks in the TALL mask paper (TALL 20)
+  # - clip-vit-base-patch32_cifar10
+  # - clip-vit-base-patch32_food101
+  # - clip-vit-base-patch32_fashion_mnist
+  # - clip-vit-base-patch32_emnist_letters
+  # - clip-vit-base-patch32_kmnist
+  # - clip-vit-base-patch32_rendered-sst2

fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL12.yaml ADDED Viewed

@@ -0,0 +1,29 @@
+# The 20 tasks used in the paper:
+# Wang et al. Localizing Task Information for Improved Model Merging and Compression
+# http://arxiv.org/abs/2405.07813
+defaults:
+  # pre-trained model
+  - clip-vit-base-patch32
+  # eight tasks in the task arithmetic paper
+  - clip-vit-base-patch32_sun397
+  - clip-vit-base-patch32_stanford-cars
+  - clip-vit-base-patch32_resisc45
+  - clip-vit-base-patch32_eurosat
+  - clip-vit-base-patch32_svhn
+  - clip-vit-base-patch32_gtsrb
+  - clip-vit-base-patch32_mnist
+  - clip-vit-base-patch32_dtd
+  # additional 6 tasks in the TALL mask paper (TALL 14)
+  - clip-vit-base-patch32_oxford_flowers102
+  - clip-vit-base-patch32_pcam
+  - clip-vit-base-patch32_fer2013
+  - clip-vit-base-patch32_oxford-iiit-pet
+  # - clip-vit-base-patch32_stl10
+  # - clip-vit-base-patch32_cifar100
+  # additional 6 tasks in the TALL mask paper (TALL 20)
+  # - clip-vit-base-patch32_cifar10
+  # - clip-vit-base-patch32_food101
+  # - clip-vit-base-patch32_fashion_mnist
+  # - clip-vit-base-patch32_emnist_letters
+  # - clip-vit-base-patch32_kmnist
+  # - clip-vit-base-patch32_rendered-sst2

fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL16.yaml ADDED Viewed

@@ -0,0 +1,29 @@
+# The 20 tasks used in the paper:
+# Wang et al. Localizing Task Information for Improved Model Merging and Compression
+# http://arxiv.org/abs/2405.07813
+defaults:
+  # pre-trained model
+  - clip-vit-base-patch32
+  # eight tasks in the task arithmetic paper
+  - clip-vit-base-patch32_sun397
+  - clip-vit-base-patch32_stanford-cars
+  - clip-vit-base-patch32_resisc45
+  - clip-vit-base-patch32_eurosat
+  - clip-vit-base-patch32_svhn
+  - clip-vit-base-patch32_gtsrb
+  - clip-vit-base-patch32_mnist
+  - clip-vit-base-patch32_dtd
+  # additional 6 tasks in the TALL mask paper (TALL 14)
+  - clip-vit-base-patch32_oxford_flowers102
+  - clip-vit-base-patch32_pcam
+  - clip-vit-base-patch32_fer2013
+  - clip-vit-base-patch32_oxford-iiit-pet
+  - clip-vit-base-patch32_stl10
+  - clip-vit-base-patch32_cifar100
+  # additional 6 tasks in the TALL mask paper (TALL 20)
+  - clip-vit-base-patch32_cifar10
+  - clip-vit-base-patch32_food101
+  # - clip-vit-base-patch32_fashion_mnist
+  # - clip-vit-base-patch32_emnist_letters
+  # - clip-vit-base-patch32_kmnist
+  # - clip-vit-base-patch32_rendered-sst2

fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL18.yaml ADDED Viewed

@@ -0,0 +1,29 @@
+# The 20 tasks used in the paper:
+# Wang et al. Localizing Task Information for Improved Model Merging and Compression
+# http://arxiv.org/abs/2405.07813
+defaults:
+  # pre-trained model
+  - clip-vit-base-patch32
+  # eight tasks in the task arithmetic paper
+  - clip-vit-base-patch32_sun397
+  - clip-vit-base-patch32_stanford-cars
+  - clip-vit-base-patch32_resisc45
+  - clip-vit-base-patch32_eurosat
+  - clip-vit-base-patch32_svhn
+  - clip-vit-base-patch32_gtsrb
+  - clip-vit-base-patch32_mnist
+  - clip-vit-base-patch32_dtd
+  # additional 6 tasks in the TALL mask paper (TALL 14)
+  - clip-vit-base-patch32_oxford_flowers102
+  - clip-vit-base-patch32_pcam
+  - clip-vit-base-patch32_fer2013
+  - clip-vit-base-patch32_oxford-iiit-pet
+  - clip-vit-base-patch32_stl10
+  - clip-vit-base-patch32_cifar100
+  # additional 6 tasks in the TALL mask paper (TALL 20)
+  - clip-vit-base-patch32_cifar10
+  - clip-vit-base-patch32_food101
+  - clip-vit-base-patch32_fashion_mnist
+  - clip-vit-base-patch32_emnist_letters
+  # - clip-vit-base-patch32_kmnist
+  # - clip-vit-base-patch32_rendered-sst2

fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml ADDED Viewed

@@ -0,0 +1,8 @@
+# The 20 tasks used in the paper:
+# Wang et al. Localizing Task Information for Improved Model Merging and Compression
+# http://arxiv.org/abs/2405.07813
+defaults:
+  - CLIPVisionModelPool@: _template
+  - /model/clip-vit@models: clip-vit-base-patch32_TALL10
+  - /dataset/image_classification/train@train_datasets: TALL10
+  - /dataset/image_classification/test@test_datasets: TALL10

fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml ADDED Viewed

@@ -0,0 +1,8 @@
+# The 20 tasks used in the paper:
+# Wang et al. Localizing Task Information for Improved Model Merging and Compression
+# http://arxiv.org/abs/2405.07813
+defaults:
+  - CLIPVisionModelPool@: _template
+  - /model/clip-vit@models: clip-vit-base-patch32_TALL12
+  - /dataset/image_classification/train@train_datasets: TALL12
+  - /dataset/image_classification/test@test_datasets: TALL12

fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL16.yaml ADDED Viewed

@@ -0,0 +1,8 @@
+# The 20 tasks used in the paper:
+# Wang et al. Localizing Task Information for Improved Model Merging and Compression
+# http://arxiv.org/abs/2405.07813
+defaults:
+  - CLIPVisionModelPool@: _template
+  - /model/clip-vit@models: clip-vit-base-patch32_TALL16
+  - /dataset/image_classification/train@train_datasets: TALL16
+  - /dataset/image_classification/test@test_datasets: TALL16

fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL18.yaml ADDED Viewed

@@ -0,0 +1,8 @@
+# The 20 tasks used in the paper:
+# Wang et al. Localizing Task Information for Improved Model Merging and Compression
+# http://arxiv.org/abs/2405.07813
+defaults:
+  - CLIPVisionModelPool@: _template
+  - /model/clip-vit@models: clip-vit-base-patch32_TALL18
+  - /dataset/image_classification/train@train_datasets: TALL18
+  - /dataset/image_classification/test@test_datasets: TALL18

fusion_bench_config/modelpool/CausalLMPool/deepseek-v2-lite.yaml ADDED Viewed

@@ -0,0 +1,15 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+pretrained_model_name_or_path: deepseek-ai/DeepSeek-V2-Lite  # referenced by both interpolations below
+models:
+  _pretrained_:
+    _target_: fusion_bench.models.modeling_deepseek_v2.DeepseekV2ForCausalLM.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}  # relative interpolation: `...` climbs two levels above the enclosing mapping, to this file's top-level key
+    torch_dtype: bfloat16
+    device_map: auto
+    trust_remote_code: true
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}  # `..` climbs one level above the enclosing mapping, to the same top-level key

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B-Instruct.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.1-8B-Instruct
+  instruction: MergeBench/Llama-3.1-8B-Instruct_instruction
+  math: MergeBench/Llama-3.1-8B-Instruct_math
+  coding: MergeBench/Llama-3.1-8B-Instruct_coding
+  multilingual: MergeBench/Llama-3.1-8B-Instruct_multilingual
+  safety: MergeBench/Llama-3.1-8B-Instruct_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.1-8B-Instruct

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.1-8B
+  instruction: MergeBench/Llama-3.1-8B_instruction
+  math: MergeBench/Llama-3.1-8B_math
+  coding: MergeBench/Llama-3.1-8B_coding
+  multilingual: MergeBench/Llama-3.1-8B_multilingual
+  safety: MergeBench/Llama-3.1-8B_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.1-8B

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B-Instruct.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.2-3B-Instruct
+  instruction: MergeBench/Llama-3.2-3B-Instruct_instruction
+  math: MergeBench/Llama-3.2-3B-Instruct_math
+  coding: MergeBench/Llama-3.2-3B-Instruct_coding
+  multilingual: MergeBench/Llama-3.2-3B-Instruct_multilingual
+  safety: MergeBench/Llama-3.2-3B-Instruct_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.2-3B-Instruct

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.2-3B
+  instruction: MergeBench/Llama-3.2-3B_instruction
+  math: MergeBench/Llama-3.2-3B_math
+  coding: MergeBench/Llama-3.2-3B_coding
+  multilingual: MergeBench/Llama-3.2-3B_multilingual
+  safety: MergeBench/Llama-3.2-3B_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.2-3B

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b-it.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-2b-it
+  instruction: MergeBench/gemma-2-2b-it_instruction
+  math: MergeBench/gemma-2-2b-it_math
+  coding: MergeBench/gemma-2-2b-it_coding
+  multilingual: MergeBench/gemma-2-2b-it_multilingual
+  safety: MergeBench/gemma-2-2b-it_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-2b-it

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-2b
+  instruction: MergeBench/gemma-2-2b_instruction
+  math: MergeBench/gemma-2-2b_math
+  coding: MergeBench/gemma-2-2b_coding
+  multilingual: MergeBench/gemma-2-2b_multilingual
+  safety: MergeBench/gemma-2-2b_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-2b

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b-it.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-9b-it
+  instruction: MergeBench/gemma-2-9b-it_instruction
+  math: MergeBench/gemma-2-9b-it_math
+  coding: MergeBench/gemma-2-9b-it_coding
+  multilingual: MergeBench/gemma-2-9b-it_multilingual
+  safety: MergeBench/gemma-2-9b-it_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-9b-it

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-9b
+  instruction: MergeBench/gemma-2-9b_instruction
+  math: MergeBench/gemma-2-9b_math
+  coding: MergeBench/gemma-2-9b_coding
+  multilingual: MergeBench/gemma-2-9b_multilingual
+  safety: MergeBench/gemma-2-9b_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-9b

fusion_bench_config/modelpool/CausalLMPool/mixtral-8x7b.yaml ADDED Viewed

@@ -0,0 +1,14 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+pretrained_model_name_or_path: mistralai/Mixtral-8x7B-v0.1  # referenced by both interpolations below
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: ${...pretrained_model_name_or_path}  # relative interpolation: `...` climbs two levels above the enclosing mapping, to this file's top-level key
+    torch_dtype: bfloat16
+    device_map: auto
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}  # `..` climbs one level above the enclosing mapping, to the same top-level key

fusion_bench_config/modelpool/SeqenceClassificationModelPool/roberta-base_glue.yaml ADDED Viewed

@@ -0,0 +1,69 @@
+defaults:
+  - Seq2SeqLMPool@: _template  # NOTE(review): Seq2SeqLMPool template inside a sequence-classification RoBERTa config — confirm this is intended
+  - /model/roberta@models:
+      - roberta_base
+      - roberta_glue-cola
+      - roberta_glue-mnli
+      - roberta_glue-mrpc
+      - roberta_glue-qnli
+      - roberta_glue-qqp
+      - roberta_glue-rte
+      - roberta_glue-sst2
+      - roberta_glue-stsb
+# _target_: fusion_bench.modelpool.SequenceClassificationModelPool
+# _recursive_: false
+_dataset_loader: fusion_bench.tasks.flan_t5_text_generation.glue_load_dataset.load_glue_dataset  # NOTE(review): loader lives under flan_t5_text_generation — confirm it suits RoBERTa
+test_datasets:
+  glue-cola:
+    _target_: ${..._dataset_loader}  # relative interpolation to this file's top-level `_dataset_loader` key
+    _recursive_: false
+    name: cola
+    tokenizer: ${...tokenizer}  # relative interpolation to this file's top-level `tokenizer` node
+    split: validation
+  glue-mnli:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: mnli
+    tokenizer: ${...tokenizer}
+    split: validation_matched
+  glue-mrpc:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: mrpc
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-qnli:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: qnli
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-qqp:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: qqp
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-rte:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: rte
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-sst2:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: sst2
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-stsb:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: stsb
+    tokenizer: ${...tokenizer}
+    split: validation
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: roberta-base

{fusion_bench-0.2.14.dist-info → fusion_bench-0.2.16.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{fusion_bench-0.2.14.dist-info → fusion_bench-0.2.16.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{fusion_bench-0.2.14.dist-info → fusion_bench-0.2.16.dist-info}/top_level.txt RENAMED Viewed

File without changes

fusion-bench 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

fusion-bench 0.2.14py3-none-any.whl → 0.2.16py3-none-any.whl