fusion-bench 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/__init__.py +1 -0
- fusion_bench/_get_started/__init__.py +3 -0
- fusion_bench/_get_started/greeting_program.py +49 -0
- fusion_bench/compat/method/base_algorithm.py +14 -0
- fusion_bench/constants/__init__.py +5 -0
- fusion_bench/constants/clip_vision.py +26 -2
- fusion_bench/constants/paths.py +4 -0
- fusion_bench/dataset/clip_dataset.py +2 -1
- fusion_bench/dataset/gpt2_glue.py +9 -9
- fusion_bench/dataset/image_corruption/__init__.py +0 -0
- fusion_bench/dataset/image_corruption/make_corruption.py +179 -0
- fusion_bench/dataset/image_dataset.py +1 -1
- fusion_bench/dataset/nyuv2.py +2 -2
- fusion_bench/method/__init__.py +16 -1
- fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
- fusion_bench/method/adamerging/clip_task_wise_adamerging.py +11 -7
- fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -5
- fusion_bench/method/base_algorithm.py +195 -12
- fusion_bench/method/bitdelta/__init__.py +4 -0
- fusion_bench/method/bitdelta/bitdelta.py +156 -0
- fusion_bench/method/bitdelta/bitdelta_utils/__init__.py +0 -0
- fusion_bench/method/bitdelta/bitdelta_utils/binary_gemm_kernel.py +462 -0
- fusion_bench/method/bitdelta/bitdelta_utils/data.py +35 -0
- fusion_bench/method/bitdelta/bitdelta_utils/diff.py +129 -0
- fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +0 -1
- fusion_bench/method/depth_upscaling/depth_upscaling.py +4 -9
- fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py +4 -5
- fusion_bench/method/doge_ta/doge_ta.py +1 -1
- fusion_bench/method/ensemble.py +12 -12
- fusion_bench/method/expert_sparsity/utils/calibration_data.py +1 -1
- fusion_bench/method/fisher_merging/clip_fisher_merging.py +2 -2
- fusion_bench/method/fisher_merging/fisher_merging.py +6 -15
- fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +3 -10
- fusion_bench/method/fw_merging/fw_hard.py +1 -1
- fusion_bench/method/fw_merging/fw_soft.py +1 -1
- fusion_bench/method/gossip/clip_layer_wise_gossip.py +4 -5
- fusion_bench/method/linear/expo.py +2 -1
- fusion_bench/method/linear/linear_interpolation.py +6 -4
- fusion_bench/method/linear/simple_average_for_llama.py +16 -6
- fusion_bench/method/lm_finetune/bradley_terry_rm.py +2 -2
- fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +9 -26
- fusion_bench/method/model_recombination.py +2 -5
- fusion_bench/method/moe_pruner/hooks/__init__.py +1 -2
- fusion_bench/method/moe_pruner/utils/data.py +2 -1
- fusion_bench/method/moe_pruner/utils/prune.py +6 -1
- fusion_bench/method/pruning/llama_magnitude_prune.py +1 -1
- fusion_bench/method/pruning/wanda_utils/data.py +1 -2
- fusion_bench/method/pwe_moe/clip_pwe_moe.py +12 -34
- fusion_bench/method/randes/modelsoup.py +1 -3
- fusion_bench/method/regmean/clip_regmean.py +2 -2
- fusion_bench/method/regmean/gpt2_regmean.py +3 -10
- fusion_bench/method/regmean/regmean.py +2 -11
- fusion_bench/method/regmean_plusplus/__init__.py +3 -0
- fusion_bench/method/regmean_plusplus/clip_regmean_plusplus.py +199 -0
- fusion_bench/method/regmean_plusplus/regmean_plusplus.py +383 -0
- fusion_bench/method/simple_average.py +16 -4
- fusion_bench/method/slerp/slerp.py +5 -2
- fusion_bench/method/smile_upscaling/error_accumulation.py +177 -0
- fusion_bench/method/smile_upscaling/projected_energy.py +145 -0
- fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +39 -28
- fusion_bench/method/smile_upscaling/smile_upscaling.py +12 -5
- fusion_bench/method/tall_mask/task_arithmetic.py +3 -11
- fusion_bench/method/task_arithmetic/task_arithmetic.py +6 -10
- fusion_bench/method/ties_merging/ties_merging.py +13 -26
- fusion_bench/method/we_moe/clip_we_moe.py +5 -4
- fusion_bench/method/we_moe/we_moe.py +6 -6
- fusion_bench/method/weighted_average/llama.py +4 -16
- fusion_bench/metrics/continual_learning/__init__.py +1 -0
- fusion_bench/metrics/continual_learning/backward_transfer.py +1 -1
- fusion_bench/metrics/nyuv2/__init__.py +2 -2
- fusion_bench/metrics/nyuv2/segmentation.py +1 -1
- fusion_bench/mixins/__init__.py +10 -2
- fusion_bench/mixins/clip_classification.py +4 -3
- fusion_bench/mixins/hydra_config.py +105 -7
- fusion_bench/mixins/lightning_fabric.py +2 -0
- fusion_bench/mixins/serialization.py +265 -48
- fusion_bench/modelpool/__init__.py +2 -2
- fusion_bench/modelpool/base_pool.py +29 -9
- fusion_bench/modelpool/causal_lm/causal_lm.py +9 -0
- fusion_bench/modelpool/clip_vision/modelpool.py +43 -12
- fusion_bench/modelpool/seq_classification_lm/__init__.py +1 -1
- fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +1 -1
- fusion_bench/models/__init__.py +2 -1
- fusion_bench/models/expert_sparsity/mixtral/__init__.py +1 -1
- fusion_bench/models/hf_utils.py +182 -0
- fusion_bench/models/linearized/linearized_model_utils.py +4 -4
- fusion_bench/models/linearized/vision_model.py +1 -1
- fusion_bench/models/modeling_deepseek_v2/__init__.py +1 -1
- fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +4 -4
- fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +0 -1
- fusion_bench/models/modeling_smile_gemma2/__init__.py +9 -0
- fusion_bench/models/modeling_smile_gemma2/configuration_smile_gemma2.py +20 -0
- fusion_bench/models/modeling_smile_gemma2/modeling_smile_gemma2.py +986 -0
- fusion_bench/models/modeling_smile_gemma2/register.py +26 -0
- fusion_bench/models/modeling_smile_llama/__init__.py +0 -0
- fusion_bench/models/modeling_smile_llama/configuration_smile_llama.py +20 -0
- fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +705 -0
- fusion_bench/models/modeling_smile_llama/register.py +8 -0
- fusion_bench/models/modeling_smile_mistral/__init__.py +5 -47
- fusion_bench/models/modeling_smile_qwen2/__init__.py +1 -1
- fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +6 -7
- fusion_bench/models/modeling_smile_qwen2/register.py +1 -4
- fusion_bench/models/parameter_dict.py +1 -1
- fusion_bench/models/sparse_we_moe.py +1 -53
- fusion_bench/models/utils.py +26 -0
- fusion_bench/models/we_moe.py +1 -53
- fusion_bench/models/wrappers/ensemble.py +6 -4
- fusion_bench/models/wrappers/layer_wise_fusion.py +1 -1
- fusion_bench/models/wrappers/task_wise_fusion.py +250 -72
- fusion_bench/programs/base_program.py +81 -2
- fusion_bench/programs/fabric_fusion_program.py +24 -8
- fusion_bench/scripts/cli.py +6 -6
- fusion_bench/taskpool/base_pool.py +4 -3
- fusion_bench/taskpool/clip_vision/taskpool.py +34 -18
- fusion_bench/taskpool/dummy.py +1 -1
- fusion_bench/taskpool/lm_eval_harness/taskpool.py +1 -2
- fusion_bench/tasks/clip_classification/__init__.py +6 -4
- fusion_bench/utils/__init__.py +6 -1
- fusion_bench/utils/devices.py +14 -4
- fusion_bench/utils/instantiate_utils.py +3 -1
- fusion_bench/utils/misc.py +48 -2
- fusion_bench/utils/modelscope.py +265 -0
- fusion_bench/utils/parameters.py +2 -2
- fusion_bench/utils/rich_utils.py +3 -0
- fusion_bench/utils/state_dict_arithmetic.py +34 -27
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/METADATA +31 -24
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/RECORD +189 -153
- fusion_bench_config/_get_started/clip_evaluate_single_model.yaml +21 -0
- fusion_bench_config/_get_started/clip_simple_average.yaml +23 -0
- fusion_bench_config/_get_started/clip_task_arithmetic.yaml +24 -0
- fusion_bench_config/_get_started/greeting_program.yaml +4 -0
- fusion_bench_config/fabric/loggers/csv_logger.yaml +3 -3
- fusion_bench_config/fabric/loggers/tensorboard_logger.yaml +3 -3
- fusion_bench_config/fabric_model_fusion.yaml +45 -17
- fusion_bench_config/hydra/default.yaml +6 -2
- fusion_bench_config/llama_full_finetune.yaml +1 -0
- fusion_bench_config/method/adamerging/clip.yaml +1 -1
- fusion_bench_config/method/bitdelta/bitdelta.yaml +12 -0
- fusion_bench_config/method/depth_upscaling.yaml +4 -1
- fusion_bench_config/method/regmean/clip_regmean.yaml +1 -1
- fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml +11 -0
- fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/projected_energy.yaml +2 -0
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +1 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +1 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml +73 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml +27 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8.yaml +34 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +14 -17
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only.yaml +14 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml +39 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml +49 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml +55 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml +21 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL16.yaml +61 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL18.yaml +67 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml +73 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml +26 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +4 -9
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +7 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +6 -10
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_cars.yaml +6 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_dtd.yaml +6 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_cars_and_dtd.yaml +7 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +8 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +4 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +32 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +14 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml +73 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml +27 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +6 -10
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +2 -2
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml +9 -0
- fusion_bench_config/modelpool/CausalLMPool/mistral-7b.yaml +6 -0
- fusion_bench_config/modelpool/CausalLMPool/mixtral_moe_merging.yaml +10 -0
- fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +4 -12
- fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +6 -16
- fusion_bench_config/modelpool/CausalLMPool/vicuna-7b-v1.5.yaml +8 -0
- fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/llama_preference700k.yaml +1 -1
- fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/single_reward_model.yaml +1 -1
- fusion_bench_config/nyuv2_config.yaml +3 -1
- fusion_bench_config/nyuv2_mtl_train.yaml +1 -0
- fusion_bench_config/path/default.yaml +28 -0
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_svhn_and_mnist.yaml +24 -0
- fusion_bench_config/method/adamerging.yaml +0 -23
- fusion_bench_config/modelpool/mixtral_moe_merging.yaml +0 -14
- fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +0 -6
- fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -22
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/top_level.txt +0 -0
- /fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/roberta-base_glue.yaml +0 -0
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml
CHANGED
@@ -1,9 +1,29 @@
-# The 20 task used in the paper:
+# The 20 task used in the paper:
 # Wang et al. Localizing Task Information for Improved Model Merging and Compression
 # http://arxiv.org/abs/2405.07813
-
-
-
-
-
-
+_target_: fusion_bench.modelpool.CLIPVisionModelPool
+_recursive_: False
+processor: openai/clip-vit-large-patch14
+models:
+  _pretrained_: openai/clip-vit-large-patch14
+  sun397: tanganke/clip-vit-large-patch14_sun397
+  stanford-cars: tanganke/clip-vit-large-patch14_stanford-cars
+  resisc45: tanganke/clip-vit-large-patch14_resisc45
+  eurosat: tanganke/clip-vit-large-patch14_eurosat
+  svhn: tanganke/clip-vit-large-patch14_svhn
+  gtsrb: tanganke/clip-vit-large-patch14_gtsrb
+  mnist: tanganke/clip-vit-large-patch14_mnist
+  dtd: tanganke/clip-vit-large-patch14_dtd
+  oxford_flowers102: tanganke/clip-vit-large-patch14_oxford_flowers102
+  pcam: tanganke/clip-vit-large-patch14_pcam
+  fer2013: tanganke/clip-vit-large-patch14_fer2013
+  oxford-iiit-pet: tanganke/clip-vit-large-patch14_oxford-iiit-pet
+  stl10: tanganke/clip-vit-large-patch14_stl10
+  cifar100: tanganke/clip-vit-large-patch14_cifar100
+  cifar10: tanganke/clip-vit-large-patch14_cifar10
+  food101: tanganke/clip-vit-large-patch14_food101
+  fashion_mnist: tanganke/clip-vit-large-patch14_fashion_mnist
+  emnist_letters: tanganke/clip-vit-large-patch14_emnist_letters
+  kmnist: tanganke/clip-vit-large-patch14_kmnist
+  rendered-sst2: tanganke/clip-vit-large-patch14_rendered-sst2
+platform: hf
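The rewritten config flattens each model entry to a bare Hugging Face repo id, with `platform: hf` selecting the Hub. A minimal sketch of how these entries could be consumed, assuming the wheel's fusion_bench_config directory is on disk; the use of `CLIPVisionModel` here is an assumption for illustration, since the actual resolution of plain-string entries happens inside `fusion_bench.modelpool.CLIPVisionModelPool`:

# Hedged sketch: each value in `models` is now a bare Hugging Face repo id
# (`platform: hf`). CLIPVisionModel is an assumed model class; the pool's
# real loading logic may differ.
from omegaconf import OmegaConf
from transformers import CLIPProcessor, CLIPVisionModel

cfg = OmegaConf.load(
    "fusion_bench_config/modelpool/CLIPVisionModelPool/"
    "clip-vit-large-patch14_TALL20_model_only.yaml"
)
processor = CLIPProcessor.from_pretrained(cfg.processor)
for name, repo_id in cfg.models.items():
    # `_pretrained_` is the shared base model; the other 20 entries are the
    # task-specific fine-tuned checkpoints. This downloads from the Hub.
    model = CLIPVisionModel.from_pretrained(repo_id)
    print(f"loaded {name} from {repo_id}")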
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml
CHANGED

@@ -2,15 +2,11 @@
 #
 # fusion_bench \
 # modelpool=CLIPVisionModelPool/clip-vit-large-patch14_individual \
-# modelpool.
+# modelpool.models._pretrained_=${MODEL_PATH}
 # ...
-
-
+_target_: fusion_bench.modelpool.CLIPVisionModelPool
+_recursive_: False
 models:
-  _pretrained_:
-
-
-processor:
-  _target_: transformers.CLIPProcessor.from_pretrained
-  pretrained_model_name_or_path: ${..base_model}
-base_model: openai/clip-vit-large-patch14
+  _pretrained_: openai/clip-vit-large-patch14
+processor: openai/clip-vit-large-patch14
+platform: hf
fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml
CHANGED

@@ -4,8 +4,8 @@ _recursive_: false
 load_lazy: false
 models:
   _pretrained_: Qwen/Qwen2.5-1.5B
-
-
+  math: Qwen/Qwen2.5-Math-1.5B
+  code: Qwen/Qwen2.5-Coder-1.5B
 model_kwargs:
   torch_dtype: bfloat16
 tokenizer: Qwen/Qwen2.5-1.5B
fusion_bench_config/modelpool/CausalLMPool/mixtral_moe_merging.yaml
ADDED

@@ -0,0 +1,10 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: path_to_your_pretrained_model
+  expert_1: path_to_your_expert_model_1
+  expert_2: path_to_your_expert_model_2
+  expert_3: path_to_your_expert_model_3
+  expert_4: path_to_your_expert_model_4
+tokenizer: ${.models._pretrained_}
+model_kwargs:
+  torch_dtype: bfloat16
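The `tokenizer: ${.models._pretrained_}` line uses OmegaConf's relative interpolation: the leading dot resolves against the enclosing config node, so the tokenizer path always tracks the `_pretrained_` entry. A minimal, self-contained demonstration (the Mixtral repo id is just an example value, not part of this config):

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "models": {
            "_pretrained_": "path_to_your_pretrained_model",
            "expert_1": "path_to_your_expert_model_1",
        },
        "tokenizer": "${.models._pretrained_}",
    }
)
print(cfg.tokenizer)  # path_to_your_pretrained_model
# Interpolations resolve lazily, so updating the base model path also
# updates the tokenizer path.
cfg.models["_pretrained_"] = "mistralai/Mixtral-8x7B-v0.1"
print(cfg.tokenizer)  # mistralai/Mixtral-8x7B-v0.1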
fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml
CHANGED

@@ -1,17 +1,9 @@
 _target_: fusion_bench.modelpool.CausalLMPool
 _recursive_: false
 models:
-  _pretrained_:
-
-
-  expert_1:
-    _target_: transformers.AutoModelForCausalLM.from_pretrained
-    pretrained_model_name_or_path: Qwen/Qwen2.5-Math-1.5B
-  expert_2:
-    _target_: transformers.AutoModelForCausalLM.from_pretrained
-    pretrained_model_name_or_path: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+  _pretrained_: Qwen/Qwen2.5-1.5B
+  expert_1: Qwen/Qwen2.5-Math-1.5B
+  expert_2: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
 model_kwargs:
   torch_dtype: bfloat16
-tokenizer:
-  _target_: transformers.AutoTokenizer.from_pretrained
-  pretrained_model_name_or_path: Qwen/Qwen2.5-1.5B
+tokenizer: Qwen/Qwen2.5-1.5B
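This change (and the analogous one in `simle_mixtral_exp_v4.yaml` below) collapses explicit `_target_: transformers.AutoModelForCausalLM.from_pretrained` blocks into bare model-path strings. The deleted legacy `modelpool/mixtral_moe_merging.yaml` further down documents the same shorthand ("this is equivalent to `AutoModelForCausalLM.from_pretrained(path)`"). A hedged sketch of that equivalence, assuming `CausalLMPool` expands a plain string into the same call the old `_target_` spelled out:

# Hedged sketch of the old vs. new config style; the pool's exact expansion
# of plain strings is assumed, not confirmed by this diff.
import torch
from hydra.utils import instantiate
from transformers import AutoModelForCausalLM

# Old style: explicit Hydra target, resolved via hydra.utils.instantiate.
old_node = {
    "_target_": "transformers.AutoModelForCausalLM.from_pretrained",
    "pretrained_model_name_or_path": "Qwen/Qwen2.5-Math-1.5B",
}
model_a = instantiate(old_node, torch_dtype=torch.bfloat16)

# New style: a bare string, loaded directly (assumed pool behavior).
model_b = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-Math-1.5B", torch_dtype=torch.bfloat16
)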
fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml
CHANGED

@@ -1,20 +1,10 @@
 _target_: fusion_bench.modelpool.CausalLMPool
 _recursive_: false
 models:
-  _pretrained_:
-
-
-
-    _target_: transformers.AutoModelForCausalLM.from_pretrained
-    pretrained_model_name_or_path: meta-math/MetaMath-Mistral-7B
-  expert_2:
-    _target_: transformers.AutoModelForCausalLM.from_pretrained
-    pretrained_model_name_or_path: cognitivecomputations/dolphin-2.1-mistral-7b
-  expert_3:
-    _target_: transformers.AutoModelForCausalLM.from_pretrained
-    pretrained_model_name_or_path: uukuguy/speechless-code-mistral-7b-v1.0
+  _pretrained_: mistralai/Mistral-7B-v0.1
+  expert_1: meta-math/MetaMath-Mistral-7B
+  expert_2: cognitivecomputations/dolphin-2.1-mistral-7b
+  expert_3: uukuguy/speechless-code-mistral-7b-v1.0
 model_kwargs:
-  torch_dtype:
-tokenizer:
-  _target_: transformers.AutoTokenizer.from_pretrained
-  pretrained_model_name_or_path: mistralai/Mistral-7B-v0.1
+  torch_dtype: bfloat16
+tokenizer: mistralai/Mistral-7B-v0.1
fusion_bench_config/nyuv2_config.yaml
CHANGED

@@ -1,8 +1,10 @@
 defaults:
   - hydra: default
   - fabric: auto
-
+  - path: default
+  # --- Model, Method, Task ---
   - method: simple_average
+  - modelpool: nyuv2_modelpool
   - taskpool: nyuv2_taskpool
   - _self_
 _target_: fusion_bench.programs.FabricModelFusionProgram
fusion_bench_config/path/default.yaml
ADDED

@@ -0,0 +1,28 @@
+# =============================================================================
+# FusionBench Path Configuration
+# =============================================================================
+# This configuration file defines the directory structure and path settings
+# used throughout the FusionBench framework for model fusion experiments.
+# All paths are configured using Hydra's variable interpolation syntax.
+# Root directory - uses FUSION_BENCH_PROJECT_ROOT env var or current directory
+#
+# By default:
+#
+# root_dir (defaults to current directory)
+# ├── outputs (output_dir)
+# │   ├── cache (cache_dir)
+# │   └── <config_name>
+# │       └── <timestamp> (log_dir)
+# └── data (data_dir)
+#
+root_dir: ${oc.env:FUSION_BENCH_PROJECT_ROOT,"."}
+# Output directory for experiment results and artifacts
+output_dir: ${.root_dir}/outputs
+# Data directory - uses FUSION_BENCH_DATA_DIR env var or root_dir/data
+data_dir: ${oc.env:FUSION_BENCH_DATA_DIR,${.root_dir}/data}
+# Cache directory - uses FUSION_BENCH_CACHE_DIR env var or output_dir/cache
+cache_dir: ${oc.env:FUSION_BENCH_CACHE_DIR,${.output_dir}/cache}
+# Log directory with timestamped subdirectories for each run
+log_dir: ${.output_dir}/${hydra:job.config_name}/${now:%Y-%m-%d_%H-%M-%S}
+# Current working directory at runtime
+work_dir: ${hydra:runtime.cwd}
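The new path config leans on OmegaConf's `oc.env` resolver with fallback defaults. A small demonstration of how the env-var overrides behave; `log_dir` and `work_dir` are left out because the `${hydra:...}` and `${now:...}` resolvers only exist inside a running Hydra application:

import os
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "root_dir": '${oc.env:FUSION_BENCH_PROJECT_ROOT,"."}',
        "output_dir": "${.root_dir}/outputs",
        "data_dir": "${oc.env:FUSION_BENCH_DATA_DIR,${.root_dir}/data}",
    }
)
os.environ.pop("FUSION_BENCH_PROJECT_ROOT", None)
os.environ.pop("FUSION_BENCH_DATA_DIR", None)
print(cfg.output_dir)  # ./outputs (env var unset, so the "." default applies)
os.environ["FUSION_BENCH_PROJECT_ROOT"] = "/srv/fusion_bench"
print(cfg.data_dir)  # /srv/fusion_bench/data (oc.env is re-read on each access)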
fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_svhn_and_mnist.yaml
ADDED
@@ -0,0 +1,24 @@
+defaults:
+  - /dataset/image_classification/test@test_datasets:
+      - svhn
+      - mnist
+_target_: fusion_bench.taskpool.CLIPVisionModelTaskPool
+_recursive_: false
+test_datasets: ??? # The datasets to evaluate the model on
+base_model: openai/clip-vit-base-patch32
+clip_model: ${.base_model} # The base model to use
+processor: ${.base_model} # The base model to use
+data_processor: ${.processor}
+dataloader_kwargs:
+  batch_size: 128 # The batch size for the data loader
+  num_workers: 8 # The number of worker processes for data loading
+  pin_memory: True # Whether to pin memory in data loader
+  drop_last: False # Whether to drop the last incomplete batch
+  shuffle: False # Whether to shuffle the data
+# === layer-wise feature saving ===
+# The path to save the features to, if none then the features are not saved
+# This is the path to a directory, the features of task `task_name` will be saved in `feature_save_path/task_name.csv`
+layer_wise_feature_save_path: null
+layer_wise_feature_first_token_only: true # Whether to save only the first token of the features
+# The maximum number of samples to save the features for
+layer_wise_feature_max_num: 1000
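The `dataloader_kwargs` block maps one-to-one onto `torch.utils.data.DataLoader` arguments. A hedged sketch, assuming the wheel's fusion_bench_config directory is on disk and with a random `TensorDataset` standing in for the SVHN/MNIST test sets that the `defaults` list would normally wire into `test_datasets`:

import torch
from omegaconf import OmegaConf
from torch.utils.data import DataLoader, TensorDataset

cfg = OmegaConf.load(
    "fusion_bench_config/taskpool/CLIPVisionModelTaskPool/"
    "clip-vit-base-patch32_svhn_and_mnist.yaml"
)
kwargs = OmegaConf.to_container(cfg.dataloader_kwargs)
kwargs["num_workers"] = 0  # keep the sketch single-process
# Random stand-in for the real CLIP test datasets.
dataset = TensorDataset(
    torch.randn(512, 3, 224, 224), torch.zeros(512, dtype=torch.long)
)
loader = DataLoader(dataset, **kwargs)
images, labels = next(iter(loader))
print(images.shape)  # torch.Size([128, 3, 224, 224])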
fusion_bench_config/method/adamerging.yaml
DELETED

@@ -1,23 +0,0 @@
-# this option can be one of "clip_task_wise_adamerging" or "clip_layer_wise_adamerging"
-name: clip_layer_wise_adamerging
-# this weights can be a list of float, or a string that points to a *.np, *.pt file containing the weights
-# if weights is specified, skip the test-time adaptation training
-weights: null
-# learning rate
-optimizer: adam
-lr: 1e-3
-init_values: 0.3
-# if `clamp_weights` is true, the weights will be clamped to [0, 1]
-clamp_weights: false
-# arguments of `functional_call`
-tie_weights: true
-strict: false
-# this is overrided by `fabric.devices` if launched from the `fusion_bench` CLI.
-devices: 1
-batch_size: 16
-num_workers: 8
-max_steps: 1000
-fast_dev_run: ${fast_dev_run}
-# the path for saving the merging weights
-save_merging_weights: 'merging_weights.pt'
-cache_dir: outputs
fusion_bench_config/modelpool/mixtral_moe_merging.yaml
DELETED

@@ -1,14 +0,0 @@
-type: AutoModelForCausalLMPool
-# each model should have a name and a path, and the model is loaded from the path
-# this is equivalent to `AutoModelForCausalLM.from_pretrained(path)`
-models:
-  - name: _pretrained_
-    path: path_to_your_pretrained_model
-  - name: expert_1
-    path: path_to_your_expert_model_1
-  - name: expert_2
-    path: path_to_your_expert_model_2
-  - name: expert_3
-    path: path_to_your_expert_model_3
-  - name: expert_4
-    path: path_to_your_expert_model_4
fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml
DELETED

@@ -1,22 +0,0 @@
-type: clip_vit_classification
-name: clip-vit-base-patch32_svhn_and_mnist # whatever you like
-dataset_type: huggingface_image_classification
-tasks:
-  - name: svhn
-    dataset:
-      type: instantiate
-      name: svhn
-      object:
-        _target_: datasets.load_dataset
-        _args_:
-          - svhn
-          - cropped_digits
-        split: test
-  - name: mnist
-    dataset:
-      name: mnist
-      split: test
-clip_model: openai/clip-vit-base-patch32
-batch_size: 128
-num_workers: 16
-fast_dev_run: ${fast_dev_run}