fusion-bench 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/__init__.py +1 -0
- fusion_bench/_get_started/__init__.py +3 -0
- fusion_bench/_get_started/greeting_program.py +49 -0
- fusion_bench/compat/method/base_algorithm.py +14 -0
- fusion_bench/constants/__init__.py +5 -0
- fusion_bench/constants/clip_vision.py +26 -2
- fusion_bench/constants/paths.py +4 -0
- fusion_bench/dataset/clip_dataset.py +2 -1
- fusion_bench/dataset/gpt2_glue.py +9 -9
- fusion_bench/dataset/image_corruption/__init__.py +0 -0
- fusion_bench/dataset/image_corruption/make_corruption.py +179 -0
- fusion_bench/dataset/image_dataset.py +1 -1
- fusion_bench/dataset/nyuv2.py +2 -2
- fusion_bench/method/__init__.py +16 -1
- fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
- fusion_bench/method/adamerging/clip_task_wise_adamerging.py +11 -7
- fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -5
- fusion_bench/method/base_algorithm.py +195 -12
- fusion_bench/method/bitdelta/__init__.py +4 -0
- fusion_bench/method/bitdelta/bitdelta.py +156 -0
- fusion_bench/method/bitdelta/bitdelta_utils/__init__.py +0 -0
- fusion_bench/method/bitdelta/bitdelta_utils/binary_gemm_kernel.py +462 -0
- fusion_bench/method/bitdelta/bitdelta_utils/data.py +35 -0
- fusion_bench/method/bitdelta/bitdelta_utils/diff.py +129 -0
- fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +0 -1
- fusion_bench/method/depth_upscaling/depth_upscaling.py +4 -9
- fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py +4 -5
- fusion_bench/method/doge_ta/doge_ta.py +1 -1
- fusion_bench/method/ensemble.py +12 -12
- fusion_bench/method/expert_sparsity/utils/calibration_data.py +1 -1
- fusion_bench/method/fisher_merging/clip_fisher_merging.py +2 -2
- fusion_bench/method/fisher_merging/fisher_merging.py +6 -15
- fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +3 -10
- fusion_bench/method/fw_merging/fw_hard.py +1 -1
- fusion_bench/method/fw_merging/fw_soft.py +1 -1
- fusion_bench/method/gossip/clip_layer_wise_gossip.py +4 -5
- fusion_bench/method/linear/expo.py +2 -1
- fusion_bench/method/linear/linear_interpolation.py +6 -4
- fusion_bench/method/linear/simple_average_for_llama.py +16 -6
- fusion_bench/method/lm_finetune/bradley_terry_rm.py +2 -2
- fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +9 -26
- fusion_bench/method/model_recombination.py +2 -5
- fusion_bench/method/moe_pruner/hooks/__init__.py +1 -2
- fusion_bench/method/moe_pruner/utils/data.py +2 -1
- fusion_bench/method/moe_pruner/utils/prune.py +6 -1
- fusion_bench/method/pruning/llama_magnitude_prune.py +1 -1
- fusion_bench/method/pruning/wanda_utils/data.py +1 -2
- fusion_bench/method/pwe_moe/clip_pwe_moe.py +12 -34
- fusion_bench/method/randes/modelsoup.py +1 -3
- fusion_bench/method/regmean/clip_regmean.py +2 -2
- fusion_bench/method/regmean/gpt2_regmean.py +3 -10
- fusion_bench/method/regmean/regmean.py +2 -11
- fusion_bench/method/regmean_plusplus/__init__.py +3 -0
- fusion_bench/method/regmean_plusplus/clip_regmean_plusplus.py +199 -0
- fusion_bench/method/regmean_plusplus/regmean_plusplus.py +383 -0
- fusion_bench/method/simple_average.py +16 -4
- fusion_bench/method/slerp/slerp.py +5 -2
- fusion_bench/method/smile_upscaling/error_accumulation.py +177 -0
- fusion_bench/method/smile_upscaling/projected_energy.py +145 -0
- fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +39 -28
- fusion_bench/method/smile_upscaling/smile_upscaling.py +12 -5
- fusion_bench/method/tall_mask/task_arithmetic.py +3 -11
- fusion_bench/method/task_arithmetic/task_arithmetic.py +6 -10
- fusion_bench/method/ties_merging/ties_merging.py +13 -26
- fusion_bench/method/we_moe/clip_we_moe.py +5 -4
- fusion_bench/method/we_moe/we_moe.py +6 -6
- fusion_bench/method/weighted_average/llama.py +4 -16
- fusion_bench/metrics/continual_learning/__init__.py +1 -0
- fusion_bench/metrics/continual_learning/backward_transfer.py +1 -1
- fusion_bench/metrics/nyuv2/__init__.py +2 -2
- fusion_bench/metrics/nyuv2/segmentation.py +1 -1
- fusion_bench/mixins/__init__.py +10 -2
- fusion_bench/mixins/clip_classification.py +4 -3
- fusion_bench/mixins/hydra_config.py +105 -7
- fusion_bench/mixins/lightning_fabric.py +2 -0
- fusion_bench/mixins/serialization.py +265 -48
- fusion_bench/modelpool/__init__.py +2 -2
- fusion_bench/modelpool/base_pool.py +29 -9
- fusion_bench/modelpool/causal_lm/causal_lm.py +9 -0
- fusion_bench/modelpool/clip_vision/modelpool.py +43 -12
- fusion_bench/modelpool/seq_classification_lm/__init__.py +1 -1
- fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +1 -1
- fusion_bench/models/__init__.py +2 -1
- fusion_bench/models/expert_sparsity/mixtral/__init__.py +1 -1
- fusion_bench/models/hf_utils.py +182 -0
- fusion_bench/models/linearized/linearized_model_utils.py +4 -4
- fusion_bench/models/linearized/vision_model.py +1 -1
- fusion_bench/models/modeling_deepseek_v2/__init__.py +1 -1
- fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +4 -4
- fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +0 -1
- fusion_bench/models/modeling_smile_gemma2/__init__.py +9 -0
- fusion_bench/models/modeling_smile_gemma2/configuration_smile_gemma2.py +20 -0
- fusion_bench/models/modeling_smile_gemma2/modeling_smile_gemma2.py +986 -0
- fusion_bench/models/modeling_smile_gemma2/register.py +26 -0
- fusion_bench/models/modeling_smile_llama/__init__.py +0 -0
- fusion_bench/models/modeling_smile_llama/configuration_smile_llama.py +20 -0
- fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +705 -0
- fusion_bench/models/modeling_smile_llama/register.py +8 -0
- fusion_bench/models/modeling_smile_mistral/__init__.py +5 -47
- fusion_bench/models/modeling_smile_qwen2/__init__.py +1 -1
- fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +6 -7
- fusion_bench/models/modeling_smile_qwen2/register.py +1 -4
- fusion_bench/models/parameter_dict.py +1 -1
- fusion_bench/models/sparse_we_moe.py +1 -53
- fusion_bench/models/utils.py +26 -0
- fusion_bench/models/we_moe.py +1 -53
- fusion_bench/models/wrappers/ensemble.py +6 -4
- fusion_bench/models/wrappers/layer_wise_fusion.py +1 -1
- fusion_bench/models/wrappers/task_wise_fusion.py +250 -72
- fusion_bench/programs/base_program.py +81 -2
- fusion_bench/programs/fabric_fusion_program.py +24 -8
- fusion_bench/scripts/cli.py +6 -6
- fusion_bench/taskpool/base_pool.py +4 -3
- fusion_bench/taskpool/clip_vision/taskpool.py +34 -18
- fusion_bench/taskpool/dummy.py +1 -1
- fusion_bench/taskpool/lm_eval_harness/taskpool.py +1 -2
- fusion_bench/tasks/clip_classification/__init__.py +6 -4
- fusion_bench/utils/__init__.py +6 -1
- fusion_bench/utils/devices.py +14 -4
- fusion_bench/utils/instantiate_utils.py +3 -1
- fusion_bench/utils/misc.py +48 -2
- fusion_bench/utils/modelscope.py +265 -0
- fusion_bench/utils/parameters.py +2 -2
- fusion_bench/utils/rich_utils.py +3 -0
- fusion_bench/utils/state_dict_arithmetic.py +34 -27
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/METADATA +31 -24
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/RECORD +189 -153
- fusion_bench_config/_get_started/clip_evaluate_single_model.yaml +21 -0
- fusion_bench_config/_get_started/clip_simple_average.yaml +23 -0
- fusion_bench_config/_get_started/clip_task_arithmetic.yaml +24 -0
- fusion_bench_config/_get_started/greeting_program.yaml +4 -0
- fusion_bench_config/fabric/loggers/csv_logger.yaml +3 -3
- fusion_bench_config/fabric/loggers/tensorboard_logger.yaml +3 -3
- fusion_bench_config/fabric_model_fusion.yaml +45 -17
- fusion_bench_config/hydra/default.yaml +6 -2
- fusion_bench_config/llama_full_finetune.yaml +1 -0
- fusion_bench_config/method/adamerging/clip.yaml +1 -1
- fusion_bench_config/method/bitdelta/bitdelta.yaml +12 -0
- fusion_bench_config/method/depth_upscaling.yaml +4 -1
- fusion_bench_config/method/regmean/clip_regmean.yaml +1 -1
- fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml +11 -0
- fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/projected_energy.yaml +2 -0
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +1 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +1 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml +73 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml +27 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8.yaml +34 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +14 -17
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only.yaml +14 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml +39 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml +49 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml +55 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml +21 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL16.yaml +61 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL18.yaml +67 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml +73 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml +26 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +4 -9
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +7 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +6 -10
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_cars.yaml +6 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_dtd.yaml +6 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_cars_and_dtd.yaml +7 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +8 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +4 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +32 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +14 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml +73 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml +27 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +6 -10
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +2 -2
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml +9 -0
- fusion_bench_config/modelpool/CausalLMPool/mistral-7b.yaml +6 -0
- fusion_bench_config/modelpool/CausalLMPool/mixtral_moe_merging.yaml +10 -0
- fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +4 -12
- fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +6 -16
- fusion_bench_config/modelpool/CausalLMPool/vicuna-7b-v1.5.yaml +8 -0
- fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/llama_preference700k.yaml +1 -1
- fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/single_reward_model.yaml +1 -1
- fusion_bench_config/nyuv2_config.yaml +3 -1
- fusion_bench_config/nyuv2_mtl_train.yaml +1 -0
- fusion_bench_config/path/default.yaml +28 -0
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_svhn_and_mnist.yaml +24 -0
- fusion_bench_config/method/adamerging.yaml +0 -23
- fusion_bench_config/modelpool/mixtral_moe_merging.yaml +0 -14
- fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +0 -6
- fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -22
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/top_level.txt +0 -0
- /fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/roberta-base_glue.yaml +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
_target_: fusion_bench.programs.FabricModelFusionProgram
|
|
2
|
+
_recursive_: false
|
|
3
|
+
method:
|
|
4
|
+
_target_: fusion_bench.method.DummyAlgorithm
|
|
5
|
+
modelpool:
|
|
6
|
+
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
7
|
+
models:
|
|
8
|
+
_pretrained_: openai/clip-vit-base-patch32
|
|
9
|
+
taskpool:
|
|
10
|
+
_target_: fusion_bench.taskpool.CLIPVisionModelTaskPool
|
|
11
|
+
test_datasets:
|
|
12
|
+
sun397:
|
|
13
|
+
_target_: datasets.load_dataset
|
|
14
|
+
path: tanganke/sun397
|
|
15
|
+
split: test
|
|
16
|
+
stanford-cars:
|
|
17
|
+
_target_: datasets.load_dataset
|
|
18
|
+
path: tanganke/stanford_cars
|
|
19
|
+
split: test
|
|
20
|
+
clip_model: openai/clip-vit-base-patch32
|
|
21
|
+
processor: openai/clip-vit-base-patch32
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
_target_: fusion_bench.programs.FabricModelFusionProgram # (1)!
|
|
2
|
+
_recursive_: false
|
|
3
|
+
method: # (2)!
|
|
4
|
+
_target_: fusion_bench.method.SimpleAverageAlgorithm
|
|
5
|
+
modelpool: # (3)!
|
|
6
|
+
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
7
|
+
models:
|
|
8
|
+
_pretrained_: openai/clip-vit-base-patch32
|
|
9
|
+
sun397: tanganke/clip-vit-base-patch32_sun397
|
|
10
|
+
stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
|
|
11
|
+
taskpool: # (4)!
|
|
12
|
+
_target_: fusion_bench.taskpool.CLIPVisionModelTaskPool
|
|
13
|
+
test_datasets:
|
|
14
|
+
sun397:
|
|
15
|
+
_target_: datasets.load_dataset
|
|
16
|
+
path: tanganke/sun397
|
|
17
|
+
split: test
|
|
18
|
+
stanford-cars:
|
|
19
|
+
_target_: datasets.load_dataset
|
|
20
|
+
path: tanganke/stanford_cars
|
|
21
|
+
split: test
|
|
22
|
+
clip_model: openai/clip-vit-base-patch32
|
|
23
|
+
processor: openai/clip-vit-base-patch32
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
_target_: fusion_bench.programs.FabricModelFusionProgram
|
|
2
|
+
_recursive_: false
|
|
3
|
+
method:
|
|
4
|
+
_target_: fusion_bench.method.TaskArithmeticAlgorithm
|
|
5
|
+
scaling_factor: 0.7
|
|
6
|
+
modelpool:
|
|
7
|
+
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
8
|
+
models:
|
|
9
|
+
_pretrained_: openai/clip-vit-base-patch32
|
|
10
|
+
sun397: tanganke/clip-vit-base-patch32_sun397
|
|
11
|
+
stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
|
|
12
|
+
taskpool:
|
|
13
|
+
_target_: fusion_bench.taskpool.CLIPVisionModelTaskPool
|
|
14
|
+
test_datasets:
|
|
15
|
+
sun397:
|
|
16
|
+
_target_: datasets.load_dataset
|
|
17
|
+
path: tanganke/sun397
|
|
18
|
+
split: test
|
|
19
|
+
stanford-cars:
|
|
20
|
+
_target_: datasets.load_dataset
|
|
21
|
+
path: tanganke/stanford_cars
|
|
22
|
+
split: test
|
|
23
|
+
clip_model: openai/clip-vit-base-patch32
|
|
24
|
+
processor: openai/clip-vit-base-patch32
|
|
@@ -3,9 +3,9 @@ _target_: lightning.fabric.loggers.CSVLogger
|
|
|
3
3
|
# for example, `outputs/logs/lightning_logs/version_0` and `outputs/logs/lightning_logs/version_1` by default
|
|
4
4
|
|
|
5
5
|
# root directory for all logging
|
|
6
|
-
root_dir:
|
|
6
|
+
root_dir: ${path.log_dir}
|
|
7
7
|
# the name of the experiment
|
|
8
|
-
name:
|
|
9
|
-
version:
|
|
8
|
+
name: ""
|
|
9
|
+
version: ""
|
|
10
10
|
prefix: ""
|
|
11
11
|
flush_logs_every_n_steps: 100
|
|
@@ -3,9 +3,9 @@ _target_: lightning.fabric.loggers.TensorBoardLogger
|
|
|
3
3
|
# for example, `outputs/logs/lightning_logs/version_0` and `outputs/logs/lightning_logs/version_1` by default
|
|
4
4
|
|
|
5
5
|
# root directory for all logging
|
|
6
|
-
root_dir:
|
|
6
|
+
root_dir: ${path.log_dir}
|
|
7
7
|
# the name of the experiment
|
|
8
|
-
name: "
|
|
9
|
-
version:
|
|
8
|
+
name: ""
|
|
9
|
+
version: ""
|
|
10
10
|
sub_dir: null
|
|
11
11
|
default_hp_metric: false
|
|
@@ -1,19 +1,47 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# FusionBench Fabric Model Fusion Configuration
|
|
3
|
+
# =============================================================================
|
|
4
|
+
# This configuration file defines the settings for running model fusion experiments
|
|
5
|
+
# using PyTorch Lightning Fabric framework within FusionBench.
|
|
6
|
+
#
|
|
7
|
+
# The configuration includes:
|
|
8
|
+
#
|
|
9
|
+
# - Hydra framework settings and overrides
|
|
10
|
+
# - PyTorch Lightning Fabric configuration for distributed training
|
|
11
|
+
# - Path management for data, outputs, and logs
|
|
12
|
+
# - (core components) Model pool, fusion method, and task pool specifications
|
|
13
|
+
# - Experiment execution parameters and debugging options
|
|
14
|
+
#
|
|
15
|
+
# =============================================================================
|
|
16
|
+
# Hydra Configuration Defaults
|
|
17
|
+
# =============================================================================
|
|
1
18
|
defaults:
|
|
2
|
-
- hydra: default
|
|
3
|
-
- fabric: auto
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
-
|
|
7
|
-
-
|
|
8
|
-
-
|
|
19
|
+
- hydra: default # Hydra framework configuration
|
|
20
|
+
- fabric: auto # PyTorch Lightning Fabric auto-configuration
|
|
21
|
+
- path: default # Path management configuration
|
|
22
|
+
# --- Core Components ---
|
|
23
|
+
- modelpool: CLIPVisionModelPool/clip-vit-base-patch32_TA8 # Model pool specification
|
|
24
|
+
- method: dummy # Fusion method (placeholder)
|
|
25
|
+
- taskpool: dummy # Task pool specification (placeholder)
|
|
26
|
+
- _self_ # Self-reference for override priority
|
|
27
|
+
# =============================================================================
|
|
28
|
+
# Program Configuration
|
|
29
|
+
# =============================================================================
|
|
9
30
|
_target_: fusion_bench.programs.FabricModelFusionProgram
|
|
10
|
-
_recursive_: false
|
|
11
|
-
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
31
|
+
_recursive_: false # Disable recursive instantiation
|
|
32
|
+
# =============================================================================
|
|
33
|
+
# Experiment Execution Settings
|
|
34
|
+
# =============================================================================
|
|
35
|
+
# Development and debugging options
|
|
36
|
+
fast_dev_run: false # This option is for quick testing. For example, run single batch instead of full dataset
|
|
37
|
+
dry_run: false # Show configuration without running experiment
|
|
38
|
+
print_config: true # Display full configuration before execution
|
|
39
|
+
print_function_call: true # Show detailed instantiation calls
|
|
40
|
+
# =============================================================================
|
|
41
|
+
# Output and Logging Configuration
|
|
42
|
+
# =============================================================================
|
|
43
|
+
# Model saving configuration
|
|
44
|
+
merged_model_save_path: null # Path to save merged model.
|
|
45
|
+
merged_model_save_kwargs: null # Additional kwargs for model saving.
|
|
46
|
+
# Report generation
|
|
47
|
+
report_save_path: "{log_dir}/program_report.json" # Experiment results report path
|
|
@@ -2,7 +2,11 @@ defaults:
|
|
|
2
2
|
- override help: fusion_bench_help
|
|
3
3
|
- override job_logging: rich_logging
|
|
4
4
|
run:
|
|
5
|
-
dir:
|
|
5
|
+
dir: ${path.log_dir}
|
|
6
6
|
sweep:
|
|
7
|
-
dir:
|
|
7
|
+
dir: ${path.log_dir}
|
|
8
8
|
subdir: ${hydra.job.num}
|
|
9
|
+
job:
|
|
10
|
+
env_set:
|
|
11
|
+
HYDRA_FULL_ERROR: ${oc.env:HYDRA_FULL_ERROR,1}
|
|
12
|
+
output_subdir: ""
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# this option can be "clip_task_wise_adamerging"
|
|
2
|
-
name:
|
|
2
|
+
name: clip_layer_wise_adamerging
|
|
3
3
|
# these weights can be a list of floats, or a string that points to a *.np or *.pt file containing the weights
|
|
4
4
|
# if weights is specified, skip the test-time adaptation training
|
|
5
5
|
weights: null
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
_target_: DepthUpscalingAlgorithm
|
|
2
|
-
# this should be a list of integers or string, indicating the sequence of layers.
|
|
2
|
+
# this should be a list of integers or string, indicating the sequence of layers.
|
|
3
|
+
# If the entry is an integer, it will use the n-th layer of the model.
|
|
4
|
+
# If the entry is a string, it will use the layers specified by the string.
|
|
5
|
+
# The string should be a valid python expression that evaluates to a list of integers.
|
|
3
6
|
# for example, ["range(0,12)", "range(6,12)"] will use the first 12 layers and the last 6 layers of the model to construct the new model
|
|
4
7
|
# [0, 2, 4, "range(6,12)"] will use the 1st, 3rd, 5th, and the 7th to 12th layers of the model to construct the new model
|
|
5
8
|
layer_indices: null
|
|
@@ -5,7 +5,7 @@ exclude_param_names_regex: []
|
|
|
5
5
|
num_regmean_examples: 256
|
|
6
6
|
weight_transpose: true
|
|
7
7
|
# float, reduce non-diagonal elements in regmean weights by multiplying this scalar
|
|
8
|
-
reduce_non_diagonal_ratio: 0.
|
|
8
|
+
reduce_non_diagonal_ratio: 0.95
|
|
9
9
|
dataloader_kwargs:
|
|
10
10
|
batch_size: 32
|
|
11
11
|
num_workers: 0
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
_target_: fusion_bench.method.RegMeanAlgorithmForCLIPPlusPlus
|
|
2
|
+
# list, regular expression of names of parameters that need to be excluded
|
|
3
|
+
exclude_param_names_regex: []
|
|
4
|
+
# number of examples used to compute regmean weights
|
|
5
|
+
num_regmean_examples: 256
|
|
6
|
+
weight_transpose: true
|
|
7
|
+
# float, reduce non-diagonal elements in regmean weights by multiplying this scalar
|
|
8
|
+
reduce_non_diagonal_ratio: 0.95
|
|
9
|
+
dataloader_kwargs:
|
|
10
|
+
batch_size: 32
|
|
11
|
+
num_workers: 0
|
|
@@ -2,11 +2,8 @@ _usage_: |
|
|
|
2
2
|
defaults:
|
|
3
3
|
- CLIPVisionModelPool@: _template
|
|
4
4
|
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
5
|
-
_version_: "0.2"
|
|
6
5
|
_recursive_: False
|
|
7
6
|
models: ???
|
|
8
7
|
train_datasets: null
|
|
9
8
|
test_datasets: null
|
|
10
|
-
processor:
|
|
11
|
-
_target_: transformers.CLIPProcessor.from_pretrained
|
|
12
|
-
pretrained_model_name_or_path: openai/clip-vit-base-patch32
|
|
9
|
+
processor: openai/clip-vit-base-patch32
|
|
@@ -1,11 +1,76 @@
|
|
|
1
|
-
# The 20 task used in the paper:
|
|
1
|
+
# The 20 tasks used in the paper:
|
|
2
2
|
# Wang et al. Localizing Task Information for Improved Model Merging and Compression
|
|
3
3
|
# http://arxiv.org/abs/2405.07813
|
|
4
4
|
defaults:
|
|
5
|
-
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
5
|
+
- /dataset/image_classification/train@train_datasets:
|
|
6
|
+
# -- begin of eight tasks in the task arithmetic paper ---
|
|
7
|
+
- sun397
|
|
8
|
+
- stanford-cars
|
|
9
|
+
- resisc45
|
|
10
|
+
- eurosat
|
|
11
|
+
- svhn
|
|
12
|
+
- gtsrb
|
|
13
|
+
- mnist
|
|
14
|
+
- dtd
|
|
15
|
+
# -- end of eight tasks in the task arithmetic paper ---
|
|
16
|
+
- oxford_flowers102
|
|
17
|
+
- pcam
|
|
18
|
+
- fer2013
|
|
19
|
+
- oxford-iiit-pet
|
|
20
|
+
- stl10
|
|
21
|
+
- cifar100
|
|
22
|
+
- cifar10
|
|
23
|
+
- food101
|
|
24
|
+
- fashion_mnist
|
|
25
|
+
- emnist_letters
|
|
26
|
+
- kmnist
|
|
27
|
+
- rendered-sst2
|
|
28
|
+
- /dataset/image_classification/test@test_datasets:
|
|
29
|
+
# -- begin of eight tasks in the task arithmetic paper ---
|
|
30
|
+
- sun397
|
|
31
|
+
- stanford-cars
|
|
32
|
+
- resisc45
|
|
33
|
+
- eurosat
|
|
34
|
+
- svhn
|
|
35
|
+
- gtsrb
|
|
36
|
+
- mnist
|
|
37
|
+
- dtd
|
|
38
|
+
# -- end of eight tasks in the task arithmetic paper ---
|
|
39
|
+
- oxford_flowers102
|
|
40
|
+
- pcam
|
|
41
|
+
- fer2013
|
|
42
|
+
- oxford-iiit-pet
|
|
43
|
+
- stl10
|
|
44
|
+
- cifar100
|
|
45
|
+
- cifar10
|
|
46
|
+
- food101
|
|
47
|
+
- fashion_mnist
|
|
48
|
+
- emnist_letters
|
|
49
|
+
- kmnist
|
|
50
|
+
- rendered-sst2
|
|
51
|
+
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
52
|
+
_recursive_: False
|
|
53
|
+
processor: openai/clip-vit-base-patch16
|
|
54
|
+
models:
|
|
55
|
+
_pretrained_: openai/clip-vit-base-patch16
|
|
56
|
+
sun397: tanganke/clip-vit-base-patch16_sun397
|
|
57
|
+
stanford-cars: tanganke/clip-vit-base-patch16_stanford-cars
|
|
58
|
+
resisc45: tanganke/clip-vit-base-patch16_resisc45
|
|
59
|
+
eurosat: tanganke/clip-vit-base-patch16_eurosat
|
|
60
|
+
svhn: tanganke/clip-vit-base-patch16_svhn
|
|
61
|
+
gtsrb: tanganke/clip-vit-base-patch16_gtsrb
|
|
62
|
+
mnist: tanganke/clip-vit-base-patch16_mnist
|
|
63
|
+
dtd: tanganke/clip-vit-base-patch16_dtd
|
|
64
|
+
oxford_flowers102: tanganke/clip-vit-base-patch16_oxford_flowers102
|
|
65
|
+
pcam: tanganke/clip-vit-base-patch16_pcam
|
|
66
|
+
fer2013: tanganke/clip-vit-base-patch16_fer2013
|
|
67
|
+
oxford-iiit-pet: tanganke/clip-vit-base-patch16_oxford-iiit-pet
|
|
68
|
+
stl10: tanganke/clip-vit-base-patch16_stl10
|
|
69
|
+
cifar100: tanganke/clip-vit-base-patch16_cifar100
|
|
70
|
+
cifar10: tanganke/clip-vit-base-patch16_cifar10
|
|
71
|
+
food101: tanganke/clip-vit-base-patch16_food101
|
|
72
|
+
fashion_mnist: tanganke/clip-vit-base-patch16_fashion_mnist
|
|
73
|
+
emnist_letters: tanganke/clip-vit-base-patch16_emnist_letters
|
|
74
|
+
kmnist: tanganke/clip-vit-base-patch16_kmnist
|
|
75
|
+
rendered-sst2: tanganke/clip-vit-base-patch16_rendered-sst2
|
|
76
|
+
platform: hf
|
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml
CHANGED
|
@@ -1,9 +1,29 @@
|
|
|
1
|
-
# The 20 task used in the paper:
|
|
1
|
+
# The 20 tasks used in the paper:
|
|
2
2
|
# Wang et al. Localizing Task Information for Improved Model Merging and Compression
|
|
3
3
|
# http://arxiv.org/abs/2405.07813
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
4
|
+
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
5
|
+
_recursive_: False
|
|
6
|
+
processor: openai/clip-vit-base-patch16
|
|
7
|
+
models:
|
|
8
|
+
_pretrained_: openai/clip-vit-base-patch16
|
|
9
|
+
sun397: tanganke/clip-vit-base-patch16_sun397
|
|
10
|
+
stanford-cars: tanganke/clip-vit-base-patch16_stanford-cars
|
|
11
|
+
resisc45: tanganke/clip-vit-base-patch16_resisc45
|
|
12
|
+
eurosat: tanganke/clip-vit-base-patch16_eurosat
|
|
13
|
+
svhn: tanganke/clip-vit-base-patch16_svhn
|
|
14
|
+
gtsrb: tanganke/clip-vit-base-patch16_gtsrb
|
|
15
|
+
mnist: tanganke/clip-vit-base-patch16_mnist
|
|
16
|
+
dtd: tanganke/clip-vit-base-patch16_dtd
|
|
17
|
+
oxford_flowers102: tanganke/clip-vit-base-patch16_oxford_flowers102
|
|
18
|
+
pcam: tanganke/clip-vit-base-patch16_pcam
|
|
19
|
+
fer2013: tanganke/clip-vit-base-patch16_fer2013
|
|
20
|
+
oxford-iiit-pet: tanganke/clip-vit-base-patch16_oxford-iiit-pet
|
|
21
|
+
stl10: tanganke/clip-vit-base-patch16_stl10
|
|
22
|
+
cifar100: tanganke/clip-vit-base-patch16_cifar100
|
|
23
|
+
cifar10: tanganke/clip-vit-base-patch16_cifar10
|
|
24
|
+
food101: tanganke/clip-vit-base-patch16_food101
|
|
25
|
+
fashion_mnist: tanganke/clip-vit-base-patch16_fashion_mnist
|
|
26
|
+
emnist_letters: tanganke/clip-vit-base-patch16_emnist_letters
|
|
27
|
+
kmnist: tanganke/clip-vit-base-patch16_kmnist
|
|
28
|
+
rendered-sst2: tanganke/clip-vit-base-patch16_rendered-sst2
|
|
29
|
+
platform: hf
|
|
@@ -1,5 +1,35 @@
|
|
|
1
|
+
# eight image classification tasks defined in task arithmetic paper
|
|
1
2
|
defaults:
|
|
2
|
-
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
- /dataset/image_classification/train@train_datasets:
|
|
4
|
+
- sun397
|
|
5
|
+
- stanford-cars
|
|
6
|
+
- resisc45
|
|
7
|
+
- eurosat
|
|
8
|
+
- svhn
|
|
9
|
+
- gtsrb
|
|
10
|
+
- mnist
|
|
11
|
+
- dtd
|
|
12
|
+
- /dataset/image_classification/test@test_datasets:
|
|
13
|
+
- sun397
|
|
14
|
+
- stanford-cars
|
|
15
|
+
- resisc45
|
|
16
|
+
- eurosat
|
|
17
|
+
- svhn
|
|
18
|
+
- gtsrb
|
|
19
|
+
- mnist
|
|
20
|
+
- dtd
|
|
21
|
+
- _self_
|
|
22
|
+
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
23
|
+
_recursive_: False
|
|
24
|
+
processor: openai/clip-vit-base-patch32
|
|
25
|
+
models:
|
|
26
|
+
_pretrained_: openai/clip-vit-base-patch32
|
|
27
|
+
sun397: tanganke/clip-vit-base-patch32_sun397
|
|
28
|
+
stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
|
|
29
|
+
resisc45: tanganke/clip-vit-base-patch32_resisc45
|
|
30
|
+
eurosat: tanganke/clip-vit-base-patch32_eurosat
|
|
31
|
+
svhn: tanganke/clip-vit-base-patch32_svhn
|
|
32
|
+
gtsrb: tanganke/clip-vit-base-patch32_gtsrb
|
|
33
|
+
mnist: tanganke/clip-vit-base-patch32_mnist
|
|
34
|
+
dtd: tanganke/clip-vit-base-patch32_dtd
|
|
35
|
+
platform: hf
|
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml
CHANGED
|
@@ -1,21 +1,18 @@
|
|
|
1
1
|
defaults:
|
|
2
|
-
- _self_
|
|
3
|
-
- /model/clip-vit@models:
|
|
4
|
-
- clip-vit-base-patch32
|
|
5
|
-
- clip-vit-base-patch32_sun397
|
|
6
|
-
- clip-vit-base-patch32_stanford-cars
|
|
7
|
-
- clip-vit-base-patch32_resisc45
|
|
8
|
-
- clip-vit-base-patch32_eurosat
|
|
9
|
-
- clip-vit-base-patch32_svhn
|
|
10
|
-
- clip-vit-base-patch32_gtsrb
|
|
11
|
-
- clip-vit-base-patch32_mnist
|
|
12
|
-
- clip-vit-base-patch32_dtd
|
|
13
2
|
- /dataset/image_classification/train@train_datasets:
|
|
14
3
|
- tiny-imagenet
|
|
4
|
+
- _self_
|
|
15
5
|
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
16
|
-
_recursive_:
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
6
|
+
_recursive_: False
|
|
7
|
+
processor: openai/clip-vit-base-patch32
|
|
8
|
+
models:
|
|
9
|
+
_pretrained_: openai/clip-vit-base-patch32
|
|
10
|
+
sun397: tanganke/clip-vit-base-patch32_sun397
|
|
11
|
+
stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
|
|
12
|
+
resisc45: tanganke/clip-vit-base-patch32_resisc45
|
|
13
|
+
eurosat: tanganke/clip-vit-base-patch32_eurosat
|
|
14
|
+
svhn: tanganke/clip-vit-base-patch32_svhn
|
|
15
|
+
gtsrb: tanganke/clip-vit-base-patch32_gtsrb
|
|
16
|
+
mnist: tanganke/clip-vit-base-patch32_mnist
|
|
17
|
+
dtd: tanganke/clip-vit-base-patch32_dtd
|
|
18
|
+
platform: hf
|
|
@@ -1,3 +1,14 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
2
|
+
_recursive_: False
|
|
3
|
+
processor: openai/clip-vit-base-patch32
|
|
4
|
+
models:
|
|
5
|
+
_pretrained_: openai/clip-vit-base-patch32
|
|
6
|
+
sun397: tanganke/clip-vit-base-patch32_sun397
|
|
7
|
+
stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
|
|
8
|
+
resisc45: tanganke/clip-vit-base-patch32_resisc45
|
|
9
|
+
eurosat: tanganke/clip-vit-base-patch32_eurosat
|
|
10
|
+
svhn: tanganke/clip-vit-base-patch32_svhn
|
|
11
|
+
gtsrb: tanganke/clip-vit-base-patch32_gtsrb
|
|
12
|
+
mnist: tanganke/clip-vit-base-patch32_mnist
|
|
13
|
+
dtd: tanganke/clip-vit-base-patch32_dtd
|
|
14
|
+
platform: hf
|
|
@@ -1,8 +1,42 @@
|
|
|
1
|
-
# The
|
|
1
|
+
# The 10 tasks used in the paper (TALL mask):
|
|
2
2
|
# Wang et al. Localizing Task Information for Improved Model Merging and Compression
|
|
3
3
|
# http://arxiv.org/abs/2405.07813
|
|
4
4
|
defaults:
|
|
5
|
-
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
- /dataset/image_classification/train@train_datasets:
|
|
6
|
+
- sun397
|
|
7
|
+
- stanford-cars
|
|
8
|
+
- resisc45
|
|
9
|
+
- eurosat
|
|
10
|
+
- svhn
|
|
11
|
+
- gtsrb
|
|
12
|
+
- mnist
|
|
13
|
+
- dtd
|
|
14
|
+
- oxford_flowers102
|
|
15
|
+
- pcam
|
|
16
|
+
- /dataset/image_classification/test@test_datasets:
|
|
17
|
+
- sun397
|
|
18
|
+
- stanford-cars
|
|
19
|
+
- resisc45
|
|
20
|
+
- eurosat
|
|
21
|
+
- svhn
|
|
22
|
+
- gtsrb
|
|
23
|
+
- mnist
|
|
24
|
+
- dtd
|
|
25
|
+
- oxford_flowers102
|
|
26
|
+
- pcam
|
|
27
|
+
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
28
|
+
_recursive_: False
|
|
29
|
+
processor: openai/clip-vit-base-patch32
|
|
30
|
+
models:
|
|
31
|
+
_pretrained_: openai/clip-vit-base-patch32
|
|
32
|
+
sun397: tanganke/clip-vit-base-patch32_sun397
|
|
33
|
+
stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
|
|
34
|
+
resisc45: tanganke/clip-vit-base-patch32_resisc45
|
|
35
|
+
eurosat: tanganke/clip-vit-base-patch32_eurosat
|
|
36
|
+
svhn: tanganke/clip-vit-base-patch32_svhn
|
|
37
|
+
gtsrb: tanganke/clip-vit-base-patch32_gtsrb
|
|
38
|
+
mnist: tanganke/clip-vit-base-patch32_mnist
|
|
39
|
+
dtd: tanganke/clip-vit-base-patch32_dtd
|
|
40
|
+
oxford_flowers102: tanganke/clip-vit-base-patch32_oxford_flowers102
|
|
41
|
+
pcam: tanganke/clip-vit-base-patch32_pcam
|
|
42
|
+
platform: hf
|
|
@@ -1,8 +1,52 @@
|
|
|
1
|
-
# The
|
|
1
|
+
# The 12 tasks used in the paper (TALL mask):
|
|
2
2
|
# Wang et al. Localizing Task Information for Improved Model Merging and Compression
|
|
3
3
|
# http://arxiv.org/abs/2405.07813
|
|
4
4
|
defaults:
|
|
5
|
-
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
- /dataset/image_classification/train@train_datasets:
|
|
6
|
+
# -- begin of eight tasks in the task arithmetic paper ---
|
|
7
|
+
- sun397
|
|
8
|
+
- stanford-cars
|
|
9
|
+
- resisc45
|
|
10
|
+
- eurosat
|
|
11
|
+
- svhn
|
|
12
|
+
- gtsrb
|
|
13
|
+
- mnist
|
|
14
|
+
- dtd
|
|
15
|
+
# -- end of eight tasks in the task arithmetic paper ---
|
|
16
|
+
- oxford_flowers102
|
|
17
|
+
- pcam
|
|
18
|
+
- fer2013
|
|
19
|
+
- oxford-iiit-pet
|
|
20
|
+
- /dataset/image_classification/test@test_datasets:
|
|
21
|
+
# -- begin of eight tasks in the task arithmetic paper ---
|
|
22
|
+
- sun397
|
|
23
|
+
- stanford-cars
|
|
24
|
+
- resisc45
|
|
25
|
+
- eurosat
|
|
26
|
+
- svhn
|
|
27
|
+
- gtsrb
|
|
28
|
+
- mnist
|
|
29
|
+
- dtd
|
|
30
|
+
# -- end of eight tasks in the task arithmetic paper ---
|
|
31
|
+
- oxford_flowers102
|
|
32
|
+
- pcam
|
|
33
|
+
- fer2013
|
|
34
|
+
- oxford-iiit-pet
|
|
35
|
+
_target_: fusion_bench.modelpool.CLIPVisionModelPool
|
|
36
|
+
_recursive_: False
|
|
37
|
+
processor: openai/clip-vit-base-patch32
|
|
38
|
+
models:
|
|
39
|
+
_pretrained_: openai/clip-vit-base-patch32
|
|
40
|
+
sun397: tanganke/clip-vit-base-patch32_sun397
|
|
41
|
+
stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
|
|
42
|
+
resisc45: tanganke/clip-vit-base-patch32_resisc45
|
|
43
|
+
eurosat: tanganke/clip-vit-base-patch32_eurosat
|
|
44
|
+
svhn: tanganke/clip-vit-base-patch32_svhn
|
|
45
|
+
gtsrb: tanganke/clip-vit-base-patch32_gtsrb
|
|
46
|
+
mnist: tanganke/clip-vit-base-patch32_mnist
|
|
47
|
+
dtd: tanganke/clip-vit-base-patch32_dtd
|
|
48
|
+
oxford_flowers102: tanganke/clip-vit-base-patch32_oxford_flowers102
|
|
49
|
+
pcam: tanganke/clip-vit-base-patch32_pcam
|
|
50
|
+
fer2013: tanganke/clip-vit-base-patch32_fer2013
|
|
51
|
+
oxford-iiit-pet: tanganke/clip-vit-base-patch32_oxford-iiit-pet
|
|
52
|
+
platform: hf
|