PyPI - fusion-bench - Versions diffs - 0.2.9__py3-none-any.whl - Mend

fusion-bench 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (727) hide show

fusion_bench_config/dataset/summarization/test/xsum.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+xsum:
+  _target_: datasets.load_dataset
+  path: EdinburghNLP/xsum
+  split: test

fusion_bench_config/dataset/summarization/train/xsum.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+xsum:
+  _target_: datasets.load_dataset
+  path: EdinburghNLP/xsum
+  split: train

fusion_bench_config/dataset/summarization/val/xsum.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+xsum:
+  _target_: datasets.load_dataset
+  path: EdinburghNLP/xsum
+  split: validation

fusion_bench_config/dataset/summarization/xsum.yaml ADDED Viewed

@@ -0,0 +1,3 @@
+xsum:
+  _target_: datasets.load_dataset
+  path: EdinburghNLP/xsum

fusion_bench_config/dataset/text_generation/test/gsm-hard.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+gsm-hard:
+  _target_: datasets.load_dataset
+  path: reasoning-machines/gsm-hard
+  split: train # this dataset is used to evaluate math reasoning

fusion_bench_config/dataset/text_generation/test/gsm8k.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+gsm8k:
+  _target_: datasets.load_dataset
+  path: openai/gsm8k
+  name: main
+  split: test

fusion_bench_config/dataset/text_generation/test/gsm8k_question_label.yaml ADDED Viewed

@@ -0,0 +1,3 @@
+qsm8k:
+  _target_: fusion_bench.dataset.gsm8k.load_gsm8k_question_label_dataset
+  dataset_name: test # this option can be 'train', 'test', 'train_socratic', and 'test_socratic'

fusion_bench_config/dataset/text_generation/train/CodeAlpaca-20k.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+MetaMathQA:
+  _target_: datasets.load_dataset
+  path: sahil2801/CodeAlpaca-20k
+  split: train

fusion_bench_config/dataset/text_generation/train/gsm8k.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+gsm8k:
+  _target_: datasets.load_dataset
+  path: openai/gsm8k
+  name: main
+  split: train

fusion_bench_config/dataset/text_generation/train/gsm8k_question_label.yaml ADDED Viewed

@@ -0,0 +1,3 @@
+qsm8k:
+  _target_: fusion_bench.dataset.gsm8k.load_gsm8k_question_label_dataset
+  dataset_name: train # this option can be 'train', 'test', 'train_socratic', and 'test_socratic'

fusion_bench_config/fabric/auto.yaml ADDED Viewed

@@ -0,0 +1,16 @@
+defaults:
+  - loggers: tensorboard_logger
+  - _self_
+_target_: lightning.Fabric
+_recursive_: true
+# Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.
+# The value applies per node.
+devices: 1
+# Strategy for how to run across multiple devices. Possible choices are:
+# ``"dp"``, ``"ddp"``, ``"ddp_spawn"``, ``"deepspeed"``, ``"fsdp"``.
+strategy: auto
+# The hardware to run on. Possible choices are:
+# ``"cpu"``, ``"cuda"``, ``"mps"``, ``"gpu"``, ``"tpu"``, ``"auto"``.
+# for example: fabric.accelerator=cpu
+accelerator: auto

fusion_bench_config/fabric/llama_ddp.yaml ADDED Viewed

@@ -0,0 +1,18 @@
+defaults:
+  - loggers: tensorboard_logger
+  - _self_
+_target_: lightning.Fabric
+_recursive_: true
+# Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.
+# The value applies per node.
+devices: auto
+# Strategy for how to run across multiple devices. Possible choices are:
+# ``"dp"``, ``"ddp"``, ``"ddp_spawn"``, ``"deepspeed"``, ``"fsdp"``.
+strategy: ddp
+# The hardware to run on. Possible choices are:
+# ``"cpu"``, ``"cuda"``, ``"mps"``, ``"gpu"``, ``"tpu"``, ``"auto"``.
+# for example: fabric.accelerator=cpu
+accelerator: auto
+# reference to the precision policy: https://lightning.ai/docs/fabric/stable/api/fabric_args.html#precision
+precision: bf16-true

fusion_bench_config/fabric/llama_fsdp.yaml ADDED Viewed

@@ -0,0 +1,16 @@
+defaults:
+  - loggers: tensorboard_logger
+  - strategy: llama_fsdp
+  - _self_
+_target_: lightning.Fabric
+_recursive_: true
+# Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.
+# The value applies per node.
+devices: auto
+# The hardware to run on. Possible choices are:
+# ``"cpu"``, ``"cuda"``, ``"mps"``, ``"gpu"``, ``"tpu"``, ``"auto"``.
+# for example: fabric.accelerator=cpu
+accelerator: auto
+# reference to the precision policy: https://lightning.ai/docs/fabric/stable/api/fabric_args.html#precision
+precision: bf16-true

fusion_bench_config/fabric/llama_peft_fsdp.yaml ADDED Viewed

@@ -0,0 +1,16 @@
+defaults:
+  - loggers: tensorboard_logger
+  - strategy: llama_peft_fsdp
+  - _self_
+_target_: lightning.Fabric
+_recursive_: true
+# Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.
+# The value applies per node.
+devices: auto
+# The hardware to run on. Possible choices are:
+# ``"cpu"``, ``"cuda"``, ``"mps"``, ``"gpu"``, ``"tpu"``, ``"auto"``.
+# for example: fabric.accelerator=cpu
+accelerator: auto
+# reference to the precision policy: https://lightning.ai/docs/fabric/stable/api/fabric_args.html#precision
+precision: bf16-true

fusion_bench_config/fabric/loggers/csv_logger.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: lightning.fabric.loggers.CSVLogger
+# the logs directory would be `root_dir/name/version_X`
+# for example, `outputs/logs/lightning_logs/version_0` and `outputs/logs/lightning_logs/version_1` by default
+# root directory for all logging
+root_dir: outputs/logs
+# the name of the experiment
+name: lightning_logs
+version: null
+prefix: ""
+flush_logs_every_n_steps: 100

fusion_bench_config/fabric/loggers/tensorboard_logger.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: lightning.fabric.loggers.TensorBoardLogger
+# the logs directory would be `root_dir/name/version_X`
+# for example, `outputs/logs/lightning_logs/version_0` and `outputs/logs/lightning_logs/version_1` by default
+# root directory for all logging
+root_dir: outputs/logs
+# the name of the experiment
+name: "lightning_logs"
+version: null
+sub_dir: null
+default_hp_metric: false

fusion_bench_config/fabric/loggers/wandb_logger.yaml ADDED Viewed

@@ -0,0 +1,2 @@
+# https://lightning.ai/docs/fabric/2.4.0/guide/loggers/wandb.html#weights-and-biases
+_target_: wandb.integration.lightning.fabric.WandbLogger

fusion_bench_config/fabric/strategy/deepspeed.yaml ADDED Viewed

@@ -0,0 +1,10 @@
+# https://lightning.ai/docs/fabric/2.4.0/api/generated/lightning.fabric.strategies.DeepSpeedStrategy.html#deepspeedstrategy
+_target_: lightning.fabric.strategies.DeepSpeedStrategy
+accelerator: null
+zero_optimization: true
+stage: 2
+offload_optimizer: false
+offload_parameters: false
+offload_params_device: "cpu"
+offload_optimizer_device: "cpu"

fusion_bench_config/fabric/strategy/llama_fsdp.yaml ADDED Viewed

@@ -0,0 +1,8 @@
+_target_: lightning.fabric.strategies.FSDPStrategy
+sharding_strategy: FULL_SHARD
+cpu_offload: false
+auto_wrap_policy:
+  _target_: fusion_bench.mixins.lightning_fabric.get_policy
+  _args_:
+    - transformers.models.llama.modeling_llama.LlamaDecoderLayer
+activation_checkpointing_policy: ${.auto_wrap_policy}

fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml ADDED Viewed

@@ -0,0 +1,9 @@
+_target_: lightning.fabric.strategies.FSDPStrategy
+sharding_strategy: FULL_SHARD
+state_dict_type: full # Save a single, consolidated checkpoint file
+cpu_offload: false
+auto_wrap_policy:
+  _target_: fusion_bench.mixins.lightning_fabric.get_size_based_auto_wrap_policy
+activation_checkpointing_policy: ${.auto_wrap_policy}
+# limit_all_gathers: true

fusion_bench_config/fabric_model_fusion.yaml ADDED Viewed

@@ -0,0 +1,20 @@
+defaults:
+  - hydra: default
+  - fabric: auto
+  # --- Model, Method, Task ---
+  - modelpool: CLIPVisionModelPool/clip-vit-base-patch32_TA8
+  - method: dummy
+  - taskpool: dummy
+  - _self_
+_target_: fusion_bench.programs.FabricModelFusionProgram
+_recursive_: false
+fast_dev_run: false # Run a single batch of data to test the model or method
+# Run the script without actually running the experiment, use with `print_config=true`.
+# You can also use `--cfg` or `-c` to show the configuration instead of running.
+dry_run: false
+print_config: true # Print the configuration to the console
+merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`
+merged_model_save_kwargs: null
+report_save_path: null # path to save the result report
+print_function_call: true # set to false if you don't want to print the details of instantiate calls

fusion_bench_config/hydra/default.yaml ADDED Viewed

@@ -0,0 +1,8 @@
+defaults:
+  - override help: fusion_bench_help
+  - override job_logging: rich_logging
+run:
+  dir: outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}
+sweep:
+  dir: outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}
+  subdir: ${hydra.job.num}

fusion_bench_config/hydra/help/fusion_bench_help.yaml ADDED Viewed

@@ -0,0 +1,47 @@
+# App name, override to match the name your app is known by
+app_name: fusion_bench
+# Help header, customize to describe your app to your users
+header: == ${hydra.help.app_name} ==
+footer: |-
+  Powered by Hydra (https://hydra.cc)
+  Use --hydra-help to view Hydra specific help
+# Basic Hydra flags:
+#   $FLAGS_HELP
+#
+# Config groups, choose one of:
+#   $APP_CONFIG_GROUPS: All config groups that do not start with hydra/.
+#   $HYDRA_CONFIG_GROUPS: All the Hydra config groups (starts with hydra/)
+#
+# Configuration generated with overrides:
+#   $CONFIG : Generated config
+#
+template: |-
+  ${hydra.help.header}
+  fusion_bench is the command line interface for running model fusion benchmarks in the FusionBench project.
+  It provides a flexible way to configure and execute various fusion algorithms on different model pools and evaluate them across multiple tasks.
+  == Configuration groups ==
+  Compose your configuration from these groups (method, modelpool, taskpool are the most important):
+  $APP_CONFIG_GROUPS
+  == Config ==
+  You can override options, for example:
+  fusion_bench method=task_arithmetic modelpool=clip-vit-base-patch32_svhn_and_mnist taskpool=clip-vit-base-patch32_svhn_and_mnist
+  == Basic usage ==
+  fusion_bench [--config-path CONFIG_PATH] [--config-name CONFIG_NAME] OPTION_1=VALUE_1 OPTION_2=VALUE_2 ...
+  == Key options ==
+  --help, -h            : Print this help message and exit
+  --hydra-help          : Hydra's help
+  --cfg, -c             : Show config instead of running [job|hydra|all]
+  --config-path, -cp    : Overrides the config_path
+  --config-name, -cn    : Overrides the config_name
+  --shell-completion, -sc : Install or Uninstall shell completion
+  For more detailed information on options and usage, please refer to the online documentation:
+  https://tanganke.github.io/fusion_bench/cli/fusion_bench/
+  ${hydra.help.footer}

fusion_bench_config/hydra/job_logging/rich_logging.yaml ADDED Viewed

@@ -0,0 +1,20 @@
+version: 1
+formatters:
+  simple:
+    format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+  rich_handler:
+    format: '%(message)s'
+handlers:
+  console:
+    class: rich.logging.RichHandler
+    formatter: rich_handler
+  file:
+    class: logging.FileHandler
+    formatter: simple
+    filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
+root:
+  level: INFO
+  handlers:
+    - console
+    - file
+disable_existing_loggers: false

fusion_bench_config/llama_full_finetune.yaml ADDED Viewed

@@ -0,0 +1,19 @@
+defaults:
+  - hydra: default
+  - fabric: llama_fsdp
+  # --- Model, Method, Task ---
+  - method: lm_finetune/fullfinetune_sft.yaml
+  - modelpool: CausalLMPool/llama_alpaca_cleaned.yaml
+  - taskpool: dummy
+  - _self_
+_target_: fusion_bench.programs.FabricModelFusionProgram
+_recursive_: false
+fast_dev_run: false # Run a single batch of data to test the model or method
+# Run the script without actually running the experiment, use with `print_config=true`.
+# You can also use `--cfg` or `-c` to show the configuration instead of running.
+dry_run: false
+print_config: true # Print the configuration to the console
+report_save_path: null # path to save the result report
+print_function_call: true # set to false if you don't want to print the details of instantiate calls

fusion_bench_config/llama_magnitude_pruning.yaml ADDED Viewed

@@ -0,0 +1,16 @@
+defaults:
+  - fabric_model_fusion
+  - override method: pruning/llama_magnitude_pruning
+  - override modelpool: CausalLMPool/single_llama_model
+  - override taskpool: dummy
+  - _self_
+merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`
+merged_model_save_kwargs:
+  save_tokenizer: true
+  tokenizer_kwargs:
+    unk_token: "<s>" # https://github.com/huggingface/transformers/issues/24318#issuecomment-1596801322
+modelpool:
+  model_kwargs:
+    torch_dtype: float16
+    low_cpu_mem_usage: true
+    device_map: "auto"

fusion_bench_config/llama_model_fusion.yaml ADDED Viewed

@@ -0,0 +1,17 @@
+defaults:
+  - fabric_model_fusion
+  - override modelpool: CausalLMPool/single_llama_model
+  - override taskpool: dummy
+  - _self_
+merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`
+merged_model_save_kwargs:
+  save_tokenizer: true
+  # tokenizer_kwargs:
+  #   unk_token: "<s>" # https://github.com/huggingface/transformers/issues/24318#issuecomment-1596801322
+modelpool:
+  model_kwargs:
+    torch_dtype: float16
+    low_cpu_mem_usage: true
+    # device_map: "auto"

fusion_bench_config/method/ada_svd/clip_vision.yaml ADDED Viewed

@@ -0,0 +1,9 @@
+_target_: fusion_bench.method.AdaSVDMergingForCLIPVisionModel
+scaling_factor: null
+num_samples: 256
+gate_k: 16
+average_experts: false
+device: cuda
+upscaling_accelerator: null
+seed: 0

fusion_bench_config/method/adamerging/clip.yaml ADDED Viewed

@@ -0,0 +1,23 @@
+# this option can be "clip_task_wise_adamerging"
+name: ???
+# these weights can be a list of floats, or a string that points to a *.np, *.pt file containing the weights
+# if weights is specified, skip the test-time adaptation training
+weights: null
+# learning rate
+optimizer: adam
+lr: 1e-3
+init_values: 0.3
+# if `clamp_weights` is true, the weights will be clamped to [0, 1]
+clamp_weights: false
+# arguments of `functional_call`
+tie_weights: true
+strict: false
+# this is overridden by `fabric.devices` if launched from the `fusion_bench` CLI.
+devices: 1
+batch_size: 16
+num_workers: 8
+max_steps: 1000
+fast_dev_run: ${fast_dev_run}
+# the path for saving the merging weights
+save_merging_weights: 'merging_weights.pt'
+cache_dir: outputs

fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml ADDED Viewed

@@ -0,0 +1,23 @@
+_target_: fusion_bench.method.adamerging.flan_t5_layer_wise_adamerging.FlanT5LayerWiseAdaMergingAlgorithm
+_recursive_: false
+optimizer:
+  _target_: torch.optim.Adam
+  lr: 1e-3
+dataloader_kwargs:
+  batch_size: 4
+  num_workers: 0
+init_values: 0.3
+max_steps: 1000
+# if `merging_weights_load_path` is specified, the merging weights will be loaded from the file and the training process will be skipped
+merging_weights_load_path: null
+merging_weights_save_path: null
+variant: null
+clamp_weights: false
+tie_weights: false
+strict: false
+cache_dir: "outputs/cache"

fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml ADDED Viewed

@@ -0,0 +1,23 @@
+_target_: fusion_bench.method.adamerging.gpt2_layer_wise_adamerging.GPT2LayerWiseAdaMergingAlgorithm
+_recursive_: false
+optimizer:
+  _target_: torch.optim.Adam
+  lr: 1e-4
+dataloader_kwargs:
+  batch_size: 16
+  num_workers: 0
+init_values: 0.3
+max_steps: 1000
+# if `merging_weights_load_path` is specified, the merging weights will be loaded from the file and the training process will be skipped
+merging_weights_load_path: null
+merging_weights_save_path: null
+variant: null
+clamp_weights: false
+tie_weights: true
+strict: false
+cache_dir: "outputs/cache"

fusion_bench_config/method/adamerging/llama_sft.yaml ADDED Viewed

@@ -0,0 +1,33 @@
+_target_: fusion_bench.method.adamerging.llama_adamerging.LayerWiseAdaMergingForLlamaSFT
+seed: 0
+output_dir: null
+# path to initialize the merging weights
+# these weights can be a list of floats, or a string that points to a *.np, *.pt file containing the weights
+# if weights is specified, skip the test-time adaptation training
+init_weights_path: null
+sparsity_ratio: null
+# average attention modules instead of learning merging weights
+average_attntion: true
+# start_layer_idx is a float (in [0,1]) or int or null. If it is null, start at the first layer
+start_layer_idx: 0.3
+# learning rate
+optimizer: adam
+lr: 1e-4
+init_values: 0.5
+# if `clamp_weights` is true, the weights will be clamped to [0, 1]
+clamp_weights: false
+normalized_merging_weights: true
+# arguments of `functional_call`
+tie_weights: true
+strict: false
+max_steps: 1000
+fast_dev_run: ${fast_dev_run}
+# the path for saving the merging weights
+save_interval: 100
+save_merged_model: true
+dataloader_kwargs:
+  batch_size: 24
+  num_workers: 0
+  shuffle: true

fusion_bench_config/method/adamerging.yaml ADDED Viewed

@@ -0,0 +1,23 @@
+# this option can be "clip_task_wise_adamerging"
+name: ???
+# these weights can be a list of floats, or a string that points to a *.np, *.pt file containing the weights
+# if weights is specified, skip the test-time adaptation training
+weights: null
+# learning rate
+optimizer: adam
+lr: 1e-3
+init_values: 0.3
+# if `clamp_weights` is true, the weights will be clamped to [0, 1]
+clamp_weights: false
+# arguments of `functional_call`
+tie_weights: true
+strict: false
+# this is overridden by `fabric.devices` if launched from the `fusion_bench` CLI.
+devices: 1
+batch_size: 16
+num_workers: 8
+max_steps: 1000
+fast_dev_run: ${fast_dev_run}
+# the path for saving the merging weights
+save_merging_weights: 'merging_weights.pt'
+cache_dir: outputs

fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml ADDED Viewed

@@ -0,0 +1,6 @@
+_target_: fusion_bench.method.TaskVectorCosSimilarity
+plot_heatmap: true
+trainable_only: true
+max_points_per_model: null
+output_path: null

fusion_bench_config/method/analysis/task_vector_violin_plot.yaml ADDED Viewed

@@ -0,0 +1,6 @@
+_target_: fusion_bench.method.TaskVectorViolinPlot
+trainable_only: true
+max_points_per_model: 1000
+fig_kwargs: null
+output_path: null

fusion_bench_config/method/classification/clip_continual_finetune.yaml ADDED Viewed

@@ -0,0 +1,28 @@
+_target_: fusion_bench.method.ContinualImageClassificationFineTuningForCLIP
+seed: 42
+# shuffle the order of the tasks
+shuffle_order: true
+learning_rate: 1e-5
+weight_decay: 0
+# number of training steps on each task
+num_steps: 4000
+batch_size: 128
+num_workers: 16
+save_interval: 500
+# if `state_dict_load_path` is not null, the training will be resumed from the state_dict_path
+state_dict_load_path: null
+# if `state_dict_save_path` is not null, the state_dict will be saved to the path after training
+state_dict_save_path: null
+# if `skip_training` is true, use with `state_dict_load_path` to skip training and only evaluate
+skip_training: false
+# === LoRA ===
+use_lora: false
+lora_config:
+  r: 16
+  lora_alpha: 32
+  target_modules:
+    - q_proj
+    - v_proj
+  lora_dropout: 0.1
+  bias: none

fusion_bench_config/method/classification/clip_finetune.yaml ADDED Viewed

@@ -0,0 +1,26 @@
+name: clip_finetune
+seed: 42
+learning_rate: 1e-5
+weight_decay: 0
+num_steps: 4000
+batch_size: 128
+num_workers: 16
+save_interval: 500
+# if `state_dict_load_path` is not null, the training will be resumed from the state_dict_path
+state_dict_load_path: null
+# if `state_dict_save_path` is not null, the state_dict will be saved to the path after training
+state_dict_save_path: null
+# if `skip_training` is true, use with `state_dict_load_path` to skip training and only evaluate
+skip_training: false
+# === LoRA ===
+use_lora: false
+lora_config:
+  r: 16
+  lora_alpha: 32
+  target_modules:
+    - q_proj
+    - v_proj
+  lora_dropout: 0.1
+  bias: none
+# === L-LoRA ===
+use_l_lora: false

fusion_bench_config/method/clip_finetune.yaml ADDED Viewed

@@ -0,0 +1,26 @@
+name: clip_finetune
+seed: 42
+learning_rate: 1e-5
+weight_decay: 0
+num_steps: 4000
+batch_size: 128
+num_workers: 16
+save_interval: 500
+# if `state_dict_load_path` is not null, the training will be resumed from the state_dict_path
+state_dict_load_path: null
+# if `state_dict_save_path` is not null, the state_dict will be saved to the path after training
+state_dict_save_path: null
+# if `skip_training` is true, use with `state_dict_load_path` to skip training and only evaluate
+skip_training: false
+# === LoRA ===
+use_lora: false
+lora_config:
+  r: 16
+  lora_alpha: 32
+  target_modules:
+    - q_proj
+    - v_proj
+  lora_dropout: 0.1
+  bias: none
+# === L-LoRA ===
+use_l_lora: false

fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml ADDED Viewed

@@ -0,0 +1,27 @@
+name: clip_concrete_layer_wise_adamerging
+# batch size per gpu
+# if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
+batch_size: 16
+num_workers: 8
+merge_dtype: null
+optimizer: adam
+lr: 1e-3
+base_lr: 1
+adamerging_lr: 1e-3
+scaling_factor: 0.3
+max_steps: 1000
+max_adamerging_steps: 1000
+save_interval: 500
+initial_logits: 0
+temperature: 0.5
+# "discrete" or "continuous", this is the mask applied for evaluation, not during training
+# the performance of the final model is expected to be similar
+eval_mask_type: continuous
+mask_checkpoint: null
+# if `clamp_weights` is true, the weights will be clamped to [0, 1]
+clamp_weights: false
+# arguments of `functional_call`
+tie_weights: true
+strict: false
+cache_dir: outputs

fusion_bench_config/method/concrete_subspace/clip_concrete_task_arithmetic.yaml ADDED Viewed

@@ -0,0 +1,25 @@
+name: clip_concrete_task_arithmetic
+# batch size per gpu
+# if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
+batch_size: 16
+num_workers: 8
+merge_dtype: null
+optimizer: adam
+lr: 1e-3
+scaling_factor: 0.3
+max_steps: 2000
+save_interval: 500
+initial_logits: 0
+temperature: 0.5
+# "discrete" or "continuous", this is the mask applied for evaluation, not during training
+# the performance of the final model is expected to be similar
+eval_mask_type: continuous
+mask_checkpoint: null
+# if `clamp_weights` is true, the weights will be clamped to [0, 1]
+clamp_weights: false
+# arguments of `functional_call`
+tie_weights: true
+strict: false
+# directory to cache zero-shot classification heads
+cache_dir: outputs
+skip_training: false