fusion-bench 0.2.30__py3-none-any.whl → 0.2.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/__init__.py +6 -0
- fusion_bench/__main__.py +2 -2
- fusion_bench/constants/runtime.py +4 -1
- fusion_bench/dataset/__init__.py +2 -0
- fusion_bench/dataset/clip_dataset.py +4 -72
- fusion_bench/dataset/image_dataset.py +44 -18
- fusion_bench/method/base_algorithm.py +4 -0
- fusion_bench/method/classification/image_classification_finetune.py +1 -0
- fusion_bench/method/concrete_subspace/clip_concrete_tsvm.py +285 -0
- fusion_bench/method/dop/dop.py +0 -22
- fusion_bench/method/dop/dop_general.py +489 -0
- fusion_bench/method/dop/utils.py +24 -4
- fusion_bench/method/emr_merging/__init__.py +1 -0
- fusion_bench/method/emr_merging/emr_merging.py +53 -0
- fusion_bench/method/emr_merging/utils.py +162 -0
- fusion_bench/method/opcm/opcm.py +6 -2
- fusion_bench/method/opcm/opcm_general.py +356 -0
- fusion_bench/method/opcm/utils.py +1 -4
- fusion_bench/method/simple_average.py +52 -18
- fusion_bench/method/task_arithmetic/task_arithmetic.py +1 -1
- fusion_bench/method/task_singular_vector/TSVM.py +7 -6
- fusion_bench/method/task_singular_vector/utils/TSVM_utils.py +0 -1
- fusion_bench/mixins/lightning_fabric.py +110 -11
- fusion_bench/mixins/openclip_classification.py +155 -1
- fusion_bench/mixins/serialization.py +1 -1
- fusion_bench/modelpool/base_pool.py +37 -0
- fusion_bench/modelpool/convnext_for_image_classification.py +5 -2
- fusion_bench/modelpool/openclip_vision/modelpool.py +12 -3
- fusion_bench/models/hf_clip.py +20 -0
- fusion_bench/models/modulator/__init__.py +1 -0
- fusion_bench/models/modulator/base.py +123 -0
- fusion_bench/models/open_clip/modeling.py +61 -5
- fusion_bench/models/open_clip/utils.py +13 -2
- fusion_bench/models/parameter_dict.py +119 -29
- fusion_bench/models/utils.py +190 -2
- fusion_bench/models/wrappers/switch.py +90 -0
- fusion_bench/programs/base_program.py +6 -0
- fusion_bench/programs/fabric_fusion_program.py +4 -0
- fusion_bench/py.typed +1 -0
- fusion_bench/scripts/cli.py +25 -23
- fusion_bench/scripts/imgui.py +2 -2
- fusion_bench/scripts/webui.py +2 -2
- fusion_bench/taskpool/image_classification.py +270 -0
- fusion_bench/utils/__init__.py +20 -1
- fusion_bench/utils/data.py +1 -1
- fusion_bench/utils/dict.py +19 -0
- fusion_bench/utils/dtype.py +19 -0
- fusion_bench/utils/hydra_utils.py +75 -0
- fusion_bench/utils/misc.py +1 -0
- fusion_bench/utils/packages.py +4 -0
- fusion_bench/utils/parameters.py +33 -0
- fusion_bench/utils/rich_utils.py +42 -19
- fusion_bench/utils/state_dict_arithmetic.py +183 -1
- fusion_bench/utils/tensorboard.py +21 -3
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/METADATA +3 -1
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/RECORD +70 -53
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/WHEEL +1 -1
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/entry_points.txt +1 -1
- fusion_bench_config/README.md +9 -0
- fusion_bench_config/fabric/auto.yaml +1 -0
- fusion_bench_config/fabric/loggers/mlflow_logger.yaml +4 -0
- fusion_bench_config/hydra/default.yaml +3 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_tsvm.yaml +38 -0
- fusion_bench_config/method/dop/dop_general.yaml +33 -0
- fusion_bench_config/method/emr_merging/emr_merging.yaml +1 -0
- fusion_bench_config/method/opcm/opcm_general.yaml +18 -0
- fusion_bench_config/modelpool/ConvNextForImageClassification/convnext-base-224_8-tasks.yaml +15 -0
- fusion_bench_config/taskpool/ImageClassificationTaskPool/convnext-base-224_8-tasks.yaml +17 -0
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/top_level.txt +0 -0
|
@@ -4,7 +4,9 @@ defaults:
|
|
|
4
4
|
run:
|
|
5
5
|
dir: ${path.log_dir}
|
|
6
6
|
sweep:
|
|
7
|
-
|
|
7
|
+
# the directory where all multirun outputs are stored
|
|
8
|
+
# cannot refer to ${path.log_dir} because this is evaluated before constructing the separate run configs
|
|
9
|
+
dir: ${oc.env:FUSION_BENCH_PROJECT_ROOT,"."}/outputs/multirun/${hydra.job.config_name}/${now:%Y-%m-%d_%H-%M-%S}
|
|
8
10
|
subdir: ${hydra.job.num}
|
|
9
11
|
job:
|
|
10
12
|
env_set:
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
_target_: fusion_bench.method.concrete_subspace.clip_concrete_tsvm.ConcreteTSVMForOpenCLIP
|
|
2
|
+
# === Concrete Subspace parameters ===
|
|
3
|
+
# batch size per gpu
|
|
4
|
+
# if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
|
|
5
|
+
dataloader_kwargs:
|
|
6
|
+
batch_size: 16
|
|
7
|
+
num_workers: 8
|
|
8
|
+
optimizer:
|
|
9
|
+
_target_: torch.optim.AdamW
|
|
10
|
+
lr: 1e-3
|
|
11
|
+
weight_decay: 0.01
|
|
12
|
+
fused: null
|
|
13
|
+
lr_scheduler: null
|
|
14
|
+
merge_dtype: null
|
|
15
|
+
max_steps: 2000
|
|
16
|
+
save_interval: 500
|
|
17
|
+
initial_logits: 0
|
|
18
|
+
temperature: 0.5
|
|
19
|
+
# "discrete" or "continuous", this is the mask applied for evaluation, not during training
|
|
20
|
+
# the performance of the final model is expected to be similar either way
|
|
21
|
+
eval_mask_type: continuous
|
|
22
|
+
mask_checkpoint: null
|
|
23
|
+
# if `clamp_weights` is true, the weights will be clamped to [0, 1]
|
|
24
|
+
clamp_weights: false
|
|
25
|
+
# arguments of `functional_call`
|
|
26
|
+
tie_weights: true
|
|
27
|
+
strict: false
|
|
28
|
+
# directory to cache zero-shot classification heads
|
|
29
|
+
cache_dir: outputs
|
|
30
|
+
skip_training: false
|
|
31
|
+
# === TSVM parameters ===
|
|
32
|
+
exclude_keys: null
|
|
33
|
+
# alpha (also known as scaling factor) is a float or a list of floats
|
|
34
|
+
# example:
|
|
35
|
+
# alpha: 1
|
|
36
|
+
# alpha: [1, 0.5, 0.25]
|
|
37
|
+
alpha: 1
|
|
38
|
+
return_single_task_models: false
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
_target_: fusion_bench.method.dop.dop_general.DOPMerging
|
|
2
|
+
|
|
3
|
+
# the random seed to use
|
|
4
|
+
seed: null
|
|
5
|
+
# shuffle the order of the models
|
|
6
|
+
shuffle_order: true
|
|
7
|
+
# save the merged model on every step
|
|
8
|
+
save_on_every_step: false
|
|
9
|
+
# evaluate the merged model on every step
|
|
10
|
+
evaluate_on_every_step: true
|
|
11
|
+
|
|
12
|
+
# optimizer (learning rate)
|
|
13
|
+
lr: 1e-4
|
|
14
|
+
# optimizer (num_steps)
|
|
15
|
+
num_steps: 200
|
|
16
|
+
|
|
17
|
+
# weighted loss
|
|
18
|
+
# if mgda is true, use mgda to optimize the loss weights
|
|
19
|
+
mgda: true
|
|
20
|
+
# if mgda is false, this is the weight for the loss of the first task
|
|
21
|
+
alpha: 0.8
|
|
22
|
+
# if mgda is true and ema is true, use an exponential moving average (ema); alpha is the initial value
|
|
23
|
+
ema: true
|
|
24
|
+
# if mgda is true and ema is true, use an exponential moving average (ema); ema_beta is the decay rate
|
|
25
|
+
ema_beta: 0.999
|
|
26
|
+
|
|
27
|
+
# epsilon for svd (the proportion of energy retained)
|
|
28
|
+
svd_epsilon: 0.99999
|
|
29
|
+
# the space to project the delta w (left singular vectors, right singular vectors, or both)
|
|
30
|
+
svd_proj_space: uv # u or v or uv
|
|
31
|
+
|
|
32
|
+
# the number of ray actors to use for parallel merging (0 means no ray)
|
|
33
|
+
num_ray_actors: 0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
_target_: fusion_bench.method.emr_merging.EMRMerging
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# FusionBench Method Configuration: OPCM
|
|
3
|
+
# =============================================================================
|
|
4
|
+
# Incrementally merges models via SVD projection and evaluation per step.
|
|
5
|
+
# =============================================================================
|
|
6
|
+
_target_: fusion_bench.method.opcm.opcm_general.OPCM
|
|
7
|
+
# shuffle the order of the models
|
|
8
|
+
shuffle_order: true
|
|
9
|
+
# the scaling factor for the SVD projection
|
|
10
|
+
alpha: 0.5
|
|
11
|
+
# the random seed to use
|
|
12
|
+
seed: null
|
|
13
|
+
# save the merged model on every step
|
|
14
|
+
save_on_every_step: true
|
|
15
|
+
# evaluate the merged model on every step
|
|
16
|
+
evaluate_on_every_step: true
|
|
17
|
+
# the number of ray actors to use for distributed merging
|
|
18
|
+
num_ray_actors: 0
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
_target_: fusion_bench.modelpool.ConvNextForImageClassificationPool
|
|
2
|
+
_recursive_: False
|
|
3
|
+
models:
|
|
4
|
+
_pretrained_: facebook/convnext-base-224
|
|
5
|
+
sun397: tanganke/convnext-base-224_sun397_sgd_batch-size-64_lr-0.01_steps-4000
|
|
6
|
+
stanford-cars: tanganke/convnext-base-224_stanford-cars_sgd_batch-size-64_lr-0.01_steps-4000
|
|
7
|
+
resisc45: tanganke/convnext-base-224_resisc45_sgd_batch-size-64_lr-0.01_steps-4000
|
|
8
|
+
eurosat: tanganke/convnext-base-224_eurosat_sgd_batch-size-64_lr-0.01_steps-4000
|
|
9
|
+
svhn: tanganke/convnext-base-224_svhn_sgd_batch-size-64_lr-0.01_steps-4000
|
|
10
|
+
gtsrb: tanganke/convnext-base-224_gtsrb_sgd_batch-size-64_lr-0.01_steps-4000
|
|
11
|
+
mnist: tanganke/convnext-base-224_mnist_sgd_batch-size-64_lr-0.01_steps-4000
|
|
12
|
+
dtd: tanganke/convnext-base-224_dtd_sgd_batch-size-64_lr-0.01_steps-4000
|
|
13
|
+
train_datasets: null
|
|
14
|
+
val_datasets: null
|
|
15
|
+
test_datasets: null
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- /dataset/image_classification/test@test_datasets:
|
|
3
|
+
- sun397
|
|
4
|
+
- stanford-cars
|
|
5
|
+
- resisc45
|
|
6
|
+
- eurosat
|
|
7
|
+
- svhn
|
|
8
|
+
- gtsrb
|
|
9
|
+
- mnist
|
|
10
|
+
- dtd
|
|
11
|
+
- _self_
|
|
12
|
+
_target_: fusion_bench.taskpool.image_classification.ImageClassificationTaskPool
|
|
13
|
+
_recursive_: False
|
|
14
|
+
processor: facebook/convnext-base-224
|
|
15
|
+
dataloader_kwargs:
|
|
16
|
+
batch_size: 32
|
|
17
|
+
num_workers: 0
|
|
File without changes
|
|
File without changes
|