fusion-bench 0.2.30__py3-none-any.whl → 0.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. fusion_bench/__init__.py +6 -0
  2. fusion_bench/__main__.py +2 -2
  3. fusion_bench/constants/runtime.py +4 -1
  4. fusion_bench/dataset/__init__.py +2 -0
  5. fusion_bench/dataset/clip_dataset.py +4 -72
  6. fusion_bench/dataset/image_dataset.py +44 -18
  7. fusion_bench/method/base_algorithm.py +4 -0
  8. fusion_bench/method/classification/image_classification_finetune.py +1 -0
  9. fusion_bench/method/concrete_subspace/clip_concrete_tsvm.py +285 -0
  10. fusion_bench/method/dop/dop.py +0 -22
  11. fusion_bench/method/dop/dop_general.py +489 -0
  12. fusion_bench/method/dop/utils.py +24 -4
  13. fusion_bench/method/emr_merging/__init__.py +1 -0
  14. fusion_bench/method/emr_merging/emr_merging.py +53 -0
  15. fusion_bench/method/emr_merging/utils.py +162 -0
  16. fusion_bench/method/opcm/opcm.py +6 -2
  17. fusion_bench/method/opcm/opcm_general.py +356 -0
  18. fusion_bench/method/opcm/utils.py +1 -4
  19. fusion_bench/method/simple_average.py +52 -18
  20. fusion_bench/method/task_arithmetic/task_arithmetic.py +1 -1
  21. fusion_bench/method/task_singular_vector/TSVM.py +7 -6
  22. fusion_bench/method/task_singular_vector/utils/TSVM_utils.py +0 -1
  23. fusion_bench/mixins/lightning_fabric.py +110 -11
  24. fusion_bench/mixins/openclip_classification.py +155 -1
  25. fusion_bench/mixins/serialization.py +1 -1
  26. fusion_bench/modelpool/base_pool.py +37 -0
  27. fusion_bench/modelpool/convnext_for_image_classification.py +5 -2
  28. fusion_bench/modelpool/openclip_vision/modelpool.py +12 -3
  29. fusion_bench/models/hf_clip.py +20 -0
  30. fusion_bench/models/modulator/__init__.py +1 -0
  31. fusion_bench/models/modulator/base.py +123 -0
  32. fusion_bench/models/open_clip/modeling.py +61 -5
  33. fusion_bench/models/open_clip/utils.py +13 -2
  34. fusion_bench/models/parameter_dict.py +119 -29
  35. fusion_bench/models/utils.py +190 -2
  36. fusion_bench/models/wrappers/switch.py +90 -0
  37. fusion_bench/programs/base_program.py +6 -0
  38. fusion_bench/programs/fabric_fusion_program.py +4 -0
  39. fusion_bench/py.typed +1 -0
  40. fusion_bench/scripts/cli.py +25 -23
  41. fusion_bench/scripts/imgui.py +2 -2
  42. fusion_bench/scripts/webui.py +2 -2
  43. fusion_bench/taskpool/image_classification.py +270 -0
  44. fusion_bench/utils/__init__.py +20 -1
  45. fusion_bench/utils/data.py +1 -1
  46. fusion_bench/utils/dict.py +19 -0
  47. fusion_bench/utils/dtype.py +19 -0
  48. fusion_bench/utils/hydra_utils.py +75 -0
  49. fusion_bench/utils/misc.py +1 -0
  50. fusion_bench/utils/packages.py +4 -0
  51. fusion_bench/utils/parameters.py +33 -0
  52. fusion_bench/utils/rich_utils.py +42 -19
  53. fusion_bench/utils/state_dict_arithmetic.py +183 -1
  54. fusion_bench/utils/tensorboard.py +21 -3
  55. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/METADATA +3 -1
  56. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/RECORD +70 -53
  57. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/WHEEL +1 -1
  58. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/entry_points.txt +1 -1
  59. fusion_bench_config/README.md +9 -0
  60. fusion_bench_config/fabric/auto.yaml +1 -0
  61. fusion_bench_config/fabric/loggers/mlflow_logger.yaml +4 -0
  62. fusion_bench_config/hydra/default.yaml +3 -1
  63. fusion_bench_config/method/concrete_subspace/clip_concrete_tsvm.yaml +38 -0
  64. fusion_bench_config/method/dop/dop_general.yaml +33 -0
  65. fusion_bench_config/method/emr_merging/emr_merging.yaml +1 -0
  66. fusion_bench_config/method/opcm/opcm_general.yaml +18 -0
  67. fusion_bench_config/modelpool/ConvNextForImageClassification/convnext-base-224_8-tasks.yaml +15 -0
  68. fusion_bench_config/taskpool/ImageClassificationTaskPool/convnext-base-224_8-tasks.yaml +17 -0
  69. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/licenses/LICENSE +0 -0
  70. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/top_level.txt +0 -0
@@ -13,3 +13,4 @@ strategy: auto
13
13
  # ``"cpu"``, ``"cuda"``, ``"mps"``, ``"gpu"``, ``"tpu"``, ``"auto"``.
14
14
  # for example: fabric.accelerator=cpu
15
15
  accelerator: auto
16
+ precision: null
@@ -1,2 +1,6 @@
1
1
  # https://mlflow.org/
2
2
  _target_: lightning.pytorch.loggers.MLFlowLogger
3
+ tracking_uri: "sqlite:///${path.output_dir}/mlflow.db"
4
+ experiment_name: ${hydra:job.config_name}
5
+ run_name: ${now:%Y-%m-%d_%H-%M-%S}
6
+ save_dir: ${path.log_dir}
@@ -4,7 +4,9 @@ defaults:
4
4
  run:
5
5
  dir: ${path.log_dir}
6
6
  sweep:
7
- dir: ${path.log_dir}
7
+ # the directory where all multirun outputs are stored
8
+ # cannot refer to ${path.log_dir} here because this is evaluated before the separate run configs are constructed
9
+ dir: ${oc.env:FUSION_BENCH_PROJECT_ROOT,"."}/outputs/multirun/${hydra.job.config_name}/${now:%Y-%m-%d_%H-%M-%S}
8
10
  subdir: ${hydra.job.num}
9
11
  job:
10
12
  env_set:
@@ -0,0 +1,38 @@
1
+ _target_: fusion_bench.method.concrete_subspace.clip_concrete_tsvm.ConcreteTSVMForOpenCLIP
2
+ # === Concrete Subspace parameters ===
3
+ # batch size per gpu
4
+ # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
5
+ dataloader_kwargs:
6
+ batch_size: 16
7
+ num_workers: 8
8
+ optimizer:
9
+ _target_: torch.optim.AdamW
10
+ lr: 1e-3
11
+ weight_decay: 0.01
12
+ fused: null
13
+ lr_scheduler: null
14
+ merge_dtype: null
15
+ max_steps: 2000
16
+ save_interval: 500
17
+ initial_logits: 0
18
+ temperature: 0.5
19
+ # "discrete" or "continuous", this is the mask applied for evaluation, not during training
20
+ # the performance of the final model is expected to be similar in either case
21
+ eval_mask_type: continuous
22
+ mask_checkpoint: null
23
+ # if `clamp_weights` is true, the weights will be clamped to [0, 1]
24
+ clamp_weights: false
25
+ # arguments of `functional_call`
26
+ tie_weights: true
27
+ strict: false
28
+ # directory to cache zero-shot classification heads
29
+ cache_dir: outputs
30
+ skip_training: false
31
+ # === TSVM parameters ===
32
+ exclude_keys: null
33
+ # alpha (also known as scaling factor) is a float or a list of floats
34
+ # example:
35
+ # alpha: 1
36
+ # alpha: [1, 0.5, 0.25]
37
+ alpha: 1
38
+ return_single_task_models: false
@@ -0,0 +1,33 @@
1
+ _target_: fusion_bench.method.dop.dop_general.DOPMerging
2
+
3
+ # the random seed to use
4
+ seed: null
5
+ # shuffle the order of the models
6
+ shuffle_order: true
7
+ # save the merged model on every step
8
+ save_on_every_step: false
9
+ # evaluate the merged model on every step
10
+ evaluate_on_every_step: true
11
+
12
+ # optimizer (learning rate)
13
+ lr: 1e-4
14
+ # optimizer (num_steps)
15
+ num_steps: 200
16
+
17
+ # weighted loss
18
+ # if mgda is true, use mgda to optimize the loss weights
19
+ mgda: true
20
+ # if mgda is false, this is the weight for the loss of the first task
21
+ alpha: 0.8
22
+ # if mgda is true and ema is true, use an exponential moving average (ema); alpha is the initial value
23
+ ema: true
24
+ # if mgda is true and ema is true, use an exponential moving average (ema); ema_beta is the decay rate
25
+ ema_beta: 0.999
26
+
27
+ # epsilon for svd (the proportion of energy retained)
28
+ svd_epsilon: 0.99999
29
+ # the space to project the delta w (left singular vectors, right singular vectors, or both)
30
+ svd_proj_space: uv # u or v or uv
31
+
32
+ # the number of ray actors to use for parallel merging (0 means no ray)
33
+ num_ray_actors: 0
@@ -0,0 +1 @@
1
+ _target_: fusion_bench.method.emr_merging.EMRMerging
@@ -0,0 +1,18 @@
1
+ # =============================================================================
2
+ # FusionBench Method Configuration: OPCM
3
+ # =============================================================================
4
+ # Incrementally merges models via SVD projection and evaluation per step.
5
+ # =============================================================================
6
+ _target_: fusion_bench.method.opcm.opcm_general.OPCM
7
+ # shuffle the order of the models
8
+ shuffle_order: true
9
+ # the scaling factor for the SVD projection
10
+ alpha: 0.5
11
+ # the random seed to use
12
+ seed: null
13
+ # save the merged model on every step
14
+ save_on_every_step: true
15
+ # evaluate the merged model on every step
16
+ evaluate_on_every_step: true
17
+ # the number of ray actors to use for distributed merging
18
+ num_ray_actors: 0
@@ -0,0 +1,15 @@
1
+ _target_: fusion_bench.modelpool.ConvNextForImageClassificationPool
2
+ _recursive_: False
3
+ models:
4
+ _pretrained_: facebook/convnext-base-224
5
+ sun397: tanganke/convnext-base-224_sun397_sgd_batch-size-64_lr-0.01_steps-4000
6
+ stanford-cars: tanganke/convnext-base-224_stanford-cars_sgd_batch-size-64_lr-0.01_steps-4000
7
+ resisc45: tanganke/convnext-base-224_resisc45_sgd_batch-size-64_lr-0.01_steps-4000
8
+ eurosat: tanganke/convnext-base-224_eurosat_sgd_batch-size-64_lr-0.01_steps-4000
9
+ svhn: tanganke/convnext-base-224_svhn_sgd_batch-size-64_lr-0.01_steps-4000
10
+ gtsrb: tanganke/convnext-base-224_gtsrb_sgd_batch-size-64_lr-0.01_steps-4000
11
+ mnist: tanganke/convnext-base-224_mnist_sgd_batch-size-64_lr-0.01_steps-4000
12
+ dtd: tanganke/convnext-base-224_dtd_sgd_batch-size-64_lr-0.01_steps-4000
13
+ train_datasets: null
14
+ val_datasets: null
15
+ test_datasets: null
@@ -0,0 +1,17 @@
1
+ defaults:
2
+ - /dataset/image_classification/test@test_datasets:
3
+ - sun397
4
+ - stanford-cars
5
+ - resisc45
6
+ - eurosat
7
+ - svhn
8
+ - gtsrb
9
+ - mnist
10
+ - dtd
11
+ - _self_
12
+ _target_: fusion_bench.taskpool.image_classification.ImageClassificationTaskPool
13
+ _recursive_: False
14
+ processor: facebook/convnext-base-224
15
+ dataloader_kwargs:
16
+ batch_size: 32
17
+ num_workers: 0