fusion-bench 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. fusion_bench/compat/method/__init__.py +1 -0
  2. fusion_bench/compat/method/base_algorithm.py +7 -1
  3. fusion_bench/compat/modelpool/__init__.py +1 -1
  4. fusion_bench/compat/taskpool/__init__.py +1 -1
  5. fusion_bench/dataset/arc_agi/arc.py +5 -0
  6. fusion_bench/dataset/arc_agi/preprocess.py +1 -1
  7. fusion_bench/dataset/llama/__init__.py +1 -0
  8. fusion_bench/dataset/llama/alpaca.py +93 -3
  9. fusion_bench/dataset/llama/collate.py +62 -2
  10. fusion_bench/dataset/llama/metamathqa.py +50 -0
  11. fusion_bench/dataset/llama/preference_700k.py +70 -0
  12. fusion_bench/dataset/llama/stanford_shp.py +90 -0
  13. fusion_bench/dataset/llama/ultrachat.py +58 -0
  14. fusion_bench/dataset/llama/utils/__init__.py +0 -0
  15. fusion_bench/method/__init__.py +1 -1
  16. fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -4
  17. fusion_bench/method/adamerging/min_norm_solvers.py +4 -4
  18. fusion_bench/method/linear/expo.py +39 -0
  19. fusion_bench/method/lm_finetune/__init__.py +1 -0
  20. fusion_bench/method/lm_finetune/bradley_terry_rm.py +432 -0
  21. fusion_bench/method/lm_finetune/fullfinetune_sft.py +90 -160
  22. fusion_bench/method/lm_finetune/peftfinetune_sft.py +49 -139
  23. fusion_bench/method/pruning/llama_magnitude_prune.py +2 -2
  24. fusion_bench/method/pruning/llama_random_prune.py +2 -2
  25. fusion_bench/method/surgery/__init__.py +3 -0
  26. fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +157 -0
  27. fusion_bench/mixins/__init__.py +2 -0
  28. fusion_bench/mixins/clip_classification.py +58 -5
  29. fusion_bench/mixins/fabric_training.py +320 -0
  30. fusion_bench/mixins/lightning_fabric.py +9 -0
  31. fusion_bench/modelpool/__init__.py +2 -0
  32. fusion_bench/modelpool/causal_lm/__init__.py +1 -1
  33. fusion_bench/modelpool/causal_lm/causal_lm.py +21 -22
  34. fusion_bench/modelpool/seq_classification_lm/__init__.py +2 -0
  35. fusion_bench/modelpool/seq_classification_lm/reward_model.py +15 -0
  36. fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +98 -0
  37. fusion_bench/models/chat_templates/__init__.py +1 -0
  38. fusion_bench/models/chat_templates/llama_3_Instruct.py +1 -0
  39. fusion_bench/models/chat_templates/load_tokenizer.py +43 -0
  40. fusion_bench/models/hf_clip.py +50 -9
  41. fusion_bench/models/surgery/surgerymodelwrapper.py +157 -0
  42. fusion_bench/models/utils.py +8 -0
  43. fusion_bench/models/wrappers/layer_wise_fusion.py +14 -5
  44. fusion_bench/models/wrappers/task_wise_fusion.py +5 -5
  45. fusion_bench/optim/__init__.py +2 -0
  46. fusion_bench/optim/exception.py +47 -0
  47. fusion_bench/optim/lr_scheduler/__init__.py +1 -0
  48. fusion_bench/optim/lr_scheduler/linear_warmup.py +222 -0
  49. fusion_bench/optim/lr_scheduler/utils/__init__.py +1 -0
  50. fusion_bench/optim/lr_scheduler/utils/visualization.py +119 -0
  51. fusion_bench/optim/mezo.py +0 -2
  52. fusion_bench/programs/fabric_fusion_program.py +5 -1
  53. fusion_bench/taskpool/clip_vision/taskpool.py +43 -6
  54. fusion_bench/taskpool/llama/reward_model.py +157 -0
  55. fusion_bench/taskpool/nyuv2_taskpool.py +2 -0
  56. fusion_bench/utils/hydra_utils.py +22 -0
  57. fusion_bench/utils/plot/__init__.py +0 -0
  58. fusion_bench/utils/plot/token.py +52 -0
  59. fusion_bench/utils/plot/token_notebook.py +127 -0
  60. fusion_bench/utils/type.py +5 -3
  61. {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/METADATA +1 -1
  62. {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/RECORD +87 -47
  63. fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -1
  64. fusion_bench_config/dataset/llm_sft/alpaca_cleaned.yaml +6 -0
  65. fusion_bench_config/dataset/llm_sft/ultrachat_200k.yaml +3 -0
  66. fusion_bench_config/fabric/llama_peft_fsdp.yaml +16 -0
  67. fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
  68. fusion_bench_config/fabric/strategy/deepspeed.yaml +10 -0
  69. fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +9 -0
  70. fusion_bench_config/fabric_model_fusion.yaml +1 -1
  71. fusion_bench_config/llama_full_finetune.yaml +19 -0
  72. fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +47 -0
  73. fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +11 -4
  74. fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +4 -2
  75. fusion_bench_config/method/surgery/adamerging_surgery.yaml +27 -0
  76. fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +21 -0
  77. fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +21 -0
  78. fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +19 -0
  79. fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +18 -0
  80. fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +23 -0
  81. fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +14 -0
  82. fusion_bench_config/nyuv2_config.yaml +5 -1
  83. fusion_bench_config/taskpool/reward_model_evaluation.yaml +18 -0
  84. fusion_bench_config/llama_weighted_average.yaml +0 -26
  85. {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/LICENSE +0 -0
  86. {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/WHEEL +0 -0
  87. {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/entry_points.txt +0 -0
  88. {fusion_bench-0.2.6.dist-info → fusion_bench-0.2.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,52 @@
1
+ import matplotlib.pyplot as plt
2
+ import numpy as np
3
+ import seaborn as sns
4
+
5
+
6
def visualize_model_inputs(input_ids, attention_mask, labels, tokenizer=None):
    """
    Plot the attention mask, labels and input ids as three stacked heatmaps.

    Each input is converted with ``np.array`` and flattened to a single row,
    so 1-D sequences and ``(1, seq_len)`` batches are drawn the same way.

    Parameters:
    -----------
    input_ids: array-like
        The input ids array
    attention_mask: array-like
        The attention mask array
    labels: array-like
        The labels array
    tokenizer: optional
        Object exposing ``decode(token_id)``. When given, the decoded tokens
        are used as x-axis tick labels of the input-ids heatmap.

    Returns:
    --------
    The matplotlib figure holding the three heatmaps.
    """

    # Convert inputs to numpy arrays
    attention_mask = np.array(attention_mask)
    labels = np.array(labels)
    input_ids = np.array(input_ids)

    # Create figure with 3 subplots
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(15, 10))

    # One heatmap per input, drawn in the same order as the subplots
    panels = (
        (attention_mask, ax1, "Blues", "**Attention Mask**"),
        (labels, ax2, "Reds", "**Labels**"),
        (input_ids, ax3, "Greens", "**Input IDs**"),
    )
    for values, axis, cmap, title in panels:
        sns.heatmap(values.reshape(1, -1), ax=axis, cmap=cmap, cbar=True)
        axis.set_title(title)
        axis.set_ylabel("Sequence")

    # Compare against None explicitly: an object's truthiness may depend on
    # __len__ and says nothing about whether a tokenizer was supplied.
    if tokenizer is not None:
        # Flatten before decoding so the number of tick labels always matches
        # the number of heatmap columns, even for (1, seq_len) input.
        decoded_tokens = [
            tokenizer.decode(token_id) for token_id in input_ids.reshape(-1)
        ]
        # +0.5 centres each tick on its heatmap cell
        ax3.set_xticks(np.arange(len(decoded_tokens)) + 0.5)
        ax3.set_xticklabels(decoded_tokens, rotation=45, ha="right")

    plt.tight_layout()
    return fig
@@ -0,0 +1,127 @@
1
+ import numpy as np
2
+ from IPython.display import HTML, display
3
+
4
+
5
def create_color_style():
    """Return the ``<style>`` block defining the CSS classes used by the token view."""
    stylesheet = """
    <style>
        .token-container { font-family: monospace; white-space: pre; }
        .attention { background-color: #90EE90; }  /* Light green */
        .label { background-color: #FFB6C6; }  /* Light red */
        .token { color: #0066cc; }  /* Blue */
        .stats { font-weight: bold; }
    </style>
    """
    return stylesheet
15
+
16
+
17
def escape_special_chars(text):
    """Return *text* with newline, tab and carriage return spelled out and spaces made visible."""
    # Single-pass character mapping; none of the replacement strings contain
    # a character that is itself mapped, so this matches chained replaces.
    visible = str.maketrans(
        {
            "\n": "\\n",
            "\t": "\\t",
            "\r": "\\r",
            " ": "␣",  # Optional: show spaces with visible character
        }
    )
    return text.translate(visible)
25
+
26
+
27
def visualize_tokens_html(input_ids, attention_mask, labels, tokenizer):
    """
    Visualize model inputs using HTML colored text representation for Jupyter Notebook
    with special characters shown as strings.

    Every token is decoded individually with ``tokenizer.decode`` and listed on
    its own line together with its attention-mask value, label and input id.
    The HTML is shown through IPython's ``display``; nothing is returned.

    Parameters:
    -----------
    input_ids: array-like
        Token ids; flattened before use.
    attention_mask: array-like
        Mask values; positions equal to 1 get the ``attention`` style.
    labels: array-like
        Labels; positions that are neither -100 nor 0 get the ``label`` style.
    tokenizer:
        Object exposing ``decode(token_id)`` that returns a string.
    """
    # Standard-library import kept local so the block is self-contained.
    from html import escape as html_escape

    # Convert to flat numpy arrays
    attention_mask = np.array(attention_mask).flatten()
    labels = np.array(labels).flatten()
    input_ids = np.array(input_ids).flatten()

    # Decode tokens and escape special characters
    tokens = [escape_special_chars(tokenizer.decode(id_)) for id_ in input_ids]

    # Create HTML output
    html_output = [create_color_style()]

    # Header
    html_output.append("<h3>**Token Visualization**</h3>")

    # Legend
    html_output.append(
        """
    <div style='margin: 10px 0;'>
        <strong>Legend:</strong><br>
        <span class='attention'>&nbsp;&nbsp;&nbsp;&nbsp;</span> Active Attention<br>
        <span class='label'>&nbsp;&nbsp;&nbsp;&nbsp;</span> Label Present<br>
        <span class='token'>Text</span> Token Text<br>
        Special Characters: \\n (newline), \\t (tab), ␣ (space)
    </div>
    """
    )

    # Token alignment
    html_output.append("<strong>Token Alignment:</strong>")
    html_output.append("<div class='token-container'>")

    # Widest token sets the padding width; default=0 keeps empty input from
    # raising ValueError.
    max_token_len = max((len(token) for token in tokens), default=0)

    for i, (input_id, token, mask, label) in enumerate(
        zip(input_ids, tokens, attention_mask, labels)
    ):
        # Pad on the raw text first so alignment counts visible characters,
        # then HTML-escape: tokens such as "<s>" or "&" would otherwise be
        # parsed as markup and disappear from the rendered output.
        token_text = html_escape(token.ljust(max_token_len))

        # Create classes for styling
        classes = []
        if mask == 1:
            classes.append("attention")
        if label != -100 and label != 0:
            classes.append("label")

        class_str = f"class='{' '.join(classes)}'" if classes else ""

        # Create the line
        line = f"Position {i:3d}: <span {class_str}><span class='token'>{token_text}</span></span> "
        line += (
            f"(Mask: {int(mask)}, Label: {int(label)}, Input_id: {int(input_id)})<br>"
        )
        html_output.append(line)

    html_output.append("</div>")

    # Statistics
    # NOTE(review): "Labeled tokens" counts every label != -100, while the
    # highlighting above also treats 0 as unlabeled -- confirm which rule is
    # intended before relying on this number.
    html_output.append(
        """
    <div class='stats' style='margin-top: 10px;'>
        Statistics:<br>
        Total tokens: {}<br>
        Active attention tokens: {}<br>
        Labeled tokens: {}<br>
    </div>
    """.format(
            len(tokens), attention_mask.sum(), (labels != -100).sum()
        )
    )

    # Display the HTML
    display(HTML("".join(html_output)))
106
+
107
+
108
+ # Example usage:
109
+ """
110
+ from transformers import AutoTokenizer
111
+ import torch
112
+
113
+ # Initialize tokenizer
114
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
115
+
116
+ # Sample input with special characters
117
+ text = "Hello,\nhow are\tyou?"
118
+ inputs = tokenizer(text, return_tensors='pt')
119
+ labels = torch.zeros_like(inputs['input_ids']) # dummy labels
120
+
121
+ visualize_tokens_html(
122
+ inputs['input_ids'][0],
123
+ inputs['attention_mask'][0],
124
+ labels[0],
125
+ tokenizer
126
+ )
127
+ """
@@ -6,18 +6,20 @@ from typing_extensions import TypeAlias
6
6
 
7
7
  try:
8
8
  import torch
9
- from torch import Tensor
9
+ from torch import Tensor, nn
10
10
 
11
11
  StateDictType: TypeAlias = Dict[str, Tensor]
12
+ TorchModelType = TypeVar("TorchModelType", bound=nn.Module)
13
+
12
14
  except ImportError:
13
15
  pass
14
16
 
15
17
 
16
- ModuleType = type(sys)
18
+ PyModuleType = type(sys)
17
19
  T = TypeVar("T")
18
20
  T1 = TypeVar("T1")
19
21
  T2 = TypeVar("T2")
20
22
  T3 = TypeVar("T3")
21
23
  T4 = TypeVar("T4")
22
24
 
23
- __all__ = ["StateDictType", "ModuleType", "T", "T1", "T2", "T3", "T4"]
25
+ __all__ = ["StateDictType", "PyModuleType", "TorchModelType", "T", "T1", "T2", "T3", "T4"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fusion_bench
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: A Comprehensive Benchmark of Deep Model Fusion
5
5
  Author-email: Anke Tang <tang.anke@foxmail.com>
6
6
  License: MIT License
@@ -1,12 +1,12 @@
1
1
  fusion_bench/__init__.py,sha256=68dF-zPvb8E2MgYnmgIJsxIHJBy1MApKeOrRZvQEVlg,421
2
2
  fusion_bench/compat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- fusion_bench/compat/method/__init__.py,sha256=yY-ILbwNVTCbor4Z7SOp0wRDbB8FqlaXo4sgF12EhQM,4823
4
- fusion_bench/compat/method/base_algorithm.py,sha256=Vsc9k04o6FAhu509xGYc1vZWkmegQOjqoqT7IJ8p7CA,1741
3
+ fusion_bench/compat/method/__init__.py,sha256=KUKHpX7AfvB7fmOAlruWp0r1z17xpkI9l29PMvLWR9A,4956
4
+ fusion_bench/compat/method/base_algorithm.py,sha256=0BG_QUtFCbfK8OmiYfEw8xaOj_G0unpqXDEJKXTCNYk,1952
5
5
  fusion_bench/compat/modelpool/AutoModelForSeq2SeqLM.py,sha256=m68BRGy4P-P9lLB10oXOBI-p58a-0FOPcrJ4r4MU32k,1100
6
- fusion_bench/compat/modelpool/__init__.py,sha256=C0CFrqaIKRiAvhT0PT3vM98fZwmpxL34wfb4FbeKcdo,4665
6
+ fusion_bench/compat/modelpool/__init__.py,sha256=KD8Ddr9D7rJ5YdHEQsTuNmQ0bgQfqF4l3WNMtHmRHD8,4687
7
7
  fusion_bench/compat/modelpool/base_pool.py,sha256=1gxQENvdcOSdHmUbw-x7-X-aXtoSa1Gsys_on1ys8FM,10639
8
8
  fusion_bench/compat/modelpool/huggingface_clip_vision.py,sha256=LyIPgepNOK0qrk_EnBdlTC0ZnEkEZvPUy45cO60TiPU,6918
9
- fusion_bench/compat/taskpool/__init__.py,sha256=fTHd7_7EwSM2K06gUCQZ1jxxhl8T_kP0ouv70wBLhpI,3630
9
+ fusion_bench/compat/taskpool/__init__.py,sha256=LHCRs7vrWMTtMfrqFRMmnNiSZnnZ7tZyVwXZxbi1jvQ,3651
10
10
  fusion_bench/compat/taskpool/base_pool.py,sha256=1AIZBxqUJgshq0Xo3Yo9es4b-8X8ksN1mFHxSOqnDsA,3307
11
11
  fusion_bench/compat/taskpool/clip_image_classification.py,sha256=ZYZsbsE-fPzm6yafA0p-6wcDwVGryLmtXXtuEXeQbTY,7425
12
12
  fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py,sha256=O_WWr6Ivpqm-XbkaDsseDPKYcqp2jezxy-8fOrICFzY,5566
@@ -20,21 +20,26 @@ fusion_bench/dataset/image_dataset.py,sha256=MSZE_UESyRRQDwnkm2KpyIARUg9SWcwqnH4
20
20
  fusion_bench/dataset/imdb.py,sha256=YRzeq5z-Fl0aYcC2QtwEBWFkvucvpNo975jwjL5SZvs,260
21
21
  fusion_bench/dataset/nyuv2.py,sha256=2OdIEaY1ywFYMLUxCTpFcIctcBMFTq4nnoOkudSo-jI,3750
22
22
  fusion_bench/dataset/arc_agi/__init__.py,sha256=xj8BMG296qPMiL4NYs-ZwqcLJ6yT2wwbubyCbWPe91w,149
23
- fusion_bench/dataset/arc_agi/arc.py,sha256=AfRivFvuyumYKjlJq3LSbAzFAdHB0lY4NS8KlxhWqjU,9396
23
+ fusion_bench/dataset/arc_agi/arc.py,sha256=EH51Sk9tR7fEMdpeaTUT49_9LtjIO8VMTzSfzk8SQEI,9568
24
24
  fusion_bench/dataset/arc_agi/arc_agi.py,sha256=SFOjp0yZrsoln4cQgWU2b-WfI39od6IE1Wof8Ee0888,11768
25
25
  fusion_bench/dataset/arc_agi/augmenters.py,sha256=yhTqyRk0_zamXRQ5Ev10xYc8Dc9D71BTSOkt856x33I,30890
26
26
  fusion_bench/dataset/arc_agi/messagers.py,sha256=E6BqF1iL68ge1m9wOJMSb2Pz6_5i9CR0HxBb7i73plE,53076
27
27
  fusion_bench/dataset/arc_agi/np_cache.py,sha256=Ec1DQFtlBdMy-f4dvGEhSr4jyVnBLQELwvX1ztxJKBs,5439
28
- fusion_bench/dataset/arc_agi/preprocess.py,sha256=SLmkhq76RJ8zTto5JHNFORYEr2GkbrhP81pKz1A8_BE,8523
28
+ fusion_bench/dataset/arc_agi/preprocess.py,sha256=lQrXqV4SkhrxREgbqFAop-IwC5qaoxkKosoMO-ZHITY,8509
29
29
  fusion_bench/dataset/arc_agi/representers.py,sha256=-2eTYl-UcFW4zULDjkUrOQYv9P31nttMjc9eTJsaN0g,35852
30
- fusion_bench/dataset/llama/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- fusion_bench/dataset/llama/alpaca.py,sha256=sITFsghX2w0KzLwQ71KRz6rfsI2WLjuuKwt8OetvmCQ,4778
32
- fusion_bench/dataset/llama/collate.py,sha256=wcnt9Y2G4Isbdof3HAfe-xTbUThGo7IM0AZsn0FTmBs,1932
30
+ fusion_bench/dataset/llama/__init__.py,sha256=p8M7G69L6bga4qLl5lvAO6SKNeUBn99kkJrAQEeOvHw,22
31
+ fusion_bench/dataset/llama/alpaca.py,sha256=0nCQRBZzIPaMzA5VSJAsWw-nE0aVhiAQD5MGJRSrvEQ,7787
32
+ fusion_bench/dataset/llama/collate.py,sha256=fSH-vKKCGCpPT47gchETXLF2yTCMPUE3NTE-inCdczg,3869
33
+ fusion_bench/dataset/llama/metamathqa.py,sha256=z9InmEfWy_wWMbBORumFA2NakEznZWsDWgZzOsXYWhA,1617
33
34
  fusion_bench/dataset/llama/openai.py,sha256=_QXz6ciUTN8u4ILDowZPT3SQTes7ngkFZe1MRLFtVQ8,5012
35
+ fusion_bench/dataset/llama/preference_700k.py,sha256=CqD0ZnM2F2Z3u70tD3VaQ2yPAHkOv75m-eiXiQKIRp0,2582
34
36
  fusion_bench/dataset/llama/sharegpt.py,sha256=8hdh_5BcxIyK0ByZoVLdhd_I06kpHffxQdaC6ezzHkM,5249
35
37
  fusion_bench/dataset/llama/squad.py,sha256=H0L0BHFzVTtkw7jfgTA8gzvZDhzsqfIALq1ip_BVwaM,4810
38
+ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBkl6RJ6ec3Tf6UU,3763
39
+ fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
36
40
  fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
37
- fusion_bench/method/__init__.py,sha256=NSBIKPSjcZbZDVuwr8srDDfntfz3jQilozRCqHPYj_w,5751
41
+ fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ fusion_bench/method/__init__.py,sha256=bwYq0wOxiTPCuR1GvLtqQ9Sx91mikfTTJKeSHFsSRn4,5781
38
43
  fusion_bench/method/base_algorithm.py,sha256=5dutGZfPqNhO8F8FOlo3UFR91TZu2Xj7O0pTB40JvWo,1135
39
44
  fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
40
45
  fusion_bench/method/ensemble.py,sha256=rGxvJTeorfcBuE_e0XO-0-MAc9un7ZCC46ikKGuAcN4,3077
@@ -48,9 +53,9 @@ fusion_bench/method/adamerging/clip_task_wise_adamerging.py,sha256=Tys9pDJzz5YNU
48
53
  fusion_bench/method/adamerging/entropy_loss.py,sha256=ZeVe0Hq1PaMfppLqDbB0MOscZUZRNh4CALrvt8pmQC0,736
49
54
  fusion_bench/method/adamerging/flan_t5_layer_wise_adamerging.py,sha256=osc6ueCgiS4u8KUV_sZkHGFBYC8dThnTSp4NB0wkQIg,12915
50
55
  fusion_bench/method/adamerging/gpt2_layer_wise_adamerging.py,sha256=jTGUbhJCV1pcJ5k5jVeAhmtHdbHK5LlEfBhF-86xWjY,13773
51
- fusion_bench/method/adamerging/layer_wise_adamerging.py,sha256=Zi5JW1V10DQNZ_0G9Ny_YH4ATP0mWuO6AjrZRwO-CxU,9432
56
+ fusion_bench/method/adamerging/layer_wise_adamerging.py,sha256=6d1vWuyiAQDh_kLLrZixPyTAxovOjfq-2T2hgLGXCWg,9734
52
57
  fusion_bench/method/adamerging/llama_adamerging.py,sha256=DHm83VaaxxHFaeFY2qbxgO1Ub3Fiqawy4p5AqCkmEp4,13112
53
- fusion_bench/method/adamerging/min_norm_solvers.py,sha256=uolDwgTt8yLFuMcsQbAKO0jT6pbsG1YbSR77S40bKNs,8287
58
+ fusion_bench/method/adamerging/min_norm_solvers.py,sha256=a7n2X0BE_YajlaUygyHV0yqW6-x5dTyZ5V0mt_Q69qE,8291
54
59
  fusion_bench/method/adamerging/task_wise_adamerging.py,sha256=tUy_P4lCn6u5srFCIyMdHs-Hc1MSge4meenK8UA25tw,6006
55
60
  fusion_bench/method/adamerging/utils.py,sha256=Yq8ovlpLJY-5MkSmpoB-_EMYG8cr6eyO-WUZTxKxMTI,432
56
61
  fusion_bench/method/analysis/__init__.py,sha256=EQzOCShS0hF958drq1yg2oSVsS0hvBznPxtTAWB9SGY,122
@@ -77,21 +82,22 @@ fusion_bench/method/fisher_merging/clip_fisher_merging.py,sha256=QCutGqjkfW3OWET
77
82
  fusion_bench/method/fisher_merging/fisher_merging.py,sha256=CPU-tJiDv9FCIBYl7Pn0zA5cdRB1Md5kWchRDlJgly0,20456
78
83
  fusion_bench/method/fisher_merging/gpt2_fisher_merging.py,sha256=LZmz41jZ5dSsAHxfOUpr3u2rlCgUPTDR7xMsIlQM-jc,7576
79
84
  fusion_bench/method/linear/__init__.py,sha256=ChfkoOEAb-rUKwpowFPel-a1hRfS8gCrbnWD-jlRbe4,283
80
- fusion_bench/method/linear/expo.py,sha256=jTZyI0dtYa4GbchJ78mJuaYrDHD8u3ywxyQvP46hZV8,2681
85
+ fusion_bench/method/linear/expo.py,sha256=LCHTWlsPm1Mjhrq0mfpWLVC7skkI9ZksGduy3TxULoU,3939
81
86
  fusion_bench/method/linear/linear_interpolation.py,sha256=IONw9BPiRJouY8bE9Abfyz7qVI_1B1n8KGZa0f7Pza8,2157
82
87
  fusion_bench/method/linear/llama_expo.py,sha256=ccECjhAqcFmzOIDyZ7e_aPzTM2Kj8u2D8TJytyz18YM,8476
83
88
  fusion_bench/method/linear/simple_average_for_llama.py,sha256=7JlVrmTMmrePvNGnZNoxSuCSq2Vu7cPQzjGC3WWUXBE,2079
84
89
  fusion_bench/method/linear/task_arithmetic_for_llama.py,sha256=4SZpiTD7OzhWUXtcdK3PYdXbBGyDqiZd7oZOQ0lraN0,1963
85
- fusion_bench/method/lm_finetune/__init__.py,sha256=rIkKoxrqKEYkA7XIR6jyhwvUK_ebX2k6Fm1d7K1kU5g,92
90
+ fusion_bench/method/lm_finetune/__init__.py,sha256=IFGAqXujX3Fabzl_tC6zZyOyPFJfVziL0qFtj5MVxj0,149
91
+ fusion_bench/method/lm_finetune/bradley_terry_rm.py,sha256=ys_td1IeL3bzPTE0Cixlj2JooCaB7qseRwSDwroAk5A,18777
86
92
  fusion_bench/method/lm_finetune/causal_lm_pretrain.py,sha256=4CL9KGFsUzrt-edMfTooo4G4apzTH_57rso3DGGvKL0,219
87
- fusion_bench/method/lm_finetune/fullfinetune_sft.py,sha256=WoVOzFhg1PRUm8iPMYJ1g98-km3wux6nrUqnWXm27Pg,18364
88
- fusion_bench/method/lm_finetune/peftfinetune_sft.py,sha256=EwJJITxYBFtjsjunOlpSdo70dWeoHUYI-qIyelgW4n4,19834
93
+ fusion_bench/method/lm_finetune/fullfinetune_sft.py,sha256=eZabmkL7QUHGxl0DX9aaCYerMVUTzH5fXQfJXiSQNEc,16226
94
+ fusion_bench/method/lm_finetune/peftfinetune_sft.py,sha256=klZ_IDr5-1xoYvyVZwug9eyKXyxA3WZuSaML2jCH_Gw,16370
89
95
  fusion_bench/method/mixture_of_experts/__init__.py,sha256=r95iu1-3tgIUP7sWuAbLuqV7xexNYMYPZkM4_8egfp8,198
90
96
  fusion_bench/method/mixture_of_experts/mixtral_merging.py,sha256=-n1CLP1o08VyMSfaTq42kRutbw-cFDSCWHTu0iNh6ok,4237
91
97
  fusion_bench/method/mixture_of_experts/mixtral_upcycling.py,sha256=tQYAeS8MLFEfH3zDFfNZrML7lRnpGLN-HquQvjPtHNw,11208
92
98
  fusion_bench/method/pruning/__init__.py,sha256=3gtmay2bkdIAEGjpAhbY2ztMZOZLKhiJcKV3mCe2H5w,252
93
- fusion_bench/method/pruning/llama_magnitude_prune.py,sha256=ihHa8SNe0WGPuZqRKI_6S6gmH4ooTmeTRARGkJHcsos,6300
94
- fusion_bench/method/pruning/llama_random_prune.py,sha256=c-qV1iFSKZK1dES6gYsgWna1BUn58dtO0NjV1eIfJrg,4566
99
+ fusion_bench/method/pruning/llama_magnitude_prune.py,sha256=40Gmy665S9XqIw027En6E5IlomOIcKECIRje7NDkH00,6300
100
+ fusion_bench/method/pruning/llama_random_prune.py,sha256=EW7zfE-1a5VlPPrQ5xO1k1aqFcpPUfs5eSO_a4M1K90,4566
95
101
  fusion_bench/method/pruning/llama_wanda_prune.py,sha256=8pcg3X1yn8vfhV0lEg1fHP3oTzAc_-ixLmsZRdH5uPo,12070
96
102
  fusion_bench/method/pruning/magnitude_diff_pruning.py,sha256=nXRHW87_Nwiash-udnwR9iOaJMBDo7fPTmAwmSqsAaI,6451
97
103
  fusion_bench/method/pruning/prune_utils.py,sha256=ITWO8WtrhcOYXTcjc_fAAw7cyjvqFa6axawPr3uTT68,5882
@@ -128,6 +134,8 @@ fusion_bench/method/sparse_we_moe/sparse_clip_we_moe.py,sha256=J8iVYks-SQ93dqh6F
128
134
  fusion_bench/method/sparse_we_moe/sparse_we_moe.py,sha256=6OYgj_D_4xTtqy_guA7whQu76LQ7gv-U2cIZkXe7bIg,10479
129
135
  fusion_bench/method/sparselo/__init__.py,sha256=0Uk4Hq5b9iwc5yl2QTDwvBHUItN4V6lwhxDYQrFb724,107
130
136
  fusion_bench/method/sparselo/sparselo.py,sha256=qkfFwovdOA7-NUXtLYiV1iM9bglQJydfuL805azQ6Xc,38806
137
+ fusion_bench/method/surgery/__init__.py,sha256=xVvJvVv9wJnVgJjZDgF2YliuSsy221AQ0KSwB7J7bjo,97
138
+ fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py,sha256=7qoWWYcTWpfXik21mEPBlMW5BM0Kc4nxSW1GL2hzP1o,5599
131
139
  fusion_bench/method/task_arithmetic/__init__.py,sha256=pSx_NV5Ra_6UXpyYWCi6ANQoAnEtymZt_X1dDN9wT4Y,96
132
140
  fusion_bench/method/task_arithmetic/task_arithmetic.py,sha256=1D0uuNtqyA1VS35jh6AnEVsX72HnT02THyerck_lmso,5441
133
141
  fusion_bench/method/ties_merging/__init__.py,sha256=9u9teBbdILbupr9jbwk-qCXSzssCssC5FUV2BfpyZM4,67
@@ -153,34 +161,41 @@ fusion_bench/metrics/text_to_image_generation/__init__.py,sha256=OEIxpKmyy6-3iWy
153
161
  fusion_bench/metrics/text_to_image_generation/aesthetic_scorer.py,sha256=-ZaD84ENPITh_K0Fe9OKYYoiGnPhlSE9gTbBqrtnqqA,4487
154
162
  fusion_bench/metrics/text_to_image_generation/compressibility.py,sha256=x4dNTFnAN4naChBDZBO-jUghnHAyobRVOupctKYRg1w,1656
155
163
  fusion_bench/metrics/text_to_image_generation/pickscore_scorer.py,sha256=aSWzl8k7z80Cirg5qdfkPsp3sMFEv_PjA1NJv3PPWXY,3115
156
- fusion_bench/mixins/__init__.py,sha256=hMxt39JDb_uIvNDtp6ZJEDmaQFwx8GId2VK2Wajw9Rg,791
157
- fusion_bench/mixins/clip_classification.py,sha256=devw9zTpyJsCfGCR_iKuuT9iPp1XWUqqRHRdliK6riM,8030
158
- fusion_bench/mixins/lightning_fabric.py,sha256=S81Bf9IDktaz2RM5T69TgiwPewUJfliLy6kd-dq3kdc,6163
164
+ fusion_bench/mixins/__init__.py,sha256=AsUNvrHdNd6xht7-dfuVipmJuRfMNFlkgG-fn3ojt1U,892
165
+ fusion_bench/mixins/clip_classification.py,sha256=lsrh-qZln1Am0ry_rJL47EFCvVZFRHD2YYk9u3eecs8,9933
166
+ fusion_bench/mixins/fabric_training.py,sha256=ZmycEhCaNCgVi5oM9m0q6msxgk3quowmFvDAcvskFrg,13017
167
+ fusion_bench/mixins/lightning_fabric.py,sha256=XE3OwV68YmJX7aR64uk9h1j9Qs6UPU5F7ciBIgBYyv4,6461
159
168
  fusion_bench/mixins/rich_live.py,sha256=j7wNgrgwfdpl6nCXZGF_2DLtNq2aqCb_52Qhe9QSltc,495
160
169
  fusion_bench/mixins/serialization.py,sha256=9W50JUcM6wgFlaE9H29mATLLVobYniSDxg94FfY25w0,4049
161
170
  fusion_bench/mixins/simple_profiler.py,sha256=UDPB8QAA3rtsSdnVgL9KMthDLBY1Rh4h8mtiquiCPp4,2106
162
171
  fusion_bench/mixins/optim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
163
172
  fusion_bench/mixins/optim/adamw_with_warmup.py,sha256=qTnRl8GVVIfaplOFBHnJFuZUbxPZRWRGHGNzm_EDhDE,1421
164
173
  fusion_bench/modelpool/PeftModelForSeq2SeqLM.py,sha256=rxPKTTWno3KAcTTEfydPpXx1b0EJa8PLbqrberweFF8,2108
165
- fusion_bench/modelpool/__init__.py,sha256=Ydu0bLWBA15QfHv3C5Tme0ltwHYTQFm2R-FpBVzgb-o,1266
174
+ fusion_bench/modelpool/__init__.py,sha256=LtcCWTcYkVaz7ZxEseWRwKDk3cSTg95-WQiriXBHLSA,1401
166
175
  fusion_bench/modelpool/base_pool.py,sha256=WCpDt0MpwIk_djpgpM_CANomAEJ7Uoj78GSHEYzD_oU,9142
167
176
  fusion_bench/modelpool/huggingface_automodel.py,sha256=OJ6EyYyjNv1_Bhjn-zli-e__BJ0xVa4Fx9lhXVb-DJo,552
168
177
  fusion_bench/modelpool/huggingface_gpt2_classification.py,sha256=j8nicVwtoLXY4RPE2dcepeEB3agBKkkH-xA3yMj1czw,2014
169
178
  fusion_bench/modelpool/nyuv2_modelpool.py,sha256=btuXmYxwfjI6MnGakhoOf53Iyb9fxYH20CavGTrTcnA,1375
170
- fusion_bench/modelpool/causal_lm/__init__.py,sha256=kxfDP9q5yTY-SijUu7YafgV56x7c-7LBvApbKJzP3eQ,78
171
- fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=AhuwsJLNiXUHl0Besyq2pzYo6G1_9r-iSuIIZm_70FM,4987
179
+ fusion_bench/modelpool/causal_lm/__init__.py,sha256=F432-aDIgAbUITj4GNZS9dgUKKhaDMCbTeHB-9MecaQ,99
180
+ fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=k0eOOcFbswVgBYhM9CEXvdCRU9zVC8Gw78QaiMWzeWo,4487
172
181
  fusion_bench/modelpool/clip_vision/__init__.py,sha256=3b9gN2bWUsoA1EmpitnIMnIlX7nklxbkn4WJ0QJtS2c,43
173
182
  fusion_bench/modelpool/clip_vision/modelpool.py,sha256=qG-b3ms-q3gqcRf7J6wrTDdmtu2yb1E_A25tNOjSli8,2065
174
183
  fusion_bench/modelpool/seq2seq_lm/__init__.py,sha256=FnfSMHcwNHDQEMdB2HdK4WphQ6MufsRLUkczuALjM4Q,57
175
184
  fusion_bench/modelpool/seq2seq_lm/modelpool.py,sha256=IjLHi8qycWOA4Ul9jnqR48evgVXF_pfTKLPeL9XKP-s,2052
185
+ fusion_bench/modelpool/seq_classification_lm/__init__.py,sha256=k-t4RetcDlbkRkPHNuyeV3pQEcJnFRjd9Wp5tFBb-G8,128
186
+ fusion_bench/modelpool/seq_classification_lm/reward_model.py,sha256=NKf-eoei1GdU4ojKSpN5_kQwax4uUEStnlKyh8qOrNg,540
187
+ fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py,sha256=sIKAmGJwfrNSuWtxzJ_-ME0gQksEYY2y-jVt7P82Qs0,3434
176
188
  fusion_bench/models/__init__.py,sha256=TNOEH_2yAQP51m9mdWepNEf9VGUZgDthtgXbs4rhb4M,100
177
- fusion_bench/models/hf_clip.py,sha256=yOQ6UKMymQ3GcfpPm26QiToPztij-cXukNMMKXTmUrw,5745
189
+ fusion_bench/models/hf_clip.py,sha256=056UHeSjKKDYXg-o7CC2zsx4fC9R6IBkPGI8IFhWTNw,7291
178
190
  fusion_bench/models/parameter_dict.py,sha256=hRie26WIeXU-wvY6JeGaP8LvpMqbuZA6Ia_1vOFMuu4,2294
179
191
  fusion_bench/models/rankone_moe.py,sha256=uwpAqk1cwxxprQ0hxuAwRuPvHDxxBKBDahd9vcaafXs,14248
180
192
  fusion_bench/models/separate_io.py,sha256=5AJlCxkHdVVffITnIRlF3ZIaKLRWDhJESVQN1lX-ZhU,3835
181
193
  fusion_bench/models/sparse_we_moe.py,sha256=b-yIeCsl2rz0i7BP9g_fqCEam7KUNjNX_J8oyZV6MJ8,16509
182
- fusion_bench/models/utils.py,sha256=7HKXRiWHeoNWp8LyDemG2irnMPkT9qg2ExvxjE5mUck,1858
194
+ fusion_bench/models/utils.py,sha256=AQFI2UZSItKfJpG8ex74FPjn_SjsADLhvpv1GYqu43U,2065
183
195
  fusion_bench/models/we_moe.py,sha256=0U-m3mhzb4vFLIzn2jd7j_SQOF9lot4ddzq0l_VPp9g,8424
196
+ fusion_bench/models/chat_templates/__init__.py,sha256=v9vKrCfBgZ3UsMBQatZv1Z-ayPualBl5ciV0aO3p3iY,85
197
+ fusion_bench/models/chat_templates/llama_3_Instruct.py,sha256=E6grNPECr0r1KDPIGW_DmpKQw5-Dh5WbMiTaHWDXwXo,4008
198
+ fusion_bench/models/chat_templates/load_tokenizer.py,sha256=yRs3dB2tZo0Oh-YLJcMZzWSQ5Ps8KXrggZNb5F-aBuM,1400
184
199
  fusion_bench/models/linearized/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
185
200
  fusion_bench/models/linearized/linearized_model_utils.py,sha256=5yKXReQHIwDttzT_oXwY_iIpaG1zIU0Nv93BWmmOqrg,3212
186
201
  fusion_bench/models/linearized/vision_model.py,sha256=HhbhtyoLD1qVvh1Sgl_beYF2W7AvMevmUy4Jx2XlcsY,4636
@@ -212,15 +227,21 @@ fusion_bench/models/nyuv2/resnet.py,sha256=PcCfBhEsxm7W8cu3epBbIbCYFARPrPTamIa3T
212
227
  fusion_bench/models/nyuv2/resnet_dilated.py,sha256=4EXB6vrBJS307YP6k-TRY1dFJ50LURcTuzqN4tZzYRk,3125
213
228
  fusion_bench/models/smile_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
214
229
  fusion_bench/models/smile_moe/linear.py,sha256=voFvx4Nnfgc6YReBcY9FUGG3WrxVRjyD3odX4jIS5Eg,8724
230
+ fusion_bench/models/surgery/surgerymodelwrapper.py,sha256=zkiV_IF4-7CfzUND--fGCzgz-Sa-dCUz5CmVoPjQt1c,5132
215
231
  fusion_bench/models/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
216
232
  fusion_bench/models/wrappers/ensemble.py,sha256=wIMZMRyXw5boWAm96c4Tiyebs_HDQovKxpGQ8rLnHUQ,6308
217
- fusion_bench/models/wrappers/layer_wise_fusion.py,sha256=hxyizABheJds7U_I_0h5yZh2ZvhQlkfycLJ9K5DZQ3c,12210
218
- fusion_bench/models/wrappers/task_wise_fusion.py,sha256=RBs1NL0eFDCMTIQg9kZXrWZ32n1RrKkcAN8OuKdN6Qw,8344
219
- fusion_bench/optim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
220
- fusion_bench/optim/mezo.py,sha256=WNcJw-Az6wnctc6pqVAloDifqgef31ZI2jwlpMKmlfo,3693
233
+ fusion_bench/models/wrappers/layer_wise_fusion.py,sha256=tISTe__HvlaHVVKkfa0nX3JRRDYLHetJ4BzIqGq-058,12316
234
+ fusion_bench/models/wrappers/task_wise_fusion.py,sha256=gNOU1t1JUcBr3V0Apa1uvJDl5BDM2is85lkEF1SfPRo,8404
235
+ fusion_bench/optim/__init__.py,sha256=lemrcuiA6OLjQkpYm-RP-Ox2MgjngN1ywvCo0NgShlM,61
236
+ fusion_bench/optim/exception.py,sha256=fMgo1heiqfGhuI5RIbf30BwWSShn5RQiyeb30QtfTI0,1607
237
+ fusion_bench/optim/mezo.py,sha256=Vm4vMGh10Fhe28_9L1MK8r_U7DrurA8Liprh2_gn4_U,3646
238
+ fusion_bench/optim/lr_scheduler/__init__.py,sha256=W7CsdW4XKqXbNfzjvv2wmrvNWwfH_sQ-wiBViRPlP3U,29
239
+ fusion_bench/optim/lr_scheduler/linear_warmup.py,sha256=Dvy_TCUuAQHlbDF2jo2_502Ae4JWXGrtZL3gwA_H6ZI,6566
240
+ fusion_bench/optim/lr_scheduler/utils/__init__.py,sha256=GfZk9VYL3cFE1Qy2xQpGc1GCgnjySk5-D7EVRZ-C05Q,29
241
+ fusion_bench/optim/lr_scheduler/utils/visualization.py,sha256=Ea1n9ElNizAe0iUnjynyfteuZunv2-UBMN_NfEU2imA,3490
221
242
  fusion_bench/programs/__init__.py,sha256=oGoRp2TMI6ELxyfkeTg2h27hZJEDz9x31AsmvwvNvJw,508
222
243
  fusion_bench/programs/base_program.py,sha256=0dX_KcMWASo53pr-ldzfUBWIjEXy6oeDWZBrfc7FIk8,195
223
- fusion_bench/programs/fabric_fusion_program.py,sha256=tXUdVfLCVqjUqn8rGnc7BuXCDw0dLOkqqYtq4O6EnX4,12124
244
+ fusion_bench/programs/fabric_fusion_program.py,sha256=Bf4lnntM1J1hxKVm4Av0ohAmSqzDxOzWg75rzqps0qE,12297
224
245
  fusion_bench/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
225
246
  fusion_bench/scripts/cli.py,sha256=497nhqnJAwxkqU2WCMUqcAIvqTmGRdQaByWGNTX_onY,1131
226
247
  fusion_bench/scripts/imgui.py,sha256=r9Glbfbwu3JCsX9TKQFwcHarvwA_G7ff0jWBUPW1S1U,7613
@@ -232,12 +253,13 @@ fusion_bench/taskpool/__init__.py,sha256=_qaYgzYnvrJDrZ2DjKXMvOFbelsLrujCKa_gP3U
232
253
  fusion_bench/taskpool/base_pool.py,sha256=FaP0nndeSsrwbdd9mKa_CedbX9T5AHJmxk7Lc0NEVNY,835
233
254
  fusion_bench/taskpool/dummy.py,sha256=Di9JZO3XyDYn6wAGukrJMTnkS_NaxGTeQYo_3j1JD3Y,1675
234
255
  fusion_bench/taskpool/gpt2_text_classification.py,sha256=S4YyrcJhD4JOgvHF-AVG-gENgVGl-wpQZr1SbiThM04,4886
235
- fusion_bench/taskpool/nyuv2_taskpool.py,sha256=lnaR1oVm0pO2CA9EVV4uk3fiWYHD-F0GzPrUUARD75I,1970
256
+ fusion_bench/taskpool/nyuv2_taskpool.py,sha256=Y-TI-rzh9udCjX3FJ11ZbIG7CGrjDccGc-Ch1Ug6cRY,2059
236
257
  fusion_bench/taskpool/clip_vision/__init__.py,sha256=4xGO7rRbRpXF-I34A3WEMU4vydgfdtvXQ57ThaFcpmE,214
237
258
  fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py,sha256=JKbRrGaRYztgZ-P0U767HISe40UpDVQ7fn6Tf2rrug0,4891
238
259
  fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py,sha256=hVDTtg-oXqRFmAE2wZPFpk_kvtdk_wS-2-ev2ujEJBs,5390
239
- fusion_bench/taskpool/clip_vision/taskpool.py,sha256=NRFXsp2N8PMQzZgFHy2yfJMjoYbDaxQpPTZ4-4EHPBY,13942
260
+ fusion_bench/taskpool/clip_vision/taskpool.py,sha256=xbJHQXUYd2ZDs-oIyE-3knCsPdiUbZCKN7O86kPwpsQ,14907
240
261
  fusion_bench/taskpool/llama/__init__.py,sha256=iB4ESMgnsl0m-z0YtRdPZiwGGv96-86R8pbSnkdet8Q,57
262
+ fusion_bench/taskpool/llama/reward_model.py,sha256=y5a_dNDzjYSoV9RNRdmDPhALM7i20A5EiFYSjrGTpWo,5028
241
263
  fusion_bench/taskpool/llama/test_generation.py,sha256=kJ_5GruG12FsuJHDh_S7pbQgwEojTqhGpA_wVNH5KPc,6675
242
264
  fusion_bench/tasks/__init__.py,sha256=Z_ePIp4Xizkj78QopLg1dZkJAN_IF73MkbR_nkfHQ9Y,52
243
265
  fusion_bench/tasks/base_task.py,sha256=Fg_pdZhld-2KPKX0C1WrxaTz0EYWrvJerAHO-hA03GI,412
@@ -273,7 +295,7 @@ fusion_bench/utils/data.py,sha256=51nbgOnayyerLBUGHrlm9iilGjhJsBkXKKGXOKgLRW8,61
273
295
  fusion_bench/utils/devices.py,sha256=72HeUVVlVGTt97JA7KFG3D8BM8VHqR-y1nkr9Bm-PRE,7578
274
296
  fusion_bench/utils/dtype.py,sha256=kYoEGqsXitnwOU3W7ivqhQ0OjdI7MGu1VsyMJS4cSyQ,4299
275
297
  fusion_bench/utils/functools.py,sha256=7_tYJ2WD88_2DDuOOj5aZz3cYuslYH5tsVyIgCeLtmk,1318
276
- fusion_bench/utils/hydra_utils.py,sha256=b-5UeDnUbMc2_NAbXVabyHeCwxfhmvAIx_aYUlzVVc8,159
298
+ fusion_bench/utils/hydra_utils.py,sha256=TklUDKDEZlg4keI-TEZiqh4gFjr9-61Rt1RMlqkoSGk,1174
277
299
  fusion_bench/utils/instantiate.py,sha256=v8L9JDfh2YoEOFpIQIHomvBoqdboZdYIaHEATnD2gdQ,16972
278
300
  fusion_bench/utils/json.py,sha256=iNeZHFvpzbb4oX-52dX15De_dMcux7vQtAUFZqW12GA,1907
279
301
  fusion_bench/utils/lazy_imports.py,sha256=v5l9cpHXPMaz1IVBmB5oOqefYr9vA3XvP340xT7Wy18,2796
@@ -285,17 +307,20 @@ fusion_bench/utils/pylogger.py,sha256=a5tHfpEFbsdzw0vhQxt4BJ6CfTXaxyuwzoDFhyNy4K
285
307
  fusion_bench/utils/rich_utils.py,sha256=V_BjY3o8bXMp-kWfxle4cK48GGHDnKbVonZX65qbXAA,5464
286
308
  fusion_bench/utils/state_dict_arithmetic.py,sha256=dVPBkO8Te9_VANPbetV59ORAQTw7D3css_-d0lYgK4k,9062
287
309
  fusion_bench/utils/timer.py,sha256=RC2hP8JqaibdL0FnRyUCBRf4m7CXyfn5tE16zBWZ7hg,1338
288
- fusion_bench/utils/type.py,sha256=vS3uPx1AJz9ct-hD2Rp142yug8xcQ8YRAciAwENRKME,441
310
+ fusion_bench/utils/type.py,sha256=4CmKDVL1bKZfAfEApq79YhQavG6ZGermpQThDwiVl5w,532
311
+ fusion_bench/utils/plot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
312
+ fusion_bench/utils/plot/token.py,sha256=QGmL_qX8drmWnN_VNLD_0YjKc1o_qahJE-svXVor8dU,1634
313
+ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFwyLeG0MIwOwF4,3739
289
314
  fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
290
315
  fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
291
316
  fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
292
317
  fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
293
- fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=ZAmS3hcFiWOkKxrlnZXg7moXuRX6lPi5uejqhEwIFV8,1164
294
- fusion_bench_config/fabric_model_fusion.yaml,sha256=68adtP49Kn7Qo9YjGSIe0ACxNFUuZ4FH9inq4U1-lbo,949
318
+ fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=GtK3VuD2FOpFHH_1Hi6tlaYpdLE5Cz0nYKP92Ss9G2Y,1164
319
+ fusion_bench_config/fabric_model_fusion.yaml,sha256=1shmbuC0B9snkFkLErBCiroF-z7UnEHscyEmKBne7Oo,949
320
+ fusion_bench_config/llama_full_finetune.yaml,sha256=J2qCNyX1GaPHg52juSRRIkfHSb6YJdiJOM3Bn3vAo0A,771
295
321
  fusion_bench_config/llama_magnitude_pruning.yaml,sha256=xFyDJpb8gyIjosteOpEW9eayONWhl0B763r1XmO-9w8,633
296
322
  fusion_bench_config/llama_model_fusion.yaml,sha256=EJRsSbt1zttAXAAy_-5NcMkWwhYrl0osjKGXQopu4bo,588
297
- fusion_bench_config/llama_weighted_average.yaml,sha256=SUP6pTcqMF_5lGgvRd_iWgmmD_s4iMGDZBRPfW38HGo,960
298
- fusion_bench_config/nyuv2_config.yaml,sha256=1rW-5ZsJOUCCQAvShdISmmYwyvY5vl3tukPTgtcpocY,410
323
+ fusion_bench_config/nyuv2_config.yaml,sha256=SYaafywjOIKK1f-Nl_K5EuBjXH2oN1whlqjN_dXXO-A,492
299
324
  fusion_bench_config/nyuv2_mtl_train.yaml,sha256=RfsrboIpL9Cct2RkRrKxXAqH4jLi1NECHbwH8iOGtDY,591
300
325
  fusion_bench_config/dataset/image_classification/test/cifar10.yaml,sha256=wlzzTXAKzBjQXKYWhZPGcwydxwgeAV8sM4Dp3GBHbw0,78
301
326
  fusion_bench_config/dataset/image_classification/test/cifar100.yaml,sha256=f_tsvq5bbw2Trp3f6mokXV7hUlfYr_yuebZkEqJqIVI,79
@@ -330,6 +355,8 @@ fusion_bench_config/dataset/image_classification/val/stanford-cars.yaml,sha256=p
330
355
  fusion_bench_config/dataset/image_classification/val/sun397.yaml,sha256=cayl6FNzxOj2UBjw0ikJoQNCdN3DX10xQmcx4ouFP-0,245
331
356
  fusion_bench_config/dataset/image_classification/val/svhn.yaml,sha256=uMdEYmc406i9HdkOLzfzBiJ8pfbYLIWT1pA_UId8HMg,265
332
357
  fusion_bench_config/dataset/image_classification/val/the_eight_tasks.yaml,sha256=x1-xurkOIQtWX-gpSwXDxA5fVY30KmrarS7EKaje33M,101
358
+ fusion_bench_config/dataset/llm_sft/alpaca_cleaned.yaml,sha256=QJK8OM-C2cZNaC3_vbRucuWrTggw69YWEtPOzmxm6bo,174
359
+ fusion_bench_config/dataset/llm_sft/ultrachat_200k.yaml,sha256=uqOGtDu4MNAOnwCA4Qp6elcnc8X_Y7sTrwYdruAvjGU,106
333
360
  fusion_bench_config/dataset/question_answering/search_qa.yaml,sha256=u_8UTKQFnjCwbeXqx2grC2bzLDpdEQy3s3Oxip_JEoc,118
334
361
  fusion_bench_config/dataset/question_answering/test/search_qa.yaml,sha256=oB2qP5ScTTbFGp75a5VLBaQtUa9VYHkGNhNAfe-AkvE,132
335
362
  fusion_bench_config/dataset/question_answering/train/MetaMathQA.yaml,sha256=3-stubxiEKjuBQHhqS_Tc_BqGK3IOwzaAtnO4sd5SX8,90
@@ -348,9 +375,13 @@ fusion_bench_config/dataset/text_generation/train/gsm8k_question_label.yaml,sha2
348
375
  fusion_bench_config/fabric/auto.yaml,sha256=GOCIA6s_co-JpwyBj3dM-dgWUMKciD8lFRmFThUeAsA,575
349
376
  fusion_bench_config/fabric/llama_ddp.yaml,sha256=iP-3n-hehRSjmJkdQWhDb4AkLcfFa6PFq0BCrL58xso,706
350
377
  fusion_bench_config/fabric/llama_fsdp.yaml,sha256=--_G5mcyG6L3aEBNvTjvMH8D-jD0SMXGap6V8E3jH84,575
378
+ fusion_bench_config/fabric/llama_peft_fsdp.yaml,sha256=V-iBtvSg_m2o42ERYRxlDITqeEUBoRTMrLzfVOtN8VU,580
351
379
  fusion_bench_config/fabric/loggers/csv_logger.yaml,sha256=Pv8I-xbxrpTb_fwtDiUtCAEoCZ8QYCLu2GeJNzb3Z3c,373
352
380
  fusion_bench_config/fabric/loggers/tensorboard_logger.yaml,sha256=w9ZP1i8lRYQFslzEM98PmbcFhhn5dXReSJhLOdEi-do,381
381
+ fusion_bench_config/fabric/loggers/wandb_logger.yaml,sha256=eF4slc6QPRuMCMJVeFHNJirsGiB15WQIxNgioXNwezc,142
382
+ fusion_bench_config/fabric/strategy/deepspeed.yaml,sha256=uHujbd2sKrgWb5YhFTWOJCZefStv6O_HHo_GylzqYbU,344
353
383
  fusion_bench_config/fabric/strategy/llama_fsdp.yaml,sha256=WBx05GFUCuEtF-H7LhlTq95VZeaIg36hqntw478qJng,307
384
+ fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml,sha256=xoxeQ0Pp7ecZPcAX57PhQJsqRUKhqEmNc3DXmYXqx4Y,348
354
385
  fusion_bench_config/hydra/default.yaml,sha256=TT0RaUwYgfB7pKpbHgEbmuCVTB2fx2eXxvM-Xz3SQMI,241
355
386
  fusion_bench_config/hydra/help/fusion_bench_help.yaml,sha256=v8s891Cr5wyxBXGDn_VBBwwRmb0JXOL874Sl-zNoCWA,1880
356
387
  fusion_bench_config/hydra/job_logging/rich_logging.yaml,sha256=_dYGeFTCqaPrRowLXBNMXwzYhw8ns1TkQFfALwK1aCw,441
@@ -392,8 +423,9 @@ fusion_bench_config/method/linear/simple_average_for_llama.yaml,sha256=QJR5qx9z4
392
423
  fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml,sha256=N7cyHm6a2QwNsV9uaJp-eZmdbs9kmdRrkxtO58QQQgM,116
393
424
  fusion_bench_config/method/linear/weighted_average.yaml,sha256=SmELszTsJU63e8KwIrPmSqKmOmH-rz42zeumQZHoVDY,187
394
425
  fusion_bench_config/method/linear/weighted_average_for_llama.yaml,sha256=r8BlNqzRfn--_gDSff6KI8FO-elWFIszZDRV7G_nvHw,499
395
- fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml,sha256=iJgRZiT-fic7jJOMSmq-4vslQXBIoE7IdrxPC4GQ9Cs,1157
396
- fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml,sha256=_LIlnNoGLJfJpchB9AYvZMRby8oG_PU3p7mdA24Eq0k,1556
426
+ fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml,sha256=em0Lnodl9bg8dos9MODMXjKtxWCXwQArjLT2z4TC3Q0,1352
427
+ fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml,sha256=edj3juaYos2I9oQ8J6NKQNcNwqwcQGD74ZMosDsB5SY,1341
428
+ fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml,sha256=9S-qsWUIALRwWd_gzNF1bwIuPPGP1MmqTpdQ53cwZmc,1628
397
429
  fusion_bench_config/method/pruning/llama_magnitude_pruning.yaml,sha256=Px8LU_UtDz-YHDFfqQ7scEPOproiFOaudKVshrhCTgc,483
398
430
  fusion_bench_config/method/pruning/llama_random_pruning.yaml,sha256=0RiZS8d42PXZzwncPG8zcbnyYJ9vtfr2sOSqS8oDyT4,325
399
431
  fusion_bench_config/method/pruning/llama_wanda_pruning.yaml,sha256=qKe5yIRsmK2KUyYENENWlw1qlGet9TpDhR-E_uO7vAw,501
@@ -409,6 +441,7 @@ fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml,sha256=G88mabTTn
409
441
  fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml,sha256=A_QFhwAzbzXxDkOPmXRbPTj2TBib66d3_3mkrf-Xu0k,641
410
442
  fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml,sha256=w1OWb38nW08K_hvrRMsCwmRxHWLGQfSSXg5nTiYaP8E,635
411
443
  fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml,sha256=J6vYIwqzh95-B3ekDias3FnCrVr4sig4zxpWyvz8hZ0,613
444
+ fusion_bench_config/method/surgery/adamerging_surgery.yaml,sha256=Ne9JlJFgsRYcygBNCOBSN1ygBcLkE6I-8yusfTxyg-Y,826
412
445
  fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml,sha256=mK09Ohsvj0Q6suj5qJM4DyCzRy192QBt4wjHS6W29IY,197
413
446
  fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml,sha256=jiAco7M1XO0aekHFZKLKlXL_jRoCA8bgGD44Z7iB208,1001
414
447
  fusion_bench_config/method/wemoe/weight_ensembling_moe.yaml,sha256=OEv5yhyUCe5lXeT2PyXC49yrHXEM7i8SZDw6IQRDtAE,620
@@ -498,7 +531,11 @@ fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and
498
531
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml,sha256=-Tt_YggxkuIGT4_q5FR16zPvW2wWhGJ5LL8omxvHjvw,380
499
532
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml,sha256=PrDQr04UnhAciDrdtUutx-prMxF1Cs4jrEar7uJ-1Es,238
500
533
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml,sha256=aSu0QUpcEZXKbL9PAUKCQAVvs5CksG1s7PPCvjTsIzA,234
534
+ fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml,sha256=BXsYH04vAUPT4Cpr8lS1px-CYYKaCTMRWuHGWgC6qE0,647
535
+ fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml,sha256=m7NDjkzFbGG8E8e_r2UUxtXjNbMFvtAcZOXyBaJOyX4,645
501
536
  fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml,sha256=hbjSkVle5zpcqGDSMGaJ20CLoO0ljIXG-gtdONuaFBY,803
537
+ fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml,sha256=OOnKzmsz6iiO2jI5ZyGmCem_Pcs3a25Dveb5PLfwpUM,593
538
+ fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml,sha256=K403avKEQlK4uRhZYHbKGluCG37sMUjLRytBR3LspmI,577
502
539
  fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml,sha256=RXdm5BQmYfq9XWVli0NsQ1Xh7jD61XnhRBOSlmd9FcI,825
503
540
  fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml,sha256=aX0rWwB-p4N94bPX1QGwqKNIWnTrkNMuF7sMAQHzjQE,549
504
541
  fusion_bench_config/modelpool/Seq2SeqLMPool/_template.yaml,sha256=mRx-Xx4s6_IBoJJRogIBW4egmqW0wi1kGVWp_YwYVvQ,233
@@ -507,6 +544,8 @@ fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml,sha256
507
544
  fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml,sha256=GfTY343bt5YtxtUkQxSacrtQav9lT9Y-t1VIL1Chs4k,1726
508
545
  fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml,sha256=2YBIzqYGluOT2r6dOFpUYE4Cbdd2XoHAUps-kCDxVPQ,185
509
546
  fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml,sha256=W1y3fKY9UTTRyv7nqbIO5DESlQVfNsWlhkHJMUYh7B4,1824
547
+ fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml,sha256=JUzGOLANW92Y_rljOOZKmwBQvWrJsko_ziayurzHSTY,880
548
+ fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml,sha256=Cg9W315FzKP3DC_-bkIyYZp1nU6UoSQ6xe-MsMx-P8A,626
510
549
  fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_clean.yaml,sha256=vcU1ygptQ7nlufCEdKDWGMyi-OH4zJs55_vxG-iNHBc,541
511
550
  fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=Ged9KWmmGl29hq0gXzyG1DlryuLebDQAJIb_t5PvqiE,758
512
551
  fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml,sha256=gk_RB12EeYrEUNlZJHtZ3XKIm_LDraqE1hC_lpOEvtY,518
@@ -514,6 +553,7 @@ fusion_bench_config/taskpool/dummy.yaml,sha256=Id4Y_j7oc39qWjjEFG3qLmmMI1fGXXt34
514
553
  fusion_bench_config/taskpool/flan-t5_glue_text_generation.yaml,sha256=3MxfXiiwWJHEVgJ7aViTR7kzOV_YxXLL-fNHtnBaWN4,1002
515
554
  fusion_bench_config/taskpool/gpt-2_glue.yaml,sha256=16bw4-g08pL51M4OWAP08kWZPj6JcEefz4Xc4XhCTLQ,950
516
555
  fusion_bench_config/taskpool/nyuv2_taskpool.yaml,sha256=UaxDpFqEPkEz3h2CjFleUxsmnFnaY1aLXerkud8Zm9s,133
556
+ fusion_bench_config/taskpool/reward_model_evaluation.yaml,sha256=qaUqKcb6E_XfoS5J-FcteMJzFzthNb4aLpB9aKW4jbU,442
517
557
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml,sha256=X70J8HMoIcpKaYGjg7KaaXRvz1tPUbuCqKvK9-kGHrM,1310
518
558
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8.yaml,sha256=eoNUaX-cBjpJJt0BYb-ZCNiIlv1SarX9toiGAwHbES0,227
519
559
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml,sha256=AmfMrb2_wXDfRtUDsSCNgbuSicNsfC2vRlwXW-uNeJA,784
@@ -522,9 +562,9 @@ fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8
522
562
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_with_control_task.yaml,sha256=9hbvC3k5x6NpA9tRDYeORhrjEyd2VH5ztMdLU67Adjk,249
523
563
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml,sha256=iQMj2VpDTe_D8OfCo94w5Ud2MON-EGa0DzVr6UmphrA,436
524
564
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml,sha256=i5Bn8bLl2cgqvrgtIGmoovUfSMehk_m-6C2wwcx5JMU,435
525
- fusion_bench-0.2.6.dist-info/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
526
- fusion_bench-0.2.6.dist-info/METADATA,sha256=eExQgyXjCnwYCSMfJ3h9yH4vWaviRwNogM0OMJktUDU,13528
527
- fusion_bench-0.2.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
528
- fusion_bench-0.2.6.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
529
- fusion_bench-0.2.6.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
530
- fusion_bench-0.2.6.dist-info/RECORD,,
565
+ fusion_bench-0.2.7.dist-info/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
566
+ fusion_bench-0.2.7.dist-info/METADATA,sha256=XMvOFwwYoq1_J4Fta1kJ2J0grFb4k-I3CPA_ApmjPRM,13528
567
+ fusion_bench-0.2.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
568
+ fusion_bench-0.2.7.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
569
+ fusion_bench-0.2.7.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
570
+ fusion_bench-0.2.7.dist-info/RECORD,,
@@ -11,7 +11,7 @@ _target_: fusion_bench.programs.FabricModelFusionProgram
11
11
  _recursive_: false
12
12
  fast_dev_run: false # Run a single batch of data to test the model or method
13
13
  # Run the script without actually running the experiment, use with `print_config=true`.
14
- # You can also use `--cfg` or `-c` to show the configuration instead of runing.
14
+ # You can also use `--cfg` or `-c` to show the configuration instead of running.
15
15
  dry_run: false
16
16
  print_config: true # Print the configuration to the console
17
17
  merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`
@@ -0,0 +1,6 @@
1
+ alpaca-cleaned:
2
+ _target_: fusion_bench.dataset.llama.alpaca.load_tokenized_alpaca_dataset
3
+ tokenizer: ???
4
+ path: "yahma/alpaca-cleaned"
5
+ split: train
6
+ cache_path: null
@@ -0,0 +1,3 @@
1
+ ultrachat-200k:
2
+ _target_: fusion_bench.dataset.ultrachat.load_tokenized_ultrachat_200k
3
+ tokenizer: ???
@@ -0,0 +1,16 @@
1
+ defaults:
2
+ - loggers: tensorboard_logger
3
+ - strategy: llama_peft_fsdp
4
+ - _self_
5
+
6
+ _target_: lightning.Fabric
7
+ _recursive_: true
8
+ # Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.
9
+ # The value applies per node.
10
+ devices: auto
11
+ # The hardware to run on. Possible choices are:
12
+ # ``"cpu"``, ``"cuda"``, ``"mps"``, ``"gpu"``, ``"tpu"``, ``"auto"``.
13
+ # for example: fabric.accelerator=cpu
14
+ accelerator: auto
15
+ # reference to the precision policy: https://lightning.ai/docs/fabric/stable/api/fabric_args.html#precision
16
+ precision: bf16-true
@@ -0,0 +1,2 @@
1
+ # https://lightning.ai/docs/fabric/2.4.0/guide/loggers/wandb.html#weights-and-biases
2
+ _target_: wandb.integration.lightning.fabric.WandbLogger
@@ -0,0 +1,10 @@
1
+ # https://lightning.ai/docs/fabric/2.4.0/api/generated/lightning.fabric.strategies.DeepSpeedStrategy.html#deepspeedstrategy
2
+ _target_: lightning.fabric.strategies.DeepSpeedStrategy
3
+
4
+ accelerator: null
5
+ zero_optimization: true
6
+ stage: 2
7
+ offload_optimizer: false
8
+ offload_parameters: false
9
+ offload_params_device: "cpu"
10
+ offload_optimizer_device: "cpu"
@@ -0,0 +1,9 @@
1
+ _target_: lightning.fabric.strategies.FSDPStrategy
2
+ sharding_strategy: FULL_SHARD
3
+ state_dict_type: full # Save a single, consolidated checkpoint file
4
+ cpu_offload: false
5
+ auto_wrap_policy:
6
+ _target_: fusion_bench.mixins.lightning_fabric.get_size_based_auto_wrap_policy
7
+ activation_checkpointing_policy: ${.auto_wrap_policy}
8
+ # limit_all_gathers: true
9
+
@@ -11,7 +11,7 @@ _target_: fusion_bench.programs.FabricModelFusionProgram
11
11
  _recursive_: false
12
12
  fast_dev_run: false # Run a single batch of data to test the model or method
13
13
  # Run the script without actually running the experiment, use with `print_config=true`.
14
- # You can also use `--cfg` or `-c` to show the configuration instead of runing.
14
+ # You can also use `--cfg` or `-c` to show the configuration instead of running.
15
15
  dry_run: false
16
16
  print_config: true # Print the configuration to the console
17
17
  merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`