hcpdiff 0.9.0__py3-none-any.whl → 2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208)
  1. hcpdiff/__init__.py +4 -4
  2. hcpdiff/ckpt_manager/__init__.py +4 -5
  3. hcpdiff/ckpt_manager/ckpt.py +24 -0
  4. hcpdiff/ckpt_manager/format/__init__.py +4 -0
  5. hcpdiff/ckpt_manager/format/diffusers.py +59 -0
  6. hcpdiff/ckpt_manager/format/emb.py +21 -0
  7. hcpdiff/ckpt_manager/format/lora_webui.py +244 -0
  8. hcpdiff/ckpt_manager/format/sd_single.py +41 -0
  9. hcpdiff/ckpt_manager/loader.py +64 -0
  10. hcpdiff/data/__init__.py +4 -28
  11. hcpdiff/data/cache/__init__.py +1 -0
  12. hcpdiff/data/cache/vae.py +102 -0
  13. hcpdiff/data/dataset.py +20 -0
  14. hcpdiff/data/handler/__init__.py +3 -0
  15. hcpdiff/data/handler/controlnet.py +18 -0
  16. hcpdiff/data/handler/diffusion.py +80 -0
  17. hcpdiff/data/handler/text.py +111 -0
  18. hcpdiff/data/source/__init__.py +1 -2
  19. hcpdiff/data/source/folder_class.py +12 -29
  20. hcpdiff/data/source/text2img.py +36 -74
  21. hcpdiff/data/source/text2img_cond.py +9 -15
  22. hcpdiff/diffusion/__init__.py +0 -0
  23. hcpdiff/diffusion/noise/__init__.py +2 -0
  24. hcpdiff/diffusion/noise/pyramid_noise.py +42 -0
  25. hcpdiff/diffusion/noise/zero_terminal.py +39 -0
  26. hcpdiff/diffusion/sampler/__init__.py +5 -0
  27. hcpdiff/diffusion/sampler/base.py +72 -0
  28. hcpdiff/diffusion/sampler/ddpm.py +20 -0
  29. hcpdiff/diffusion/sampler/diffusers.py +66 -0
  30. hcpdiff/diffusion/sampler/edm.py +22 -0
  31. hcpdiff/diffusion/sampler/sigma_scheduler/__init__.py +3 -0
  32. hcpdiff/diffusion/sampler/sigma_scheduler/base.py +14 -0
  33. hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py +197 -0
  34. hcpdiff/diffusion/sampler/sigma_scheduler/edm.py +48 -0
  35. hcpdiff/easy/__init__.py +2 -0
  36. hcpdiff/easy/cfg/__init__.py +3 -0
  37. hcpdiff/easy/cfg/sd15_train.py +201 -0
  38. hcpdiff/easy/cfg/sdxl_train.py +140 -0
  39. hcpdiff/easy/cfg/t2i.py +177 -0
  40. hcpdiff/easy/model/__init__.py +2 -0
  41. hcpdiff/easy/model/cnet.py +31 -0
  42. hcpdiff/easy/model/loader.py +79 -0
  43. hcpdiff/easy/sampler.py +46 -0
  44. hcpdiff/evaluate/__init__.py +1 -0
  45. hcpdiff/evaluate/previewer.py +60 -0
  46. hcpdiff/loss/__init__.py +4 -1
  47. hcpdiff/loss/base.py +41 -0
  48. hcpdiff/loss/gw.py +35 -0
  49. hcpdiff/loss/ssim.py +37 -0
  50. hcpdiff/loss/vlb.py +79 -0
  51. hcpdiff/loss/weighting.py +66 -0
  52. hcpdiff/models/__init__.py +2 -2
  53. hcpdiff/models/cfg_context.py +17 -14
  54. hcpdiff/models/compose/compose_hook.py +44 -23
  55. hcpdiff/models/compose/compose_tokenizer.py +21 -8
  56. hcpdiff/models/compose/sdxl_composer.py +4 -4
  57. hcpdiff/models/container.py +1 -1
  58. hcpdiff/models/controlnet.py +16 -16
  59. hcpdiff/models/lora_base_patch.py +14 -25
  60. hcpdiff/models/lora_layers.py +3 -9
  61. hcpdiff/models/lora_layers_patch.py +14 -24
  62. hcpdiff/models/text_emb_ex.py +84 -6
  63. hcpdiff/models/textencoder_ex.py +54 -18
  64. hcpdiff/models/wrapper/__init__.py +3 -0
  65. hcpdiff/models/wrapper/pixart.py +19 -0
  66. hcpdiff/models/wrapper/sd.py +218 -0
  67. hcpdiff/models/wrapper/utils.py +20 -0
  68. hcpdiff/parser/__init__.py +1 -0
  69. hcpdiff/parser/embpt.py +32 -0
  70. hcpdiff/tools/convert_caption_txt2json.py +1 -1
  71. hcpdiff/tools/dataset_generator.py +94 -0
  72. hcpdiff/tools/download_hf_model.py +24 -0
  73. hcpdiff/tools/embedding_convert.py +6 -2
  74. hcpdiff/tools/init_proj.py +3 -21
  75. hcpdiff/tools/lora_convert.py +19 -15
  76. hcpdiff/tools/save_model.py +12 -0
  77. hcpdiff/tools/sd2diffusers.py +1 -1
  78. hcpdiff/train_colo.py +1 -1
  79. hcpdiff/train_deepspeed.py +1 -1
  80. hcpdiff/trainer_ac.py +79 -0
  81. hcpdiff/trainer_ac_single.py +31 -0
  82. hcpdiff/utils/__init__.py +0 -2
  83. hcpdiff/utils/inpaint_pipe.py +790 -0
  84. hcpdiff/utils/net_utils.py +29 -6
  85. hcpdiff/utils/pipe_hook.py +46 -33
  86. hcpdiff/utils/utils.py +21 -4
  87. hcpdiff/workflow/__init__.py +15 -10
  88. hcpdiff/workflow/daam/__init__.py +1 -0
  89. hcpdiff/workflow/daam/act.py +66 -0
  90. hcpdiff/workflow/daam/hook.py +109 -0
  91. hcpdiff/workflow/diffusion.py +128 -136
  92. hcpdiff/workflow/fast.py +31 -0
  93. hcpdiff/workflow/flow.py +67 -0
  94. hcpdiff/workflow/io.py +36 -68
  95. hcpdiff/workflow/model.py +46 -43
  96. hcpdiff/workflow/text.py +84 -52
  97. hcpdiff/workflow/utils.py +32 -12
  98. hcpdiff/workflow/vae.py +37 -38
  99. hcpdiff-2.1.dist-info/METADATA +285 -0
  100. hcpdiff-2.1.dist-info/RECORD +114 -0
  101. {hcpdiff-0.9.0.dist-info → hcpdiff-2.1.dist-info}/WHEEL +1 -1
  102. hcpdiff-2.1.dist-info/entry_points.txt +5 -0
  103. hcpdiff/ckpt_manager/base.py +0 -16
  104. hcpdiff/ckpt_manager/ckpt_diffusers.py +0 -45
  105. hcpdiff/ckpt_manager/ckpt_pkl.py +0 -138
  106. hcpdiff/ckpt_manager/ckpt_safetensor.py +0 -60
  107. hcpdiff/ckpt_manager/ckpt_webui.py +0 -54
  108. hcpdiff/data/bucket.py +0 -358
  109. hcpdiff/data/caption_loader.py +0 -80
  110. hcpdiff/data/cond_dataset.py +0 -40
  111. hcpdiff/data/crop_info_dataset.py +0 -40
  112. hcpdiff/data/data_processor.py +0 -33
  113. hcpdiff/data/pair_dataset.py +0 -146
  114. hcpdiff/data/sampler.py +0 -54
  115. hcpdiff/data/source/base.py +0 -30
  116. hcpdiff/data/utils.py +0 -80
  117. hcpdiff/infer_workflow.py +0 -57
  118. hcpdiff/loggers/__init__.py +0 -13
  119. hcpdiff/loggers/base_logger.py +0 -76
  120. hcpdiff/loggers/cli_logger.py +0 -40
  121. hcpdiff/loggers/preview/__init__.py +0 -1
  122. hcpdiff/loggers/preview/image_previewer.py +0 -149
  123. hcpdiff/loggers/tensorboard_logger.py +0 -30
  124. hcpdiff/loggers/wandb_logger.py +0 -31
  125. hcpdiff/loggers/webui_logger.py +0 -9
  126. hcpdiff/loss/min_snr_loss.py +0 -52
  127. hcpdiff/models/layers.py +0 -81
  128. hcpdiff/models/plugin.py +0 -348
  129. hcpdiff/models/wrapper.py +0 -75
  130. hcpdiff/noise/__init__.py +0 -3
  131. hcpdiff/noise/noise_base.py +0 -16
  132. hcpdiff/noise/pyramid_noise.py +0 -50
  133. hcpdiff/noise/zero_terminal.py +0 -44
  134. hcpdiff/train_ac.py +0 -565
  135. hcpdiff/train_ac_single.py +0 -39
  136. hcpdiff/utils/caption_tools.py +0 -105
  137. hcpdiff/utils/cfg_net_tools.py +0 -321
  138. hcpdiff/utils/cfg_resolvers.py +0 -16
  139. hcpdiff/utils/ema.py +0 -52
  140. hcpdiff/utils/img_size_tool.py +0 -248
  141. hcpdiff/vis/__init__.py +0 -3
  142. hcpdiff/vis/base_interface.py +0 -12
  143. hcpdiff/vis/disk_interface.py +0 -48
  144. hcpdiff/vis/webui_interface.py +0 -17
  145. hcpdiff/visualizer.py +0 -258
  146. hcpdiff/visualizer_reloadable.py +0 -237
  147. hcpdiff/workflow/base.py +0 -59
  148. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/anime/text2img_anime.yaml +0 -21
  149. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/anime/text2img_anime_lora.yaml +0 -58
  150. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/change_vae.yaml +0 -6
  151. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/euler_a.yaml +0 -8
  152. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/img2img.yaml +0 -10
  153. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/img2img_controlnet.yaml +0 -19
  154. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/inpaint.yaml +0 -11
  155. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/load_lora.yaml +0 -26
  156. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/load_unet_part.yaml +0 -18
  157. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/offload_2GB.yaml +0 -6
  158. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/save_model.yaml +0 -44
  159. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/text2img.yaml +0 -53
  160. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/text2img_DA++.yaml +0 -34
  161. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/text2img_sdxl.yaml +0 -9
  162. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/plugins/plugin_controlnet.yaml +0 -17
  163. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/te_struct.txt +0 -193
  164. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/dataset/base_dataset.yaml +0 -29
  165. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/dataset/regularization_dataset.yaml +0 -31
  166. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/CustomDiffusion.yaml +0 -74
  167. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamArtist++.yaml +0 -135
  168. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamArtist.yaml +0 -45
  169. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamBooth.yaml +0 -62
  170. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/FT_sdxl.yaml +0 -33
  171. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/Lion_optimizer.yaml +0 -17
  172. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/TextualInversion.yaml +0 -41
  173. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/add_logger_tensorboard_wandb.yaml +0 -15
  174. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/controlnet.yaml +0 -53
  175. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/ema.yaml +0 -10
  176. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/fine-tuning.yaml +0 -53
  177. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/locon.yaml +0 -24
  178. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/lora_anime_character.yaml +0 -77
  179. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/lora_conventional.yaml +0 -56
  180. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/lora_sdxl.yaml +0 -41
  181. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/min_snr.yaml +0 -7
  182. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/preview_in_training.yaml +0 -6
  183. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/DreamBooth.yaml +0 -70
  184. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/TextualInversion.yaml +0 -45
  185. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/fine-tuning.yaml +0 -45
  186. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/lora.yaml +0 -63
  187. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/train_base.yaml +0 -81
  188. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/tuning_base.yaml +0 -42
  189. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/unet_struct.txt +0 -932
  190. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/highres_fix_latent.yaml +0 -86
  191. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/highres_fix_pixel.yaml +0 -99
  192. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/text2img.yaml +0 -57
  193. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/text2img_lora.yaml +0 -70
  194. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/zero2.json +0 -32
  195. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/zero3.json +0 -39
  196. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/caption.txt +0 -1
  197. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/name.txt +0 -1
  198. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/name_2pt_caption.txt +0 -1
  199. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/name_caption.txt +0 -1
  200. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/object.txt +0 -27
  201. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/object_caption.txt +0 -27
  202. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/style.txt +0 -19
  203. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/style_caption.txt +0 -19
  204. hcpdiff-0.9.0.dist-info/METADATA +0 -199
  205. hcpdiff-0.9.0.dist-info/RECORD +0 -155
  206. hcpdiff-0.9.0.dist-info/entry_points.txt +0 -2
  207. {hcpdiff-0.9.0.dist-info → hcpdiff-2.1.dist-info/licenses}/LICENSE +0 -0
  208. {hcpdiff-0.9.0.dist-info → hcpdiff-2.1.dist-info}/top_level.txt +0 -0
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/te_struct.txt
@@ -1,193 +0,0 @@
- CLIPTextModel(
-   (text_model): CLIPTextTransformer(
-     (embeddings): CLIPTextEmbeddings(
-       (token_embedding): Embedding(49408, 768)
-       (position_embedding): Embedding(77, 768)
-     )
-     (encoder): CLIPEncoder(
-       (layers): ModuleList(
-         (0): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (1)-(11): eleven further CLIPEncoderLayer blocks, identical in structure to (0)
-       )
-     )
-     (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-   )
- )
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/dataset/base_dataset.yaml
@@ -1,29 +0,0 @@
- data:
-   dataset1:
-     _target_: hcpdiff.data.TextImagePairDataset
-     _partial_: True # not instantiated directly here; the remaining parameters are supplied at runtime
-     batch_size: 4
-     cache_latents: True
-     att_mask_encode: False
-     loss_weight: 1.0
-
-     source:
-       data_source1:
-         _target_: hcpdiff.data.source.Text2ImageAttMapSource
-         img_root: 'imgs/'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null # path to image captions (file_words)
-         att_mask: null
-         bg_color: [ 255, 255, 255 ] # RGB background used when flattening ARGB images to RGB
-
-         word_names: {}
-
-         text_transforms:
-           _target_: torchvision.transforms.Compose
-           transforms:
-             - _target_: hcpdiff.utils.caption_tools.TemplateFill
-               word_names: ${....word_names}
-     bucket:
-       _target_: hcpdiff.data.bucket.RatioBucket.from_files # aspect ratio bucket
-       target_area: ${hcp.eval:"512*512"}
-       num_bucket: 5
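The `${hcp.eval:"512*512"}` entry is an OmegaConf resolver interpolation: hcpdiff registers a custom `hcp.eval` resolver so configs can embed small arithmetic expressions. A minimal sketch of that behavior (the registration below is a re-creation for illustration, not hcpdiff's actual code):

```python
from omegaconf import OmegaConf

# Assumed semantics of hcpdiff's resolver: evaluate the quoted expression.
OmegaConf.register_new_resolver("hcp.eval", lambda expr: eval(expr))

cfg = OmegaConf.create('target_area: ${hcp.eval:"512*512"}')
print(cfg.target_area)  # 262144
```

Relative interpolations such as `${....word_names}` resolve against the same tree: the first dot means "this node" and each additional dot walks one level up.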
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/dataset/regularization_dataset.yaml
@@ -1,31 +0,0 @@
- _base_: [cfgs/train/dataset/base_dataset.yaml]
-
- data:
-   dataset_class:
-     _target_: hcpdiff.data.TextImagePairDataset
-     _partial_: True
-     batch_size: 1
-     cache_latents: True
-     att_mask_encode: False
-     loss_weight: 1.0
-
-     source:
-       data_source1:
-         _target_: hcpdiff.data.source.Text2ImageAttMapSource
-         img_root: 'imgs/db_class'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null
-         att_mask: null
-         bg_color: [ 255, 255, 255 ] # RGB background used when flattening ARGB images to RGB
-
-         word_names:
-           pt1: ''
-
-         text_transforms:
-           _target_: torchvision.transforms.Compose
-           transforms:
-             - _target_: hcpdiff.utils.caption_tools.TemplateFill
-               word_names: ${....word_names}
-     bucket:
-       _target_: hcpdiff.data.bucket.FixedBucket
-       target_size: [ 512, 512 ]
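`_base_` names parent configs that are loaded first, with the current file's keys merged on top. A rough sketch of that include-then-override idea using OmegaConf (illustrative only; hcpdiff's actual loader also understands the `---` deletion marker used in some configs below):

```python
from omegaconf import OmegaConf

def load_with_base(path: str):
    # Illustrative: recursively load the parents listed under _base_,
    # then merge this file last so its values override theirs.
    cfg = OmegaConf.load(path)
    bases = [load_with_base(p) for p in cfg.pop("_base_", [])]
    return OmegaConf.merge(*bases, cfg) if bases else cfg
```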
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/CustomDiffusion.yaml
@@ -1,74 +0,0 @@
- _base_:
-   - cfgs/train/dataset/regularization_dataset.yaml
-   - cfgs/train/train_base.yaml
-   - cfgs/train/tuning_base.yaml
-
- unet:
-   -
-     lr: 1e-6
-     layers: # k and v projections of the cross-attention blocks
-       - 're:.*attn2\.to_k$'
-       - 're:.*attn2\.to_v$'
-
- ## LoRA version of CustomDiffusion
- #lora_unet:
- #  -
- #    lr: 1e-4
- #    layers:
- #      - 're:.*attn2\.to_k$'
- #      - 're:.*attn2\.to_v$'
-
- tokenizer_pt:
-   train: # prompt tuning embeddings; need to be created in advance
-     - { name: 'pt-dog1', lr: 0.003 }
-
- train:
-   gradient_accumulation_steps: 1
-   save_step: 100
-
-   scheduler:
-     name: 'constant_with_warmup'
-     num_warmup_steps: 50
-     num_training_steps: 600
-
- model:
-   pretrained_model_name_or_path: 'runwayml/stable-diffusion-v1-5'
-   tokenizer_repeats: 1
-   ema_unet: 0
-   ema_text_encoder: 0
-
- data:
-   dataset1:
-     batch_size: 4
-     cache_latents: True
-
-     source:
-       data_source1:
-         img_root: 'imgs/'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null # path to image captions (file_words)
-
-         word_names:
-           pt1: sks
-           class: dog
-     bucket:
-       _target_: hcpdiff.data.bucket.RatioBucket.from_files # aspect ratio bucket
-       target_area: ${hcp.eval:"512*512"}
-       num_bucket: 1
-
-   dataset_class:
-     batch_size: 1
-     cache_latents: True
-     loss_weight: 1.0
-
-     source:
-       data_source1:
-         img_root: 'imgs/db_class'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null
-
-         word_names:
-           class: dog
-     bucket:
-       _target_: hcpdiff.data.bucket.FixedBucket
-       target_size: [512, 512]
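Entries prefixed with `re:` in a `layers` list are regular expressions matched against qualified module names, so `'re:.*attn2\.to_k$'` picks out the key projection of every cross-attention block. A sketch of that matching (hypothetical helper, not hcpdiff's implementation):

```python
import re
import torch.nn as nn

def select_layers(model: nn.Module, patterns: list) -> list:
    # Hypothetical resolver for 're:'-prefixed patterns; plain strings
    # would be taken as literal module names.
    selected = []
    for pat in patterns:
        if pat.startswith("re:"):
            regex = re.compile(pat[3:])
            selected += [n for n, _ in model.named_modules() if regex.search(n)]
        else:
            selected.append(pat)
    return selected
```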
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamArtist++.yaml
@@ -1,135 +0,0 @@
- _base_:
-   - cfgs/train/dataset/regularization_dataset.yaml
-   - cfgs/train/train_base.yaml
-   - cfgs/train/tuning_base.yaml
-
- unet: null
-
- lora_unet:
-   - lr: 1e-4
-     rank: 0.01875
-     branch: p
-     dropout: 0.1
-     layers:
-       - 're:.*\.to_k$'
-       - 're:.*\.to_v$'
-       - 're:.*\.ff$'
-       #- 're:.*\.attn.?$' # improves fit, but may reduce controllability
-   - lr: 4e-5 # a low lr on the negative branch prevents image collapse
-     rank: 0.01875
-     branch: n
-     dropout: 0.1
-     layers:
-       - 're:.*\.to_k$'
-       - 're:.*\.to_v$'
-       - 're:.*\.ff$'
-       #- 're:.*\.attn.?$' # improves fit, but may reduce controllability
- #  - lr: 1e-4
- #    rank: 0.01875
- #    branch: p
- #    layers:
- #      - 're:.*\.resnets$' # improves fit, but may reduce controllability and change the style
- #  - lr: 4e-5
- #    rank: 0.01875
- #    branch: n
- #    layers:
- #      - 're:.*\.resnets$' # improves fit, but may reduce controllability and change the style
-
- lora_text_encoder:
-   - lr: 2e-5
-     rank: 2
-     branch: p
-     dropout: 0.1
-     layers:
-       - 're:.*self_attn$'
-       - 're:.*mlp$'
-   - lr: 2e-5
-     rank: 2
-     branch: n
-     dropout: 0.1
-     layers:
-       - 're:.*self_attn$'
-       - 're:.*mlp$'
-
- tokenizer_pt:
-   train: # prompt tuning embeddings
-     - { name: 'pt-botdog1', lr: 0.0025 }
-     - { name: 'pt-botdog1-neg', lr: 0.0025 }
-
- train:
-   gradient_accumulation_steps: 1
-   save_step: 100
-
-   #cfg_scale: '1.0-3.0:cos' # dynamic CFG scale over timesteps
-   cfg_scale: '3.0'
-
-   loss:
-     criterion: # min-SNR loss
-       _target_: hcpdiff.loss.MinSNRLoss
-       gamma: 2.0
-
-   scheduler:
-     name: one_cycle
-     num_warmup_steps: 200
-     num_training_steps: 1000
-     scheduler_kwargs: {}
-
-   scheduler_pt:
-     name: one_cycle
-     num_warmup_steps: 200
-     num_training_steps: 1000
-     scheduler_kwargs: {}
-
- model:
-   pretrained_model_name_or_path: 'runwayml/stable-diffusion-v1-5'
-   tokenizer_repeats: 1
-   ema_unet: 0
-   ema_text_encoder: 0
-   clip_skip: 0
-
- # The dataset configuration inherits from regularization_dataset.yaml;
- # only a few parameters need to be overridden here.
- data:
-   dataset1:
-     batch_size: 4
-     cache_latents: True
-
-     source:
-       data_source1:
-         img_root: 'imgs/'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null # path to image captions (file_words)
-         att_mask: null
-
-         word_names: --- # '---' removes the item inherited from the base config
-         text_transforms:
-           transforms: # without TagShuffle and TagDropout
-             - _target_: hcpdiff.utils.caption_tools.TemplateFill
-               word_names:
-                 pt1: [pt-botdog1, pt-botdog1-neg]
-     bucket:
-       _target_: hcpdiff.data.bucket.RatioBucket.from_files # aspect ratio bucket
-       target_area: ${hcp.eval:"512*512"}
-       num_bucket: 1
-
-   # Regularization prevents image collapse; the regularization images are
-   # generated by the model itself, with prompts taken from the dataset.
-   dataset_class:
-     batch_size: 1
-     cache_latents: True
-     loss_weight: 1.0
-
-     source:
-       data_source1:
-         img_root: 'imgs/v15'
-         prompt_template: 'prompt_tuning_template/caption.txt'
-         caption_file:
-           _target_: hcpdiff.data.JsonCaptionLoader
-           path: 'imgs/v15/image_captions.json'
-         att_mask: null
-
-         word_names:
-           pt1: ['', '']
-     bucket:
-       _target_: hcpdiff.data.bucket.FixedBucket
-       target_size: [512, 512]
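`MinSNRLoss` applies the Min-SNR-γ weighting of Hang et al. (2023): for ε-prediction, each timestep's loss is scaled by min(SNR(t), γ) / SNR(t), so low-noise timesteps with very large SNR stop dominating training. A minimal sketch of the weight (illustrative, not hcpdiff's exact class):

```python
import torch

def min_snr_weight(snr: torch.Tensor, gamma: float = 2.0) -> torch.Tensor:
    # Clip the signal-to-noise ratio at gamma, then normalize by SNR.
    return snr.clamp(max=gamma) / snr
```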
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamArtist.yaml
@@ -1,45 +0,0 @@
- _base_:
-   - cfgs/train/dataset/base_dataset.yaml
-   - cfgs/train/train_base.yaml
-   - cfgs/train/tuning_base.yaml
-
- tokenizer_pt:
-   train: # prompt tuning embeddings; need to be created in advance
-     - { name: 'pt-catgirl1', lr: 0.003 }
-     - { name: 'pt-catgirl1-neg', lr: 0.003 }
-
- train:
-   gradient_accumulation_steps: 1
-   save_step: 100
-
-   #cfg_scale: '1.0-3.0:cos' # dynamic CFG scale over timesteps
-   cfg_scale: '3.0'
-
-   scheduler:
-     name: 'constant_with_warmup'
-     num_warmup_steps: 50
-     num_training_steps: 600
-
- model:
-   pretrained_model_name_or_path: 'runwayml/stable-diffusion-v1-5'
-   tokenizer_repeats: 1
-   ema_unet: 0
-   ema_text_encoder: 0
-
- data:
-   dataset1:
-     batch_size: 4
-     cache_latents: True
-
-     source:
-       data_source1:
-         img_root: 'imgs/'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null # path to image captions (file_words)
-
-         word_names:
-           pt1: [ pt-catgirl1, pt-catgirl1-neg ] # a pair of words for the positive and negative branches, respectively
-     bucket:
-       _target_: hcpdiff.data.bucket.RatioBucket.from_files # aspect ratio bucket
-       target_area: ${hcp.eval:"512*512"}
-       num_bucket: 1
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamBooth.yaml
@@ -1,62 +0,0 @@
- _base_:
-   - cfgs/train/dataset/regularization_dataset.yaml
-   - cfgs/train/train_base.yaml
-   - cfgs/train/tuning_base.yaml
-
- unet:
-   -
-     lr: 1e-6
-     layers:
-       - '' # the empty pattern selects all layers (full fine-tuning)
-
- train:
-   gradient_accumulation_steps: 1
-   save_step: 100
-
-   scheduler:
-     name: 'constant_with_warmup'
-     num_warmup_steps: 50
-     num_training_steps: 600
-
- model:
-   pretrained_model_name_or_path: 'runwayml/stable-diffusion-v1-5'
-   tokenizer_repeats: 1
-
- # The dataset configuration inherits from regularization_dataset.yaml;
- # only a few parameters need to be overridden here.
- data:
-   dataset1:
-     batch_size: 4
-     cache_latents: True
-
-     source:
-       data_source1:
-         img_root: 'imgs/'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null # path to image captions (file_words)
-         att_mask: null
-
-         word_names:
-           pt1: sks
-           class: dog
-     bucket:
-       _target_: hcpdiff.data.bucket.RatioBucket.from_files # aspect ratio bucket
-       target_area: ${hcp.eval:"512*512"}
-       num_bucket: 1
-
-   dataset_class:
-     batch_size: 1
-     cache_latents: True
-     loss_weight: 1.0
-
-     source:
-       data_source1:
-         img_root: 'imgs/db_class'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null
-
-         word_names:
-           class: dog
-     bucket:
-       _target_: hcpdiff.data.bucket.FixedBucket
-       target_size: [512, 512]
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/FT_sdxl.yaml
@@ -1,33 +0,0 @@
- _base_:
-   - cfgs/train/examples/fine-tuning.yaml
-
- mixed_precision: 'bf16'
-
- train:
-   optimizer:
-     _target_: transformers.optimization.Adafactor
-     _partial_: True
-     relative_step: False
-     weight_decay: 1e-3
-
- model:
-   pretrained_model_name_or_path: 'stabilityai/stable-diffusion-xl-base-1.0'
-   clip_skip: 1
-   clip_final_norm: False
-   force_cast_precision: True
-
- data:
-   dataset1:
-     _target_: hcpdiff.data.CropInfoPairDataset
-     batch_size: 4
-
-     source:
-       data_source1:
-         img_root: 'imgs/'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: 'imgs/image_captions.json' # path to image captions (file_words)
-
-     bucket:
-       _target_: hcpdiff.data.bucket.RatioBucket.from_files # aspect ratio bucket
-       target_area: ${hcp.eval:"1024*1024"}
-       num_bucket: 4
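With `relative_step: False`, the transformers Adafactor implementation stops deriving its own step size, so an explicit learning rate has to be supplied when the partial is eventually called by the trainer. A self-contained sketch (the module and lr value are illustrative):

```python
import torch.nn as nn
from transformers.optimization import Adafactor

model = nn.Linear(4, 4)  # stand-in module for illustration

optimizer = Adafactor(
    model.parameters(),
    lr=1e-5,                # required once relative_step=False
    relative_step=False,
    scale_parameter=False,  # the usual companion of a manual lr
    weight_decay=1e-3,
)
```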
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/Lion_optimizer.yaml
@@ -1,17 +0,0 @@
- _base_: [cfgs/train/examples/fine-tuning.yaml]
-
- # Install with: pip install lion-pytorch
-
- train:
-   optimizer:
-     type: --- # remove the inherited item
-     _target_: lion_pytorch.Lion
-     _partial_: True
-     weight_decay: 1e-2
-     #use_triton: True # set to True to use the CUDA kernel written in Triton (Tillet et al.)
-
-   optimizer_pt:
-     type: ---
-     _target_: lion_pytorch.Lion
-     _partial_: True
-     weight_decay: 1e-3
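`_target_` plus `_partial_: True` is Hydra's deferred-instantiation idiom: `hydra.utils.instantiate` returns a `functools.partial`, and the trainer later calls it with the parameter groups and learning rate. A minimal sketch (AdamW stands in for `lion_pytorch.Lion` to stay dependency-free):

```python
import torch.nn as nn
from hydra.utils import instantiate

opt_cfg = {
    "_target_": "torch.optim.AdamW",  # stand-in for lion_pytorch.Lion
    "_partial_": True,
    "weight_decay": 1e-2,
}
opt_factory = instantiate(opt_cfg)  # functools.partial(AdamW, weight_decay=0.01)

model = nn.Linear(4, 4)
optimizer = opt_factory(model.parameters(), lr=1e-4)  # trainer supplies these
```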
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/TextualInversion.yaml
@@ -1,41 +0,0 @@
- _base_:
-   - cfgs/train/dataset/base_dataset.yaml
-   - cfgs/train/train_base.yaml
-   - cfgs/train/tuning_base.yaml
-
- tokenizer_pt:
-   train: # prompt tuning embeddings; need to be created in advance
-     - { name: 'pt-catgirl1', lr: 0.003 }
-
- train:
-   gradient_accumulation_steps: 1
-   save_step: 100
-
-   scheduler:
-     name: 'constant_with_warmup'
-     num_warmup_steps: 50
-     num_training_steps: 600
-
- model:
-   pretrained_model_name_or_path: 'runwayml/stable-diffusion-v1-5'
-   tokenizer_repeats: 1
-   ema_unet: 0
-   ema_text_encoder: 0
-
- data:
-   dataset1:
-     batch_size: 4
-     cache_latents: True
-
-     source:
-       data_source1:
-         img_root: 'imgs/'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null # path to image captions (file_words)
-
-         word_names:
-           pt1: pt-catgirl1
-     bucket:
-       _target_: hcpdiff.data.bucket.RatioBucket.from_files # aspect ratio bucket
-       target_area: ${hcp.eval:"512*512"}
-       num_bucket: 1
hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/add_logger_tensorboard_wandb.yaml
@@ -1,15 +0,0 @@
- _base_: [cfgs/train/train_base.yaml]
-
- logger:
-   -
-     _target_: hcpdiff.loggers.CLILogger
-     _partial_: True
-     out_path: 'train.log'
-     log_step: 20
-   - _target_: hcpdiff.loggers.TBLogger
-     _partial_: True
-     out_path: 'tblog/'
-     log_step: 5
-   - _target_: hcpdiff.loggers.WanDBLogger
-     _partial_: True
-     log_step: 5