hcpdiff 0.9.1__py3-none-any.whl → 2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210)
  1. hcpdiff/__init__.py +4 -4
  2. hcpdiff/ckpt_manager/__init__.py +4 -5
  3. hcpdiff/ckpt_manager/ckpt.py +24 -0
  4. hcpdiff/ckpt_manager/format/__init__.py +4 -0
  5. hcpdiff/ckpt_manager/format/diffusers.py +59 -0
  6. hcpdiff/ckpt_manager/format/emb.py +21 -0
  7. hcpdiff/ckpt_manager/format/lora_webui.py +244 -0
  8. hcpdiff/ckpt_manager/format/sd_single.py +41 -0
  9. hcpdiff/ckpt_manager/loader.py +64 -0
  10. hcpdiff/data/__init__.py +4 -28
  11. hcpdiff/data/cache/__init__.py +1 -0
  12. hcpdiff/data/cache/vae.py +102 -0
  13. hcpdiff/data/dataset.py +20 -0
  14. hcpdiff/data/handler/__init__.py +3 -0
  15. hcpdiff/data/handler/controlnet.py +18 -0
  16. hcpdiff/data/handler/diffusion.py +80 -0
  17. hcpdiff/data/handler/text.py +111 -0
  18. hcpdiff/data/source/__init__.py +1 -2
  19. hcpdiff/data/source/folder_class.py +12 -29
  20. hcpdiff/data/source/text2img.py +36 -74
  21. hcpdiff/data/source/text2img_cond.py +9 -15
  22. hcpdiff/diffusion/__init__.py +0 -0
  23. hcpdiff/diffusion/noise/__init__.py +2 -0
  24. hcpdiff/diffusion/noise/pyramid_noise.py +42 -0
  25. hcpdiff/diffusion/noise/zero_terminal.py +39 -0
  26. hcpdiff/diffusion/sampler/__init__.py +5 -0
  27. hcpdiff/diffusion/sampler/base.py +72 -0
  28. hcpdiff/diffusion/sampler/ddpm.py +20 -0
  29. hcpdiff/diffusion/sampler/diffusers.py +66 -0
  30. hcpdiff/diffusion/sampler/edm.py +22 -0
  31. hcpdiff/diffusion/sampler/sigma_scheduler/__init__.py +3 -0
  32. hcpdiff/diffusion/sampler/sigma_scheduler/base.py +14 -0
  33. hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py +197 -0
  34. hcpdiff/diffusion/sampler/sigma_scheduler/edm.py +48 -0
  35. hcpdiff/easy/__init__.py +2 -0
  36. hcpdiff/easy/cfg/__init__.py +3 -0
  37. hcpdiff/easy/cfg/sd15_train.py +201 -0
  38. hcpdiff/easy/cfg/sdxl_train.py +140 -0
  39. hcpdiff/easy/cfg/t2i.py +177 -0
  40. hcpdiff/easy/model/__init__.py +2 -0
  41. hcpdiff/easy/model/cnet.py +31 -0
  42. hcpdiff/easy/model/loader.py +79 -0
  43. hcpdiff/easy/sampler.py +46 -0
  44. hcpdiff/evaluate/__init__.py +1 -0
  45. hcpdiff/evaluate/previewer.py +60 -0
  46. hcpdiff/loss/__init__.py +4 -1
  47. hcpdiff/loss/base.py +41 -0
  48. hcpdiff/loss/gw.py +35 -0
  49. hcpdiff/loss/ssim.py +37 -0
  50. hcpdiff/loss/vlb.py +79 -0
  51. hcpdiff/loss/weighting.py +66 -0
  52. hcpdiff/models/__init__.py +2 -2
  53. hcpdiff/models/cfg_context.py +17 -14
  54. hcpdiff/models/compose/compose_hook.py +44 -23
  55. hcpdiff/models/compose/compose_tokenizer.py +21 -8
  56. hcpdiff/models/compose/sdxl_composer.py +4 -4
  57. hcpdiff/models/controlnet.py +16 -16
  58. hcpdiff/models/lora_base_patch.py +14 -25
  59. hcpdiff/models/lora_layers.py +3 -9
  60. hcpdiff/models/lora_layers_patch.py +14 -24
  61. hcpdiff/models/text_emb_ex.py +84 -6
  62. hcpdiff/models/textencoder_ex.py +54 -18
  63. hcpdiff/models/wrapper/__init__.py +3 -0
  64. hcpdiff/models/wrapper/pixart.py +19 -0
  65. hcpdiff/models/wrapper/sd.py +218 -0
  66. hcpdiff/models/wrapper/utils.py +20 -0
  67. hcpdiff/parser/__init__.py +1 -0
  68. hcpdiff/parser/embpt.py +32 -0
  69. hcpdiff/tools/convert_caption_txt2json.py +1 -1
  70. hcpdiff/tools/dataset_generator.py +94 -0
  71. hcpdiff/tools/download_hf_model.py +24 -0
  72. hcpdiff/tools/init_proj.py +3 -21
  73. hcpdiff/tools/lora_convert.py +18 -17
  74. hcpdiff/tools/save_model.py +12 -0
  75. hcpdiff/tools/sd2diffusers.py +1 -1
  76. hcpdiff/train_colo.py +1 -1
  77. hcpdiff/train_deepspeed.py +1 -1
  78. hcpdiff/trainer_ac.py +79 -0
  79. hcpdiff/trainer_ac_single.py +31 -0
  80. hcpdiff/utils/__init__.py +0 -2
  81. hcpdiff/utils/inpaint_pipe.py +7 -2
  82. hcpdiff/utils/net_utils.py +29 -6
  83. hcpdiff/utils/pipe_hook.py +24 -7
  84. hcpdiff/utils/utils.py +21 -4
  85. hcpdiff/workflow/__init__.py +15 -10
  86. hcpdiff/workflow/daam/__init__.py +1 -0
  87. hcpdiff/workflow/daam/act.py +66 -0
  88. hcpdiff/workflow/daam/hook.py +109 -0
  89. hcpdiff/workflow/diffusion.py +114 -125
  90. hcpdiff/workflow/fast.py +31 -0
  91. hcpdiff/workflow/flow.py +67 -0
  92. hcpdiff/workflow/io.py +36 -130
  93. hcpdiff/workflow/model.py +46 -43
  94. hcpdiff/workflow/text.py +78 -46
  95. hcpdiff/workflow/utils.py +32 -12
  96. hcpdiff/workflow/vae.py +37 -38
  97. hcpdiff-2.1.dist-info/METADATA +285 -0
  98. hcpdiff-2.1.dist-info/RECORD +114 -0
  99. {hcpdiff-0.9.1.dist-info → hcpdiff-2.1.dist-info}/WHEEL +1 -1
  100. hcpdiff-2.1.dist-info/entry_points.txt +5 -0
  101. hcpdiff/ckpt_manager/base.py +0 -16
  102. hcpdiff/ckpt_manager/ckpt_diffusers.py +0 -45
  103. hcpdiff/ckpt_manager/ckpt_pkl.py +0 -138
  104. hcpdiff/ckpt_manager/ckpt_safetensor.py +0 -64
  105. hcpdiff/ckpt_manager/ckpt_webui.py +0 -54
  106. hcpdiff/data/bucket.py +0 -358
  107. hcpdiff/data/caption_loader.py +0 -80
  108. hcpdiff/data/cond_dataset.py +0 -40
  109. hcpdiff/data/crop_info_dataset.py +0 -40
  110. hcpdiff/data/data_processor.py +0 -33
  111. hcpdiff/data/pair_dataset.py +0 -146
  112. hcpdiff/data/sampler.py +0 -54
  113. hcpdiff/data/source/base.py +0 -30
  114. hcpdiff/data/utils.py +0 -80
  115. hcpdiff/deprecated/__init__.py +0 -1
  116. hcpdiff/deprecated/cfg_converter.py +0 -81
  117. hcpdiff/deprecated/lora_convert.py +0 -31
  118. hcpdiff/infer_workflow.py +0 -57
  119. hcpdiff/loggers/__init__.py +0 -13
  120. hcpdiff/loggers/base_logger.py +0 -76
  121. hcpdiff/loggers/cli_logger.py +0 -40
  122. hcpdiff/loggers/preview/__init__.py +0 -1
  123. hcpdiff/loggers/preview/image_previewer.py +0 -149
  124. hcpdiff/loggers/tensorboard_logger.py +0 -30
  125. hcpdiff/loggers/wandb_logger.py +0 -31
  126. hcpdiff/loggers/webui_logger.py +0 -9
  127. hcpdiff/loss/min_snr_loss.py +0 -52
  128. hcpdiff/models/layers.py +0 -81
  129. hcpdiff/models/plugin.py +0 -348
  130. hcpdiff/models/wrapper.py +0 -75
  131. hcpdiff/noise/__init__.py +0 -3
  132. hcpdiff/noise/noise_base.py +0 -16
  133. hcpdiff/noise/pyramid_noise.py +0 -50
  134. hcpdiff/noise/zero_terminal.py +0 -44
  135. hcpdiff/train_ac.py +0 -566
  136. hcpdiff/train_ac_single.py +0 -39
  137. hcpdiff/utils/caption_tools.py +0 -105
  138. hcpdiff/utils/cfg_net_tools.py +0 -321
  139. hcpdiff/utils/cfg_resolvers.py +0 -16
  140. hcpdiff/utils/ema.py +0 -52
  141. hcpdiff/utils/img_size_tool.py +0 -248
  142. hcpdiff/vis/__init__.py +0 -3
  143. hcpdiff/vis/base_interface.py +0 -12
  144. hcpdiff/vis/disk_interface.py +0 -48
  145. hcpdiff/vis/webui_interface.py +0 -17
  146. hcpdiff/viser_fast.py +0 -138
  147. hcpdiff/visualizer.py +0 -265
  148. hcpdiff/visualizer_reloadable.py +0 -237
  149. hcpdiff/workflow/base.py +0 -59
  150. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/anime/text2img_anime.yaml +0 -21
  151. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/anime/text2img_anime_lora.yaml +0 -58
  152. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/change_vae.yaml +0 -6
  153. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/euler_a.yaml +0 -8
  154. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/img2img.yaml +0 -10
  155. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/img2img_controlnet.yaml +0 -19
  156. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/inpaint.yaml +0 -11
  157. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/load_lora.yaml +0 -26
  158. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/load_unet_part.yaml +0 -18
  159. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/offload_2GB.yaml +0 -6
  160. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/save_model.yaml +0 -44
  161. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img.yaml +0 -53
  162. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img_DA++.yaml +0 -34
  163. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img_sdxl.yaml +0 -9
  164. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/plugins/plugin_controlnet.yaml +0 -17
  165. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/te_struct.txt +0 -193
  166. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/dataset/base_dataset.yaml +0 -29
  167. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/dataset/regularization_dataset.yaml +0 -31
  168. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/CustomDiffusion.yaml +0 -74
  169. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/DreamArtist++.yaml +0 -135
  170. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/DreamArtist.yaml +0 -45
  171. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/DreamBooth.yaml +0 -62
  172. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/FT_sdxl.yaml +0 -33
  173. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/Lion_optimizer.yaml +0 -17
  174. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/TextualInversion.yaml +0 -41
  175. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/add_logger_tensorboard_wandb.yaml +0 -15
  176. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/controlnet.yaml +0 -53
  177. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/ema.yaml +0 -10
  178. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/fine-tuning.yaml +0 -53
  179. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/locon.yaml +0 -24
  180. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/lora_anime_character.yaml +0 -77
  181. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/lora_conventional.yaml +0 -56
  182. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/lora_sdxl.yaml +0 -41
  183. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/min_snr.yaml +0 -7
  184. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/preview_in_training.yaml +0 -6
  185. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/DreamBooth.yaml +0 -70
  186. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/TextualInversion.yaml +0 -45
  187. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/fine-tuning.yaml +0 -45
  188. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/lora.yaml +0 -63
  189. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/train_base.yaml +0 -81
  190. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/tuning_base.yaml +0 -42
  191. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/unet_struct.txt +0 -932
  192. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/highres_fix_latent.yaml +0 -86
  193. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/highres_fix_pixel.yaml +0 -99
  194. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/text2img.yaml +0 -59
  195. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/text2img_lora.yaml +0 -70
  196. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/zero2.json +0 -32
  197. hcpdiff-0.9.1.data/data/hcpdiff/cfgs/zero3.json +0 -39
  198. hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/caption.txt +0 -1
  199. hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/name.txt +0 -1
  200. hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/name_2pt_caption.txt +0 -1
  201. hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/name_caption.txt +0 -1
  202. hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/object.txt +0 -27
  203. hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/object_caption.txt +0 -27
  204. hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/style.txt +0 -19
  205. hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/style_caption.txt +0 -19
  206. hcpdiff-0.9.1.dist-info/METADATA +0 -199
  207. hcpdiff-0.9.1.dist-info/RECORD +0 -160
  208. hcpdiff-0.9.1.dist-info/entry_points.txt +0 -2
  209. {hcpdiff-0.9.1.dist-info → hcpdiff-2.1.dist-info/licenses}/LICENSE +0 -0
  210. {hcpdiff-0.9.1.dist-info → hcpdiff-2.1.dist-info}/top_level.txt +0 -0
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/img2img_controlnet.yaml
@@ -1,19 +0,0 @@
- _base_: [cfgs/infer/text2img.yaml]
-
- ex_input:
-   cond:
-     _target_: hcpdiff.data.data_processor.ControlNetProcessor
-     image: 'cond_img.png'
-
- merge:
-   plugin_cfg: cfgs/plugins/plugin_controlnet.yaml
-
-   group1:
-     type: 'unet'
-     base_model_alpha: 1.0 # base model weight to merge with lora or part
-     lora: null
-     part: null
-     plugin:
-       controlnet1:
-         path: 'ckpts/controlnet.ckpt'
-         layers: 'all'
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/inpaint.yaml
@@ -1,11 +0,0 @@
- _base_: [cfgs/infer/text2img.yaml]
-
- condition:
-   type: inpaint
-   image: 'cond_img.png'
-   mask: 'mask.png'
-
- infer_args:
-   guidance_scale: 7.5
-   num_inference_steps: 50
-   strength: 0.75
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/load_lora.yaml
@@ -1,26 +0,0 @@
- _base_: [cfgs/infer/text2img.yaml]
-
- merge:
-   group1:
-     type: 'unet'
-     base_model_alpha: 1.0 # base model weight to merge with lora or part
-     lora:
-       - path: 'lora1-unet.safetensors'
-         alpha: 0.8
-         layers: 'all'
-       - path: 'lora2-unet.safetensors'
-         alpha: 0.65
-         layers: 'all'
-     part: null
-
-   group2:
-     type: 'TE'
-     base_model_alpha: 1.0 # base model weight to infer with lora or part
-     lora:
-       - path: 'lora1-te.safetensors'
-         alpha: 0.8
-         layers: 'all'
-       - path: 'lora2-te.safetensors'
-         alpha: 0.65
-         layers: 'all'
-     part: null
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/load_unet_part.yaml
@@ -1,18 +0,0 @@
- _base_: [cfgs/infer/text2img.yaml]
-
- merge:
-   group1:
-     type: 'unet'
-     base_model_alpha: 0.0 # base model weight 0.0 and part weight 1.0: the part checkpoint replaces the original weights
-     part:
-       - path: 'path to unet-100.safetensors'
-         alpha: 1.0
-         layers: 'all'
-
-   group2: # this section can be left out if the text_encoder was not trained
-     type: 'TE'
-     base_model_alpha: 0.0
-     lora:
-       - path: 'path to text_encoder-100.safetensors'
-         alpha: 1.0
-         layers: 'all'
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/offload_2GB.yaml
@@ -1,6 +0,0 @@
- _base_: [cfgs/infer/text2img.yaml]
-
- offload:
-   max_VRAM: 2GiB
-   max_RAM: 30GiB
-   vae_cpu: False
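
The removed offload_2GB.yaml only declares memory budgets; hcpdiff resolved them internally. As a rough illustration of the same idea using the public diffusers API (sequential CPU offload plus cheap VAE decoding), assuming a standard SD 1.5 pipeline rather than hcpdiff's own offload code:

import torch
from diffusers import StableDiffusionPipeline

# Roughly what a 2 GiB VRAM budget forces: stream submodules onto the GPU one at a
# time and keep everything else in system RAM (the max_RAM side of the config).
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe.enable_sequential_cpu_offload()  # submodule-level offload, lowest VRAM use
pipe.enable_vae_slicing()             # decode the batch image-by-image
image = pipe("a photo of a dog", num_inference_steps=50).images[0]
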
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/save_model.yaml
@@ -1,44 +0,0 @@
- _base_: [cfgs/infer/text2img.yaml]
-
- save_model:
-   path: ckpts/model
-   to_safetensors: true
-
- merge:
-
-   group_lora_unet:
-     type: 'unet'
-     base_model_alpha: 1.0 # base model weight to merge with lora or part
-     lora:
-       - path: 'exps/lora1/ckpts/unet-600.safetensors'
-         alpha: 0.8
-         layers: 'all'
-       - path: 'exps/lora2/ckpts/unet-800.safetensors'
-         alpha: 0.7
-         layers:
-           - 're:.*\.to_k$'
-           - 're:.*\.to_v$'
-
-   group_lora_TE:
-     type: 'TE'
-     base_model_alpha: 1.0 # base model weight to infer with lora or part
-     lora:
-       - path: 'exps/lora1/ckpts/text_encoder-600.safetensors'
-         alpha: 0.8
-         layers: 'all'
-
-   group_part_unet:
-     type: 'unet'
-     base_model_alpha: 0.4 # base model weight to merge with lora or part
-     part:
-       - path: 'exps/part1/ckpts/unet-500.safetensors'
-         alpha: 0.6
-         layers: 'all'
-
-   group_part_TE:
-     type: 'TE'
-     base_model_alpha: 0.4 # base model weight to merge with lora or part
-     part:
-       - path: 'exps/part1/ckpts/text_encoder-500.safetensors'
-         alpha: 0.6
-         layers: 'all'
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img.yaml
@@ -1,53 +0,0 @@
- # base_state*base_model_alpha + (lora_state[i]*lora_scale[i]*lora_alpha[i]) + (part_state[k]*part_alpha[k])
-
- pretrained_model: ''
- prompt: ''
- neg_prompt: 'lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry'
- emb_dir: 'embs/'
- N_repeats: 1
- clip_skip: 0
- clip_final_norm: True
- encoder_attention_mask: True
- bs: 4
- num: 1
- seed: null
- dtype: 'fp16'
- amp: True
-
- condition: null
-
- ex_input: {}
-
- # Syntactic sugar for interface
- save:
-   out_dir: 'output/'
-   save_cfg: True
-   image_type: png
-   quality: 95
-   # image_type: webp
-   # quality: 75
-
- offload: null
-
- #vae_optimize: null
- vae_optimize:
-   tiling: False
-   slicing: False
-
- interface:
-   - _target_: hcpdiff.vis.DiskInterface
-     show_steps: 0
-     save_root: ${save.out_dir}
-     save_cfg: ${save.save_cfg}
-     image_type: ${save.image_type}
-     quality: ${save.quality}
-
- infer_args:
-   width: 512
-   height: 512
-   guidance_scale: 7.5
-   num_inference_steps: 50
-
- new_components: {}
-
- merge: null
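
The comment on the first line of text2img.yaml is the whole merge rule: each target group's state dict is a weighted sum of the base weights, any LoRA deltas, and any "part" checkpoints. A minimal sketch of that arithmetic (illustrative only; hcpdiff 0.9.1 implemented it in hcpdiff/utils/cfg_net_tools.py, and merge_state_dicts below is a hypothetical helper):

import torch

def merge_state_dicts(base_state, base_model_alpha, loras=(), parts=()):
    """loras: iterable of (delta_state, scale, alpha); parts: iterable of (state, alpha)."""
    # base_state * base_model_alpha
    merged = {k: v * base_model_alpha for k, v in base_state.items()}
    # + sum_i lora_state[i] * lora_scale[i] * lora_alpha[i]
    for delta_state, scale, alpha in loras:
        for k, v in delta_state.items():
            merged[k] = merged.get(k, torch.zeros_like(v)) + v * scale * alpha
    # + sum_k part_state[k] * part_alpha[k]
    for state, alpha in parts:
        for k, v in state.items():
            merged[k] = merged.get(k, torch.zeros_like(v)) + v * alpha
    return merged

With base_model_alpha=0.0 and a single part at alpha=1.0 (as in load_unet_part.yaml), the part checkpoint simply replaces the base weights.
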
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img_DA++.yaml
@@ -1,34 +0,0 @@
- _base_: [cfgs/infer/text2img.yaml]
-
- merge:
-   exp_dir: '2023-04-03-10-10-36'
-   alpha: 0.8
-   alpha_neg: 0.65
-
-   group1:
-     type: 'unet'
-     base_model_alpha: 1.0 # base model weight to merge with lora or part
-     lora:
-       - path: 'exps/${merge.exp_dir}/ckpts/unet-600.safetensors'
-         alpha: ${merge.alpha}
-         layers: 'all'
-         mask: [ 0.5, 1 ]
-       - path: 'exps/${merge.exp_dir}/ckpts/unet-neg-600.safetensors'
-         alpha: ${merge.alpha_neg}
-         layers: 'all'
-         mask: [ 0, 0.5 ]
-     part: null
-
-   group2:
-     type: 'TE'
-     base_model_alpha: 1.0 # base model weight to infer with lora or part
-     lora:
-       - path: 'exps/${merge.exp_dir}/ckpts/text_encoder-600.safetensors'
-         alpha: ${merge.alpha}
-         layers: 'all'
-         mask: [ 0.5, 1 ]
-       - path: 'exps/${merge.exp_dir}/ckpts/text_encoder-neg-600.safetensors'
-         alpha: ${merge.alpha_neg}
-         layers: 'all'
-         mask: [ 0, 0.5 ]
-     part: null
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img_sdxl.yaml
@@ -1,9 +0,0 @@
- _base_: [cfgs/infer/text2img.yaml]
-
- emb_dir: embs_sdxl
-
- prompt: ''
- neg_prompt: ''
-
- clip_skip: 1
- clip_final_norm: False
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/plugins/plugin_controlnet.yaml
@@ -1,17 +0,0 @@
- _base_: [cfgs/train/tuning_base.yaml]
-
- plugin_unet:
-   controlnet1:
-     _target_: hcpdiff.models.controlnet.ControlNetPlugin
-     _partial_: True
-     lr: 1e-4
-     from_layers:
-       - 'pre_hook:'
-       - 'pre_hook:conv_in' # to make forward inside autocast
-     to_layers:
-       - 'down_blocks.0'
-       - 'down_blocks.1'
-       - 'down_blocks.2'
-       - 'down_blocks.3'
-       - 'mid_block'
-       - 'pre_hook:up_blocks.3.resnets.2'
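
The from_layers/to_layers lists above address UNet submodules by name, and the 'pre_hook:' prefix appears to mark points where the plugin taps a layer's inputs rather than its outputs. A small sketch of that addressing scheme with plain PyTorch hooks (an assumption about what the prefix means, not the ControlNetPlugin implementation; tap_layer is a hypothetical helper):

from torch import nn

def tap_layer(model: nn.Module, spec: str, store: dict):
    # 'pre_hook:<name>' taps the inputs of <name>; a bare name taps its outputs.
    pre = spec.startswith('pre_hook:')
    name = spec.split(':', 1)[1] if pre else spec
    layer = model if name == '' else model.get_submodule(name)
    if pre:
        layer.register_forward_pre_hook(lambda mod, args: store.__setitem__(spec, args))
    else:
        layer.register_forward_hook(lambda mod, args, out: store.__setitem__(spec, out))
    return layer
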
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/te_struct.txt
@@ -1,193 +0,0 @@
- CLIPTextModel(
-   (text_model): CLIPTextTransformer(
-     (embeddings): CLIPTextEmbeddings(
-       (token_embedding): Embedding(49408, 768)
-       (position_embedding): Embedding(77, 768)
-     )
-     (encoder): CLIPEncoder(
-       (layers): ModuleList(
-         (0): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (1): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (2): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (3): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (4): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (5): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (6): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (7): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (8): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (9): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (10): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-         (11): CLIPEncoderLayer(
-           (self_attn): CLIPAttention(
-             (k_proj): Linear(in_features=768, out_features=768, bias=True)
-             (v_proj): Linear(in_features=768, out_features=768, bias=True)
-             (q_proj): Linear(in_features=768, out_features=768, bias=True)
-             (out_proj): Linear(in_features=768, out_features=768, bias=True)
-           )
-           (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-           (mlp): CLIPMLP(
-             (activation_fn): QuickGELUActivation()
-             (fc1): Linear(in_features=768, out_features=3072, bias=True)
-             (fc2): Linear(in_features=3072, out_features=768, bias=True)
-           )
-           (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-         )
-       )
-     )
-     (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-   )
- )
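
te_struct.txt is simply the repr of the SD 1.5 text encoder, and it is what the clip_skip and clip_final_norm options in text2img.yaml index into: clip_skip walks back from the last of the 12 CLIPEncoderLayer outputs, and clip_final_norm decides whether final_layer_norm is still applied. A hedged sketch with the transformers API (not the hook code in hcpdiff/models/textencoder_ex.py; encode is a hypothetical helper):

from transformers import CLIPTextModel, CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")

def encode(prompt, clip_skip=0, clip_final_norm=True):
    ids = tokenizer(prompt, return_tensors="pt").input_ids
    out = text_encoder(ids, output_hidden_states=True)
    # hidden_states[-1] is the output of encoder layer 11; clip_skip walks back from it
    h = out.hidden_states[-(clip_skip + 1)]
    if clip_final_norm:
        h = text_encoder.text_model.final_layer_norm(h)
    return h

This matches the two configs above: SD 1.5 uses clip_skip 0 with the final norm, while the SDXL override uses clip_skip 1 and clip_final_norm False.
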
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/dataset/base_dataset.yaml
@@ -1,29 +0,0 @@
- data:
-   dataset1:
-     _target_: hcpdiff.data.TextImagePairDataset
-     _partial_: True # Do not instantiate the object here; the remaining parameters are filled in at runtime.
-     batch_size: 4
-     cache_latents: True
-     att_mask_encode: False
-     loss_weight: 1.0
-
-     source:
-       data_source1:
-         _target_: hcpdiff.data.source.Text2ImageAttMapSource
-         img_root: 'imgs/'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null # path to image captions (file_words)
-         att_mask: null
-         bg_color: [ 255, 255, 255 ] # RGB; for ARGB -> RGB
-
-         word_names: {}
-
-         text_transforms:
-           _target_: torchvision.transforms.Compose
-           transforms:
-             - _target_: hcpdiff.utils.caption_tools.TemplateFill
-               word_names: ${....word_names}
-     bucket:
-       _target_: hcpdiff.data.bucket.RatioBucket.from_files # aspect ratio bucket
-       target_area: ${hcp.eval:"512*512"}
-       num_bucket: 5
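
RatioBucket.from_files groups images into num_bucket aspect-ratio buckets that all hold roughly target_area pixels, so batches can mix portrait and landscape crops without padding. A toy sketch of how such bucket resolutions can be derived (assumed log-spaced ratios and rounding to multiples of 64; not hcpdiff's RatioBucket code):

import math

def make_buckets(target_area=512 * 512, num_bucket=5, min_ratio=0.5, max_ratio=2.0, step=64):
    buckets = []
    for i in range(num_bucket):
        # log-spaced width/height ratios between min_ratio and max_ratio
        ratio = min_ratio * (max_ratio / min_ratio) ** (i / max(num_bucket - 1, 1))
        w = round(math.sqrt(target_area * ratio) / step) * step
        h = round(math.sqrt(target_area / ratio) / step) * step
        buckets.append((w, h))
    return buckets

print(make_buckets())  # e.g. [(384, 704), (448, 640), (512, 512), (640, 448), (704, 384)]
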
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/dataset/regularization_dataset.yaml
@@ -1,31 +0,0 @@
- _base_: [cfgs/train/dataset/base_dataset.yaml]
-
- data:
-   dataset_class:
-     _target_: hcpdiff.data.TextImagePairDataset
-     _partial_: True
-     batch_size: 1
-     cache_latents: True
-     att_mask_encode: False
-     loss_weight: 1.0
-
-     source:
-       data_source1:
-         _target_: hcpdiff.data.source.Text2ImageAttMapSource
-         img_root: 'imgs/db_class'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null
-         att_mask: null
-         bg_color: [ 255, 255, 255 ] # RGB; for ARGB -> RGB
-
-         word_names:
-           pt1: ''
-
-         text_transforms:
-           _target_: torchvision.transforms.Compose
-           transforms:
-             - _target_: hcpdiff.utils.caption_tools.TemplateFill
-               word_names: ${....word_names}
-     bucket:
-       _target_: hcpdiff.data.bucket.FixedBucket
-       target_size: [ 512, 512 ]
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/CustomDiffusion.yaml
@@ -1,74 +0,0 @@
- _base_:
-   - cfgs/train/dataset/regularization_dataset.yaml
-   - cfgs/train/train_base.yaml
-   - cfgs/train/tuning_base.yaml
-
- unet:
-   -
-     lr: 1e-6
-     layers: # k,v of cross attention
-       - 're:.*attn2\.to_k$'
-       - 're:.*attn2\.to_v$'
-
- ## lora version of CustomDiffusion
- #lora_unet:
- #  -
- #    lr: 1e-4
- #    layers:
- #      - 're:.*attn2\.to_k$'
- #      - 're:.*attn2\.to_v$'
-
- tokenizer_pt:
-   train: # prompt tuning embeddings, need to be created in advance
-     - { name: 'pt-dog1', lr: 0.003 }
-
- train:
-   gradient_accumulation_steps: 1
-   save_step: 100
-
-   scheduler:
-     name: 'constant_with_warmup'
-     num_warmup_steps: 50
-     num_training_steps: 600
-
- model:
-   pretrained_model_name_or_path: 'runwayml/stable-diffusion-v1-5'
-   tokenizer_repeats: 1
-   ema_unet: 0
-   ema_text_encoder: 0
-
- data:
-   dataset1:
-     batch_size: 4
-     cache_latents: True
-
-     source:
-       data_source1:
-         img_root: 'imgs/'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null # path to image captions (file_words)
-
-         word_names:
-           pt1: sks
-           class: dog
-     bucket:
-       _target_: hcpdiff.data.bucket.RatioBucket.from_files # aspect ratio bucket
-       target_area: ${hcp.eval:"512*512"}
-       num_bucket: 1
-
-   dataset_class:
-     batch_size: 1
-     cache_latents: True
-     loss_weight: 1.0
-
-     source:
-       data_source1:
-         img_root: 'imgs/db_class'
-         prompt_template: 'prompt_tuning_template/object.txt'
-         caption_file: null
-
-         word_names:
-           class: dog
-     bucket:
-       _target_: hcpdiff.data.bucket.FixedBucket
-       target_size: [512, 512]
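
The 're:' entries under unet.layers are regular expressions over module names; Custom Diffusion trains only the cross-attention key/value projections they match. A short sketch of that selection with a diffusers UNet (illustrative; hcpdiff 0.9.1 did the matching in hcpdiff/utils/cfg_net_tools.py):

import re
from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="unet"
)

# The regexes from the config above, matched against module names.
patterns = [re.compile(r".*attn2\.to_k$"), re.compile(r".*attn2\.to_v$")]
trainable = {n for n, _ in unet.named_modules() if any(p.match(n) for p in patterns)}

# Freeze everything except the matched cross-attention k/v projections.
for name, param in unet.named_parameters():
    module_name = name.rsplit(".", 1)[0]  # strip trailing '.weight' / '.bias'
    param.requires_grad = module_name in trainable

print(len(trainable), "cross-attention k/v layers selected for training")
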