diffsynth 2.0.2.tar.gz → 2.0.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (413)
  1. {diffsynth-2.0.2/diffsynth.egg-info → diffsynth-2.0.4}/PKG-INFO +1 -1
  2. {diffsynth-2.0.2 → diffsynth-2.0.4}/README.md +18 -6
  3. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/configs/model_configs.py +22 -0
  4. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/data/unified_dataset.py +5 -1
  5. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/device/__init__.py +1 -1
  6. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/loader/config.py +2 -1
  7. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/loader/file.py +15 -6
  8. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/loader/model.py +5 -3
  9. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/diffusion/base_pipeline.py +3 -1
  10. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/diffusion/flow_match.py +7 -2
  11. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/diffusion/logger.py +1 -1
  12. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/diffusion/runner.py +1 -1
  13. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/diffusion/training_module.py +55 -4
  14. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/dinov3_image_encoder.py +3 -1
  15. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux2_dit.py +29 -38
  16. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/longcat_video_dit.py +6 -5
  17. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/model_loader.py +4 -3
  18. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/nexus_gen_ar_model.py +1 -1
  19. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/siglip2_image_encoder.py +3 -1
  20. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/step1x_text_encoder.py +10 -9
  21. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wan_video_dit.py +2 -0
  22. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/z_image_dit.py +3 -3
  23. diffsynth-2.0.4/diffsynth/models/z_image_text_encoder.py +74 -0
  24. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/pipelines/flux2_image.py +226 -5
  25. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/pipelines/flux_image.py +6 -5
  26. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/pipelines/qwen_image.py +3 -2
  27. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/pipelines/wan_video.py +4 -3
  28. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/pipelines/z_image.py +3 -2
  29. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/controlnet/annotator.py +2 -1
  30. diffsynth-2.0.4/diffsynth/utils/state_dict_converters/z_image_text_encoder.py +6 -0
  31. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/xfuser/xdit_context_parallel.py +1 -1
  32. {diffsynth-2.0.2 → diffsynth-2.0.4/diffsynth.egg-info}/PKG-INFO +1 -1
  33. diffsynth-2.0.4/diffsynth.egg-info/SOURCES.txt +119 -0
  34. {diffsynth-2.0.2 → diffsynth-2.0.4}/pyproject.toml +1 -1
  35. diffsynth-2.0.2/data/style/move.py +0 -13
  36. diffsynth-2.0.2/data/style/test.py +0 -57
  37. diffsynth-2.0.2/diffsynth/models/z_image_text_encoder.py +0 -41
  38. diffsynth-2.0.2/diffsynth.egg-info/SOURCES.txt +0 -430
  39. diffsynth-2.0.2/examples/dev_tools/fix_path.py +0 -43
  40. diffsynth-2.0.2/examples/dev_tools/unit_test.py +0 -121
  41. diffsynth-2.0.2/examples/flux/model_inference/FLEX.2-preview.py +0 -50
  42. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-Kontext-dev.py +0 -54
  43. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-Krea-dev.py +0 -27
  44. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-dev-AttriCtrl.py +0 -19
  45. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-dev-Controlnet-Inpainting-Beta.py +0 -37
  46. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-dev-Controlnet-Union-alpha.py +0 -40
  47. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-dev-Controlnet-Upscaler.py +0 -33
  48. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-dev-EliGen.py +0 -133
  49. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-dev-IP-Adapter.py +0 -24
  50. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-dev-InfiniteYou.py +0 -61
  51. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-dev-LoRA-Encoder.py +0 -38
  52. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-dev-LoRA-Fusion.py +0 -38
  53. diffsynth-2.0.2/examples/flux/model_inference/FLUX.1-dev.py +0 -26
  54. diffsynth-2.0.2/examples/flux/model_inference/Nexus-Gen-Editing.py +0 -37
  55. diffsynth-2.0.2/examples/flux/model_inference/Nexus-Gen-Generation.py +0 -32
  56. diffsynth-2.0.2/examples/flux/model_inference/Step1X-Edit.py +0 -32
  57. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLEX.2-preview.py +0 -61
  58. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-Kontext-dev.py +0 -65
  59. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-Krea-dev.py +0 -38
  60. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-dev-AttriCtrl.py +0 -30
  61. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-dev-Controlnet-Inpainting-Beta.py +0 -48
  62. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-dev-Controlnet-Union-alpha.py +0 -50
  63. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-dev-Controlnet-Upscaler.py +0 -44
  64. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-dev-EliGen.py +0 -144
  65. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-dev-IP-Adapter.py +0 -35
  66. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-dev-InfiniteYou.py +0 -73
  67. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-dev-LoRA-Encoder.py +0 -49
  68. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-dev-LoRA-Fusion.py +0 -38
  69. diffsynth-2.0.2/examples/flux/model_inference_low_vram/FLUX.1-dev.py +0 -37
  70. diffsynth-2.0.2/examples/flux/model_inference_low_vram/Nexus-Gen-Editing.py +0 -48
  71. diffsynth-2.0.2/examples/flux/model_inference_low_vram/Nexus-Gen-Generation.py +0 -43
  72. diffsynth-2.0.2/examples/flux/model_inference_low_vram/Step1X-Edit.py +0 -43
  73. diffsynth-2.0.2/examples/flux/model_training/train.py +0 -193
  74. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLEX.2-preview.py +0 -20
  75. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLUX.1-Kontext-dev.py +0 -26
  76. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLUX.1-Krea-dev.py +0 -20
  77. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLUX.1-dev-AttriCtrl.py +0 -21
  78. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLUX.1-dev-Controlnet-Inpainting-Beta.py +0 -31
  79. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLUX.1-dev-Controlnet-Union-alpha.py +0 -31
  80. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLUX.1-dev-Controlnet-Upscaler.py +0 -30
  81. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLUX.1-dev-IP-Adapter.py +0 -28
  82. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLUX.1-dev-InfiniteYou.py +0 -33
  83. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLUX.1-dev-LoRA-Encoder.py +0 -24
  84. diffsynth-2.0.2/examples/flux/model_training/validate_full/FLUX.1-dev.py +0 -20
  85. diffsynth-2.0.2/examples/flux/model_training/validate_full/Nexus-Gen.py +0 -28
  86. diffsynth-2.0.2/examples/flux/model_training/validate_full/Step1X-Edit.py +0 -25
  87. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLEX.2-preview.py +0 -18
  88. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLUX.1-Kontext-dev.py +0 -24
  89. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLUX.1-Krea-dev.py +0 -18
  90. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLUX.1-dev-AttriCtrl.py +0 -19
  91. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLUX.1-dev-Controlnet-Inpainting-Beta.py +0 -29
  92. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLUX.1-dev-Controlnet-Union-alpha.py +0 -29
  93. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLUX.1-dev-Controlnet-Upscaler.py +0 -28
  94. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLUX.1-dev-EliGen.py +0 -33
  95. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLUX.1-dev-IP-Adapter.py +0 -26
  96. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLUX.1-dev-InfiniteYou.py +0 -28
  97. diffsynth-2.0.2/examples/flux/model_training/validate_lora/FLUX.1-dev.py +0 -18
  98. diffsynth-2.0.2/examples/flux/model_training/validate_lora/Nexus-Gen.py +0 -26
  99. diffsynth-2.0.2/examples/flux/model_training/validate_lora/Step1X-Edit.py +0 -23
  100. diffsynth-2.0.2/examples/flux2/model_inference/FLUX.2-dev.py +0 -27
  101. diffsynth-2.0.2/examples/flux2/model_inference_low_vram/FLUX.2-dev.py +0 -27
  102. diffsynth-2.0.2/examples/flux2/model_training/train.py +0 -143
  103. diffsynth-2.0.2/examples/flux2/model_training/validate_lora/FLUX.2-dev.py +0 -28
  104. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-2512.py +0 -17
  105. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Canny.py +0 -31
  106. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Depth.py +0 -32
  107. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Blockwise-ControlNet-Inpaint.py +0 -33
  108. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Distill-DMD2.py +0 -25
  109. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Distill-Full.py +0 -17
  110. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Distill-LoRA.py +0 -20
  111. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Edit-2509.py +0 -31
  112. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Edit-2511.py +0 -44
  113. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Edit-Lowres-Fix.py +0 -25
  114. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Edit.py +0 -25
  115. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-EliGen-Poster.py +0 -114
  116. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-EliGen-V2.py +0 -106
  117. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-EliGen.py +0 -107
  118. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-In-Context-Control-Union.py +0 -35
  119. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-Layered.py +0 -36
  120. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image-i2L.py +0 -110
  121. diffsynth-2.0.2/examples/qwen_image/model_inference/Qwen-Image.py +0 -17
  122. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-2512.py +0 -28
  123. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Blockwise-ControlNet-Canny.py +0 -42
  124. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Blockwise-ControlNet-Depth.py +0 -43
  125. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Blockwise-ControlNet-Inpaint.py +0 -44
  126. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Distill-DMD2.py +0 -36
  127. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Distill-Full.py +0 -28
  128. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Distill-LoRA.py +0 -31
  129. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Edit-2509.py +0 -43
  130. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Edit-2511.py +0 -54
  131. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Edit-Lowres-Fix.py +0 -37
  132. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Edit.py +0 -37
  133. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-EliGen-Poster.py +0 -125
  134. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-EliGen-V2.py +0 -117
  135. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-EliGen.py +0 -118
  136. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-In-Context-Control-Union.py +0 -46
  137. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-Layered.py +0 -46
  138. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image-i2L.py +0 -134
  139. diffsynth-2.0.2/examples/qwen_image/model_inference_low_vram/Qwen-Image.py +0 -28
  140. diffsynth-2.0.2/examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Initialize.py +0 -13
  141. diffsynth-2.0.2/examples/qwen_image/model_training/scripts/Qwen-Image-Blockwise-ControlNet-Inpaint-Initialize.py +0 -12
  142. diffsynth-2.0.2/examples/qwen_image/model_training/special/fp8_training/validate.py +0 -18
  143. diffsynth-2.0.2/examples/qwen_image/model_training/special/simple/train.py +0 -76
  144. diffsynth-2.0.2/examples/qwen_image/model_training/special/split_training/validate.py +0 -18
  145. diffsynth-2.0.2/examples/qwen_image/model_training/train.py +0 -169
  146. diffsynth-2.0.2/examples/qwen_image/model_training/validate_full/Qwen-Image-2512.py +0 -20
  147. diffsynth-2.0.2/examples/qwen_image/model_training/validate_full/Qwen-Image-Blockwise-ControlNet-Canny.py +0 -31
  148. diffsynth-2.0.2/examples/qwen_image/model_training/validate_full/Qwen-Image-Blockwise-ControlNet-Depth.py +0 -31
  149. diffsynth-2.0.2/examples/qwen_image/model_training/validate_full/Qwen-Image-Blockwise-ControlNet-Inpaint.py +0 -32
  150. diffsynth-2.0.2/examples/qwen_image/model_training/validate_full/Qwen-Image-Distill-Full.py +0 -20
  151. diffsynth-2.0.2/examples/qwen_image/model_training/validate_full/Qwen-Image-Edit-2509.py +0 -26
  152. diffsynth-2.0.2/examples/qwen_image/model_training/validate_full/Qwen-Image-Edit-2511.py +0 -26
  153. diffsynth-2.0.2/examples/qwen_image/model_training/validate_full/Qwen-Image-Edit.py +0 -23
  154. diffsynth-2.0.2/examples/qwen_image/model_training/validate_full/Qwen-Image-Layered.py +0 -28
  155. diffsynth-2.0.2/examples/qwen_image/model_training/validate_full/Qwen-Image.py +0 -20
  156. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-2512.py +0 -18
  157. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-Blockwise-ControlNet-Canny.py +0 -32
  158. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-Blockwise-ControlNet-Depth.py +0 -33
  159. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-Blockwise-ControlNet-Inpaint.py +0 -34
  160. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-Distill-Full.py +0 -18
  161. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-Distill-LoRA.py +0 -23
  162. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-Edit-2509.py +0 -24
  163. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-Edit-2511.py +0 -24
  164. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-Edit.py +0 -21
  165. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-EliGen-Poster.py +0 -29
  166. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-EliGen.py +0 -29
  167. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-In-Context-Control-Union.py +0 -19
  168. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image-Layered.py +0 -27
  169. diffsynth-2.0.2/examples/qwen_image/model_training/validate_lora/Qwen-Image.py +0 -18
  170. diffsynth-2.0.2/examples/wanvideo/acceleration/unified_sequence_parallel.py +0 -26
  171. diffsynth-2.0.2/examples/wanvideo/model_inference/LongCat-Video.py +0 -35
  172. diffsynth-2.0.2/examples/wanvideo/model_inference/Video-As-Prompt-Wan2.1-14B.py +0 -49
  173. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-1.3b-speedcontrol-v1.py +0 -34
  174. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-FLF2V-14B-720P.py +0 -36
  175. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-Fun-1.3B-Control.py +0 -34
  176. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-Fun-1.3B-InP.py +0 -36
  177. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-Fun-14B-Control.py +0 -34
  178. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-Fun-14B-InP.py +0 -36
  179. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-Fun-V1.1-1.3B-Control-Camera.py +0 -44
  180. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-Fun-V1.1-1.3B-Control.py +0 -36
  181. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-Fun-V1.1-1.3B-InP.py +0 -36
  182. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-Fun-V1.1-14B-Control-Camera.py +0 -44
  183. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-Fun-V1.1-14B-Control.py +0 -36
  184. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-Fun-V1.1-14B-InP.py +0 -36
  185. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-I2V-14B-480P.py +0 -34
  186. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-I2V-14B-720P.py +0 -35
  187. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-T2V-1.3B.py +0 -34
  188. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-T2V-14B.py +0 -24
  189. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-VACE-1.3B-Preview.py +0 -52
  190. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-VACE-1.3B.py +0 -53
  191. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.1-VACE-14B.py +0 -54
  192. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.2-Animate-14B.py +0 -62
  193. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.2-Fun-A14B-Control-Camera.py +0 -43
  194. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.2-Fun-A14B-Control.py +0 -35
  195. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.2-Fun-A14B-InP.py +0 -35
  196. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.2-I2V-A14B.py +0 -33
  197. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.2-S2V-14B.py +0 -73
  198. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.2-S2V-14B_multi_clips.py +0 -124
  199. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.2-T2V-A14B.py +0 -24
  200. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.2-TI2V-5B.py +0 -43
  201. diffsynth-2.0.2/examples/wanvideo/model_inference/Wan2.2-VACE-Fun-A14B.py +0 -68
  202. diffsynth-2.0.2/examples/wanvideo/model_inference/krea-realtime-video.py +0 -25
  203. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/LongCat-Video.py +0 -46
  204. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Video-As-Prompt-Wan2.1-14B.py +0 -62
  205. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-1.3b-speedcontrol-v1.py +0 -45
  206. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-FLF2V-14B-720P.py +0 -47
  207. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-Fun-1.3B-Control.py +0 -45
  208. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-Fun-1.3B-InP.py +0 -47
  209. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-Fun-14B-Control.py +0 -45
  210. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-Fun-14B-InP.py +0 -47
  211. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-Fun-V1.1-1.3B-Control-Camera.py +0 -55
  212. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-Fun-V1.1-1.3B-Control.py +0 -47
  213. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-Fun-V1.1-1.3B-InP.py +0 -47
  214. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-Fun-V1.1-14B-Control-Camera.py +0 -55
  215. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-Fun-V1.1-14B-Control.py +0 -47
  216. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-Fun-V1.1-14B-InP.py +0 -47
  217. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-I2V-14B-480P.py +0 -45
  218. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-I2V-14B-720P.py +0 -46
  219. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-T2V-1.3B.py +0 -45
  220. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-T2V-14B.py +0 -35
  221. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-VACE-1.3B-Preview.py +0 -63
  222. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-VACE-1.3B.py +0 -64
  223. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.1-VACE-14B.py +0 -65
  224. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.2-Animate-14B.py +0 -74
  225. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.2-Fun-A14B-Control-Camera.py +0 -55
  226. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.2-Fun-A14B-Control.py +0 -46
  227. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.2-Fun-A14B-InP.py +0 -46
  228. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.2-I2V-A14B.py +0 -44
  229. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.2-S2V-14B.py +0 -84
  230. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.2-S2V-14B_multi_clips.py +0 -133
  231. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.2-T2V-A14B.py +0 -35
  232. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.2-TI2V-5B.py +0 -54
  233. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/Wan2.2-VACE-Fun-A14B.py +0 -65
  234. diffsynth-2.0.2/examples/wanvideo/model_inference_low_vram/krea-realtime-video.py +0 -36
  235. diffsynth-2.0.2/examples/wanvideo/model_training/special/direct_distill/validate.py +0 -23
  236. diffsynth-2.0.2/examples/wanvideo/model_training/special/fp8_training/validate.py +0 -28
  237. diffsynth-2.0.2/examples/wanvideo/model_training/special/low_vram_training/validate.py +0 -28
  238. diffsynth-2.0.2/examples/wanvideo/model_training/special/split_training/validate.py +0 -28
  239. diffsynth-2.0.2/examples/wanvideo/model_training/train.py +0 -185
  240. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/LongCat-Video.py +0 -25
  241. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Video-As-Prompt-Wan2.1-14B.py +0 -43
  242. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-1.3b-speedcontrol-v1.py +0 -28
  243. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-FLF2V-14B-720P.py +0 -33
  244. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-Fun-1.3B-Control.py +0 -32
  245. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-Fun-1.3B-InP.py +0 -31
  246. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-Fun-14B-Control.py +0 -32
  247. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-Fun-14B-InP.py +0 -31
  248. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-Fun-V1.1-1.3B-Control-Camera.py +0 -32
  249. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-Fun-V1.1-1.3B-Control.py +0 -33
  250. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-Fun-V1.1-1.3B-InP.py +0 -31
  251. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-Fun-V1.1-14B-Control-Camera.py +0 -32
  252. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-Fun-V1.1-14B-Control.py +0 -33
  253. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-Fun-V1.1-14B-InP.py +0 -31
  254. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-I2V-14B-480P.py +0 -30
  255. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-I2V-14B-720P.py +0 -31
  256. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-T2V-1.3B.py +0 -25
  257. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-T2V-14B.py +0 -25
  258. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-VACE-1.3B-Preview.py +0 -30
  259. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-VACE-1.3B.py +0 -30
  260. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.1-VACE-14B.py +0 -30
  261. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.2-Animate-14B.py +0 -33
  262. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.2-Fun-A14B-Control-Camera.py +0 -34
  263. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.2-Fun-A14B-Control.py +0 -35
  264. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.2-Fun-A14B-InP.py +0 -32
  265. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.2-I2V-A14B.py +0 -33
  266. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.2-S2V-14B.py +0 -53
  267. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.2-T2V-A14B.py +0 -28
  268. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.2-TI2V-5B.py +0 -30
  269. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/Wan2.2-VACE-Fun-A14B.py +0 -43
  270. diffsynth-2.0.2/examples/wanvideo/model_training/validate_full/krea-realtime-video.py +0 -28
  271. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/LongCat-Video.py +0 -23
  272. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Video-As-Prompt-Wan2.1-14B.py +0 -42
  273. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-1.3b-speedcontrol-v1.py +0 -27
  274. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-FLF2V-14B-720P.py +0 -31
  275. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-Fun-1.3B-Control.py +0 -30
  276. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-Fun-1.3B-InP.py +0 -29
  277. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-Fun-14B-Control.py +0 -30
  278. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-Fun-14B-InP.py +0 -29
  279. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-Fun-V1.1-1.3B-Control-Camera.py +0 -31
  280. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-Fun-V1.1-1.3B-Control.py +0 -31
  281. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-Fun-V1.1-1.3B-InP.py +0 -29
  282. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-Fun-V1.1-14B-Control-Camera.py +0 -31
  283. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-Fun-V1.1-14B-Control.py +0 -31
  284. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-Fun-V1.1-14B-InP.py +0 -29
  285. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-I2V-14B-480P.py +0 -28
  286. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-I2V-14B-720P.py +0 -29
  287. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-T2V-1.3B.py +0 -23
  288. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-T2V-14B.py +0 -23
  289. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-VACE-1.3B-Preview.py +0 -28
  290. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-VACE-1.3B.py +0 -28
  291. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.1-VACE-14B.py +0 -28
  292. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.2-Animate-14B.py +0 -32
  293. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.2-Fun-A14B-Control-Camera.py +0 -32
  294. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.2-Fun-A14B-Control.py +0 -32
  295. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.2-Fun-A14B-InP.py +0 -30
  296. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.2-I2V-A14B.py +0 -30
  297. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.2-S2V-14B.py +0 -50
  298. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.2-T2V-A14B.py +0 -27
  299. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.2-TI2V-5B.py +0 -29
  300. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/Wan2.2-VACE-Fun-A14B.py +0 -30
  301. diffsynth-2.0.2/examples/wanvideo/model_training/validate_lora/krea-realtime-video.py +0 -28
  302. diffsynth-2.0.2/examples/z_image/model_inference/Z-Image-Omni-Base-i2L.py +0 -62
  303. diffsynth-2.0.2/examples/z_image/model_inference/Z-Image-Omni-Base.py +0 -24
  304. diffsynth-2.0.2/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py +0 -27
  305. diffsynth-2.0.2/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py +0 -40
  306. diffsynth-2.0.2/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py +0 -46
  307. diffsynth-2.0.2/examples/z_image/model_inference/Z-Image-Turbo.py +0 -17
  308. diffsynth-2.0.2/examples/z_image/model_inference_low_vram/Z-Image-Omni-Base-i2L.py +0 -62
  309. diffsynth-2.0.2/examples/z_image/model_inference_low_vram/Z-Image-Omni-Base.py +0 -33
  310. diffsynth-2.0.2/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py +0 -37
  311. diffsynth-2.0.2/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py +0 -50
  312. diffsynth-2.0.2/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py +0 -56
  313. diffsynth-2.0.2/examples/z_image/model_inference_low_vram/Z-Image-Turbo.py +0 -27
  314. diffsynth-2.0.2/examples/z_image/model_training/special/differential_training/validate.py +0 -18
  315. diffsynth-2.0.2/examples/z_image/model_training/special/trajectory_imitation/validate.py +0 -18
  316. diffsynth-2.0.2/examples/z_image/model_training/train.py +0 -153
  317. diffsynth-2.0.2/examples/z_image/model_training/validate_full/Z-Image-Omni-Base.py +0 -33
  318. diffsynth-2.0.2/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py +0 -24
  319. diffsynth-2.0.2/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py +0 -24
  320. diffsynth-2.0.2/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py +0 -24
  321. diffsynth-2.0.2/examples/z_image/model_training/validate_full/Z-Image-Turbo.py +0 -20
  322. diffsynth-2.0.2/examples/z_image/model_training/validate_lora/Z-Image-Omni-Base.py +0 -31
  323. diffsynth-2.0.2/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py +0 -23
  324. diffsynth-2.0.2/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py +0 -23
  325. diffsynth-2.0.2/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py +0 -23
  326. diffsynth-2.0.2/examples/z_image/model_training/validate_lora/Z-Image-Turbo.py +0 -18
  327. {diffsynth-2.0.2 → diffsynth-2.0.4}/LICENSE +0 -0
  328. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/__init__.py +0 -0
  329. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/configs/__init__.py +0 -0
  330. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/configs/vram_management_module_maps.py +0 -0
  331. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/__init__.py +0 -0
  332. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/attention/__init__.py +0 -0
  333. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/attention/attention.py +0 -0
  334. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/data/__init__.py +0 -0
  335. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/data/operators.py +0 -0
  336. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/device/npu_compatible_device.py +0 -0
  337. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/gradient/__init__.py +0 -0
  338. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/gradient/gradient_checkpoint.py +0 -0
  339. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/loader/__init__.py +0 -0
  340. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/vram/__init__.py +0 -0
  341. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/vram/disk_map.py +0 -0
  342. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/vram/initialization.py +0 -0
  343. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/core/vram/layers.py +0 -0
  344. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/diffusion/__init__.py +0 -0
  345. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/diffusion/loss.py +0 -0
  346. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/diffusion/parsers.py +0 -0
  347. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux2_text_encoder.py +0 -0
  348. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux2_vae.py +0 -0
  349. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux_controlnet.py +0 -0
  350. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux_dit.py +0 -0
  351. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux_infiniteyou.py +0 -0
  352. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux_ipadapter.py +0 -0
  353. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux_lora_encoder.py +0 -0
  354. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux_lora_patcher.py +0 -0
  355. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux_text_encoder_clip.py +0 -0
  356. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux_text_encoder_t5.py +0 -0
  357. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux_vae.py +0 -0
  358. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/flux_value_control.py +0 -0
  359. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/general_modules.py +0 -0
  360. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/nexus_gen.py +0 -0
  361. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/nexus_gen_projector.py +0 -0
  362. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/qwen_image_controlnet.py +0 -0
  363. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/qwen_image_dit.py +0 -0
  364. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/qwen_image_image2lora.py +0 -0
  365. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/qwen_image_text_encoder.py +0 -0
  366. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/qwen_image_vae.py +0 -0
  367. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/sd_text_encoder.py +0 -0
  368. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/step1x_connector.py +0 -0
  369. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wan_video_animate_adapter.py +0 -0
  370. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wan_video_camera_controller.py +0 -0
  371. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wan_video_dit_s2v.py +0 -0
  372. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wan_video_image_encoder.py +0 -0
  373. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wan_video_mot.py +0 -0
  374. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wan_video_motion_controller.py +0 -0
  375. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wan_video_text_encoder.py +0 -0
  376. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wan_video_vace.py +0 -0
  377. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wan_video_vae.py +0 -0
  378. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/wav2vec.py +0 -0
  379. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/z_image_controlnet.py +0 -0
  380. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/models/z_image_image2lora.py +0 -0
  381. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/controlnet/__init__.py +0 -0
  382. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/controlnet/controlnet_input.py +0 -0
  383. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/data/__init__.py +0 -0
  384. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/lora/__init__.py +0 -0
  385. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/lora/flux.py +0 -0
  386. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/lora/general.py +0 -0
  387. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/lora/merge.py +0 -0
  388. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/lora/reset_rank.py +0 -0
  389. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/__init__.py +0 -0
  390. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/flux2_text_encoder.py +0 -0
  391. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/flux_controlnet.py +0 -0
  392. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/flux_dit.py +0 -0
  393. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/flux_infiniteyou.py +0 -0
  394. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/flux_ipadapter.py +0 -0
  395. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/flux_text_encoder_clip.py +0 -0
  396. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/flux_text_encoder_t5.py +0 -0
  397. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/flux_vae.py +0 -0
  398. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/nexus_gen.py +0 -0
  399. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/nexus_gen_projector.py +0 -0
  400. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/qwen_image_text_encoder.py +0 -0
  401. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/step1x_connector.py +0 -0
  402. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/wan_video_animate_adapter.py +0 -0
  403. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/wan_video_dit.py +0 -0
  404. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/wan_video_image_encoder.py +0 -0
  405. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/wan_video_mot.py +0 -0
  406. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/wan_video_vace.py +0 -0
  407. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/wan_video_vae.py +0 -0
  408. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/state_dict_converters/wans2v_audio_encoder.py +0 -0
  409. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth/utils/xfuser/__init__.py +0 -0
  410. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth.egg-info/dependency_links.txt +0 -0
  411. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth.egg-info/requires.txt +0 -0
  412. {diffsynth-2.0.2 → diffsynth-2.0.4}/diffsynth.egg-info/top_level.txt +0 -0
  413. {diffsynth-2.0.2 → diffsynth-2.0.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffsynth
3
- Version: 2.0.2
3
+ Version: 2.0.4
4
4
  Summary: Enjoy the magic of Diffusion models!
5
5
  Author: ModelScope Team
6
6
  License: Apache-2.0
@@ -33,7 +33,11 @@ We believe that a well-developed open-source code framework can lower the thresh
33
33
 
34
34
  > Currently, the development personnel of this project are limited, with most of the work handled by [Artiprocher](https://github.com/Artiprocher). Therefore, the progress of new feature development will be relatively slow, and the speed of responding to and resolving issues is limited. We apologize for this and ask developers to understand.
35
35
 
36
- - **January 12, 2026**: We trained and open-sourced a text-guided image layer separation model ([Model Link](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Layered-Control)). Given an input image and a textual description, the model isolates the image layer corresponding to the described content.
36
+ - **January 27, 2026**: [Z-Image](https://modelscope.cn/models/Tongyi-MAI/Z-Image) is released, and our [Z-Image-i2L](https://www.modelscope.cn/models/DiffSynth-Studio/Z-Image-i2L) model is released concurrently. You can use it in [ModelScope Studios](https://modelscope.cn/studios/DiffSynth-Studio/Z-Image-i2L). For details, see the [documentation](/docs/zh/Model_Details/Z-Image.md).
37
+
38
+ - **January 19, 2026**: Added support for [FLUX.2-klein-4B](https://modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B) and [FLUX.2-klein-9B](https://modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B) models, including training and inference capabilities. [Documentation](/docs/en/Model_Details/FLUX2.md) and [example code](/examples/flux2/) are now available.
39
+
40
+ - **January 12, 2026**: We trained and open-sourced a text-guided image layer separation model ([Model Link](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Layered-Control)). Given an input image and a textual description, the model isolates the image layer corresponding to the described content. For more details, please refer to our blog post ([Chinese version](https://modelscope.cn/learn/4938), [English version](https://huggingface.co/blog/kelseye/qwen-image-layered-control)).
37
41
 
38
42
  - **December 24, 2025**: Based on Qwen-Image-Edit-2511, we trained an In-Context Editing LoRA model ([Model Link](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Edit-2511-ICEdit-LoRA)). This model takes three images as input (Image A, Image B, and Image C), and automatically analyzes the transformation from Image A to Image B, then applies the same transformation to Image C to generate Image D. For more details, please refer to our blog post ([Chinese version](https://mp.weixin.qq.com/s/41aEiN3lXKGCJs1-we4Q2g), [English version](https://huggingface.co/blog/kelseye/qwen-image-edit-2511-icedit-lora)).
39
43
 
@@ -267,9 +271,14 @@ image.save("image.jpg")
267
271
 
268
272
  Example code for Z-Image is available at: [/examples/z_image/](/examples/z_image/)
269
273
 
270
- | Model ID | Inference | Low-VRAM Inference | Full Training | Full Training Validation | LoRA Training | LoRA Training Validation |
274
+ |Model ID|Inference|Low VRAM Inference|Full Training|Validation After Full Training|LoRA Training|Validation After LoRA Training|
271
275
  |-|-|-|-|-|-|-|
276
+ |[Tongyi-MAI/Z-Image](https://www.modelscope.cn/models/Tongyi-MAI/Z-Image)|[code](/examples/z_image/model_inference/Z-Image.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image.py)|[code](/examples/z_image/model_training/full/Z-Image.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image.py)|[code](/examples/z_image/model_training/lora/Z-Image.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image.py)|
277
+ |[DiffSynth-Studio/Z-Image-i2L](https://www.modelscope.cn/models/DiffSynth-Studio/Z-Image-i2L)|[code](/examples/z_image/model_inference/Z-Image-i2L.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-i2L.py)|-|-|-|-|
272
278
  |[Tongyi-MAI/Z-Image-Turbo](https://www.modelscope.cn/models/Tongyi-MAI/Z-Image-Turbo)|[code](/examples/z_image/model_inference/Z-Image-Turbo.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-Turbo.py)|[code](/examples/z_image/model_training/full/Z-Image-Turbo.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image-Turbo.py)|[code](/examples/z_image/model_training/lora/Z-Image-Turbo.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image-Turbo.py)|
279
+ |[PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|
280
+ |[PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|
281
+ |[PAI/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|
273
282
 
274
283
  </details>
275
284
 
@@ -319,9 +328,13 @@ image.save("image.jpg")
319
328
 
320
329
  Example code for FLUX.2 is available at: [/examples/flux2/](/examples/flux2/)
321
330
 
322
- | Model ID | Inference | Low-VRAM Inference | LoRA Training | LoRA Training Validation |
323
- |-|-|-|-|-|
324
- |[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](/examples/flux2/model_inference/FLUX.2-dev.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|[code](/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)|
331
+ | Model ID | Inference | Low-VRAM Inference | Full Training | Full Training Validation | LoRA Training | LoRA Training Validation |
332
+ |-|-|-|-|-|-|-|
333
+ |[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](/examples/flux2/model_inference/FLUX.2-dev.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)|
334
+ |[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)|
335
+ |[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)|
336
+ |[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)|
337
+ |[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)|
325
338
 
326
339
  </details>
327
340
 
@@ -774,4 +787,3 @@ https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/b54c05c5-d747-47
774
787
  https://github.com/Artiprocher/DiffSynth-Studio/assets/35051019/59fb2f7b-8de0-4481-b79f-0c3a7361a1ea
775
788
 
776
789
  </details>
777
-
@@ -510,6 +510,28 @@ flux2_series = [
510
510
  "model_name": "flux2_vae",
511
511
  "model_class": "diffsynth.models.flux2_vae.Flux2VAE",
512
512
  },
513
+ {
514
+ # Example: ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="transformer/*.safetensors")
515
+ "model_hash": "3bde7b817fec8143028b6825a63180df",
516
+ "model_name": "flux2_dit",
517
+ "model_class": "diffsynth.models.flux2_dit.Flux2DiT",
518
+ "extra_kwargs": {"guidance_embeds": False, "joint_attention_dim": 7680, "num_attention_heads": 24, "num_layers": 5, "num_single_layers": 20}
519
+ },
520
+ {
521
+ # Example: ModelConfig(model_id="black-forest-labs/FLUX.2-klein-9B", origin_file_pattern="text_encoder/*.safetensors")
522
+ "model_hash": "9195f3ea256fcd0ae6d929c203470754",
523
+ "model_name": "z_image_text_encoder",
524
+ "model_class": "diffsynth.models.z_image_text_encoder.ZImageTextEncoder",
525
+ "extra_kwargs": {"model_size": "8B"},
526
+ "state_dict_converter": "diffsynth.utils.state_dict_converters.z_image_text_encoder.ZImageTextEncoderStateDictConverter",
527
+ },
528
+ {
529
+ # Example: ModelConfig(model_id="black-forest-labs/FLUX.2-klein-9B", origin_file_pattern="transformer/*.safetensors")
530
+ "model_hash": "39c6fc48f07bebecedbbaa971ff466c8",
531
+ "model_name": "flux2_dit",
532
+ "model_class": "diffsynth.models.flux2_dit.Flux2DiT",
533
+ "extra_kwargs": {"guidance_embeds": False, "joint_attention_dim": 12288, "num_attention_heads": 32, "num_layers": 8, "num_single_layers": 24}
534
+ },
513
535
  ]
514
536
 
515
537
  z_image_series = [
@@ -10,6 +10,7 @@ class UnifiedDataset(torch.utils.data.Dataset):
10
10
  data_file_keys=tuple(),
11
11
  main_data_operator=lambda x: x,
12
12
  special_operator_map=None,
13
+ max_data_items=None,
13
14
  ):
14
15
  self.base_path = base_path
15
16
  self.metadata_path = metadata_path
@@ -18,6 +19,7 @@ class UnifiedDataset(torch.utils.data.Dataset):
18
19
  self.main_data_operator = main_data_operator
19
20
  self.cached_data_operator = LoadTorchPickle()
20
21
  self.special_operator_map = {} if special_operator_map is None else special_operator_map
22
+ self.max_data_items = max_data_items
21
23
  self.data = []
22
24
  self.cached_data = []
23
25
  self.load_from_cache = metadata_path is None
@@ -97,7 +99,9 @@ class UnifiedDataset(torch.utils.data.Dataset):
97
99
  return data
98
100
 
99
101
  def __len__(self):
100
- if self.load_from_cache:
102
+ if self.max_data_items is not None:
103
+ return self.max_data_items
104
+ elif self.load_from_cache:
101
105
  return len(self.cached_data) * self.repeat
102
106
  else:
103
107
  return len(self.data) * self.repeat
@@ -1,2 +1,2 @@
1
1
  from .npu_compatible_device import parse_device_type, parse_nccl_backend, get_available_device_type, get_device_name
2
- from .npu_compatible_device import IS_NPU_AVAILABLE
2
+ from .npu_compatible_device import IS_NPU_AVAILABLE, IS_CUDA_AVAILABLE
@@ -1,5 +1,5 @@
1
1
  import torch, glob, os
2
- from typing import Optional, Union
2
+ from typing import Optional, Union, Dict
3
3
  from dataclasses import dataclass
4
4
  from modelscope import snapshot_download
5
5
  from huggingface_hub import snapshot_download as hf_snapshot_download
@@ -23,6 +23,7 @@ class ModelConfig:
23
23
  computation_device: Optional[Union[str, torch.device]] = None
24
24
  computation_dtype: Optional[torch.dtype] = None
25
25
  clear_parameters: bool = False
26
+ state_dict: Dict[str, torch.Tensor] = None
26
27
 
27
28
  def check_input(self):
28
29
  if self.path is None and self.model_id is None:
@@ -2,16 +2,25 @@ from safetensors import safe_open
2
2
  import torch, hashlib
3
3
 
4
4
 
5
- def load_state_dict(file_path, torch_dtype=None, device="cpu"):
5
+ def load_state_dict(file_path, torch_dtype=None, device="cpu", pin_memory=False, verbose=0):
6
6
  if isinstance(file_path, list):
7
7
  state_dict = {}
8
8
  for file_path_ in file_path:
9
- state_dict.update(load_state_dict(file_path_, torch_dtype, device))
10
- return state_dict
11
- if file_path.endswith(".safetensors"):
12
- return load_state_dict_from_safetensors(file_path, torch_dtype=torch_dtype, device=device)
9
+ state_dict.update(load_state_dict(file_path_, torch_dtype, device, pin_memory=pin_memory, verbose=verbose))
13
10
  else:
14
- return load_state_dict_from_bin(file_path, torch_dtype=torch_dtype, device=device)
11
+ if verbose >= 1:
12
+ print(f"Loading file [started]: {file_path}")
13
+ if file_path.endswith(".safetensors"):
14
+ state_dict = load_state_dict_from_safetensors(file_path, torch_dtype=torch_dtype, device=device)
15
+ else:
16
+ state_dict = load_state_dict_from_bin(file_path, torch_dtype=torch_dtype, device=device)
17
+ # If load state dict in CPU memory, `pin_memory=True` will make `model.to("cuda")` faster.
18
+ if pin_memory:
19
+ for i in state_dict:
20
+ state_dict[i] = state_dict[i].pin_memory()
21
+ if verbose >= 1:
22
+ print(f"Loading file [done]: {file_path}")
23
+ return state_dict
15
24
 
16
25
 
17
26
  def load_state_dict_from_safetensors(file_path, torch_dtype=None, device="cpu"):
@@ -5,7 +5,7 @@ from .file import load_state_dict
5
5
  import torch
6
6
 
7
7
 
8
- def load_model(model_class, path, config=None, torch_dtype=torch.bfloat16, device="cpu", state_dict_converter=None, use_disk_map=False, module_map=None, vram_config=None, vram_limit=None):
8
+ def load_model(model_class, path, config=None, torch_dtype=torch.bfloat16, device="cpu", state_dict_converter=None, use_disk_map=False, module_map=None, vram_config=None, vram_limit=None, state_dict=None):
9
9
  config = {} if config is None else config
10
10
  # Why do we use `skip_model_initialization`?
11
11
  # It skips the random initialization of model parameters,
@@ -20,7 +20,7 @@ def load_model(model_class, path, config=None, torch_dtype=torch.bfloat16, devic
20
20
  dtypes = [vram_config["offload_dtype"], vram_config["onload_dtype"], vram_config["preparing_dtype"], vram_config["computation_dtype"]]
21
21
  dtype = [d for d in dtypes if d != "disk"][0]
22
22
  if vram_config["offload_device"] != "disk":
23
- state_dict = DiskMap(path, device, torch_dtype=dtype)
23
+ if state_dict is None: state_dict = DiskMap(path, device, torch_dtype=dtype)
24
24
  if state_dict_converter is not None:
25
25
  state_dict = state_dict_converter(state_dict)
26
26
  else:
@@ -35,7 +35,9 @@ def load_model(model_class, path, config=None, torch_dtype=torch.bfloat16, devic
35
35
  # Sometimes a model file contains multiple models,
36
36
  # and DiskMap can load only the parameters of a single model,
37
37
  # avoiding the need to load all parameters in the file.
38
- if use_disk_map:
38
+ if state_dict is not None:
39
+ pass
40
+ elif use_disk_map:
39
41
  state_dict = DiskMap(path, device, torch_dtype=torch_dtype)
40
42
  else:
41
43
  state_dict = load_state_dict(path, torch_dtype, device)
@@ -4,6 +4,7 @@ import numpy as np
4
4
  from einops import repeat, reduce
5
5
  from typing import Union
6
6
  from ..core import AutoTorchModule, AutoWrappedLinear, load_state_dict, ModelConfig, parse_device_type
7
+ from ..core.device.npu_compatible_device import get_device_type
7
8
  from ..utils.lora import GeneralLoRALoader
8
9
  from ..models.model_loader import ModelPool
9
10
  from ..utils.controlnet import ControlNetInput
@@ -61,7 +62,7 @@ class BasePipeline(torch.nn.Module):
61
62
 
62
63
  def __init__(
63
64
  self,
64
- device="cuda", torch_dtype=torch.float16,
65
+ device=get_device_type(), torch_dtype=torch.float16,
65
66
  height_division_factor=64, width_division_factor=64,
66
67
  time_division_factor=None, time_division_remainder=None,
67
68
  ):
@@ -295,6 +296,7 @@ class BasePipeline(torch.nn.Module):
295
296
  vram_config=vram_config,
296
297
  vram_limit=vram_limit,
297
298
  clear_parameters=model_config.clear_parameters,
299
+ state_dict=model_config.state_dict,
298
300
  )
299
301
  return model_pool
300
302
 
@@ -89,13 +89,18 @@ class FlowMatchScheduler():
89
89
  return float(mu)
90
90
 
91
91
  @staticmethod
92
- def set_timesteps_flux2(num_inference_steps=100, denoising_strength=1.0, dynamic_shift_len=1024//16*1024//16):
92
+ def set_timesteps_flux2(num_inference_steps=100, denoising_strength=1.0, dynamic_shift_len=None):
93
93
  sigma_min = 1 / num_inference_steps
94
94
  sigma_max = 1.0
95
95
  num_train_timesteps = 1000
96
96
  sigma_start = sigma_min + (sigma_max - sigma_min) * denoising_strength
97
97
  sigmas = torch.linspace(sigma_start, sigma_min, num_inference_steps)
98
- mu = FlowMatchScheduler.compute_empirical_mu(dynamic_shift_len, num_inference_steps)
98
+ if dynamic_shift_len is None:
99
+ # If you ask me why I set mu=0.8,
100
+ # I can only say that it yields better training results.
101
+ mu = 0.8
102
+ else:
103
+ mu = FlowMatchScheduler.compute_empirical_mu(dynamic_shift_len, num_inference_steps)
99
104
  sigmas = math.exp(mu) / (math.exp(mu) + (1 / sigmas - 1))
100
105
  timesteps = sigmas * num_train_timesteps
101
106
  return sigmas, timesteps
@@ -10,7 +10,7 @@ class ModelLogger:
10
10
  self.num_steps = 0
11
11
 
12
12
 
13
- def on_step_end(self, accelerator: Accelerator, model: torch.nn.Module, save_steps=None):
13
+ def on_step_end(self, accelerator: Accelerator, model: torch.nn.Module, save_steps=None, **kwargs):
14
14
  self.num_steps += 1
15
15
  if save_steps is not None and self.num_steps % save_steps == 0:
16
16
  self.save_model(accelerator, model, f"step-{self.num_steps}.safetensors")
@@ -40,7 +40,7 @@ def launch_training_task(
40
40
  loss = model(data)
41
41
  accelerator.backward(loss)
42
42
  optimizer.step()
43
- model_logger.on_step_end(accelerator, model, save_steps)
43
+ model_logger.on_step_end(accelerator, model, save_steps, loss=loss)
44
44
  scheduler.step()
45
45
  if save_steps is None:
46
46
  model_logger.on_epoch_end(accelerator, model, epoch_id)
@@ -1,4 +1,4 @@
1
- import torch, json
1
+ import torch, json, os
2
2
  from ..core import ModelConfig, load_state_dict
3
3
  from ..utils.controlnet import ControlNetInput
4
4
  from peft import LoraConfig, inject_adapter_in_model
@@ -127,16 +127,67 @@ class DiffusionTrainingModule(torch.nn.Module):
127
127
  if model_id_with_origin_paths is not None:
128
128
  model_id_with_origin_paths = model_id_with_origin_paths.split(",")
129
129
  for model_id_with_origin_path in model_id_with_origin_paths:
130
- model_id, origin_file_pattern = model_id_with_origin_path.split(":")
131
130
  vram_config = self.parse_vram_config(
132
131
  fp8=model_id_with_origin_path in fp8_models,
133
132
  offload=model_id_with_origin_path in offload_models,
134
133
  device=device
135
134
  )
136
- model_configs.append(ModelConfig(model_id=model_id, origin_file_pattern=origin_file_pattern, **vram_config))
135
+ config = self.parse_path_or_model_id(model_id_with_origin_path)
136
+ model_configs.append(ModelConfig(model_id=config.model_id, origin_file_pattern=config.origin_file_pattern, **vram_config))
137
137
  return model_configs
138
138
 
139
+
140
+ def parse_path_or_model_id(self, model_id_with_origin_path, default_value=None):
141
+ if model_id_with_origin_path is None:
142
+ return default_value
143
+ elif os.path.exists(model_id_with_origin_path):
144
+ return ModelConfig(path=model_id_with_origin_path)
145
+ else:
146
+ if ":" not in model_id_with_origin_path:
147
+ raise ValueError(f"Failed to parse model config: {model_id_with_origin_path}. This is neither a valid path nor in the format of `model_id/origin_file_pattern`.")
148
+ split_id = model_id_with_origin_path.rfind(":")
149
+ model_id = model_id_with_origin_path[:split_id]
150
+ origin_file_pattern = model_id_with_origin_path[split_id + 1:]
151
+ return ModelConfig(model_id=model_id, origin_file_pattern=origin_file_pattern)
152
+
153
+
154
+ def auto_detect_lora_target_modules(
155
+ self,
156
+ model: torch.nn.Module,
157
+ search_for_linear=False,
158
+ linear_detector=lambda x: min(x.weight.shape) >= 512,
159
+ block_list_detector=lambda x: isinstance(x, torch.nn.ModuleList) and len(x) > 1,
160
+ name_prefix="",
161
+ ):
162
+ lora_target_modules = []
163
+ if search_for_linear:
164
+ for name, module in model.named_modules():
165
+ module_name = name_prefix + ["", "."][name_prefix != ""] + name
166
+ if isinstance(module, torch.nn.Linear) and linear_detector(module):
167
+ lora_target_modules.append(module_name)
168
+ else:
169
+ for name, module in model.named_children():
170
+ module_name = name_prefix + ["", "."][name_prefix != ""] + name
171
+ lora_target_modules += self.auto_detect_lora_target_modules(
172
+ module,
173
+ search_for_linear=block_list_detector(module),
174
+ linear_detector=linear_detector,
175
+ block_list_detector=block_list_detector,
176
+ name_prefix=module_name,
177
+ )
178
+ return lora_target_modules
139
179
 
180
+
181
+ def parse_lora_target_modules(self, model, lora_target_modules):
182
+ if lora_target_modules == "":
183
+ print("No LoRA target modules specified. The framework will automatically search for them.")
184
+ lora_target_modules = self.auto_detect_lora_target_modules(model)
185
+ print(f"LoRA will be patched at {lora_target_modules}.")
186
+ else:
187
+ lora_target_modules = lora_target_modules.split(",")
188
+ return lora_target_modules
189
+
190
+
140
191
  def switch_pipe_to_training_mode(
141
192
  self,
142
193
  pipe,
@@ -166,7 +217,7 @@ class DiffusionTrainingModule(torch.nn.Module):
166
217
  return
167
218
  model = self.add_lora_to_model(
168
219
  getattr(pipe, lora_base_model),
169
- target_modules=lora_target_modules.split(","),
220
+ target_modules=self.parse_lora_target_modules(getattr(pipe, lora_base_model), lora_target_modules),
170
221
  lora_rank=lora_rank,
171
222
  upcast_dtype=pipe.torch_dtype,
172
223
  )
@@ -2,6 +2,8 @@ from transformers import DINOv3ViTModel, DINOv3ViTImageProcessorFast
2
2
  from transformers.models.dinov3_vit.modeling_dinov3_vit import DINOv3ViTConfig
3
3
  import torch
4
4
 
5
+ from ..core.device.npu_compatible_device import get_device_type
6
+
5
7
 
6
8
  class DINOv3ImageEncoder(DINOv3ViTModel):
7
9
  def __init__(self):
@@ -70,7 +72,7 @@ class DINOv3ImageEncoder(DINOv3ViTModel):
70
72
  }
71
73
  )
72
74
 
73
- def forward(self, image, torch_dtype=torch.bfloat16, device="cuda"):
75
+ def forward(self, image, torch_dtype=torch.bfloat16, device=get_device_type()):
74
76
  inputs = self.processor(images=image, return_tensors="pt")
75
77
  pixel_values = inputs["pixel_values"].to(dtype=torch_dtype, device=device)
76
78
  bool_masked_pos = None
@@ -823,7 +823,13 @@ class Flux2PosEmbed(nn.Module):
823
823
 
824
824
 
825
825
  class Flux2TimestepGuidanceEmbeddings(nn.Module):
826
- def __init__(self, in_channels: int = 256, embedding_dim: int = 6144, bias: bool = False):
826
+ def __init__(
827
+ self,
828
+ in_channels: int = 256,
829
+ embedding_dim: int = 6144,
830
+ bias: bool = False,
831
+ guidance_embeds: bool = True,
832
+ ):
827
833
  super().__init__()
828
834
 
829
835
  self.time_proj = Timesteps(num_channels=in_channels, flip_sin_to_cos=True, downscale_freq_shift=0)
@@ -831,20 +837,24 @@ class Flux2TimestepGuidanceEmbeddings(nn.Module):
831
837
  in_channels=in_channels, time_embed_dim=embedding_dim, sample_proj_bias=bias
832
838
  )
833
839
 
834
- self.guidance_embedder = TimestepEmbedding(
835
- in_channels=in_channels, time_embed_dim=embedding_dim, sample_proj_bias=bias
836
- )
840
+ if guidance_embeds:
841
+ self.guidance_embedder = TimestepEmbedding(
842
+ in_channels=in_channels, time_embed_dim=embedding_dim, sample_proj_bias=bias
843
+ )
844
+ else:
845
+ self.guidance_embedder = None
837
846
 
838
847
  def forward(self, timestep: torch.Tensor, guidance: torch.Tensor) -> torch.Tensor:
839
848
  timesteps_proj = self.time_proj(timestep)
840
849
  timesteps_emb = self.timestep_embedder(timesteps_proj.to(timestep.dtype)) # (N, D)
841
850
 
842
- guidance_proj = self.time_proj(guidance)
843
- guidance_emb = self.guidance_embedder(guidance_proj.to(guidance.dtype)) # (N, D)
844
-
845
- time_guidance_emb = timesteps_emb + guidance_emb
846
-
847
- return time_guidance_emb
851
+ if guidance is not None and self.guidance_embedder is not None:
852
+ guidance_proj = self.time_proj(guidance)
853
+ guidance_emb = self.guidance_embedder(guidance_proj.to(guidance.dtype)) # (N, D)
854
+ time_guidance_emb = timesteps_emb + guidance_emb
855
+ return time_guidance_emb
856
+ else:
857
+ return timesteps_emb
848
858
 
849
859
 
850
860
  class Flux2Modulation(nn.Module):
@@ -882,6 +892,7 @@ class Flux2DiT(torch.nn.Module):
882
892
  axes_dims_rope: Tuple[int, ...] = (32, 32, 32, 32),
883
893
  rope_theta: int = 2000,
884
894
  eps: float = 1e-6,
895
+ guidance_embeds: bool = True,
885
896
  ):
886
897
  super().__init__()
887
898
  self.out_channels = out_channels or in_channels
@@ -892,7 +903,10 @@ class Flux2DiT(torch.nn.Module):
892
903
 
893
904
  # 2. Combined timestep + guidance embedding
894
905
  self.time_guidance_embed = Flux2TimestepGuidanceEmbeddings(
895
- in_channels=timestep_guidance_channels, embedding_dim=self.inner_dim, bias=False
906
+ in_channels=timestep_guidance_channels,
907
+ embedding_dim=self.inner_dim,
908
+ bias=False,
909
+ guidance_embeds=guidance_embeds,
896
910
  )
897
911
 
898
912
  # 3. Modulation (double stream and single stream blocks share modulation parameters, resp.)
@@ -953,34 +967,9 @@ class Flux2DiT(torch.nn.Module):
953
967
  txt_ids: torch.Tensor = None,
954
968
  guidance: torch.Tensor = None,
955
969
  joint_attention_kwargs: Optional[Dict[str, Any]] = None,
956
- return_dict: bool = True,
957
970
  use_gradient_checkpointing=False,
958
971
  use_gradient_checkpointing_offload=False,
959
- ) -> Union[torch.Tensor]:
960
- """
961
- The [`FluxTransformer2DModel`] forward method.
962
-
963
- Args:
964
- hidden_states (`torch.Tensor` of shape `(batch_size, image_sequence_length, in_channels)`):
965
- Input `hidden_states`.
966
- encoder_hidden_states (`torch.Tensor` of shape `(batch_size, text_sequence_length, joint_attention_dim)`):
967
- Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
968
- timestep ( `torch.LongTensor`):
969
- Used to indicate denoising step.
970
- block_controlnet_hidden_states: (`list` of `torch.Tensor`):
971
- A list of tensors that if specified are added to the residuals of transformer blocks.
972
- joint_attention_kwargs (`dict`, *optional*):
973
- A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
974
- `self.processor` in
975
- [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
976
- return_dict (`bool`, *optional*, defaults to `True`):
977
- Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
978
- tuple.
979
-
980
- Returns:
981
- If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
982
- `tuple` where the first element is the sample tensor.
983
- """
972
+ ):
984
973
  # 0. Handle input arguments
985
974
  if joint_attention_kwargs is not None:
986
975
  joint_attention_kwargs = joint_attention_kwargs.copy()
@@ -992,7 +981,9 @@ class Flux2DiT(torch.nn.Module):
992
981
 
993
982
  # 1. Calculate timestep embedding and modulation parameters
994
983
  timestep = timestep.to(hidden_states.dtype) * 1000
995
- guidance = guidance.to(hidden_states.dtype) * 1000
984
+
985
+ if guidance is not None:
986
+ guidance = guidance.to(hidden_states.dtype) * 1000
996
987
 
997
988
  temb = self.time_guidance_embed(timestep, guidance)
998
989
 
@@ -9,6 +9,7 @@ import numpy as np
9
9
  import torch.nn.functional as F
10
10
  from einops import rearrange, repeat
11
11
  from .wan_video_dit import flash_attention
12
+ from ..core.device.npu_compatible_device import get_device_type
12
13
  from ..core.gradient import gradient_checkpoint_forward
13
14
 
14
15
 
@@ -373,7 +374,7 @@ class FinalLayer_FP32(nn.Module):
373
374
  B, N, C = x.shape
374
375
  T, _, _ = latent_shape
375
376
 
376
- with amp.autocast('cuda', dtype=torch.float32):
377
+ with amp.autocast(get_device_type(), dtype=torch.float32):
377
378
  shift, scale = self.adaLN_modulation(t).unsqueeze(2).chunk(2, dim=-1) # [B, T, 1, C]
378
379
  x = modulate_fp32(self.norm_final, x.view(B, T, -1, C), shift, scale).view(B, N, C)
379
380
  x = self.linear(x)
@@ -583,7 +584,7 @@ class LongCatSingleStreamBlock(nn.Module):
583
584
  T, _, _ = latent_shape # S != T*H*W in case of CP split on H*W.
584
585
 
585
586
  # compute modulation params in fp32
586
- with amp.autocast(device_type='cuda', dtype=torch.float32):
587
+ with amp.autocast(device_type=get_device_type(), dtype=torch.float32):
587
588
  shift_msa, scale_msa, gate_msa, \
588
589
  shift_mlp, scale_mlp, gate_mlp = \
589
590
  self.adaLN_modulation(t).unsqueeze(2).chunk(6, dim=-1) # [B, T, 1, C]
@@ -602,7 +603,7 @@ class LongCatSingleStreamBlock(nn.Module):
602
603
  else:
603
604
  x_s = attn_outputs
604
605
 
605
- with amp.autocast(device_type='cuda', dtype=torch.float32):
606
+ with amp.autocast(device_type=get_device_type(), dtype=torch.float32):
606
607
  x = x + (gate_msa * x_s.view(B, -1, N//T, C)).view(B, -1, C) # [B, N, C]
607
608
  x = x.to(x_dtype)
608
609
 
@@ -615,7 +616,7 @@ class LongCatSingleStreamBlock(nn.Module):
615
616
  # ffn with modulation
616
617
  x_m = modulate_fp32(self.mod_norm_ffn, x.view(B, -1, N//T, C), shift_mlp, scale_mlp).view(B, -1, C)
617
618
  x_s = self.ffn(x_m)
618
- with amp.autocast(device_type='cuda', dtype=torch.float32):
619
+ with amp.autocast(device_type=get_device_type(), dtype=torch.float32):
619
620
  x = x + (gate_mlp * x_s.view(B, -1, N//T, C)).view(B, -1, C) # [B, N, C]
620
621
  x = x.to(x_dtype)
621
622
 
@@ -797,7 +798,7 @@ class LongCatVideoTransformer3DModel(torch.nn.Module):
797
798
 
798
799
  hidden_states = self.x_embedder(hidden_states) # [B, N, C]
799
800
 
800
- with amp.autocast(device_type='cuda', dtype=torch.float32):
801
+ with amp.autocast(device_type=get_device_type(), dtype=torch.float32):
801
802
  t = self.t_embedder(timestep.float().flatten(), dtype=torch.float32).reshape(B, N_t, -1) # [B, T, C_t]
802
803
 
803
804
  encoder_hidden_states = self.y_embedder(encoder_hidden_states) # [B, 1, N_token, C]
@@ -29,7 +29,7 @@ class ModelPool:
29
29
  module_map = None
30
30
  return module_map
31
31
 
32
- def load_model_file(self, config, path, vram_config, vram_limit=None):
32
+ def load_model_file(self, config, path, vram_config, vram_limit=None, state_dict=None):
33
33
  model_class = self.import_model_class(config["model_class"])
34
34
  model_config = config.get("extra_kwargs", {})
35
35
  if "state_dict_converter" in config:
@@ -43,6 +43,7 @@ class ModelPool:
43
43
  state_dict_converter,
44
44
  use_disk_map=True,
45
45
  vram_config=vram_config, module_map=module_map, vram_limit=vram_limit,
46
+ state_dict=state_dict,
46
47
  )
47
48
  return model
48
49
 
@@ -59,7 +60,7 @@ class ModelPool:
59
60
  }
60
61
  return vram_config
61
62
 
62
- def auto_load_model(self, path, vram_config=None, vram_limit=None, clear_parameters=False):
63
+ def auto_load_model(self, path, vram_config=None, vram_limit=None, clear_parameters=False, state_dict=None):
63
64
  print(f"Loading models from: {json.dumps(path, indent=4)}")
64
65
  if vram_config is None:
65
66
  vram_config = self.default_vram_config()
@@ -67,7 +68,7 @@ class ModelPool:
67
68
  loaded = False
68
69
  for config in MODEL_CONFIGS:
69
70
  if config["model_hash"] == model_hash:
70
- model = self.load_model_file(config, path, vram_config, vram_limit=vram_limit)
71
+ model = self.load_model_file(config, path, vram_config, vram_limit=vram_limit, state_dict=state_dict)
71
72
  if clear_parameters: self.clear_parameters(model)
72
73
  self.model.append(model)
73
74
  model_name = config["model_name"]
@@ -583,7 +583,7 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2_5_VLPreTrainedModel, GenerationMi
583
583
  is_compileable = model_kwargs["past_key_values"].is_compileable and self._supports_static_cache
584
584
  is_compileable = is_compileable and not self.generation_config.disable_compile
585
585
  if is_compileable and (
586
- self.device.type == "cuda" or generation_config.compile_config._compile_all_devices
586
+ self.device.type in ["cuda", "npu"] or generation_config.compile_config._compile_all_devices
587
587
  ):
588
588
  os.environ["TOKENIZERS_PARALLELISM"] = "0"
589
589
  model_forward = self.get_compiled_call(generation_config.compile_config)
@@ -2,6 +2,8 @@ from transformers.models.siglip.modeling_siglip import SiglipVisionTransformer,
2
2
  from transformers import SiglipImageProcessor, Siglip2VisionModel, Siglip2VisionConfig, Siglip2ImageProcessorFast
3
3
  import torch
4
4
 
5
+ from diffsynth.core.device.npu_compatible_device import get_device_type
6
+
5
7
 
6
8
  class Siglip2ImageEncoder(SiglipVisionTransformer):
7
9
  def __init__(self):
@@ -47,7 +49,7 @@ class Siglip2ImageEncoder(SiglipVisionTransformer):
47
49
  }
48
50
  )
49
51
 
50
- def forward(self, image, torch_dtype=torch.bfloat16, device="cuda"):
52
+ def forward(self, image, torch_dtype=torch.bfloat16, device=get_device_type()):
51
53
  pixel_values = self.processor(images=[image], return_tensors="pt")["pixel_values"]
52
54
  pixel_values = pixel_values.to(device=device, dtype=torch_dtype)
53
55
  output_attentions = False