diffsynth 1.1.1__tar.gz → 1.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. {diffsynth-1.1.1 → diffsynth-1.1.2}/PKG-INFO +1 -1
  2. {diffsynth-1.1.1 → diffsynth-1.1.2}/README.md +19 -5
  3. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/configs/model_config.py +19 -1
  4. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/data/video.py +2 -2
  5. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/BLIP/__init__.py +1 -0
  6. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/BLIP/blip.py +77 -0
  7. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/BLIP/blip_pretrain.py +44 -0
  8. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/BLIP/med.py +947 -0
  9. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/BLIP/vit.py +301 -0
  10. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/__init__.py +148 -0
  11. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/aesthetic.py +148 -0
  12. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/clip.py +97 -0
  13. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/config.py +23 -0
  14. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/hps.py +118 -0
  15. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/imagereward.py +212 -0
  16. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/mps.py +129 -0
  17. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/__init__.py +14 -0
  18. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/coca_model.py +458 -0
  19. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/constants.py +2 -0
  20. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/factory.py +433 -0
  21. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/hf_configs.py +45 -0
  22. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/hf_model.py +176 -0
  23. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/loss.py +270 -0
  24. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/model.py +461 -0
  25. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/modified_resnet.py +181 -0
  26. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/openai.py +144 -0
  27. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/pretrained.py +376 -0
  28. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/push_to_hf_hub.py +243 -0
  29. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/timm_model.py +127 -0
  30. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/tokenizer.py +211 -0
  31. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/transform.py +216 -0
  32. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/transformer.py +727 -0
  33. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/utils.py +60 -0
  34. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/version.py +1 -0
  35. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/pickscore.py +112 -0
  36. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/trainer/__init__.py +1 -0
  37. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/trainer/models/__init__.py +3 -0
  38. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/trainer/models/base_model.py +7 -0
  39. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/trainer/models/clip_model.py +146 -0
  40. diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/trainer/models/cross_modeling.py +292 -0
  41. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/kolors_text_encoder.py +1 -2
  42. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/lora.py +2 -1
  43. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/model_manager.py +16 -7
  44. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd3_text_encoder.py +2 -1
  45. diffsynth-1.1.2/diffsynth/models/stepvideo_dit.py +940 -0
  46. diffsynth-1.1.2/diffsynth/models/stepvideo_text_encoder.py +553 -0
  47. diffsynth-1.1.2/diffsynth/models/stepvideo_vae.py +1132 -0
  48. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/tiler.py +1 -1
  49. diffsynth-1.1.2/diffsynth/models/wan_video_dit.py +799 -0
  50. diffsynth-1.1.2/diffsynth/models/wan_video_image_encoder.py +904 -0
  51. diffsynth-1.1.2/diffsynth/models/wan_video_text_encoder.py +269 -0
  52. diffsynth-1.1.2/diffsynth/models/wan_video_vae.py +808 -0
  53. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/__init__.py +2 -0
  54. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/base.py +12 -2
  55. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/flux_image.py +104 -2
  56. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/omnigen_image.py +1 -1
  57. diffsynth-1.1.2/diffsynth/pipelines/step_video.py +209 -0
  58. diffsynth-1.1.2/diffsynth/pipelines/wan_video.py +276 -0
  59. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/__init__.py +2 -0
  60. diffsynth-1.1.2/diffsynth/prompters/stepvideo_prompter.py +56 -0
  61. diffsynth-1.1.2/diffsynth/prompters/wan_prompter.py +108 -0
  62. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/schedulers/flow_match.py +8 -3
  63. diffsynth-1.1.2/diffsynth/trainers/__init__.py +0 -0
  64. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/trainers/text_to_image.py +27 -2
  65. diffsynth-1.1.2/diffsynth/vram_management/__init__.py +1 -0
  66. diffsynth-1.1.2/diffsynth/vram_management/layers.py +95 -0
  67. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth.egg-info/PKG-INFO +1 -1
  68. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth.egg-info/SOURCES.txt +51 -1
  69. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth.egg-info/requires.txt +1 -0
  70. {diffsynth-1.1.1 → diffsynth-1.1.2}/setup.py +1 -1
  71. {diffsynth-1.1.1 → diffsynth-1.1.2}/LICENSE +0 -0
  72. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/__init__.py +0 -0
  73. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/configs/__init__.py +0 -0
  74. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/controlnets/__init__.py +0 -0
  75. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/controlnets/controlnet_unit.py +0 -0
  76. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/controlnets/processors.py +0 -0
  77. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/data/__init__.py +0 -0
  78. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/data/simple_text_image.py +0 -0
  79. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/ESRGAN/__init__.py +0 -0
  80. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/FastBlend/__init__.py +0 -0
  81. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/FastBlend/api.py +0 -0
  82. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/FastBlend/cupy_kernels.py +0 -0
  83. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/FastBlend/data.py +0 -0
  84. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/FastBlend/patch_match.py +0 -0
  85. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/FastBlend/runners/__init__.py +0 -0
  86. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/FastBlend/runners/accurate.py +0 -0
  87. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/FastBlend/runners/balanced.py +0 -0
  88. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/FastBlend/runners/fast.py +0 -0
  89. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/FastBlend/runners/interpolation.py +0 -0
  90. /diffsynth-1.1.1/diffsynth/extensions/__init__.py → /diffsynth-1.1.2/diffsynth/extensions/ImageQualityMetric/open_clip/generation_utils.py +0 -0
  91. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/extensions/RIFE/__init__.py +0 -0
  92. {diffsynth-1.1.1/diffsynth/processors → diffsynth-1.1.2/diffsynth/extensions}/__init__.py +0 -0
  93. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/__init__.py +0 -0
  94. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/attention.py +0 -0
  95. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/cog_dit.py +0 -0
  96. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/cog_vae.py +0 -0
  97. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/downloader.py +0 -0
  98. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/flux_controlnet.py +0 -0
  99. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/flux_dit.py +0 -0
  100. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/flux_ipadapter.py +0 -0
  101. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/flux_text_encoder.py +0 -0
  102. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/flux_vae.py +0 -0
  103. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/hunyuan_dit.py +0 -0
  104. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/hunyuan_dit_text_encoder.py +0 -0
  105. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/hunyuan_video_dit.py +0 -0
  106. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/hunyuan_video_text_encoder.py +0 -0
  107. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/hunyuan_video_vae_decoder.py +0 -0
  108. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/hunyuan_video_vae_encoder.py +0 -0
  109. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/omnigen.py +0 -0
  110. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd3_dit.py +0 -0
  111. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd3_vae_decoder.py +0 -0
  112. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd3_vae_encoder.py +0 -0
  113. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd_controlnet.py +0 -0
  114. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd_ipadapter.py +0 -0
  115. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd_motion.py +0 -0
  116. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd_text_encoder.py +0 -0
  117. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd_unet.py +0 -0
  118. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd_vae_decoder.py +0 -0
  119. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sd_vae_encoder.py +0 -0
  120. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sdxl_controlnet.py +0 -0
  121. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sdxl_ipadapter.py +0 -0
  122. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sdxl_motion.py +0 -0
  123. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sdxl_text_encoder.py +0 -0
  124. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sdxl_unet.py +0 -0
  125. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sdxl_vae_decoder.py +0 -0
  126. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/sdxl_vae_encoder.py +0 -0
  127. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/svd_image_encoder.py +0 -0
  128. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/svd_unet.py +0 -0
  129. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/svd_vae_decoder.py +0 -0
  130. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/svd_vae_encoder.py +0 -0
  131. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/models/utils.py +0 -0
  132. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/cog_video.py +0 -0
  133. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/dancer.py +0 -0
  134. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/hunyuan_image.py +0 -0
  135. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/hunyuan_video.py +0 -0
  136. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/pipeline_runner.py +0 -0
  137. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/sd3_image.py +0 -0
  138. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/sd_image.py +0 -0
  139. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/sd_video.py +0 -0
  140. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/sdxl_image.py +0 -0
  141. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/sdxl_video.py +0 -0
  142. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/pipelines/svd_video.py +0 -0
  143. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/processors/FastBlend.py +0 -0
  144. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/processors/PILEditor.py +0 -0
  145. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/processors/RIFE.py +0 -0
  146. {diffsynth-1.1.1/diffsynth/tokenizer_configs → diffsynth-1.1.2/diffsynth/processors}/__init__.py +0 -0
  147. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/processors/base.py +0 -0
  148. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/processors/sequencial_processor.py +0 -0
  149. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/base_prompter.py +0 -0
  150. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/cog_prompter.py +0 -0
  151. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/flux_prompter.py +0 -0
  152. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/hunyuan_dit_prompter.py +0 -0
  153. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/hunyuan_video_prompter.py +0 -0
  154. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/kolors_prompter.py +0 -0
  155. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/omnigen_prompter.py +0 -0
  156. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/omost.py +0 -0
  157. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/prompt_refiners.py +0 -0
  158. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/sd3_prompter.py +0 -0
  159. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/sd_prompter.py +0 -0
  160. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/prompters/sdxl_prompter.py +0 -0
  161. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/schedulers/__init__.py +0 -0
  162. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/schedulers/continuous_ode.py +0 -0
  163. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/schedulers/ddim.py +0 -0
  164. {diffsynth-1.1.1/diffsynth/trainers → diffsynth-1.1.2/diffsynth/tokenizer_configs}/__init__.py +0 -0
  165. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/cog/tokenizer/added_tokens.json +0 -0
  166. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/cog/tokenizer/special_tokens_map.json +0 -0
  167. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/cog/tokenizer/spiece.model +0 -0
  168. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/cog/tokenizer/tokenizer_config.json +0 -0
  169. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/flux/tokenizer_1/merges.txt +0 -0
  170. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/flux/tokenizer_1/special_tokens_map.json +0 -0
  171. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/flux/tokenizer_1/tokenizer_config.json +0 -0
  172. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/flux/tokenizer_1/vocab.json +0 -0
  173. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/flux/tokenizer_2/special_tokens_map.json +0 -0
  174. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/flux/tokenizer_2/spiece.model +0 -0
  175. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/flux/tokenizer_2/tokenizer.json +0 -0
  176. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/flux/tokenizer_2/tokenizer_config.json +0 -0
  177. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/special_tokens_map.json +0 -0
  178. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/tokenizer_config.json +0 -0
  179. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab.txt +0 -0
  180. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab_org.txt +0 -0
  181. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/config.json +0 -0
  182. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/special_tokens_map.json +0 -0
  183. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/spiece.model +0 -0
  184. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/tokenizer_config.json +0 -0
  185. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_1/merges.txt +0 -0
  186. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_1/special_tokens_map.json +0 -0
  187. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_1/tokenizer_config.json +0 -0
  188. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_1/vocab.json +0 -0
  189. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_2/special_tokens_map.json +0 -0
  190. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_2/tokenizer.json +0 -0
  191. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_2/tokenizer_config.json +0 -0
  192. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/kolors/tokenizer/tokenizer.model +0 -0
  193. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/kolors/tokenizer/tokenizer_config.json +0 -0
  194. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/kolors/tokenizer/vocab.txt +0 -0
  195. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/merges.txt +0 -0
  196. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/special_tokens_map.json +0 -0
  197. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/tokenizer_config.json +0 -0
  198. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/vocab.json +0 -0
  199. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/merges.txt +0 -0
  200. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/special_tokens_map.json +0 -0
  201. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/tokenizer_config.json +0 -0
  202. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/vocab.json +0 -0
  203. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/merges.txt +0 -0
  204. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/special_tokens_map.json +0 -0
  205. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/tokenizer_config.json +0 -0
  206. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/vocab.json +0 -0
  207. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/special_tokens_map.json +0 -0
  208. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/spiece.model +0 -0
  209. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/tokenizer.json +0 -0
  210. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/tokenizer_config.json +0 -0
  211. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/merges.txt +0 -0
  212. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/special_tokens_map.json +0 -0
  213. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/tokenizer_config.json +0 -0
  214. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/vocab.json +0 -0
  215. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth.egg-info/dependency_links.txt +0 -0
  216. {diffsynth-1.1.1 → diffsynth-1.1.2}/diffsynth.egg-info/top_level.txt +0 -0
  217. {diffsynth-1.1.1 → diffsynth-1.1.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: diffsynth
3
- Version: 1.1.1
3
+ Version: 1.1.2
4
4
  Summary: Enjoy the magic of Diffusion models!
5
5
  Author: Artiprocher
6
6
  Classifier: Programming Language :: Python :: 3
@@ -17,6 +17,8 @@ DiffSynth Studio is a Diffusion engine. We have restructured architectures inclu
17
17
 
18
18
  Until now, DiffSynth Studio has supported the following models:
19
19
 
20
+ * [Wan-Video](https://github.com/Wan-Video/Wan2.1)
21
+ * [StepVideo](https://github.com/stepfun-ai/Step-Video-T2V)
20
22
  * [HunyuanVideo](https://github.com/Tencent/HunyuanVideo)
21
23
  * [CogVideoX](https://huggingface.co/THUDM/CogVideoX-5b)
22
24
  * [FLUX](https://huggingface.co/black-forest-labs/FLUX.1-dev)
@@ -34,11 +36,16 @@ Until now, DiffSynth Studio has supported the following models:
34
36
  * [Stable Diffusion](https://huggingface.co/runwayml/stable-diffusion-v1-5)
35
37
 
36
38
  ## News
39
+
40
+ - **February 25, 2025** We support Wan-Video, a collection of SOTA video synthesis models open-sourced by Alibaba. See [./examples/wanvideo/](./examples/wanvideo/).
41
+
42
+ - **February 17, 2025** We support [StepVideo](https://modelscope.cn/models/stepfun-ai/stepvideo-t2v/summary)! State-of-the-art video synthesis model! See [./examples/stepvideo](./examples/stepvideo/).
43
+
37
44
  - **December 31, 2024** We propose EliGen, a novel framework for precise entity-level controlled text-to-image generation, complemented by an inpainting fusion pipeline to extend its capabilities to image inpainting tasks. EliGen seamlessly integrates with existing community models, such as IP-Adapter and In-Context LoRA, enhancing its versatility. For more details, see [./examples/EntityControl](./examples/EntityControl/).
38
- * Paper: [EliGen: Entity-Level Controlled Image Generation with Regional Attention](https://arxiv.org/abs/2501.01097)
39
- * Github: [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio)
40
- * Model: [ModelScope](https://www.modelscope.cn/models/DiffSynth-Studio/Eligen)
41
- * Training dataset: Coming soon
45
+ - Paper: [EliGen: Entity-Level Controlled Image Generation with Regional Attention](https://arxiv.org/abs/2501.01097)
46
+ - Model: [ModelScope](https://www.modelscope.cn/models/DiffSynth-Studio/Eligen)
47
+ - Online Demo: [ModelScope EliGen Studio](https://www.modelscope.cn/studios/DiffSynth-Studio/EliGen)
48
+ - Training Dataset: [EliGen Train Set](https://www.modelscope.cn/datasets/DiffSynth-Studio/EliGenTrainSet)
42
49
 
43
50
  - **December 19, 2024** We implement advanced VRAM management for HunyuanVideo, making it possible to generate videos at a resolution of 129x720x1280 using 24GB of VRAM, or at 129x512x384 resolution with just 6GB of VRAM. Please refer to [./examples/HunyuanVideo/](./examples/HunyuanVideo/) for more details.
44
51
 
@@ -114,12 +121,19 @@ cd DiffSynth-Studio
114
121
  pip install -e .
115
122
  ```
116
123
 
117
- Or install from pypi:
124
+ Or install from PyPI (releases there lag behind the repository; if you want the latest features, install from source as shown above instead):
118
125
 
119
126
  ```
120
127
  pip install diffsynth
121
128
  ```
122
129
 
130
+ If you encounter issues during installation, they may be caused by the packages we depend on. Please refer to the documentation of the package that caused the problem.
131
+
132
+ * [torch](https://pytorch.org/get-started/locally/)
133
+ * [sentencepiece](https://github.com/google/sentencepiece)
134
+ * [cmake](https://cmake.org)
135
+ * [cupy](https://docs.cupy.dev/en/stable/install.html)
136
+
123
137
  ## Usage (in Python code)
124
138
 
125
139
  The Python examples are in [`examples`](./examples/). We provide an overview here.
@@ -51,6 +51,14 @@ from ..extensions.ESRGAN import RRDBNet
51
51
 
52
52
  from ..models.hunyuan_video_dit import HunyuanVideoDiT
53
53
 
54
+ from ..models.stepvideo_vae import StepVideoVAE
55
+ from ..models.stepvideo_dit import StepVideoModel
56
+
57
+ from ..models.wan_video_dit import WanModel
58
+ from ..models.wan_video_text_encoder import WanTextEncoder
59
+ from ..models.wan_video_image_encoder import WanImageEncoder
60
+ from ..models.wan_video_vae import WanVideoVAE
61
+
54
62
 
55
63
  model_loader_configs = [
56
64
  # These configs are provided for detecting model type automatically.
@@ -103,6 +111,15 @@ model_loader_configs = [
103
111
  (None, "aeb82dce778a03dcb4d726cb03f3c43f", ["hunyuan_video_vae_decoder", "hunyuan_video_vae_encoder"], [HunyuanVideoVAEDecoder, HunyuanVideoVAEEncoder], "diffusers"),
104
112
  (None, "b9588f02e78f5ccafc9d7c0294e46308", ["hunyuan_video_dit"], [HunyuanVideoDiT], "civitai"),
105
113
  (None, "84ef4bd4757f60e906b54aa6a7815dc6", ["hunyuan_video_dit"], [HunyuanVideoDiT], "civitai"),
114
+ (None, "68beaf8429b7c11aa8ca05b1bd0058bd", ["stepvideo_vae"], [StepVideoVAE], "civitai"),
115
+ (None, "5c0216a2132b082c10cb7a0e0377e681", ["stepvideo_dit"], [StepVideoModel], "civitai"),
116
+ (None, "9269f8db9040a9d860eaca435be61814", ["wan_video_dit"], [WanModel], "civitai"),
117
+ (None, "aafcfd9672c3a2456dc46e1cb6e52c70", ["wan_video_dit"], [WanModel], "civitai"),
118
+ (None, "6bfcfb3b342cb286ce886889d519a77e", ["wan_video_dit"], [WanModel], "civitai"),
119
+ (None, "9c8818c2cbea55eca56c7b447df170da", ["wan_video_text_encoder"], [WanTextEncoder], "civitai"),
120
+ (None, "5941c53e207d62f20f9025686193c40b", ["wan_video_image_encoder"], [WanImageEncoder], "civitai"),
121
+ (None, "1378ea763357eea97acdef78e65d6d96", ["wan_video_vae"], [WanVideoVAE], "civitai"),
122
+ (None, "ccc42284ea13e1ad04693284c7a09be6", ["wan_video_vae"], [WanVideoVAE], "civitai"),
106
123
  ]
107
124
  huggingface_model_loader_configs = [
108
125
  # These configs are provided for detecting model type automatically.
@@ -115,7 +132,8 @@ huggingface_model_loader_configs = [
115
132
  ("T5EncoderModel", "diffsynth.models.flux_text_encoder", "flux_text_encoder_2", "FluxTextEncoder2"),
116
133
  ("CogVideoXTransformer3DModel", "diffsynth.models.cog_dit", "cog_dit", "CogDiT"),
117
134
  ("SiglipModel", "transformers.models.siglip.modeling_siglip", "siglip_vision_model", "SiglipVisionModel"),
118
- ("LlamaForCausalLM", "diffsynth.models.hunyuan_video_text_encoder", "hunyuan_video_text_encoder_2", "HunyuanVideoLLMEncoder")
135
+ ("LlamaForCausalLM", "diffsynth.models.hunyuan_video_text_encoder", "hunyuan_video_text_encoder_2", "HunyuanVideoLLMEncoder"),
136
+ ("Step1Model", "diffsynth.models.stepvideo_text_encoder", "stepvideo_text_encoder_2", "STEP1TextEncoder"),
119
137
  ]
120
138
  patch_model_loader_configs = [
121
139
  # These configs are provided for detecting model type automatically.
@@ -135,8 +135,8 @@ class VideoData:
135
135
  frame.save(os.path.join(folder, f"{i}.png"))
136
136
 
137
137
 
138
- def save_video(frames, save_path, fps, quality=9):
139
- writer = imageio.get_writer(save_path, fps=fps, quality=quality)
138
+ def save_video(frames, save_path, fps, quality=9, ffmpeg_params=None):
139
+ writer = imageio.get_writer(save_path, fps=fps, quality=quality, ffmpeg_params=ffmpeg_params)
140
140
  for frame in tqdm(frames, desc="Saving video"):
141
141
  frame = np.array(frame)
142
142
  writer.append_data(frame)
@@ -0,0 +1 @@
1
+ from .blip_pretrain import *
@@ -0,0 +1,77 @@
1
+ '''
2
+ * Adapted from BLIP (https://github.com/salesforce/BLIP)
3
+ '''
4
+
5
+ import warnings
6
+ warnings.filterwarnings("ignore")
7
+
8
+ import torch
9
+ import os
10
+ from urllib.parse import urlparse
11
+ from timm.models.hub import download_cached_file
12
+ from transformers import BertTokenizer
13
+ from .vit import VisionTransformer, interpolate_pos_embed
14
+
15
+
16
def default_bert():
    """Return the expected local directory of the ``bert-base-uncased`` files.

    The result is ``<root>/models/QualityMetric/bert-base-uncased`` where
    ``<root>`` is the directory four levels above the folder that contains
    this module. The path is only computed; nothing is checked on disk.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    root = os.path.abspath(os.path.join(here, '../../../../'))
    return os.path.join(root, 'models', 'QualityMetric', "bert-base-uncased")
21
+
22
+
23
def init_tokenizer(bert_model_path):
    """Load a ``BertTokenizer`` and register the extra special tokens.

    Args:
        bert_model_path: location handed to ``BertTokenizer.from_pretrained``.

    Returns:
        The tokenizer, with ``[DEC]`` set as its BOS token, ``[ENC]`` added as
        an additional special token, and ``enc_token_id`` set to the id of the
        first additional special token.
    """
    tok = BertTokenizer.from_pretrained(bert_model_path)
    # Registration order is kept: BOS token first, then the encoder marker.
    for special in ({'bos_token': '[DEC]'}, {'additional_special_tokens': ['[ENC]']}):
        tok.add_special_tokens(special)
    tok.enc_token_id = tok.additional_special_tokens_ids[0]
    return tok
29
+
30
+
31
def create_vit(vit, image_size, use_grad_checkpointing=False, ckpt_layer=0, drop_path_rate=0):
    """Build the ``VisionTransformer`` backbone for the requested size.

    Args:
        vit: ``'base'`` (width 768, depth 12, 12 heads) or ``'large'``
            (width 1024, depth 24, 16 heads). Both use a patch size of 16.
        image_size: forwarded as ``img_size``.
        use_grad_checkpointing: forwarded to the transformer unchanged.
        ckpt_layer: forwarded to the transformer unchanged.
        drop_path_rate: stochastic-depth rate. A falsy value (the default
            ``0``) selects the per-size default: ``0`` for ``'base'`` and
            ``0.1`` for ``'large'``.

    Returns:
        ``(visual_encoder, vision_width)``.

    Raises:
        AssertionError: if ``vit`` is neither ``'base'`` nor ``'large'``.
    """
    # (embed_dim, depth, num_heads, default drop-path rate) per model size.
    presets = {
        'base': (768, 12, 12, 0),
        'large': (1024, 24, 16, 0.1),
    }
    assert vit in presets, "vit parameter must be base or large"
    vision_width, depth, num_heads, default_drop_path = presets[vit]

    # The original wrote ``0.1 or drop_path_rate`` for 'large', which always
    # evaluates to 0.1 and discards the caller's value. Put the argument
    # first so an explicit non-zero rate is honoured, while a falsy argument
    # still falls back to the size default.
    visual_encoder = VisionTransformer(
        img_size=image_size, patch_size=16, embed_dim=vision_width, depth=depth,
        num_heads=num_heads, use_grad_checkpointing=use_grad_checkpointing, ckpt_layer=ckpt_layer,
        drop_path_rate=drop_path_rate or default_drop_path,
    )
    return visual_encoder, vision_width
47
+
48
+
49
def is_url(url_or_filename):
    """Return True when the argument parses with an ``http`` or ``https`` scheme."""
    scheme = urlparse(url_or_filename).scheme
    return scheme == "http" or scheme == "https"
52
+
53
def load_checkpoint(model, url_or_filename):
    """Load a checkpoint into ``model``, tolerating shape mismatches.

    Args:
        model: module exposing ``visual_encoder`` (and optionally
            ``visual_encoder_m``) whose weights are to be filled.
        url_or_filename: an http(s) URL, fetched through
            ``download_cached_file``, or a path to an existing local file.

    Returns:
        ``(model, msg)`` where ``msg`` is the result of
        ``model.load_state_dict(..., strict=False)``.

    Raises:
        RuntimeError: if the argument is neither an http(s) URL nor an
            existing file.
    """
    if is_url(url_or_filename):
        checkpoint_path = download_cached_file(url_or_filename, check_hash=False, progress=True)
    elif os.path.isfile(url_or_filename):
        checkpoint_path = url_or_filename
    else:
        raise RuntimeError('checkpoint url or path is invalid')

    # NOTE(security): torch.load unpickles the file, and the download above
    # is not hash-checked. Only point this at checkpoints you trust.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    state_dict = checkpoint['model']

    # Snapshot the model's own state once; rebuilding it for every key in the
    # loop below would repeat the full traversal of the module each time.
    model_state = model.state_dict()

    # Resize positional embeddings to fit the target encoder(s).
    state_dict['visual_encoder.pos_embed'] = interpolate_pos_embed(
        state_dict['visual_encoder.pos_embed'], model.visual_encoder)
    if 'visual_encoder_m.pos_embed' in model_state:
        state_dict['visual_encoder_m.pos_embed'] = interpolate_pos_embed(
            state_dict['visual_encoder_m.pos_embed'], model.visual_encoder_m)

    # Drop checkpoint tensors whose shape disagrees with the model so the
    # non-strict load below skips them instead of raising.
    for key, target in model_state.items():
        if key in state_dict and state_dict[key].shape != target.shape:
            print(key, ": ", state_dict[key].shape, ', ', target.shape)
            del state_dict[key]

    msg = model.load_state_dict(state_dict, strict=False)
    print('load checkpoint from %s'%url_or_filename)
    return model, msg
77
+
@@ -0,0 +1,44 @@
1
+ '''
2
+ * Adapted from BLIP (https://github.com/salesforce/BLIP)
3
+ '''
4
+
5
+ import transformers
6
+ transformers.logging.set_verbosity_error()
7
+
8
+ from torch import nn
9
+ import os
10
+ from .med import BertConfig, BertModel
11
+ from .blip import create_vit, init_tokenizer
12
+
13
class BLIP_Pretrain(nn.Module):
    """Vision encoder, BERT text encoder, and two projection heads."""

    def __init__(self,
                 med_config = "med_config.json",
                 image_size = 224,
                 vit = 'base',
                 vit_grad_ckpt = False,
                 vit_ckpt_layer = 0,
                 embed_dim = 256,
                 queue_size = 57600,
                 momentum = 0.995,
                 bert_model_path = ""
                 ):
        """
        Args:
            med_config (str): path to the JSON configuration read by ``BertConfig.from_json_file``
            image_size (int): input image size handed to ``create_vit``
            vit (str): model size of the vision transformer
            vit_grad_ckpt (bool): forwarded to ``create_vit`` as ``use_grad_checkpointing``
            vit_ckpt_layer (int): forwarded to ``create_vit`` as ``ckpt_layer``
            embed_dim (int): output width of both projection layers
            queue_size (int): accepted but not used by this constructor
            momentum (float): accepted but not used by this constructor
            bert_model_path (str): location handed to ``init_tokenizer``
        """
        super().__init__()

        # Image tower; the drop-path argument is always passed as 0 here.
        self.visual_encoder, vis_width = create_vit(
            vit,
            image_size,
            use_grad_checkpointing=vit_grad_ckpt,
            ckpt_layer=vit_ckpt_layer,
            drop_path_rate=0,
        )

        self.tokenizer = init_tokenizer(bert_model_path)

        # Text tower, told the width of the vision features via its config.
        bert_cfg = BertConfig.from_json_file(med_config)
        bert_cfg.encoder_width = vis_width
        self.text_encoder = BertModel(config=bert_cfg, add_pooling_layer=False)

        # Linear heads mapping each tower's width to ``embed_dim``.
        self.vision_proj = nn.Linear(vis_width, embed_dim)
        self.text_proj = nn.Linear(self.text_encoder.config.hidden_size, embed_dim)
44
+