diffsynth-engine 0.3.6.dev2__tar.gz → 0.3.6.dev4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/PKG-INFO +2 -1
  2. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/pipelines/base.py +14 -8
  3. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/pipelines/controlnet_helper.py +1 -1
  4. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/pipelines/flux_image.py +11 -2
  5. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/download.py +20 -15
  6. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine.egg-info/PKG-INFO +2 -1
  7. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine.egg-info/requires.txt +1 -0
  8. diffsynth_engine-0.3.6.dev4/docs/tutorial.md +241 -0
  9. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/docs/tutorial_zh.md +25 -0
  10. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/pyproject.toml +2 -1
  11. diffsynth_engine-0.3.6.dev2/docs/tutorial.md +0 -1
  12. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/.gitignore +0 -0
  13. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/.pre-commit-config.yaml +0 -0
  14. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/LICENSE +0 -0
  15. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/MANIFEST.in +0 -0
  16. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/README.md +0 -0
  17. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/assets/dingtalk.png +0 -0
  18. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/assets/showcase.jpeg +0 -0
  19. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/__init__.py +0 -0
  20. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/__init__.py +0 -0
  21. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/__init__.py +0 -0
  22. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py +0 -0
  23. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/flow_match/__init__.py +0 -0
  24. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/flow_match/flow_beta.py +0 -0
  25. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/flow_match/flow_ddim.py +0 -0
  26. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/flow_match/recifited_flow.py +0 -0
  27. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/__init__.py +0 -0
  28. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/beta.py +0 -0
  29. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/ddim.py +0 -0
  30. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/exponential.py +0 -0
  31. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/karras.py +0 -0
  32. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/linear.py +0 -0
  33. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/sgm_uniform.py +0 -0
  34. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/__init__.py +0 -0
  35. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/flow_match/__init__.py +0 -0
  36. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/flow_match/flow_match_euler.py +0 -0
  37. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/stable_diffusion/__init__.py +0 -0
  38. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/stable_diffusion/brownian_tree.py +0 -0
  39. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/stable_diffusion/ddpm.py +0 -0
  40. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/stable_diffusion/deis.py +0 -0
  41. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_2m.py +0 -0
  42. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_2m_sde.py +0 -0
  43. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_3m_sde.py +0 -0
  44. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/stable_diffusion/epsilon.py +0 -0
  45. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/stable_diffusion/euler.py +0 -0
  46. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/algorithm/sampler/stable_diffusion/euler_ancestral.py +0 -0
  47. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/components/vae.json +0 -0
  48. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/flux/flux_dit.json +0 -0
  49. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/flux/flux_text_encoder.json +0 -0
  50. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/flux/flux_vae.json +0 -0
  51. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/sd/sd_text_encoder.json +0 -0
  52. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/sd/sd_unet.json +0 -0
  53. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/sd3/sd3_dit.json +0 -0
  54. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/sd3/sd3_text_encoder.json +0 -0
  55. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/sdxl/sdxl_text_encoder.json +0 -0
  56. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/sdxl/sdxl_unet.json +0 -0
  57. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/wan/dit/1.3b-t2v.json +0 -0
  58. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/wan/dit/14b-flf2v.json +0 -0
  59. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/wan/dit/14b-i2v.json +0 -0
  60. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/models/wan/dit/14b-t2v.json +0 -0
  61. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/merges.txt +0 -0
  62. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/special_tokens_map.json +0 -0
  63. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/tokenizer_config.json +0 -0
  64. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/vocab.json +0 -0
  65. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/special_tokens_map.json +0 -0
  66. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/spiece.model +0 -0
  67. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/tokenizer.json +0 -0
  68. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/tokenizer_config.json +0 -0
  69. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/merges.txt +0 -0
  70. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/special_tokens_map.json +0 -0
  71. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/tokenizer_config.json +0 -0
  72. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/vocab.json +0 -0
  73. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/merges.txt +0 -0
  74. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/special_tokens_map.json +0 -0
  75. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/tokenizer_config.json +0 -0
  76. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/vocab.json +0 -0
  77. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/special_tokens_map.json +0 -0
  78. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/spiece.model +0 -0
  79. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json +0 -0
  80. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json +0 -0
  81. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/kernels/__init__.py +0 -0
  82. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/__init__.py +0 -0
  83. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/base.py +0 -0
  84. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/basic/__init__.py +0 -0
  85. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/basic/attention.py +0 -0
  86. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/basic/lora.py +0 -0
  87. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/basic/relative_position_emb.py +0 -0
  88. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/basic/timestep.py +0 -0
  89. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/basic/transformer_helper.py +0 -0
  90. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/basic/unet_helper.py +0 -0
  91. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/flux/__init__.py +0 -0
  92. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/flux/flux_controlnet.py +0 -0
  93. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/flux/flux_dit.py +0 -0
  94. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/flux/flux_ipadapter.py +0 -0
  95. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/flux/flux_redux.py +0 -0
  96. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/flux/flux_text_encoder.py +0 -0
  97. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/flux/flux_vae.py +0 -0
  98. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sd/__init__.py +0 -0
  99. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sd/sd_controlnet.py +0 -0
  100. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sd/sd_text_encoder.py +0 -0
  101. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sd/sd_unet.py +0 -0
  102. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sd/sd_vae.py +0 -0
  103. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sd3/__init__.py +0 -0
  104. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sd3/sd3_dit.py +0 -0
  105. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sd3/sd3_text_encoder.py +0 -0
  106. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sd3/sd3_vae.py +0 -0
  107. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sdxl/__init__.py +0 -0
  108. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sdxl/sdxl_controlnet.py +0 -0
  109. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sdxl/sdxl_text_encoder.py +0 -0
  110. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sdxl/sdxl_unet.py +0 -0
  111. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/sdxl/sdxl_vae.py +0 -0
  112. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/text_encoder/__init__.py +0 -0
  113. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/text_encoder/clip.py +0 -0
  114. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/text_encoder/siglip.py +0 -0
  115. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/text_encoder/t5.py +0 -0
  116. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/utils.py +0 -0
  117. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/vae/__init__.py +0 -0
  118. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/vae/vae.py +0 -0
  119. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/wan/__init__.py +0 -0
  120. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/wan/wan_dit.py +0 -0
  121. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/wan/wan_image_encoder.py +0 -0
  122. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/wan/wan_text_encoder.py +0 -0
  123. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/models/wan/wan_vae.py +0 -0
  124. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/pipelines/__init__.py +0 -0
  125. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/pipelines/sd_image.py +0 -0
  126. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/pipelines/sdxl_image.py +0 -0
  127. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/pipelines/wan_video.py +0 -0
  128. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/processor/__init__.py +0 -0
  129. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/processor/canny_processor.py +0 -0
  130. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/processor/depth_processor.py +0 -0
  131. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/tokenizers/__init__.py +0 -0
  132. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/tokenizers/base.py +0 -0
  133. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/tokenizers/clip.py +0 -0
  134. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/tokenizers/t5.py +0 -0
  135. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/tokenizers/wan.py +0 -0
  136. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/tools/__init__.py +0 -0
  137. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/tools/flux_inpainting_tool.py +0 -0
  138. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/tools/flux_outpainting_tool.py +0 -0
  139. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/tools/flux_reference_tool.py +0 -0
  140. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/tools/flux_replace_tool.py +0 -0
  141. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/__init__.py +0 -0
  142. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/constants.py +0 -0
  143. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/env.py +0 -0
  144. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/flag.py +0 -0
  145. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/fp8_linear.py +0 -0
  146. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/gguf.py +0 -0
  147. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/image.py +0 -0
  148. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/loader.py +0 -0
  149. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/lock.py +0 -0
  150. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/logging.py +0 -0
  151. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/offload.py +0 -0
  152. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/onnx.py +0 -0
  153. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/parallel.py +0 -0
  154. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/platform.py +0 -0
  155. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/prompt.py +0 -0
  156. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine/utils/video.py +0 -0
  157. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine.egg-info/SOURCES.txt +0 -0
  158. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine.egg-info/dependency_links.txt +0 -0
  159. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/diffsynth_engine.egg-info/top_level.txt +0 -0
  160. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/setup.cfg +0 -0
  161. {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev4}/setup.py +0 -0
--- diffsynth_engine-0.3.6.dev2/PKG-INFO
+++ diffsynth_engine-0.3.6.dev4/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.3.6.dev2
+Version: 0.3.6.dev4
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
@@ -24,6 +24,7 @@ Requires-Dist: pillow
 Requires-Dist: imageio[ffmpeg]
 Requires-Dist: yunchang; sys_platform == "linux"
 Requires-Dist: onnxruntime
+Requires-Dist: opencv-python
 Provides-Extra: dev
 Requires-Dist: diffusers==0.31.0; extra == "dev"
 Requires-Dist: transformers==4.45.2; extra == "dev"
--- diffsynth_engine-0.3.6.dev2/diffsynth_engine/pipelines/base.py
+++ diffsynth_engine-0.3.6.dev4/diffsynth_engine/pipelines/base.py
@@ -91,15 +91,21 @@ class BasePipeline:
 
     @staticmethod
     def load_model_checkpoint(
-        checkpoint_path: str, device: str = "cpu", dtype: torch.dtype = torch.float16
+        checkpoint_path: str | List[str], device: str = "cpu", dtype: torch.dtype = torch.float16
     ) -> Dict[str, torch.Tensor]:
-        if not os.path.isfile(checkpoint_path):
-            FileNotFoundError(f"{checkpoint_path} is not a file")
-        if checkpoint_path.endswith(".safetensors"):
-            return load_file(checkpoint_path, device=device)
-        if checkpoint_path.endswith(".gguf"):
-            return load_gguf_checkpoint(checkpoint_path, device=device, dtype=dtype)
-        raise ValueError(f"{checkpoint_path} is not a .safetensors or .gguf file")
+        if isinstance(checkpoint_path, str):
+            checkpoint_path = [checkpoint_path]
+        state_dict = {}
+        for path in checkpoint_path:
+            if not os.path.isfile(path):
+                raise FileNotFoundError(f"{path} is not a file")
+            elif path.endswith(".safetensors"):
+                state_dict.update(**load_file(path, device=device))
+            elif path.endswith(".gguf"):
+                state_dict.update(**load_gguf_checkpoint(path, device=device, dtype=dtype))
+            else:
+                raise ValueError(f"{path} is not a .safetensors or .gguf file")
+        return state_dict
 
     @staticmethod
     def validate_image_size(
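To illustrate the new signature, here is a minimal sketch of loading sharded checkpoints through the revised loader; the shard file names are placeholders, not files shipped with the package.

```python
# Minimal sketch: merging sharded checkpoints via the revised loader.
# The shard file names below are placeholders.
import torch

from diffsynth_engine.pipelines.base import BasePipeline

state_dict = BasePipeline.load_model_checkpoint(
    [
        "model-00001-of-00002.safetensors",  # each shard's tensors are merged
        "model-00002-of-00002.safetensors",  # into a single state dict
    ],
    device="cpu",
    dtype=torch.float16,
)
print(f"loaded {len(state_dict)} tensors")
```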
--- diffsynth_engine-0.3.6.dev2/diffsynth_engine/pipelines/controlnet_helper.py
+++ diffsynth_engine-0.3.6.dev4/diffsynth_engine/pipelines/controlnet_helper.py
@@ -8,8 +8,8 @@ ImageType = Union[Image.Image, torch.Tensor, List[Image.Image], List[torch.Tensor]]
 
 @dataclass
 class ControlNetParams:
-    scale: float
     image: ImageType
+    scale: float = 1.0
     model: Optional[nn.Module] = None
     mask: Optional[ImageType] = None
     control_start: float = 0
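Since `scale` now follows `image` and carries a default, a `ControlNetParams` can be built from just a control image. A minimal sketch, using a placeholder image file:

```python
from PIL import Image

from diffsynth_engine.pipelines.controlnet_helper import ControlNetParams

control_image = Image.open("canny.png")         # placeholder control image
params = ControlNetParams(image=control_image)  # scale falls back to 1.0
stronger = ControlNetParams(image=control_image, scale=1.5)
```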
--- diffsynth_engine-0.3.6.dev2/diffsynth_engine/pipelines/flux_image.py
+++ diffsynth_engine-0.3.6.dev4/diffsynth_engine/pipelines/flux_image.py
@@ -419,9 +419,10 @@ class ControlType(Enum):
     normal = "normal"
     bfl_control = "bfl_control"
     bfl_fill = "bfl_fill"
+    bfl_kontext = "bfl_kontext"
 
     def get_in_channel(self):
-        if self == ControlType.normal:
+        if self in [ControlType.normal, ControlType.bfl_kontext]:
            return 64
         elif self == ControlType.bfl_control:
             return 128
@@ -764,9 +765,15 @@ class FluxImagePipeline(BasePipeline):
         current_step: int,
         total_step: int,
     ):
+        origin_latents_shape = latents.shape
         if self.control_type != ControlType.normal:
             controlnet_param = controlnet_params[0]
-            latents = torch.cat((latents, controlnet_param.image * controlnet_param.scale), dim=1)
+            if self.control_type == ControlType.bfl_kontext:
+                latents = torch.cat((latents, controlnet_param.image * controlnet_param.scale), dim=2)
+                image_ids = image_ids.repeat(1, 2, 1)
+                image_ids[:, image_ids.shape[1] // 2 :, 0] += 1
+            else:
+                latents = torch.cat((latents, controlnet_param.image * controlnet_param.scale), dim=1)
             latents = latents.to(self.dtype)
             controlnet_params = []
 
@@ -797,6 +804,8 @@ class FluxImagePipeline(BasePipeline):
             controlnet_double_block_output=double_block_output,
             controlnet_single_block_output=single_block_output,
         )
+        if self.control_type == ControlType.bfl_kontext:
+            noise_pred = noise_pred[:, :, : origin_latents_shape[2], : origin_latents_shape[3]]
         return noise_pred
 
     def prepare_latents(
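To make the `bfl_kontext` branch concrete, the following standalone sketch replays the same tensor bookkeeping on dummy shapes; all shapes are illustrative, since the real pipeline derives them from the latent grid.

```python
import torch

latents = torch.randn(1, 16, 64, 64)    # (B, C, H, W) latents being denoised (illustrative)
reference = torch.randn(1, 16, 64, 64)  # reference-image latents of the same shape
image_ids = torch.zeros(1, 1024, 3)     # one positional-id triple per latent patch

h, w = latents.shape[2], latents.shape[3]
latents = torch.cat((latents, reference), dim=2)  # stack the reference along the height axis
image_ids = image_ids.repeat(1, 2, 1)             # duplicate ids for the appended patches
image_ids[:, image_ids.shape[1] // 2 :, 0] += 1   # offset the first id channel so reference
                                                  # patches get distinct positions

noise_pred = torch.randn_like(latents)            # stand-in for the DiT output
noise_pred = noise_pred[:, :, :h, :w]             # crop back to the original latent region
```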
--- diffsynth_engine-0.3.6.dev2/diffsynth_engine/utils/download.py
+++ diffsynth_engine-0.3.6.dev4/diffsynth_engine/utils/download.py
@@ -2,10 +2,11 @@ import os
 import shutil
 import tqdm
 import tempfile
-from typing import Optional
+from typing import List, Optional
 from pathlib import Path
 from urllib.parse import urlparse
 import requests
+import glob
 
 from modelscope import snapshot_download
 from modelscope.hub.api import HubApi
@@ -23,11 +24,11 @@ MODEL_SOURCES = ["modelscope", "civitai"]
 def fetch_model(
     model_uri: str,
     revision: Optional[str] = None,
-    path: Optional[str] = None,
+    path: Optional[str | List[str]] = None,
     access_token: Optional[str] = None,
     source: str = "modelscope",
-    fetch_safetensors: bool = True,
-) -> str:
+    fetch_safetensors: bool = True,  # TODO: supports other formats like GGUF
+) -> str | List[str]:
     if source == "modelscope":
         return fetch_modelscope_model(model_uri, revision, path, access_token, fetch_safetensors)
     if source == "civitai":
@@ -38,7 +39,7 @@ def fetch_model(
 def fetch_modelscope_model(
     model_id: str,
     revision: Optional[str] = None,
-    path: Optional[str] = None,
+    path: Optional[str | List[str]] = None,
     access_token: Optional[str] = None,
     fetch_safetensors: bool = True,
 ) -> str:
@@ -52,12 +53,15 @@ def fetch_modelscope_model(
     directory = os.path.join(DIFFSYNTH_CACHE, "modelscope", model_id, revision if revision else "__version")
     dirpath = snapshot_download(model_id, revision=revision, local_dir=directory, allow_patterns=path)
 
-    if path is not None:
-        path = os.path.join(dirpath, path)
+    if isinstance(path, str):
+        path = glob.glob(os.path.join(dirpath, path))
+        path = path[0] if len(path) == 1 else path
+    elif isinstance(path, list):
+        path = [os.path.join(dirpath, p) for p in path]
     else:
         path = dirpath
 
-    if os.path.isdir(path) and fetch_safetensors:
+    if isinstance(path, str) and os.path.isdir(path) and fetch_safetensors:
         return _fetch_safetensors(path)
     return path
 
@@ -122,16 +126,17 @@ def ensure_directory_exists(filename: str):
     Path(filename).parent.mkdir(parents=True, exist_ok=True)
 
 
-def _fetch_safetensors(dirpath: str) -> str:
+def _fetch_safetensors(dirpath: str) -> str | List[str]:
     all_safetensors = []
     for filename in os.listdir(dirpath):
         if filename.endswith(".safetensors"):
             all_safetensors.append(os.path.join(dirpath, filename))
-    if len(all_safetensors) == 1:
-        logger.info(f"Fetch safetensors file {all_safetensors[0]}")
-        return all_safetensors[0]
-    elif len(all_safetensors) == 0:
+    if len(all_safetensors) == 0:
         logger.error(f"No safetensors file found in {dirpath}")
+        return dirpath
+    elif len(all_safetensors) == 1:
+        all_safetensors = all_safetensors[0]
+        logger.info(f"Fetch safetensors file {all_safetensors}")
     else:
-        logger.error(f"Multiple safetensors files found in {dirpath}, please specify the file name")
-        return dirpath
+        logger.info(f"Fetch safetensors files {all_safetensors}")
+    return all_safetensors
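Because `fetch_model` can now return either a single path or a list of shard paths, downstream code that assumed a `str` needs a small normalization step. A hedged sketch, using the wildcard form shown in the new tutorial:

```python
from diffsynth_engine import fetch_model

model_path = fetch_model(
    "Wan-AI/Wan2.1-T2V-14B",
    path="diffusion_pytorch_model*.safetensors",  # a wildcard may match several shards
)
# normalize str | List[str] to a list before iterating
paths = [model_path] if isinstance(model_path, str) else model_path
for p in paths:
    print(p)
```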
--- diffsynth_engine-0.3.6.dev2/diffsynth_engine.egg-info/PKG-INFO
+++ diffsynth_engine-0.3.6.dev4/diffsynth_engine.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.3.6.dev2
+Version: 0.3.6.dev4
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
@@ -24,6 +24,7 @@ Requires-Dist: pillow
 Requires-Dist: imageio[ffmpeg]
 Requires-Dist: yunchang; sys_platform == "linux"
 Requires-Dist: onnxruntime
+Requires-Dist: opencv-python
 Provides-Extra: dev
 Requires-Dist: diffusers==0.31.0; extra == "dev"
 Requires-Dist: transformers==4.45.2; extra == "dev"
--- diffsynth_engine-0.3.6.dev2/diffsynth_engine.egg-info/requires.txt
+++ diffsynth_engine-0.3.6.dev4/diffsynth_engine.egg-info/requires.txt
@@ -15,6 +15,7 @@ torchsde
 pillow
 imageio[ffmpeg]
 onnxruntime
+opencv-python
 
 [:sys_platform == "linux"]
 yunchang
--- /dev/null
+++ diffsynth_engine-0.3.6.dev4/docs/tutorial.md
@@ -0,0 +1,241 @@
+# DiffSynth-Engine User Guide
+
+## Installation
+
+Before using DiffSynth-Engine, please ensure your device meets the following requirements:
+
+* NVIDIA GPU with CUDA Compute Capability 8.6+ (e.g., RTX 50 Series, RTX 40 Series, RTX 30 Series, see [NVIDIA documentation](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities) for details) or Apple Silicon M-series chips.
+
+Python environment requirement: Python 3.10+.
+
+Use `pip3` to install DiffSynth-Engine from PyPI:
+
+```shell
+pip3 install diffsynth-engine
+```
+
+DiffSynth-Engine also supports installation from source, which provides access to the latest features but may be less stable. We recommend installing the stable version via `pip3`.
+
+```shell
+git clone https://github.com/modelscope/diffsynth-engine.git && cd diffsynth-engine
+pip3 install -e .
+```
+
+## Model Download
+
+DiffSynth-Engine supports loading models from the [ModelScope Model Hub](https://www.modelscope.cn/aigc/models) by model ID. For example, on the [MajicFlus model page](https://www.modelscope.cn/models/MAILAND/majicflus_v1/summary?version=v1.0), we can find the model ID and the corresponding model filename in the image below.
+
+![Image](https://github.com/user-attachments/assets/a6f71768-487d-4376-8974-fe6563f2896c)
+
+Next, download the MajicFlus model with the following code.
+
+```python
+from diffsynth_engine import fetch_model
+
+model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
+```
+
+![Image](https://github.com/user-attachments/assets/596c3383-23b3-4372-a7ce-3c4e1c1ba81a)
+
+For sharded models, specify multiple files using the `path` parameter.
+
+```python
+from diffsynth_engine import fetch_model
+
+model_path = fetch_model("Wan-AI/Wan2.1-T2V-14B", path=[
+    "diffusion_pytorch_model-00001-of-00006.safetensors",
+    "diffusion_pytorch_model-00002-of-00006.safetensors",
+    "diffusion_pytorch_model-00003-of-00006.safetensors",
+    "diffusion_pytorch_model-00004-of-00006.safetensors",
+    "diffusion_pytorch_model-00005-of-00006.safetensors",
+    "diffusion_pytorch_model-00006-of-00006.safetensors",
+])
+```
+
+Wildcards are also supported for matching multiple files.
+
+```python
+from diffsynth_engine import fetch_model
+
+model_path = fetch_model("Wan-AI/Wan2.1-T2V-14B", path="diffusion_pytorch_model*.safetensors")
+```
+
+The file path `model_path` returned by the `fetch_model` function is the path to the downloaded file(s).
+
+## Model Types
+
+Diffusion models come in a wide variety of architectures. Each model is loaded and run for inference by a corresponding pipeline. The model types we currently support include:
+
+| Model Architecture | Example | Pipeline |
+| :----------------- | :----------------------------------------------------------- | :-------------------- |
+| SD1.5 | [DreamShaper](https://www.modelscope.cn/models/MusePublic/DreamShaper_SD_1_5) | `SDImagePipeline` |
+| SDXL | [RealVisXL](https://www.modelscope.cn/models/MusePublic/42_ckpt_SD_XL) | `SDXLImagePipeline` |
+| FLUX | [MajicFlus](https://www.modelscope.cn/models/MAILAND/majicflus_v1/summary?version=v1.0) | `FluxImagePipeline` |
+| Wan2.1 | [Wan2.1-T2V-1.3B](https://modelscope.cn/models/Wan-AI/Wan2.1-T2V-1.3B) | `WanVideoPipeline` |
+| SD1.5 LoRA | [Detail Tweaker](https://www.modelscope.cn/models/MusePublic/Detail_Tweaker_LoRA_xijietiaozheng_LoRA_SD_1_5) | `SDImagePipeline` |
+| SDXL LoRA | [Aesthetic Anime](https://www.modelscope.cn/models/MusePublic/100_lora_SD_XL) | `SDXLImagePipeline` |
+| FLUX LoRA | [ArtAug](https://www.modelscope.cn/models/DiffSynth-Studio/ArtAug-lora-FLUX.1dev-v1) | `FluxImagePipeline` |
+| Wan2.1 LoRA | [Highres-fix](https://modelscope.cn/models/DiffSynth-Studio/Wan2.1-1.3b-lora-highresfix-v1) | `WanVideoPipeline` |
+
+Among these, SD1.5, SDXL, and FLUX are base models for image generation, while Wan2.1 is a base model for video generation. Base models can generate content independently. SD1.5 LoRA, SDXL LoRA, FLUX LoRA, and Wan2.1 LoRA are [LoRA](https://arxiv.org/abs/2106.09685) models. LoRA models are trained as "additional branches" on top of base models to enhance specific capabilities. They must be combined with a base model to be used for generation.
+
+We will continuously update DiffSynth-Engine to support more models.
+
+## Model Inference
+
+After the model is downloaded, load the model with the corresponding pipeline and perform inference.
+
+### Image Generation
+
+The following code calls `FluxImagePipeline` to load the [MajicFlus](https://www.modelscope.cn/models/MAILAND/majicflus_v1/summary?version=v1.0) model and generate an image. To load other types of models, replace `FluxImagePipeline` in the code with the corresponding pipeline.
+
+```python
+from diffsynth_engine import fetch_model, FluxImagePipeline
+
+model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
+pipe = FluxImagePipeline.from_pretrained(model_path, device='cuda:0')
+image = pipe(prompt="a cat")
+image.save("image.png")
+```
+
+Please note that if some necessary modules, like text encoders, are missing from a model repository, the pipeline will automatically download the required files.
+
+#### Detailed Parameters
+
+In the image generation pipeline `pipe`, we can use the following parameters for fine-grained control:
+
+* `prompt`: The prompt, used to describe the content of the generated image, e.g., "a cat".
+* `negative_prompt`: The negative prompt, used to describe content you do not want in the image, e.g., "ugly".
+* `cfg_scale`: The guidance scale for [Classifier-Free Guidance](https://arxiv.org/abs/2207.12598). A larger value usually results in stronger correlation between the text and the image but reduces the diversity of the generated content.
+* `clip_skip`: The number of layers to skip in the [CLIP](https://arxiv.org/abs/2103.00020) text encoder. The more layers skipped, the lower the text-image correlation, but this can lead to interesting variations in the generated content.
+* `input_image`: Input image, used for image-to-image generation.
+* `mask_image`: Mask image, used for image inpainting.
+* `denoising_strength`: The denoising strength. When set to 1, a full generation process is performed. When set to a value between 0 and 1, some information from the input image is preserved.
+* `height`: Image height.
+* `width`: Image width.
+* `num_inference_steps`: The number of inference steps. Generally, more steps lead to longer computation time but higher image quality.
+* `tiled`: Whether to enable tiled processing for the VAE. This option is disabled by default. Enabling it can reduce VRAM usage.
+* `tile_size`: The window size for tiled VAE processing.
+* `tile_stride`: The stride for tiled VAE processing.
+* `seed`: The random seed. A fixed seed ensures reproducible results.
+* `progress_bar_cmd`: The progress bar module. [`tqdm`](https://github.com/tqdm/tqdm) is enabled by default. To disable the progress bar, set it to `lambda x: x`.
+
+#### Loading LoRA
+
+We support loading a LoRA on top of the base model. For example, the following code loads a [Cheongsam LoRA](https://www.modelscope.cn/models/DonRat/MAJICFLUS_SuperChinesestyleheongsam) based on the [MajicFlus](https://www.modelscope.cn/models/MAILAND/majicflus_v1/summary?version=v1.0) model to generate images of cheongsams, which the base model might struggle to create.
+
+```python
+from diffsynth_engine import fetch_model, FluxImagePipeline
+
+model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
+lora_path = fetch_model("DonRat/MAJICFLUS_SuperChinesestyleheongsam", path="麦橘超国风旗袍.safetensors")
+
+pipe = FluxImagePipeline.from_pretrained(model_path, device='cuda:0')
+pipe.load_lora(path=lora_path, scale=1.0)
+image = pipe(prompt="a girl, qipao")
+image.save("image.png")
+```
+
+The `scale` parameter in the code controls the degree of influence the LoRA model has on the base model. A value of 1.0 is usually sufficient. When set to a value greater than 1, the LoRA's effect will be stronger, but this may cause artifacts or degradation in the image content. Please adjust this parameter with caution.
+
+#### VRAM Optimization
+
+DiffSynth-Engine supports various levels of VRAM optimization, allowing models to run on GPUs with low VRAM. For example, at `bfloat16` precision and with no optimization options enabled, the FLUX model requires 35.84GB of VRAM for inference. By adding the parameter `offload_mode="cpu_offload"`, the VRAM requirement drops to 22.83GB. Furthermore, using `offload_mode="sequential_cpu_offload"` reduces the requirement to just 4.30GB, although this comes at the cost of increased inference time.
+
+```python
+from diffsynth_engine import fetch_model, FluxImagePipeline
+
+model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
+pipe = FluxImagePipeline.from_pretrained(model_path, offload_mode="sequential_cpu_offload")
+image = pipe(prompt="a cat")
+image.save("image.png")
+```
+
+### Video Generation
+
+DiffSynth-Engine also supports video generation. The following code loads the [Wan Video Generation Model](https://modelscope.cn/models/Wan-AI/Wan2.1-T2V-1.3B) and generates a video.
+
+```python
+from diffsynth_engine.pipelines.wan_video import WanVideoPipeline, WanModelConfig
+from diffsynth_engine.utils.video import save_video
+from diffsynth_engine import fetch_model
+
+config = WanModelConfig(
+    model_path=fetch_model("MusePublic/wan2.1-1.3b", path="dit.safetensors"),
+    vae_path=fetch_model("muse/wan2.1-vae", path="vae.safetensors"),
+    t5_path=fetch_model("muse/wan2.1-umt5", path="umt5.safetensors"),
+)
+pipe = WanVideoPipeline.from_pretrained(config, device="cuda")
+# The prompt translates to: "A lively puppy runs quickly on a green lawn. The puppy has brownish-yellow fur,
+# its two ears are perked up, and it looks focused and cheerful. Sunlight shines on it,
+# making its fur look especially soft and shiny."
+video = pipe(prompt="一只活泼的小狗在绿茵茵的草地上迅速奔跑。小狗毛色棕黄,两只耳朵立起,神情专注而欢快。阳光洒在它身上,使得毛发看上去格外柔软而闪亮。")
+save_video(video, "video.mp4")
+```
+
+#### Detailed Parameters
+
+In the video generation pipeline `pipe`, we can use the following parameters for fine-grained control:
+
+* `prompt`: The prompt, used to describe the content of the generated video, e.g., "a cat".
+* `negative_prompt`: The negative prompt, used to describe content you do not want in the video, e.g., "ugly".
+* `cfg_scale`: The guidance scale for [Classifier-Free Guidance](https://arxiv.org/abs/2207.12598). A larger value usually results in stronger correlation between the text and the video but reduces the diversity of the generated content.
+* `input_image`: Input image, only effective in image-to-video models, such as [Wan-AI/Wan2.1-I2V-14B-720P](https://modelscope.cn/models/Wan-AI/Wan2.1-I2V-14B-720P).
+* `input_video`: Input video, used for video-to-video generation.
+* `denoising_strength`: The denoising strength. When set to 1, a full generation process is performed. When set to a value between 0 and 1, some information from the input video is preserved.
+* `height`: Video frame height.
+* `width`: Video frame width.
+* `num_frames`: Number of video frames.
+* `num_inference_steps`: The number of inference steps. Generally, more steps lead to longer computation time but higher video quality.
+* `tiled`: Whether to enable tiled processing for the VAE. This option is disabled by default. Enabling it can reduce VRAM usage.
+* `tile_size`: The window size for tiled VAE processing.
+* `tile_stride`: The stride for tiled VAE processing.
+* `seed`: The random seed. A fixed seed ensures reproducible results.
+
+#### Loading LoRA
+
+We support loading a LoRA on top of the base model. For example, the following code loads a [High-Resolution Fix LoRA](https://modelscope.cn/models/DiffSynth-Studio/Wan2.1-1.3b-lora-highresfix-v1) on top of the [Wan2.1-T2V-1.3B](https://modelscope.cn/models/Wan-AI/Wan2.1-T2V-1.3B) model to improve the generation quality at high resolutions.
+
+```python
+from diffsynth_engine.pipelines.wan_video import WanVideoPipeline, WanModelConfig
+from diffsynth_engine.utils.video import save_video
+from diffsynth_engine import fetch_model
+
+config = WanModelConfig(
+    model_path=fetch_model("MusePublic/wan2.1-1.3b", path="dit.safetensors"),
+    vae_path=fetch_model("muse/wan2.1-vae", path="vae.safetensors"),
+    t5_path=fetch_model("muse/wan2.1-umt5", path="umt5.safetensors"),
+)
+lora_path = fetch_model("DiffSynth-Studio/Wan2.1-1.3b-lora-highresfix-v1", path="model.safetensors")
+pipe = WanVideoPipeline.from_pretrained(config, device="cuda")
+pipe.load_lora(path=lora_path, scale=1.0)
+# The prompt translates to: "A lively puppy runs quickly on a green lawn. The puppy has brownish-yellow fur,
+# its two ears are perked up, and it looks focused and cheerful. Sunlight shines on it,
+# making its fur look especially soft and shiny."
+video = pipe(prompt="一只活泼的小狗在绿茵茵的草地上迅速奔跑。小狗毛色棕黄,两只耳朵立起,神情专注而欢快。阳光洒在它身上,使得毛发看上去格外柔软而闪亮。")
+save_video(video, "video.mp4")
+```
+
+The `scale` parameter in the code controls the degree of influence the LoRA model has on the base model. A value of 1.0 is usually sufficient. When set to a value greater than 1, the LoRA's effect will be stronger, but this may cause artifacts or degradation in the video content. Please adjust this parameter with caution.
+
+#### Multi-GPU Parallelism
+
+We support multi-GPU parallel inference for the Wan2.1 model to speed up video generation. Add the parameters `parallelism=4` (the number of GPUs to use) and `use_cfg_parallel=True` to enable parallelism.
+
+```python
+from diffsynth_engine.pipelines.wan_video import WanVideoPipeline, WanModelConfig
+from diffsynth_engine.utils.video import save_video
+from diffsynth_engine import fetch_model
+
+config = WanModelConfig(
+    model_path=fetch_model("MusePublic/wan2.1-1.3b", path="dit.safetensors"),
+    vae_path=fetch_model("muse/wan2.1-vae", path="vae.safetensors"),
+    t5_path=fetch_model("muse/wan2.1-umt5", path="umt5.safetensors"),
+)
+pipe = WanVideoPipeline.from_pretrained(config, device="cuda", parallelism=4, use_cfg_parallel=True)
+# The prompt translates to: "A lively puppy runs quickly on a green lawn. The puppy has brownish-yellow fur,
+# its two ears are perked up, and it looks focused and cheerful. Sunlight shines on it,
+# making its fur look especially soft and shiny."
+video = pipe(prompt="一只活泼的小狗在绿茵茵的草地上迅速奔跑。小狗毛色棕黄,两只耳朵立起,神情专注而欢快。阳光洒在它身上,使得毛发看上去格外柔软而闪亮。")
+save_video(video, "video.mp4")
+```
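The image-generation parameter list in the new tutorial above has no combined example, so here is a hedged sketch exercising several of those parameters at once; all values are illustrative, not recommendations.

```python
from diffsynth_engine import fetch_model, FluxImagePipeline

model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
pipe = FluxImagePipeline.from_pretrained(model_path, device="cuda:0")
image = pipe(
    prompt="a cat",
    negative_prompt="ugly",
    cfg_scale=3.5,            # illustrative guidance strength
    width=1024,
    height=1024,
    num_inference_steps=30,   # more steps: slower but usually higher quality
    seed=42,                  # fixed seed for reproducibility
)
image.save("image.png")
```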
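Likewise for the video-generation parameter list, a sketch combining several parameters; the resolution and frame count below are illustrative.

```python
from diffsynth_engine.pipelines.wan_video import WanVideoPipeline, WanModelConfig
from diffsynth_engine.utils.video import save_video
from diffsynth_engine import fetch_model

config = WanModelConfig(
    model_path=fetch_model("MusePublic/wan2.1-1.3b", path="dit.safetensors"),
    vae_path=fetch_model("muse/wan2.1-vae", path="vae.safetensors"),
    t5_path=fetch_model("muse/wan2.1-umt5", path="umt5.safetensors"),
)
pipe = WanVideoPipeline.from_pretrained(config, device="cuda")
video = pipe(
    prompt="a lively puppy running on a green lawn",
    negative_prompt="blurry, low quality",
    width=832,                # illustrative resolution
    height=480,
    num_frames=81,            # illustrative frame count
    num_inference_steps=30,
    seed=42,
)
save_video(video, "video.mp4")
```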
--- diffsynth_engine-0.3.6.dev2/docs/tutorial_zh.md
+++ diffsynth_engine-0.3.6.dev4/docs/tutorial_zh.md
@@ -35,6 +35,31 @@ from diffsynth_engine import fetch_model
 model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
 ```
 
+![Image](https://github.com/user-attachments/assets/596c3383-23b3-4372-a7ce-3c4e1c1ba81a)
+
+For sharded models, specify multiple files using the `path` parameter.
+
+```python
+from diffsynth_engine import fetch_model
+
+model_path = fetch_model("Wan-AI/Wan2.1-T2V-14B", path=[
+    "diffusion_pytorch_model-00001-of-00006.safetensors",
+    "diffusion_pytorch_model-00002-of-00006.safetensors",
+    "diffusion_pytorch_model-00003-of-00006.safetensors",
+    "diffusion_pytorch_model-00004-of-00006.safetensors",
+    "diffusion_pytorch_model-00005-of-00006.safetensors",
+    "diffusion_pytorch_model-00006-of-00006.safetensors",
+])
+```
+
+Wildcards are also supported for matching multiple files.
+
+```python
+from diffsynth_engine import fetch_model
+
+model_path = fetch_model("Wan-AI/Wan2.1-T2V-14B", path="diffusion_pytorch_model*.safetensors")
+```
+
 The file path `model_path` returned by the `fetch_model` function is the path to the downloaded file(s).
 
 ## Model Types
--- diffsynth_engine-0.3.6.dev2/pyproject.toml
+++ diffsynth_engine-0.3.6.dev4/pyproject.toml
@@ -30,7 +30,8 @@ dependencies = [
     "pillow",
     "imageio[ffmpeg]",
     "yunchang ; sys_platform == 'linux'",
-    "onnxruntime"
+    "onnxruntime",
+    "opencv-python"
 ]
 
 [project.optional-dependencies]
--- diffsynth_engine-0.3.6.dev2/docs/tutorial.md
+++ /dev/null
@@ -1 +0,0 @@
-# ToDo