diffsynth-engine 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. diffsynth_engine-0.2.0/.github/workflows/python-publish.yml +41 -0
  2. diffsynth_engine-0.2.0/.gitignore +9 -0
  3. diffsynth_engine-0.2.0/.pre-commit-config.yaml +11 -0
  4. diffsynth_engine-0.2.0/PKG-INFO +34 -0
  5. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/README.md +14 -14
  6. diffsynth_engine-0.2.0/assets/dingtalk.png +0 -0
  7. diffsynth_engine-0.2.0/assets/showcase.jpeg +0 -0
  8. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/__init__.py +3 -0
  9. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/flow_match/recifited_flow.py +16 -14
  10. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/ddim.py +0 -3
  11. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/sgm_uniform.py +0 -3
  12. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/flow_match/flow_match_euler.py +1 -1
  13. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/components/vae.json +254 -0
  14. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/flux/flux_dit.json +105 -0
  15. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/flux/flux_text_encoder.json +20 -0
  16. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/flux/flux_vae.json +250 -0
  17. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/sd/sd_text_encoder.json +220 -0
  18. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/sd/sd_unet.json +397 -0
  19. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/sd3/sd3_dit.json +908 -0
  20. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/sd3/sd3_text_encoder.json +756 -0
  21. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/sdxl/sdxl_text_encoder.json +455 -0
  22. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/sdxl/sdxl_unet.json +1056 -0
  23. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/wan/dit/1.3b-t2v.json +13 -0
  24. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/wan/dit/14b-i2v.json +13 -0
  25. diffsynth_engine-0.2.0/diffsynth_engine/conf/models/wan/dit/14b-t2v.json +13 -0
  26. diffsynth_engine-0.2.0/diffsynth_engine/models/__init__.py +7 -0
  27. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/base.py +19 -10
  28. diffsynth_engine-0.2.0/diffsynth_engine/models/basic/attention.py +217 -0
  29. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/basic/unet_helper.py +2 -2
  30. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/components/vae.py +0 -1
  31. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/flux/flux_dit.py +53 -81
  32. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/flux/flux_text_encoder.py +1 -3
  33. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/flux/flux_vae.py +1 -1
  34. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/wan/wan_dit.py +145 -79
  35. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/wan/wan_image_encoder.py +2 -3
  36. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/wan/wan_text_encoder.py +46 -13
  37. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/pipelines/__init__.py +2 -1
  38. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/pipelines/base.py +40 -3
  39. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/pipelines/flux_image.py +12 -48
  40. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/pipelines/sd_image.py +6 -40
  41. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/pipelines/sdxl_image.py +8 -43
  42. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/pipelines/wan_video.py +106 -63
  43. diffsynth_engine-0.2.0/diffsynth_engine/tokenizers/__init__.py +6 -0
  44. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/tokenizers/wan.py +17 -22
  45. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/download.py +1 -5
  46. diffsynth_engine-0.2.0/diffsynth_engine/utils/flag.py +46 -0
  47. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/loader.py +4 -1
  48. diffsynth_engine-0.2.0/diffsynth_engine/utils/parallel.py +390 -0
  49. diffsynth_engine-0.2.0/diffsynth_engine.egg-info/PKG-INFO +34 -0
  50. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine.egg-info/SOURCES.txt +95 -2
  51. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine.egg-info/requires.txt +2 -3
  52. diffsynth_engine-0.2.0/docs/tutorial.md +1 -0
  53. diffsynth_engine-0.2.0/docs/tutorial_zh.md +207 -0
  54. diffsynth_engine-0.2.0/examples/flux_lora.py +11 -0
  55. diffsynth_engine-0.2.0/examples/flux_text_to_image.py +8 -0
  56. diffsynth_engine-0.2.0/examples/sdxl_text_to_image.py +14 -0
  57. diffsynth_engine-0.2.0/examples/wan_lora.py +33 -0
  58. diffsynth_engine-0.2.0/examples/wan_text_to_video.py +28 -0
  59. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/pyproject.toml +11 -8
  60. diffsynth_engine-0.2.0/tests/__init__.py +0 -0
  61. diffsynth_engine-0.2.0/tests/common/__init__.py +0 -0
  62. diffsynth_engine-0.2.0/tests/common/test_case.py +123 -0
  63. diffsynth_engine-0.2.0/tests/common/utils.py +29 -0
  64. diffsynth_engine-0.2.0/tests/data/expect/algorithm/beta_20steps.safetensors +0 -0
  65. diffsynth_engine-0.2.0/tests/data/expect/algorithm/ddim_20steps.safetensors +0 -0
  66. diffsynth_engine-0.2.0/tests/data/expect/algorithm/euler_i10.safetensors +0 -0
  67. diffsynth_engine-0.2.0/tests/data/expect/algorithm/exponential_20steps.safetensors +0 -0
  68. diffsynth_engine-0.2.0/tests/data/expect/algorithm/flow_match_euler_i10.safetensors +0 -0
  69. diffsynth_engine-0.2.0/tests/data/expect/algorithm/karras_20steps.safetensors +0 -0
  70. diffsynth_engine-0.2.0/tests/data/expect/algorithm/output.safetensors +0 -0
  71. diffsynth_engine-0.2.0/tests/data/expect/algorithm/recifited_flow_20steps_flux.safetensors +0 -0
  72. diffsynth_engine-0.2.0/tests/data/expect/algorithm/scaled_linear_20steps.safetensors +0 -0
  73. diffsynth_engine-0.2.0/tests/data/expect/algorithm/sgm_uniform_20steps.safetensors +0 -0
  74. diffsynth_engine-0.2.0/tests/data/expect/flux/flux_dit.safetensors +0 -0
  75. diffsynth_engine-0.2.0/tests/data/expect/flux/flux_inpainting.png +0 -0
  76. diffsynth_engine-0.2.0/tests/data/expect/flux/flux_lora.png +0 -0
  77. diffsynth_engine-0.2.0/tests/data/expect/flux/flux_text_encoder_1.safetensors +0 -0
  78. diffsynth_engine-0.2.0/tests/data/expect/flux/flux_text_encoder_2.safetensors +0 -0
  79. diffsynth_engine-0.2.0/tests/data/expect/flux/flux_txt2img.png +0 -0
  80. diffsynth_engine-0.2.0/tests/data/expect/flux/flux_vae.safetensors +0 -0
  81. diffsynth_engine-0.2.0/tests/data/expect/sd/sd_inpainting.png +0 -0
  82. diffsynth_engine-0.2.0/tests/data/expect/sd/sd_lora.png +0 -0
  83. diffsynth_engine-0.2.0/tests/data/expect/sd/sd_text_encoder.safetensors +0 -0
  84. diffsynth_engine-0.2.0/tests/data/expect/sd/sd_txt2img.png +0 -0
  85. diffsynth_engine-0.2.0/tests/data/expect/sd/sd_unet.safetensors +0 -0
  86. diffsynth_engine-0.2.0/tests/data/expect/sd/sd_vae.safetensors +0 -0
  87. diffsynth_engine-0.2.0/tests/data/expect/sdxl/sdxl_inpainting.png +0 -0
  88. diffsynth_engine-0.2.0/tests/data/expect/sdxl/sdxl_lora.png +0 -0
  89. diffsynth_engine-0.2.0/tests/data/expect/sdxl/sdxl_text_encoder_1.safetensors +0 -0
  90. diffsynth_engine-0.2.0/tests/data/expect/sdxl/sdxl_text_encoder_2.safetensors +0 -0
  91. diffsynth_engine-0.2.0/tests/data/expect/sdxl/sdxl_txt2img.png +0 -0
  92. diffsynth_engine-0.2.0/tests/data/expect/sdxl/sdxl_unet.safetensors +0 -0
  93. diffsynth_engine-0.2.0/tests/data/expect/sdxl/sdxl_vae.safetensors +0 -0
  94. diffsynth_engine-0.2.0/tests/data/expect/wan/wan_vae.safetensors +0 -0
  95. diffsynth_engine-0.2.0/tests/data/input/astronaut_320_320.mp4 +0 -0
  96. diffsynth_engine-0.2.0/tests/data/input/mask_image.png +0 -0
  97. diffsynth_engine-0.2.0/tests/data/input/test_image.png +0 -0
  98. diffsynth_engine-0.2.0/tests/data/input/wukong_1024_1024.png +0 -0
  99. diffsynth_engine-0.2.0/tests/data/input/wukong_480_480.png +0 -0
  100. diffsynth_engine-0.2.0/tests/test_algorithm/__init__.py +0 -0
  101. diffsynth_engine-0.2.0/tests/test_algorithm/test_sampler.py +42 -0
  102. diffsynth_engine-0.2.0/tests/test_algorithm/test_scheduler.py +77 -0
  103. diffsynth_engine-0.2.0/tests/test_models/__init__.py +0 -0
  104. diffsynth_engine-0.2.0/tests/test_models/flux/__init__.py +0 -0
  105. diffsynth_engine-0.2.0/tests/test_models/flux/test_flux_dit.py +208 -0
  106. diffsynth_engine-0.2.0/tests/test_models/flux/test_flux_text_encoder.py +115 -0
  107. diffsynth_engine-0.2.0/tests/test_models/flux/test_flux_vae.py +345 -0
  108. diffsynth_engine-0.2.0/tests/test_models/sd/__init__.py +0 -0
  109. diffsynth_engine-0.2.0/tests/test_models/sd/test_sd_text_encoder.py +73 -0
  110. diffsynth_engine-0.2.0/tests/test_models/sd/test_sd_unet.py +22 -0
  111. diffsynth_engine-0.2.0/tests/test_models/sd/test_sd_vae.py +354 -0
  112. diffsynth_engine-0.2.0/tests/test_models/sdxl/__init__.py +0 -0
  113. diffsynth_engine-0.2.0/tests/test_models/sdxl/test_sdxl_text_encoder.py +163 -0
  114. diffsynth_engine-0.2.0/tests/test_models/sdxl/test_sdxl_unet.py +21 -0
  115. diffsynth_engine-0.2.0/tests/test_models/sdxl/test_sdxl_vae.py +352 -0
  116. diffsynth_engine-0.2.0/tests/test_models/wan/test_wan_vae.py +35 -0
  117. diffsynth_engine-0.2.0/tests/test_pipelines/__init__.py +0 -0
  118. diffsynth_engine-0.2.0/tests/test_pipelines/test_flux_image.py +81 -0
  119. diffsynth_engine-0.2.0/tests/test_pipelines/test_sd_image.py +55 -0
  120. diffsynth_engine-0.2.0/tests/test_pipelines/test_sdxl_image.py +59 -0
  121. diffsynth_engine-0.2.0/tests/test_pipelines/test_wan_video.py +24 -0
  122. diffsynth_engine-0.2.0/tests/test_pipelines/test_wan_video_gguf.py +24 -0
  123. diffsynth_engine-0.2.0/tests/test_pipelines/test_wan_video_tp.py +25 -0
  124. diffsynth_engine-0.2.0/tests/test_tokenizers/__init__.py +0 -0
  125. diffsynth_engine-0.2.0/tests/test_tokenizers/test_clip.py +135 -0
  126. diffsynth_engine-0.2.0/tests/test_tokenizers/test_t5.py +138 -0
  127. diffsynth_engine-0.1.0/PKG-INFO +0 -213
  128. diffsynth_engine-0.1.0/diffsynth_engine/models/basic/attention.py +0 -137
  129. diffsynth_engine-0.1.0/diffsynth_engine/models/wan/attention.py +0 -200
  130. diffsynth_engine-0.1.0/diffsynth_engine/tokenizers/__init__.py +0 -4
  131. diffsynth_engine-0.1.0/diffsynth_engine/utils/parallel.py +0 -191
  132. diffsynth_engine-0.1.0/diffsynth_engine.egg-info/PKG-INFO +0 -213
  133. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/LICENSE +0 -0
  134. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/__init__.py +0 -0
  135. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/__init__.py +0 -0
  136. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py +0 -0
  137. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/flow_match/__init__.py +0 -0
  138. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/flow_match/flow_beta.py +0 -0
  139. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/flow_match/flow_ddim.py +0 -0
  140. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/__init__.py +0 -0
  141. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/beta.py +0 -0
  142. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/exponential.py +0 -0
  143. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/karras.py +0 -0
  144. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/linear.py +0 -0
  145. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/__init__.py +0 -0
  146. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/flow_match/__init__.py +0 -0
  147. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/stable_diffusion/__init__.py +0 -0
  148. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/stable_diffusion/brownian_tree.py +0 -0
  149. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/stable_diffusion/ddpm.py +0 -0
  150. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/stable_diffusion/deis.py +0 -0
  151. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_2m.py +0 -0
  152. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_2m_sde.py +0 -0
  153. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_3m_sde.py +0 -0
  154. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/stable_diffusion/epsilon.py +0 -0
  155. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/stable_diffusion/euler.py +0 -0
  156. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/algorithm/sampler/stable_diffusion/euler_ancestral.py +0 -0
  157. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/merges.txt +0 -0
  158. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/special_tokens_map.json +0 -0
  159. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/tokenizer_config.json +0 -0
  160. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/vocab.json +0 -0
  161. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/special_tokens_map.json +0 -0
  162. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/spiece.model +0 -0
  163. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/tokenizer.json +0 -0
  164. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/tokenizer_config.json +0 -0
  165. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/merges.txt +0 -0
  166. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/special_tokens_map.json +0 -0
  167. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/tokenizer_config.json +0 -0
  168. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/vocab.json +0 -0
  169. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/merges.txt +0 -0
  170. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/special_tokens_map.json +0 -0
  171. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/tokenizer_config.json +0 -0
  172. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/vocab.json +0 -0
  173. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/special_tokens_map.json +0 -0
  174. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/spiece.model +0 -0
  175. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json +0 -0
  176. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json +0 -0
  177. {diffsynth_engine-0.1.0/diffsynth_engine/models → diffsynth_engine-0.2.0/diffsynth_engine/kernels}/__init__.py +0 -0
  178. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/basic/__init__.py +0 -0
  179. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/basic/lora.py +0 -0
  180. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/basic/relative_position_emb.py +0 -0
  181. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/basic/timestep.py +0 -0
  182. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/basic/transformer_helper.py +0 -0
  183. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/components/__init__.py +0 -0
  184. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/components/clip.py +0 -0
  185. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/components/t5.py +0 -0
  186. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/flux/__init__.py +0 -0
  187. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sd/__init__.py +0 -0
  188. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sd/sd_text_encoder.py +0 -0
  189. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sd/sd_unet.py +0 -0
  190. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sd/sd_vae.py +0 -0
  191. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sd3/__init__.py +0 -0
  192. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sd3/sd3_dit.py +0 -0
  193. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sd3/sd3_text_encoder.py +0 -0
  194. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sd3/sd3_vae.py +0 -0
  195. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sdxl/__init__.py +0 -0
  196. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sdxl/sdxl_text_encoder.py +0 -0
  197. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sdxl/sdxl_unet.py +0 -0
  198. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/sdxl/sdxl_vae.py +0 -0
  199. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/utils.py +0 -0
  200. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/wan/__init__.py +0 -0
  201. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/models/wan/wan_vae.py +0 -0
  202. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/tokenizers/base.py +0 -0
  203. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/tokenizers/clip.py +0 -0
  204. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/tokenizers/t5.py +0 -0
  205. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/__init__.py +0 -0
  206. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/constants.py +0 -0
  207. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/env.py +0 -0
  208. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/fp8_linear.py +0 -0
  209. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/gguf.py +0 -0
  210. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/lock.py +0 -0
  211. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/logging.py +0 -0
  212. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/offload.py +0 -0
  213. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/prompt.py +0 -0
  214. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine/utils/video.py +0 -0
  215. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine.egg-info/dependency_links.txt +0 -0
  216. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/diffsynth_engine.egg-info/top_level.txt +0 -0
  217. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/setup.cfg +0 -0
  218. {diffsynth_engine-0.1.0 → diffsynth_engine-0.2.0}/setup.py +0 -0
@@ -0,0 +1,41 @@
1
+ name: release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v**'
7
+
8
+ workflow_dispatch:
9
+ inputs:
10
+ branch:
11
+ required: true
12
+ default: 'main'
13
+
14
+ permissions:
15
+ contents: read
16
+
17
+ concurrency:
18
+ group: ${{ github.workflow }}-${{ github.ref }}
19
+ cancel-in-progress: true
20
+
21
+ jobs:
22
+ build-and-publish:
23
+ runs-on: ubuntu-latest
24
+
25
+ steps:
26
+ - uses: actions/checkout@v4
27
+
28
+ - uses: actions/setup-python@v5
29
+ with:
30
+ python-version: "3.10"
31
+
32
+ - name: Install build
33
+ run: pip install build
34
+
35
+ - name: Build dist
36
+ run: python -m build
37
+
38
+ - name: Publish to PyPI
39
+ run: |
40
+ pip install twine
41
+ twine upload dist/* --skip-existing -p ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,9 @@
1
+ *.pyc
2
+ .idea/
3
+ .vscode/
4
+ __pycache__/
5
+ tmp/
6
+ build/
7
+ dist/
8
+ *.egg-info/
9
+ .DS_Store/
@@ -0,0 +1,11 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ # Ruff version.
4
+ rev: v0.11.5
5
+ hooks:
6
+ # Run the linter.
7
+ - id: ruff
8
+ types_or: [ python, pyi ]
9
+ # Run the formatter.
10
+ - id: ruff-format
11
+ types_or: [ python, pyi ]
@@ -0,0 +1,34 @@
1
+ Metadata-Version: 2.4
2
+ Name: diffsynth_engine
3
+ Version: 0.2.0
4
+ Author: MuseAI x ModelScope
5
+ Classifier: Programming Language :: Python :: 3
6
+ Classifier: Operating System :: OS Independent
7
+ Requires-Python: >=3.10
8
+ License-File: LICENSE
9
+ Requires-Dist: torch>=2.4.1
10
+ Requires-Dist: torchvision
11
+ Requires-Dist: xformers; sys_platform == "linux"
12
+ Requires-Dist: safetensors
13
+ Requires-Dist: gguf
14
+ Requires-Dist: einops
15
+ Requires-Dist: ftfy
16
+ Requires-Dist: regex
17
+ Requires-Dist: sentencepiece
18
+ Requires-Dist: tokenizers
19
+ Requires-Dist: modelscope
20
+ Requires-Dist: flufl.lock
21
+ Requires-Dist: scipy
22
+ Requires-Dist: torchsde
23
+ Requires-Dist: pillow
24
+ Requires-Dist: imageio[ffmpeg]
25
+ Requires-Dist: yunchang
26
+ Provides-Extra: dev
27
+ Requires-Dist: diffusers==0.31.0; extra == "dev"
28
+ Requires-Dist: transformers==4.45.2; extra == "dev"
29
+ Requires-Dist: build; extra == "dev"
30
+ Requires-Dist: ruff; extra == "dev"
31
+ Requires-Dist: scikit-image; extra == "dev"
32
+ Requires-Dist: pytest; extra == "dev"
33
+ Requires-Dist: pre-commit; extra == "dev"
34
+ Dynamic: license-file
@@ -6,20 +6,20 @@
6
6
  [![GitHub pull-requests](https://img.shields.io/github/issues-pr/modelscope/DiffSynth-Engine.svg)](https://GitHub.com/modelscope/DiffSynth-Engine/pull/)
7
7
  [![GitHub latest commit](https://badgen.net/github/last-commit/modelscope/DiffSynth-Engine)](https://GitHub.com/modelscope/DiffSynth-Engine/commit/)
8
8
 
9
- Diffsynth Engine is a high-performance diffusion inference engine designed for developers.
9
+ DiffSynth-Engine is a high-performance engine geared towards building efficient inference pipelines for diffusion models.
10
10
 
11
11
  **Key Features:**
12
12
 
13
- - **Clean and Readable Code:** Fully re-implements the Diffusion sampler and scheduler without relying on third-party libraries like k-diffusion, ldm, or sgm.
13
+ - **Thoughtfully-Designed Implementation:** We carefully re-implemented key components in Diffusion pipelines, such as sampler and scheduler, without introducing external dependencies on libraries like k-diffusion, ldm, or sgm.
14
14
 
15
- - **Extensive Model Support:** Compatible with multiple formats (e.g., CivitAI format) of base models and LoRA models , catering to diverse use cases.
15
+ - **Extensive Model Support:** Compatible with popular formats (e.g., CivitAI) of base models and LoRA models, catering to diverse use cases.
16
16
 
17
- - **Flexible Memory Management:** Supports various levels of model quantization (e.g., FP8, INT8)
18
- and offload strategies, enabling users to run large models (e.g., Flux.1 Dev) on limited GPU memory.
17
+ - **Versatile Resource Management:** Comprehensive support for various model quantization (e.g., FP8, INT8)
18
+ and offloading strategies, enabling loading of larger diffusion models (e.g., Flux.1 Dev) on limited hardware budget of GPU memory.
19
19
 
20
- - **High-Performance Inference:** Optimizes the inference pipeline to achieve fast generation across various hardware environments.
20
+ - **Optimized Performance:** Carefully-crafted inference pipeline to achieve fast generation across various hardware environments.
21
21
 
22
- - **Platform Compatibility:** Supports Windows, macOS (Apple Silicon), and Linux, ensuring a smooth experience across different operating systems.
22
+ - **Cross-Platform Support:** Runnable on Windows, macOS (Apple Silicon), and Linux, ensuring a smooth experience across different operating systems.
23
23
 
24
24
  ## Quick Start
25
25
  ### Requirements
@@ -29,13 +29,13 @@ and offload strategies, enabling users to run large models (e.g., Flux.1 Dev) on
29
29
 
30
30
  ### Installation
31
31
 
32
- Install for PyPI (stable version)
33
- ```python
32
+ Install released version (from PyPI):
33
+ ```shell
34
34
  pip3 install diffsynth-engine
35
35
  ```
36
36
 
37
- Install for source (preview version)
38
- ```python
37
+ Install from source:
38
+ ```shell
39
39
  git clone https://github.com/modelscope/diffsynth-engine.git && cd diffsynth-engine
40
40
  pip3 install -e .
41
41
  ```
@@ -71,10 +71,10 @@ For more details, please refer to our tutorials ([English](./docs/tutorial.md),
71
71
 
72
72
  ## Contact
73
73
 
74
- If you have any questions or feedback, please scan the QR code or send email to muse@alibaba-inc.com.
74
+ If you have any questions or feedback, please scan the QR code below, or send email to muse@alibaba-inc.com.
75
75
 
76
76
  <div style="display: flex; justify-content: space-between;">
77
- <img src="assets/dingtalk.png" alt="dingtalk" style="zoom: 60%;" />
77
+ <img src="assets/dingtalk.png" alt="dingtalk" width="400" />
78
78
  </div>
79
79
 
80
80
  ## License
@@ -82,7 +82,7 @@ This project is licensed under the Apache License 2.0. See the LICENSE file for
82
82
 
83
83
  ## Citation
84
84
 
85
- If you use this codebase, or otherwise found our work valuable, please cite:
85
+ If you use this codebase, or otherwise found our work helpful, please cite:
86
86
 
87
87
  ```bibtex
88
88
  @misc{diffsynth-engine2025,
@@ -10,6 +10,7 @@ from .pipelines import (
10
10
  )
11
11
  from .utils.download import fetch_model, fetch_modelscope_model, fetch_civitai_model
12
12
  from .utils.video import load_video, save_video
13
+
13
14
  __all__ = [
14
15
  "FluxImagePipeline",
15
16
  "SDXLImagePipeline",
@@ -22,4 +23,6 @@ __all__ = [
22
23
  "fetch_model",
23
24
  "fetch_modelscope_model",
24
25
  "fetch_civitai_model",
26
+ "load_video",
27
+ "save_video",
25
28
  ]
@@ -5,18 +5,19 @@ from diffsynth_engine.algorithm.noise_scheduler.base_scheduler import append_zer
5
5
 
6
6
 
7
7
  class RecifitedFlowScheduler(BaseScheduler):
8
- def __init__(self,
9
- shift=1.0,
10
- sigma_min=0.001,
8
+ def __init__(
9
+ self,
10
+ shift=1.0,
11
+ sigma_min=0.001,
11
12
  sigma_max=1.0,
12
- num_train_timesteps=1000,
13
+ num_train_timesteps=1000,
13
14
  use_dynamic_shifting=False,
14
15
  ):
15
16
  self.shift = shift
16
17
  self.sigma_min = sigma_min
17
18
  self.sigma_max = sigma_max
18
- self.num_train_timesteps = num_train_timesteps
19
- self.use_dynamic_shifting = use_dynamic_shifting
19
+ self.num_train_timesteps = num_train_timesteps
20
+ self.use_dynamic_shifting = use_dynamic_shifting
20
21
 
21
22
  def _sigma_to_t(self, sigma):
22
23
  return sigma * self.num_train_timesteps
@@ -30,19 +31,20 @@ class RecifitedFlowScheduler(BaseScheduler):
30
31
  def _shift_sigma(self, sigma: torch.Tensor, shift: float):
31
32
  return shift * sigma / (1 + (shift - 1) * sigma)
32
33
 
33
- def schedule(self,
34
- num_inference_steps: int,
35
- mu: float | None = None,
36
- sigma_min: float | None = None,
37
- sigma_max: float | None = None
34
+ def schedule(
35
+ self,
36
+ num_inference_steps: int,
37
+ mu: float | None = None,
38
+ sigma_min: float | None = None,
39
+ sigma_max: float | None = None,
38
40
  ):
39
41
  sigma_min = self.sigma_min if sigma_min is None else sigma_min
40
- sigma_max = self.sigma_max if sigma_max is None else sigma_max
42
+ sigma_max = self.sigma_max if sigma_max is None else sigma_max
41
43
  sigmas = torch.linspace(sigma_max, sigma_min, num_inference_steps)
42
44
  if self.use_dynamic_shifting:
43
- sigmas = self._time_shift(mu, 1.0, sigmas) # FLUX
45
+ sigmas = self._time_shift(mu, 1.0, sigmas) # FLUX
44
46
  else:
45
47
  sigmas = self._shift_sigma(sigmas, self.shift)
46
48
  timesteps = sigmas * self.num_train_timesteps
47
49
  sigmas = append_zero(sigmas)
48
- return sigmas, timesteps
50
+ return sigmas, timesteps
@@ -1,7 +1,4 @@
1
1
  import torch
2
- from .linear import ScaledLinearScheduler
3
- from ..base_scheduler import append_zero
4
- import numpy as np
5
2
 
6
3
  from diffsynth_engine.algorithm.noise_scheduler.stable_diffusion.linear import ScaledLinearScheduler
7
4
  from diffsynth_engine.algorithm.noise_scheduler.base_scheduler import append_zero
@@ -1,7 +1,4 @@
1
1
  import torch
2
- from .linear import ScaledLinearScheduler
3
- from ..base_scheduler import append_zero
4
- import numpy as np
5
2
 
6
3
  from diffsynth_engine.algorithm.noise_scheduler.stable_diffusion.linear import ScaledLinearScheduler
7
4
  from diffsynth_engine.algorithm.noise_scheduler.base_scheduler import append_zero
@@ -2,7 +2,7 @@ import torch
2
2
 
3
3
 
4
4
  class FlowMatchEulerSampler:
5
- def initialize(self, init_latents, timesteps, sigmas, mask=None):
5
+ def initialize(self, init_latents, timesteps, sigmas, mask=None):
6
6
  self.init_latents = init_latents
7
7
  self.timesteps = timesteps
8
8
  self.sigmas = sigmas
@@ -0,0 +1,254 @@
1
+ {
2
+ "civitai": {
3
+ "rename_dict": {
4
+ "first_stage_model.decoder.conv_in.bias": "decoder.conv_in.bias",
5
+ "first_stage_model.decoder.conv_in.weight": "decoder.conv_in.weight",
6
+ "first_stage_model.decoder.conv_out.bias": "decoder.conv_out.bias",
7
+ "first_stage_model.decoder.conv_out.weight": "decoder.conv_out.weight",
8
+ "first_stage_model.decoder.mid.attn_1.k.bias": "decoder.blocks.1.transformer_blocks.0.to_k.bias",
9
+ "first_stage_model.decoder.mid.attn_1.k.weight": "decoder.blocks.1.transformer_blocks.0.to_k.weight",
10
+ "first_stage_model.decoder.mid.attn_1.norm.bias": "decoder.blocks.1.norm.bias",
11
+ "first_stage_model.decoder.mid.attn_1.norm.weight": "decoder.blocks.1.norm.weight",
12
+ "first_stage_model.decoder.mid.attn_1.proj_out.bias": "decoder.blocks.1.transformer_blocks.0.to_out.bias",
13
+ "first_stage_model.decoder.mid.attn_1.proj_out.weight": "decoder.blocks.1.transformer_blocks.0.to_out.weight",
14
+ "first_stage_model.decoder.mid.attn_1.q.bias": "decoder.blocks.1.transformer_blocks.0.to_q.bias",
15
+ "first_stage_model.decoder.mid.attn_1.q.weight": "decoder.blocks.1.transformer_blocks.0.to_q.weight",
16
+ "first_stage_model.decoder.mid.attn_1.v.bias": "decoder.blocks.1.transformer_blocks.0.to_v.bias",
17
+ "first_stage_model.decoder.mid.attn_1.v.weight": "decoder.blocks.1.transformer_blocks.0.to_v.weight",
18
+ "first_stage_model.decoder.mid.block_1.conv1.bias": "decoder.blocks.0.conv1.bias",
19
+ "first_stage_model.decoder.mid.block_1.conv1.weight": "decoder.blocks.0.conv1.weight",
20
+ "first_stage_model.decoder.mid.block_1.conv2.bias": "decoder.blocks.0.conv2.bias",
21
+ "first_stage_model.decoder.mid.block_1.conv2.weight": "decoder.blocks.0.conv2.weight",
22
+ "first_stage_model.decoder.mid.block_1.norm1.bias": "decoder.blocks.0.norm1.bias",
23
+ "first_stage_model.decoder.mid.block_1.norm1.weight": "decoder.blocks.0.norm1.weight",
24
+ "first_stage_model.decoder.mid.block_1.norm2.bias": "decoder.blocks.0.norm2.bias",
25
+ "first_stage_model.decoder.mid.block_1.norm2.weight": "decoder.blocks.0.norm2.weight",
26
+ "first_stage_model.decoder.mid.block_2.conv1.bias": "decoder.blocks.2.conv1.bias",
27
+ "first_stage_model.decoder.mid.block_2.conv1.weight": "decoder.blocks.2.conv1.weight",
28
+ "first_stage_model.decoder.mid.block_2.conv2.bias": "decoder.blocks.2.conv2.bias",
29
+ "first_stage_model.decoder.mid.block_2.conv2.weight": "decoder.blocks.2.conv2.weight",
30
+ "first_stage_model.decoder.mid.block_2.norm1.bias": "decoder.blocks.2.norm1.bias",
31
+ "first_stage_model.decoder.mid.block_2.norm1.weight": "decoder.blocks.2.norm1.weight",
32
+ "first_stage_model.decoder.mid.block_2.norm2.bias": "decoder.blocks.2.norm2.bias",
33
+ "first_stage_model.decoder.mid.block_2.norm2.weight": "decoder.blocks.2.norm2.weight",
34
+ "first_stage_model.decoder.norm_out.bias": "decoder.conv_norm_out.bias",
35
+ "first_stage_model.decoder.norm_out.weight": "decoder.conv_norm_out.weight",
36
+ "first_stage_model.decoder.up.0.block.0.conv1.bias": "decoder.blocks.15.conv1.bias",
37
+ "first_stage_model.decoder.up.0.block.0.conv1.weight": "decoder.blocks.15.conv1.weight",
38
+ "first_stage_model.decoder.up.0.block.0.conv2.bias": "decoder.blocks.15.conv2.bias",
39
+ "first_stage_model.decoder.up.0.block.0.conv2.weight": "decoder.blocks.15.conv2.weight",
40
+ "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": "decoder.blocks.15.conv_shortcut.bias",
41
+ "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": "decoder.blocks.15.conv_shortcut.weight",
42
+ "first_stage_model.decoder.up.0.block.0.norm1.bias": "decoder.blocks.15.norm1.bias",
43
+ "first_stage_model.decoder.up.0.block.0.norm1.weight": "decoder.blocks.15.norm1.weight",
44
+ "first_stage_model.decoder.up.0.block.0.norm2.bias": "decoder.blocks.15.norm2.bias",
45
+ "first_stage_model.decoder.up.0.block.0.norm2.weight": "decoder.blocks.15.norm2.weight",
46
+ "first_stage_model.decoder.up.0.block.1.conv1.bias": "decoder.blocks.16.conv1.bias",
47
+ "first_stage_model.decoder.up.0.block.1.conv1.weight": "decoder.blocks.16.conv1.weight",
48
+ "first_stage_model.decoder.up.0.block.1.conv2.bias": "decoder.blocks.16.conv2.bias",
49
+ "first_stage_model.decoder.up.0.block.1.conv2.weight": "decoder.blocks.16.conv2.weight",
50
+ "first_stage_model.decoder.up.0.block.1.norm1.bias": "decoder.blocks.16.norm1.bias",
51
+ "first_stage_model.decoder.up.0.block.1.norm1.weight": "decoder.blocks.16.norm1.weight",
52
+ "first_stage_model.decoder.up.0.block.1.norm2.bias": "decoder.blocks.16.norm2.bias",
53
+ "first_stage_model.decoder.up.0.block.1.norm2.weight": "decoder.blocks.16.norm2.weight",
54
+ "first_stage_model.decoder.up.0.block.2.conv1.bias": "decoder.blocks.17.conv1.bias",
55
+ "first_stage_model.decoder.up.0.block.2.conv1.weight": "decoder.blocks.17.conv1.weight",
56
+ "first_stage_model.decoder.up.0.block.2.conv2.bias": "decoder.blocks.17.conv2.bias",
57
+ "first_stage_model.decoder.up.0.block.2.conv2.weight": "decoder.blocks.17.conv2.weight",
58
+ "first_stage_model.decoder.up.0.block.2.norm1.bias": "decoder.blocks.17.norm1.bias",
59
+ "first_stage_model.decoder.up.0.block.2.norm1.weight": "decoder.blocks.17.norm1.weight",
60
+ "first_stage_model.decoder.up.0.block.2.norm2.bias": "decoder.blocks.17.norm2.bias",
61
+ "first_stage_model.decoder.up.0.block.2.norm2.weight": "decoder.blocks.17.norm2.weight",
62
+ "first_stage_model.decoder.up.1.block.0.conv1.bias": "decoder.blocks.11.conv1.bias",
63
+ "first_stage_model.decoder.up.1.block.0.conv1.weight": "decoder.blocks.11.conv1.weight",
64
+ "first_stage_model.decoder.up.1.block.0.conv2.bias": "decoder.blocks.11.conv2.bias",
65
+ "first_stage_model.decoder.up.1.block.0.conv2.weight": "decoder.blocks.11.conv2.weight",
66
+ "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": "decoder.blocks.11.conv_shortcut.bias",
67
+ "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": "decoder.blocks.11.conv_shortcut.weight",
68
+ "first_stage_model.decoder.up.1.block.0.norm1.bias": "decoder.blocks.11.norm1.bias",
69
+ "first_stage_model.decoder.up.1.block.0.norm1.weight": "decoder.blocks.11.norm1.weight",
70
+ "first_stage_model.decoder.up.1.block.0.norm2.bias": "decoder.blocks.11.norm2.bias",
71
+ "first_stage_model.decoder.up.1.block.0.norm2.weight": "decoder.blocks.11.norm2.weight",
72
+ "first_stage_model.decoder.up.1.block.1.conv1.bias": "decoder.blocks.12.conv1.bias",
73
+ "first_stage_model.decoder.up.1.block.1.conv1.weight": "decoder.blocks.12.conv1.weight",
74
+ "first_stage_model.decoder.up.1.block.1.conv2.bias": "decoder.blocks.12.conv2.bias",
75
+ "first_stage_model.decoder.up.1.block.1.conv2.weight": "decoder.blocks.12.conv2.weight",
76
+ "first_stage_model.decoder.up.1.block.1.norm1.bias": "decoder.blocks.12.norm1.bias",
77
+ "first_stage_model.decoder.up.1.block.1.norm1.weight": "decoder.blocks.12.norm1.weight",
78
+ "first_stage_model.decoder.up.1.block.1.norm2.bias": "decoder.blocks.12.norm2.bias",
79
+ "first_stage_model.decoder.up.1.block.1.norm2.weight": "decoder.blocks.12.norm2.weight",
80
+ "first_stage_model.decoder.up.1.block.2.conv1.bias": "decoder.blocks.13.conv1.bias",
81
+ "first_stage_model.decoder.up.1.block.2.conv1.weight": "decoder.blocks.13.conv1.weight",
82
+ "first_stage_model.decoder.up.1.block.2.conv2.bias": "decoder.blocks.13.conv2.bias",
83
+ "first_stage_model.decoder.up.1.block.2.conv2.weight": "decoder.blocks.13.conv2.weight",
84
+ "first_stage_model.decoder.up.1.block.2.norm1.bias": "decoder.blocks.13.norm1.bias",
85
+ "first_stage_model.decoder.up.1.block.2.norm1.weight": "decoder.blocks.13.norm1.weight",
86
+ "first_stage_model.decoder.up.1.block.2.norm2.bias": "decoder.blocks.13.norm2.bias",
87
+ "first_stage_model.decoder.up.1.block.2.norm2.weight": "decoder.blocks.13.norm2.weight",
88
+ "first_stage_model.decoder.up.1.upsample.conv.bias": "decoder.blocks.14.conv.bias",
89
+ "first_stage_model.decoder.up.1.upsample.conv.weight": "decoder.blocks.14.conv.weight",
90
+ "first_stage_model.decoder.up.2.block.0.conv1.bias": "decoder.blocks.7.conv1.bias",
91
+ "first_stage_model.decoder.up.2.block.0.conv1.weight": "decoder.blocks.7.conv1.weight",
92
+ "first_stage_model.decoder.up.2.block.0.conv2.bias": "decoder.blocks.7.conv2.bias",
93
+ "first_stage_model.decoder.up.2.block.0.conv2.weight": "decoder.blocks.7.conv2.weight",
94
+ "first_stage_model.decoder.up.2.block.0.norm1.bias": "decoder.blocks.7.norm1.bias",
95
+ "first_stage_model.decoder.up.2.block.0.norm1.weight": "decoder.blocks.7.norm1.weight",
96
+ "first_stage_model.decoder.up.2.block.0.norm2.bias": "decoder.blocks.7.norm2.bias",
97
+ "first_stage_model.decoder.up.2.block.0.norm2.weight": "decoder.blocks.7.norm2.weight",
98
+ "first_stage_model.decoder.up.2.block.1.conv1.bias": "decoder.blocks.8.conv1.bias",
99
+ "first_stage_model.decoder.up.2.block.1.conv1.weight": "decoder.blocks.8.conv1.weight",
100
+ "first_stage_model.decoder.up.2.block.1.conv2.bias": "decoder.blocks.8.conv2.bias",
101
+ "first_stage_model.decoder.up.2.block.1.conv2.weight": "decoder.blocks.8.conv2.weight",
102
+ "first_stage_model.decoder.up.2.block.1.norm1.bias": "decoder.blocks.8.norm1.bias",
103
+ "first_stage_model.decoder.up.2.block.1.norm1.weight": "decoder.blocks.8.norm1.weight",
104
+ "first_stage_model.decoder.up.2.block.1.norm2.bias": "decoder.blocks.8.norm2.bias",
105
+ "first_stage_model.decoder.up.2.block.1.norm2.weight": "decoder.blocks.8.norm2.weight",
106
+ "first_stage_model.decoder.up.2.block.2.conv1.bias": "decoder.blocks.9.conv1.bias",
107
+ "first_stage_model.decoder.up.2.block.2.conv1.weight": "decoder.blocks.9.conv1.weight",
108
+ "first_stage_model.decoder.up.2.block.2.conv2.bias": "decoder.blocks.9.conv2.bias",
109
+ "first_stage_model.decoder.up.2.block.2.conv2.weight": "decoder.blocks.9.conv2.weight",
110
+ "first_stage_model.decoder.up.2.block.2.norm1.bias": "decoder.blocks.9.norm1.bias",
111
+ "first_stage_model.decoder.up.2.block.2.norm1.weight": "decoder.blocks.9.norm1.weight",
112
+ "first_stage_model.decoder.up.2.block.2.norm2.bias": "decoder.blocks.9.norm2.bias",
113
+ "first_stage_model.decoder.up.2.block.2.norm2.weight": "decoder.blocks.9.norm2.weight",
114
+ "first_stage_model.decoder.up.2.upsample.conv.bias": "decoder.blocks.10.conv.bias",
115
+ "first_stage_model.decoder.up.2.upsample.conv.weight": "decoder.blocks.10.conv.weight",
116
+ "first_stage_model.decoder.up.3.block.0.conv1.bias": "decoder.blocks.3.conv1.bias",
117
+ "first_stage_model.decoder.up.3.block.0.conv1.weight": "decoder.blocks.3.conv1.weight",
118
+ "first_stage_model.decoder.up.3.block.0.conv2.bias": "decoder.blocks.3.conv2.bias",
119
+ "first_stage_model.decoder.up.3.block.0.conv2.weight": "decoder.blocks.3.conv2.weight",
120
+ "first_stage_model.decoder.up.3.block.0.norm1.bias": "decoder.blocks.3.norm1.bias",
121
+ "first_stage_model.decoder.up.3.block.0.norm1.weight": "decoder.blocks.3.norm1.weight",
122
+ "first_stage_model.decoder.up.3.block.0.norm2.bias": "decoder.blocks.3.norm2.bias",
123
+ "first_stage_model.decoder.up.3.block.0.norm2.weight": "decoder.blocks.3.norm2.weight",
124
+ "first_stage_model.decoder.up.3.block.1.conv1.bias": "decoder.blocks.4.conv1.bias",
125
+ "first_stage_model.decoder.up.3.block.1.conv1.weight": "decoder.blocks.4.conv1.weight",
126
+ "first_stage_model.decoder.up.3.block.1.conv2.bias": "decoder.blocks.4.conv2.bias",
127
+ "first_stage_model.decoder.up.3.block.1.conv2.weight": "decoder.blocks.4.conv2.weight",
128
+ "first_stage_model.decoder.up.3.block.1.norm1.bias": "decoder.blocks.4.norm1.bias",
129
+ "first_stage_model.decoder.up.3.block.1.norm1.weight": "decoder.blocks.4.norm1.weight",
130
+ "first_stage_model.decoder.up.3.block.1.norm2.bias": "decoder.blocks.4.norm2.bias",
131
+ "first_stage_model.decoder.up.3.block.1.norm2.weight": "decoder.blocks.4.norm2.weight",
132
+ "first_stage_model.decoder.up.3.block.2.conv1.bias": "decoder.blocks.5.conv1.bias",
133
+ "first_stage_model.decoder.up.3.block.2.conv1.weight": "decoder.blocks.5.conv1.weight",
134
+ "first_stage_model.decoder.up.3.block.2.conv2.bias": "decoder.blocks.5.conv2.bias",
135
+ "first_stage_model.decoder.up.3.block.2.conv2.weight": "decoder.blocks.5.conv2.weight",
136
+ "first_stage_model.decoder.up.3.block.2.norm1.bias": "decoder.blocks.5.norm1.bias",
137
+ "first_stage_model.decoder.up.3.block.2.norm1.weight": "decoder.blocks.5.norm1.weight",
138
+ "first_stage_model.decoder.up.3.block.2.norm2.bias": "decoder.blocks.5.norm2.bias",
139
+ "first_stage_model.decoder.up.3.block.2.norm2.weight": "decoder.blocks.5.norm2.weight",
140
+ "first_stage_model.decoder.up.3.upsample.conv.bias": "decoder.blocks.6.conv.bias",
141
+ "first_stage_model.decoder.up.3.upsample.conv.weight": "decoder.blocks.6.conv.weight",
142
+ "first_stage_model.post_quant_conv.bias": "decoder.post_quant_conv.bias",
143
+ "first_stage_model.post_quant_conv.weight": "decoder.post_quant_conv.weight",
144
+ "first_stage_model.encoder.conv_in.bias": "encoder.conv_in.bias",
145
+ "first_stage_model.encoder.conv_in.weight": "encoder.conv_in.weight",
146
+ "first_stage_model.encoder.conv_out.bias": "encoder.conv_out.bias",
147
+ "first_stage_model.encoder.conv_out.weight": "encoder.conv_out.weight",
148
+ "first_stage_model.encoder.down.0.block.0.conv1.bias": "encoder.blocks.0.conv1.bias",
149
+ "first_stage_model.encoder.down.0.block.0.conv1.weight": "encoder.blocks.0.conv1.weight",
150
+ "first_stage_model.encoder.down.0.block.0.conv2.bias": "encoder.blocks.0.conv2.bias",
151
+ "first_stage_model.encoder.down.0.block.0.conv2.weight": "encoder.blocks.0.conv2.weight",
152
+ "first_stage_model.encoder.down.0.block.0.norm1.bias": "encoder.blocks.0.norm1.bias",
153
+ "first_stage_model.encoder.down.0.block.0.norm1.weight": "encoder.blocks.0.norm1.weight",
154
+ "first_stage_model.encoder.down.0.block.0.norm2.bias": "encoder.blocks.0.norm2.bias",
155
+ "first_stage_model.encoder.down.0.block.0.norm2.weight": "encoder.blocks.0.norm2.weight",
156
+ "first_stage_model.encoder.down.0.block.1.conv1.bias": "encoder.blocks.1.conv1.bias",
157
+ "first_stage_model.encoder.down.0.block.1.conv1.weight": "encoder.blocks.1.conv1.weight",
158
+ "first_stage_model.encoder.down.0.block.1.conv2.bias": "encoder.blocks.1.conv2.bias",
159
+ "first_stage_model.encoder.down.0.block.1.conv2.weight": "encoder.blocks.1.conv2.weight",
160
+ "first_stage_model.encoder.down.0.block.1.norm1.bias": "encoder.blocks.1.norm1.bias",
161
+ "first_stage_model.encoder.down.0.block.1.norm1.weight": "encoder.blocks.1.norm1.weight",
162
+ "first_stage_model.encoder.down.0.block.1.norm2.bias": "encoder.blocks.1.norm2.bias",
163
+ "first_stage_model.encoder.down.0.block.1.norm2.weight": "encoder.blocks.1.norm2.weight",
164
+ "first_stage_model.encoder.down.0.downsample.conv.bias": "encoder.blocks.2.conv.bias",
165
+ "first_stage_model.encoder.down.0.downsample.conv.weight": "encoder.blocks.2.conv.weight",
166
+ "first_stage_model.encoder.down.1.block.0.conv1.bias": "encoder.blocks.3.conv1.bias",
167
+ "first_stage_model.encoder.down.1.block.0.conv1.weight": "encoder.blocks.3.conv1.weight",
168
+ "first_stage_model.encoder.down.1.block.0.conv2.bias": "encoder.blocks.3.conv2.bias",
169
+ "first_stage_model.encoder.down.1.block.0.conv2.weight": "encoder.blocks.3.conv2.weight",
170
+ "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": "encoder.blocks.3.conv_shortcut.bias",
171
+ "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": "encoder.blocks.3.conv_shortcut.weight",
172
+ "first_stage_model.encoder.down.1.block.0.norm1.bias": "encoder.blocks.3.norm1.bias",
173
+ "first_stage_model.encoder.down.1.block.0.norm1.weight": "encoder.blocks.3.norm1.weight",
174
+ "first_stage_model.encoder.down.1.block.0.norm2.bias": "encoder.blocks.3.norm2.bias",
175
+ "first_stage_model.encoder.down.1.block.0.norm2.weight": "encoder.blocks.3.norm2.weight",
176
+ "first_stage_model.encoder.down.1.block.1.conv1.bias": "encoder.blocks.4.conv1.bias",
177
+ "first_stage_model.encoder.down.1.block.1.conv1.weight": "encoder.blocks.4.conv1.weight",
178
+ "first_stage_model.encoder.down.1.block.1.conv2.bias": "encoder.blocks.4.conv2.bias",
179
+ "first_stage_model.encoder.down.1.block.1.conv2.weight": "encoder.blocks.4.conv2.weight",
180
+ "first_stage_model.encoder.down.1.block.1.norm1.bias": "encoder.blocks.4.norm1.bias",
181
+ "first_stage_model.encoder.down.1.block.1.norm1.weight": "encoder.blocks.4.norm1.weight",
182
+ "first_stage_model.encoder.down.1.block.1.norm2.bias": "encoder.blocks.4.norm2.bias",
183
+ "first_stage_model.encoder.down.1.block.1.norm2.weight": "encoder.blocks.4.norm2.weight",
184
+ "first_stage_model.encoder.down.1.downsample.conv.bias": "encoder.blocks.5.conv.bias",
185
+ "first_stage_model.encoder.down.1.downsample.conv.weight": "encoder.blocks.5.conv.weight",
186
+ "first_stage_model.encoder.down.2.block.0.conv1.bias": "encoder.blocks.6.conv1.bias",
187
+ "first_stage_model.encoder.down.2.block.0.conv1.weight": "encoder.blocks.6.conv1.weight",
188
+ "first_stage_model.encoder.down.2.block.0.conv2.bias": "encoder.blocks.6.conv2.bias",
189
+ "first_stage_model.encoder.down.2.block.0.conv2.weight": "encoder.blocks.6.conv2.weight",
190
+ "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": "encoder.blocks.6.conv_shortcut.bias",
191
+ "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": "encoder.blocks.6.conv_shortcut.weight",
192
+ "first_stage_model.encoder.down.2.block.0.norm1.bias": "encoder.blocks.6.norm1.bias",
193
+ "first_stage_model.encoder.down.2.block.0.norm1.weight": "encoder.blocks.6.norm1.weight",
194
+ "first_stage_model.encoder.down.2.block.0.norm2.bias": "encoder.blocks.6.norm2.bias",
195
+ "first_stage_model.encoder.down.2.block.0.norm2.weight": "encoder.blocks.6.norm2.weight",
196
+ "first_stage_model.encoder.down.2.block.1.conv1.bias": "encoder.blocks.7.conv1.bias",
197
+ "first_stage_model.encoder.down.2.block.1.conv1.weight": "encoder.blocks.7.conv1.weight",
198
+ "first_stage_model.encoder.down.2.block.1.conv2.bias": "encoder.blocks.7.conv2.bias",
199
+ "first_stage_model.encoder.down.2.block.1.conv2.weight": "encoder.blocks.7.conv2.weight",
200
+ "first_stage_model.encoder.down.2.block.1.norm1.bias": "encoder.blocks.7.norm1.bias",
201
+ "first_stage_model.encoder.down.2.block.1.norm1.weight": "encoder.blocks.7.norm1.weight",
202
+ "first_stage_model.encoder.down.2.block.1.norm2.bias": "encoder.blocks.7.norm2.bias",
203
+ "first_stage_model.encoder.down.2.block.1.norm2.weight": "encoder.blocks.7.norm2.weight",
204
+ "first_stage_model.encoder.down.2.downsample.conv.bias": "encoder.blocks.8.conv.bias",
205
+ "first_stage_model.encoder.down.2.downsample.conv.weight": "encoder.blocks.8.conv.weight",
206
+ "first_stage_model.encoder.down.3.block.0.conv1.bias": "encoder.blocks.9.conv1.bias",
207
+ "first_stage_model.encoder.down.3.block.0.conv1.weight": "encoder.blocks.9.conv1.weight",
208
+ "first_stage_model.encoder.down.3.block.0.conv2.bias": "encoder.blocks.9.conv2.bias",
209
+ "first_stage_model.encoder.down.3.block.0.conv2.weight": "encoder.blocks.9.conv2.weight",
210
+ "first_stage_model.encoder.down.3.block.0.norm1.bias": "encoder.blocks.9.norm1.bias",
211
+ "first_stage_model.encoder.down.3.block.0.norm1.weight": "encoder.blocks.9.norm1.weight",
212
+ "first_stage_model.encoder.down.3.block.0.norm2.bias": "encoder.blocks.9.norm2.bias",
213
+ "first_stage_model.encoder.down.3.block.0.norm2.weight": "encoder.blocks.9.norm2.weight",
214
+ "first_stage_model.encoder.down.3.block.1.conv1.bias": "encoder.blocks.10.conv1.bias",
215
+ "first_stage_model.encoder.down.3.block.1.conv1.weight": "encoder.blocks.10.conv1.weight",
216
+ "first_stage_model.encoder.down.3.block.1.conv2.bias": "encoder.blocks.10.conv2.bias",
217
+ "first_stage_model.encoder.down.3.block.1.conv2.weight": "encoder.blocks.10.conv2.weight",
218
+ "first_stage_model.encoder.down.3.block.1.norm1.bias": "encoder.blocks.10.norm1.bias",
219
+ "first_stage_model.encoder.down.3.block.1.norm1.weight": "encoder.blocks.10.norm1.weight",
220
+ "first_stage_model.encoder.down.3.block.1.norm2.bias": "encoder.blocks.10.norm2.bias",
221
+ "first_stage_model.encoder.down.3.block.1.norm2.weight": "encoder.blocks.10.norm2.weight",
222
+ "first_stage_model.encoder.mid.attn_1.k.bias": "encoder.blocks.12.transformer_blocks.0.to_k.bias",
223
+ "first_stage_model.encoder.mid.attn_1.k.weight": "encoder.blocks.12.transformer_blocks.0.to_k.weight",
224
+ "first_stage_model.encoder.mid.attn_1.norm.bias": "encoder.blocks.12.norm.bias",
225
+ "first_stage_model.encoder.mid.attn_1.norm.weight": "encoder.blocks.12.norm.weight",
226
+ "first_stage_model.encoder.mid.attn_1.proj_out.bias": "encoder.blocks.12.transformer_blocks.0.to_out.bias",
227
+ "first_stage_model.encoder.mid.attn_1.proj_out.weight": "encoder.blocks.12.transformer_blocks.0.to_out.weight",
228
+ "first_stage_model.encoder.mid.attn_1.q.bias": "encoder.blocks.12.transformer_blocks.0.to_q.bias",
229
+ "first_stage_model.encoder.mid.attn_1.q.weight": "encoder.blocks.12.transformer_blocks.0.to_q.weight",
230
+ "first_stage_model.encoder.mid.attn_1.v.bias": "encoder.blocks.12.transformer_blocks.0.to_v.bias",
231
+ "first_stage_model.encoder.mid.attn_1.v.weight": "encoder.blocks.12.transformer_blocks.0.to_v.weight",
232
+ "first_stage_model.encoder.mid.block_1.conv1.bias": "encoder.blocks.11.conv1.bias",
233
+ "first_stage_model.encoder.mid.block_1.conv1.weight": "encoder.blocks.11.conv1.weight",
234
+ "first_stage_model.encoder.mid.block_1.conv2.bias": "encoder.blocks.11.conv2.bias",
235
+ "first_stage_model.encoder.mid.block_1.conv2.weight": "encoder.blocks.11.conv2.weight",
236
+ "first_stage_model.encoder.mid.block_1.norm1.bias": "encoder.blocks.11.norm1.bias",
237
+ "first_stage_model.encoder.mid.block_1.norm1.weight": "encoder.blocks.11.norm1.weight",
238
+ "first_stage_model.encoder.mid.block_1.norm2.bias": "encoder.blocks.11.norm2.bias",
239
+ "first_stage_model.encoder.mid.block_1.norm2.weight": "encoder.blocks.11.norm2.weight",
240
+ "first_stage_model.encoder.mid.block_2.conv1.bias": "encoder.blocks.13.conv1.bias",
241
+ "first_stage_model.encoder.mid.block_2.conv1.weight": "encoder.blocks.13.conv1.weight",
242
+ "first_stage_model.encoder.mid.block_2.conv2.bias": "encoder.blocks.13.conv2.bias",
243
+ "first_stage_model.encoder.mid.block_2.conv2.weight": "encoder.blocks.13.conv2.weight",
244
+ "first_stage_model.encoder.mid.block_2.norm1.bias": "encoder.blocks.13.norm1.bias",
245
+ "first_stage_model.encoder.mid.block_2.norm1.weight": "encoder.blocks.13.norm1.weight",
246
+ "first_stage_model.encoder.mid.block_2.norm2.bias": "encoder.blocks.13.norm2.bias",
247
+ "first_stage_model.encoder.mid.block_2.norm2.weight": "encoder.blocks.13.norm2.weight",
248
+ "first_stage_model.encoder.norm_out.bias": "encoder.conv_norm_out.bias",
249
+ "first_stage_model.encoder.norm_out.weight": "encoder.conv_norm_out.weight",
250
+ "first_stage_model.quant_conv.bias": "encoder.quant_conv.bias",
251
+ "first_stage_model.quant_conv.weight": "encoder.quant_conv.weight"
252
+ }
253
+ }
254
+ }