diffsynth-engine 0.3.6.dev2__tar.gz → 0.3.6.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/PKG-INFO +2 -1
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/wan/wan_dit.py +3 -2
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/wan/wan_vae.py +14 -15
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/pipelines/base.py +14 -8
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/pipelines/controlnet_helper.py +1 -1
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/pipelines/flux_image.py +11 -2
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/download.py +20 -15
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine.egg-info/PKG-INFO +2 -1
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine.egg-info/requires.txt +1 -0
- diffsynth_engine-0.3.6.dev5/docs/tutorial.md +241 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/docs/tutorial_zh.md +25 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/pyproject.toml +2 -1
- diffsynth_engine-0.3.6.dev2/docs/tutorial.md +0 -1
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/.gitignore +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/.pre-commit-config.yaml +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/LICENSE +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/MANIFEST.in +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/README.md +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/assets/dingtalk.png +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/assets/showcase.jpeg +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/flow_match/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/flow_match/flow_beta.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/flow_match/flow_ddim.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/flow_match/recifited_flow.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/beta.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/ddim.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/exponential.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/karras.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/linear.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/sgm_uniform.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/flow_match/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/flow_match/flow_match_euler.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/stable_diffusion/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/stable_diffusion/brownian_tree.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/stable_diffusion/ddpm.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/stable_diffusion/deis.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_2m.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_2m_sde.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_3m_sde.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/stable_diffusion/epsilon.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/stable_diffusion/euler.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/sampler/stable_diffusion/euler_ancestral.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/components/vae.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/flux/flux_dit.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/flux/flux_text_encoder.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/flux/flux_vae.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/sd/sd_text_encoder.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/sd/sd_unet.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/sd3/sd3_dit.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/sd3/sd3_text_encoder.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/sdxl/sdxl_text_encoder.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/sdxl/sdxl_unet.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/wan/dit/1.3b-t2v.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/wan/dit/14b-flf2v.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/wan/dit/14b-i2v.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/models/wan/dit/14b-t2v.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/merges.txt +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/special_tokens_map.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/tokenizer_config.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/vocab.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/special_tokens_map.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/spiece.model +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/tokenizer.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/tokenizer_config.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/merges.txt +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/special_tokens_map.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/tokenizer_config.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/vocab.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/merges.txt +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/special_tokens_map.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/tokenizer_config.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/vocab.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/special_tokens_map.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/spiece.model +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/kernels/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/base.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/basic/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/basic/attention.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/basic/lora.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/basic/relative_position_emb.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/basic/timestep.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/basic/transformer_helper.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/basic/unet_helper.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/flux/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/flux/flux_controlnet.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/flux/flux_dit.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/flux/flux_ipadapter.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/flux/flux_redux.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/flux/flux_text_encoder.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/flux/flux_vae.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sd/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sd/sd_controlnet.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sd/sd_text_encoder.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sd/sd_unet.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sd/sd_vae.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sd3/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sd3/sd3_dit.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sd3/sd3_text_encoder.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sd3/sd3_vae.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sdxl/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sdxl/sdxl_controlnet.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sdxl/sdxl_text_encoder.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sdxl/sdxl_unet.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/sdxl/sdxl_vae.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/text_encoder/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/text_encoder/clip.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/text_encoder/siglip.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/text_encoder/t5.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/utils.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/vae/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/vae/vae.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/wan/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/wan/wan_image_encoder.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/wan/wan_text_encoder.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/pipelines/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/pipelines/sd_image.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/pipelines/sdxl_image.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/pipelines/wan_video.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/processor/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/processor/canny_processor.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/processor/depth_processor.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/tokenizers/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/tokenizers/base.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/tokenizers/clip.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/tokenizers/t5.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/tokenizers/wan.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/tools/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/tools/flux_inpainting_tool.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/tools/flux_outpainting_tool.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/tools/flux_reference_tool.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/tools/flux_replace_tool.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/__init__.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/constants.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/env.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/flag.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/fp8_linear.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/gguf.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/image.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/loader.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/lock.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/logging.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/offload.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/onnx.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/parallel.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/platform.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/prompt.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/video.py +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine.egg-info/SOURCES.txt +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine.egg-info/dependency_links.txt +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine.egg-info/top_level.txt +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/setup.cfg +0 -0
- {diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/setup.py +0 -0
{diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.3.6.dev2
+Version: 0.3.6.dev5
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
@@ -24,6 +24,7 @@ Requires-Dist: pillow
 Requires-Dist: imageio[ffmpeg]
 Requires-Dist: yunchang; sys_platform == "linux"
 Requires-Dist: onnxruntime
+Requires-Dist: opencv-python
 Provides-Extra: dev
 Requires-Dist: diffusers==0.31.0; extra == "dev"
 Requires-Dist: transformers==4.45.2; extra == "dev"
{diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/wan/wan_dit.py
RENAMED
@@ -334,9 +334,10 @@ class WanDiT(PreTrainedModel):
         clip_feature: Optional[torch.Tensor] = None,  # clip_vision_encoder(img)
         y: Optional[torch.Tensor] = None,  # vae_encoder(img)
     ):
+        use_cfg = x.shape[0] > 1
         with (
             gguf_inference(),
-            cfg_parallel((x, context, timestep, clip_feature, y)),
+            cfg_parallel((x, context, timestep, clip_feature, y), use_cfg=use_cfg),
         ):
             t = self.time_embedding(sinusoidal_embedding_1d(self.freq_dim, timestep))
             t_mod = self.time_projection(t).unflatten(1, (6, self.dim))
@@ -365,7 +366,7 @@ class WanDiT(PreTrainedModel):
         x = self.head(x, t)
         (x,) = sequence_parallel_unshard((x,), seq_dims=(1,), seq_lens=(f * h * w,))
         x = self.unpatchify(x, (f, h, w))
-        (x,) = cfg_parallel_unshard((x,))
+        (x,) = cfg_parallel_unshard((x,), use_cfg=use_cfg)
         return x

     @classmethod
{diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/models/wan/wan_vae.py
RENAMED
@@ -515,7 +515,7 @@ class WanVideoVAEStateDictConverter(StateDictConverter):
 class WanVideoVAE(PreTrainedModel):
     converter = WanVideoVAEStateDictConverter()

-    def __init__(self, z_dim=16, …
+    def __init__(self, z_dim=16, device: str = "cuda:0", dtype: torch.dtype = torch.float32):
         super().__init__()

         mean = [
@@ -561,12 +561,11 @@ class WanVideoVAE(PreTrainedModel):
         # init model
         self.model = VideoVAE(z_dim=z_dim).eval().requires_grad_(False)
         self.upsampling_factor = 8
-        self.parallelism = parallelism

     @classmethod
-    def from_state_dict(cls, state_dict, …
+    def from_state_dict(cls, state_dict, device="cuda:0", dtype=torch.float32) -> "WanVideoVAE":
         with no_init_weights():
-            model = torch.nn.utils.skip_init(cls, …
+            model = torch.nn.utils.skip_init(cls, device=device, dtype=dtype)
         model.load_state_dict(state_dict, assign=True)
         model.to(device=device, dtype=dtype, non_blocking=True)
         return model
@@ -607,7 +606,7 @@ class WanVideoVAE(PreTrainedModel):
                 h_, w_ = h + size_h, w + size_w
                 tasks.append((h, h_, w, w_))

-        data_device = device if …
+        data_device = device if dist.is_initialized() else "cpu"
         computation_device = device

         out_T = T * 4 - 3
@@ -622,9 +621,9 @@ class WanVideoVAE(PreTrainedModel):
             device=data_device,
         )

-        …
-        for i, (h, h_, w, w_) in enumerate(tqdm(tasks, desc="VAE DECODING", disable=…
-            if …
+        hide_progress = dist.is_initialized() and dist.get_rank() != 0
+        for i, (h, h_, w, w_) in enumerate(tqdm(tasks, desc="VAE DECODING", disable=hide_progress)):
+            if dist.is_initialized() and (i % dist.get_world_size() != dist.get_rank()):
                 continue
             hidden_states_batch = hidden_states[:, :, :, h:h_, w:w_].to(computation_device)
             hidden_states_batch = self.model.decode(hidden_states_batch, self.scale).to(data_device)
@@ -654,11 +653,11 @@ class WanVideoVAE(PreTrainedModel):
                 target_h : target_h + hidden_states_batch.shape[3],
                 target_w : target_w + hidden_states_batch.shape[4],
             ] += mask
-            if progress_callback is not None and not …
+            if progress_callback is not None and not hide_progress:
                 progress_callback(i + 1, len(tasks), "VAE DECODING")
-        if progress_callback is not None and not …
+        if progress_callback is not None and not hide_progress:
             progress_callback(len(tasks), len(tasks), "VAE DECODING")
-        if …
+        if dist.is_initialized():
             dist.all_reduce(values)
             dist.all_reduce(weight)
         values = values / weight
@@ -681,7 +680,7 @@ class WanVideoVAE(PreTrainedModel):
             h_, w_ = h + size_h, w + size_w
             tasks.append((h, h_, w, w_))

-        data_device = device if …
+        data_device = device if dist.is_initialized() else "cpu"
         computation_device = device

         out_T = (T + 3) // 4
@@ -696,9 +695,9 @@ class WanVideoVAE(PreTrainedModel):
             device=data_device,
         )

-        hide_progress_bar = …
+        hide_progress_bar = dist.is_initialized() and dist.get_rank() != 0
         for i, (h, h_, w, w_) in enumerate(tqdm(tasks, desc="VAE ENCODING", disable=hide_progress_bar)):
-            if …
+            if dist.is_initialized() and (i % dist.get_world_size() != dist.get_rank()):
                 continue
             hidden_states_batch = video[:, :, :, h:h_, w:w_].to(computation_device)
             hidden_states_batch = self.model.encode(hidden_states_batch, self.scale).to(data_device)
@@ -732,7 +731,7 @@ class WanVideoVAE(PreTrainedModel):
             progress_callback(i + 1, len(tasks), "VAE ENCODING")
         if progress_callback is not None and not hide_progress_bar:
             progress_callback(len(tasks), len(tasks), "VAE ENCODING")
-        if …
+        if dist.is_initialized():
             dist.all_reduce(values)
             dist.all_reduce(weight)
         values = values / weight
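The recurring edit in this file replaces a stored `parallelism` attribute with runtime `torch.distributed` checks: each rank processes every `world_size`-th tile, and the per-rank partial results are merged with `all_reduce`. A minimal standalone sketch of that pattern (with a hypothetical `process_tile` standing in for the real encode/decode call):

```python
import torch
import torch.distributed as dist

def run_tiled(tasks, out: torch.Tensor, process_tile):
    # Rank r handles tiles i where i % world_size == r; the other tiles stay
    # zero locally and are filled in by the all_reduce below.
    for i, task in enumerate(tasks):
        if dist.is_initialized() and (i % dist.get_world_size() != dist.get_rank()):
            continue
        out += process_tile(task)  # hypothetical per-tile computation
    if dist.is_initialized():
        dist.all_reduce(out)  # sum the per-rank partial results
    return out
```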
{diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/pipelines/base.py
RENAMED
@@ -91,15 +91,21 @@ class BasePipeline:

     @staticmethod
     def load_model_checkpoint(
-        checkpoint_path: str, device: str = "cpu", dtype: torch.dtype = torch.float16
+        checkpoint_path: str | List[str], device: str = "cpu", dtype: torch.dtype = torch.float16
     ) -> Dict[str, torch.Tensor]:
-        if …
-        …
-        …
-        …
-        …
-        …
-        …
+        if isinstance(checkpoint_path, str):
+            checkpoint_path = [checkpoint_path]
+        state_dict = {}
+        for path in checkpoint_path:
+            if not os.path.isfile(path):
+                raise FileNotFoundError(f"{path} is not a file")
+            elif path.endswith(".safetensors"):
+                state_dict.update(**load_file(path, device=device))
+            elif path.endswith(".gguf"):
+                state_dict.update(**load_gguf_checkpoint(path, device=device, dtype=dtype))
+            else:
+                raise ValueError(f"{path} is not a .safetensors or .gguf file")
+        return state_dict

     @staticmethod
     def validate_image_size(
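In effect, `load_model_checkpoint` now accepts either a single checkpoint path or a list of shard paths and merges them into one state dict. A sketch of the call (hypothetical file names):

```python
from diffsynth_engine.pipelines.base import BasePipeline

# Single file, as before:
state_dict = BasePipeline.load_model_checkpoint("model.safetensors")

# New in this version: sharded checkpoints merged into a single state dict.
state_dict = BasePipeline.load_model_checkpoint([
    "model-00001-of-00002.safetensors",
    "model-00002-of-00002.safetensors",
])
```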
{diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/pipelines/controlnet_helper.py
RENAMED
@@ -8,8 +8,8 @@ ImageType = Union[Image.Image, torch.Tensor, List[Image.Image], List[torch.Tensor]]

 @dataclass
 class ControlNetParams:
-    scale: float
     image: ImageType
+    scale: float = 1.0
     model: Optional[nn.Module] = None
     mask: Optional[ImageType] = None
     control_start: float = 0
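With `scale` given a default, `ControlNetParams` can now be constructed from just an image. A sketch (assuming a PIL control image on disk):

```python
from PIL import Image
from diffsynth_engine.pipelines.controlnet_helper import ControlNetParams

params = ControlNetParams(image=Image.open("control.png"))  # scale defaults to 1.0
```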
{diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/pipelines/flux_image.py
RENAMED
@@ -419,9 +419,10 @@ class ControlType(Enum):
     normal = "normal"
     bfl_control = "bfl_control"
     bfl_fill = "bfl_fill"
+    bfl_kontext = "bfl_kontext"

     def get_in_channel(self):
-        if self == ControlType.normal:
+        if self in [ControlType.normal, ControlType.bfl_kontext]:
             return 64
         elif self == ControlType.bfl_control:
             return 128
@@ -764,9 +765,15 @@ class FluxImagePipeline(BasePipeline):
         current_step: int,
         total_step: int,
     ):
+        origin_latents_shape = latents.shape
         if self.control_type != ControlType.normal:
             controlnet_param = controlnet_params[0]
-            …
+            if self.control_type == ControlType.bfl_kontext:
+                latents = torch.cat((latents, controlnet_param.image * controlnet_param.scale), dim=2)
+                image_ids = image_ids.repeat(1, 2, 1)
+                image_ids[:, image_ids.shape[1] // 2 :, 0] += 1
+            else:
+                latents = torch.cat((latents, controlnet_param.image * controlnet_param.scale), dim=1)
             latents = latents.to(self.dtype)
             controlnet_params = []

@@ -797,6 +804,8 @@ class FluxImagePipeline(BasePipeline):
             controlnet_double_block_output=double_block_output,
             controlnet_single_block_output=single_block_output,
         )
+        if self.control_type == ControlType.bfl_kontext:
+            noise_pred = noise_pred[:, :, : origin_latents_shape[2], : origin_latents_shape[3]]
         return noise_pred

     def prepare_latents(
{diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/utils/download.py
RENAMED
@@ -2,10 +2,11 @@ import os
 import shutil
 import tqdm
 import tempfile
-from typing import Optional
+from typing import List, Optional
 from pathlib import Path
 from urllib.parse import urlparse
 import requests
+import glob

 from modelscope import snapshot_download
 from modelscope.hub.api import HubApi
@@ -23,11 +24,11 @@ MODEL_SOURCES = ["modelscope", "civitai"]
 def fetch_model(
     model_uri: str,
     revision: Optional[str] = None,
-    path: Optional[str] = None,
+    path: Optional[str | List[str]] = None,
     access_token: Optional[str] = None,
     source: str = "modelscope",
-    fetch_safetensors: bool = True,
-) -> str:
+    fetch_safetensors: bool = True,  # TODO: supports other formats like GGUF
+) -> str | List[str]:
     if source == "modelscope":
         return fetch_modelscope_model(model_uri, revision, path, access_token, fetch_safetensors)
     if source == "civitai":
@@ -38,7 +39,7 @@ def fetch_model(
 def fetch_modelscope_model(
     model_id: str,
     revision: Optional[str] = None,
-    path: Optional[str] = None,
+    path: Optional[str | List[str]] = None,
     access_token: Optional[str] = None,
     fetch_safetensors: bool = True,
 ) -> str:
@@ -52,12 +53,15 @@
     directory = os.path.join(DIFFSYNTH_CACHE, "modelscope", model_id, revision if revision else "__version")
     dirpath = snapshot_download(model_id, revision=revision, local_dir=directory, allow_patterns=path)

-    if path is not None:
-        path = os.path.join(dirpath, path)
+    if isinstance(path, str):
+        path = glob.glob(os.path.join(dirpath, path))
+        path = path[0] if len(path) == 1 else path
+    elif isinstance(path, list):
+        path = [os.path.join(dirpath, p) for p in path]
     else:
         path = dirpath

-    if os.path.isdir(path) and fetch_safetensors:
+    if isinstance(path, str) and os.path.isdir(path) and fetch_safetensors:
         return _fetch_safetensors(path)
     return path

@@ -122,16 +126,17 @@ def ensure_directory_exists(filename: str):
     Path(filename).parent.mkdir(parents=True, exist_ok=True)


-def _fetch_safetensors(dirpath: str) -> str:
+def _fetch_safetensors(dirpath: str) -> str | List[str]:
     all_safetensors = []
     for filename in os.listdir(dirpath):
         if filename.endswith(".safetensors"):
             all_safetensors.append(os.path.join(dirpath, filename))
-    if len(all_safetensors) == 1:
-        logger.info(f"Fetch safetensors file {all_safetensors[0]}")
-        return all_safetensors[0]
-    elif len(all_safetensors) == 0:
+    if len(all_safetensors) == 0:
         logger.error(f"No safetensors file found in {dirpath}")
+        return dirpath
+    elif len(all_safetensors) == 1:
+        all_safetensors = all_safetensors[0]
+        logger.info(f"Fetch safetensors file {all_safetensors}")
     else:
-        logger.…
-        return …
+        logger.info(f"Fetch safetensors files {all_safetensors}")
+    return all_safetensors
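The net effect on `fetch_model`: `path` may now be a list of files or a glob pattern, and the return value is a single local path or a list of paths accordingly. A sketch (a hypothetical subset of shard names):

```python
from diffsynth_engine import fetch_model

# A list of shard files resolves to a list of local paths; a glob pattern
# resolves to a str for one match or a list for several matches.
paths = fetch_model("Wan-AI/Wan2.1-T2V-14B", path=[
    "diffusion_pytorch_model-00001-of-00006.safetensors",
    "diffusion_pytorch_model-00002-of-00006.safetensors",
])
```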
{diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.3.6.dev2
+Version: 0.3.6.dev5
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
@@ -24,6 +24,7 @@ Requires-Dist: pillow
 Requires-Dist: imageio[ffmpeg]
 Requires-Dist: yunchang; sys_platform == "linux"
 Requires-Dist: onnxruntime
+Requires-Dist: opencv-python
 Provides-Extra: dev
 Requires-Dist: diffusers==0.31.0; extra == "dev"
 Requires-Dist: transformers==4.45.2; extra == "dev"
diffsynth_engine-0.3.6.dev5/docs/tutorial.md
ADDED
@@ -0,0 +1,241 @@
# DiffSynth-Engine User Guide

## Installation

Before using DiffSynth-Engine, please ensure your device meets the following requirements:

* NVIDIA GPU with CUDA Compute Capability 8.6+ (e.g., RTX 50 Series, RTX 40 Series, RTX 30 Series, see [NVIDIA documentation](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities) for details) or Apple Silicon M-series chips.

Python environment requirements: Python 3.10+.

Use `pip3` to install DiffSynth-Engine from PyPI:

```shell
pip3 install diffsynth-engine
```

DiffSynth-Engine also supports installation from source, which provides access to the latest features but might come with stability issues. We recommend installing the stable version via `pip3`.

```shell
git clone https://github.com/modelscope/diffsynth-engine.git && cd diffsynth-engine
pip3 install -e .
```

## Model Download

DiffSynth-Engine supports loading models from the [ModelScope Model Hub](https://www.modelscope.cn/aigc/models) by model ID. For example, on the [MajicFlus model page](https://www.modelscope.cn/models/MAILAND/majicflus_v1/summary?version=v1.0), we can find the model ID and the corresponding model filename in the image below.



Next, download the MajicFlus model with the following code.

```python
from diffsynth_engine import fetch_model

model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
```



For sharded models, specify multiple files using the `path` parameter.

```python
from diffsynth_engine import fetch_model

model_path = fetch_model("Wan-AI/Wan2.1-T2V-14B", path=[
    "diffusion_pytorch_model-00001-of-00006.safetensors",
    "diffusion_pytorch_model-00002-of-00006.safetensors",
    "diffusion_pytorch_model-00003-of-00006.safetensors",
    "diffusion_pytorch_model-00004-of-00006.safetensors",
    "diffusion_pytorch_model-00005-of-00006.safetensors",
    "diffusion_pytorch_model-00006-of-00006.safetensors",
])
```

It also supports using wildcards to match multiple files.

```python
from diffsynth_engine import fetch_model

model_path = fetch_model("Wan-AI/Wan2.1-T2V-14B", path="diffusion_pytorch_model*.safetensors")
```

The file path `model_path` returned by the `fetch_model` function is the path to the downloaded file(s).
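Because `path` may match several files, `model_path` is not always a single string; per the `fetch_model` behavior in this release, a single match yields a `str` and multiple matches yield a list. A quick check:

```python
from diffsynth_engine import fetch_model

model_path = fetch_model("Wan-AI/Wan2.1-T2V-14B", path="diffusion_pytorch_model*.safetensors")
print(type(model_path))  # str for a single match, list of str for multiple matches
```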
## Model Types

Diffusion models come in a wide variety of architectures. Each model is loaded and run for inference by a corresponding pipeline. The model types we currently support include:

| Model Architecture | Example | Pipeline |
| :----------------- | :----------------------------------------------------------- | :-------------------- |
| SD1.5 | [DreamShaper](https://www.modelscope.cn/models/MusePublic/DreamShaper_SD_1_5) | `SDImagePipeline` |
| SDXL | [RealVisXL](https://www.modelscope.cn/models/MusePublic/42_ckpt_SD_XL) | `SDXLImagePipeline` |
| FLUX | [MajicFlus](https://www.modelscope.cn/models/MAILAND/majicflus_v1/summary?version=v1.0) | `FluxImagePipeline` |
| Wan2.1 | [Wan2.1-T2V-1.3B](https://modelscope.cn/models/Wan-AI/Wan2.1-T2V-1.3B) | `WanVideoPipeline` |
| SD1.5 LoRA | [Detail Tweaker](https://www.modelscope.cn/models/MusePublic/Detail_Tweaker_LoRA_xijietiaozheng_LoRA_SD_1_5) | `SDImagePipeline` |
| SDXL LoRA | [Aesthetic Anime](https://www.modelscope.cn/models/MusePublic/100_lora_SD_XL) | `SDXLImagePipeline` |
| FLUX LoRA | [ArtAug](https://www.modelscope.cn/models/DiffSynth-Studio/ArtAug-lora-FLUX.1dev-v1) | `FluxImagePipeline` |
| Wan2.1 LoRA | [Highres-fix](https://modelscope.cn/models/DiffSynth-Studio/Wan2.1-1.3b-lora-highresfix-v1) | `WanVideoPipeline` |

Among these, SD1.5, SDXL, and FLUX are base models for image generation, while Wan2.1 is a base model for video generation. Base models can generate content independently. SD1.5 LoRA, SDXL LoRA, FLUX LoRA, and Wan2.1 LoRA are [LoRA](https://arxiv.org/abs/2106.09685) models. LoRA models are trained as "additional branches" on top of base models to enhance specific capabilities. They must be combined with a base model to be used for generation.

We will continuously update DiffSynth-Engine to support more models.

## Model Inference

After the model is downloaded, load the model with the corresponding pipeline and perform inference.

### Image Generation

The following code calls `FluxImagePipeline` to load the [MajicFlus](https://www.modelscope.cn/models/MAILAND/majicflus_v1/summary?version=v1.0) model and generate an image. To load other types of models, replace `FluxImagePipeline` in the code with the corresponding pipeline.

```python
from diffsynth_engine import fetch_model, FluxImagePipeline

model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
pipe = FluxImagePipeline.from_pretrained(model_path, device='cuda:0')
image = pipe(prompt="a cat")
image.save("image.png")
```

Please note that if some necessary modules, like text encoders, are missing from a model repository, the pipeline will automatically download the required files.

#### Detailed Parameters

In the image generation pipeline `pipe`, we can use the following parameters for fine-grained control (a combined example follows the list):

* `prompt`: The prompt, used to describe the content of the generated image, e.g., "a cat".
* `negative_prompt`: The negative prompt, used to describe content you do not want in the image, e.g., "ugly".
* `cfg_scale`: The guidance scale for [Classifier-Free Guidance](https://arxiv.org/abs/2207.12598). A larger value usually results in stronger correlation between the text and the image but reduces the diversity of the generated content.
* `clip_skip`: The number of layers to skip in the [CLIP](https://arxiv.org/abs/2103.00020) text encoder. The more layers skipped, the lower the text-image correlation, but this can lead to interesting variations in the generated content.
* `input_image`: Input image, used for image-to-image generation.
* `mask_image`: Mask image, used for image inpainting.
* `denoising_strength`: The denoising strength. When set to 1, a full generation process is performed. When set to a value between 0 and 1, some information from the input image is preserved.
* `height`: Image height.
* `width`: Image width.
* `num_inference_steps`: The number of inference steps. Generally, more steps lead to longer computation time but higher image quality.
* `tiled`: Whether to enable tiled processing for the VAE. This option is disabled by default. Enabling it can reduce VRAM usage.
* `tile_size`: The window size for tiled VAE processing.
* `tile_stride`: The stride for tiled VAE processing.
* `seed`: The random seed. A fixed seed ensures reproducible results.
* `progress_bar_cmd`: The progress bar module. [`tqdm`](https://github.com/tqdm/tqdm) is enabled by default. To disable the progress bar, set it to `lambda x: x`.
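As an illustrative combination of these parameters, here is a sketch of an image-to-image call; it assumes a local `input.png` to edit, and the specific values are illustrative rather than recommended defaults:

```python
from PIL import Image
from diffsynth_engine import fetch_model, FluxImagePipeline

model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
pipe = FluxImagePipeline.from_pretrained(model_path, device='cuda:0')
image = pipe(
    prompt="a cat",
    negative_prompt="ugly",
    input_image=Image.open("input.png").convert("RGB"),  # image-to-image source
    denoising_strength=0.6,  # keep some of the input image's information
    height=1024,
    width=1024,
    num_inference_steps=30,
    seed=42,     # fixed seed for reproducibility
    tiled=True,  # tiled VAE to reduce VRAM usage
)
image.save("image.png")
```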
#### Loading LoRA

We support loading LoRA on top of the base model. For example, the following code loads a [Cheongsam LoRA](https://www.modelscope.cn/models/DonRat/MAJICFLUS_SuperChinesestyleheongsam) based on the [MajicFlus](https://www.modelscope.cn/models/MAILAND/majicflus_v1/summary?version=v1.0) model to generate images of cheongsams, which the base model might struggle to create.

```python
from diffsynth_engine import fetch_model, FluxImagePipeline

model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
lora_path = fetch_model("DonRat/MAJICFLUS_SuperChinesestyleheongsam", path="麦橘超国风旗袍.safetensors")

pipe = FluxImagePipeline.from_pretrained(model_path, device='cuda:0')
pipe.load_lora(path=lora_path, scale=1.0)
image = pipe(prompt="a girl, qipao")
image.save("image.png")
```

The `scale` parameter in the code controls the degree of influence the LoRA model has on the base model. A value of 1.0 is usually sufficient. When set to a value greater than 1, the LoRA's effect will be stronger, but this may cause artifacts or degradation in the image content. Please adjust this parameter with caution.

#### VRAM Optimization

DiffSynth-Engine supports various levels of VRAM optimization, allowing models to run on GPUs with low VRAM. For example, at `bfloat16` precision and with no optimization options enabled, the FLUX model requires 35.84GB of VRAM for inference. By adding the parameter `offload_mode="cpu_offload"`, the VRAM requirement drops to 22.83GB. Furthermore, using `offload_mode="sequential_cpu_offload"` reduces the requirement to just 4.30GB, although this comes with an increase in inference time.

```python
from diffsynth_engine import fetch_model, FluxImagePipeline

model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
pipe = FluxImagePipeline.from_pretrained(model_path, offload_mode="sequential_cpu_offload")
image = pipe(prompt="a cat")
image.save("image.png")
```
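The intermediate `cpu_offload` mode mentioned above is selected the same way; only the `offload_mode` string changes:

```python
from diffsynth_engine import fetch_model, FluxImagePipeline

model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
pipe = FluxImagePipeline.from_pretrained(model_path, offload_mode="cpu_offload")  # ~22.83GB VRAM
```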
### Video Generation

DiffSynth-Engine also supports video generation. The following code loads the [Wan Video Generation Model](https://modelscope.cn/models/Wan-AI/Wan2.1-T2V-1.3B) and generates a video.

```python
from diffsynth_engine.pipelines.wan_video import WanVideoPipeline, WanModelConfig
from diffsynth_engine.utils.video import save_video
from diffsynth_engine import fetch_model

config = WanModelConfig(
    model_path=fetch_model("MusePublic/wan2.1-1.3b", path="dit.safetensors"),
    vae_path=fetch_model("muse/wan2.1-vae", path="vae.safetensors"),
    t5_path=fetch_model("muse/wan2.1-umt5", path="umt5.safetensors"),
)
pipe = WanVideoPipeline.from_pretrained(config, device="cuda")
# The prompt translates to: "A lively puppy runs quickly on a green lawn. The puppy has brownish-yellow fur,
# its two ears are perked up, and it looks focused and cheerful. Sunlight shines on it,
# making its fur look especially soft and shiny."
video = pipe(prompt="一只活泼的小狗在绿茵茵的草地上迅速奔跑。小狗毛色棕黄,两只耳朵立起,神情专注而欢快。阳光洒在它身上,使得毛发看上去格外柔软而闪亮。")
save_video(video, "video.mp4")
```

#### Detailed Parameters

In the video generation pipeline `pipe`, we can use the following parameters for fine-grained control (a combined example follows the list):

* `prompt`: The prompt, used to describe the content of the generated video, e.g., "a cat".
* `negative_prompt`: The negative prompt, used to describe content you do not want in the video, e.g., "ugly".
* `cfg_scale`: The guidance scale for [Classifier-Free Guidance](https://arxiv.org/abs/2207.12598). A larger value usually results in stronger correlation between the text and the video but reduces the diversity of the generated content.
* `input_image`: Input image, only effective in image-to-video models, such as [Wan-AI/Wan2.1-I2V-14B-720P](https://modelscope.cn/models/Wan-AI/Wan2.1-I2V-14B-720P).
* `input_video`: Input video, used for video-to-video generation.
* `denoising_strength`: The denoising strength. When set to 1, a full generation process is performed. When set to a value between 0 and 1, some information from the input video is preserved.
* `height`: Video frame height.
* `width`: Video frame width.
* `num_frames`: Number of video frames.
* `num_inference_steps`: The number of inference steps. Generally, more steps lead to longer computation time but higher video quality.
* `tiled`: Whether to enable tiled processing for the VAE. This option is disabled by default. Enabling it can reduce VRAM usage.
* `tile_size`: The window size for tiled VAE processing.
* `tile_stride`: The stride for tiled VAE processing.
* `seed`: The random seed. A fixed seed ensures reproducible results.
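An illustrative combination of these parameters, reusing the `pipe` and `save_video` from the example above; the resolution and frame count here are placeholder values, not model-specific recommendations:

```python
video = pipe(
    prompt="a cat",
    negative_prompt="ugly",
    height=480,
    width=832,
    num_frames=81,
    num_inference_steps=30,
    seed=42,
    tiled=True,  # tiled VAE to reduce VRAM usage
)
save_video(video, "video.mp4")
```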
#### Loading LoRA

We support loading LoRA on top of the base model. For example, the following code loads a [High-Resolution Fix LoRA](https://modelscope.cn/models/DiffSynth-Studio/Wan2.1-1.3b-lora-highresfix-v1) on top of the [Wan2.1-T2V-1.3B](https://modelscope.cn/models/Wan-AI/Wan2.1-T2V-1.3B) model to improve the generation quality at high resolutions.

```python
from diffsynth_engine.pipelines.wan_video import WanVideoPipeline, WanModelConfig
from diffsynth_engine.utils.video import save_video
from diffsynth_engine import fetch_model

config = WanModelConfig(
    model_path=fetch_model("MusePublic/wan2.1-1.3b", path="dit.safetensors"),
    vae_path=fetch_model("muse/wan2.1-vae", path="vae.safetensors"),
    t5_path=fetch_model("muse/wan2.1-umt5", path="umt5.safetensors"),
)
lora_path = fetch_model("DiffSynth-Studio/Wan2.1-1.3b-lora-highresfix-v1", path="model.safetensors")
pipe = WanVideoPipeline.from_pretrained(config, device="cuda")
pipe.load_lora(path=lora_path, scale=1.0)
# The prompt translates to: "A lively puppy runs quickly on a green lawn. The puppy has brownish-yellow fur,
# its two ears are perked up, and it looks focused and cheerful. Sunlight shines on it,
# making its fur look especially soft and shiny."
video = pipe(prompt="一只活泼的小狗在绿茵茵的草地上迅速奔跑。小狗毛色棕黄,两只耳朵立起,神情专注而欢快。阳光洒在它身上,使得毛发看上去格外柔软而闪亮。")
save_video(video, "video.mp4")
```

The `scale` parameter in the code controls the degree of influence the LoRA model has on the base model. A value of 1.0 is usually sufficient. When set to a value greater than 1, the LoRA's effect will be stronger, but this may cause artifacts or degradation in the video content. Please adjust this parameter with caution.

#### Multi-GPU Parallelism

We support multi-GPU parallel inference of the Wan2.1 model for faster video generation. Add the parameters `parallelism=4` (the number of GPUs to use) and `use_cfg_parallel=True` to the code to enable parallelism.

```python
from diffsynth_engine.pipelines.wan_video import WanVideoPipeline, WanModelConfig
from diffsynth_engine.utils.video import save_video
from diffsynth_engine import fetch_model

config = WanModelConfig(
    model_path=fetch_model("MusePublic/wan2.1-1.3b", path="dit.safetensors"),
    vae_path=fetch_model("muse/wan2.1-vae", path="vae.safetensors"),
    t5_path=fetch_model("muse/wan2.1-umt5", path="umt5.safetensors"),
)
pipe = WanVideoPipeline.from_pretrained(config, device="cuda", parallelism=4, use_cfg_parallel=True)
# The prompt translates to: "A lively puppy runs quickly on a green lawn. The puppy has brownish-yellow fur,
# its two ears are perked up, and it looks focused and cheerful. Sunlight shines on it,
# making its fur look especially soft and shiny."
video = pipe(prompt="一只活泼的小狗在绿茵茵的草地上迅速奔跑。小狗毛色棕黄,两只耳朵立起,神情专注而欢快。阳光洒在它身上,使得毛发看上去格外柔软而闪亮。")
save_video(video, "video.mp4")
```
{diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/docs/tutorial_zh.md
RENAMED
@@ -35,6 +35,31 @@ from diffsynth_engine import fetch_model
 model_path = fetch_model("MAILAND/majicflus_v1", path="majicflus_v134.safetensors")
 ```

+
+
+For sharded models, you can specify multiple files via the `path` parameter.
+
+```python
+from diffsynth_engine import fetch_model
+
+model_path = fetch_model("Wan-AI/Wan2.1-T2V-14B", path=[
+    "diffusion_pytorch_model-00001-of-00006.safetensors",
+    "diffusion_pytorch_model-00002-of-00006.safetensors",
+    "diffusion_pytorch_model-00003-of-00006.safetensors",
+    "diffusion_pytorch_model-00004-of-00006.safetensors",
+    "diffusion_pytorch_model-00005-of-00006.safetensors",
+    "diffusion_pytorch_model-00006-of-00006.safetensors",
+])
+```
+
+Wildcards are also supported for matching multiple files.
+
+```python
+from diffsynth_engine import fetch_model
+
+model_path = fetch_model("Wan-AI/Wan2.1-T2V-14B", path="diffusion_pytorch_model*.safetensors")
+```
+
 The file path `model_path` returned by the `fetch_model` function is the path to the downloaded file(s).

 ## Model Types
diffsynth_engine-0.3.6.dev2/docs/tutorial.md
DELETED
@@ -1 +0,0 @@
-# ToDo
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{diffsynth_engine-0.3.6.dev2 → diffsynth_engine-0.3.6.dev5}/diffsynth_engine/algorithm/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|