diffsynth-engine 0.7.1.dev1__tar.gz → 0.7.1.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/PKG-INFO +1 -1
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py +4 -1
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/z_image/qwen3.py +4 -1
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/z_image/z_image_dit.py +2 -1
- diffsynth_engine-0.7.1.dev3/diffsynth_engine/tools/qwen_image_upscaler_tool.py +340 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/image.py +84 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine.egg-info/PKG-INFO +1 -1
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine.egg-info/SOURCES.txt +1 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/.gitattributes +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/.gitignore +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/.pre-commit-config.yaml +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/LICENSE +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/MANIFEST.in +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/README.md +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/assets/dingtalk.png +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/assets/showcase.jpeg +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/assets/tongyi.svg +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/flow_match/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/flow_match/flow_beta.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/flow_match/flow_ddim.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/flow_match/recifited_flow.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/beta.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/ddim.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/exponential.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/karras.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/linear.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/stable_diffusion/sgm_uniform.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/flow_match/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/flow_match/flow_match_euler.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/stable_diffusion/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/stable_diffusion/brownian_tree.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/stable_diffusion/ddpm.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/stable_diffusion/deis.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_2m.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_2m_sde.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/stable_diffusion/dpmpp_3m_sde.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/stable_diffusion/epsilon.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/stable_diffusion/euler.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/sampler/stable_diffusion/euler_ancestral.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/components/vae.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/flux/flux_dit.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/flux/flux_text_encoder.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/flux/flux_vae.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/flux2/qwen3_8B_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/qwen_image/qwen2_5_vl_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/qwen_image/qwen2_5_vl_vision_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/qwen_image/qwen_image_vae.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/qwen_image/qwen_image_vae_keymap.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/sd/sd_text_encoder.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/sd/sd_unet.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/sd3/sd3_dit.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/sd3/sd3_text_encoder.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/sdxl/sdxl_text_encoder.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/sdxl/sdxl_unet.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/dit/wan2.1_flf2v_14b.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/dit/wan2.1_i2v_14b.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/dit/wan2.1_t2v_1.3b.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/dit/wan2.1_t2v_14b.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/dit/wan2.2_i2v_a14b.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/dit/wan2.2_s2v_14b.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/dit/wan2.2_t2v_a14b.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/dit/wan2.2_ti2v_5b.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/dit/wan_dit_keymap.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/vae/wan2.1_vae.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/vae/wan2.2_vae.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/wan/vae/wan_vae_keymap.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/models/z_image/qwen3_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/merges.txt +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/special_tokens_map.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/tokenizer_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/flux/tokenizer_1/vocab.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/special_tokens_map.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/spiece.model +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/tokenizer.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/flux/tokenizer_2/tokenizer_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/qwen_image/qwen2_vl_image_processor.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/qwen_image/tokenizer/added_tokens.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/qwen_image/tokenizer/merges.txt +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/qwen_image/tokenizer/special_tokens_map.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/qwen_image/tokenizer/tokenizer.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/qwen_image/tokenizer/tokenizer_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/qwen_image/tokenizer/vocab.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/merges.txt +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/special_tokens_map.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/tokenizer_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer/vocab.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/merges.txt +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/special_tokens_map.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/tokenizer_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/sdxl/tokenizer_2/vocab.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/special_tokens_map.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/spiece.model +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/z_image/tokenizer/merges.txt +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/z_image/tokenizer/tokenizer.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/z_image/tokenizer/tokenizer_config.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/conf/tokenizers/z_image/tokenizer/vocab.json +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/configs/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/configs/controlnet.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/configs/pipeline.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/kernels/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/base.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/basic/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/basic/attention.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/basic/lora.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/basic/lora_nunchaku.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/basic/relative_position_emb.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/basic/timestep.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/basic/transformer_helper.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/basic/unet_helper.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/basic/video_sparse_attention.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux/flux_controlnet.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux/flux_dit.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux/flux_dit_fbcache.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux/flux_ipadapter.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux/flux_redux.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux/flux_text_encoder.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux/flux_vae.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux2/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux2/flux2_dit.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/flux2/flux2_vae.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/hunyuan3d/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/hunyuan3d/dino_image_encoder.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/hunyuan3d/hunyuan3d_dit.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/hunyuan3d/hunyuan3d_vae.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/hunyuan3d/moe.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/hunyuan3d/surface_extractor.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/hunyuan3d/volume_decoder.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/qwen_image/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/qwen_image/qwen2_5_vl.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/qwen_image/qwen_image_dit.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/qwen_image/qwen_image_dit_fbcache.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/qwen_image/qwen_image_dit_nunchaku.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/qwen_image/qwen_image_vae.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sd/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sd/sd_controlnet.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sd/sd_text_encoder.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sd/sd_unet.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sd/sd_vae.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sd3/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sd3/sd3_dit.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sd3/sd3_text_encoder.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sd3/sd3_vae.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sdxl/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sdxl/sdxl_controlnet.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sdxl/sdxl_text_encoder.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sdxl/sdxl_unet.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/sdxl/sdxl_vae.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/text_encoder/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/text_encoder/clip.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/text_encoder/siglip.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/text_encoder/t5.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/vae/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/vae/vae.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/wan/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/wan/wan_audio_encoder.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/wan/wan_dit.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/wan/wan_image_encoder.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/wan/wan_s2v_dit.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/wan/wan_text_encoder.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/wan/wan_vae.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/z_image/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/z_image/siglip.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/z_image/z_image_dit_omni_base.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/base.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/flux2_klein_image.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/flux_image.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/hunyuan3d_shape.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/qwen_image.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/sd_image.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/sdxl_image.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/utils.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/wan_dmd.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/wan_s2v.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/wan_video.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/z_image.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/pipelines/z_image_omni_base.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/processor/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/processor/canny_processor.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/processor/depth_processor.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tokenizers/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tokenizers/base.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tokenizers/clip.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tokenizers/qwen2.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tokenizers/qwen2_vl_image_processor.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tokenizers/qwen2_vl_processor.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tokenizers/t5.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tokenizers/wan.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tools/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tools/flux_inpainting_tool.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tools/flux_outpainting_tool.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tools/flux_reference_tool.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/tools/flux_replace_tool.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/cache.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/constants.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/download.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/env.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/flag.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/fp8_linear.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/gguf.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/loader.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/lock.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/logging.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/memory/__init__.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/memory/linear_regression.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/memory/memory_predcit_model.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/offload.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/onnx.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/parallel.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/platform.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/process_group.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/prompt.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/video.py +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine.egg-info/dependency_links.txt +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine.egg-info/requires.txt +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine.egg-info/top_level.txt +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/docs/tutorial.md +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/docs/tutorial_zh.md +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/pyproject.toml +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/setup.cfg +0 -0
- {diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/setup.py +0 -0
{diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py
RENAMED
@@ -19,7 +19,10 @@ class BaseScheduler:
     def update_config(self, config_dict):
         for config_name, new_value in config_dict.items():
             if hasattr(self, config_name):
-                setattr(self, config_name, new_value)
+                actual_value = new_value
+                if isinstance(actual_value, str) and actual_value.lower() == "none":
+                    actual_value = None
+                setattr(self, config_name, actual_value)

     def restore_config(self):
         for config_name, config_value in self._stored_config.items():
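The practical effect of this change: scheduler config values that arrive as the literal string "none" or "None" (for example from a JSON config or a form field) are now coerced to a real Python None before being stored. A minimal standalone sketch of the new behavior (DummyScheduler and its shift attribute are stand-ins, not the library's BaseScheduler):

class DummyScheduler:
    def __init__(self):
        self.shift = 3.0  # hypothetical config attribute

    def update_config(self, config_dict):
        for config_name, new_value in config_dict.items():
            if hasattr(self, config_name):
                actual_value = new_value
                # the string "none"/"None" now means "unset this value"
                if isinstance(actual_value, str) and actual_value.lower() == "none":
                    actual_value = None
                setattr(self, config_name, actual_value)

s = DummyScheduler()
s.update_config({"shift": "None"})
assert s.shift is None  # previously the string "None" itself would have been stored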
{diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/z_image/qwen3.py
RENAMED
@@ -59,8 +59,11 @@ class Qwen3Model(PreTrainedModel):
         device: str = "cuda:0",
         dtype: torch.dtype = torch.bfloat16,
     ):
-        model = cls(config=config, device="meta", dtype=dtype)
+        with torch.device("meta"):
+            model = cls(config=config, device="meta", dtype=dtype)
         model.requires_grad_(False)
+
+        model.rotary_emb = Qwen3RotaryEmbedding(config=config, device=device)
         model.load_state_dict(state_dict, assign=True)
         model.to(device=device, dtype=dtype, non_blocking=True)
         return model

{diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/models/z_image/z_image_dit.py
RENAMED
@@ -584,7 +584,8 @@ class ZImageDiT(PreTrainedModel):
         dtype: torch.dtype,
         **kwargs,
     ):
-        model = cls(device="meta", dtype=dtype, **kwargs)
+        with torch.device("meta"):
+            model = cls(device="meta", dtype=dtype, **kwargs)
         model = model.requires_grad_(False)
         model.load_state_dict(state_dict, assign=True)
         model.to(device=device, dtype=dtype, non_blocking=True)
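Both hunks apply the same pattern: the model is constructed under torch.device("meta") so parameter creation allocates no real memory, and the checkpoint tensors are then attached with load_state_dict(..., assign=True) before moving to the target device. The Qwen3 change additionally rebuilds rotary_emb on the real device, presumably because its buffers are computed at construction time rather than loaded from the state dict. A minimal sketch of the pattern with a plain nn.Linear (not the library models):

import torch
import torch.nn as nn

state_dict = {"weight": torch.randn(4, 8), "bias": torch.zeros(4)}

# Parameters created under the meta device have shape and dtype but no storage.
with torch.device("meta"):
    layer = nn.Linear(8, 4)

# assign=True swaps the meta parameters for the loaded tensors instead of
# copying into storage that does not exist.
layer.load_state_dict(state_dict, assign=True)
layer.to(device="cpu")

print(layer(torch.randn(2, 8)).shape)  # torch.Size([2, 4])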
diffsynth_engine-0.7.1.dev3/diffsynth_engine/tools/qwen_image_upscaler_tool.py
@@ -0,0 +1,340 @@
+import torch
+import torch.nn as nn
+import math
+import numpy as np
+from typing import Literal, Optional, Dict
+from copy import deepcopy
+from PIL import Image
+from einops import rearrange, repeat
+from contextlib import contextmanager
+
+from diffsynth_engine.configs import QwenImagePipelineConfig
+from diffsynth_engine.pipelines.qwen_image import QwenImagePipeline
+from diffsynth_engine.models.qwen_image import QwenImageVAE
+from diffsynth_engine.models.basic.lora import LoRALinear
+from diffsynth_engine.models.qwen_image.qwen_image_dit import QwenImageTransformerBlock
+from diffsynth_engine.utils import logging
+from diffsynth_engine.utils.loader import load_file
+from diffsynth_engine.utils.download import fetch_model
+from diffsynth_engine.utils.image import adain_color_fix, wavelet_color_fix
+
+logger = logging.get_logger(__name__)
+
+
+@contextmanager
+def odtsr_forward():
+    """
+    Context manager for ODTSR forward pass optimization.
+
+    Replaces two methods:
+    1. LoRALinear.forward - to support batch CFG with dual outputs
+    2. QwenImageTransformerBlock._modulate - optimized version without repeat_interleave
+    """
+    original_lora_forward = LoRALinear.forward
+    original_modulate = QwenImageTransformerBlock._modulate
+
+    def lora_batch_cfg_forward(self, x):
+        y = nn.Linear.forward(self, x)
+        if len(self._lora_dict) < 1:
+            return y
+        if x.ndim == 2:
+            y2 = y.clone()
+            for name, lora in self._lora_dict.items():
+                y2 += lora(x)
+            return torch.stack([y, y2], dim=1)
+        else:
+            L2 = x.shape[1]
+            L = L2 // 2
+            x2 = x[:, L:, :]
+            for name, lora in self._lora_dict.items():
+                y[:, L:] += lora(x2)
+            return y
+
+    def optimized_modulate(self, x, mod_params, index=None):
+        if mod_params.ndim == 2:
+            shift, scale, gate = mod_params.chunk(3, dim=-1)
+            return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1), gate.unsqueeze(1)
+        else:
+            B, L2, C = x.shape
+            L = L2 // 2
+            shift, scale, gate = mod_params.chunk(3, dim=-1)  # Each: [B, 2, dim]
+
+            result = torch.empty_like(x)
+            gate_result = torch.empty(B, L2, gate.shape[-1], dtype=x.dtype, device=x.device)
+
+            result[:, :L] = x[:, :L] * (1 + scale[:, 0:1]) + shift[:, 0:1]
+            gate_result[:, :L] = gate[:, 0:1].expand(-1, L, -1)
+
+            result[:, L:] = x[:, L:] * (1 + scale[:, 1:2]) + shift[:, 1:2]
+            gate_result[:, L:] = gate[:, 1:2].expand(-1, L, -1)
+
+            return result, gate_result
+
+    LoRALinear.forward = lora_batch_cfg_forward
+    QwenImageTransformerBlock._modulate = optimized_modulate
+
+    try:
+        yield
+    finally:
+        LoRALinear.forward = original_lora_forward
+        QwenImageTransformerBlock._modulate = original_modulate
+
+
+class QwenImageUpscalerTool:
+    """
+    Tool for ODTSR (One-step Diffusion Transformer Super Resolution) image upscaling.
+    https://huggingface.co/double8fun/ODTSR
+    """
+
+    def __init__(
+        self,
+        pipeline: QwenImagePipeline,
+        odtsr_weight_path: Optional[str] = None,
+    ):
+        self.pipe = pipeline
+        self.device = self.pipe.device
+        self.dtype = self.pipe.dtype
+
+        # to avoid "small grid" artifacts in generated images
+        self._convert_dit_part_linear_weight()
+
+        if not odtsr_weight_path:
+            odtsr_weight_path = fetch_model("muse/ODTSR", revision="master", path="weight.safetensors")
+        odtsr_state_dict = load_file(odtsr_weight_path)
+        lora_state_dict = self._convert_odtsr_lora(odtsr_state_dict)
+        lora_state_dict_list = [(lora_state_dict, 1.0, odtsr_weight_path)]
+        self.pipe._load_lora_state_dicts(lora_state_dict_list, fused=False, save_original_weight=False)
+
+        self.new_vae = deepcopy(self.pipe.vae)
+        self._load_vae_encoder_weights(odtsr_state_dict)
+
+        sigmas = torch.linspace(1.0, 0.0, 1000 + 1)[:-1]
+        mu = 0.8
+        shift_terminal = 0.02
+        sigmas = math.exp(mu) / (math.exp(mu) + (1 / sigmas - 1))
+        one_minus_sigmas = 1 - sigmas
+        scale_factor = one_minus_sigmas[-1] / (1 - shift_terminal)
+        self.sigmas = 1 - (one_minus_sigmas / scale_factor)
+        self.sigmas = self.sigmas.to(device=self.device)
+        self.timesteps = self.sigmas * self.pipe.noise_scheduler.num_train_timesteps
+        self.timesteps = self.timesteps.to(device=self.device)
+        self.start_timestep = 750
+        self.fixed_timestep = self.timesteps[self.start_timestep].to(device=self.device)
+        self.one_step_sigma = self.sigmas[self.start_timestep].to(device=self.device)
+
+        self.prompt = "High Contrast, hyper detailed photo, 2k UHD"
+        self.prompt_emb, self.prompt_emb_mask = self.pipe.encode_prompt(self.prompt, 1, 4096)
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        qwen_model_path: str,
+        odtsr_weight_path: Optional[str] = None,
+        device: str = "cuda",
+        dtype: torch.dtype = torch.bfloat16,
+    ):
+        config = QwenImagePipelineConfig(
+            model_path=qwen_model_path,
+            model_dtype=dtype,
+            device=device,
+            load_encoder=True,
+        )
+        pipe = QwenImagePipeline.from_pretrained(config)
+        return cls(pipe, odtsr_weight_path)
+
+    def _convert_dit_part_linear_weight(self):
+        """
+        Perform dtype conversion on weights of specific Linear layers in the DIT model.
+
+        This is an important trick: for Linear layers NOT in the patterns list, convert their weights
+        to float8_e4m3fn first, then convert back to the original dtype (typically bfloat16). This operation
+        matches the weight processing method used during training to avoid "small grid" artifacts in generated images.
+
+        Layers in the patterns list (such as LoRA-related layers) are skipped and their original weights remain unchanged.
+        """
+        patterns = [
+            "img_in",
+            "img_mod.1",
+            "attn.to_q",
+            "attn.to_k",
+            "attn.to_v",
+            "to_out",
+            "img_mlp.net.0.proj",
+            "img_mlp.net.2",
+        ]
+
+        def _convert_weight(parent: nn.Module, name_prefix: str = ""):
+            for name, module in list(parent.named_children()):
+                full_name = f"{name_prefix}{name}"
+                if isinstance(module, torch.nn.Linear):
+                    if not any(p in full_name for p in patterns):
+                        origin_dtype = module.weight.data.dtype
+                        module.weight.data = module.weight.data.to(torch.float8_e4m3fn)
+                        module.weight.data = module.weight.data.to(origin_dtype)
+                        if module.bias is not None:
+                            module.bias.data = module.bias.data.to(torch.float8_e4m3fn)
+                            module.bias.data = module.bias.data.to(origin_dtype)
+                else:
+                    _convert_weight(module, name_prefix=full_name + ".")
+
+        _convert_weight(self.pipe.dit)
+
+    def _convert_odtsr_lora(self, odtsr_state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+        state_dict = {}
+        for key, param in odtsr_state_dict.items():
+            if "lora_A2" in key:
+                lora_b_key = key.replace("lora_A2", "lora_B2")
+                lora_b_param = odtsr_state_dict[lora_b_key]
+
+                lora_a_key = key.replace("lora_A2", "lora_A").replace("pipe.dit.", "")
+                lora_b_key = lora_b_key.replace("lora_B2", "lora_B").replace("pipe.dit.", "")
+                state_dict[lora_a_key] = param
+                state_dict[lora_b_key] = lora_b_param
+
+        return state_dict
+
+    def _load_vae_encoder_weights(self, state_dict: Dict[str, torch.Tensor]):
+        try:
+            vae_state_dict = {}
+            for k, v in state_dict.items():
+                if 'pipe.new_vae.' in k:
+                    new_key = k.replace('pipe.new_vae.', '')
+                    vae_state_dict[new_key] = v
+            if vae_state_dict:
+                self.new_vae.load_state_dict(vae_state_dict, strict=False)
+                logger.info(f"Loaded {len(vae_state_dict)} trained VAE encoder parameters")
+            else:
+                logger.warning(f"No 'pipe.new_vae.' weights found, using original VAE")
+        except Exception as e:
+            logger.error(f"Failed to load VAE encoder weights: {e}")
+            raise e
+
+
+    def add_noise(self, sample: torch.Tensor, noise: torch.Tensor, timestep: torch.Tensor) -> torch.Tensor:
+        timestep_id = torch.argmin((self.timesteps - timestep).abs())
+        sigma = self.sigmas[timestep_id]
+        sample = (1 - sigma) * sample + sigma * noise
+        return sample
+
+    def preprocess_image(self, image: Image.Image) -> torch.Tensor:
+        image = torch.Tensor(np.array(image, dtype=np.float32))
+        image = image.to(dtype=self.dtype, device=self.device)
+        image = image * (2 / 255) - 1
+        image = repeat(image, f"H W C -> B C H W", **({"B": 1}))
+        return image
+
+    def _prepare_condition_latents(self, image: Image.Image, vae: QwenImageVAE, vae_tiled: bool) -> torch.Tensor:
+        image_tensor = self.preprocess_image(image).to(dtype=self.pipe.config.vae_dtype)
+        image_tensor = image_tensor.unsqueeze(2)
+
+        latents = vae.encode(
+            image_tensor,
+            device=self.device,
+            tiled=vae_tiled,
+            tile_size=self.pipe.vae_tile_size,
+            tile_stride=self.pipe.vae_tile_stride,
+        )
+        latents = latents.squeeze(2).to(device=self.device, dtype=self.dtype)
+        return latents
+
+    def _single_step_denoise(
+        self,
+        latents: torch.Tensor,
+        image_latents: torch.Tensor,
+        noise: torch.Tensor,
+        prompt_emb: torch.Tensor,
+        prompt_emb_mask: torch.Tensor,
+        fidelity: float,
+    ) -> torch.Tensor:
+        fidelity_timestep_id = int(self.start_timestep + fidelity * (1000 - self.start_timestep) + 0.5)
+        if fidelity_timestep_id != 1000:
+            fidelity_timestep = self.timesteps[fidelity_timestep_id].to(device=self.device)
+            image_latents = self.add_noise(image_latents, noise, fidelity_timestep)
+
+        latents = self.add_noise(latents, noise, self.fixed_timestep)
+
+        with odtsr_forward():
+            noise_pred = self.pipe.predict_noise_with_cfg(
+                latents=latents,
+                image_latents=[image_latents],
+                timestep=self.fixed_timestep.unsqueeze(0),
+                prompt_emb=prompt_emb,
+                prompt_emb_mask=prompt_emb_mask,
+                negative_prompt_emb=None,
+                negative_prompt_emb_mask=None,
+                context_latents=None,
+                entity_prompt_embs=None,
+                entity_prompt_emb_masks=None,
+                negative_entity_prompt_embs=None,
+                negative_entity_prompt_emb_masks=None,
+                entity_masks=None,
+                cfg_scale=1.0,
+                batch_cfg=self.pipe.config.batch_cfg,
+            )
+
+        denoised = latents + (0 - self.one_step_sigma) * noise_pred
+        return denoised
+
+    @torch.no_grad()
+    def __call__(
+        self,
+        image: Image.Image,
+        scale: int = 2,
+        prompt: str = "High Contrast, hyper detailed photo, 2k UHD",
+        fidelity: float = 1.0,
+        align_method: Literal["none", "adain", "wavelet"] = "none",
+    ) -> Image.Image:
+        width, height = image.size
+        target_width, target_height = width * scale, height * scale
+        target_width_round = target_width // 16 * 16
+        target_height_round = target_height // 16 * 16
+        logger.info(f"Upscaling image from {width}x{height} to {target_width}x{target_height}")
+        vae_tiled = (target_width_round * target_height_round > 2048 * 2048)
+
+        resized_image = image.resize((target_width_round, target_height_round), Image.BICUBIC)
+
+        condition_latents = self._prepare_condition_latents(resized_image, self.pipe.vae, vae_tiled)
+        latents = self._prepare_condition_latents(resized_image, self.new_vae, vae_tiled)
+
+        noise = self.pipe.generate_noise(
+            (1, 16, target_height_round // 8, target_width_round // 8),
+            seed=42,
+            device=self.device,
+            dtype=self.dtype
+        )
+
+        prompt_emb, prompt_emb_mask = self.prompt_emb, self.prompt_emb_mask
+        if prompt != self.prompt:
+            prompt_emb, prompt_emb_mask = self.pipe.encode_prompt(prompt, 1, 4096)
+
+        denoised_latents = self._single_step_denoise(
+            latents=latents,
+            noise=noise,
+            image_latents=condition_latents,
+            prompt_emb=prompt_emb,
+            prompt_emb_mask=prompt_emb_mask,
+            fidelity=fidelity,
+        )
+
+        # Decode
+        denoised_latents = rearrange(denoised_latents, "B C H W -> B C 1 H W")
+        vae_output = rearrange(
+            self.pipe.vae.decode(
+                denoised_latents.to(self.pipe.vae.model.encoder.conv1.weight.dtype),
+                device=self.pipe.vae.model.encoder.conv1.weight.device,
+                tiled=vae_tiled,
+                tile_size=self.pipe.vae_tile_size,
+                tile_stride=self.pipe.vae_tile_stride,
+            )[0],
+            "C B H W -> B C H W",
+        )
+        result_image = self.pipe.vae_output_to_image(vae_output)
+        self.pipe.model_lifecycle_finish(["vae"])

+        if align_method == "adain":
+            result_image = adain_color_fix(target=result_image, source=resized_image)
+        elif align_method == "wavelet":
+            result_image = wavelet_color_fix(target=result_image, source=resized_image)
+
+        result_image = result_image.resize((target_width, target_height), Image.BICUBIC)
+        return result_image
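Taken together, the new tool wraps a QwenImagePipeline: it loads the ODTSR LoRA, swaps in a fine-tuned VAE encoder, and performs a single denoising step at a fixed timestep. A hedged usage sketch; the file paths below are placeholders, not values from this diff, and only the from_pretrained/__call__ entry points shown above are used:

from PIL import Image
from diffsynth_engine.tools.qwen_image_upscaler_tool import QwenImageUpscalerTool

# Placeholder checkpoint path; the ODTSR weight itself is fetched automatically
# when odtsr_weight_path is left as None.
upscaler = QwenImageUpscalerTool.from_pretrained(
    qwen_model_path="/path/to/qwen_image_checkpoint.safetensors",
    device="cuda",
)

image = Image.open("input.png").convert("RGB")  # placeholder input file
upscaled = upscaler(
    image,
    scale=2,                 # output is 2x the input resolution
    fidelity=1.0,            # 1.0 keeps the conditioning latents noise-free
    align_method="wavelet",  # optional color alignment against the bicubic-resized input
)
upscaled.save("output_2x.png")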
{diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine/utils/image.py
RENAMED
@@ -1,10 +1,13 @@
 import torch
 from torchvision import transforms
+from torchvision.transforms import ToTensor, ToPILImage
 import numpy as np
 import math
 from PIL import Image
 from enum import Enum
 from typing import List, Tuple, Optional
+from torch import Tensor
+from torch.nn import functional as F

 from diffsynth_engine.utils import logging

@@ -243,3 +246,84 @@ def _need_rescale_pil_conversion(image: np.ndarray) -> bool:
             f"got [{image.min()}, {image.max()}] which cannot be converted to uint8."
         )
     return do_rescale
+
+
+# --------------------------------------------------------------------------------
+# Color Alignment Functions
+# Based on Li Yi's implementation: https://github.com/pkuliyi2015/sd-webui-stablesr
+# --------------------------------------------------------------------------------
+def calc_mean_std(feat: Tensor, eps=1e-5):
+    size = feat.size()
+    assert len(size) == 4, 'The input feature should be 4D tensor.'
+    b, c = size[:2]
+    feat_var = feat.reshape(b, c, -1).var(dim=2) + eps
+    feat_std = feat_var.sqrt().reshape(b, c, 1, 1)
+    feat_mean = feat.reshape(b, c, -1).mean(dim=2).reshape(b, c, 1, 1)
+    return feat_mean, feat_std
+
+
+def adaptive_instance_normalization(content_feat: Tensor, style_feat: Tensor):
+    size = content_feat.size()
+    style_mean, style_std = calc_mean_std(style_feat)
+    content_mean, content_std = calc_mean_std(content_feat)
+    normalized_feat = (content_feat - content_mean.expand(size)) / content_std.expand(size)
+    return normalized_feat * style_std.expand(size) + style_mean.expand(size)
+
+
+def wavelet_blur(image: Tensor, radius: int):
+    kernel_vals = [
+        [0.0625, 0.125, 0.0625],
+        [0.125, 0.25, 0.125],
+        [0.0625, 0.125, 0.0625],
+    ]
+    kernel = torch.tensor(kernel_vals, dtype=image.dtype, device=image.device)
+    kernel = kernel[None, None]
+    kernel = kernel.repeat(3, 1, 1, 1)
+    image = F.pad(image, (radius, radius, radius, radius), mode='replicate')
+    output = F.conv2d(image, kernel, groups=3, dilation=radius)
+    return output
+
+
+def wavelet_decomposition(image: Tensor, levels=5):
+    high_freq = torch.zeros_like(image)
+    for i in range(levels):
+        radius = 2 ** i
+        low_freq = wavelet_blur(image, radius)
+        high_freq += (image - low_freq)
+        image = low_freq
+
+    return high_freq, low_freq
+
+
+def wavelet_reconstruction(content_feat: Tensor, style_feat: Tensor):
+    content_high_freq, content_low_freq = wavelet_decomposition(content_feat)
+    del content_low_freq
+    style_high_freq, style_low_freq = wavelet_decomposition(style_feat)
+    del style_high_freq
+    return content_high_freq + style_low_freq
+
+
+def adain_color_fix(target: Image.Image, source: Image.Image) -> Image.Image:
+    to_tensor = ToTensor()
+    target_tensor = to_tensor(target).unsqueeze(0)
+    source_tensor = to_tensor(source).unsqueeze(0)
+
+    result_tensor = adaptive_instance_normalization(target_tensor, source_tensor)
+
+    to_image = ToPILImage()
+    result_image = to_image(result_tensor.squeeze(0).clamp_(0.0, 1.0))
+
+    return result_image
+
+
+def wavelet_color_fix(target: Image.Image, source: Image.Image) -> Image.Image:
+    to_tensor = ToTensor()
+    target_tensor = to_tensor(target).unsqueeze(0)
+    source_tensor = to_tensor(source).unsqueeze(0)
+
+    result_tensor = wavelet_reconstruction(target_tensor, source_tensor)
+
+    to_image = ToPILImage()
+    result_image = to_image(result_tensor.squeeze(0).clamp_(0.0, 1.0))
+
+    return result_image
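The two new color-fix helpers transfer color statistics from a source image onto a target image: adain_color_fix matches per-channel mean and standard deviation, while wavelet_color_fix keeps the target's high-frequency detail and takes the low-frequency (color) component from the source. A short usage sketch with placeholder file names:

from PIL import Image
from diffsynth_engine.utils.image import adain_color_fix, wavelet_color_fix

target = Image.open("generated.png").convert("RGB")  # image whose colors drifted
source = Image.open("reference.png").convert("RGB")  # color reference (same size for the wavelet variant)

fixed = wavelet_color_fix(target=target, source=source)
# or: fixed = adain_color_fix(target=target, source=source)
fixed.save("color_fixed.png")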
{diffsynth_engine-0.7.1.dev1 → diffsynth_engine-0.7.1.dev3}/diffsynth_engine.egg-info/SOURCES.txt
RENAMED
@@ -202,6 +202,7 @@ diffsynth_engine/tools/flux_inpainting_tool.py
 diffsynth_engine/tools/flux_outpainting_tool.py
 diffsynth_engine/tools/flux_reference_tool.py
 diffsynth_engine/tools/flux_replace_tool.py
+diffsynth_engine/tools/qwen_image_upscaler_tool.py
 diffsynth_engine/utils/__init__.py
 diffsynth_engine/utils/cache.py
 diffsynth_engine/utils/constants.py