diffsynth 1.1.3__tar.gz → 1.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffsynth-1.1.3 → diffsynth-1.1.7}/PKG-INFO +1 -1
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/configs/model_config.py +6 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/wan_video_dit.py +56 -0
- diffsynth-1.1.7/diffsynth/models/wan_video_motion_controller.py +44 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/wan_video.py +87 -10
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth.egg-info/PKG-INFO +1 -1
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth.egg-info/SOURCES.txt +1 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/setup.py +1 -1
- {diffsynth-1.1.3 → diffsynth-1.1.7}/LICENSE +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/README.md +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/configs/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/controlnets/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/controlnets/controlnet_unit.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/controlnets/processors.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/data/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/data/simple_text_image.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/data/video.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/distributed/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/distributed/xdit_context_parallel.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ESRGAN/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/FastBlend/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/FastBlend/api.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/FastBlend/cupy_kernels.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/FastBlend/data.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/FastBlend/patch_match.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/FastBlend/runners/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/FastBlend/runners/accurate.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/FastBlend/runners/balanced.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/FastBlend/runners/fast.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/FastBlend/runners/interpolation.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/BLIP/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/BLIP/blip.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/BLIP/blip_pretrain.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/BLIP/med.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/BLIP/vit.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/aesthetic.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/clip.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/config.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/hps.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/imagereward.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/mps.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/coca_model.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/constants.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/factory.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/generation_utils.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/hf_configs.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/hf_model.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/loss.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/model.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/modified_resnet.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/openai.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/pretrained.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/push_to_hf_hub.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/timm_model.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/tokenizer.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/transform.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/transformer.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/utils.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/version.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/pickscore.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/trainer/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/trainer/models/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/trainer/models/base_model.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/trainer/models/clip_model.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/trainer/models/cross_modeling.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/RIFE/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/attention.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/cog_dit.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/cog_vae.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/downloader.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/flux_controlnet.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/flux_dit.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/flux_infiniteyou.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/flux_ipadapter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/flux_text_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/flux_vae.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/hunyuan_dit.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/hunyuan_dit_text_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/hunyuan_video_dit.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/hunyuan_video_text_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/hunyuan_video_vae_decoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/hunyuan_video_vae_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/kolors_text_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/lora.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/model_manager.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/omnigen.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd3_dit.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd3_text_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd3_vae_decoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd3_vae_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd_controlnet.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd_ipadapter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd_motion.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd_text_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd_unet.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd_vae_decoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sd_vae_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sdxl_controlnet.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sdxl_ipadapter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sdxl_motion.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sdxl_text_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sdxl_unet.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sdxl_vae_decoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/sdxl_vae_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/stepvideo_dit.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/stepvideo_text_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/stepvideo_vae.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/svd_image_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/svd_unet.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/svd_vae_decoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/svd_vae_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/tiler.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/utils.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/wan_video_image_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/wan_video_text_encoder.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/models/wan_video_vae.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/base.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/cog_video.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/dancer.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/flux_image.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/hunyuan_image.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/hunyuan_video.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/omnigen_image.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/pipeline_runner.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/sd3_image.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/sd_image.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/sd_video.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/sdxl_image.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/sdxl_video.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/step_video.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/pipelines/svd_video.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/processors/FastBlend.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/processors/PILEditor.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/processors/RIFE.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/processors/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/processors/base.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/processors/sequencial_processor.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/base_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/cog_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/flux_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/hunyuan_dit_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/hunyuan_video_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/kolors_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/omnigen_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/omost.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/prompt_refiners.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/sd3_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/sd_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/sdxl_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/stepvideo_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/prompters/wan_prompter.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/schedulers/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/schedulers/continuous_ode.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/schedulers/ddim.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/schedulers/flow_match.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/cog/tokenizer/added_tokens.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/cog/tokenizer/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/cog/tokenizer/spiece.model +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/cog/tokenizer/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/flux/tokenizer_1/merges.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/flux/tokenizer_1/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/flux/tokenizer_1/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/flux/tokenizer_1/vocab.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/flux/tokenizer_2/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/flux/tokenizer_2/spiece.model +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/flux/tokenizer_2/tokenizer.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/flux/tokenizer_2/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab_org.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/spiece.model +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_1/merges.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_1/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_1/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_1/vocab.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_2/preprocessor_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_2/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_2/tokenizer.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/hunyuan_video/tokenizer_2/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/kolors/tokenizer/tokenizer.model +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/kolors/tokenizer/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/kolors/tokenizer/vocab.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/merges.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion/tokenizer/vocab.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/merges.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/vocab.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/merges.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/vocab.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/spiece.model +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/tokenizer.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/merges.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/special_tokens_map.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/tokenizer_config.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/vocab.json +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/trainers/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/trainers/text_to_image.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/vram_management/__init__.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/vram_management/layers.py +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth.egg-info/dependency_links.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth.egg-info/requires.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth.egg-info/top_level.txt +0 -0
- {diffsynth-1.1.3 → diffsynth-1.1.7}/setup.cfg +0 -0
|
@@ -59,6 +59,7 @@ from ..models.wan_video_dit import WanModel
|
|
|
59
59
|
from ..models.wan_video_text_encoder import WanTextEncoder
|
|
60
60
|
from ..models.wan_video_image_encoder import WanImageEncoder
|
|
61
61
|
from ..models.wan_video_vae import WanVideoVAE
|
|
62
|
+
from ..models.wan_video_motion_controller import WanMotionControllerModel
|
|
62
63
|
|
|
63
64
|
|
|
64
65
|
model_loader_configs = [
|
|
@@ -120,11 +121,16 @@ model_loader_configs = [
|
|
|
120
121
|
(None, "9269f8db9040a9d860eaca435be61814", ["wan_video_dit"], [WanModel], "civitai"),
|
|
121
122
|
(None, "aafcfd9672c3a2456dc46e1cb6e52c70", ["wan_video_dit"], [WanModel], "civitai"),
|
|
122
123
|
(None, "6bfcfb3b342cb286ce886889d519a77e", ["wan_video_dit"], [WanModel], "civitai"),
|
|
124
|
+
(None, "6d6ccde6845b95ad9114ab993d917893", ["wan_video_dit"], [WanModel], "civitai"),
|
|
125
|
+
(None, "6bfcfb3b342cb286ce886889d519a77e", ["wan_video_dit"], [WanModel], "civitai"),
|
|
126
|
+
(None, "349723183fc063b2bfc10bb2835cf677", ["wan_video_dit"], [WanModel], "civitai"),
|
|
127
|
+
(None, "efa44cddf936c70abd0ea28b6cbe946c", ["wan_video_dit"], [WanModel], "civitai"),
|
|
123
128
|
(None, "cb104773c6c2cb6df4f9529ad5c60d0b", ["wan_video_dit"], [WanModel], "diffusers"),
|
|
124
129
|
(None, "9c8818c2cbea55eca56c7b447df170da", ["wan_video_text_encoder"], [WanTextEncoder], "civitai"),
|
|
125
130
|
(None, "5941c53e207d62f20f9025686193c40b", ["wan_video_image_encoder"], [WanImageEncoder], "civitai"),
|
|
126
131
|
(None, "1378ea763357eea97acdef78e65d6d96", ["wan_video_vae"], [WanVideoVAE], "civitai"),
|
|
127
132
|
(None, "ccc42284ea13e1ad04693284c7a09be6", ["wan_video_vae"], [WanVideoVAE], "civitai"),
|
|
133
|
+
(None, "dbd5ec76bbf977983f972c151d545389", ["wan_video_motion_controller"], [WanMotionControllerModel], "civitai"),
|
|
128
134
|
]
|
|
129
135
|
huggingface_model_loader_configs = [
|
|
130
136
|
# These configs are provided for detecting model type automatically.
|
|
@@ -493,6 +493,62 @@ class WanModelStateDictConverter:
|
|
|
493
493
|
"num_layers": 40,
|
|
494
494
|
"eps": 1e-6
|
|
495
495
|
}
|
|
496
|
+
elif hash_state_dict_keys(state_dict) == "6d6ccde6845b95ad9114ab993d917893":
|
|
497
|
+
config = {
|
|
498
|
+
"has_image_input": True,
|
|
499
|
+
"patch_size": [1, 2, 2],
|
|
500
|
+
"in_dim": 36,
|
|
501
|
+
"dim": 1536,
|
|
502
|
+
"ffn_dim": 8960,
|
|
503
|
+
"freq_dim": 256,
|
|
504
|
+
"text_dim": 4096,
|
|
505
|
+
"out_dim": 16,
|
|
506
|
+
"num_heads": 12,
|
|
507
|
+
"num_layers": 30,
|
|
508
|
+
"eps": 1e-6
|
|
509
|
+
}
|
|
510
|
+
elif hash_state_dict_keys(state_dict) == "6bfcfb3b342cb286ce886889d519a77e":
|
|
511
|
+
config = {
|
|
512
|
+
"has_image_input": True,
|
|
513
|
+
"patch_size": [1, 2, 2],
|
|
514
|
+
"in_dim": 36,
|
|
515
|
+
"dim": 5120,
|
|
516
|
+
"ffn_dim": 13824,
|
|
517
|
+
"freq_dim": 256,
|
|
518
|
+
"text_dim": 4096,
|
|
519
|
+
"out_dim": 16,
|
|
520
|
+
"num_heads": 40,
|
|
521
|
+
"num_layers": 40,
|
|
522
|
+
"eps": 1e-6
|
|
523
|
+
}
|
|
524
|
+
elif hash_state_dict_keys(state_dict) == "349723183fc063b2bfc10bb2835cf677":
|
|
525
|
+
config = {
|
|
526
|
+
"has_image_input": True,
|
|
527
|
+
"patch_size": [1, 2, 2],
|
|
528
|
+
"in_dim": 48,
|
|
529
|
+
"dim": 1536,
|
|
530
|
+
"ffn_dim": 8960,
|
|
531
|
+
"freq_dim": 256,
|
|
532
|
+
"text_dim": 4096,
|
|
533
|
+
"out_dim": 16,
|
|
534
|
+
"num_heads": 12,
|
|
535
|
+
"num_layers": 30,
|
|
536
|
+
"eps": 1e-6
|
|
537
|
+
}
|
|
538
|
+
elif hash_state_dict_keys(state_dict) == "efa44cddf936c70abd0ea28b6cbe946c":
|
|
539
|
+
config = {
|
|
540
|
+
"has_image_input": True,
|
|
541
|
+
"patch_size": [1, 2, 2],
|
|
542
|
+
"in_dim": 48,
|
|
543
|
+
"dim": 5120,
|
|
544
|
+
"ffn_dim": 13824,
|
|
545
|
+
"freq_dim": 256,
|
|
546
|
+
"text_dim": 4096,
|
|
547
|
+
"out_dim": 16,
|
|
548
|
+
"num_heads": 40,
|
|
549
|
+
"num_layers": 40,
|
|
550
|
+
"eps": 1e-6
|
|
551
|
+
}
|
|
496
552
|
else:
|
|
497
553
|
config = {}
|
|
498
554
|
return state_dict, config
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import torch.nn as nn
|
|
3
|
+
from .wan_video_dit import sinusoidal_embedding_1d
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class WanMotionControllerModel(torch.nn.Module):
    """Small MLP that maps a scalar motion bucket id to an embedding.

    The id is scaled, turned into a sinusoidal embedding of width
    ``freq_dim`` and passed through three linear layers (SiLU in between).
    The last layer outputs ``dim * 6`` features.
    NOTE(review): the ``dim * 6`` width presumably matches six modulation
    vectors of the video DiT -- confirm against the caller.
    """

    def __init__(self, freq_dim=256, dim=1536):
        """Build the MLP.

        Args:
            freq_dim: width of the sinusoidal embedding fed to the MLP.
            dim: hidden width; the output width is ``dim * 6``.
        """
        super().__init__()
        self.freq_dim = freq_dim
        # Keep the attribute name and layer order: state-dict keys
        # ("linear.0.weight", ...) depend on them.
        self.linear = nn.Sequential(
            nn.Linear(freq_dim, dim),
            nn.SiLU(),
            nn.Linear(dim, dim),
            nn.SiLU(),
            nn.Linear(dim, dim * 6),
        )

    def forward(self, motion_bucket_id):
        """Return the embedding for ``motion_bucket_id``.

        The id is multiplied by 10 before the sinusoidal embedding.
        NOTE(review): the factor 10 is an unexplained constant in the
        original code -- confirm its intent before changing it.
        """
        emb = sinusoidal_embedding_1d(self.freq_dim, motion_bucket_id * 10)
        emb = self.linear(emb)
        return emb

    def init(self):
        """Zero the weight and bias of the final linear layer in place.

        Zeroing in place (instead of multiplying a state-dict copy by 0)
        also clears non-finite values, since ``nan * 0`` is still ``nan``,
        and avoids allocating temporary copies of the parameters.
        """
        with torch.no_grad():
            for param in self.linear[-1].parameters():
                param.zero_()

    @staticmethod
    def state_dict_converter():
        """Return the converter used when loading checkpoints for this model."""
        return WanMotionControllerModelDictConverter()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class WanMotionControllerModelDictConverter:
    """Pass-through state-dict converter for the motion controller.

    Both entry points hand the given state dict back unchanged.
    """

    def __init__(self):
        """Nothing to configure; the converter is stateless."""

    def from_diffusers(self, state_dict):
        """Return ``state_dict`` as-is (same object, no key renaming)."""
        unchanged = state_dict
        return unchanged

    def from_civitai(self, state_dict):
        """Return ``state_dict`` as-is (same object, no key renaming)."""
        unchanged = state_dict
        return unchanged
|
|
44
|
+
|
|
@@ -18,6 +18,7 @@ from ..vram_management import enable_vram_management, AutoWrappedModule, AutoWra
|
|
|
18
18
|
from ..models.wan_video_text_encoder import T5RelativeEmbedding, T5LayerNorm
|
|
19
19
|
from ..models.wan_video_dit import RMSNorm, sinusoidal_embedding_1d
|
|
20
20
|
from ..models.wan_video_vae import RMS_norm, CausalConv3d, Upsample
|
|
21
|
+
from ..models.wan_video_motion_controller import WanMotionControllerModel
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
|
|
@@ -31,7 +32,8 @@ class WanVideoPipeline(BasePipeline):
|
|
|
31
32
|
self.image_encoder: WanImageEncoder = None
|
|
32
33
|
self.dit: WanModel = None
|
|
33
34
|
self.vae: WanVideoVAE = None
|
|
34
|
-
self.model_names = ['text_encoder', 'dit', 'vae', 'image_encoder']
|
|
35
|
+
self.motion_controller: WanMotionControllerModel = None
|
|
36
|
+
self.model_names = ['text_encoder', 'dit', 'vae', 'image_encoder', 'motion_controller']
|
|
35
37
|
self.height_division_factor = 16
|
|
36
38
|
self.width_division_factor = 16
|
|
37
39
|
self.use_unified_sequence_parallel = False
|
|
@@ -122,6 +124,22 @@ class WanVideoPipeline(BasePipeline):
|
|
|
122
124
|
computation_device=self.device,
|
|
123
125
|
),
|
|
124
126
|
)
|
|
127
|
+
if self.motion_controller is not None:
|
|
128
|
+
dtype = next(iter(self.motion_controller.parameters())).dtype
|
|
129
|
+
enable_vram_management(
|
|
130
|
+
self.motion_controller,
|
|
131
|
+
module_map = {
|
|
132
|
+
torch.nn.Linear: AutoWrappedLinear,
|
|
133
|
+
},
|
|
134
|
+
module_config = dict(
|
|
135
|
+
offload_dtype=dtype,
|
|
136
|
+
offload_device="cpu",
|
|
137
|
+
onload_dtype=dtype,
|
|
138
|
+
onload_device="cpu",
|
|
139
|
+
computation_dtype=dtype,
|
|
140
|
+
computation_device=self.device,
|
|
141
|
+
),
|
|
142
|
+
)
|
|
125
143
|
self.enable_cpu_offload()
|
|
126
144
|
|
|
127
145
|
|
|
@@ -134,6 +152,7 @@ class WanVideoPipeline(BasePipeline):
|
|
|
134
152
|
self.dit = model_manager.fetch_model("wan_video_dit")
|
|
135
153
|
self.vae = model_manager.fetch_model("wan_video_vae")
|
|
136
154
|
self.image_encoder = model_manager.fetch_model("wan_video_image_encoder")
|
|
155
|
+
self.motion_controller = model_manager.fetch_model("wan_video_motion_controller")
|
|
137
156
|
|
|
138
157
|
|
|
139
158
|
@staticmethod
|
|
@@ -163,22 +182,47 @@ class WanVideoPipeline(BasePipeline):
|
|
|
163
182
|
return {"context": prompt_emb}
|
|
164
183
|
|
|
165
184
|
|
|
166
|
-
def encode_image(self, image, num_frames, height, width):
|
|
185
|
+
def encode_image(self, image, end_image, num_frames, height, width):
    """Build the image-conditioning inputs from a first (and optional last) frame.

    Args:
        image: first-frame image; must support ``.resize((width, height))``
            (presumably a PIL image -- confirm against caller).
        end_image: optional last-frame image of the same kind, or ``None``.
        num_frames: number of video frames.
        height, width: target frame size in pixels.

    Returns:
        dict with ``"clip_feature"`` (image-encoder output for the first
        frame) and ``"y"`` (frame mask concatenated with the VAE encoding,
        with a leading batch axis), both cast to ``self.torch_dtype`` on
        ``self.device``.
    """
    image = self.preprocess_image(image.resize((width, height))).to(self.device)
    clip_context = self.image_encoder.encode_image([image])

    # Mask is 1 for frames that are given (first, and last if provided), 0 otherwise.
    msk = torch.ones(1, num_frames, height//8, width//8, device=self.device)
    msk[:, 1:] = 0

    first_frame = image.transpose(0, 1)
    if end_image is not None:
        end_image = self.preprocess_image(end_image.resize((width, height))).to(self.device)
        # Allocate the zero padding directly on the target device instead of
        # creating it on CPU and copying it over.
        padding = torch.zeros(3, num_frames-2, height, width, device=image.device)
        vae_input = torch.concat([first_frame, padding, end_image.transpose(0,1)],dim=1)
        msk[:, -1:] = 1
    else:
        padding = torch.zeros(3, num_frames-1, height, width, device=image.device)
        vae_input = torch.concat([first_frame, padding], dim=1)

    # Repeat the first frame's mask 4x so the frame axis has num_frames + 3
    # entries, then fold it into groups of 4; the view below therefore needs
    # num_frames + 3 to be divisible by 4.
    msk = torch.concat([torch.repeat_interleave(msk[:, 0:1], repeats=4, dim=1), msk[:, 1:]], dim=1)
    msk = msk.view(1, msk.shape[1] // 4, 4, height//8, width//8)
    msk = msk.transpose(1, 2)[0]

    y = self.vae.encode([vae_input.to(dtype=self.torch_dtype, device=self.device)], device=self.device)[0]
    # Stack mask and latents along the first axis, then add the batch axis.
    y = torch.concat([msk, y])
    y = y.unsqueeze(0)
    clip_context = clip_context.to(dtype=self.torch_dtype, device=self.device)
    y = y.to(dtype=self.torch_dtype, device=self.device)
    return {"clip_feature": clip_context, "y": y}
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def encode_control_video(self, control_video, tiled=True, tile_size=(34, 34), tile_stride=(18, 16)):
    """Encode a control video into latents.

    The frames are preprocessed with ``self.preprocess_images``, stacked
    along axis 2, and passed to ``self.encode_video`` with the given tiling
    options. The result is returned in ``self.torch_dtype`` on ``self.device``.
    """
    target = dict(dtype=self.torch_dtype, device=self.device)
    frames = self.preprocess_images(control_video)
    video_tensor = torch.stack(frames, dim=2).to(**target)
    encoded = self.encode_video(video_tensor, tiled=tiled, tile_size=tile_size, tile_stride=tile_stride)
    return encoded.to(**target)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def prepare_controlnet_kwargs(self, control_video, num_frames, height, width, clip_feature=None, y=None, tiled=True, tile_size=(34, 34), tile_stride=(18, 16)):
    """Prepend control-video latents to the image-conditioning tensor ``y``.

    Without a control video, ``clip_feature`` and ``y`` are passed through
    untouched. With one, missing image conditioning is replaced by zero
    tensors; existing conditioning keeps only the last 16 channels of ``y``.
    The control latents are then concatenated in front of ``y`` on axis 1.

    Returns:
        dict with keys ``"clip_feature"`` and ``"y"``.
    """
    if control_video is None:
        return {"clip_feature": clip_feature, "y": y}

    control_latents = self.encode_control_video(control_video, tiled=tiled, tile_size=tile_size, tile_stride=tile_stride)

    has_image_condition = clip_feature is not None and y is not None
    if has_image_condition:
        y = y[:, -16:]
    else:
        clip_feature = torch.zeros((1, 257, 1280), dtype=self.torch_dtype, device=self.device)
        y = torch.zeros((1, 16, (num_frames - 1) // 4 + 1, height//8, width//8), dtype=self.torch_dtype, device=self.device)

    combined = torch.concat([control_latents, y], dim=1)
    return {"clip_feature": clip_feature, "y": combined}
|
|
182
226
|
|
|
183
227
|
|
|
184
228
|
def tensor2video(self, frames):
|
|
@@ -204,6 +248,11 @@ class WanVideoPipeline(BasePipeline):
|
|
|
204
248
|
|
|
205
249
|
def prepare_unified_sequence_parallel(self):
    """Return the unified-sequence-parallel flag as model keyword arguments."""
    enabled = self.use_unified_sequence_parallel
    return dict(use_unified_sequence_parallel=enabled)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def prepare_motion_bucket_id(self, motion_bucket_id):
    """Wrap a scalar motion bucket id as a one-element tensor.

    Args:
        motion_bucket_id: scalar number.

    Returns:
        dict with key ``"motion_bucket_id"`` holding a tensor of shape
        ``(1,)`` in ``self.torch_dtype`` on ``self.device``.
    """
    # torch.tensor builds the value in the target dtype/device in one step;
    # the legacy torch.Tensor(...) constructor always creates a default-type
    # tensor first and needs a follow-up cast.
    motion_bucket_id = torch.tensor((motion_bucket_id,), dtype=self.torch_dtype, device=self.device)
    return {"motion_bucket_id": motion_bucket_id}
|
|
207
256
|
|
|
208
257
|
|
|
209
258
|
@torch.no_grad()
|
|
@@ -212,7 +261,9 @@ class WanVideoPipeline(BasePipeline):
|
|
|
212
261
|
prompt,
|
|
213
262
|
negative_prompt="",
|
|
214
263
|
input_image=None,
|
|
264
|
+
end_image=None,
|
|
215
265
|
input_video=None,
|
|
266
|
+
control_video=None,
|
|
216
267
|
denoising_strength=1.0,
|
|
217
268
|
seed=None,
|
|
218
269
|
rand_device="cpu",
|
|
@@ -222,6 +273,7 @@ class WanVideoPipeline(BasePipeline):
|
|
|
222
273
|
cfg_scale=5.0,
|
|
223
274
|
num_inference_steps=50,
|
|
224
275
|
sigma_shift=5.0,
|
|
276
|
+
motion_bucket_id=None,
|
|
225
277
|
tiled=True,
|
|
226
278
|
tile_size=(30, 52),
|
|
227
279
|
tile_stride=(15, 26),
|
|
@@ -263,10 +315,21 @@ class WanVideoPipeline(BasePipeline):
|
|
|
263
315
|
# Encode image
|
|
264
316
|
if input_image is not None and self.image_encoder is not None:
|
|
265
317
|
self.load_models_to_device(["image_encoder", "vae"])
|
|
266
|
-
image_emb = self.encode_image(input_image, num_frames, height, width)
|
|
318
|
+
image_emb = self.encode_image(input_image, end_image, num_frames, height, width)
|
|
267
319
|
else:
|
|
268
320
|
image_emb = {}
|
|
269
321
|
|
|
322
|
+
# ControlNet
|
|
323
|
+
if control_video is not None:
|
|
324
|
+
self.load_models_to_device(["image_encoder", "vae"])
|
|
325
|
+
image_emb = self.prepare_controlnet_kwargs(control_video, num_frames, height, width, **image_emb, **tiler_kwargs)
|
|
326
|
+
|
|
327
|
+
# Motion Controller
|
|
328
|
+
if self.motion_controller is not None and motion_bucket_id is not None:
|
|
329
|
+
motion_kwargs = self.prepare_motion_bucket_id(motion_bucket_id)
|
|
330
|
+
else:
|
|
331
|
+
motion_kwargs = {}
|
|
332
|
+
|
|
270
333
|
# Extra input
|
|
271
334
|
extra_input = self.prepare_extra_input(latents)
|
|
272
335
|
|
|
@@ -278,14 +341,24 @@ class WanVideoPipeline(BasePipeline):
|
|
|
278
341
|
usp_kwargs = self.prepare_unified_sequence_parallel()
|
|
279
342
|
|
|
280
343
|
# Denoise
|
|
281
|
-
self.load_models_to_device(["dit"])
|
|
344
|
+
self.load_models_to_device(["dit", "motion_controller"])
|
|
282
345
|
for progress_id, timestep in enumerate(progress_bar_cmd(self.scheduler.timesteps)):
|
|
283
346
|
timestep = timestep.unsqueeze(0).to(dtype=self.torch_dtype, device=self.device)
|
|
284
347
|
|
|
285
348
|
# Inference
|
|
286
|
-
noise_pred_posi = model_fn_wan_video(
|
|
349
|
+
noise_pred_posi = model_fn_wan_video(
|
|
350
|
+
self.dit, motion_controller=self.motion_controller,
|
|
351
|
+
x=latents, timestep=timestep,
|
|
352
|
+
**prompt_emb_posi, **image_emb, **extra_input,
|
|
353
|
+
**tea_cache_posi, **usp_kwargs, **motion_kwargs
|
|
354
|
+
)
|
|
287
355
|
if cfg_scale != 1.0:
|
|
288
|
-
noise_pred_nega = model_fn_wan_video(
|
|
356
|
+
noise_pred_nega = model_fn_wan_video(
|
|
357
|
+
self.dit, motion_controller=self.motion_controller,
|
|
358
|
+
x=latents, timestep=timestep,
|
|
359
|
+
**prompt_emb_nega, **image_emb, **extra_input,
|
|
360
|
+
**tea_cache_nega, **usp_kwargs, **motion_kwargs
|
|
361
|
+
)
|
|
289
362
|
noise_pred = noise_pred_nega + cfg_scale * (noise_pred_posi - noise_pred_nega)
|
|
290
363
|
else:
|
|
291
364
|
noise_pred = noise_pred_posi
|
|
@@ -358,13 +431,15 @@ class TeaCache:
|
|
|
358
431
|
|
|
359
432
|
def model_fn_wan_video(
|
|
360
433
|
dit: WanModel,
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
434
|
+
motion_controller: WanMotionControllerModel = None,
|
|
435
|
+
x: torch.Tensor = None,
|
|
436
|
+
timestep: torch.Tensor = None,
|
|
437
|
+
context: torch.Tensor = None,
|
|
364
438
|
clip_feature: Optional[torch.Tensor] = None,
|
|
365
439
|
y: Optional[torch.Tensor] = None,
|
|
366
440
|
tea_cache: TeaCache = None,
|
|
367
441
|
use_unified_sequence_parallel: bool = False,
|
|
442
|
+
motion_bucket_id: Optional[torch.Tensor] = None,
|
|
368
443
|
**kwargs,
|
|
369
444
|
):
|
|
370
445
|
if use_unified_sequence_parallel:
|
|
@@ -375,6 +450,8 @@ def model_fn_wan_video(
|
|
|
375
450
|
|
|
376
451
|
t = dit.time_embedding(sinusoidal_embedding_1d(dit.freq_dim, timestep))
|
|
377
452
|
t_mod = dit.time_projection(t).unflatten(1, (6, dit.dim))
|
|
453
|
+
if motion_bucket_id is not None and motion_controller is not None:
|
|
454
|
+
t_mod = t_mod + motion_controller(motion_bucket_id).unflatten(1, (6, dit.dim))
|
|
378
455
|
context = dit.text_embedding(context)
|
|
379
456
|
|
|
380
457
|
if dit.has_image_input:
|
|
@@ -117,6 +117,7 @@ diffsynth/models/tiler.py
|
|
|
117
117
|
diffsynth/models/utils.py
|
|
118
118
|
diffsynth/models/wan_video_dit.py
|
|
119
119
|
diffsynth/models/wan_video_image_encoder.py
|
|
120
|
+
diffsynth/models/wan_video_motion_controller.py
|
|
120
121
|
diffsynth/models/wan_video_text_encoder.py
|
|
121
122
|
diffsynth/models/wan_video_vae.py
|
|
122
123
|
diffsynth/pipelines/__init__.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/BLIP/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/BLIP/blip_pretrain.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/__init__.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/coca_model.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/constants.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/factory.py
RENAMED
|
File without changes
|
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/hf_configs.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/hf_model.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/loss.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/model.py
RENAMED
|
File without changes
|
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/openai.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/pretrained.py
RENAMED
|
File without changes
|
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/timm_model.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/tokenizer.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/transform.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/transformer.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/utils.py
RENAMED
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/open_clip/version.py
RENAMED
|
File without changes
|
|
File without changes
|
{diffsynth-1.1.3 → diffsynth-1.1.7}/diffsynth/extensions/ImageQualityMetric/trainer/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|