PyPI - diffsynth-engine - Versions diffs - 0.4.3.dev5__py3-none-any.whl → 0.4.3.dev7__py3-none-any.whl - Mend

diffsynth-engine 0.4.3.dev5__py3-none-any.whl → 0.4.3.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

diffsynth_engine/__init__.py CHANGED Viewed

@@ -4,6 +4,10 @@ from .configs import (
     FluxPipelineConfig,
     WanPipelineConfig,
     QwenImagePipelineConfig,
+    SDStateDicts,
+    SDXLStateDicts,
+    FluxStateDicts,
+    QwenImageStateDicts,
     ControlNetParams,
     ControlType,
 )
@@ -38,6 +42,10 @@ __all__ = [
     "SDXLPipelineConfig",
     "FluxPipelineConfig",
     "WanPipelineConfig",
+    "SDStateDicts",
+    "SDXLStateDicts",
+    "FluxStateDicts",
+    "QwenImageStateDicts",
     "FluxImagePipeline",
     "QwenImagePipelineConfig",
     "FluxControlNet",

diffsynth_engine/pipelines/sd_image.py CHANGED Viewed

@@ -172,31 +172,40 @@ class SDImagePipeline(BasePipeline):
         else:
             config = model_path_or_config
-        logger.info(f"loading state dict from {config.model_path} ...")
-        unet_state_dict = cls.load_model_checkpoint(config.model_path, device="cpu", dtype=config.model_dtype)
-        if config.vae_path is not None:
-            logger.info(f"loading state dict from {config.vae_path} ...")
-            vae_state_dict = cls.load_model_checkpoint(config.vae_path, device="cpu", dtype=config.vae_dtype)
-        else:
-            vae_state_dict = unet_state_dict
-        if config.clip_path is not None:
-            logger.info(f"loading state dict from {config.clip_path} ...")
-            clip_state_dict = cls.load_model_checkpoint(config.clip_path, device="cpu", dtype=config.clip_dtype)
-        else:
-            clip_state_dict = unet_state_dict
+        return cls.from_state_dict(SDStateDicts(), config)
+    @classmethod
+    def from_state_dict(cls, state_dicts: SDStateDicts, config: SDPipelineConfig) -> "SDImagePipeline":
+        if state_dicts.model is None:
+            if config.model_path is None:
+                raise ValueError("`model_path` cannot be empty")
+            logger.info(f"loading state dict from {config.model_path} ...")
+            state_dicts.model = cls.load_model_checkpoint(config.model_path, device="cpu", dtype=config.model_dtype)
+        if state_dicts.vae is None:
+            if config.vae_path is None:
+                state_dicts.vae = state_dicts.model
+            else:
+                logger.info(f"loading state dict from {config.vae_path} ...")
+                state_dicts.vae = cls.load_model_checkpoint(config.vae_path, device="cpu", dtype=config.vae_dtype)
+        if state_dicts.clip is None:
+            if config.clip_path is None:
+                state_dicts.clip = state_dicts.model
+            else:
+                logger.info(f"loading state dict from {config.clip_path} ...")
+                state_dicts.clip = cls.load_model_checkpoint(config.clip_path, device="cpu", dtype=config.clip_dtype)
         init_device = "cpu" if config.offload_mode is not None else config.device
         tokenizer = CLIPTokenizer.from_pretrained(SDXL_TOKENIZER_CONF_PATH)
         with LoRAContext():
-            text_encoder = SDTextEncoder.from_state_dict(clip_state_dict, device=init_device, dtype=config.clip_dtype)
-            unet = SDUNet.from_state_dict(unet_state_dict, device=init_device, dtype=config.model_dtype)
+            text_encoder = SDTextEncoder.from_state_dict(state_dicts.clip, device=init_device, dtype=config.clip_dtype)
+            unet = SDUNet.from_state_dict(state_dicts.model, device=init_device, dtype=config.model_dtype)
         vae_decoder = SDVAEDecoder.from_state_dict(
-            vae_state_dict, device=init_device, dtype=config.vae_dtype, attn_impl="sdpa"
+            state_dicts.vae, device=init_device, dtype=config.vae_dtype, attn_impl="sdpa"
         )
         vae_encoder = SDVAEEncoder.from_state_dict(
-            vae_state_dict, device=init_device, dtype=config.vae_dtype, attn_impl="sdpa"
+            state_dicts.vae, device=init_device, dtype=config.vae_dtype, attn_impl="sdpa"
         )
         pipe = cls(
@@ -213,10 +222,6 @@ class SDImagePipeline(BasePipeline):
             pipe.enable_cpu_offload(config.offload_mode)
         return pipe
-    @classmethod
-    def from_state_dict(cls, state_dicts: SDStateDicts, pipeline_config: SDPipelineConfig) -> "SDImagePipeline":
-        raise NotImplementedError()
     def denoising_model(self):
         return self.unet

diffsynth_engine/pipelines/sdxl_image.py CHANGED Viewed

@@ -150,43 +150,53 @@ class SDXLImagePipeline(BasePipeline):
         else:
             config = model_path_or_config
-        logger.info(f"loading state dict from {config.model_path} ...")
-        unet_state_dict = cls.load_model_checkpoint(config.model_path, device="cpu", dtype=config.model_dtype)
-        if config.vae_path is not None:
-            logger.info(f"loading state dict from {config.vae_path} ...")
-            vae_state_dict = cls.load_model_checkpoint(config.vae_path, device="cpu", dtype=config.vae_dtype)
-        else:
-            vae_state_dict = unet_state_dict
-        if config.clip_l_path is not None:
-            logger.info(f"loading state dict from {config.clip_l_path} ...")
-            clip_l_state_dict = cls.load_model_checkpoint(config.clip_l_path, device="cpu", dtype=config.clip_l_dtype)
-        else:
-            clip_l_state_dict = unet_state_dict
-        if config.clip_g_path is not None:
-            logger.info(f"loading state dict from {config.clip_g_path} ...")
-            clip_g_state_dict = cls.load_model_checkpoint(config.clip_g_path, device="cpu", dtype=config.clip_g_dtype)
-        else:
-            clip_g_state_dict = unet_state_dict
+        return cls.from_state_dict(SDXLStateDicts(), config)
+    @classmethod
+    def from_state_dict(cls, state_dicts: SDXLStateDicts, config: SDXLPipelineConfig) -> "SDXLImagePipeline":
+        if state_dicts.model is None:
+            if config.model_path is None:
+                raise ValueError("`model_path` cannot be empty")
+            logger.info(f"loading state dict from {config.model_path} ...")
+            state_dicts.model = cls.load_model_checkpoint(config.model_path, device="cpu", dtype=config.model_dtype)
+        if state_dicts.vae is None:
+            if config.vae_path is None:
+                state_dicts.vae = state_dicts.model
+            else:
+                logger.info(f"loading state dict from {config.vae_path} ...")
+                state_dicts.vae = cls.load_model_checkpoint(config.vae_path, device="cpu", dtype=config.vae_dtype)
+        if state_dicts.clip_l is None:
+            if config.clip_l_path is None:
+                state_dicts.clip_l = state_dicts.model
+            else:
+                logger.info(f"loading state dict from {config.clip_l_path} ...")
+                state_dicts.clip_l = cls.load_model_checkpoint(config.clip_l_path, device="cpu", dtype=config.clip_l_dtype)
+        if state_dicts.clip_g is None:
+            if config.clip_g_path is None:
+                state_dicts.clip_g = state_dicts.model
+            else:
+                logger.info(f"loading state dict from {config.clip_g_path} ...")
+                state_dicts.clip_g = cls.load_model_checkpoint(config.clip_g_path, device="cpu", dtype=config.clip_g_dtype)
         init_device = "cpu" if config.offload_mode else config.device
         tokenizer = CLIPTokenizer.from_pretrained(SDXL_TOKENIZER_CONF_PATH)
         tokenizer_2 = CLIPTokenizer.from_pretrained(SDXL_TOKENIZER_2_CONF_PATH)
         with LoRAContext():
             text_encoder = SDXLTextEncoder.from_state_dict(
-                clip_l_state_dict, device=init_device, dtype=config.clip_l_dtype
+                state_dicts.clip_l, device=init_device, dtype=config.clip_l_dtype
             )
             text_encoder_2 = SDXLTextEncoder2.from_state_dict(
-                clip_g_state_dict, device=init_device, dtype=config.clip_g_dtype
+                state_dicts.clip_g, device=init_device, dtype=config.clip_g_dtype
             )
-            unet = SDXLUNet.from_state_dict(unet_state_dict, device=init_device, dtype=config.model_dtype)
+            unet = SDXLUNet.from_state_dict(state_dicts.model, device=init_device, dtype=config.model_dtype)
         vae_decoder = SDXLVAEDecoder.from_state_dict(
-            vae_state_dict, device=init_device, dtype=config.vae_dtype, attn_impl="sdpa"
+            state_dicts.vae, device=init_device, dtype=config.vae_dtype, attn_impl="sdpa"
         )
         vae_encoder = SDXLVAEEncoder.from_state_dict(
-            vae_state_dict, device=init_device, dtype=config.vae_dtype, attn_impl="sdpa"
+            state_dicts.vae, device=init_device, dtype=config.vae_dtype, attn_impl="sdpa"
         )
         pipe = cls(
@@ -205,10 +215,6 @@ class SDXLImagePipeline(BasePipeline):
             pipe.enable_cpu_offload(config.offload_mode)
         return pipe
-    @classmethod
-    def from_state_dict(cls, state_dicts: SDXLStateDicts, pipeline_config: SDXLPipelineConfig) -> "SDXLImagePipeline":
-        raise NotImplementedError()
     def denoising_model(self):
         return self.unet

diffsynth_engine/pipelines/wan_video.py CHANGED Viewed

@@ -417,7 +417,7 @@ class WanVideoPipeline(BasePipeline):
                 cfg_scale_ = cfg_scale if isinstance(cfg_scale, float) else cfg_scale[0]
             timestep = timestep * mask[:, :, :, ::2, ::2].flatten()  # seq_len
-            timestep = timestep.to(dtype=self.config.model_dtype, device=self.device)
+            timestep = timestep.to(dtype=self.dtype, device=self.device)
             # Classifier-free guidance
             noise_pred = self.predict_noise_with_cfg(
                 model=model,
@@ -574,6 +574,18 @@ class WanVideoPipeline(BasePipeline):
         if config.offload_mode is not None:
             pipe.enable_cpu_offload(config.offload_mode)
+        if config.model_dtype == torch.float8_e4m3fn:
+            pipe.dtype = torch.bfloat16  # compute dtype
+            pipe.enable_fp8_autocast(
+                model_names=["dit"], compute_dtype=pipe.dtype, use_fp8_linear=config.use_fp8_linear
+            )
+        if config.t5_dtype == torch.float8_e4m3fn:
+            pipe.dtype = torch.bfloat16  # compute dtype
+            pipe.enable_fp8_autocast(
+                model_names=["text_encoder"], compute_dtype=pipe.dtype, use_fp8_linear=config.use_fp8_linear
+            )
         if config.parallelism > 1:
             return ParallelWrapper(
                 pipe,

{diffsynth_engine-0.4.3.dev5.dist-info → diffsynth_engine-0.4.3.dev7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.4.3.dev5
+Version: 0.4.3.dev7
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent

{diffsynth_engine-0.4.3.dev5.dist-info → diffsynth_engine-0.4.3.dev7.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-diffsynth_engine/__init__.py,sha256=tXaLuKje4NQ3zARAvqBUdj1pGLjP0ttkXKE6ysuzsOc,1586
+diffsynth_engine/__init__.py,sha256=fcY1Z0QWNyrYuGX2dVTj2M8crWhVIL-vnPndfVI7mZs,1760
 diffsynth_engine/algorithm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/algorithm/noise_scheduler/__init__.py,sha256=YvcwE2tCNua-OAX9GEPm0EXsINNWH4XvJMNZb-uaZMM,745
 diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py,sha256=WICrLEh7b2TdZMMEN14NqiYydj7dxXT6RolXymKiMk8,188
@@ -132,10 +132,10 @@ diffsynth_engine/pipelines/__init__.py,sha256=9QZVhZeRm_5m7yxie08yBtgM26NB4mfVBO
 diffsynth_engine/pipelines/base.py,sha256=goe_UO1LvUXVwP5geUmu0zdFUrSms9iss3OuRyuMjXY,13726
 diffsynth_engine/pipelines/flux_image.py,sha256=gWuZaMeupB_Wz3AY97eE1eEVSAmAm14aXIxkAqNXY7E,49224
 diffsynth_engine/pipelines/qwen_image.py,sha256=3S-eL2GY-c0g9nqDyYByr9RV-kdY589m75a0k4vw_AQ,18459
-diffsynth_engine/pipelines/sd_image.py,sha256=5cIIknh2M-fOqj7urKi9nZ40yc1LnvepbH_Af7SF4UA,17789
-diffsynth_engine/pipelines/sdxl_image.py,sha256=otv1T_0fhX3UcIoKbKCqb47Yge6xg0fPM0ry-uPEanI,21548
+diffsynth_engine/pipelines/sd_image.py,sha256=GhrCadEmAWv4id0NdRpJW_EC2PgItBctXLkfPxq5gDI,18100
+diffsynth_engine/pipelines/sdxl_image.py,sha256=kmidIz8zDtrw9ggLXI3WG7AQq_jmOPVct-O3hGNra_g,21951
 diffsynth_engine/pipelines/utils.py,sha256=lk7sFGEk-fGjgadLpwwppHKG-yZ0RC-4ZmHW7pRRe8A,473
-diffsynth_engine/pipelines/wan_video.py,sha256=stoYKm0wHf_pxZ_WHRTGHTR61KVG_U21yBUaUrDjSqw,25605
+diffsynth_engine/pipelines/wan_video.py,sha256=lb0FrMFxQ6BNfOUErveWcnzPJa1gq0yYtMXUZjNTOuU,26126
 diffsynth_engine/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/processor/canny_processor.py,sha256=hV30NlblTkEFUAmF_O-LJrNlGVM2SFrqq6okfF8VpOo,602
 diffsynth_engine/processor/depth_processor.py,sha256=dQvs3JsnyMbz4dyI9QoR8oO-mMFBFAgNvgqeCoaU5jk,1532
@@ -168,8 +168,8 @@ diffsynth_engine/utils/parallel.py,sha256=Z9jqCv4mLV4JyXR3uTHyv1rujPiKU8PSCbAfiN
 diffsynth_engine/utils/platform.py,sha256=2lXdw6YkqcRONCeT98n4cyg1Ii8Ybbyj2Ns72Se9tlk,496
 diffsynth_engine/utils/prompt.py,sha256=YItMchoVzsG6y-LB4vzzDUWrkhKRVlt1HfVhxZjSxMQ,280
 diffsynth_engine/utils/video.py,sha256=Ne0rd2lb59UT1q5EotpjlY7OT8F9oTCFDyo1ST77uoQ,1004
-diffsynth_engine-0.4.3.dev5.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
-diffsynth_engine-0.4.3.dev5.dist-info/METADATA,sha256=hMlrgbZrIStdg2Sr4iIePwnxCltZ1rttaP_1I2gXflA,1110
-diffsynth_engine-0.4.3.dev5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-diffsynth_engine-0.4.3.dev5.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
-diffsynth_engine-0.4.3.dev5.dist-info/RECORD,,
+diffsynth_engine-0.4.3.dev7.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
+diffsynth_engine-0.4.3.dev7.dist-info/METADATA,sha256=AkzJrm0DuplPG552EIlIa1um1VoHzWK6DfHCgwDHYLQ,1110
+diffsynth_engine-0.4.3.dev7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+diffsynth_engine-0.4.3.dev7.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
+diffsynth_engine-0.4.3.dev7.dist-info/RECORD,,

{diffsynth_engine-0.4.3.dev5.dist-info → diffsynth_engine-0.4.3.dev7.dist-info}/WHEEL RENAMED Viewed

File without changes

{diffsynth_engine-0.4.3.dev5.dist-info → diffsynth_engine-0.4.3.dev7.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{diffsynth_engine-0.4.3.dev5.dist-info → diffsynth_engine-0.4.3.dev7.dist-info}/top_level.txt RENAMED Viewed

File without changes

diffsynth-engine 0.4.3.dev5__py3-none-any.whl → 0.4.3.dev7__py3-none-any.whl

diffsynth-engine 0.4.3.dev5__py3-none-any.whl → 0.4.3.dev7__py3-none-any.whl