PyPI - diffsynth-engine - Versions diffs - 0.6.1.dev20__py3-none-any.whl → 0.6.1.dev22__py3-none-any.whl - Mend

diffsynth-engine 0.6.1.dev20__py3-none-any.whl → 0.6.1.dev22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

diffsynth_engine/configs/pipeline.py CHANGED Viewed

@@ -242,6 +242,8 @@ class QwenImagePipelineConfig(AttentionConfig, OptimizationConfig, ParallelConfi
     vae_tile_size: Tuple[int, int] = (34, 34)
     vae_tile_stride: Tuple[int, int] = (18, 16)
+    load_encoder: bool = True
     @classmethod
     def basic_config(
         cls,

diffsynth_engine/pipelines/flux_image.py CHANGED Viewed

@@ -830,7 +830,7 @@ class FluxImagePipeline(BasePipeline):
                 masked_image = image.clone()
                 masked_image[(mask > 0.5).repeat(1, 3, 1, 1)] = -1
                 latent = self.encode_image(masked_image)
-                mask = torch.nn.functional.interpolate(mask, size=(latent.shape[2], latent.shape[3]))
+                mask = torch.nn.functional.interpolate(mask, size=(latent.shape[2], latent.shape[3])).to(latent.dtype)
                 mask = 1 - mask
                 latent = torch.cat([latent, mask], dim=1)
             elif self.config.control_type == ControlType.bfl_fill:

diffsynth_engine/pipelines/qwen_image.py CHANGED Viewed

@@ -186,6 +186,7 @@ class QwenImagePipeline(BasePipeline):
         logger.info(f"loading state dict from {config.vae_path} ...")
         vae_state_dict = cls.load_model_checkpoint(config.vae_path, device="cpu", dtype=config.vae_dtype)
+        encoder_state_dict = None
         if config.encoder_path is None:
             config.encoder_path = fetch_model(
                 "MusePublic/Qwen-image",
@@ -197,8 +198,9 @@ class QwenImagePipeline(BasePipeline):
                     "text_encoder/model-00004-of-00004.safetensors",
                 ],
             )
-        logger.info(f"loading state dict from {config.encoder_path} ...")
-        encoder_state_dict = cls.load_model_checkpoint(config.encoder_path, device="cpu", dtype=config.encoder_dtype)
+        if config.load_encoder:
+            logger.info(f"loading state dict from {config.encoder_path} ...")
+            encoder_state_dict = cls.load_model_checkpoint(config.encoder_path, device="cpu", dtype=config.encoder_dtype)
         state_dicts = QwenImageStateDicts(
             model=model_state_dict,
@@ -225,22 +227,25 @@ class QwenImagePipeline(BasePipeline):
     @classmethod
     def _from_state_dict(cls, state_dicts: QwenImageStateDicts, config: QwenImagePipelineConfig) -> "QwenImagePipeline":
         init_device = "cpu" if config.offload_mode is not None else config.device
-        tokenizer = Qwen2TokenizerFast.from_pretrained(QWEN_IMAGE_TOKENIZER_CONF_PATH)
-        processor = Qwen2VLProcessor.from_pretrained(
-            tokenizer_config_path=QWEN_IMAGE_TOKENIZER_CONF_PATH,
-            image_processor_config_path=QWEN_IMAGE_PROCESSOR_CONFIG_FILE,
-        )
-        with open(QWEN_IMAGE_VISION_CONFIG_FILE, "r", encoding="utf-8") as f:
-            vision_config = Qwen2_5_VLVisionConfig(**json.load(f))
-        with open(QWEN_IMAGE_CONFIG_FILE, "r", encoding="utf-8") as f:
-            text_config = Qwen2_5_VLConfig(**json.load(f))
-        encoder = Qwen2_5_VLForConditionalGeneration.from_state_dict(
-            state_dicts.encoder,
-            vision_config=vision_config,
-            config=text_config,
-            device=("cpu" if config.use_fsdp else init_device),
-            dtype=config.encoder_dtype,
-        )
+        tokenizer, processor, encoder = None, None, None
+        if config.load_encoder:
+            tokenizer = Qwen2TokenizerFast.from_pretrained(QWEN_IMAGE_TOKENIZER_CONF_PATH)
+            processor = Qwen2VLProcessor.from_pretrained(
+                tokenizer_config_path=QWEN_IMAGE_TOKENIZER_CONF_PATH,
+                image_processor_config_path=QWEN_IMAGE_PROCESSOR_CONFIG_FILE,
+            )
+            with open(QWEN_IMAGE_VISION_CONFIG_FILE, "r", encoding="utf-8") as f:
+                vision_config = Qwen2_5_VLVisionConfig(**json.load(f))
+            with open(QWEN_IMAGE_CONFIG_FILE, "r", encoding="utf-8") as f:
+                text_config = Qwen2_5_VLConfig(**json.load(f))
+            encoder = Qwen2_5_VLForConditionalGeneration.from_state_dict(
+                state_dicts.encoder,
+                vision_config=vision_config,
+                config=text_config,
+                device=("cpu" if config.use_fsdp else init_device),
+                dtype=config.encoder_dtype,
+            )
         with open(QWEN_IMAGE_VAE_CONFIG_FILE, "r", encoding="utf-8") as f:
             vae_config = json.load(f)
         vae = QwenImageVAE.from_state_dict(

{diffsynth_engine-0.6.1.dev20.dist-info → diffsynth_engine-0.6.1.dev22.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.6.1.dev20
+Version: 0.6.1.dev22
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent

{diffsynth_engine-0.6.1.dev20.dist-info → diffsynth_engine-0.6.1.dev22.dist-info}/RECORD RENAMED Viewed

@@ -80,7 +80,7 @@ diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json,sha256=bhl7TT29cdoU
 diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json,sha256=7Zo6iw-qcacKMoR-BDX-A25uES1N9O23u0ipIeNE3AU,61728
 diffsynth_engine/configs/__init__.py,sha256=f6Y-j_ZQs7bM4Lr7Mh9CXFEBrSNLc9k5GJyJqjLAGiY,1187
 diffsynth_engine/configs/controlnet.py,sha256=f3vclyP3lcAjxDGD9C1vevhqqQ7W2LL_c6Wye0uxk3Q,1180
-diffsynth_engine/configs/pipeline.py,sha256=u4P0JnzSsvS_tfbTYyUARdT88k7TEGRYNqjaAPZlY40,13223
+diffsynth_engine/configs/pipeline.py,sha256=FwHIvj2VdxtmiHxOUYoAzs5YVBprxobhV9AJ2CFrV4c,13254
 diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
 diffsynth_engine/models/base.py,sha256=BA5vgMqfy_cjuL2OtXbrFD-Qg5xQnaumHpj5TabwSy8,2559
@@ -140,9 +140,9 @@ diffsynth_engine/models/wan/wan_text_encoder.py,sha256=OERlmwOqthAFPNnnT2sXJ4Ojy
 diffsynth_engine/models/wan/wan_vae.py,sha256=dC7MoUFeXRL7SIY0LG1OOUiZW-pp9IbXCghutMxpXr4,38889
 diffsynth_engine/pipelines/__init__.py,sha256=jh-4LSJ0vqlXiT8BgFgRIQxuAr2atEPyHrxXWj-Ud1U,604
 diffsynth_engine/pipelines/base.py,sha256=BWW7LW0E2qwu8G-6bP3nmeO7VCQxC8srOo8tE4aKA4o,14993
-diffsynth_engine/pipelines/flux_image.py,sha256=Dpy8AkwywuLAhvJ6cjg5TgzhSUgFQtv6p2JTTkzUHbo,50919
+diffsynth_engine/pipelines/flux_image.py,sha256=vJKvnYmeeQVX2O1Zjtm4NLrltBp66VSZ-KjAUqJ8zJ8,50936
 diffsynth_engine/pipelines/hunyuan3d_shape.py,sha256=TNV0Wr09Dj2bzzlpua9WioCClOj3YiLfE6utI9aWL8A,8164
-diffsynth_engine/pipelines/qwen_image.py,sha256=jt4rg-U5qWsFD0kUeDwKzgIiTAC80Cj8aq1YQOR1_-k,33052
+diffsynth_engine/pipelines/qwen_image.py,sha256=rksB8tiAEp9TIcLLca269dNFQRPIDxffThKRMuR06A0,33280
 diffsynth_engine/pipelines/sd_image.py,sha256=nr-Nhsnomq8CsUqhTM3i2l2zG01YjwXdfRXgr_bC3F0,17891
 diffsynth_engine/pipelines/sdxl_image.py,sha256=v7ZACGPb6EcBunL6e5E9jynSQjE7GQx8etEV-ZLP91g,21704
 diffsynth_engine/pipelines/utils.py,sha256=lk7sFGEk-fGjgadLpwwppHKG-yZ0RC-4ZmHW7pRRe8A,473
@@ -185,8 +185,8 @@ diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CD
 diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
 diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
-diffsynth_engine-0.6.1.dev20.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
-diffsynth_engine-0.6.1.dev20.dist-info/METADATA,sha256=JZJRwz1ckJI0aMe_StTIj8LGkSYg8rRMQvqUsvqgx3s,1164
-diffsynth_engine-0.6.1.dev20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-diffsynth_engine-0.6.1.dev20.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
-diffsynth_engine-0.6.1.dev20.dist-info/RECORD,,
+diffsynth_engine-0.6.1.dev22.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
+diffsynth_engine-0.6.1.dev22.dist-info/METADATA,sha256=K5yUarSjYpbjDvqrG1i7rKrY2r1ILPkcuNupcMTDsvY,1164
+diffsynth_engine-0.6.1.dev22.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+diffsynth_engine-0.6.1.dev22.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
+diffsynth_engine-0.6.1.dev22.dist-info/RECORD,,

{diffsynth_engine-0.6.1.dev20.dist-info → diffsynth_engine-0.6.1.dev22.dist-info}/WHEEL RENAMED Viewed

File without changes

{diffsynth_engine-0.6.1.dev20.dist-info → diffsynth_engine-0.6.1.dev22.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{diffsynth_engine-0.6.1.dev20.dist-info → diffsynth_engine-0.6.1.dev22.dist-info}/top_level.txt RENAMED Viewed

File without changes

diffsynth-engine 0.6.1.dev20__py3-none-any.whl → 0.6.1.dev22__py3-none-any.whl

diffsynth-engine 0.6.1.dev20__py3-none-any.whl → 0.6.1.dev22__py3-none-any.whl