PyPI - diffsynth-engine - Versions diffs - 0.3.3__tar.gz → 0.3.4__tar.gz - Mend

diffsynth-engine 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (160) hide show

{diffsynth_engine-0.3.3 → diffsynth_engine-0.3.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.3.3
+Version: 0.3.4
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent

{diffsynth_engine-0.3.3 → diffsynth_engine-0.3.4}/README.md RENAMED Viewed

@@ -81,6 +81,7 @@ If you have any questions or feedback, please scan the QR code below, or send em
 We welcome contributions to DiffSynth-Engine. After Install from source, we recommand developers install this project using following command to setup the development environment.
 ```bash
 pip install -e '.[dev]'
+pre-commit install
 ```
 TODO: Please refer to [CONTRIBUTING.md](./CONTRIBUTING.md) for more details.

{diffsynth_engine-0.3.3 → diffsynth_engine-0.3.4}/diffsynth_engine/__init__.py RENAMED Viewed

@@ -10,6 +10,8 @@ from .pipelines import (
     ControlNetParams,
 )
 from .models.flux import FluxControlNet, FluxIPAdapter, FluxRedux
+from .models.sd import SDControlNet
+from .models.sdxl import SDXLControlNetUnion
 from .utils.download import fetch_model, fetch_modelscope_model, fetch_civitai_model
 from .utils.video import load_video, save_video
 from .tools import (
@@ -25,6 +27,8 @@ __all__ = [
     "FluxControlNet",
     "FluxIPAdapter",
     "FluxRedux",
+    "SDControlNet",
+    "SDXLControlNetUnion",
     "SDXLImagePipeline",
     "SDImagePipeline",
     "WanVideoPipeline",

{diffsynth_engine-0.3.3 → diffsynth_engine-0.3.4}/diffsynth_engine/models/base.py RENAMED Viewed

@@ -14,6 +14,7 @@ class StateDictConverter:
 class PreTrainedModel(nn.Module):
     converter = StateDictConverter()
+    _supports_parallelization = False
     def load_state_dict(self, state_dict: Dict[str, torch.Tensor], strict: bool = True, assign: bool = False):
         state_dict = self.converter.convert(state_dict)
@@ -55,6 +56,12 @@ class PreTrainedModel(nn.Module):
             if isinstance(module, (LoRALinear, LoRAConv2d)):
                 module.clear()
+    def get_tp_plan(self):
+        raise NotImplementedError(f"{self.__class__.__name__} does not support TP")
+    def get_fsdp_modules(self):
+        raise NotImplementedError(f"{self.__class__.__name__} does not support FSDP")
 def split_suffix(name: str):
     suffix_list = [

{diffsynth_engine-0.3.3 → diffsynth_engine-0.3.4}/diffsynth_engine/models/basic/attention.py RENAMED Viewed

@@ -62,7 +62,7 @@ if SAGE_ATTN_AVAILABLE:
 if SPARGE_ATTN_AVAILABLE:
     from spas_sage_attn import spas_sage2_attn_meansim_cuda
-    def sparge_attn(self, q, k, v, attn_mask=None, scale=None):
+    def sparge_attn(q, k, v, attn_mask=None, scale=None):
         q = q.transpose(1, 2)
         k = k.transpose(1, 2)
         v = v.transpose(1, 2)

{diffsynth_engine-0.3.3 → diffsynth_engine-0.3.4}/diffsynth_engine/models/basic/lora.py RENAMED Viewed

@@ -131,8 +131,7 @@ class LoRALinear(nn.Linear):
         self._lora_dict.clear()
         self._frozen_lora_list = []
         if self._original_weight is not None:
-            self.weight.data = self._original_weight
-            self._original_weight = None
+            self.weight.data.copy_(self._original_weight)
     def forward(self, x):
         w_x = super().forward(x)

{diffsynth_engine-0.3.3 → diffsynth_engine-0.3.4}/diffsynth_engine/models/flux/flux_dit.py RENAMED Viewed

@@ -18,7 +18,12 @@ from diffsynth_engine.models.utils import no_init_weights
 from diffsynth_engine.utils.gguf import gguf_inference
 from diffsynth_engine.utils.fp8_linear import fp8_inference
 from diffsynth_engine.utils.constants import FLUX_DIT_CONFIG_FILE
-from diffsynth_engine.utils.parallel import sequence_parallel, sequence_parallel_unshard
+from diffsynth_engine.utils.parallel import (
+    cfg_parallel,
+    cfg_parallel_unshard,
+    sequence_parallel,
+    sequence_parallel_unshard,
+)
 from diffsynth_engine.utils import logging
@@ -151,7 +156,7 @@ class FluxDiTStateDictConverter(StateDictConverter):
             state_dict = self._from_diffusers(state_dict)
             logger.info("use diffusers format state dict")
         else:
-            logger.info("user diffsynth format state dict")
+            logger.info("use diffsynth format state dict")
         return state_dict
@@ -323,12 +328,12 @@ class FluxSingleTransformerBlock(nn.Module):
 class FluxDiT(PreTrainedModel):
     converter = FluxDiTStateDictConverter()
+    _supports_parallelization = True
     def __init__(
         self,
         in_channel: int = 64,
         attn_impl: Optional[str] = None,
-        use_usp: bool = False,
         device: str = "cuda:0",
         dtype: torch.dtype = torch.bfloat16,
     ):
@@ -354,8 +359,6 @@ class FluxDiT(PreTrainedModel):
         self.final_norm_out = AdaLayerNorm(3072, device=device, dtype=dtype)
         self.final_proj_out = nn.Linear(3072, 64, device=device, dtype=dtype)
-        self.use_usp = use_usp
     def patchify(self, hidden_states):
         hidden_states = rearrange(hidden_states, "B C (H P) (W Q) -> B (H W) (C P Q)", P=2, Q=2)
         return hidden_states
@@ -398,6 +401,8 @@ class FluxDiT(PreTrainedModel):
         **kwargs,
     ):
         h, w = hidden_states.shape[-2:]
+        if image_ids is None:
+            image_ids = self.prepare_image_ids(hidden_states)
         controlnet_double_block_output = (
             controlnet_double_block_output if controlnet_double_block_output is not None else ()
         )
@@ -406,10 +411,26 @@ class FluxDiT(PreTrainedModel):
         )
         fp8_linear_enabled = getattr(self, "fp8_linear_enabled", False)
-        with fp8_inference(fp8_linear_enabled), gguf_inference():
-            if image_ids is None:
-                image_ids = self.prepare_image_ids(hidden_states)
+        use_cfg = hidden_states.shape[0] > 1
+        with (
+            fp8_inference(fp8_linear_enabled),
+            gguf_inference(),
+            cfg_parallel(
+                (
+                    hidden_states,
+                    timestep,
+                    prompt_emb,
+                    pooled_prompt_emb,
+                    image_emb,
+                    guidance,
+                    text_ids,
+                    image_ids,
+                    *controlnet_double_block_output,
+                    *controlnet_single_block_output,
+                ),
+                use_cfg=use_cfg,
+            ),
+        ):
             # warning: keep the order of time_embedding + guidance_embedding + pooled_text_embedding
             # addition of floating point numbers does not meet commutative law
             conditioning = self.time_embedder(timestep, hidden_states.dtype)
@@ -439,7 +460,6 @@ class FluxDiT(PreTrainedModel):
                     *(1 for _ in controlnet_double_block_output),
                     *(1 for _ in controlnet_single_block_output),
                 ),
-                enabled=self.use_usp,
             ):
                 hidden_states = self.x_embedder(hidden_states)
                 prompt_emb = self.context_embedder(prompt_emb)
@@ -465,6 +485,7 @@ class FluxDiT(PreTrainedModel):
                 (hidden_states,) = sequence_parallel_unshard((hidden_states,), seq_dims=(1,), seq_lens=(h * w // 4,))
             hidden_states = self.unpatchify(hidden_states, h, w)
+            (hidden_states,) = cfg_parallel_unshard((hidden_states,), use_cfg=use_cfg)
             return hidden_states
     @classmethod
@@ -475,7 +496,6 @@ class FluxDiT(PreTrainedModel):
         dtype: torch.dtype,
         in_channel: int = 64,
         attn_impl: Optional[str] = None,
-        use_usp: bool = False,
     ):
         with no_init_weights():
             model = torch.nn.utils.skip_init(
@@ -484,9 +504,11 @@ class FluxDiT(PreTrainedModel):
                 dtype=dtype,
                 in_channel=in_channel,
                 attn_impl=attn_impl,
-                use_usp=use_usp,
             )
             model = model.requires_grad_(False)  # for loading gguf
         model.load_state_dict(state_dict, assign=True)
         model.to(device=device, dtype=dtype, non_blocking=True)
         return model
+    def get_fsdp_modules(self):
+        return ["blocks", "single_blocks"]

{diffsynth_engine-0.3.3 → diffsynth_engine-0.3.4}/diffsynth_engine/models/flux/flux_text_encoder.py RENAMED Viewed

@@ -37,7 +37,7 @@ class FluxTextEncoder1StateDictConverter(StateDictConverter):
             state_dict = self._from_diffusers(state_dict)
             logger.info("use diffusers format state dict")
         else:
-            logger.info("user diffsynth format state dict")
+            logger.info("use diffsynth format state dict")
         return state_dict

{diffsynth_engine-0.3.3 → diffsynth_engine-0.3.4}/diffsynth_engine/models/sd/__init__.py RENAMED Viewed

@@ -1,12 +1,14 @@
 from .sd_text_encoder import SDTextEncoder, config as sd_text_encoder_config
 from .sd_unet import SDUNet, config as sd_unet_config
 from .sd_vae import SDVAEDecoder, SDVAEEncoder
+from .sd_controlnet import SDControlNet
 __all__ = [
     "SDTextEncoder",
     "SDUNet",
     "SDVAEDecoder",
     "SDVAEEncoder",
+    "SDControlNet",
     "sd_text_encoder_config",
     "sd_unet_config",
 ]

diffsynth-engine 0.3.3__tar.gz → 0.3.4__tar.gz

diffsynth-engine 0.3.3__tar.gz → 0.3.4__tar.gz