PyPI - diffsynth-engine - Versions diffs - 0.3.6.dev9__py3-none-any.whl → 0.3.6.dev11__py3-none-any.whl - Mend

diffsynth-engine 0.3.6.dev9__py3-none-any.whl → 0.3.6.dev11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

diffsynth_engine/__init__.py +10 -8
diffsynth_engine/configs/__init__.py +23 -0
diffsynth_engine/configs/controlnet.py +17 -0
diffsynth_engine/configs/pipeline.py +206 -0
diffsynth_engine/models/basic/attention.py +43 -4
diffsynth_engine/models/flux/flux_controlnet.py +8 -5
diffsynth_engine/models/flux/flux_dit.py +22 -16
diffsynth_engine/models/flux/flux_dit_fbcache.py +7 -7
diffsynth_engine/models/flux/flux_ipadapter.py +5 -5
diffsynth_engine/models/sd/sd_controlnet.py +2 -4
diffsynth_engine/models/sdxl/sdxl_controlnet.py +1 -2
diffsynth_engine/models/wan/wan_dit.py +15 -15
diffsynth_engine/pipelines/__init__.py +5 -8
diffsynth_engine/pipelines/base.py +14 -65
diffsynth_engine/pipelines/flux_image.py +85 -158
diffsynth_engine/pipelines/sd_image.py +30 -64
diffsynth_engine/pipelines/sdxl_image.py +39 -71
diffsynth_engine/pipelines/wan_video.py +66 -105
diffsynth_engine/tools/flux_inpainting_tool.py +7 -3
diffsynth_engine/tools/flux_outpainting_tool.py +7 -3
diffsynth_engine/tools/flux_reference_tool.py +21 -5
diffsynth_engine/tools/flux_replace_tool.py +15 -3
diffsynth_engine/utils/parallel.py +1 -1
{diffsynth_engine-0.3.6.dev9.dist-info → diffsynth_engine-0.3.6.dev11.dist-info}/METADATA +1 -1
{diffsynth_engine-0.3.6.dev9.dist-info → diffsynth_engine-0.3.6.dev11.dist-info}/RECORD +28 -25
{diffsynth_engine-0.3.6.dev9.dist-info → diffsynth_engine-0.3.6.dev11.dist-info}/WHEEL +0 -0
{diffsynth_engine-0.3.6.dev9.dist-info → diffsynth_engine-0.3.6.dev11.dist-info}/licenses/LICENSE +0 -0
{diffsynth_engine-0.3.6.dev9.dist-info → diffsynth_engine-0.3.6.dev11.dist-info}/top_level.txt +0 -0

diffsynth_engine/models/sdxl/sdxl_controlnet.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import torch
-from typing import Optional, Dict
+from typing import Dict
 from diffsynth_engine.models.basic.unet_helper import (
     ResnetBlock,
     AttentionBlock,
@@ -180,7 +180,6 @@ class SDXLControlNetUnion(PreTrainedModel):
     def __init__(
         self,
-        attn_impl: Optional[str] = None,
         device: str = "cuda:0",
         dtype: torch.dtype = torch.bfloat16,
     ):

diffsynth_engine/models/wan/wan_dit.py CHANGED Viewed

@@ -2,7 +2,7 @@ import math
 import json
 import torch
 import torch.nn as nn
-from typing import Tuple, Optional
+from typing import Any, Dict, Tuple, Optional
 from einops import rearrange
 from diffsynth_engine.models.base import StateDictConverter, PreTrainedModel
@@ -69,7 +69,7 @@ class SelfAttention(nn.Module):
         dim: int,
         num_heads: int,
         eps: float = 1e-6,
-        attn_impl: Optional[str] = None,
+        attn_kwargs: Optional[Dict[str, Any]] = None,
         device: str = "cuda:0",
         dtype: torch.dtype = torch.bfloat16,
     ):
@@ -82,7 +82,7 @@ class SelfAttention(nn.Module):
         self.o = nn.Linear(dim, dim, device=device, dtype=dtype)
         self.norm_q = RMSNorm(dim, eps=eps, device=device, dtype=dtype)
         self.norm_k = RMSNorm(dim, eps=eps, device=device, dtype=dtype)
-        self.attn_impl = attn_impl
+        self.attn_kwargs = attn_kwargs if attn_kwargs is not None else {}
     def forward(self, x, freqs):
         q, k, v = self.norm_q(self.q(x)), self.norm_k(self.k(x)), self.v(x)
@@ -94,7 +94,7 @@ class SelfAttention(nn.Module):
             q=rope_apply(q, freqs),
             k=rope_apply(k, freqs),
             v=v,
-            attn_impl=self.attn_impl,
+            **self.attn_kwargs,
         )
         x = x.flatten(2)
         return self.o(x)
@@ -107,7 +107,7 @@ class CrossAttention(nn.Module):
         num_heads: int,
         eps: float = 1e-6,
         has_image_input: bool = False,
-        attn_impl: Optional[str] = None,
+        attn_kwargs: Optional[Dict[str, Any]] = None,
         device: str = "cuda:0",
         dtype: torch.dtype = torch.bfloat16,
     ):
@@ -126,7 +126,7 @@ class CrossAttention(nn.Module):
             self.k_img = nn.Linear(dim, dim, device=device, dtype=dtype)
             self.v_img = nn.Linear(dim, dim, device=device, dtype=dtype)
             self.norm_k_img = RMSNorm(dim, eps=eps, device=device, dtype=dtype)
-        self.attn_impl = attn_impl
+        self.attn_kwargs = attn_kwargs if attn_kwargs is not None else {}
     def forward(self, x: torch.Tensor, y: torch.Tensor):
         if self.has_image_input:
@@ -140,12 +140,12 @@ class CrossAttention(nn.Module):
         k = rearrange(k, "b s (n d) -> b s n d", n=num_heads)
         v = rearrange(v, "b s (n d) -> b s n d", n=num_heads)
-        x = attention_ops.attention(q, k, v, attn_impl=self.attn_impl).flatten(2)
+        x = attention_ops.attention(q, k, v, **self.attn_kwargs).flatten(2)
         if self.has_image_input:
             k_img, v_img = self.norm_k_img(self.k_img(img)), self.v_img(img)
             k_img = rearrange(k_img, "b s (n d) -> b s n d", n=num_heads)
             v_img = rearrange(v_img, "b s (n d) -> b s n d", n=num_heads)
-            y = attention_ops.attention(q, k_img, v_img, attn_impl=self.attn_impl).flatten(2)
+            y = attention_ops.attention(q, k_img, v_img, **self.attn_kwargs).flatten(2)
             x = x + y
         return self.o(x)
@@ -158,7 +158,7 @@ class DiTBlock(nn.Module):
         num_heads: int,
         ffn_dim: int,
         eps: float = 1e-6,
-        attn_impl: Optional[str] = None,
+        attn_kwargs: Optional[Dict[str, Any]] = None,
         device: str = "cuda:0",
         dtype: torch.dtype = torch.bfloat16,
     ):
@@ -166,9 +166,9 @@ class DiTBlock(nn.Module):
         self.dim = dim
         self.num_heads = num_heads
         self.ffn_dim = ffn_dim
-        self.self_attn = SelfAttention(dim, num_heads, eps, attn_impl=attn_impl, device=device, dtype=dtype)
+        self.self_attn = SelfAttention(dim, num_heads, eps, attn_kwargs=attn_kwargs, device=device, dtype=dtype)
         self.cross_attn = CrossAttention(
-            dim, num_heads, eps, has_image_input=has_image_input, attn_impl=attn_impl, device=device, dtype=dtype
+            dim, num_heads, eps, has_image_input=has_image_input, attn_kwargs=attn_kwargs, device=device, dtype=dtype
         )
         self.norm1 = nn.LayerNorm(dim, eps=eps, elementwise_affine=False, device=device, dtype=dtype)
         self.norm2 = nn.LayerNorm(dim, eps=eps, elementwise_affine=False, device=device, dtype=dtype)
@@ -265,7 +265,7 @@ class WanDiT(PreTrainedModel):
         num_layers: int,
         has_image_input: bool,
         flf_pos_emb: bool = False,
-        attn_impl: Optional[str] = None,
+        attn_kwargs: Optional[Dict[str, Any]] = None,
         device: str = "cpu",
         dtype: torch.dtype = torch.bfloat16,
     ):
@@ -296,7 +296,7 @@ class WanDiT(PreTrainedModel):
         )
         self.blocks = nn.ModuleList(
             [
-                DiTBlock(has_image_input, dim, num_heads, ffn_dim, eps, attn_impl, device=device, dtype=dtype)
+                DiTBlock(has_image_input, dim, num_heads, ffn_dim, eps, attn_kwargs, device=device, dtype=dtype)
                 for _ in range(num_layers)
             ]
         )
@@ -376,7 +376,7 @@ class WanDiT(PreTrainedModel):
         device,
         dtype,
         model_type="1.3b-t2v",
-        attn_impl: Optional[str] = None,
+        attn_kwargs: Optional[Dict[str, Any]] = None,
         assign=True,
     ):
         if model_type == "1.3b-t2v":
@@ -390,7 +390,7 @@ class WanDiT(PreTrainedModel):
         else:
             raise ValueError(f"Unsupported model type: {model_type}")
         with no_init_weights():
-            model = torch.nn.utils.skip_init(cls, **config, device=device, dtype=dtype, attn_impl=attn_impl)
+            model = torch.nn.utils.skip_init(cls, **config, device=device, dtype=dtype, attn_kwargs=attn_kwargs)
             model = model.requires_grad_(False)
         model.load_state_dict(state_dict, assign=assign)
         model.to(device=device, dtype=dtype)

diffsynth_engine/pipelines/__init__.py CHANGED Viewed

@@ -1,20 +1,17 @@
 from .base import BasePipeline, LoRAStateDictConverter
 from .controlnet_helper import ControlNetParams
-from .flux_image import FluxImagePipeline, FluxModelConfig
-from .sdxl_image import SDXLImagePipeline, SDXLModelConfig
-from .sd_image import SDImagePipeline, SDModelConfig
-from .wan_video import WanVideoPipeline, WanModelConfig
+from .flux_image import FluxImagePipeline
+from .sdxl_image import SDXLImagePipeline
+from .sd_image import SDImagePipeline
+from .wan_video import WanVideoPipeline
 __all__ = [
     "BasePipeline",
     "LoRAStateDictConverter",
     "FluxImagePipeline",
-    "FluxModelConfig",
     "SDXLImagePipeline",
-    "SDXLModelConfig",
     "SDImagePipeline",
-    "SDModelConfig",
     "WanVideoPipeline",
-    "WanModelConfig",
     "ControlNetParams",
 ]

diffsynth_engine/pipelines/base.py CHANGED Viewed

@@ -3,7 +3,7 @@ import torch
 import numpy as np
 from typing import Dict, List, Tuple
 from PIL import Image
-from dataclasses import dataclass
+from diffsynth_engine.configs import BaseConfig
 from diffsynth_engine.utils.offload import enable_sequential_cpu_offload
 from diffsynth_engine.utils.fp8_linear import enable_fp8_autocast
 from diffsynth_engine.utils.gguf import load_gguf_checkpoint
@@ -14,11 +14,6 @@ from diffsynth_engine.utils.platform import empty_cache
 logger = logging.get_logger(__name__)
-@dataclass
-class ModelConfig:
-    pass
 class LoRAStateDictConverter:
     def convert(self, lora_state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
         return {"lora": lora_state_dict}
@@ -30,8 +25,8 @@ class BasePipeline:
     def __init__(
         self,
         vae_tiled: bool = False,
-        vae_tile_size: int = -1,
-        vae_tile_stride: int = -1,
+        vae_tile_size: int | Tuple[int, int] = -1,
+        vae_tile_stride: int | Tuple[int, int] = -1,
         device="cuda",
         dtype=torch.float16,
     ):
@@ -46,13 +41,7 @@ class BasePipeline:
         self._models_offload_params = {}
     @classmethod
-    def from_pretrained(
-        cls,
-        model_path_or_config: str | os.PathLike | ModelConfig,
-        device: str = "cuda",
-        dtype: torch.dtype = torch.float16,
-        offload_mode: str | None = None,
-    ) -> "BasePipeline":
+    def from_pretrained(cls, model_path_or_config: str | BaseConfig) -> "BasePipeline":
         raise NotImplementedError()
     @classmethod
@@ -224,54 +213,6 @@ class BasePipeline:
                 model.eval()
         return self
-    @staticmethod
-    def init_parallel_config(
-        parallelism: int,
-        use_cfg_parallel: bool,
-        model_config: ModelConfig,
-    ):
-        assert parallelism in (2, 4, 8), "parallelism must be 2, 4 or 8"
-        cfg_degree = 2 if use_cfg_parallel else 1
-        sp_ulysses_degree = getattr(model_config, "sp_ulysses_degree", None)
-        sp_ring_degree = getattr(model_config, "sp_ring_degree", None)
-        tp_degree = getattr(model_config, "tp_degree", None)
-        use_fsdp = getattr(model_config, "use_fsdp", False)
-        if tp_degree is not None:
-            assert sp_ulysses_degree is None and sp_ring_degree is None, (
-                "not allowed to enable sequence parallel and tensor parallel together; "
-                "either set sp_ulysses_degree=None, sp_ring_degree=None or set tp_degree=None during pipeline initialization"
-            )
-            assert use_fsdp is False, (
-                "not allowed to enable fully sharded data parallel and tensor parallel together; "
-                "either set use_fsdp=False or set tp_degree=None during pipeline initialization"
-            )
-            assert parallelism == cfg_degree * tp_degree, (
-                f"parallelism ({parallelism}) must be equal to cfg_degree ({cfg_degree}) * tp_degree ({tp_degree})"
-            )
-            sp_ulysses_degree = 1
-            sp_ring_degree = 1
-        elif sp_ulysses_degree is None and sp_ring_degree is None:
-            # use ulysses if not specified
-            sp_ulysses_degree = parallelism // cfg_degree
-            sp_ring_degree = 1
-            tp_degree = 1
-        elif sp_ulysses_degree is not None and sp_ring_degree is not None:
-            assert parallelism == cfg_degree * sp_ulysses_degree * sp_ring_degree, (
-                f"parallelism ({parallelism}) must be equal to cfg_degree ({cfg_degree}) * "
-                f"sp_ulysses_degree ({sp_ulysses_degree}) * sp_ring_degree ({sp_ring_degree})"
-            )
-            tp_degree = 1
-        else:
-            raise ValueError("sp_ulysses_degree and sp_ring_degree must be specified together")
-        return {
-            "cfg_degree": cfg_degree,
-            "sp_ulysses_degree": sp_ulysses_degree,
-            "sp_ring_degree": sp_ring_degree,
-            "tp_degree": tp_degree,
-            "use_fsdp": use_fsdp,
-        }
     def enable_cpu_offload(self, offload_mode: str):
         valid_offload_mode = ("cpu_offload", "sequential_cpu_offload")
         if offload_mode not in valid_offload_mode:
@@ -326,14 +267,22 @@ class BasePipeline:
         for model_name in self.model_names:
             if model_name not in load_model_names:
                 model = getattr(self, model_name)
-                if model is not None and (p := next(model.parameters(), None)) is not None and p.device != torch.device("cpu"):
+                if (
+                    model is not None
+                    and (p := next(model.parameters(), None)) is not None
+                    and p.device != torch.device("cpu")
+                ):
                     param_cache = self._models_offload_params[model_name]
                     for name, param in model.named_parameters(recurse=True):
                         param.data = param_cache[name]
         # load the needed models to device
         for model_name in load_model_names:
             model = getattr(self, model_name)
-            if model is not None and (p := next(model.parameters(), None)) is not None and p.device != torch.device(self.device):
+            if (
+                model is not None
+                and (p := next(model.parameters(), None)) is not None
+                and p.device != torch.device(self.device)
+            ):
                 model.to(self.device)
         # fresh the cuda cache
         empty_cache()

diffsynth-engine 0.3.6.dev9__py3-none-any.whl → 0.3.6.dev11__py3-none-any.whl

diffsynth-engine 0.3.6.dev9__py3-none-any.whl → 0.3.6.dev11__py3-none-any.whl