PyPI - diffsynth-engine - Versions diffs - 0.2.4__tar.gz → 0.2.6__tar.gz - Mend

diffsynth-engine 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (157) hide show

{diffsynth_engine-0.2.4 → diffsynth_engine-0.2.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.2.4
+Version: 0.2.6
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
@@ -23,6 +23,7 @@ Requires-Dist: torchsde
 Requires-Dist: pillow
 Requires-Dist: imageio[ffmpeg]
 Requires-Dist: yunchang; sys_platform == "linux"
+Requires-Dist: onnxruntime
 Provides-Extra: dev
 Requires-Dist: diffusers==0.31.0; extra == "dev"
 Requires-Dist: transformers==4.45.2; extra == "dev"

{diffsynth_engine-0.2.4 → diffsynth_engine-0.2.6}/README.md RENAMED Viewed

@@ -45,7 +45,7 @@ Text to image
 ```python
 from diffsynth_engine import fetch_model, FluxImagePipeline
-model_path = fetch_model("muse/flux-with-vae", path="flux_with_vae.safetensors")
+model_path = fetch_model("muse/flux-with-vae", path="flux1-dev-with-vae.safetensors")
 pipe = FluxImagePipeline.from_pretrained(model_path, device='cuda:0')
 image = pipe(prompt="a cat")
 image.save("image.png")
@@ -54,7 +54,7 @@ Text to image with LoRA
 ```python
 from diffsynth_engine import fetch_model, FluxImagePipeline
-model_path = fetch_model("muse/flux-with-vae", path="flux_with_vae.safetensors")
+model_path = fetch_model("muse/flux-with-vae", path="flux1-dev-with-vae.safetensors")
 lora_path = fetch_model("DonRat/MAJICFLUS_SuperChinesestyleheongsam", path="麦橘超国风旗袍.safetensors")
 pipe = FluxImagePipeline.from_pretrained(model_path, device='cuda:0')
@@ -77,6 +77,13 @@ If you have any questions or feedback, please scan the QR code below, or send em
     <img src="assets/dingtalk.png" alt="dingtalk" width="400" />
 </div>
+## Contributing
+We welcome contributions to DiffSynth-Engine. After installing from source, we recommend that developers install this project using the following command to set up the development environment.
+```bash
+pip install -e '.[dev]'
+```
+TODO: Please refer to [CONTRIBUTING.md](./CONTRIBUTING.md) for more details.
 ## License
 This project is licensed under the Apache License 2.0. See the LICENSE file for details.

{diffsynth_engine-0.2.4 → diffsynth_engine-0.2.6}/diffsynth_engine/models/basic/attention.py RENAMED Viewed

@@ -201,10 +201,8 @@ def long_context_attention(
     assert attn_impl in [
         None,
         "auto",
-        "eager",
         "flash_attn_2",
         "flash_attn_3",
-        "xformers",
         "sdpa",
         "sage_attn",
         "sparge_attn",

{diffsynth_engine-0.2.4 → diffsynth_engine-0.2.6}/diffsynth_engine/models/flux/flux_dit.py RENAMED Viewed

@@ -13,11 +13,12 @@ from diffsynth_engine.models.basic.transformer_helper import (
 )
 from diffsynth_engine.models.basic.timestep import TimestepEmbeddings
 from diffsynth_engine.models.base import PreTrainedModel, StateDictConverter
+from diffsynth_engine.models.basic import attention as attention_ops
 from diffsynth_engine.models.utils import no_init_weights
 from diffsynth_engine.utils.gguf import gguf_inference
 from diffsynth_engine.utils.fp8_linear import fp8_inference
 from diffsynth_engine.utils.constants import FLUX_DIT_CONFIG_FILE
-from diffsynth_engine.models.basic.attention import attention
+from diffsynth_engine.utils.parallel import sequence_parallel, sequence_parallel_unshard
 from diffsynth_engine.utils import logging
@@ -198,7 +199,7 @@ class FluxDoubleAttention(nn.Module):
         k = torch.cat([self.norm_k_b(k_b), self.norm_k_a(k_a)], dim=1)
         v = torch.cat([v_b, v_a], dim=1)
         q, k = apply_rope(q, k, rope_emb)
-        attn_out = attention(q, k, v, attn_impl=self.attn_impl)
+        attn_out = attention_ops.attention(q, k, v, attn_impl=self.attn_impl)
         attn_out = rearrange(attn_out, "b s h d -> b s (h d)").to(q.dtype)
         text_out, image_out = attn_out[:, : text.shape[1]], attn_out[:, text.shape[1] :]
         image_out, text_out = self.attention_callback(
@@ -286,7 +287,7 @@ class FluxSingleAttention(nn.Module):
     def forward(self, x, rope_emb, image_emb):
         q, k, v = rearrange(self.to_qkv(x), "b s (h d) -> b s h d", h=(3 * self.num_heads)).chunk(3, dim=2)
         q, k = apply_rope(self.norm_q_a(q), self.norm_k_a(k), rope_emb)
-        attn_out = attention(q, k, v, attn_impl=self.attn_impl)
+        attn_out = attention_ops.attention(q, k, v, attn_impl=self.attn_impl)
         attn_out = rearrange(attn_out, "b s h d -> b s (h d)").to(q.dtype)
         return self.attention_callback(attn_out=attn_out, x=x, q=q, k=k, v=v, rope_emb=rope_emb, image_emb=image_emb)
@@ -322,7 +323,9 @@ class FluxDiT(PreTrainedModel):
     def __init__(
         self,
+        in_channel: int = 64,
         attn_impl: Optional[str] = None,
+        use_usp: bool = False,
         device: str = "cuda:0",
         dtype: torch.dtype = torch.bfloat16,
     ):
@@ -336,7 +339,8 @@ class FluxDiT(PreTrainedModel):
             nn.Linear(3072, 3072, device=device, dtype=dtype),
         )
         self.context_embedder = nn.Linear(4096, 3072, device=device, dtype=dtype)
-        self.x_embedder = nn.Linear(64, 3072, device=device, dtype=dtype)
+        # normal flux has 64 channels, bfl canny and depth have 128 channels, bfl fill has 384 channels, bfl redux has 64 channels
+        self.x_embedder = nn.Linear(in_channel, 3072, device=device, dtype=dtype)
         self.blocks = nn.ModuleList(
             [FluxDoubleTransformerBlock(3072, 24, attn_impl=attn_impl, device=device, dtype=dtype) for _ in range(19)]
@@ -347,6 +351,8 @@ class FluxDiT(PreTrainedModel):
         self.final_norm_out = AdaLayerNorm(3072, device=device, dtype=dtype)
         self.final_proj_out = nn.Linear(3072, 64, device=device, dtype=dtype)
+        self.use_usp = use_usp
     def patchify(self, hidden_states):
         hidden_states = rearrange(hidden_states, "B C (H P) (W Q) -> B (H W) (C P Q)", P=2, Q=2)
         return hidden_states
@@ -357,7 +363,8 @@ class FluxDiT(PreTrainedModel):
         )
         return hidden_states
-    def prepare_image_ids(self, latents):
+    @staticmethod
+    def prepare_image_ids(latents: torch.Tensor):
         batch_size, _, height, width = latents.shape
         latent_image_ids = torch.zeros(height // 2, width // 2, 3)
         latent_image_ids[..., 1] = latent_image_ids[..., 1] + torch.arange(height // 2)[:, None]
@@ -387,7 +394,14 @@ class FluxDiT(PreTrainedModel):
         controlnet_single_block_output=None,
         **kwargs,
     ):
-        height, width = hidden_states.shape[-2:]
+        h, w = hidden_states.shape[-2:]
+        controlnet_double_block_output = (
+            controlnet_double_block_output if controlnet_double_block_output is not None else ()
+        )
+        controlnet_single_block_output = (
+            controlnet_single_block_output if controlnet_single_block_output is not None else ()
+        )
         fp8_linear_enabled = getattr(self, "fp8_linear_enabled", False)
         with fp8_inference(fp8_linear_enabled), gguf_inference():
             if image_ids is None:
@@ -400,28 +414,54 @@ class FluxDiT(PreTrainedModel):
                 guidance = guidance * 1000
                 conditioning += self.guidance_embedder(guidance, hidden_states.dtype)
             conditioning += self.pooled_text_embedder(pooled_prompt_emb)
-            prompt_emb = self.context_embedder(prompt_emb)
             rope_emb = self.pos_embedder(torch.cat((text_ids, image_ids), dim=1))
+            text_rope_emb = rope_emb[:, :, : text_ids.size(1)]
+            image_rope_emb = rope_emb[:, :, text_ids.size(1) :]
             hidden_states = self.patchify(hidden_states)
-            hidden_states = self.x_embedder(hidden_states)
-            for i, block in enumerate(self.blocks):
-                hidden_states, prompt_emb = block(hidden_states, prompt_emb, conditioning, rope_emb, image_emb)
-                if controlnet_double_block_output is not None:
-                    interval_control = len(self.blocks) / len(controlnet_double_block_output)
-                    interval_control = int(np.ceil(interval_control))
-                    hidden_states = hidden_states + controlnet_double_block_output[i // interval_control]
-            hidden_states = torch.cat([prompt_emb, hidden_states], dim=1)
-            for i, block in enumerate(self.single_blocks):
-                hidden_states = block(hidden_states, conditioning, rope_emb, image_emb)
-                if controlnet_single_block_output is not None:
-                    interval_control = len(self.single_blocks) / len(controlnet_double_block_output)
-                    interval_control = int(np.ceil(interval_control))
-                    hidden_states = hidden_states + controlnet_single_block_output[i // interval_control]
-            hidden_states = hidden_states[:, prompt_emb.shape[1] :]
-            hidden_states = self.final_norm_out(hidden_states, conditioning)
-            hidden_states = self.final_proj_out(hidden_states)
-            hidden_states = self.unpatchify(hidden_states, height, width)
+            with sequence_parallel(
+                (
+                    hidden_states,
+                    prompt_emb,
+                    text_rope_emb,
+                    image_rope_emb,
+                    *controlnet_double_block_output,
+                    *controlnet_single_block_output,
+                ),
+                seq_dims=(
+                    1,
+                    1,
+                    2,
+                    2,
+                    *(1 for _ in controlnet_double_block_output),
+                    *(1 for _ in controlnet_single_block_output),
+                ),
+                enabled=self.use_usp,
+            ):
+                hidden_states = self.x_embedder(hidden_states)
+                prompt_emb = self.context_embedder(prompt_emb)
+                rope_emb = torch.cat((text_rope_emb, image_rope_emb), dim=2)
+                for i, block in enumerate(self.blocks):
+                    hidden_states, prompt_emb = block(hidden_states, prompt_emb, conditioning, rope_emb, image_emb)
+                    if len(controlnet_double_block_output) > 0:
+                        interval_control = len(self.blocks) / len(controlnet_double_block_output)
+                        interval_control = int(np.ceil(interval_control))
+                        hidden_states = hidden_states + controlnet_double_block_output[i // interval_control]
+                hidden_states = torch.cat([prompt_emb, hidden_states], dim=1)
+                for i, block in enumerate(self.single_blocks):
+                    hidden_states = block(hidden_states, conditioning, rope_emb, image_emb)
+                    if len(controlnet_single_block_output) > 0:
+                        interval_control = len(self.single_blocks) / len(controlnet_double_block_output)
+                        interval_control = int(np.ceil(interval_control))
+                        hidden_states = hidden_states + controlnet_single_block_output[i // interval_control]
+                hidden_states = hidden_states[:, prompt_emb.shape[1] :]
+                hidden_states = self.final_norm_out(hidden_states, conditioning)
+                hidden_states = self.final_proj_out(hidden_states)
+                (hidden_states,) = sequence_parallel_unshard((hidden_states,), seq_dims=(1,), seq_lens=(h * w // 4,))
+            hidden_states = self.unpatchify(hidden_states, h, w)
             return hidden_states
     @classmethod
@@ -430,14 +470,18 @@ class FluxDiT(PreTrainedModel):
         state_dict: Dict[str, torch.Tensor],
         device: str,
         dtype: torch.dtype,
+        in_channel: int = 64,
         attn_impl: Optional[str] = None,
+        use_usp: bool = False,
     ):
         with no_init_weights():
             model = torch.nn.utils.skip_init(
                 cls,
                 device=device,
                 dtype=dtype,
+                in_channel=in_channel,
                 attn_impl=attn_impl,
+                use_usp=use_usp,
             )
             model = model.requires_grad_(False)  # for loading gguf
         model.load_state_dict(state_dict, assign=True)

{diffsynth_engine-0.2.4 → diffsynth_engine-0.2.6}/diffsynth_engine/models/utils.py RENAMED Viewed

@@ -2,7 +2,6 @@ import torch
 import torch.nn as nn
 from contextlib import contextmanager
 # modified from transformers.modeling_utils
 TORCH_INIT_FUNCTIONS = {
     "uniform_": nn.init.uniform_,

{diffsynth_engine-0.2.4 → diffsynth_engine-0.2.6}/diffsynth_engine/models/vae/vae.py RENAMED Viewed

@@ -167,6 +167,8 @@ class VAEDecoder(PreTrainedModel):
         self.conv_norm_out = nn.GroupNorm(num_channels=128, num_groups=32, eps=1e-6, device=device, dtype=dtype)
         self.conv_act = nn.SiLU()
         self.conv_out = nn.Conv2d(128, 3, kernel_size=3, padding=1, device=device, dtype=dtype)
+        self.device = device
+        self.dtype = dtype
     def forward(self, sample, tiled=False, tile_size=64, tile_stride=32, **kwargs):
         original_dtype = sample.dtype
@@ -277,6 +279,8 @@ class VAEEncoder(PreTrainedModel):
         self.conv_norm_out = nn.GroupNorm(num_channels=512, num_groups=32, eps=1e-6, device=device, dtype=dtype)
         self.conv_act = nn.SiLU()
         self.conv_out = nn.Conv2d(512, 2 * latent_channels, kernel_size=3, padding=1, device=device, dtype=dtype)
+        self.device = device
+        self.dtype = dtype
     def forward(self, sample, tiled=False, tile_size=64, tile_stride=32, **kwargs):
         original_dtype = sample.dtype

{diffsynth_engine-0.2.4 → diffsynth_engine-0.2.6}/diffsynth_engine/models/wan/wan_dit.py RENAMED Viewed

@@ -2,12 +2,11 @@ import math
 import json
 import torch
 import torch.nn as nn
-import torch.distributed as dist
 from typing import Tuple, Optional
 from einops import rearrange
 from diffsynth_engine.models.base import StateDictConverter, PreTrainedModel
-from diffsynth_engine.models.basic.attention import attention, long_context_attention
+from diffsynth_engine.models.basic import attention as attention_ops
 from diffsynth_engine.models.basic.transformer_helper import RMSNorm
 from diffsynth_engine.models.utils import no_init_weights
 from diffsynth_engine.utils.constants import (
@@ -17,11 +16,7 @@ from diffsynth_engine.utils.constants import (
     WAN_DIT_14B_FLF2V_CONFIG_FILE,
 )
 from diffsynth_engine.utils.gguf import gguf_inference
-from diffsynth_engine.utils.parallel import (
-    get_sp_group,
-    get_sp_world_size,
-    get_sp_rank,
-)
+from diffsynth_engine.utils.parallel import sequence_parallel, sequence_parallel_unshard
 T5_TOKEN_NUM = 512
 FLF_TOKEN_NUM = 257 * 2
@@ -90,20 +85,12 @@ class SelfAttention(nn.Module):
         q = rearrange(q, "b s (n d) -> b s n d", n=num_heads)
         k = rearrange(k, "b s (n d) -> b s n d", n=num_heads)
         v = rearrange(v, "b s (n d) -> b s n d", n=num_heads)
-        if getattr(self, "use_usp", False):
-            x = long_context_attention(
-                q=rope_apply(q, freqs),
-                k=rope_apply(k, freqs),
-                v=v,
-                attn_impl=self.attn_impl,
-            )
-        else:
-            x = attention(
-                q=rope_apply(q, freqs),
-                k=rope_apply(k, freqs),
-                v=v,
-                attn_impl=self.attn_impl,
-            )
+        x = attention_ops.attention(
+            q=rope_apply(q, freqs),
+            k=rope_apply(k, freqs),
+            v=v,
+            attn_impl=self.attn_impl,
+        )
         x = x.flatten(2)
         return self.o(x)
@@ -148,12 +135,12 @@ class CrossAttention(nn.Module):
         k = rearrange(k, "b s (n d) -> b s n d", n=num_heads)
         v = rearrange(v, "b s (n d) -> b s n d", n=num_heads)
-        x = attention(q, k, v, attn_impl=self.attn_impl).flatten(2)
+        x = attention_ops.attention(q, k, v, attn_impl=self.attn_impl).flatten(2)
         if self.has_image_input:
             k_img, v_img = self.norm_k_img(self.k_img(img)), self.v_img(img)
             k_img = rearrange(k_img, "b s (n d) -> b s n d", n=num_heads)
             v_img = rearrange(v_img, "b s (n d) -> b s n d", n=num_heads)
-            y = attention(q, k_img, v_img, attn_impl=self.attn_impl).flatten(2)
+            y = attention_ops.attention(q, k_img, v_img, attn_impl=self.attn_impl).flatten(2)
             x = x + y
         return self.o(x)
@@ -316,10 +303,7 @@ class WanDiT(PreTrainedModel):
         if has_image_input:
             self.img_emb = MLP(1280, dim, flf_pos_emb, device=device, dtype=dtype)  # clip_feature_dim = 1280
-        if use_usp:
-            setattr(self, "use_usp", True)
-            for block in self.blocks:
-                setattr(block.self_attn, "use_usp", True)
+        self.use_usp = use_usp
     def patchify(self, x: torch.Tensor):
         x = self.patch_embedding(x)  # b c f h w -> b 4c f h/2 w/2
@@ -368,21 +352,12 @@ class WanDiT(PreTrainedModel):
                 .reshape(f * h * w, 1, -1)
                 .to(x.device)
             )
-            if getattr(self, "use_usp", False):
-                s, p = x.size(1), get_sp_world_size()  # (sequence_length, parallelism)
-                split_size = [s // p + 1 if i < s % p else s // p for i in range(p)]
-                x = torch.split(x, split_size, dim=1)[get_sp_rank()]
-                freqs = torch.split(freqs, split_size, dim=0)[get_sp_rank()]
-            for block in self.blocks:
-                x = block(x, context, t_mod, freqs)
-            x = self.head(x, t)
-            if getattr(self, "use_usp", False):
-                b, d = x.size(0), x.size(2)  # (batch_size, out_dim)
-                xs = [torch.zeros((b, s, d), dtype=x.dtype, device=x.device) for s in split_size]
-                dist.all_gather(xs, x, group=get_sp_group())
-                x = torch.concat(xs, dim=1)
+            with sequence_parallel([x, freqs], seq_dims=(1, 0), enabled=self.use_usp):
+                for block in self.blocks:
+                    x = block(x, context, t_mod, freqs)
+                x = self.head(x, t)
+                (x,) = sequence_parallel_unshard((x,), seq_dims=(1,), seq_lens=(f * h * w,))
             x = self.unpatchify(x, (f, h, w))
             return x

{diffsynth_engine-0.2.4 → diffsynth_engine-0.2.6}/diffsynth_engine/pipelines/base.py RENAMED Viewed

@@ -4,10 +4,11 @@ import numpy as np
 from typing import Dict, List, Tuple
 from PIL import Image
 from dataclasses import dataclass
-from diffsynth_engine.utils.loader import load_file
 from diffsynth_engine.utils.offload import enable_sequential_cpu_offload
 from diffsynth_engine.utils.gguf import load_gguf_checkpoint
 from diffsynth_engine.utils import logging
+from diffsynth_engine.utils.loader import load_file
+from diffsynth_engine.utils.platform import empty_cache
 logger = logging.get_logger(__name__)
@@ -25,14 +26,21 @@ class LoRAStateDictConverter:
 class BasePipeline:
     lora_converter = LoRAStateDictConverter()
-    def __init__(self, vae_tiled, vae_tile_size, vae_tile_stride, device="cuda:0", dtype=torch.float16):
+    def __init__(
+        self,
+        vae_tiled: bool = False,
+        vae_tile_size: int = -1,
+        vae_tile_stride: int = -1,
+        device="cuda:0",
+        dtype=torch.float16,
+    ):
         super().__init__()
-        self.device = device
-        self.dtype = dtype
-        self.offload_mode = None
         self.vae_tiled = vae_tiled
         self.vae_tile_size = vae_tile_size
         self.vae_tile_stride = vae_tile_stride
+        self.device = device
+        self.dtype = dtype
+        self.offload_mode = None
         self.model_names = []
     @classmethod
@@ -144,6 +152,7 @@ class BasePipeline:
         return noise
     def encode_image(self, image: torch.Tensor) -> torch.Tensor:
+        image = image.to(self.device, self.vae_encoder.dtype)
         latents = self.vae_encoder(
             image, tiled=self.vae_tiled, tile_size=self.vae_tile_size, tile_stride=self.vae_tile_stride
         )
@@ -151,8 +160,9 @@ class BasePipeline:
     def decode_image(self, latent: torch.Tensor) -> torch.Tensor:
         vae_dtype = self.vae_decoder.conv_in.weight.dtype
+        latent = latent.to(self.device, vae_dtype)
         image = self.vae_decoder(
-            latent.to(vae_dtype), tiled=self.vae_tiled, tile_size=self.vae_tile_size, tile_stride=self.vae_tile_stride
+            latent, tiled=self.vae_tiled, tile_size=self.vae_tile_size, tile_stride=self.vae_tile_stride
         )
         return image
@@ -196,8 +206,53 @@ class BasePipeline:
                 model.eval()
         return self
-    def enable_fp8_linear(self):
-        raise NotImplementedError()
+    @staticmethod
+    def init_parallel_config(
+        parallelism: int,
+        use_cfg_parallel: bool,
+        model_config: ModelConfig,
+    ):
+        assert parallelism in (2, 4, 8), "parallelism must be 2, 4 or 8"
+        cfg_degree = 2 if use_cfg_parallel else 1
+        sp_ulysses_degree = getattr(model_config, "sp_ulysses_degree", None)
+        sp_ring_degree = getattr(model_config, "sp_ring_degree", None)
+        tp_degree = getattr(model_config, "tp_degree", None)
+        use_fsdp = getattr(model_config, "use_fsdp", False)
+        if tp_degree is not None:
+            assert sp_ulysses_degree is None and sp_ring_degree is None, (
+                "not allowed to enable sequence parallel and tensor parallel together; "
+                "either set sp_ulysses_degree=None, sp_ring_degree=None or set tp_degree=None during pipeline initialization"
+            )
+            assert use_fsdp is False, (
+                "not allowed to enable fully sharded data parallel and tensor parallel together; "
+                "either set use_fsdp=False or set tp_degree=None during pipeline initialization"
+            )
+            assert parallelism == cfg_degree * tp_degree, (
+                f"parallelism ({parallelism}) must be equal to cfg_degree ({cfg_degree}) * tp_degree ({tp_degree})"
+            )
+            sp_ulysses_degree = 1
+            sp_ring_degree = 1
+        elif sp_ulysses_degree is None and sp_ring_degree is None:
+            # use ulysses if not specified
+            sp_ulysses_degree = parallelism // cfg_degree
+            sp_ring_degree = 1
+            tp_degree = 1
+        elif sp_ulysses_degree is not None and sp_ring_degree is not None:
+            assert parallelism == cfg_degree * sp_ulysses_degree * sp_ring_degree, (
+                f"parallelism ({parallelism}) must be equal to cfg_degree ({cfg_degree}) * "
+                f"sp_ulysses_degree ({sp_ulysses_degree}) * sp_ring_degree ({sp_ring_degree})"
+            )
+            tp_degree = 1
+        else:
+            raise ValueError("sp_ulysses_degree and sp_ring_degree must be specified together")
+        return {
+            "cfg_degree": cfg_degree,
+            "sp_ulysses_degree": sp_ulysses_degree,
+            "sp_ring_degree": sp_ring_degree,
+            "tp_degree": tp_degree,
+            "use_fsdp": use_fsdp,
+        }
     @staticmethod
     def validate_offload_mode(offload_mode: str | None):
@@ -233,7 +288,7 @@ class BasePipeline:
             return
         if self.offload_mode == "sequential_cpu_offload":
             # fresh the cuda cache
-            torch.cuda.empty_cache()
+            empty_cache()
             return
         # offload unnecessary models to cpu
@@ -248,4 +303,4 @@ class BasePipeline:
             if model is not None and (p := next(model.parameters(), None)) is not None and p.device != self.device:
                 model.to(self.device)
         # fresh the cuda cache
-        torch.cuda.empty_cache()
+        empty_cache()

diffsynth-engine 0.2.4__tar.gz → 0.2.6__tar.gz

diffsynth-engine 0.2.4__tar.gz → 0.2.6__tar.gz