diffsynth-engine 0.6.1.dev27__py3-none-any.whl → 0.6.1.dev29__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
@@ -251,6 +251,11 @@ class QwenImagePipelineConfig(AttentionConfig, OptimizationConfig, ParallelConfi
  # override OptimizationConfig
  fbcache_relative_l1_threshold = 0.009

+ # svd
+ use_nunchaku: Optional[bool] = field(default=None, init=False)
+ use_nunchaku_awq: Optional[bool] = field(default=None, init=False)
+ use_nunchaku_attn: Optional[bool] = field(default=None, init=False)
+
  @classmethod
  def basic_config(
  cls,
@@ -40,7 +40,7 @@ class PreTrainedModel(nn.Module):

  def load_loras(self, lora_args: List[Dict[str, Any]], fused: bool = True):
  for args in lora_args:
- key = args["name"]
+ key = args["key"]
  module = self.get_submodule(key)
  if not isinstance(module, (LoRALinear, LoRAConv2d)):
  raise ValueError(f"Unsupported lora key: {key}")
@@ -132,6 +132,7 @@ class LoRALinear(nn.Linear):
  device: str,
  dtype: torch.dtype,
  save_original_weight: bool = True,
+ **kwargs,
  ):
  if save_original_weight and self._original_weight is None:
  if self.weight.dtype == torch.float8_e4m3fn:
@@ -0,0 +1,221 @@
+ import torch
+ import torch.nn as nn
+ from collections import OrderedDict
+
+ from .lora import LoRA
+ from nunchaku.models.linear import AWQW4A16Linear, SVDQW4A4Linear
+ from nunchaku.lora.flux.nunchaku_converter import (
+ pack_lowrank_weight,
+ unpack_lowrank_weight,
+ )
+
+
+ class LoRASVDQW4A4Linear(nn.Module):
+ def __init__(
+ self,
+ origin_linear: SVDQW4A4Linear,
+ ):
+ super().__init__()
+
+ self.origin_linear = origin_linear
+ self.base_rank = self.origin_linear.rank
+ self._lora_dict = OrderedDict()
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ return self.origin_linear(x)
+
+ def __getattr__(self, name: str):
+ try:
+ return super().__getattr__(name)
+ except AttributeError:
+ return getattr(self.origin_linear, name)
+
+ def _apply_lora_weights(self, name: str, down: torch.Tensor, up: torch.Tensor, alpha: int, scale: float, rank: int):
+ final_scale = scale * (alpha / rank)
+
+ up_scaled = (up * final_scale).to(
+ dtype=self.origin_linear.proj_up.dtype, device=self.origin_linear.proj_up.device
+ )
+ down_final = down.to(dtype=self.origin_linear.proj_down.dtype, device=self.origin_linear.proj_down.device)
+
+ with torch.no_grad():
+ pd_packed = self.origin_linear.proj_down.data
+ pu_packed = self.origin_linear.proj_up.data
+ pd = unpack_lowrank_weight(pd_packed, down=True)
+ pu = unpack_lowrank_weight(pu_packed, down=False)
+
+ new_proj_down = torch.cat([pd, down_final], dim=0)
+ new_proj_up = torch.cat([pu, up_scaled], dim=1)
+
+ self.origin_linear.proj_down.data = pack_lowrank_weight(new_proj_down, down=True)
+ self.origin_linear.proj_up.data = pack_lowrank_weight(new_proj_up, down=False)
+
+ current_total_rank = self.origin_linear.rank
+ self.origin_linear.rank += rank
+ self._lora_dict[name] = {"rank": rank, "alpha": alpha, "scale": scale, "start_idx": current_total_rank}
+
+ def add_frozen_lora(
+ self,
+ name: str,
+ scale: float,
+ rank: int,
+ alpha: int,
+ up: torch.Tensor,
+ down: torch.Tensor,
+ device: str,
+ dtype: torch.dtype,
+ **kwargs,
+ ):
+ if name in self._lora_dict:
+ raise ValueError(f"LoRA with name '{name}' already exists.")
+
+ self._apply_lora_weights(name, down, up, alpha, scale, rank)
+
+ def add_qkv_lora(
+ self,
+ name: str,
+ scale: float,
+ rank: int,
+ alpha: int,
+ q_up: torch.Tensor,
+ q_down: torch.Tensor,
+ k_up: torch.Tensor,
+ k_down: torch.Tensor,
+ v_up: torch.Tensor,
+ v_down: torch.Tensor,
+ device: str,
+ dtype: torch.dtype,
+ **kwargs,
+ ):
+ if name in self._lora_dict:
+ raise ValueError(f"LoRA with name '{name}' already exists.")
+
+ fused_down = torch.cat([q_down, k_down, v_down], dim=0)
+
+ fused_rank = 3 * rank
+ out_q, out_k = q_up.shape[0], k_up.shape[0]
+ fused_up = torch.zeros((self.out_features, fused_rank), device=q_up.device, dtype=q_up.dtype)
+ fused_up[:out_q, :rank] = q_up
+ fused_up[out_q : out_q + out_k, rank : 2 * rank] = k_up
+ fused_up[out_q + out_k :, 2 * rank :] = v_up
+
+ self._apply_lora_weights(name, fused_down, fused_up, alpha, scale, rank)
+
+ def modify_scale(self, name: str, scale: float):
+ if name not in self._lora_dict:
+ raise ValueError(f"LoRA name {name} not found in {self.__class__.__name__}")
+
+ info = self._lora_dict[name]
+ old_scale = info["scale"]
+
+ if old_scale == scale:
+ return
+
+ if old_scale == 0:
+ scale_factor = 0.0
+ else:
+ scale_factor = scale / old_scale
+
+ with torch.no_grad():
+ lora_rank = info["rank"]
+ start_idx = info["start_idx"]
+ end_idx = start_idx + lora_rank
+
+ pu_packed = self.origin_linear.proj_up.data
+ pu = unpack_lowrank_weight(pu_packed, down=False)
+ pu[:, start_idx:end_idx] *= scale_factor
+
+ self.origin_linear.proj_up.data = pack_lowrank_weight(pu, down=False)
+
+ self._lora_dict[name]["scale"] = scale
+
+ def clear(self, release_all_cpu_memory: bool = False):
+ if not self._lora_dict:
+ return
+
+ with torch.no_grad():
+ pd_packed = self.origin_linear.proj_down.data
+ pu_packed = self.origin_linear.proj_up.data
+
+ pd = unpack_lowrank_weight(pd_packed, down=True)
+ pu = unpack_lowrank_weight(pu_packed, down=False)
+
+ pd_reset = pd[: self.base_rank, :].clone()
+ pu_reset = pu[:, : self.base_rank].clone()
+
+ self.origin_linear.proj_down.data = pack_lowrank_weight(pd_reset, down=True)
+ self.origin_linear.proj_up.data = pack_lowrank_weight(pu_reset, down=False)
+
+ self.origin_linear.rank = self.base_rank
+
+ self._lora_dict.clear()
+
+
+ class LoRAAWQW4A16Linear(nn.Module):
+ def __init__(self, origin_linear: AWQW4A16Linear):
+ super().__init__()
+ self.origin_linear = origin_linear
+ self._lora_dict = OrderedDict()
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ quantized_output = self.origin_linear(x)
+
+ for name, lora in self._lora_dict.items():
+ quantized_output += lora(x.to(lora.dtype)).to(quantized_output.dtype)
+
+ return quantized_output
+
+ def __getattr__(self, name: str):
+ try:
+ return super().__getattr__(name)
+ except AttributeError:
+ return getattr(self.origin_linear, name)
+
+ def add_lora(
+ self,
+ name: str,
+ scale: float,
+ rank: int,
+ alpha: int,
+ up: torch.Tensor,
+ down: torch.Tensor,
+ device: str,
+ dtype: torch.dtype,
+ **kwargs,
+ ):
+ up_linear = nn.Linear(rank, self.out_features, bias=False, device="meta", dtype=dtype).to_empty(device=device)
+ down_linear = nn.Linear(self.in_features, rank, bias=False, device="meta", dtype=dtype).to_empty(device=device)
+
+ up_linear.weight.data = up.reshape(self.out_features, rank)
+ down_linear.weight.data = down.reshape(rank, self.in_features)
+
+ lora = LoRA(scale, rank, alpha, up_linear, down_linear, device, dtype)
+ self._lora_dict[name] = lora
+
+ def modify_scale(self, name: str, scale: float):
+ if name not in self._lora_dict:
+ raise ValueError(f"LoRA name {name} not found in {self.__class__.__name__}")
+ self._lora_dict[name].scale = scale
+
+ def add_frozen_lora(self, *args, **kwargs):
+ raise NotImplementedError("Frozen LoRA (merging weights) is not supported for AWQW4A16Linear.")
+
+ def clear(self, *args, **kwargs):
+ self._lora_dict.clear()
+
+
+ def patch_nunchaku_model_for_lora(model: nn.Module):
+ def _recursive_patch(module: nn.Module):
+ for name, child_module in module.named_children():
+ replacement = None
+ if isinstance(child_module, AWQW4A16Linear):
+ replacement = LoRAAWQW4A16Linear(child_module)
+ elif isinstance(child_module, SVDQW4A4Linear):
+ replacement = LoRASVDQW4A4Linear(child_module)
+
+ if replacement:
+ setattr(module, name, replacement)
+ else:
+ _recursive_patch(child_module)
+
+ _recursive_patch(model)
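The new module above is meant to be applied to an already-built nunchaku model before any LoRA is attached: `patch_nunchaku_model_for_lora` swaps every `AWQW4A16Linear` / `SVDQW4A4Linear` for its LoRA-aware wrapper, and the wrapper methods then graft the low-rank weights onto the packed projections. A minimal usage sketch, assuming `nunchaku` is installed and that `dit`, `up_weight`, and `down_weight` stand in for an existing model and LoRA tensors (the submodule key is hypothetical):

    import torch
    from diffsynth_engine.models.basic.lora_nunchaku import patch_nunchaku_model_for_lora

    # Replace quantized linears in-place with LoRA-aware wrappers.
    patch_nunchaku_model_for_lora(dit)

    # Fold one low-rank update into the packed SVDQ projections of a single layer.
    layer = dit.get_submodule("transformer_blocks.0.attn.to_out")  # hypothetical key
    layer.add_frozen_lora(
        name="example_lora", scale=1.0, rank=16, alpha=16,
        up=up_weight, down=down_weight, device="cuda:0", dtype=torch.bfloat16,
    )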
@@ -3,10 +3,15 @@ import math
  import functools

  from diffsynth_engine.utils.flag import VIDEO_SPARSE_ATTN_AVAILABLE
- from diffsynth_engine.utils.parallel import get_sp_ulysses_group, get_sp_ring_world_size
+ from diffsynth_engine.utils.process_group import get_sp_ulysses_group, get_sp_ring_world_size

+
+ vsa_core = None
  if VIDEO_SPARSE_ATTN_AVAILABLE:
- from vsa import video_sparse_attn as vsa_core
+ try:
+ from vsa import video_sparse_attn as vsa_core
+ except Exception:
+ vsa_core = None

  VSA_TILE_SIZE = (4, 4, 4)

@@ -171,6 +176,12 @@ def video_sparse_attn(
  variable_block_sizes: torch.LongTensor,
  non_pad_index: torch.LongTensor,
  ):
+ if vsa_core is None:
+ raise RuntimeError(
+ "Video sparse attention (VSA) is not available. "
+ "Please install the 'vsa' package and ensure all its dependencies (including pytest) are installed."
+ )
+
  q = tile(q, num_tiles, tile_partition_indices, non_pad_index)
  k = tile(k, num_tiles, tile_partition_indices, non_pad_index)
  v = tile(v, num_tiles, tile_partition_indices, non_pad_index)
@@ -212,7 +223,8 @@ def distributed_video_sparse_attn(
  ):
  from yunchang.comm.all_to_all import SeqAllToAll4D

- assert get_sp_ring_world_size() == 1, "distributed video sparse attention requires ring degree to be 1"
+ ring_world_size = get_sp_ring_world_size()
+ assert ring_world_size == 1, "distributed video sparse attention requires ring degree to be 1"
  sp_ulysses_group = get_sp_ulysses_group()

  q = SeqAllToAll4D.apply(sp_ulysses_group, q, scatter_idx, gather_idx)
@@ -11,3 +11,11 @@ __all__ = [
  "Qwen2_5_VLVisionConfig",
  "Qwen2_5_VLConfig",
  ]
+
+ try:
+ from .qwen_image_dit_nunchaku import QwenImageDiTNunchaku
+
+ __all__.append("QwenImageDiTNunchaku")
+
+ except (ImportError, ModuleNotFoundError):
+ pass
@@ -0,0 +1,341 @@
+ import torch
+ import torch.nn as nn
+ from typing import Any, Dict, List, Tuple, Optional
+ from einops import rearrange
+
+ from diffsynth_engine.models.basic import attention as attention_ops
+ from diffsynth_engine.models.basic.timestep import TimestepEmbeddings
+ from diffsynth_engine.models.basic.transformer_helper import AdaLayerNorm, RMSNorm
+ from diffsynth_engine.models.qwen_image.qwen_image_dit import (
+ QwenFeedForward,
+ apply_rotary_emb_qwen,
+ QwenDoubleStreamAttention,
+ QwenImageTransformerBlock,
+ QwenImageDiT,
+ QwenEmbedRope,
+ )
+
+ from nunchaku.models.utils import fuse_linears
+ from nunchaku.ops.fused import fused_gelu_mlp
+ from nunchaku.models.linear import AWQW4A16Linear, SVDQW4A4Linear
+ from diffsynth_engine.models.basic.lora import LoRALinear, LoRAConv2d
+ from diffsynth_engine.models.basic.lora_nunchaku import LoRASVDQW4A4Linear, LoRAAWQW4A16Linear
+
+
+ class QwenDoubleStreamAttentionNunchaku(QwenDoubleStreamAttention):
+ def __init__(
+ self,
+ dim_a,
+ dim_b,
+ num_heads,
+ head_dim,
+ device: str = "cuda:0",
+ dtype: torch.dtype = torch.bfloat16,
+ nunchaku_rank: int = 32,
+ ):
+ super().__init__(dim_a, dim_b, num_heads, head_dim, device=device, dtype=dtype)
+
+ to_qkv = fuse_linears([self.to_q, self.to_k, self.to_v])
+ self.to_qkv = SVDQW4A4Linear.from_linear(to_qkv, rank=nunchaku_rank)
+ self.to_out = SVDQW4A4Linear.from_linear(self.to_out, rank=nunchaku_rank)
+
+ del self.to_q, self.to_k, self.to_v
+
+ add_qkv_proj = fuse_linears([self.add_q_proj, self.add_k_proj, self.add_v_proj])
+ self.add_qkv_proj = SVDQW4A4Linear.from_linear(add_qkv_proj, rank=nunchaku_rank)
+ self.to_add_out = SVDQW4A4Linear.from_linear(self.to_add_out, rank=nunchaku_rank)
+
+ del self.add_q_proj, self.add_k_proj, self.add_v_proj
+
+ def forward(
+ self,
+ image: torch.FloatTensor,
+ text: torch.FloatTensor,
+ rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+ attn_mask: Optional[torch.Tensor] = None,
+ attn_kwargs: Optional[Dict[str, Any]] = None,
+ ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
+ img_q, img_k, img_v = self.to_qkv(image).chunk(3, dim=-1)
+ txt_q, txt_k, txt_v = self.add_qkv_proj(text).chunk(3, dim=-1)
+
+ img_q = rearrange(img_q, "b s (h d) -> b s h d", h=self.num_heads)
+ img_k = rearrange(img_k, "b s (h d) -> b s h d", h=self.num_heads)
+ img_v = rearrange(img_v, "b s (h d) -> b s h d", h=self.num_heads)
+
+ txt_q = rearrange(txt_q, "b s (h d) -> b s h d", h=self.num_heads)
+ txt_k = rearrange(txt_k, "b s (h d) -> b s h d", h=self.num_heads)
+ txt_v = rearrange(txt_v, "b s (h d) -> b s h d", h=self.num_heads)
+
+ img_q, img_k = self.norm_q(img_q), self.norm_k(img_k)
+ txt_q, txt_k = self.norm_added_q(txt_q), self.norm_added_k(txt_k)
+
+ if rotary_emb is not None:
+ img_freqs, txt_freqs = rotary_emb
+ img_q = apply_rotary_emb_qwen(img_q, img_freqs)
+ img_k = apply_rotary_emb_qwen(img_k, img_freqs)
+ txt_q = apply_rotary_emb_qwen(txt_q, txt_freqs)
+ txt_k = apply_rotary_emb_qwen(txt_k, txt_freqs)
+
+ joint_q = torch.cat([txt_q, img_q], dim=1)
+ joint_k = torch.cat([txt_k, img_k], dim=1)
+ joint_v = torch.cat([txt_v, img_v], dim=1)
+
+ attn_kwargs = attn_kwargs if attn_kwargs is not None else {}
+ joint_attn_out = attention_ops.attention(joint_q, joint_k, joint_v, attn_mask=attn_mask, **attn_kwargs)
+
+ joint_attn_out = rearrange(joint_attn_out, "b s h d -> b s (h d)").to(joint_q.dtype)
+
+ txt_attn_output = joint_attn_out[:, : text.shape[1], :]
+ img_attn_output = joint_attn_out[:, text.shape[1] :, :]
+
+ img_attn_output = self.to_out(img_attn_output)
+ txt_attn_output = self.to_add_out(txt_attn_output)
+
+ return img_attn_output, txt_attn_output
+
+
+ class QwenFeedForwardNunchaku(QwenFeedForward):
+ def __init__(
+ self,
+ dim: int,
+ dim_out: Optional[int] = None,
+ dropout: float = 0.0,
+ device: str = "cuda:0",
+ dtype: torch.dtype = torch.bfloat16,
+ rank: int = 32,
+ ):
+ super().__init__(dim, dim_out, dropout, device=device, dtype=dtype)
+ self.net[0].proj = SVDQW4A4Linear.from_linear(self.net[0].proj, rank=rank)
+ self.net[2] = SVDQW4A4Linear.from_linear(self.net[2], rank=rank)
+ self.net[2].act_unsigned = self.net[2].precision != "nvfp4"
+
+ def forward(self, hidden_states: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+ return fused_gelu_mlp(hidden_states, self.net[0].proj, self.net[2])
+
+
+ class QwenImageTransformerBlockNunchaku(QwenImageTransformerBlock):
+ def __init__(
+ self,
+ dim: int,
+ num_attention_heads: int,
+ attention_head_dim: int,
+ eps: float = 1e-6,
+ device: str = "cuda:0",
+ dtype: torch.dtype = torch.bfloat16,
+ scale_shift: float = 1.0,
+ use_nunchaku_awq: bool = True,
+ use_nunchaku_attn: bool = True,
+ nunchaku_rank: int = 32,
+ ):
+ super().__init__(dim, num_attention_heads, attention_head_dim, eps, device=device, dtype=dtype)
+
+ self.use_nunchaku_awq = use_nunchaku_awq
+ if use_nunchaku_awq:
+ self.img_mod[1] = AWQW4A16Linear.from_linear(self.img_mod[1], rank=nunchaku_rank)
+
+ if use_nunchaku_attn:
+ self.attn = QwenDoubleStreamAttentionNunchaku(
+ dim_a=dim,
+ dim_b=dim,
+ num_heads=num_attention_heads,
+ head_dim=attention_head_dim,
+ device=device,
+ dtype=dtype,
+ nunchaku_rank=nunchaku_rank,
+ )
+ else:
+ self.attn = QwenDoubleStreamAttention(
+ dim_a=dim,
+ dim_b=dim,
+ num_heads=num_attention_heads,
+ head_dim=attention_head_dim,
+ device=device,
+ dtype=dtype,
+ )
+
+ self.img_mlp = QwenFeedForwardNunchaku(dim=dim, dim_out=dim, device=device, dtype=dtype, rank=nunchaku_rank)
+
+ if use_nunchaku_awq:
+ self.txt_mod[1] = AWQW4A16Linear.from_linear(self.txt_mod[1], rank=nunchaku_rank)
+
+ self.txt_mlp = QwenFeedForwardNunchaku(dim=dim, dim_out=dim, device=device, dtype=dtype, rank=nunchaku_rank)
+
+ self.scale_shift = scale_shift
+
+ def _modulate(self, x, mod_params):
+ shift, scale, gate = mod_params.chunk(3, dim=-1)
+ if self.use_nunchaku_awq:
+ if self.scale_shift != 0:
+ scale.add_(self.scale_shift)
+ return x * scale.unsqueeze(1) + shift.unsqueeze(1), gate.unsqueeze(1)
+ else:
+ return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1), gate.unsqueeze(1)
+
+ def forward(
+ self,
+ image: torch.Tensor,
+ text: torch.Tensor,
+ temb: torch.Tensor,
+ rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+ attn_mask: Optional[torch.Tensor] = None,
+ attn_kwargs: Optional[Dict[str, Any]] = None,
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
+ if self.use_nunchaku_awq:
+ img_mod_params = self.img_mod(temb)  # [B, 6*dim]
+ txt_mod_params = self.txt_mod(temb)  # [B, 6*dim]
+
+ # nunchaku's mod_params is [B, 6*dim] instead of [B, dim*6]
+ img_mod_params = (
+ img_mod_params.view(img_mod_params.shape[0], -1, 6).transpose(1, 2).reshape(img_mod_params.shape[0], -1)
+ )
+ txt_mod_params = (
+ txt_mod_params.view(txt_mod_params.shape[0], -1, 6).transpose(1, 2).reshape(txt_mod_params.shape[0], -1)
+ )
+
+ img_mod_attn, img_mod_mlp = img_mod_params.chunk(2, dim=-1)  # [B, 3*dim] each
+ txt_mod_attn, txt_mod_mlp = txt_mod_params.chunk(2, dim=-1)  # [B, 3*dim] each
+ else:
+ img_mod_attn, img_mod_mlp = self.img_mod(temb).chunk(2, dim=-1)  # [B, 3*dim] each
+ txt_mod_attn, txt_mod_mlp = self.txt_mod(temb).chunk(2, dim=-1)  # [B, 3*dim] each
+
+ img_normed = self.img_norm1(image)
+ img_modulated, img_gate = self._modulate(img_normed, img_mod_attn)
+
+ txt_normed = self.txt_norm1(text)
+ txt_modulated, txt_gate = self._modulate(txt_normed, txt_mod_attn)
+
+ img_attn_out, txt_attn_out = self.attn(
+ image=img_modulated,
+ text=txt_modulated,
+ rotary_emb=rotary_emb,
+ attn_mask=attn_mask,
+ attn_kwargs=attn_kwargs,
+ )
+
+ image = image + img_gate * img_attn_out
+ text = text + txt_gate * txt_attn_out
+
+ img_normed_2 = self.img_norm2(image)
+ img_modulated_2, img_gate_2 = self._modulate(img_normed_2, img_mod_mlp)
+
+ txt_normed_2 = self.txt_norm2(text)
+ txt_modulated_2, txt_gate_2 = self._modulate(txt_normed_2, txt_mod_mlp)
+
+ img_mlp_out = self.img_mlp(img_modulated_2)
+ txt_mlp_out = self.txt_mlp(txt_modulated_2)
+
+ image = image + img_gate_2 * img_mlp_out
+ text = text + txt_gate_2 * txt_mlp_out
+
+ return text, image
+
+
+ class QwenImageDiTNunchaku(QwenImageDiT):
+ def __init__(
+ self,
+ num_layers: int = 60,
+ device: str = "cuda:0",
+ dtype: torch.dtype = torch.bfloat16,
+ use_nunchaku_awq: bool = True,
+ use_nunchaku_attn: bool = True,
+ nunchaku_rank: int = 32,
+ ):
+ super().__init__()
+
+ self.pos_embed = QwenEmbedRope(theta=10000, axes_dim=[16, 56, 56], scale_rope=True, device=device)
+
+ self.time_text_embed = TimestepEmbeddings(256, 3072, device=device, dtype=dtype)
+
+ self.txt_norm = RMSNorm(3584, eps=1e-6, device=device, dtype=dtype)
+
+ self.img_in = nn.Linear(64, 3072, device=device, dtype=dtype)
+ self.txt_in = nn.Linear(3584, 3072, device=device, dtype=dtype)
+
+ self.transformer_blocks = nn.ModuleList(
+ [
+ QwenImageTransformerBlockNunchaku(
+ dim=3072,
+ num_attention_heads=24,
+ attention_head_dim=128,
+ device=device,
+ dtype=dtype,
+ scale_shift=0,
+ use_nunchaku_awq=use_nunchaku_awq,
+ use_nunchaku_attn=use_nunchaku_attn,
+ nunchaku_rank=nunchaku_rank,
+ )
+ for _ in range(num_layers)
+ ]
+ )
+ self.norm_out = AdaLayerNorm(3072, device=device, dtype=dtype)
+ self.proj_out = nn.Linear(3072, 64, device=device, dtype=dtype)
+
+ @classmethod
+ def from_state_dict(
+ cls,
+ state_dict: Dict[str, torch.Tensor],
+ device: str,
+ dtype: torch.dtype,
+ num_layers: int = 60,
+ use_nunchaku_awq: bool = True,
+ use_nunchaku_attn: bool = True,
+ nunchaku_rank: int = 32,
+ ):
+ model = cls(
+ device="meta",
+ dtype=dtype,
+ num_layers=num_layers,
+ use_nunchaku_awq=use_nunchaku_awq,
+ use_nunchaku_attn=use_nunchaku_attn,
+ nunchaku_rank=nunchaku_rank,
+ )
+ model = model.requires_grad_(False)
+ model.load_state_dict(state_dict, assign=True)
+ model.to(device=device, non_blocking=True)
+ return model
+
+ def load_loras(self, lora_args: List[Dict[str, Any]], fused: bool = False):
+ fuse_dict = {}
+ for args in lora_args:
+ key = args["key"]
+ if any(suffix in key for suffix in {"add_q_proj", "add_k_proj", "add_v_proj"}):
+ fuse_key = f"{key.rsplit('.', 1)[0]}.add_qkv_proj"
+ type = key.rsplit(".", 1)[-1].split("_")[1]
+ fuse_dict[fuse_key] = fuse_dict.get(fuse_key, {})
+ fuse_dict[fuse_key][type] = args
+ continue
+
+ if any(suffix in key for suffix in {"to_q", "to_k", "to_v"}):
+ fuse_key = f"{key.rsplit('.', 1)[0]}.to_qkv"
+ type = key.rsplit(".", 1)[-1].split("_")[1]
+ fuse_dict[fuse_key] = fuse_dict.get(fuse_key, {})
+ fuse_dict[fuse_key][type] = args
+ continue
+
+ module = self.get_submodule(key)
+ if not isinstance(module, (LoRALinear, LoRAConv2d, LoRASVDQW4A4Linear, LoRAAWQW4A16Linear)):
+ raise ValueError(f"Unsupported lora key: {key}")
+
+ if fused and not isinstance(module, LoRAAWQW4A16Linear):
+ module.add_frozen_lora(**args)
+ else:
+ module.add_lora(**args)
+
+ for key in fuse_dict.keys():
+ module = self.get_submodule(key)
+ if not isinstance(module, LoRASVDQW4A4Linear):
+ raise ValueError(f"Unsupported lora key: {key}")
+ module.add_qkv_lora(
+ name=args["name"],
+ scale=fuse_dict[key]["q"]["scale"],
+ rank=fuse_dict[key]["q"]["rank"],
+ alpha=fuse_dict[key]["q"]["alpha"],
+ q_up=fuse_dict[key]["q"]["up"],
+ q_down=fuse_dict[key]["q"]["down"],
+ k_up=fuse_dict[key]["k"]["up"],
+ k_down=fuse_dict[key]["k"]["down"],
+ v_up=fuse_dict[key]["v"]["up"],
+ v_down=fuse_dict[key]["v"]["down"],
+ device=fuse_dict[key]["q"]["device"],
+ dtype=fuse_dict[key]["q"]["dtype"],
+ )
@@ -106,7 +106,8 @@ class BasePipeline:
  for key, param in state_dict.items():
  lora_args.append(
  {
- "name": key,
+ "name": lora_path,
+ "key": key,
  "scale": lora_scale,
  "rank": param["rank"],
  "alpha": param["alpha"],
@@ -130,7 +131,10 @@ class BasePipeline:

  @staticmethod
  def load_model_checkpoint(
- checkpoint_path: str | List[str], device: str = "cpu", dtype: torch.dtype = torch.float16
+ checkpoint_path: str | List[str],
+ device: str = "cpu",
+ dtype: torch.dtype = torch.float16,
+ convert_dtype: bool = True,
  ) -> Dict[str, torch.Tensor]:
  if isinstance(checkpoint_path, str):
  checkpoint_path = [checkpoint_path]
@@ -140,8 +144,11 @@ class BasePipeline:
  raise FileNotFoundError(f"{path} is not a file")
  elif path.endswith(".safetensors"):
  state_dict_ = load_file(path, device=device)
- for key, value in state_dict_.items():
- state_dict[key] = value.to(dtype)
+ if convert_dtype:
+ for key, value in state_dict_.items():
+ state_dict[key] = value.to(dtype)
+ else:
+ state_dict.update(state_dict_)

  elif path.endswith(".gguf"):
  state_dict.update(**load_gguf_checkpoint(path, device=device, dtype=dtype))
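The new `convert_dtype` flag lets callers load packed quantized tensors (for example nunchaku int4/fp4 weights) without casting them to the pipeline dtype; the Qwen-Image pipeline below passes `convert_dtype=False` for exactly this reason. A hedged sketch of a direct call (the checkpoint filename is hypothetical; `BasePipeline` lives in `diffsynth_engine/pipelines/base.py` per the RECORD listing):

    import torch
    from diffsynth_engine.pipelines.base import BasePipeline  # assumed import path

    state_dict = BasePipeline.load_model_checkpoint(
        "qwen-image-w4a4.safetensors",  # hypothetical file
        device="cpu",
        dtype=torch.bfloat16,
        convert_dtype=False,            # keep packed quantized tensors untouched
    )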
@@ -2,6 +2,7 @@ import json
  import torch
  import torch.distributed as dist
  import math
+ import sys
  from typing import Callable, List, Dict, Tuple, Optional, Union
  from tqdm import tqdm
  from einops import rearrange
@@ -38,11 +39,13 @@ from diffsynth_engine.utils.parallel import ParallelWrapper
  from diffsynth_engine.utils import logging
  from diffsynth_engine.utils.fp8_linear import enable_fp8_linear
  from diffsynth_engine.utils.download import fetch_model
+ from diffsynth_engine.utils.flag import NUNCHAKU_AVAILABLE


  logger = logging.get_logger(__name__)


+
  class QwenImageLoRAConverter(LoRAStateDictConverter):
  def _from_diffsynth(self, lora_state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
  dit_dict = {}
@@ -77,6 +80,7 @@ class QwenImageLoRAConverter(LoRAStateDictConverter):

  key = key.replace(f".{lora_a_suffix}", "")
  key = key.replace("base_model.model.", "")
+ key = key.replace("transformer.", "")

  if key.startswith("transformer") and "attn.to_out.0" in key:
  key = key.replace("attn.to_out.0", "attn.to_out")
@@ -177,6 +181,36 @@ class QwenImagePipeline(BasePipeline):
  "vae",
  ]

+ @classmethod
+ def _setup_nunchaku_config(
+ cls, model_state_dict: Dict[str, torch.Tensor], config: QwenImagePipelineConfig
+ ) -> QwenImagePipelineConfig:
+ is_nunchaku_model = any("qweight" in key for key in model_state_dict)
+
+ if is_nunchaku_model:
+ logger.info("Nunchaku quantized model detected. Configuring for nunchaku.")
+ config.use_nunchaku = True
+ config.nunchaku_rank = model_state_dict["transformer_blocks.0.img_mlp.net.0.proj.proj_up"].shape[1]
+
+ if "transformer_blocks.0.img_mod.1.qweight" in model_state_dict:
+ config.use_nunchaku_awq = True
+ logger.info("Enable nunchaku AWQ.")
+ else:
+ config.use_nunchaku_awq = False
+ logger.info("Disable nunchaku AWQ.")
+
+ if "transformer_blocks.0.attn.to_qkv.qweight" in model_state_dict:
+ config.use_nunchaku_attn = True
+ logger.info("Enable nunchaku attention quantization.")
+ else:
+ config.use_nunchaku_attn = False
+ logger.info("Disable nunchaku attention quantization.")
+
+ else:
+ config.use_nunchaku = False
+
+ return config
+
  @classmethod
  def from_pretrained(cls, model_path_or_config: str | QwenImagePipelineConfig) -> "QwenImagePipeline":
  if isinstance(model_path_or_config, str):
@@ -185,7 +219,16 @@ class QwenImagePipeline(BasePipeline):
  config = model_path_or_config

  logger.info(f"loading state dict from {config.model_path} ...")
- model_state_dict = cls.load_model_checkpoint(config.model_path, device="cpu", dtype=config.model_dtype)
+ model_state_dict = cls.load_model_checkpoint(
+ config.model_path, device="cpu", dtype=config.model_dtype, convert_dtype=False
+ )
+
+ config = cls._setup_nunchaku_config(model_state_dict, config)
+
+ # for svd quant model fp4/int4 linear layers, do not convert dtype here
+ if not config.use_nunchaku:
+ for key, value in model_state_dict.items():
+ model_state_dict[key] = value.to(config.model_dtype)

  if config.vae_path is None:
  config.vae_path = fetch_model(
@@ -221,6 +264,8 @@

  @classmethod
  def from_state_dict(cls, state_dicts: QwenImageStateDicts, config: QwenImagePipelineConfig) -> "QwenImagePipeline":
+ config = cls._setup_nunchaku_config(state_dicts.model, config)
+
  if config.parallelism > 1:
  pipe = ParallelWrapper(
  cfg_degree=config.cfg_degree,
@@ -270,13 +315,30 @@
  dtype=config.model_dtype,
  relative_l1_threshold=config.fbcache_relative_l1_threshold,
  )
+ elif config.use_nunchaku:
+ if not NUNCHAKU_AVAILABLE:
+ from diffsynth_engine.utils.flag import NUNCHAKU_IMPORT_ERROR
+ raise ImportError(NUNCHAKU_IMPORT_ERROR)
+
+ from diffsynth_engine.models.qwen_image import QwenImageDiTNunchaku
+ from diffsynth_engine.models.basic.lora_nunchaku import patch_nunchaku_model_for_lora
+
+ dit = QwenImageDiTNunchaku.from_state_dict(
+ state_dicts.model,
+ device=init_device,
+ dtype=config.model_dtype,
+ use_nunchaku_awq=config.use_nunchaku_awq,
+ use_nunchaku_attn=config.use_nunchaku_attn,
+ nunchaku_rank=config.nunchaku_rank,
+ )
+ patch_nunchaku_model_for_lora(dit)
  else:
  dit = QwenImageDiT.from_state_dict(
  state_dicts.model,
  device=("cpu" if config.use_fsdp else init_device),
  dtype=config.model_dtype,
  )
- if config.use_fp8_linear:
+ if config.use_fp8_linear and not config.use_nunchaku:
  enable_fp8_linear(dit)

  pipe = cls(
@@ -650,7 +650,7 @@ class WanVideoPipeline(BasePipeline):
  dit_type = "wan2.2-i2v-a14b"
  elif model_state_dict["high_noise_model"]["patch_embedding.weight"].shape[1] == 16:
  dit_type = "wan2.2-t2v-a14b"
- elif model_state_dict["patch_embedding.weight"].shape[1] == 48:
+ elif has_any_key("patch_embedding.weight") and model_state_dict["patch_embedding.weight"].shape[1] == 48:
  dit_type = "wan2.2-ti2v-5b"
  elif has_any_key("img_emb.emb_pos", "condition_embedder.image_embedder.pos_embed"):
  dit_type = "wan2.1-flf2v-14b"
@@ -680,6 +680,30 @@
  if config.attn_params is None:
  config.attn_params = VideoSparseAttentionParams(sparsity=0.9)

+ def update_weights(self, state_dicts: WanStateDicts) -> None:
+ is_dual_model_state_dict = (isinstance(state_dicts.model, dict) and
+ ("high_noise_model" in state_dicts.model or "low_noise_model" in state_dicts.model))
+ is_dual_model_pipeline = self.dit2 is not None
+
+ if is_dual_model_state_dict != is_dual_model_pipeline:
+ raise ValueError(
+ f"Model structure mismatch: pipeline has {'dual' if is_dual_model_pipeline else 'single'} model "
+ f"but state_dict is for {'dual' if is_dual_model_state_dict else 'single'} model. "
+ f"Cannot update weights between WAN 2.1 (single model) and WAN 2.2 (dual model)."
+ )
+
+ if is_dual_model_state_dict:
+ if "high_noise_model" in state_dicts.model:
+ self.update_component(self.dit, state_dicts.model["high_noise_model"], self.config.device, self.config.model_dtype)
+ if "low_noise_model" in state_dicts.model:
+ self.update_component(self.dit2, state_dicts.model["low_noise_model"], self.config.device, self.config.model_dtype)
+ else:
+ self.update_component(self.dit, state_dicts.model, self.config.device, self.config.model_dtype)
+
+ self.update_component(self.text_encoder, state_dicts.t5, self.config.device, self.config.t5_dtype)
+ self.update_component(self.vae, state_dicts.vae, self.config.device, self.config.vae_dtype)
+ self.update_component(self.image_encoder, state_dicts.image_encoder, self.config.device, self.config.image_encoder_dtype)
+
  def compile(self):
  self.dit.compile_repeated_blocks()
  if self.dit2 is not None:
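`update_weights` swaps checkpoints on an already-constructed pipeline and refuses to cross between the single-model (WAN 2.1) and dual-model (WAN 2.2) layouts. A hedged sketch of a call, using only the attributes the method reads (`model`, `t5`, `vae`, `image_encoder`); the `WanStateDicts` constructor keywords and the `*_sd` variables are assumptions:

    state_dicts = WanStateDicts(  # field names taken from the attribute accesses above
        model={"high_noise_model": high_sd, "low_noise_model": low_sd},  # or one flat dict for WAN 2.1
        t5=t5_sd,
        vae=vae_sd,
        image_encoder=image_encoder_sd,
    )
    pipe.update_weights(state_dicts)  # `pipe` is an existing WanVideoPipeline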
@@ -55,3 +55,27 @@ if VIDEO_SPARSE_ATTN_AVAILABLE:
  logger.info("Video sparse attention is available")
  else:
  logger.info("Video sparse attention is not available")
+
+ NUNCHAKU_AVAILABLE = importlib.util.find_spec("nunchaku") is not None
+ NUNCHAKU_IMPORT_ERROR = None
+ if NUNCHAKU_AVAILABLE:
+ logger.info("Nunchaku is available")
+ else:
+ logger.info("Nunchaku is not available")
+ import sys
+ torch_version = getattr(torch, "__version__", "unknown")
+ python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
+ NUNCHAKU_IMPORT_ERROR = (
+ "\n\n"
+ "ERROR: This model requires the 'nunchaku' library for quantized inference, but it is not installed.\n"
+ "'nunchaku' is not available on PyPI and must be installed manually.\n\n"
+ "Please follow these steps:\n"
+ "1. Visit the nunchaku releases page: https://github.com/nunchaku-tech/nunchaku/releases\n"
+ "2. Find the wheel (.whl) file that matches your environment:\n"
+ f" - PyTorch version: {torch_version}\n"
+ f" - Python version: {python_version}\n"
+ f" - Operating System: {sys.platform}\n"
+ "3. Copy the URL of the correct wheel file.\n"
+ "4. Install it using pip, for example:\n"
+ " pip install nunchaku @ https://.../your_specific_nunchaku_file.whl\n"
+ )
@@ -21,117 +21,33 @@ from queue import Empty
  import diffsynth_engine.models.basic.attention as attention_ops
  from diffsynth_engine.utils.platform import empty_cache
  from diffsynth_engine.utils import logging
+ from diffsynth_engine.utils.process_group import (
+ PROCESS_GROUP,
+ get_cfg_group,
+ get_cfg_world_size,
+ get_cfg_rank,
+ get_cfg_ranks,
+ get_sp_group,
+ get_sp_world_size,
+ get_sp_rank,
+ get_sp_ranks,
+ get_sp_ulysses_group,
+ get_sp_ulysses_world_size,
+ get_sp_ulysses_rank,
+ get_sp_ulysses_ranks,
+ get_sp_ring_group,
+ get_sp_ring_world_size,
+ get_sp_ring_rank,
+ get_sp_ring_ranks,
+ get_tp_group,
+ get_tp_world_size,
+ get_tp_rank,
+ get_tp_ranks,
+ )

  logger = logging.get_logger(__name__)


- class Singleton:
- _instance = None
-
- def __new__(cls, *args, **kwargs):
- if not cls._instance:
- cls._instance = super(Singleton, cls).__new__(cls, *args, **kwargs)
- return cls._instance
-
-
- class ProcessGroupSingleton(Singleton):
- def __init__(self):
- self.CFG_GROUP: Optional[dist.ProcessGroup] = None
- self.SP_GROUP: Optional[dist.ProcessGroup] = None
- self.SP_ULYSSUES_GROUP: Optional[dist.ProcessGroup] = None
- self.SP_RING_GROUP: Optional[dist.ProcessGroup] = None
- self.TP_GROUP: Optional[dist.ProcessGroup] = None
-
- self.CFG_RANKS: List[int] = []
- self.SP_RANKS: List[int] = []
- self.SP_ULYSSUES_RANKS: List[int] = []
- self.SP_RING_RANKS: List[int] = []
- self.TP_RANKS: List[int] = []
-
-
- PROCESS_GROUP = ProcessGroupSingleton()
-
-
- def get_cfg_group():
- return PROCESS_GROUP.CFG_GROUP
-
-
- def get_cfg_world_size():
- return PROCESS_GROUP.CFG_GROUP.size() if PROCESS_GROUP.CFG_GROUP is not None else 1
-
-
- def get_cfg_rank():
- return PROCESS_GROUP.CFG_GROUP.rank() if PROCESS_GROUP.CFG_GROUP is not None else 0
-
-
- def get_cfg_ranks():
- return PROCESS_GROUP.CFG_RANKS
-
-
- def get_sp_group():
- return PROCESS_GROUP.SP_GROUP
-
-
- def get_sp_world_size():
- return PROCESS_GROUP.SP_GROUP.size() if PROCESS_GROUP.SP_GROUP is not None else 1
-
-
- def get_sp_rank():
- return PROCESS_GROUP.SP_GROUP.rank() if PROCESS_GROUP.SP_GROUP is not None else 0
-
-
- def get_sp_ranks():
- return PROCESS_GROUP.SP_RANKS
-
-
- def get_sp_ulysses_group():
- return PROCESS_GROUP.SP_ULYSSUES_GROUP
-
-
- def get_sp_ulysses_world_size():
- return PROCESS_GROUP.SP_ULYSSUES_GROUP.size() if PROCESS_GROUP.SP_ULYSSUES_GROUP is not None else 1
-
-
- def get_sp_ulysses_rank():
- return PROCESS_GROUP.SP_ULYSSUES_GROUP.rank() if PROCESS_GROUP.SP_ULYSSUES_GROUP is not None else 0
-
-
- def get_sp_ulysses_ranks():
- return PROCESS_GROUP.SP_ULYSSUES_RANKS
-
-
- def get_sp_ring_group():
- return PROCESS_GROUP.SP_RING_GROUP
-
-
- def get_sp_ring_world_size():
- return PROCESS_GROUP.SP_RING_GROUP.size() if PROCESS_GROUP.SP_RING_GROUP is not None else 1
-
-
- def get_sp_ring_rank():
- return PROCESS_GROUP.SP_RING_GROUP.rank() if PROCESS_GROUP.SP_RING_GROUP is not None else 0
-
-
- def get_sp_ring_ranks():
- return PROCESS_GROUP.SP_RING_RANKS
-
-
- def get_tp_group():
- return PROCESS_GROUP.TP_GROUP
-
-
- def get_tp_world_size():
- return PROCESS_GROUP.TP_GROUP.size() if PROCESS_GROUP.TP_GROUP is not None else 1
-
-
- def get_tp_rank():
- return PROCESS_GROUP.TP_GROUP.rank() if PROCESS_GROUP.TP_GROUP is not None else 0
-
-
- def get_tp_ranks():
- return PROCESS_GROUP.TP_RANKS
-
-
  def init_parallel_pgs(
  cfg_degree: int = 1,
  sp_ulysses_degree: int = 1,
@@ -0,0 +1,149 @@
+ """
+ Process group management for distributed training.
+
+ This module provides singleton-based process group management for distributed training,
+ including support for CFG parallelism, sequence parallelism (Ulysses + Ring), and tensor parallelism.
+ """
+
+ import torch.distributed as dist
+ from typing import Optional, List
+
+
+ class Singleton:
+ _instance = None
+
+ def __new__(cls, *args, **kwargs):
+ if not cls._instance:
+ cls._instance = super(Singleton, cls).__new__(cls, *args, **kwargs)
+ return cls._instance
+
+
+ class ProcessGroupSingleton(Singleton):
+ def __init__(self):
+ if not hasattr(self, 'initialized'):
+ self.CFG_GROUP: Optional[dist.ProcessGroup] = None
+ self.SP_GROUP: Optional[dist.ProcessGroup] = None
+ self.SP_ULYSSUES_GROUP: Optional[dist.ProcessGroup] = None
+ self.SP_RING_GROUP: Optional[dist.ProcessGroup] = None
+ self.TP_GROUP: Optional[dist.ProcessGroup] = None
+
+ self.CFG_RANKS: List[int] = []
+ self.SP_RANKS: List[int] = []
+ self.SP_ULYSSUES_RANKS: List[int] = []
+ self.SP_RING_RANKS: List[int] = []
+ self.TP_RANKS: List[int] = []
+
+ self.initialized = True
+
+
+ PROCESS_GROUP = ProcessGroupSingleton()
+
+
+ # CFG parallel group functions
+ def get_cfg_group():
+ return PROCESS_GROUP.CFG_GROUP
+
+
+ def get_cfg_world_size():
+ return PROCESS_GROUP.CFG_GROUP.size() if PROCESS_GROUP.CFG_GROUP is not None else 1
+
+
+ def get_cfg_rank():
+ return PROCESS_GROUP.CFG_GROUP.rank() if PROCESS_GROUP.CFG_GROUP is not None else 0
+
+
+ def get_cfg_ranks():
+ return PROCESS_GROUP.CFG_RANKS
+
+
+ # Sequence parallel group functions
+ def get_sp_group():
+ return PROCESS_GROUP.SP_GROUP
+
+
+ def get_sp_world_size():
+ return PROCESS_GROUP.SP_GROUP.size() if PROCESS_GROUP.SP_GROUP is not None else 1
+
+
+ def get_sp_rank():
+ return PROCESS_GROUP.SP_GROUP.rank() if PROCESS_GROUP.SP_GROUP is not None else 0
+
+
+ def get_sp_ranks():
+ return PROCESS_GROUP.SP_RANKS
+
+
+ # Sequence parallel Ulysses group functions
+ def get_sp_ulysses_group():
+ return PROCESS_GROUP.SP_ULYSSUES_GROUP
+
+
+ def get_sp_ulysses_world_size():
+ return PROCESS_GROUP.SP_ULYSSUES_GROUP.size() if PROCESS_GROUP.SP_ULYSSUES_GROUP is not None else 1
+
+
+ def get_sp_ulysses_rank():
+ return PROCESS_GROUP.SP_ULYSSUES_GROUP.rank() if PROCESS_GROUP.SP_ULYSSUES_GROUP is not None else 0
+
+
+ def get_sp_ulysses_ranks():
+ return PROCESS_GROUP.SP_ULYSSUES_RANKS
+
+
+ # Sequence parallel Ring group functions
+ def get_sp_ring_group():
+ return PROCESS_GROUP.SP_RING_GROUP
+
+
+ def get_sp_ring_world_size():
+ return PROCESS_GROUP.SP_RING_GROUP.size() if PROCESS_GROUP.SP_RING_GROUP is not None else 1
+
+
+ def get_sp_ring_rank():
+ return PROCESS_GROUP.SP_RING_GROUP.rank() if PROCESS_GROUP.SP_RING_GROUP is not None else 0
+
+
+ def get_sp_ring_ranks():
+ return PROCESS_GROUP.SP_RING_RANKS
+
+
+ # Tensor parallel group functions
+ def get_tp_group():
+ return PROCESS_GROUP.TP_GROUP
+
+
+ def get_tp_world_size():
+ return PROCESS_GROUP.TP_GROUP.size() if PROCESS_GROUP.TP_GROUP is not None else 1
+
+
+ def get_tp_rank():
+ return PROCESS_GROUP.TP_GROUP.rank() if PROCESS_GROUP.TP_GROUP is not None else 0
+
+
+ def get_tp_ranks():
+ return PROCESS_GROUP.TP_RANKS
+
+
+ __all__ = [
+ "PROCESS_GROUP",
+ "get_cfg_group",
+ "get_cfg_world_size",
+ "get_cfg_rank",
+ "get_cfg_ranks",
+ "get_sp_group",
+ "get_sp_world_size",
+ "get_sp_rank",
+ "get_sp_ranks",
+ "get_sp_ulysses_group",
+ "get_sp_ulysses_world_size",
+ "get_sp_ulysses_rank",
+ "get_sp_ulysses_ranks",
+ "get_sp_ring_group",
+ "get_sp_ring_world_size",
+ "get_sp_ring_rank",
+ "get_sp_ring_ranks",
+ "get_tp_group",
+ "get_tp_world_size",
+ "get_tp_rank",
+ "get_tp_ranks",
+ ]
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: diffsynth_engine
- Version: 0.6.1.dev27
+ Version: 0.6.1.dev29
  Author: MuseAI x ModelScope
  Classifier: Programming Language :: Python :: 3
  Classifier: Operating System :: OS Independent
@@ -81,18 +81,19 @@ diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json,sha256=bhl7TT29cdoU
  diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json,sha256=7Zo6iw-qcacKMoR-BDX-A25uES1N9O23u0ipIeNE3AU,61728
  diffsynth_engine/configs/__init__.py,sha256=vSjJToEdq3JX7t81_z4nwNwIdD4bYnFjxnMZH7PXMKo,1309
  diffsynth_engine/configs/controlnet.py,sha256=f3vclyP3lcAjxDGD9C1vevhqqQ7W2LL_c6Wye0uxk3Q,1180
- diffsynth_engine/configs/pipeline.py,sha256=ADgWJa7bA3Z3Z1JtVLgmt4N3eS1KRp9yHu1QvTBzTm0,13404
+ diffsynth_engine/configs/pipeline.py,sha256=7duSdoD0LIROtepsLW9PxYsK59p7qSv34BVz0k29vu4,13633
  diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
- diffsynth_engine/models/base.py,sha256=BA5vgMqfy_cjuL2OtXbrFD-Qg5xQnaumHpj5TabwSy8,2559
+ diffsynth_engine/models/base.py,sha256=svao__9WH8VNcyXz5o5dzywYXDcGV0YV9IfkLzDKews,2558
  diffsynth_engine/models/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  diffsynth_engine/models/basic/attention.py,sha256=mvgk8LTqFwgtPdBeRv797IZNg9k7--X9wD92Hcr188c,15682
- diffsynth_engine/models/basic/lora.py,sha256=PT-A3pwIuUrW2w3TnNlBPb1KRj70QYiBaoCvLnkR5cs,10652
+ diffsynth_engine/models/basic/lora.py,sha256=Y6cBgrBsuDAP9FZz_fgK8vBi_EMg23saFIUSAsPIG-M,10670
+ diffsynth_engine/models/basic/lora_nunchaku.py,sha256=7qhzGCzUIfDrwtWG0nspwdyZ7YUkaM4vMqzxZby2Zds,7510
  diffsynth_engine/models/basic/relative_position_emb.py,sha256=rCXOweZMcayVnNUVvBcYXMdhHS257B_PC8PZSWxvhNQ,2540
  diffsynth_engine/models/basic/timestep.py,sha256=WJODYqkSXEM0wcS42YkkfrGwxWt0e60zMTkDdUBQqBw,2810
  diffsynth_engine/models/basic/transformer_helper.py,sha256=6K7A5bVnN2bOoq6I0IQf7RJBhSZUP4jNf1n7NPGu8zA,5287
  diffsynth_engine/models/basic/unet_helper.py,sha256=4lN6F80Ubm6ip4dkLVmB-Og5-Y25Wduhs9Q8qjyzK6E,9044
- diffsynth_engine/models/basic/video_sparse_attention.py,sha256=iXA3sHDLWk1ns1lVCNbZdiaDu94kBIsw-9vrCGAll7g,7843
+ diffsynth_engine/models/basic/video_sparse_attention.py,sha256=GxDN6PTpA1rCoQaXUwSPgH4708bEzVI1qsD48WVDXLA,8201
  diffsynth_engine/models/flux/__init__.py,sha256=x0JoxL0CdiiVrY0BjkIrGinud7mcXecLleGO0km91XQ,686
  diffsynth_engine/models/flux/flux_controlnet.py,sha256=NvFKQIx0NldX5uUxdmYwuS2s-xaFRlKotiE6lr3-HRY,8018
  diffsynth_engine/models/flux/flux_dit.py,sha256=7sdV8KFQiHcK-8aqyvXBgC7E_-D9rcgBcnMXUq_AybI,23403
@@ -108,10 +109,11 @@ diffsynth_engine/models/hunyuan3d/hunyuan3d_vae.py,sha256=0IUrUSBi-6eWeaScUoi0e6
  diffsynth_engine/models/hunyuan3d/moe.py,sha256=FAuUqgrB2ZFb0uGBhI-Afv850HmzDFP5yJKKogf4A4U,3552
  diffsynth_engine/models/hunyuan3d/surface_extractor.py,sha256=b15mb1N4PYwAvDk1Gude8qlccRKrSg461xT59RjMEQk,4167
  diffsynth_engine/models/hunyuan3d/volume_decoder.py,sha256=sgflj1a8sIerqGSalBAVQOlyiIihkLOLXYysNbulCoQ,2355
- diffsynth_engine/models/qwen_image/__init__.py,sha256=X5pig621WEsDZ6L7HVkmYspV53-GDfs_la1ncaq_NFw,417
+ diffsynth_engine/models/qwen_image/__init__.py,sha256=_6f0LWaoLdDvD2CsjK2OzEIQryt9efge8DFS4_GUnHQ,582
  diffsynth_engine/models/qwen_image/qwen2_5_vl.py,sha256=Eu-r-c42t_q74Qpwz21ToCGHpvSi7VND4B1EI0e-ePA,57748
  diffsynth_engine/models/qwen_image/qwen_image_dit.py,sha256=iJ-FinDyXa982Uao1is37bxUttyPu0Eldyd7qPJO_XQ,22582
  diffsynth_engine/models/qwen_image/qwen_image_dit_fbcache.py,sha256=LIv9X_BohKk5rcEzyl3ATLwd8MSoFX43wjkArQ68nq8,4828
+ diffsynth_engine/models/qwen_image/qwen_image_dit_nunchaku.py,sha256=TCzNsFxw-QBHrRg94f_ITs5u85Em-aoCAeCr2AylPpE,13478
  diffsynth_engine/models/qwen_image/qwen_image_vae.py,sha256=eO7f4YqiYXfw7NncBNFTu-xEvdJ5uKY-SnfP15QY0tE,38443
  diffsynth_engine/models/sd/__init__.py,sha256=hjoKRnwoXOLD0wude-w7I6wK5ak7ACMbnbkPuBB2oU0,380
  diffsynth_engine/models/sd/sd_controlnet.py,sha256=kMGfIdriXhC7reT6iO2Z0rPICXEkXpytjeBQcR_sjT8,50577
@@ -141,15 +143,15 @@ diffsynth_engine/models/wan/wan_s2v_dit.py,sha256=j63ulcWLY4XGITOKUMGX292LtSEtP-
  diffsynth_engine/models/wan/wan_text_encoder.py,sha256=OERlmwOqthAFPNnnT2sXJ4OjyyRmsRLx7VGp1zlBkLU,11021
  diffsynth_engine/models/wan/wan_vae.py,sha256=dC7MoUFeXRL7SIY0LG1OOUiZW-pp9IbXCghutMxpXr4,38889
  diffsynth_engine/pipelines/__init__.py,sha256=jh-4LSJ0vqlXiT8BgFgRIQxuAr2atEPyHrxXWj-Ud1U,604
- diffsynth_engine/pipelines/base.py,sha256=Yvb2xiHT1Jhx4HDkNPHdXjzhUkM9_65D4zM-GSSOWoU,16133
+ diffsynth_engine/pipelines/base.py,sha256=BNMNL-OU-9ilUv7O60trA3_rjHA21d6Oc5PKzKYBa80,16347
  diffsynth_engine/pipelines/flux_image.py,sha256=L0ggxpthLD8a5-zdPHu9z668uWBei9YzPb4PFVypDNU,50707
  diffsynth_engine/pipelines/hunyuan3d_shape.py,sha256=TNV0Wr09Dj2bzzlpua9WioCClOj3YiLfE6utI9aWL8A,8164
- diffsynth_engine/pipelines/qwen_image.py,sha256=n6Nnin8OyC9Mfp8O-3N4GNq12Mws8_hHWv-SwU4-HCc,33054
+ diffsynth_engine/pipelines/qwen_image.py,sha256=ktOirdU2ljgb6vHhXosC0tWgXI3gwvsoAtrYKYvMwzI,35719
  diffsynth_engine/pipelines/sd_image.py,sha256=nr-Nhsnomq8CsUqhTM3i2l2zG01YjwXdfRXgr_bC3F0,17891
  diffsynth_engine/pipelines/sdxl_image.py,sha256=v7ZACGPb6EcBunL6e5E9jynSQjE7GQx8etEV-ZLP91g,21704
  diffsynth_engine/pipelines/utils.py,sha256=HZbJHErNJS1DhlwJKvZ9dY7Kh8Zdlsw3zE2e88TYGRY,2277
  diffsynth_engine/pipelines/wan_s2v.py,sha256=QHlCLMqlmnp55iYm2mzg4qCq4jceRAP3Zt5Mubz3mAM,29384
- diffsynth_engine/pipelines/wan_video.py,sha256=rJq60LiaCoLq1PkqUzzrdvFkp6h73fc-ZUu0MiMQC-c,29668
+ diffsynth_engine/pipelines/wan_video.py,sha256=9xjSvQ4mlVEDdaL6QuUURj4iyxhJ2xABBphQjkfzK8s,31323
  diffsynth_engine/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  diffsynth_engine/processor/canny_processor.py,sha256=hV30NlblTkEFUAmF_O-LJrNlGVM2SFrqq6okfF8VpOo,602
  diffsynth_engine/processor/depth_processor.py,sha256=dQvs3JsnyMbz4dyI9QoR8oO-mMFBFAgNvgqeCoaU5jk,1532
@@ -171,7 +173,7 @@ diffsynth_engine/utils/cache.py,sha256=Ivef22pCuhEq-4H00gSvkLS8ceVZoGis7OSitYL6g
  diffsynth_engine/utils/constants.py,sha256=sJio3Vy8i0-PWYRnqquYt6ez9k6Tc9JdjCv6pn2BU_4,3551
  diffsynth_engine/utils/download.py,sha256=w9QQjllPfTUEY371UTREU7o_vvdMY-Q2DymDel3ZEZY,6792
  diffsynth_engine/utils/env.py,sha256=k749eYt_qKGq38GocDiXfkhp8nZrowFefNVTZ8R755I,363
- diffsynth_engine/utils/flag.py,sha256=v9GcRFYiNMonD9qmDLWdbXONuF-AcQ_KABPFtRZd0Tc,1767
+ diffsynth_engine/utils/flag.py,sha256=KSzjnzRe7sleNCJm8IpbJQbmBY4KNV2kDrijxi27Jek,2928
  diffsynth_engine/utils/fp8_linear.py,sha256=k34YFWo2dc3t8aKjHaCW9CbQMOTqXxaDHk8aw8aKif4,3857
  diffsynth_engine/utils/gguf.py,sha256=ZWvw46V4g4uVyAR_oCq-4K5nPdKVrYk3u47uXMgA9lU,14092
  diffsynth_engine/utils/image.py,sha256=PiDButjv0fsRS23kpQgCLZAlBumpzQmNnolfvb5EKQ0,9626
@@ -180,15 +182,16 @@ diffsynth_engine/utils/lock.py,sha256=1Ipgst9eEFfFdViAvD5bxdB6HnHHBcqWYOb__fGaPU
  diffsynth_engine/utils/logging.py,sha256=XB0xTT8PBN6btkOjFtOvjlrOCRVgDGT8PFAp1vmse28,467
  diffsynth_engine/utils/offload.py,sha256=94og79TIkxldwYUgZT3L4OVu1WBlE7gfVPvO2MRhm6c,3551
  diffsynth_engine/utils/onnx.py,sha256=jeWUudJHnESjuiEAHyUZYUZz7dCj34O9aGjHCe8yjWo,1149
- diffsynth_engine/utils/parallel.py,sha256=6T8oCTp-7Gb3qsgNRB2Bp3DF4eyx1FzvS6pFnEJbsek,19789
+ diffsynth_engine/utils/parallel.py,sha256=OBGsAK-3ncArRyMU1lea7tbYgxSdCucQvXheL3Ssl5M,17653
  diffsynth_engine/utils/platform.py,sha256=nbpG-XHJFRmYY6u_e7IBQ9Q6GyItrIkKf3VKuBPTUpY,627
+ diffsynth_engine/utils/process_group.py,sha256=P-X04a--Zb4M4kjc3DddmusrxCKqv8wiDGhXG4Al-rE,3783
  diffsynth_engine/utils/prompt.py,sha256=YItMchoVzsG6y-LB4vzzDUWrkhKRVlt1HfVhxZjSxMQ,280
  diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CDhg,2200
  diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
  diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
- diffsynth_engine-0.6.1.dev27.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
- diffsynth_engine-0.6.1.dev27.dist-info/METADATA,sha256=w8FRm_Fr7AZp3TPFh1TUHk93eWxm9CFAZcU8S4qwKj0,1164
- diffsynth_engine-0.6.1.dev27.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- diffsynth_engine-0.6.1.dev27.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
- diffsynth_engine-0.6.1.dev27.dist-info/RECORD,,
+ diffsynth_engine-0.6.1.dev29.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
+ diffsynth_engine-0.6.1.dev29.dist-info/METADATA,sha256=8A5q0qhRMxeJi7IOvP3dcqk58BsgIBxy16ndlnDM_6I,1164
+ diffsynth_engine-0.6.1.dev29.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ diffsynth_engine-0.6.1.dev29.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
+ diffsynth_engine-0.6.1.dev29.dist-info/RECORD,,