diffsynth-engine 0.7.1.dev3__py3-none-any.whl → 0.7.1.dev5__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- diffsynth_engine/models/qwen_image/qwen_image_dit_nunchaku.py +1 -0
- diffsynth_engine/tools/qwen_image_upscaler_tool.py +47 -1
- {diffsynth_engine-0.7.1.dev3.dist-info → diffsynth_engine-0.7.1.dev5.dist-info}/METADATA +1 -1
- {diffsynth_engine-0.7.1.dev3.dist-info → diffsynth_engine-0.7.1.dev5.dist-info}/RECORD +7 -7
- {diffsynth_engine-0.7.1.dev3.dist-info → diffsynth_engine-0.7.1.dev5.dist-info}/WHEEL +0 -0
- {diffsynth_engine-0.7.1.dev3.dist-info → diffsynth_engine-0.7.1.dev5.dist-info}/licenses/LICENSE +0 -0
- {diffsynth_engine-0.7.1.dev3.dist-info → diffsynth_engine-0.7.1.dev5.dist-info}/top_level.txt +0 -0
|
@@ -179,6 +179,7 @@ class QwenImageTransformerBlockNunchaku(QwenImageTransformerBlock):
|
|
|
179
179
|
rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
|
|
180
180
|
attn_mask: Optional[torch.Tensor] = None,
|
|
181
181
|
attn_kwargs: Optional[Dict[str, Any]] = None,
|
|
182
|
+
modulate_index: Optional[List[int]] = None,
|
|
182
183
|
) -> Tuple[torch.Tensor, torch.Tensor]:
|
|
183
184
|
if self.use_nunchaku_awq:
|
|
184
185
|
img_mod_params = self.img_mod(temb) # [B, 6*dim]
|
|
@@ -12,7 +12,7 @@ from diffsynth_engine.configs import QwenImagePipelineConfig
|
|
|
12
12
|
from diffsynth_engine.pipelines.qwen_image import QwenImagePipeline
|
|
13
13
|
from diffsynth_engine.models.qwen_image import QwenImageVAE
|
|
14
14
|
from diffsynth_engine.models.basic.lora import LoRALinear
|
|
15
|
-
from diffsynth_engine.models.qwen_image.qwen_image_dit import QwenImageTransformerBlock
|
|
15
|
+
from diffsynth_engine.models.qwen_image.qwen_image_dit import QwenImageTransformerBlock, QwenEmbedRope
|
|
16
16
|
from diffsynth_engine.utils import logging
|
|
17
17
|
from diffsynth_engine.utils.loader import load_file
|
|
18
18
|
from diffsynth_engine.utils.download import fetch_model
|
|
@@ -32,6 +32,7 @@ def odtsr_forward():
|
|
|
32
32
|
"""
|
|
33
33
|
original_lora_forward = LoRALinear.forward
|
|
34
34
|
original_modulate = QwenImageTransformerBlock._modulate
|
|
35
|
+
original_rope_forward = QwenEmbedRope.forward
|
|
35
36
|
|
|
36
37
|
def lora_batch_cfg_forward(self, x):
|
|
37
38
|
y = nn.Linear.forward(self, x)
|
|
@@ -50,6 +51,49 @@ def odtsr_forward():
|
|
|
50
51
|
y[:, L:] += lora(x2)
|
|
51
52
|
return y
|
|
52
53
|
|
|
54
|
+
def optimized_rope_forward(self, video_fhw, txt_length, device):
|
|
55
|
+
if self.pos_freqs.device != device:
|
|
56
|
+
self.pos_freqs = self.pos_freqs.to(device)
|
|
57
|
+
self.neg_freqs = self.neg_freqs.to(device)
|
|
58
|
+
|
|
59
|
+
vid_freqs = []
|
|
60
|
+
max_vid_index = 0
|
|
61
|
+
idx = 0
|
|
62
|
+
for fhw in video_fhw:
|
|
63
|
+
frame, height, width = fhw
|
|
64
|
+
rope_key = f"{idx}_{height}_{width}"
|
|
65
|
+
|
|
66
|
+
if rope_key not in self.rope_cache:
|
|
67
|
+
seq_lens = frame * height * width
|
|
68
|
+
freqs_pos = self.pos_freqs.split([x // 2 for x in self.axes_dim], dim=1)
|
|
69
|
+
freqs_neg = self.neg_freqs.split([x // 2 for x in self.axes_dim], dim=1)
|
|
70
|
+
freqs_frame = freqs_pos[0][idx : idx + frame].view(frame, 1, 1, -1).expand(frame, height, width, -1)
|
|
71
|
+
if self.scale_rope:
|
|
72
|
+
freqs_height = torch.cat(
|
|
73
|
+
[freqs_neg[1][-(height - height // 2) :], freqs_pos[1][: height // 2]], dim=0
|
|
74
|
+
)
|
|
75
|
+
freqs_height = freqs_height.view(1, height, 1, -1).expand(frame, height, width, -1)
|
|
76
|
+
freqs_width = torch.cat([freqs_neg[2][-(width - width // 2) :], freqs_pos[2][: width // 2]], dim=0)
|
|
77
|
+
freqs_width = freqs_width.view(1, 1, width, -1).expand(frame, height, width, -1)
|
|
78
|
+
|
|
79
|
+
else:
|
|
80
|
+
freqs_height = freqs_pos[1][:height].view(1, height, 1, -1).expand(frame, height, width, -1)
|
|
81
|
+
freqs_width = freqs_pos[2][:width].view(1, 1, width, -1).expand(frame, height, width, -1)
|
|
82
|
+
|
|
83
|
+
freqs = torch.cat([freqs_frame, freqs_height, freqs_width], dim=-1).reshape(seq_lens, -1)
|
|
84
|
+
self.rope_cache[rope_key] = freqs.clone().contiguous()
|
|
85
|
+
vid_freqs.append(self.rope_cache[rope_key])
|
|
86
|
+
if self.scale_rope:
|
|
87
|
+
max_vid_index = max(height // 2, width // 2, max_vid_index)
|
|
88
|
+
else:
|
|
89
|
+
max_vid_index = max(height, width, max_vid_index)
|
|
90
|
+
|
|
91
|
+
txt_freqs = self.pos_freqs[max_vid_index : max_vid_index + txt_length, ...]
|
|
92
|
+
vid_freqs = torch.cat(vid_freqs, dim=0)
|
|
93
|
+
|
|
94
|
+
return vid_freqs, txt_freqs
|
|
95
|
+
|
|
96
|
+
|
|
53
97
|
def optimized_modulate(self, x, mod_params, index=None):
|
|
54
98
|
if mod_params.ndim == 2:
|
|
55
99
|
shift, scale, gate = mod_params.chunk(3, dim=-1)
|
|
@@ -72,12 +116,14 @@ def odtsr_forward():
|
|
|
72
116
|
|
|
73
117
|
LoRALinear.forward = lora_batch_cfg_forward
|
|
74
118
|
QwenImageTransformerBlock._modulate = optimized_modulate
|
|
119
|
+
QwenEmbedRope.forward = optimized_rope_forward
|
|
75
120
|
|
|
76
121
|
try:
|
|
77
122
|
yield
|
|
78
123
|
finally:
|
|
79
124
|
LoRALinear.forward = original_lora_forward
|
|
80
125
|
QwenImageTransformerBlock._modulate = original_modulate
|
|
126
|
+
QwenEmbedRope.forward = original_rope_forward
|
|
81
127
|
|
|
82
128
|
|
|
83
129
|
class QwenImageUpscalerTool:
|
|
@@ -122,7 +122,7 @@ diffsynth_engine/models/qwen_image/__init__.py,sha256=_6f0LWaoLdDvD2CsjK2OzEIQry
|
|
|
122
122
|
diffsynth_engine/models/qwen_image/qwen2_5_vl.py,sha256=Eu-r-c42t_q74Qpwz21ToCGHpvSi7VND4B1EI0e-ePA,57748
|
|
123
123
|
diffsynth_engine/models/qwen_image/qwen_image_dit.py,sha256=mMU4zeZi8-uJe9voznNIxZCTCqJPbPXkMxHwgcqJ6z8,24640
|
|
124
124
|
diffsynth_engine/models/qwen_image/qwen_image_dit_fbcache.py,sha256=LIv9X_BohKk5rcEzyl3ATLwd8MSoFX43wjkArQ68nq8,4828
|
|
125
|
-
diffsynth_engine/models/qwen_image/qwen_image_dit_nunchaku.py,sha256=
|
|
125
|
+
diffsynth_engine/models/qwen_image/qwen_image_dit_nunchaku.py,sha256=EIojuf27haxqI4wkJE_Y17HMjP82-iqvyJ5v5Kjns3o,13568
|
|
126
126
|
diffsynth_engine/models/qwen_image/qwen_image_vae.py,sha256=FpauZV9IVvpvBeS9volu7kzH2mmCISS86AbHt0Jk2bQ,38442
|
|
127
127
|
diffsynth_engine/models/sd/__init__.py,sha256=hjoKRnwoXOLD0wude-w7I6wK5ak7ACMbnbkPuBB2oU0,380
|
|
128
128
|
diffsynth_engine/models/sd/sd_controlnet.py,sha256=kMGfIdriXhC7reT6iO2Z0rPICXEkXpytjeBQcR_sjT8,50577
|
|
@@ -186,7 +186,7 @@ diffsynth_engine/tools/flux_inpainting_tool.py,sha256=qHsYKUG20A19ujRdocpIPC4a_H
|
|
|
186
186
|
diffsynth_engine/tools/flux_outpainting_tool.py,sha256=ff4qUj2mMYW6GMts7ifnJG7Rth55pfuggopRCyAXwJ8,3894
|
|
187
187
|
diffsynth_engine/tools/flux_reference_tool.py,sha256=6v0NRZPsDEHFlPruO-ZJTB4rYWxKVAlmnYEeandD3r8,4723
|
|
188
188
|
diffsynth_engine/tools/flux_replace_tool.py,sha256=AOyEGxHsaNwpTS2VChAieIfECgMxlKsRw0lWPm1k9C0,4627
|
|
189
|
-
diffsynth_engine/tools/qwen_image_upscaler_tool.py,sha256=
|
|
189
|
+
diffsynth_engine/tools/qwen_image_upscaler_tool.py,sha256=GMhV7Sphg2zgkOJhnZeLVWQJQv1d6QnOuQZXEvHgIyI,16222
|
|
190
190
|
diffsynth_engine/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
191
191
|
diffsynth_engine/utils/cache.py,sha256=Ivef22pCuhEq-4H00gSvkLS8ceVZoGis7OSitYL6gH4,2101
|
|
192
192
|
diffsynth_engine/utils/constants.py,sha256=Tsn3EAByfZra-nGcx0NEcP9nWTPKaDGdatosE3BuPGE,3846
|
|
@@ -209,8 +209,8 @@ diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CD
|
|
|
209
209
|
diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
210
210
|
diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
|
|
211
211
|
diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
|
|
212
|
-
diffsynth_engine-0.7.1.dev3.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
|
|
213
|
-
diffsynth_engine-0.7.1.
|
|
214
|
-
diffsynth_engine-0.7.1.dev3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
215
|
-
diffsynth_engine-0.7.1.dev3.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
|
|
216
|
-
diffsynth_engine-0.7.1.dev3.dist-info/RECORD,,
|
|
212
|
+
diffsynth_engine-0.7.1.dev5.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
|
|
213
|
+
diffsynth_engine-0.7.1.dev5.dist-info/METADATA,sha256=76gzYfIIeo_71jVybkzGLWiMpkm95ifPNZkL12gCRj8,1163
|
|
214
|
+
diffsynth_engine-0.7.1.dev5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
215
|
+
diffsynth_engine-0.7.1.dev5.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
|
|
216
|
+
diffsynth_engine-0.7.1.dev5.dist-info/RECORD,,
|
|
File without changes
|
{diffsynth_engine-0.7.1.dev3.dist-info → diffsynth_engine-0.7.1.dev5.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{diffsynth_engine-0.7.1.dev3.dist-info → diffsynth_engine-0.7.1.dev5.dist-info}/top_level.txt
RENAMED
|
File without changes
|