diffsynth-engine 0.6.1.dev33__py3-none-any.whl → 0.6.1.dev34__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry; it is provided for informational purposes only.
diffsynth_engine/models/basic/attention.py

@@ -343,7 +343,7 @@ def long_context_attention(
             f"head_dim={q.shape[-1]}, but aiter_flash_attn only supports head dimension at most {FA3_MAX_HEADDIM}, will use fallback attention implementation"
         )
     if SDPA_AVAILABLE:
-        return LongContextAttention(attn_type=AttnType.TORCH)(q, k, v, softmax_scale=scale)
+        return LongContextAttention(attn_type=AttnType.TORCH_EFFICIENT)(q, k, v, softmax_scale=scale)
     if FLASH_ATTN_2_AVAILABLE:
         return LongContextAttention(attn_type=AttnType.FA)(q, k, v, softmax_scale=scale)
     raise ValueError("No available long context attention implementation")
@@ -379,7 +379,7 @@ def long_context_attention(
     if attn_impl == "fa2":
         return LongContextAttention(attn_type=AttnType.FA)(q, k, v, softmax_scale=scale)
     if attn_impl == "sdpa":
-        return LongContextAttention(attn_type=AttnType.TORCH)(q, k, v, softmax_scale=scale)
+        return LongContextAttention(attn_type=AttnType.TORCH_EFFICIENT)(q, k, v, softmax_scale=scale)
     if attn_impl == "sage":
         return LongContextAttention(attn_type=AttnType.SAGE_AUTO)(q, k, v, softmax_scale=scale)
     if attn_impl == "sparge":
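Both hunks above swap the SDPA path from `AttnType.TORCH` to `AttnType.TORCH_EFFICIENT`. A minimal sketch of the likely distinction, assuming (as the names suggest) that `TORCH_EFFICIENT` pins PyTorch's memory-efficient SDPA kernel rather than letting PyTorch auto-select a backend; the `AttnType` enum itself comes from the long-context attention dependency and is not shown in this diff:

```python
import torch
import torch.nn.functional as F
from torch.nn.attention import sdpa_kernel, SDPBackend

# Toy shapes for illustration; the fused backends require a CUDA device.
q = torch.randn(1, 8, 1024, 64, device="cuda", dtype=torch.float16)
k, v = torch.randn_like(q), torch.randn_like(q)

# Default SDPA: PyTorch auto-selects a backend (flash, efficient, or math).
out_auto = F.scaled_dot_product_attention(q, k, v)

# Pinned memory-efficient backend, presumably what TORCH_EFFICIENT selects.
with sdpa_kernel(SDPBackend.EFFICIENT_ATTENTION):
    out_eff = F.scaled_dot_product_attention(q, k, v)
```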
diffsynth_engine/models/qwen_image/qwen_image_dit.py

@@ -286,16 +286,15 @@ class QwenImageTransformerBlock(nn.Module):
             shift_0, shift_1 = shift[:actual_batch], shift[actual_batch:]
             scale_0, scale_1 = scale[:actual_batch], scale[actual_batch:]
             gate_0, gate_1 = gate[:actual_batch], gate[actual_batch:]
-            index_expanded = index.unsqueeze(-1)
             shift_0_exp = shift_0.unsqueeze(1)
             shift_1_exp = shift_1.unsqueeze(1)
             scale_0_exp = scale_0.unsqueeze(1)
             scale_1_exp = scale_1.unsqueeze(1)
             gate_0_exp = gate_0.unsqueeze(1)
             gate_1_exp = gate_1.unsqueeze(1)
-            shift_result = torch.where(index_expanded == 0, shift_0_exp, shift_1_exp)
-            scale_result = torch.where(index_expanded == 0, scale_0_exp, scale_1_exp)
-            gate_result = torch.where(index_expanded == 0, gate_0_exp, gate_1_exp)
+            shift_result = torch.where(index == 0, shift_0_exp, shift_1_exp)
+            scale_result = torch.where(index == 0, scale_0_exp, scale_1_exp)
+            gate_result = torch.where(index == 0, gate_0_exp, gate_1_exp)
         else:
             shift_result = shift.unsqueeze(1)
             scale_result = scale.unsqueeze(1)
@@ -514,6 +513,7 @@ class QwenImageDiT(PreTrainedModel):
             device=timestep.device,
             dtype=torch.int,
         )
+        modulate_index = modulate_index.unsqueeze(-1)
         rotary_emb = self.pos_embed(video_fhw, text_seq_len, image.device)

         image = self.img_in(image)
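The per-block `index.unsqueeze(-1)` is removed and hoisted into `QwenImageDiT`, which now unsqueezes `modulate_index` once before the transformer blocks run. A small self-contained check of why the `torch.where` calls still broadcast correctly (shapes here are illustrative, not the model's actual ones):

```python
import torch

B, S, D = 2, 16, 8                       # batch, sequence, hidden (toy sizes)
index = torch.randint(0, 2, (B, S, 1))   # already unsqueezed to (B, S, 1)
shift_0_exp = torch.randn(B, 1, D)       # per-sample modulation, (B, 1, D)
shift_1_exp = torch.randn(B, 1, D)

# A (B, S, 1) condition against (B, 1, D) operands broadcasts to (B, S, D),
# so each block can select per token without re-expanding the condition.
shift_result = torch.where(index == 0, shift_0_exp, shift_1_exp)
assert shift_result.shape == (B, S, D)
```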
@@ -535,7 +535,7 @@ class QwenImageDiT(PreTrainedModel):

         # warning: Eligen does not work with sequence parallel because long context attention does not support attention masks
         img_freqs, txt_freqs = rotary_emb
-        with sequence_parallel((image, text, img_freqs, txt_freqs), seq_dims=(1, 1, 0, 0)):
+        with sequence_parallel((image, text, img_freqs, txt_freqs, modulate_index), seq_dims=(1, 1, 0, 0, 1)):
             rotary_emb = (img_freqs, txt_freqs)
             for block in self.transformer_blocks:
                 text, image = block(
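`modulate_index` now enters the `sequence_parallel` context with `seq_dim=1`, so each rank sees the slice of the index that lines up with its local shard of `image`. A rough illustration of that kind of sequence-dim sharding; the real `sequence_parallel` helper presumably handles the distributed scatter/gather, and `world_size`/`rank` here are placeholders:

```python
import torch

world_size, rank = 4, 1                             # placeholder distributed context
modulate_index = torch.randint(0, 2, (1, 4096, 1))  # (batch, seq, 1)

# Shard along the sequence dim (dim=1), mirroring seq_dims=(..., 1): the rank's
# local index slice matches its local image shard token for token.
local_index = modulate_index.chunk(world_size, dim=1)[rank]
assert local_index.shape == (1, 4096 // world_size, 1)
```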
diffsynth_engine/models/qwen_image/qwen_image_vae.py

@@ -685,7 +685,6 @@ class VideoVAE(nn.Module):
         x = patchify(x, patch_size=2 if self.in_channels == 12 else 1)
         t = x.shape[2]
         iter_ = 1 + (t - 1) // 4
-
         for i in range(iter_):
             if i == 0:
                 out = self.encoder(x[:, :, :1, :, :], feat_cache=feat_cache)
diffsynth_engine/pipelines/qwen_image.py

@@ -165,7 +165,7 @@ class QwenImagePipeline(BasePipeline):
         self.edit_prompt_template_encode_start_idx = 64

         # sampler
-        self.noise_scheduler = RecifitedFlowScheduler(shift=3.0, use_dynamic_shifting=True)
+        self.noise_scheduler = RecifitedFlowScheduler(shift=3.0, use_dynamic_shifting=True, shift_terminal=0.02)
         self.sampler = FlowMatchEulerSampler()
         # models
         self.tokenizer = tokenizer
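The scheduler now receives `shift_terminal=0.02`. If `RecifitedFlowScheduler` implements terminal shifting the way diffusers' `FlowMatchEulerDiscreteScheduler` does (an assumption; the scheduler is not shown in this diff), the sigma schedule is stretched so its final value lands exactly on the terminal value:

```python
import torch

def stretch_to_terminal(sigmas: torch.Tensor, shift_terminal: float) -> torch.Tensor:
    # Rescale 1 - sigma so that the last sigma equals shift_terminal.
    one_minus = 1.0 - sigmas
    scale = one_minus[-1] / (1.0 - shift_terminal)
    return 1.0 - one_minus / scale

sigmas = torch.linspace(1.0, 0.05, steps=10)
stretched = stretch_to_terminal(sigmas, 0.02)
print(stretched[-1])  # tensor(0.0200): the schedule now terminates at 0.02
```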
@@ -690,8 +690,9 @@ class QwenImagePipeline(BasePipeline):
             img_width, img_height = img.size
             condition_width, condition_height = self.calculate_dimensions(384 * 384, img_width / img_height)
             vae_width, vae_height = self.calculate_dimensions(1024 * 1024, img_width / img_height)
-            condition_images.append(img.resize((condition_width, condition_height), Image.LANCZOS))
-            vae_images.append(img.resize((vae_width, vae_height), Image.LANCZOS))
+            condition_images.append(img.resize((condition_width, condition_height)))
+            vae_images.append(img.resize((vae_width, vae_height)))
+
         if width is None and height is None:
             width, height = vae_images[-1].size

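Dropping the explicit `Image.LANCZOS` argument means `PIL.Image.resize` falls back to its default resampling filter, which is bicubic for RGB images (nearest-neighbor applies only to palette and bilevel modes):

```python
from PIL import Image

img = Image.new("RGB", (640, 480))
# No resample argument: Pillow defaults to Resampling.BICUBIC for RGB images.
resized = img.resize((512, 384))
assert resized.size == (512, 384)
```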
diffsynth_engine-0.6.1.dev33.dist-info/METADATA → diffsynth_engine-0.6.1.dev34.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.6.1.dev33
+Version: 0.6.1.dev34
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
diffsynth_engine-0.6.1.dev33.dist-info/RECORD → diffsynth_engine-0.6.1.dev34.dist-info/RECORD

@@ -86,7 +86,7 @@ diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
 diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
 diffsynth_engine/models/base.py,sha256=svao__9WH8VNcyXz5o5dzywYXDcGV0YV9IfkLzDKews,2558
 diffsynth_engine/models/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-diffsynth_engine/models/basic/attention.py,sha256=62Ar8_ydnn28F1qH9ueXtvISgNszQK3q8k14gCIXGEs,15681
+diffsynth_engine/models/basic/attention.py,sha256=YrIxkYoekC3I7-sMTw60CL4GIKMLOTrn-eCk-iHT7E4,15701
 diffsynth_engine/models/basic/lora.py,sha256=Y6cBgrBsuDAP9FZz_fgK8vBi_EMg23saFIUSAsPIG-M,10670
 diffsynth_engine/models/basic/lora_nunchaku.py,sha256=7qhzGCzUIfDrwtWG0nspwdyZ7YUkaM4vMqzxZby2Zds,7510
 diffsynth_engine/models/basic/relative_position_emb.py,sha256=rCXOweZMcayVnNUVvBcYXMdhHS257B_PC8PZSWxvhNQ,2540
@@ -111,10 +111,10 @@ diffsynth_engine/models/hunyuan3d/surface_extractor.py,sha256=b15mb1N4PYwAvDk1Gu
 diffsynth_engine/models/hunyuan3d/volume_decoder.py,sha256=sgflj1a8sIerqGSalBAVQOlyiIihkLOLXYysNbulCoQ,2355
 diffsynth_engine/models/qwen_image/__init__.py,sha256=_6f0LWaoLdDvD2CsjK2OzEIQryt9efge8DFS4_GUnHQ,582
 diffsynth_engine/models/qwen_image/qwen2_5_vl.py,sha256=Eu-r-c42t_q74Qpwz21ToCGHpvSi7VND4B1EI0e-ePA,57748
-diffsynth_engine/models/qwen_image/qwen_image_dit.py,sha256=JEyK_yOa0A5xaqlmxI3nfD7NdCaHuvLDA10aWVbnac4,24635
+diffsynth_engine/models/qwen_image/qwen_image_dit.py,sha256=mMU4zeZi8-uJe9voznNIxZCTCqJPbPXkMxHwgcqJ6z8,24640
 diffsynth_engine/models/qwen_image/qwen_image_dit_fbcache.py,sha256=LIv9X_BohKk5rcEzyl3ATLwd8MSoFX43wjkArQ68nq8,4828
 diffsynth_engine/models/qwen_image/qwen_image_dit_nunchaku.py,sha256=1y1BkPRrX4_RioKjM09D9f9PK9neug1nSGJka0D9bvM,13516
-diffsynth_engine/models/qwen_image/qwen_image_vae.py,sha256=eO7f4YqiYXfw7NncBNFTu-xEvdJ5uKY-SnfP15QY0tE,38443
+diffsynth_engine/models/qwen_image/qwen_image_vae.py,sha256=FpauZV9IVvpvBeS9volu7kzH2mmCISS86AbHt0Jk2bQ,38442
 diffsynth_engine/models/sd/__init__.py,sha256=hjoKRnwoXOLD0wude-w7I6wK5ak7ACMbnbkPuBB2oU0,380
 diffsynth_engine/models/sd/sd_controlnet.py,sha256=kMGfIdriXhC7reT6iO2Z0rPICXEkXpytjeBQcR_sjT8,50577
 diffsynth_engine/models/sd/sd_text_encoder.py,sha256=BUOsBtSb7WH4Z37JhtYxOtpXMDJcQXZWzx_7JNbsJwM,5369
@@ -146,7 +146,7 @@ diffsynth_engine/pipelines/__init__.py,sha256=jh-4LSJ0vqlXiT8BgFgRIQxuAr2atEPyHr
 diffsynth_engine/pipelines/base.py,sha256=ShRiX5MY6bUkRKfuGrA1aalAqeHyeZxhzT87Mwc30b4,17231
 diffsynth_engine/pipelines/flux_image.py,sha256=L0ggxpthLD8a5-zdPHu9z668uWBei9YzPb4PFVypDNU,50707
 diffsynth_engine/pipelines/hunyuan3d_shape.py,sha256=TNV0Wr09Dj2bzzlpua9WioCClOj3YiLfE6utI9aWL8A,8164
-diffsynth_engine/pipelines/qwen_image.py,sha256=lrqwF3fikgQouifb-8KwWCxQhNVZard_7buoJqxHD7s,35759
+diffsynth_engine/pipelines/qwen_image.py,sha256=9n0fZCYw5E1iloXqd7vOU0XfHVPxQp_pm-v4D3Oloos,35751
 diffsynth_engine/pipelines/sd_image.py,sha256=nr-Nhsnomq8CsUqhTM3i2l2zG01YjwXdfRXgr_bC3F0,17891
 diffsynth_engine/pipelines/sdxl_image.py,sha256=v7ZACGPb6EcBunL6e5E9jynSQjE7GQx8etEV-ZLP91g,21704
 diffsynth_engine/pipelines/utils.py,sha256=HZbJHErNJS1DhlwJKvZ9dY7Kh8Zdlsw3zE2e88TYGRY,2277
@@ -190,8 +190,8 @@ diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CD
 diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
 diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
-diffsynth_engine-0.6.1.dev33.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
-diffsynth_engine-0.6.1.dev33.dist-info/METADATA,sha256=pgyNkuwU3lMQA66waiIU3BVtw-7zN3s8pEvinWC_LpI,1164
-diffsynth_engine-0.6.1.dev33.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-diffsynth_engine-0.6.1.dev33.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
-diffsynth_engine-0.6.1.dev33.dist-info/RECORD,,
+diffsynth_engine-0.6.1.dev34.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
+diffsynth_engine-0.6.1.dev34.dist-info/METADATA,sha256=Uu-yhnydrVudp5RdK0ifk9-q4J_18zulQge4fNs24Z0,1164
+diffsynth_engine-0.6.1.dev34.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+diffsynth_engine-0.6.1.dev34.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
+diffsynth_engine-0.6.1.dev34.dist-info/RECORD,,