diffsynth-engine 0.6.1.dev25__py3-none-any.whl → 0.6.1.dev26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diffsynth_engine/configs/pipeline.py

@@ -26,6 +26,8 @@ class AttnImpl(Enum):
     FA2 = "fa2" # Flash Attention 2
     FA3 = "fa3" # Flash Attention 3
     FA3_FP8 = "fa3_fp8" # Flash Attention 3 with FP8
+    AITER = "aiter" # Aiter Flash Attention
+    AITER_FP8 = "aiter_fp8" # Aiter Flash Attention with FP8
     XFORMERS = "xformers" # XFormers
     SDPA = "sdpa" # Scaled Dot Product Attention
     SAGE = "sage" # Sage Attention
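The two new members follow the same string-backed pattern as the rest of the enum, and their values ("aiter", "aiter_fp8") are exactly the strings that the dispatch code in attention.py matches on further down in this diff. A self-contained sketch (not part of the package source) showing how the new members round-trip to and from those strings:

from enum import Enum

class AttnImpl(Enum):
    FA3 = "fa3"
    FA3_FP8 = "fa3_fp8"
    AITER = "aiter"          # added in 0.6.1.dev26
    AITER_FP8 = "aiter_fp8"  # added in 0.6.1.dev26

assert AttnImpl("aiter") is AttnImpl.AITER      # string -> member
assert AttnImpl.AITER_FP8.value == "aiter_fp8"  # member -> string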
diffsynth_engine/models/basic/attention.py

@@ -13,6 +13,7 @@ from diffsynth_engine.utils.flag import (
     SAGE_ATTN_AVAILABLE,
     SPARGE_ATTN_AVAILABLE,
     VIDEO_SPARSE_ATTN_AVAILABLE,
+    AITER_AVAILABLE,
 )
 from diffsynth_engine.utils.platform import DTYPE_FP8
 
@@ -93,6 +94,9 @@ if SPARGE_ATTN_AVAILABLE:
         )
         return out.transpose(1, 2)
 
+if AITER_AVAILABLE:
+    from aiter import flash_attn_func as aiter_flash_attn
+    from aiter import flash_attn_fp8_pertensor_func as aiter_flash_attn_fp8
 
 if VIDEO_SPARSE_ATTN_AVAILABLE:
     from diffsynth_engine.models.basic.video_sparse_attention import (
@@ -137,6 +141,8 @@ def attention(
         "fa2",
         "fa3",
         "fa3_fp8",
+        "aiter",
+        "aiter_fp8",
         "xformers",
         "sdpa",
         "sage",
@@ -157,6 +163,13 @@ def attention(
             logger.debug(
                 "flash_attn_3 does not support attention mask, will use fallback attention implementation"
             )
+        if AITER_AVAILABLE:
+            if flash_attn3_compatible:
+                return aiter_flash_attn(q, k, v, softmax_scale=scale)
+            else:
+                logger.warning(
+                    f"head_dim={q.shape[-1]}, but aiter_flash_attn only supports head dimension at most {FA3_MAX_HEADDIM}, will use fallback attention implementation"
+                )
         if XFORMERS_AVAILABLE:
             return xformers_attn(q, k, v, attn_mask=attn_mask, scale=scale)
         if SDPA_AVAILABLE:
@@ -183,6 +196,22 @@ def attention(
         v = v.to(dtype=DTYPE_FP8)
         out = flash_attn3(q, k, v, softmax_scale=scale)
         return out.to(dtype=origin_dtype)
+    if attn_impl == "aiter" or attn_impl == "aiter_fp8":
+        if not flash_attn3_compatible:
+            raise RuntimeError(
+                f"head_dim={q.shape[-1]}, but aiter_flash_attn only supports head dimension at most {FA3_MAX_HEADDIM}"
+            )
+        if attn_mask is not None:
+            raise RuntimeError("aiter_flash_attn does not support attention mask")
+        if attn_impl == "aiter" :
+            return aiter_flash_attn(q, k, v, softmax_scale=scale)
+        else:
+            origin_dtype = q.dtype
+            q = q.to(dtype=DTYPE_FP8)
+            k = k.to(dtype=DTYPE_FP8)
+            v = v.to(dtype=DTYPE_FP8)
+            out = aiter_flash_attn_fp8(q, k, v, softmax_scale=scale)
+            return out.to(dtype=origin_dtype)
     if attn_impl == "fa2":
         return flash_attn2(q, k, v, softmax_scale=scale)
     if attn_impl == "xformers":
@@ -288,6 +317,8 @@ def long_context_attention(
         "fa2",
         "fa3",
         "fa3_fp8",
+        "aiter",
+        "aiter_fp8",
         "sdpa",
         "sage",
         "sparge",
@@ -303,6 +334,13 @@ def long_context_attention(
             logger.warning(
                 f"head_dim={q.shape[-1]}, but flash_attn_3 only supports head dimension at most {FA3_MAX_HEADDIM}, will use fallback attention implementation"
            )
+        if AITER_AVAILABLE:
+            if flash_attn3_compatible:
+                return LongContextAttention(attn_type=AttnType.AITER)(q, k, v, softmax_scale=scale)
+            else:
+                logger.warning(
+                    f"head_dim={q.shape[-1]}, but aiter_flash_attn only supports head dimension at most {FA3_MAX_HEADDIM}, will use fallback attention implementation"
+                )
         if SDPA_AVAILABLE:
             return LongContextAttention(attn_type=AttnType.TORCH)(q, k, v, softmax_scale=scale)
         if FLASH_ATTN_2_AVAILABLE:
@@ -323,6 +361,20 @@ def long_context_attention(
         v = v.to(dtype=DTYPE_FP8)
         out = LongContextAttention(attn_type=AttnType.FA3)(q, k, v, softmax_scale=scale)
         return out.to(dtype=origin_dtype)
+    if attn_impl == "aiter" or attn_impl == "aiter_fp8":
+        if not flash_attn3_compatible:
+            raise RuntimeError(
+                f"head_dim={q.shape[-1]}, but aiter_flash_attn only supports head dimension at most {FA3_MAX_HEADDIM}"
+            )
+        if attn_impl == "aiter":
+            return LongContextAttention(attn_type=AttnType.AITER)(q, k, v, softmax_scale=scale)
+
+        origin_dtype = q.dtype
+        q = q.to(dtype=DTYPE_FP8)
+        k = k.to(dtype=DTYPE_FP8)
+        v = v.to(dtype=DTYPE_FP8)
+        out = LongContextAttention(attn_type=AttnType.AITER)(q, k, v, softmax_scale=scale)
+        return out.to(dtype=origin_dtype)
     if attn_impl == "fa2":
         return LongContextAttention(attn_type=AttnType.FA)(q, k, v, softmax_scale=scale)
     if attn_impl == "sdpa":
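Both new FP8 branches above ("aiter_fp8", mirroring the existing "fa3_fp8" path) use the same cast-run-restore pattern: the activations are cast to the platform FP8 dtype, the FP8 kernel is called, and the output is cast back to the original dtype. A minimal sketch of that pattern, with torch.float8_e4m3fn standing in for DTYPE_FP8 and the kernel passed in as a parameter rather than imported from aiter (both are assumptions for illustration, not the package's code):

import torch

def fp8_attention(q, k, v, scale, fp8_kernel, fp8_dtype=torch.float8_e4m3fn):
    # Cast activations down to FP8, run the FP8 kernel, restore the original dtype.
    origin_dtype = q.dtype
    q, k, v = [t.to(dtype=fp8_dtype) for t in (q, k, v)]
    out = fp8_kernel(q, k, v, softmax_scale=scale)
    return out.to(dtype=origin_dtype)

With aiter installed, fp8_kernel would correspond to the flash_attn_fp8_pertensor_func imported under AITER_AVAILABLE in the hunks above.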
diffsynth_engine/utils/flag.py

@@ -31,6 +31,11 @@ if SDPA_AVAILABLE:
 else:
     logger.info("Torch SDPA is not available")
 
+AITER_AVAILABLE = importlib.util.find_spec("aiter") is not None
+if AITER_AVAILABLE:
+    logger.info("Aiter is available")
+else:
+    logger.info("Aiter is not available")
 
 # 有损
 SAGE_ATTN_AVAILABLE = importlib.util.find_spec("sageattention") is not None
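The new flag mirrors the existing ones: importlib.util.find_spec probes for the aiter package without importing it, so the module still loads cleanly on machines where AIter is absent, and the guarded import in attention.py only runs when the probe succeeds. A small sketch of that consumption pattern, with a hypothetical require_aiter helper (not part of the package):

import importlib.util

# find_spec returns None when "aiter" cannot be located, so no ImportError is raised here.
AITER_AVAILABLE = importlib.util.find_spec("aiter") is not None

if AITER_AVAILABLE:
    from aiter import flash_attn_func as aiter_flash_attn  # guarded import

def require_aiter():
    # Hypothetical helper: fail loudly if attn_impl="aiter" is requested without aiter installed.
    if not AITER_AVAILABLE:
        raise RuntimeError('attn_impl="aiter" requested but the aiter package is not installed')
    return aiter_flash_attn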
diffsynth_engine-0.6.1.dev25.dist-info/METADATA → diffsynth_engine-0.6.1.dev26.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.6.1.dev25
+Version: 0.6.1.dev26
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
diffsynth_engine-0.6.1.dev25.dist-info/RECORD → diffsynth_engine-0.6.1.dev26.dist-info/RECORD

@@ -81,12 +81,12 @@ diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json,sha256=bhl7TT29cdoU
 diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json,sha256=7Zo6iw-qcacKMoR-BDX-A25uES1N9O23u0ipIeNE3AU,61728
 diffsynth_engine/configs/__init__.py,sha256=vSjJToEdq3JX7t81_z4nwNwIdD4bYnFjxnMZH7PXMKo,1309
 diffsynth_engine/configs/controlnet.py,sha256=f3vclyP3lcAjxDGD9C1vevhqqQ7W2LL_c6Wye0uxk3Q,1180
-diffsynth_engine/configs/pipeline.py,sha256=2tCcW3qndx5GdzYNvpbAsR6ZGnzY8q7EzJjWDIATBr0,13297
+diffsynth_engine/configs/pipeline.py,sha256=ADgWJa7bA3Z3Z1JtVLgmt4N3eS1KRp9yHu1QvTBzTm0,13404
 diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
 diffsynth_engine/models/base.py,sha256=BA5vgMqfy_cjuL2OtXbrFD-Qg5xQnaumHpj5TabwSy8,2559
 diffsynth_engine/models/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-diffsynth_engine/models/basic/attention.py,sha256=iFxpvXdaEJZHddTTRuKL1grKb6beU53y-VuRPX8FpFw,13127
+diffsynth_engine/models/basic/attention.py,sha256=mvgk8LTqFwgtPdBeRv797IZNg9k7--X9wD92Hcr188c,15682
 diffsynth_engine/models/basic/lora.py,sha256=PT-A3pwIuUrW2w3TnNlBPb1KRj70QYiBaoCvLnkR5cs,10652
 diffsynth_engine/models/basic/relative_position_emb.py,sha256=rCXOweZMcayVnNUVvBcYXMdhHS257B_PC8PZSWxvhNQ,2540
 diffsynth_engine/models/basic/timestep.py,sha256=WJODYqkSXEM0wcS42YkkfrGwxWt0e60zMTkDdUBQqBw,2810
@@ -171,7 +171,7 @@ diffsynth_engine/utils/cache.py,sha256=Ivef22pCuhEq-4H00gSvkLS8ceVZoGis7OSitYL6g
 diffsynth_engine/utils/constants.py,sha256=sJio3Vy8i0-PWYRnqquYt6ez9k6Tc9JdjCv6pn2BU_4,3551
 diffsynth_engine/utils/download.py,sha256=w9QQjllPfTUEY371UTREU7o_vvdMY-Q2DymDel3ZEZY,6792
 diffsynth_engine/utils/env.py,sha256=k749eYt_qKGq38GocDiXfkhp8nZrowFefNVTZ8R755I,363
-diffsynth_engine/utils/flag.py,sha256=wODDbMMLTGOl7yoLMZDKGyqXSYANPaDQdZGXOJryGeI,1597
+diffsynth_engine/utils/flag.py,sha256=v9GcRFYiNMonD9qmDLWdbXONuF-AcQ_KABPFtRZd0Tc,1767
 diffsynth_engine/utils/fp8_linear.py,sha256=k34YFWo2dc3t8aKjHaCW9CbQMOTqXxaDHk8aw8aKif4,3857
 diffsynth_engine/utils/gguf.py,sha256=ZWvw46V4g4uVyAR_oCq-4K5nPdKVrYk3u47uXMgA9lU,14092
 diffsynth_engine/utils/image.py,sha256=PiDButjv0fsRS23kpQgCLZAlBumpzQmNnolfvb5EKQ0,9626
@@ -187,8 +187,8 @@ diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CD
 diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
 diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
-diffsynth_engine-0.6.1.dev25.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
-diffsynth_engine-0.6.1.dev25.dist-info/METADATA,sha256=hbm3Xm8GajphVodptdo1vPnvB098xLQk8B1ORFoUQ8k,1164
-diffsynth_engine-0.6.1.dev25.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-diffsynth_engine-0.6.1.dev25.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
-diffsynth_engine-0.6.1.dev25.dist-info/RECORD,,
+diffsynth_engine-0.6.1.dev26.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
+diffsynth_engine-0.6.1.dev26.dist-info/METADATA,sha256=z6sjXpooZoFJJGqqdE_DFtsi2f3aqhjLBbyXPX0RdgE,1164
+diffsynth_engine-0.6.1.dev26.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+diffsynth_engine-0.6.1.dev26.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
+diffsynth_engine-0.6.1.dev26.dist-info/RECORD,,