PyPI - diffsynth-engine - Versions diffs - 0.3.6.dev8__py3-none-any.whl → 0.3.6.dev9__py3-none-any.whl - Mend

diffsynth-engine 0.3.6.dev8__py3-none-any.whl → 0.3.6.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

diffsynth_engine/utils/fp8_linear.py CHANGED Viewed

@@ -2,6 +2,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from contextlib import contextmanager
+from diffsynth_engine.utils.platform import DTYPE_FP8
 def enable_fp8_autocast(module: nn.Module, compute_dtype: torch.dtype = torch.bfloat16, use_fp8_linear: bool = False):
@@ -51,7 +52,7 @@ def enable_fp8_linear(module: nn.Module):
 def _enable_fp8_linear(module: nn.Module):
     if isinstance(module, nn.Linear) and torch.is_floating_point(module.weight.data):
         # avoid conversion for int weights like GGUF
-        module.weight.data = module.weight.data.to(torch.float8_e4m3fn)
+        module.weight.data = module.weight.data.to(DTYPE_FP8)
     for submodule in module.children():
         _enable_fp8_linear(submodule)
@@ -71,8 +72,16 @@ def fp8_inference(enabled=True):
     ) -> torch.Tensor:
         device = input.device
         origin_dtype = input.dtype
-        input = input.to(torch.float8_e4m3fn)
-        weight = weight.to(torch.float8_e4m3fn)
+        scale_a = 1.0
+        # For float8_e4m3fnuz, the maximum representable value is half of that of e4m3fn.
+        # To avoid overflow and ensure numerical compatibility during FP8 computation,
+        # we scale down the input by 2.0 in advance.
+        # This scaling will be compensated later during the final result scaling.
+        if DTYPE_FP8 == torch.float8_e4m3fnuz:
+            scale_a = 2.0
+            input = input / scale_a
+        input = input.to(DTYPE_FP8)
+        weight = weight.to(DTYPE_FP8)
         if len(input.shape) > 2:
             origin_shape = input.shape
@@ -80,7 +89,7 @@ def fp8_inference(enabled=True):
             result = torch._scaled_mm(
                 input,
                 weight.T,
-                scale_a=torch.tensor(1.0).to(device=device),
+                scale_a=torch.tensor(scale_a).to(device=device),
                 scale_b=torch.tensor(1.0).to(device=device),
                 bias=bias,
                 out_dtype=origin_dtype,
@@ -91,7 +100,7 @@ def fp8_inference(enabled=True):
             result = torch._scaled_mm(
                 input,
                 weight.T,
-                scale_a=torch.tensor(1.0).to(device=device),
+                scale_a=torch.tensor(scale_a).to(device=device),
                 scale_b=torch.tensor(1.0).to(device=device),
                 bias=bias,
                 out_dtype=origin_dtype,

diffsynth_engine/utils/platform.py CHANGED Viewed

@@ -1,7 +1,15 @@
+# cross-platform definitions and utilities
 import torch
 import gc
-# 存放跨平台的工具类
+# data type
+# AMD only supports float8_e4m3fnuz
+# https://onnx.ai/onnx/technical/float8.html
+if torch.version.hip and "gfx94" in torch.cuda.get_device_properties(0).gcnArchName:
+    DTYPE_FP8 = torch.float8_e4m3fnuz
+else:
+    DTYPE_FP8 = torch.float8_e4m3fn
 def empty_cache():

{diffsynth_engine-0.3.6.dev8.dist-info → diffsynth_engine-0.3.6.dev9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.3.6.dev8
+Version: 0.3.6.dev9
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent

{diffsynth_engine-0.3.6.dev8.dist-info → diffsynth_engine-0.3.6.dev9.dist-info}/RECORD RENAMED Viewed

@@ -129,7 +129,7 @@ diffsynth_engine/utils/constants.py,sha256=L7sIxGNMfCvcZG66ul7GIT6fDctkcwhePAjMj
 diffsynth_engine/utils/download.py,sha256=NCgfL9tUca-sOhT41k6w4o__Ktbw-1aDwFTR4JDkT28,5639
 diffsynth_engine/utils/env.py,sha256=43x-kBjt5zI2cwZ9G4BOeTbedi2k6TuBzHGOBeFbFvU,280
 diffsynth_engine/utils/flag.py,sha256=6zQLnoEaU69pBEyhavCgydQfP0khw5ppCU7sue4yRqg,1370
-diffsynth_engine/utils/fp8_linear.py,sha256=qu6Hzi7dqmDFgtoP-Uf0p-GDKW03AK9338YeLuzw2nw,3589
+diffsynth_engine/utils/fp8_linear.py,sha256=NosnWMoAr_IpFcLn-OYbAx-vXySphjxutDZqmXLNjJI,4064
 diffsynth_engine/utils/gguf.py,sha256=ZWvw46V4g4uVyAR_oCq-4K5nPdKVrYk3u47uXMgA9lU,14092
 diffsynth_engine/utils/image.py,sha256=_46CVs1Qe7GdZNulWWJISnR_Y6FotC2tZGLKtr04gIE,562
 diffsynth_engine/utils/loader.py,sha256=Z5v1WNDWFY0OrVubB70j5VU3zeaAfEK_j8c1KrGI4yM,1240
@@ -138,11 +138,11 @@ diffsynth_engine/utils/logging.py,sha256=XB0xTT8PBN6btkOjFtOvjlrOCRVgDGT8PFAp1vm
 diffsynth_engine/utils/offload.py,sha256=jUR4u7J60o4KZIRxHhMCwaeDkiXJvBa0KJkYKKT6mrg,1587
 diffsynth_engine/utils/onnx.py,sha256=jeWUudJHnESjuiEAHyUZYUZz7dCj34O9aGjHCe8yjWo,1149
 diffsynth_engine/utils/parallel.py,sha256=2WISMBTTmW0v2qPvpms421-B59v3bYlS6YrLq9BZ5Zo,16909
-diffsynth_engine/utils/platform.py,sha256=q9ifmdzoa66Cj9YKfwps21DsDdwA0JGpwroKQbG6shU,224
+diffsynth_engine/utils/platform.py,sha256=2lXdw6YkqcRONCeT98n4cyg1Ii8Ybbyj2Ns72Se9tlk,496
 diffsynth_engine/utils/prompt.py,sha256=YItMchoVzsG6y-LB4vzzDUWrkhKRVlt1HfVhxZjSxMQ,280
 diffsynth_engine/utils/video.py,sha256=Ne0rd2lb59UT1q5EotpjlY7OT8F9oTCFDyo1ST77uoQ,1004
-diffsynth_engine-0.3.6.dev8.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
-diffsynth_engine-0.3.6.dev8.dist-info/METADATA,sha256=bZRH8-guipkEhR276zMywqxu72Ayr-c2XPttJedLZ2o,1068
-diffsynth_engine-0.3.6.dev8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-diffsynth_engine-0.3.6.dev8.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
-diffsynth_engine-0.3.6.dev8.dist-info/RECORD,,
+diffsynth_engine-0.3.6.dev9.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
+diffsynth_engine-0.3.6.dev9.dist-info/METADATA,sha256=k33lHBGOXqN3YNxtge0TI6C3ICnOhGAxwXCEtfr3kTY,1068
+diffsynth_engine-0.3.6.dev9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+diffsynth_engine-0.3.6.dev9.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
+diffsynth_engine-0.3.6.dev9.dist-info/RECORD,,

{diffsynth_engine-0.3.6.dev8.dist-info → diffsynth_engine-0.3.6.dev9.dist-info}/WHEEL RENAMED Viewed

File without changes

{diffsynth_engine-0.3.6.dev8.dist-info → diffsynth_engine-0.3.6.dev9.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{diffsynth_engine-0.3.6.dev8.dist-info → diffsynth_engine-0.3.6.dev9.dist-info}/top_level.txt RENAMED Viewed

File without changes

diffsynth-engine 0.3.6.dev8__py3-none-any.whl → 0.3.6.dev9__py3-none-any.whl

diffsynth-engine 0.3.6.dev8__py3-none-any.whl → 0.3.6.dev9__py3-none-any.whl