PyPI - cache-dit - Versions diffs - 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl - Mend

cache-dit 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cache-dit might be problematic. Click here for more details.

Files changed (9) hide show

cache_dit/_version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.2.10'
-__version_tuple__ = version_tuple = (0, 2, 10)
+__version__ = version = '0.2.12'
+__version_tuple__ = version_tuple = (0, 2, 12)

cache_dit/compile/utils.py CHANGED Viewed

@@ -39,6 +39,14 @@ def set_custom_compile_configs(
     # https://github.com/pytorch/pytorch/issues/153791
     torch._inductor.config.autotune_local_cache = False
+    if dist.is_initialized():
+        # Enable compute comm overlap
+        torch._inductor.config.reorder_for_compute_comm_overlap = True
+        # L20 64 GB/s, PCIe; A100/A800 NVLink 300 GB/s.
+        torch._inductor.config.intra_node_bw = (
+            64 if "L20" in torch.cuda.get_device_name() else 300
+        )
     FORCE_DISABLE_CUSTOM_COMPILE_CONFIG = (
         os.environ.get("CACHE_DIT_FORCE_DISABLE_CUSTOM_COMPILE_CONFIG", "0")
         == "1"
@@ -51,14 +59,6 @@ def set_custom_compile_configs(
         )
         return
-    if dist.is_initialized():
-        # Enable compute comm overlap
-        torch._inductor.config.reorder_for_compute_comm_overlap = True
-        # L20 64 GB/s, PCIe; A100/A800 NVLink 300 GB/s.
-        torch._inductor.config.intra_node_bw = (
-            64 if "L20" in torch.cuda.get_device_name() else 300
-        )
     # Below are default settings for torch.compile, you can change
     # them to your needs and test the performance
     torch._inductor.config.max_fusion_size = 64

cache_dit/metrics/metrics.py CHANGED Viewed

@@ -334,19 +334,22 @@ compute_video_mse = partial(
 )
+METRICS_CHOICES = [
+    "psnr",
+    "ssim",
+    "mse",
+    "fid",
+    "all",
+]
 # Entrypoints
 def get_args():
+    global METRICS_CHOICES
     parser = argparse.ArgumentParser(
         description="CacheDiT's Metrics CLI",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
-    METRICS_CHOICES = [
-        "psnr",
-        "ssim",
-        "mse",
-        "fid",
-        "all",
-    ]
     parser.add_argument(
         "metrics",
         type=str,
@@ -383,13 +386,6 @@ def get_args():
         default=None,
         help="Path to predicted video or Dir to predicted videos",
     )
-    parser.add_argument(
-        "--enable-verbose",
-        "-verbose",
-        action="store_true",
-        default=False,
-        help="Show metrics progress verbose",
-    )
     # Image 1 vs N pattern
     parser.add_argument(
@@ -431,10 +427,52 @@ def get_args():
         default=1,
         help="Batch size for FID compute",
     )
+    # Verbose
+    parser.add_argument(
+        "--enable-verbose",
+        "-verbose",
+        action="store_true",
+        default=False,
+        help="Show metrics progress verbose",
+    )
+    # Format output
+    parser.add_argument(
+        "--summary",
+        "-s",
+        action="store_true",
+        default=False,
+        help="Summary the outupt metrics results",
+    )
+    # Addtional perf log
+    parser.add_argument(
+        "--perf-log",
+        "-plog",
+        type=str,
+        default=None,
+        help="Path to addtional perf log",
+    )
+    parser.add_argument(
+        "--perf-tag",
+        "-ptag",
+        type=str,
+        default=None,
+        help="Tag to parse perf time from perf log",
+    )
+    parser.add_argument(
+        "--extra-perf-tags",
+        "-extra-ptags",
+        nargs="+",
+        default=[],
+        help="Extra tags to parse perf time from perf log",
+    )
     return parser.parse_args()
 def entrypoint():
+    global METRICS_CHOICES
     args = get_args()
     logger.debug(args)
@@ -449,16 +487,19 @@ def entrypoint():
             batch_size=args.fid_batch_size,
         )
+    METRICS_META: dict[str, float] = {}
     # run one metric
     def _run_metric(
-        mertric: str,
+        metric: str,
         img_true: str = None,
         img_test: str = None,
         video_true: str = None,
         video_test: str = None,
     ) -> None:
         nonlocal FID
-        mertric = mertric.lower()
+        nonlocal METRICS_META
+        metric = metric.lower()
         if img_true is not None and img_test is not None:
             if any(
                 (
@@ -470,30 +511,30 @@ def entrypoint():
             # img_true and img_test can be files or dirs
             img_true_info = os.path.basename(img_true)
             img_test_info = os.path.basename(img_test)
-            if mertric == "psnr" or mertric == "all":
-                img_psnr, n = compute_psnr(img_true, img_test)
-                logger.info(
+            def _logging_msg(value: float, name, n: int):
+                if value is None or n is None:
+                    return
+                msg = (
                     f"{img_true_info} vs {img_test_info}, "
-                    f"Num: {n}, PSNR: {img_psnr}"
+                    f"Num: {n}, {name.upper()}: {value:.5f}"
                 )
-            if mertric == "ssim" or mertric == "all":
+                METRICS_META[msg] = value
+                logger.info(msg)
+            if metric == "psnr" or metric == "all":
+                img_psnr, n = compute_psnr(img_true, img_test)
+                _logging_msg(img_psnr, "psnr", n)
+            if metric == "ssim" or metric == "all":
                 img_ssim, n = compute_ssim(img_true, img_test)
-                logger.info(
-                    f"{img_true_info} vs {img_test_info}, "
-                    f"Num: {n}, SSIM: {img_ssim}"
-                )
-            if mertric == "mse" or mertric == "all":
+                _logging_msg(img_ssim, "ssim", n)
+            if metric == "mse" or metric == "all":
                 img_mse, n = compute_mse(img_true, img_test)
-                logger.info(
-                    f"{img_true_info} vs {img_test_info}, "
-                    f"Num: {n},  MSE: {img_mse}"
-                )
-            if mertric == "fid" or mertric == "all":
+                _logging_msg(img_mse, "mse", n)
+            if metric == "fid" or metric == "all":
                 img_fid, n = FID.compute_fid(img_true, img_test)
-                logger.info(
-                    f"{img_true_info} vs {img_test_info}, "
-                    f"Num: {n},  FID: {img_fid}"
-                )
+                _logging_msg(img_fid, "fid", n)
         if video_true is not None and video_test is not None:
             if any(
                 (
@@ -502,33 +543,33 @@ def entrypoint():
                 )
             ):
                 return
             # video_true and video_test can be files or dirs
             video_true_info = os.path.basename(video_true)
             video_test_info = os.path.basename(video_test)
-            if mertric == "psnr" or mertric == "all":
-                video_psnr, n = compute_video_psnr(video_true, video_test)
-                logger.info(
+            def _logging_msg(value: float, name, n: int):
+                if value is None or n is None:
+                    return
+                msg = (
                     f"{video_true_info} vs {video_test_info}, "
-                    f"Frames: {n}, PSNR: {video_psnr}"
+                    f"Frames: {n}, {name.upper()}: {value:.5f}"
                 )
-            if mertric == "ssim" or mertric == "all":
+                METRICS_META[msg] = value
+                logger.info(msg)
+            if metric == "psnr" or metric == "all":
+                video_psnr, n = compute_video_psnr(video_true, video_test)
+                _logging_msg(video_psnr, "psnr", n)
+            if metric == "ssim" or metric == "all":
                 video_ssim, n = compute_video_ssim(video_true, video_test)
-                logger.info(
-                    f"{video_true_info} vs {video_test_info}, "
-                    f"Frames: {n}, SSIM: {video_ssim}"
-                )
-            if mertric == "mse" or mertric == "all":
+                _logging_msg(video_ssim, "ssim", n)
+            if metric == "mse" or metric == "all":
                 video_mse, n = compute_video_mse(video_true, video_test)
-                logger.info(
-                    f"{video_true_info} vs {video_test_info}, "
-                    f"Frames: {n},  MSE: {video_mse}"
-                )
-            if mertric == "fid" or mertric == "all":
+                _logging_msg(video_mse, "mse", n)
+            if metric == "fid" or metric == "all":
                 video_fid, n = FID.compute_video_fid(video_true, video_test)
-                logger.info(
-                    f"{video_true_info} vs {video_test_info}, "
-                    f"Frames: {n},  FID: {video_fid}"
-                )
+                _logging_msg(video_fid, "fid", n)
     # run selected metrics
     if not DISABLE_VERBOSE:
@@ -574,7 +615,7 @@ def entrypoint():
         for metric in args.metrics:
             for img_test_dir in directories:
                 _run_metric(
-                    mertric=metric,
+                    metric=metric,
                     img_true=args.ref_img_dir,
                     img_test=img_test_dir,
                 )
@@ -619,7 +660,7 @@ def entrypoint():
         for metric in args.metrics:
             for video_test in video_source_selected:
                 _run_metric(
-                    mertric=metric,
+                    metric=metric,
                     video_true=args.ref_video,
                     video_test=video_test,
                 )
@@ -627,13 +668,169 @@ def entrypoint():
     else:
         for metric in args.metrics:
             _run_metric(
-                mertric=metric,
+                metric=metric,
                 img_true=args.img_true,
                 img_test=args.img_test,
                 video_true=args.video_true,
                 video_test=args.video_test,
             )
+    if args.summary:
+        def _fetch_perf():
+            if args.perf_log is None or args.perf_tag is None:
+                return []
+            if not os.path.exists(args.perf_log):
+                return []
+            perf_texts = []
+            with open(args.perf_log, "r") as file:
+                perf_lines = file.readlines()
+                for line in perf_lines:
+                    line = line.strip()
+                    if args.perf_tag.lower() in line.lower():
+                        if len(args.extra_perf_tags) == 0:
+                            perf_texts.append(line)
+                        else:
+                            has_all_extra_tag = True
+                            for ext_tag in args.extra_perf_tags:
+                                if ext_tag.lower() not in line.lower():
+                                    has_all_extra_tag = False
+                                    break
+                            if has_all_extra_tag:
+                                perf_texts.append(line)
+            return perf_texts
+        PERF_TEXTS: list[str] = _fetch_perf()
+        def _parse_value(
+            text: str,
+            tag: str = "Num",
+        ) -> float | None:
+            import re
+            escaped_tag = re.escape(tag)
+            processed_tag = escaped_tag.replace(r"\ ", r"\s+")
+            pattern = re.compile(
+                rf"{processed_tag}:\s*(\d+\.?\d*)\D*", re.IGNORECASE
+            )
+            match = pattern.search(text)
+            if not match:
+                return None
+            value_str = match.group(1)
+            try:
+                if tag.lower() in METRICS_CHOICES:
+                    return float(value_str)
+                if args.perf_tag is not None:
+                    if tag.lower() == args.perf_tag.lower():
+                        return float(value_str)
+                return int(value_str)
+            except ValueError:
+                return None
+        def _parse_perf(
+            compare_tag: str,
+        ) -> float | None:
+            nonlocal PERF_TEXTS
+            perf_times = []
+            for line in PERF_TEXTS:
+                if compare_tag in line:
+                    perf_time = _parse_value(line, args.perf_tag)
+                    if perf_time is not None:
+                        perf_times.append(perf_time)
+            if len(perf_times) == 0:
+                return None
+            return sum(perf_times) / len(perf_times)
+        def _format_item(
+            key: str,
+            metric: str,
+            value: float,
+            max_key_len: int,
+        ):
+            nonlocal PERF_TEXTS
+            # U1-Q0-C0-NONE vs U4-Q1-C1-NONE
+            header = key.split(",")[0].strip()
+            compare_tag = header.split("vs")[1].strip()  # U4-Q1-C1-NONE
+            has_perf_texts = len(PERF_TEXTS) > 0
+            format_str = ""
+            # Num / Frames
+            if n := _parse_value(key, "Num"):
+                if not has_perf_texts:
+                    format_str = (
+                        f"{header:<{max_key_len}}  Num: {n}  "
+                        f"{metric.upper()}: {value:<7.4f}"
+                    )
+                else:
+                    perf_time = _parse_perf(compare_tag)
+                    perf_time = f"{perf_time:<.2f}" if perf_time else None
+                    format_str = (
+                        f"{header:<{max_key_len}}  Num: {n}  "
+                        f"{metric.upper()}: {value:<7.4f}  "
+                        f"Perf: {perf_time}"
+                    )
+            elif n := _parse_value(key, "Frames"):
+                if not has_perf_texts:
+                    format_str = (
+                        f"{header:<{max_key_len}}  Frames: {n}  "
+                        f"{metric.upper()}: {value:<7.4f}"
+                    )
+                else:
+                    perf_time = _parse_perf(compare_tag)
+                    perf_time = f"{perf_time:<.2f}" if perf_time else None
+                    format_str = (
+                        f"{header:<{max_key_len}}  Frames: {n}  "
+                        f"{metric.upper()}: {value:<7.4f}  "
+                        f"Perf: {perf_time}"
+                    )
+            else:
+                raise ValueError("Num or Frames can not be NoneType.")
+            return format_str
+        selected_metrics = args.metrics
+        if "all" in selected_metrics:
+            selected_metrics = METRICS_CHOICES.copy()
+            selected_metrics.remove("all")
+        for metric in selected_metrics:
+            selected_items = {}
+            for key in METRICS_META.keys():
+                if metric.upper() in key or metric.lower() in key:
+                    selected_items[key] = METRICS_META[key]
+            reverse = True if metric.lower() in ["psnr", "ssim"] else False
+            sorted_items = sorted(
+                selected_items.items(), key=lambda x: x[1], reverse=reverse
+            )
+            selected_keys = [
+                key.split(",")[0].strip() for key in selected_items.keys()
+            ]
+            max_key_len = max(len(key) for key in selected_keys)
+            format_strs = []
+            for key, value in sorted_items:
+                format_strs.append(
+                    _format_item(key, metric, value, max_key_len)
+                )
+            format_len = max(len(format_str) for format_str in format_strs)
+            res_len = format_len - len(f"Summary: {metric.upper()}")
+            left_len = res_len // 2
+            right_len = res_len - left_len
+            print("-" * format_len)
+            print(
+                " " * left_len + f"Summary: {metric.upper()}" + " " * right_len
+            )
+            print("-" * format_len)
+            for format_str in format_strs:
+                print(format_str)
+            print("-" * format_len)
 if __name__ == "__main__":
     entrypoint()

{cache_dit-0.2.10.dist-info → cache_dit-0.2.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cache_dit
-Version: 0.2.10
+Version: 0.2.12
 Summary: 🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration Toolbox for Diffusion Transformers
 Author: DefTruth, vipshop.com, etc.
 Maintainer: DefTruth, vipshop.com, etc
@@ -63,9 +63,8 @@ Dynamic: requires-python
 </div>
 ## 🔥News🔥
-- [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! A forked version of [huggingface/flux-fast](https://github.com/huggingface/flux-fast) that **makes flux-fast even faster** with **[cache-dit](https://github.com/vipshop/cache-dit)**, **3.3x** speedup on NVIDIA L20 while still maintaining **high precision**.
+- [2025-07-18] 🎉First caching mechanism in **[🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast)** with **[cache-dit](https://github.com/vipshop/cache-dit)**, also check the **[PR](https://github.com/huggingface/flux-fast/pull/13)**.
+- [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! A forked version of **[🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast)** that **makes flux-fast even faster** with **[cache-dit](https://github.com/vipshop/cache-dit)**, **3.3x** speedup on NVIDIA L20.
 ## 🤗 Introduction

{cache_dit-0.2.10.dist-info → cache_dit-0.2.12.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 cache_dit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cache_dit/_version.py,sha256=fMOyoyXAggjNTgl2YJ-8HW1bnjjDPiNACUsDoNufScI,513
+cache_dit/_version.py,sha256=7CFcHKqzy7OglwTX58ipGg1TD8MpDORqWvEBO3W1dHI,513
 cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
 cache_dit/primitives.py,sha256=A2iG9YLot3gOsZSPp-_gyjqjLgJvWQRx8aitD4JQ23Y,3877
 cache_dit/cache_factory/__init__.py,sha256=iYQwLwB_XLoYl0OB9unZGDbBtrYvZaLkOAmhGRwdW2E,191
@@ -31,17 +31,17 @@ cache_dit/cache_factory/first_block_cache/diffusers_adapters/hunyuan_video.py,sh
 cache_dit/cache_factory/first_block_cache/diffusers_adapters/mochi.py,sha256=lQTClo52OwPbNEE4jiBZQhfC7hbtYqnYIABp_vbm_dk,2363
 cache_dit/cache_factory/first_block_cache/diffusers_adapters/wan.py,sha256=dBNzHBECAuTTA1a7kLdvZL20YzaKTAS3iciVLzKKEWA,2638
 cache_dit/compile/__init__.py,sha256=DfMdPleFFGADXLsr7zXui8BTz_y9futY6rNmNdh9y7k,63
-cache_dit/compile/utils.py,sha256=OTvkwcezSrApZ2M1IMkYtkEmFbkfpTknhHMgoBApd6U,3786
+cache_dit/compile/utils.py,sha256=N4A55_8uIbEd-S4xyJPcrdKceI2MGM9BTIhJE63jyL4,3786
 cache_dit/custom_ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cache_dit/custom_ops/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cache_dit/metrics/__init__.py,sha256=RaUhl5dieF40RqnizGzR30qoJJ9dyMUEADwgwMaMQrE,575
 cache_dit/metrics/config.py,sha256=ieOgD9ayz722RjVzk24bSIqS2D6o7TZjGk8KeXV-OLQ,551
 cache_dit/metrics/fid.py,sha256=9Ivtazl6mW0Bon2VXa-Ia5Xj2ewxRD3V1Qkd69zYM3Y,17066
 cache_dit/metrics/inception.py,sha256=pBVe2X6ylLPIXTG4-GWDM9DWnCviMJbJ45R3ulhktR0,12759
-cache_dit/metrics/metrics.py,sha256=O9a8qV6deQDWEoez7UZ_aqDLQ9rJXAJUMHGnJM7RUMs,19927
-cache_dit-0.2.10.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
-cache_dit-0.2.10.dist-info/METADATA,sha256=f5PF-lhexcLdB2HmBWETBEyg011eZOc99tlVI1lozYA,28002
-cache_dit-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cache_dit-0.2.10.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
-cache_dit-0.2.10.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
-cache_dit-0.2.10.dist-info/RECORD,,
+cache_dit/metrics/metrics.py,sha256=1TTbfaj_-vdUfxopLnc5kVrXs5rMpAoSi8D0ItYdPu8,26439
+cache_dit-0.2.12.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
+cache_dit-0.2.12.dist-info/METADATA,sha256=-AIWGVOFsY-nhMkDeFErUFcELTWmza96-0IUN3od88A,28219
+cache_dit-0.2.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cache_dit-0.2.12.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
+cache_dit-0.2.12.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
+cache_dit-0.2.12.dist-info/RECORD,,

{cache_dit-0.2.10.dist-info → cache_dit-0.2.12.dist-info}/WHEEL RENAMED Viewed

File without changes

{cache_dit-0.2.10.dist-info → cache_dit-0.2.12.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{cache_dit-0.2.10.dist-info → cache_dit-0.2.12.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{cache_dit-0.2.10.dist-info → cache_dit-0.2.12.dist-info}/top_level.txt RENAMED Viewed

File without changes

cache-dit 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

Potentially problematic release.

cache-dit 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl