cache-dit 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cache-dit might be problematic. Click here for more details.

cache_dit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.2.8'
21
- __version_tuple__ = version_tuple = (0, 2, 8)
20
+ __version__ = version = '0.2.9'
21
+ __version_tuple__ = version_tuple = (0, 2, 9)
cache_dit/compile/utils.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import os
2
2
 
3
3
  import torch
4
+ import torch.distributed as dist
4
5
  from cache_dit.logger import init_logger, logging_rank_0
5
6
 
6
7
  logger = init_logger(__name__)
@@ -50,12 +51,13 @@ def set_custom_compile_configs(
50
51
  )
51
52
  return
52
53
 
53
- # Enable compute comm overlap
54
- torch._inductor.config.reorder_for_compute_comm_overlap = True
55
- # L20 64 GB/s, PCIe; A100/A800 NVLink 300 GB/s.
56
- torch._inductor.config.intra_node_bw = (
57
- 64 if "L20" in torch.cuda.get_device_name() else 300
58
- )
54
+ if dist.is_initialized():
55
+ # Enable compute comm overlap
56
+ torch._inductor.config.reorder_for_compute_comm_overlap = True
57
+ # L20 64 GB/s, PCIe; A100/A800 NVLink 300 GB/s.
58
+ torch._inductor.config.intra_node_bw = (
59
+ 64 if "L20" in torch.cuda.get_device_name() else 300
60
+ )
59
61
 
60
62
  # Below are default settings for torch.compile, you can change
61
63
  # them to your needs and test the performance
cache_dit-0.2.9.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cache_dit
3
- Version: 0.2.8
3
+ Version: 0.2.9
4
4
  Summary: 🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration Toolbox for Diffusion Transformers
5
5
  Author: DefTruth, vipshop.com, etc.
6
6
  Maintainer: DefTruth, vipshop.com, etc
@@ -61,6 +61,11 @@ Dynamic: requires-python
61
61
  </p>
62
62
  </div>
63
63
 
64
+ ## 🔥News🔥
65
+
66
+ - [2025-07-13] An end2end speedup example for FLUX using cache-dit is released! **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)**: A forked version of [huggingface/flux-fast](https://github.com/huggingface/flux-fast) that **makes flux-fast even faster** with **[cache-dit](https://github.com/vipshop/cache-dit)**, **3.3x** speedup on NVIDIA L20 while still maintaining **high precision**.
67
+
68
+
64
69
  ## 🤗 Introduction
65
70
 
66
71
  <div align="center">
cache_dit-0.2.9.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
1
1
  cache_dit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- cache_dit/_version.py,sha256=zkhRarrvPoGA1yWjS9_zVM80dWqpDesNn9DiHcF4JWM,511
2
+ cache_dit/_version.py,sha256=Iq6CyehddPOWDVsW9Hnb65BEkCEkAnt4bl0MAuqXKLA,511
3
3
  cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
4
4
  cache_dit/primitives.py,sha256=A2iG9YLot3gOsZSPp-_gyjqjLgJvWQRx8aitD4JQ23Y,3877
5
5
  cache_dit/cache_factory/__init__.py,sha256=5RNuhWakvvqrOV4vkqrEBA7d-V1LwcNSsjtW14mkqK8,5255
@@ -30,7 +30,7 @@ cache_dit/cache_factory/first_block_cache/diffusers_adapters/hunyuan_video.py,sh
30
30
  cache_dit/cache_factory/first_block_cache/diffusers_adapters/mochi.py,sha256=lQTClo52OwPbNEE4jiBZQhfC7hbtYqnYIABp_vbm_dk,2363
31
31
  cache_dit/cache_factory/first_block_cache/diffusers_adapters/wan.py,sha256=dBNzHBECAuTTA1a7kLdvZL20YzaKTAS3iciVLzKKEWA,2638
32
32
  cache_dit/compile/__init__.py,sha256=DfMdPleFFGADXLsr7zXui8BTz_y9futY6rNmNdh9y7k,63
33
- cache_dit/compile/utils.py,sha256=KU60xc474Anbj7Y_FLRFmNxEjVYLLXkhbtCLXO7o_Tc,3699
33
+ cache_dit/compile/utils.py,sha256=OTvkwcezSrApZ2M1IMkYtkEmFbkfpTknhHMgoBApd6U,3786
34
34
  cache_dit/custom_ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  cache_dit/custom_ops/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  cache_dit/metrics/__init__.py,sha256=RaUhl5dieF40RqnizGzR30qoJJ9dyMUEADwgwMaMQrE,575
@@ -38,9 +38,9 @@ cache_dit/metrics/config.py,sha256=ieOgD9ayz722RjVzk24bSIqS2D6o7TZjGk8KeXV-OLQ,5
38
38
  cache_dit/metrics/fid.py,sha256=9Ivtazl6mW0Bon2VXa-Ia5Xj2ewxRD3V1Qkd69zYM3Y,17066
39
39
  cache_dit/metrics/inception.py,sha256=pBVe2X6ylLPIXTG4-GWDM9DWnCviMJbJ45R3ulhktR0,12759
40
40
  cache_dit/metrics/metrics.py,sha256=tzAtG_-fM1xPIBfRVFIBupvOWYzIO3xDq29Vy5rOBWc,14730
41
- cache_dit-0.2.8.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
42
- cache_dit-0.2.8.dist-info/METADATA,sha256=8E51DpSKDGqk3_cG9buahoXN-7fub6M8VCiPb_Idg64,27608
43
- cache_dit-0.2.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
- cache_dit-0.2.8.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
45
- cache_dit-0.2.8.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
46
- cache_dit-0.2.8.dist-info/RECORD,,
41
+ cache_dit-0.2.9.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
42
+ cache_dit-0.2.9.dist-info/METADATA,sha256=TdvKAftNWwijdCW8K-8iO7fITEcfllWX3FJdZ-qcRqA,28032
43
+ cache_dit-0.2.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
+ cache_dit-0.2.9.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
45
+ cache_dit-0.2.9.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
46
+ cache_dit-0.2.9.dist-info/RECORD,,