cache-dit 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cache-dit might be problematic. Click here for more details.

cache_dit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.2.7'
21
- __version_tuple__ = version_tuple = (0, 2, 7)
20
+ __version__ = version = '0.2.9'
21
+ __version_tuple__ = version_tuple = (0, 2, 9)
@@ -1162,7 +1162,6 @@ class DBCachedTransformerBlocks(torch.nn.Module):
1162
1162
 
1163
1163
  torch._dynamo.graph_break()
1164
1164
  if can_use_cache:
1165
- torch._dynamo.graph_break()
1166
1165
  add_cached_step()
1167
1166
  del Fn_hidden_states_residual
1168
1167
  hidden_states, encoder_hidden_states = apply_hidden_states_residual(
@@ -1189,7 +1188,6 @@ class DBCachedTransformerBlocks(torch.nn.Module):
1189
1188
  )
1190
1189
  )
1191
1190
  else:
1192
- torch._dynamo.graph_break()
1193
1191
  set_Fn_buffer(Fn_hidden_states_residual, prefix="Fn_residual")
1194
1192
  if is_l1_diff_enabled():
1195
1193
  # for hidden states L1 diff
@@ -1797,7 +1795,6 @@ class DBCachedTransformerBlocks(torch.nn.Module):
1797
1795
  f"the number of single transformer blocks {len(self.single_transformer_blocks)}"
1798
1796
  )
1799
1797
 
1800
- torch._dynamo.graph_break()
1801
1798
  hidden_states = torch.cat(
1802
1799
  [encoder_hidden_states, hidden_states], dim=1
1803
1800
  )
@@ -1829,13 +1826,11 @@ class DBCachedTransformerBlocks(torch.nn.Module):
1829
1826
  ],
1830
1827
  dim=1,
1831
1828
  )
1832
- torch._dynamo.graph_break()
1833
1829
  else:
1834
1830
  assert Bn_compute_blocks() <= len(self.transformer_blocks), (
1835
1831
  f"Bn_compute_blocks {Bn_compute_blocks()} must be less than "
1836
1832
  f"the number of transformer blocks {len(self.transformer_blocks)}"
1837
1833
  )
1838
- torch._dynamo.graph_break()
1839
1834
  if len(Bn_compute_blocks_ids()) > 0:
1840
1835
  for i, block in enumerate(self._Bn_transformer_blocks()):
1841
1836
  hidden_states, encoder_hidden_states = (
@@ -1864,7 +1859,6 @@ class DBCachedTransformerBlocks(torch.nn.Module):
1864
1859
  encoder_hidden_states,
1865
1860
  hidden_states,
1866
1861
  )
1867
- torch._dynamo.graph_break()
1868
1862
 
1869
1863
  hidden_states = (
1870
1864
  hidden_states.reshape(-1)
@@ -1,6 +1,7 @@
1
1
  import os
2
2
 
3
3
  import torch
4
+ import torch.distributed as dist
4
5
  from cache_dit.logger import init_logger, logging_rank_0
5
6
 
6
7
  logger = init_logger(__name__)
@@ -50,12 +51,13 @@ def set_custom_compile_configs(
50
51
  )
51
52
  return
52
53
 
53
- # Enable compute comm overlap
54
- torch._inductor.config.reorder_for_compute_comm_overlap = True
55
- # L20 64 GB/s, PCIe; A100/A800 NVLink 300 GB/s.
56
- torch._inductor.config.intra_node_bw = (
57
- 64 if "L20" in torch.cuda.get_device_name() else 300
58
- )
54
+ if dist.is_initialized():
55
+ # Enable compute comm overlap
56
+ torch._inductor.config.reorder_for_compute_comm_overlap = True
57
+ # L20 64 GB/s, PCIe; A100/A800 NVLink 300 GB/s.
58
+ torch._inductor.config.intra_node_bw = (
59
+ 64 if "L20" in torch.cuda.get_device_name() else 300
60
+ )
59
61
 
60
62
  # Below are default settings for torch.compile, you can change
61
63
  # them to your needs and test the performance
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cache_dit
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: 🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration Toolbox for Diffusion Transformers
5
5
  Author: DefTruth, vipshop.com, etc.
6
6
  Maintainer: DefTruth, vipshop.com, etc
@@ -61,6 +61,11 @@ Dynamic: requires-python
61
61
  </p>
62
62
  </div>
63
63
 
64
+ ## 🔥News🔥
65
+
66
+ - [2025-07-13] An end2end speedup example for FLUX using cache-dit is released! **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)**: A forked version of [huggingface/flux-fast](https://github.com/huggingface/flux-fast) that **makes flux-fast even faster** with **[cache-dit](https://github.com/vipshop/cache-dit)**, **3.3x** speedup on NVIDIA L20 while still maintaining **high precision**.
67
+
68
+
64
69
  ## 🤗 Introduction
65
70
 
66
71
  <div align="center">
@@ -1,12 +1,12 @@
1
1
  cache_dit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- cache_dit/_version.py,sha256=Xk20v7uvkFqkpy9aLJzVngs1eKQn0FYUP2oyA1MEQUU,511
2
+ cache_dit/_version.py,sha256=Iq6CyehddPOWDVsW9Hnb65BEkCEkAnt4bl0MAuqXKLA,511
3
3
  cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
4
4
  cache_dit/primitives.py,sha256=A2iG9YLot3gOsZSPp-_gyjqjLgJvWQRx8aitD4JQ23Y,3877
5
5
  cache_dit/cache_factory/__init__.py,sha256=5RNuhWakvvqrOV4vkqrEBA7d-V1LwcNSsjtW14mkqK8,5255
6
6
  cache_dit/cache_factory/taylorseer.py,sha256=LKSNo2ode69EVo9xrxjxAMEjz0yDGiGADeDYnEqddA8,3987
7
7
  cache_dit/cache_factory/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  cache_dit/cache_factory/dual_block_cache/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- cache_dit/cache_factory/dual_block_cache/cache_context.py,sha256=7kMk6hvMDi-m2HP1qlj4p6qJhzZPjJol6IBneVGDs3E,71396
9
+ cache_dit/cache_factory/dual_block_cache/cache_context.py,sha256=itVEb6gT2eZuncAHUmP51ZS0r6v6cGtRvnPjyeXqKH8,71156
10
10
  cache_dit/cache_factory/dual_block_cache/diffusers_adapters/__init__.py,sha256=krNAICf-aS3JLmSG8vOB9tpLa04uYRcABsC8PMbVUKY,1870
11
11
  cache_dit/cache_factory/dual_block_cache/diffusers_adapters/cogvideox.py,sha256=3xUjvDzor9AkBkDUc0N7kZqM86MIdajuigesnicNzXE,2260
12
12
  cache_dit/cache_factory/dual_block_cache/diffusers_adapters/flux.py,sha256=cIsov6Pf0dRyddqkzTA2CU-jSDotof8LQr-HIoY9T9M,2615
@@ -30,7 +30,7 @@ cache_dit/cache_factory/first_block_cache/diffusers_adapters/hunyuan_video.py,sh
30
30
  cache_dit/cache_factory/first_block_cache/diffusers_adapters/mochi.py,sha256=lQTClo52OwPbNEE4jiBZQhfC7hbtYqnYIABp_vbm_dk,2363
31
31
  cache_dit/cache_factory/first_block_cache/diffusers_adapters/wan.py,sha256=dBNzHBECAuTTA1a7kLdvZL20YzaKTAS3iciVLzKKEWA,2638
32
32
  cache_dit/compile/__init__.py,sha256=DfMdPleFFGADXLsr7zXui8BTz_y9futY6rNmNdh9y7k,63
33
- cache_dit/compile/utils.py,sha256=KU60xc474Anbj7Y_FLRFmNxEjVYLLXkhbtCLXO7o_Tc,3699
33
+ cache_dit/compile/utils.py,sha256=OTvkwcezSrApZ2M1IMkYtkEmFbkfpTknhHMgoBApd6U,3786
34
34
  cache_dit/custom_ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  cache_dit/custom_ops/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  cache_dit/metrics/__init__.py,sha256=RaUhl5dieF40RqnizGzR30qoJJ9dyMUEADwgwMaMQrE,575
@@ -38,9 +38,9 @@ cache_dit/metrics/config.py,sha256=ieOgD9ayz722RjVzk24bSIqS2D6o7TZjGk8KeXV-OLQ,5
38
38
  cache_dit/metrics/fid.py,sha256=9Ivtazl6mW0Bon2VXa-Ia5Xj2ewxRD3V1Qkd69zYM3Y,17066
39
39
  cache_dit/metrics/inception.py,sha256=pBVe2X6ylLPIXTG4-GWDM9DWnCviMJbJ45R3ulhktR0,12759
40
40
  cache_dit/metrics/metrics.py,sha256=tzAtG_-fM1xPIBfRVFIBupvOWYzIO3xDq29Vy5rOBWc,14730
41
- cache_dit-0.2.7.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
42
- cache_dit-0.2.7.dist-info/METADATA,sha256=S0C1VGcXoWVjxmfX_755xttdoE9J0toSuHCZW9xaUBM,27608
43
- cache_dit-0.2.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
- cache_dit-0.2.7.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
45
- cache_dit-0.2.7.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
46
- cache_dit-0.2.7.dist-info/RECORD,,
41
+ cache_dit-0.2.9.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
42
+ cache_dit-0.2.9.dist-info/METADATA,sha256=TdvKAftNWwijdCW8K-8iO7fITEcfllWX3FJdZ-qcRqA,28032
43
+ cache_dit-0.2.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
+ cache_dit-0.2.9.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
45
+ cache_dit-0.2.9.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
46
+ cache_dit-0.2.9.dist-info/RECORD,,