cache-dit 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cache-dit might be problematic.
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/dual_block_cache/cache_context.py +0 -6
- cache_dit/compile/utils.py +8 -6
- {cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/METADATA +6 -1
- {cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/RECORD +9 -9
- {cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/WHEEL +0 -0
- {cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/entry_points.txt +0 -0
- {cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/top_level.txt +0 -0
cache_dit/_version.py
CHANGED

cache_dit/cache_factory/dual_block_cache/cache_context.py
CHANGED
@@ -1162,7 +1162,6 @@ class DBCachedTransformerBlocks(torch.nn.Module):

         torch._dynamo.graph_break()
         if can_use_cache:
-            torch._dynamo.graph_break()
             add_cached_step()
             del Fn_hidden_states_residual
             hidden_states, encoder_hidden_states = apply_hidden_states_residual(
@@ -1189,7 +1188,6 @@ class DBCachedTransformerBlocks(torch.nn.Module):
                 )
             )
         else:
-            torch._dynamo.graph_break()
             set_Fn_buffer(Fn_hidden_states_residual, prefix="Fn_residual")
             if is_l1_diff_enabled():
                 # for hidden states L1 diff
@@ -1797,7 +1795,6 @@ class DBCachedTransformerBlocks(torch.nn.Module):
                 f"the number of single transformer blocks {len(self.single_transformer_blocks)}"
             )

-            torch._dynamo.graph_break()
             hidden_states = torch.cat(
                 [encoder_hidden_states, hidden_states], dim=1
             )
@@ -1829,13 +1826,11 @@ class DBCachedTransformerBlocks(torch.nn.Module):
                 ],
                 dim=1,
             )
-            torch._dynamo.graph_break()
         else:
             assert Bn_compute_blocks() <= len(self.transformer_blocks), (
                 f"Bn_compute_blocks {Bn_compute_blocks()} must be less than "
                 f"the number of transformer blocks {len(self.transformer_blocks)}"
             )
-            torch._dynamo.graph_break()
             if len(Bn_compute_blocks_ids()) > 0:
                 for i, block in enumerate(self._Bn_transformer_blocks()):
                     hidden_states, encoder_hidden_states = (
@@ -1864,7 +1859,6 @@ class DBCachedTransformerBlocks(torch.nn.Module):
                         encoder_hidden_states,
                         hidden_states,
                     )
-            torch._dynamo.graph_break()

             hidden_states = (
                 hidden_states.reshape(-1)
cache_dit/compile/utils.py
CHANGED
@@ -1,6 +1,7 @@
 import os

 import torch
+import torch.distributed as dist
 from cache_dit.logger import init_logger, logging_rank_0

 logger = init_logger(__name__)
@@ -50,12 +51,13 @@ def set_custom_compile_configs(
         )
         return

-
-
-
-
-
-
+    if dist.is_initialized():
+        # Enable compute comm overlap
+        torch._inductor.config.reorder_for_compute_comm_overlap = True
+        # L20 64 GB/s, PCIe; A100/A800 NVLink 300 GB/s.
+        torch._inductor.config.intra_node_bw = (
+            64 if "L20" in torch.cuda.get_device_name() else 300
+        )

     # Below are default settings for torch.compile, you can change
     # them to your needs and test the performance
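The utils.py change gates the inductor communication-overlap settings behind `dist.is_initialized()`, so they are applied only when a distributed process group is running. Below is a minimal standalone sketch of the same guard pattern; the helper name `maybe_enable_comm_overlap` is hypothetical, and this is not the package's `set_custom_compile_configs` implementation.

```python
# Minimal sketch (assumed usage pattern, not cache-dit's own API):
# apply inductor compute/communication-overlap hints only when a
# torch.distributed process group is active.
import torch
import torch.distributed as dist
import torch._inductor.config as inductor_config


def maybe_enable_comm_overlap() -> None:  # hypothetical helper name
    if dist.is_initialized():
        # Let the inductor scheduler interleave collectives with compute.
        inductor_config.reorder_for_compute_comm_overlap = True
        # Rough intra-node bandwidth hint (GB/s): PCIe-attached L20 vs NVLink A100/A800.
        inductor_config.intra_node_bw = (
            64 if "L20" in torch.cuda.get_device_name() else 300
        )


if __name__ == "__main__":
    maybe_enable_comm_overlap()  # does nothing in a single-process run
```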
{cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cache_dit
-Version: 0.2.7
+Version: 0.2.9
 Summary: 🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration Toolbox for Diffusion Transformers
 Author: DefTruth, vipshop.com, etc.
 Maintainer: DefTruth, vipshop.com, etc
@@ -61,6 +61,11 @@ Dynamic: requires-python
 </p>
 </div>

+## 🔥News🔥
+
+- [2025-07-13] An end2end speedup example for FLUX using cache-dit is released! **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)**: A forked version of [huggingface/flux-fast](https://github.com/huggingface/flux-fast) that **makes flux-fast even faster** with **[cache-dit](https://github.com/vipshop/cache-dit)**, **3.3x** speedup on NVIDIA L20 while still maintaining **high precision**.
+
+
 ## 🤗 Introduction

 <div align="center">
{cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/RECORD
CHANGED

@@ -1,12 +1,12 @@
 cache_dit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cache_dit/_version.py,sha256=
+cache_dit/_version.py,sha256=Iq6CyehddPOWDVsW9Hnb65BEkCEkAnt4bl0MAuqXKLA,511
 cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
 cache_dit/primitives.py,sha256=A2iG9YLot3gOsZSPp-_gyjqjLgJvWQRx8aitD4JQ23Y,3877
 cache_dit/cache_factory/__init__.py,sha256=5RNuhWakvvqrOV4vkqrEBA7d-V1LwcNSsjtW14mkqK8,5255
 cache_dit/cache_factory/taylorseer.py,sha256=LKSNo2ode69EVo9xrxjxAMEjz0yDGiGADeDYnEqddA8,3987
 cache_dit/cache_factory/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cache_dit/cache_factory/dual_block_cache/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cache_dit/cache_factory/dual_block_cache/cache_context.py,sha256=
+cache_dit/cache_factory/dual_block_cache/cache_context.py,sha256=itVEb6gT2eZuncAHUmP51ZS0r6v6cGtRvnPjyeXqKH8,71156
 cache_dit/cache_factory/dual_block_cache/diffusers_adapters/__init__.py,sha256=krNAICf-aS3JLmSG8vOB9tpLa04uYRcABsC8PMbVUKY,1870
 cache_dit/cache_factory/dual_block_cache/diffusers_adapters/cogvideox.py,sha256=3xUjvDzor9AkBkDUc0N7kZqM86MIdajuigesnicNzXE,2260
 cache_dit/cache_factory/dual_block_cache/diffusers_adapters/flux.py,sha256=cIsov6Pf0dRyddqkzTA2CU-jSDotof8LQr-HIoY9T9M,2615
@@ -30,7 +30,7 @@ cache_dit/cache_factory/first_block_cache/diffusers_adapters/hunyuan_video.py,sh
 cache_dit/cache_factory/first_block_cache/diffusers_adapters/mochi.py,sha256=lQTClo52OwPbNEE4jiBZQhfC7hbtYqnYIABp_vbm_dk,2363
 cache_dit/cache_factory/first_block_cache/diffusers_adapters/wan.py,sha256=dBNzHBECAuTTA1a7kLdvZL20YzaKTAS3iciVLzKKEWA,2638
 cache_dit/compile/__init__.py,sha256=DfMdPleFFGADXLsr7zXui8BTz_y9futY6rNmNdh9y7k,63
-cache_dit/compile/utils.py,sha256=
+cache_dit/compile/utils.py,sha256=OTvkwcezSrApZ2M1IMkYtkEmFbkfpTknhHMgoBApd6U,3786
 cache_dit/custom_ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cache_dit/custom_ops/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cache_dit/metrics/__init__.py,sha256=RaUhl5dieF40RqnizGzR30qoJJ9dyMUEADwgwMaMQrE,575
@@ -38,9 +38,9 @@ cache_dit/metrics/config.py,sha256=ieOgD9ayz722RjVzk24bSIqS2D6o7TZjGk8KeXV-OLQ,5
 cache_dit/metrics/fid.py,sha256=9Ivtazl6mW0Bon2VXa-Ia5Xj2ewxRD3V1Qkd69zYM3Y,17066
 cache_dit/metrics/inception.py,sha256=pBVe2X6ylLPIXTG4-GWDM9DWnCviMJbJ45R3ulhktR0,12759
 cache_dit/metrics/metrics.py,sha256=tzAtG_-fM1xPIBfRVFIBupvOWYzIO3xDq29Vy5rOBWc,14730
-cache_dit-0.2.
-cache_dit-0.2.
-cache_dit-0.2.
-cache_dit-0.2.
-cache_dit-0.2.
-cache_dit-0.2.
+cache_dit-0.2.9.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
+cache_dit-0.2.9.dist-info/METADATA,sha256=TdvKAftNWwijdCW8K-8iO7fITEcfllWX3FJdZ-qcRqA,28032
+cache_dit-0.2.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cache_dit-0.2.9.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
+cache_dit-0.2.9.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
+cache_dit-0.2.9.dist-info/RECORD,,
{cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/WHEEL
File without changes

{cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/entry_points.txt
File without changes

{cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/licenses/LICENSE
File without changes

{cache_dit-0.2.7.dist-info → cache_dit-0.2.9.dist-info}/top_level.txt
File without changes