cache-dit 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cache-dit might be problematic. Click here for more details.

@@ -0,0 +1,295 @@
1
+ # Adapted from: https://github.com/chengzeyi/ParaAttention/blob/main/src/para_attn/first_block_cache/diffusers_adapters/hunyuan_video.py
2
+ import functools
3
+ import unittest
4
+ from typing import Any, Dict, Optional, Union
5
+
6
+ import torch
7
+ from diffusers import DiffusionPipeline, HunyuanVideoTransformer3DModel
8
+ from diffusers.models.modeling_outputs import Transformer2DModelOutput
9
+ from diffusers.utils import (
10
+ scale_lora_layers,
11
+ unscale_lora_layers,
12
+ USE_PEFT_BACKEND,
13
+ )
14
+
15
+ from cache_dit.cache_factory.dynamic_block_prune import prune_context
16
+ from cache_dit.logger import init_logger
17
+
18
# Optional dependency: use para_attn's SparseKVAttnMode when it is installed,
# otherwise fall back to a do-nothing context manager with the same name so
# `with SparseKVAttnMode():` keeps working.
try:
    from para_attn.para_attn_interface import SparseKVAttnMode
except ImportError:

    class SparseKVAttnMode:
        """No-op stand-in used when ``para_attn`` cannot be imported."""

        def __enter__(self):
            return None

        def __exit__(self, exc_type, exc_value, traceback):
            # Returning None means exceptions raised inside the block propagate.
            return None

    def is_sparse_kv_attn_available():
        """Return False: the ``para_attn`` import failed."""
        return False

else:

    def is_sparse_kv_attn_available():
        """Return True: ``SparseKVAttnMode`` was imported from ``para_attn``."""
        return True
35
+
36
+
37
+ logger = init_logger(__name__) # pylint: disable=invalid-name
38
+
39
+
40
def apply_db_prune_on_transformer(
    transformer: HunyuanVideoTransformer3DModel,
):
    """Patch a HunyuanVideo transformer so its blocks run through DBPrune.

    The entries of ``transformer.transformer_blocks`` and
    ``transformer.single_transformer_blocks`` are concatenated and wrapped in a
    single ``prune_context.DBPrunedTransformerBlocks`` module.
    ``transformer.forward`` is replaced by a wrapper that, for the duration of
    each call, swaps that wrapper in as ``transformer_blocks`` and an empty
    ``ModuleList`` in as ``single_transformer_blocks``. A
    ``call_transformer_blocks`` method is also attached to the instance.

    The patch is applied at most once per instance (guarded by the
    ``_is_pruned`` attribute).

    Args:
        transformer: The transformer instance to patch in place.

    Returns:
        The same ``transformer`` object.
    """
    # `import unittest` does not load the `unittest.mock` submodule; import it
    # explicitly instead of relying on some other package having done so.
    from unittest import mock

    if getattr(transformer, "_is_pruned", False):
        return transformer

    cached_transformer_blocks = torch.nn.ModuleList(
        [
            prune_context.DBPrunedTransformerBlocks(
                transformer.transformer_blocks
                + transformer.single_transformer_blocks,
                transformer=transformer,
            )
        ]
    )
    # Every real block already lives inside the wrapper above, so the
    # single-block list is replaced by an empty one while forward runs.
    dummy_single_transformer_blocks = torch.nn.ModuleList()

    # Bound method captured before patching; used for the parallelized path.
    original_forward = transformer.forward

    @functools.wraps(transformer.__class__.forward)
    def new_forward(
        self,
        hidden_states: torch.Tensor,
        timestep: torch.LongTensor,
        encoder_hidden_states: torch.Tensor,
        encoder_attention_mask: torch.Tensor,
        pooled_projections: torch.Tensor,
        guidance: torch.Tensor = None,
        attention_kwargs: Optional[Dict[str, Any]] = None,
        return_dict: bool = True,
        **kwargs,
    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
        with (
            mock.patch.object(
                self,
                "transformer_blocks",
                cached_transformer_blocks,
            ),
            mock.patch.object(
                self,
                "single_transformer_blocks",
                dummy_single_transformer_blocks,
            ),
        ):
            if getattr(self, "_is_parallelized", False):
                # Delegate to the pre-patch forward; it sees the swapped-in
                # block lists because the patches are still active.
                return original_forward(
                    hidden_states,
                    timestep,
                    encoder_hidden_states,
                    encoder_attention_mask,
                    pooled_projections,
                    guidance=guidance,
                    attention_kwargs=attention_kwargs,
                    return_dict=return_dict,
                    **kwargs,
                )

            scale_was_passed = False
            if attention_kwargs is not None:
                attention_kwargs = attention_kwargs.copy()
                # Record this before popping: once "scale" has been removed,
                # the non-PEFT warning below could never trigger.
                scale_was_passed = attention_kwargs.get("scale") is not None
                lora_scale = attention_kwargs.pop("scale", 1.0)
            else:
                lora_scale = 1.0

            if USE_PEFT_BACKEND:
                # weight the lora layers by setting `lora_scale` for each PEFT layer
                scale_lora_layers(self, lora_scale)
            elif scale_was_passed:
                logger.warning(
                    "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective."
                )

            try:
                # hidden_states is unpacked as a 5-D tensor; the channel
                # count is not needed below.
                batch_size, _, num_frames, height, width = hidden_states.shape
                p, p_t = self.config.patch_size, self.config.patch_size_t
                post_patch_num_frames = num_frames // p_t
                post_patch_height = height // p
                post_patch_width = width // p

                # 1. RoPE
                image_rotary_emb = self.rope(hidden_states)

                # 2. Conditional embeddings
                temb = self.time_text_embed(
                    timestep, guidance, pooled_projections
                )
                hidden_states = self.x_embedder(hidden_states)
                encoder_hidden_states = self.context_embedder(
                    encoder_hidden_states, timestep, encoder_attention_mask
                )

                # 3. Attention mask preparation
                latent_sequence_length = hidden_states.shape[1]
                latent_attention_mask = torch.ones(
                    batch_size,
                    1,
                    latent_sequence_length,
                    device=hidden_states.device,
                    dtype=torch.bool,
                )  # [B, 1, N]
                attention_mask = torch.cat(
                    [
                        latent_attention_mask,
                        encoder_attention_mask.unsqueeze(1).to(torch.bool),
                    ],
                    dim=-1,
                )  # [B, 1, N + M]

                with SparseKVAttnMode():
                    # 4. Transformer blocks
                    hidden_states, encoder_hidden_states = (
                        self.call_transformer_blocks(
                            hidden_states,
                            encoder_hidden_states,
                            temb,
                            attention_mask,
                            image_rotary_emb,
                        )
                    )

                # 5. Output projection
                hidden_states = self.norm_out(hidden_states, temb)
                hidden_states = self.proj_out(hidden_states)

                # Un-patchify: split out the patch dims, interleave them with
                # the post-patch frame/height/width dims, then merge pairs.
                hidden_states = hidden_states.reshape(
                    batch_size,
                    post_patch_num_frames,
                    post_patch_height,
                    post_patch_width,
                    -1,
                    p_t,
                    p,
                    p,
                )
                hidden_states = hidden_states.permute(0, 4, 1, 5, 2, 6, 3, 7)
                hidden_states = (
                    hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3)
                )

                hidden_states = hidden_states.to(timestep.dtype)
            finally:
                if USE_PEFT_BACKEND:
                    # remove `lora_scale` from each PEFT layer, even when the
                    # forward pass raised, so the scaling does not leak into
                    # later calls
                    unscale_lora_layers(self, lora_scale)

            if not return_dict:
                return (hidden_states,)

            return Transformer2DModelOutput(sample=hidden_states)

    transformer.forward = new_forward.__get__(transformer)

    def call_transformer_blocks(
        self, hidden_states, encoder_hidden_states, *args, **kwargs
    ):
        """Run every block of both block lists in order.

        Uses ``torch.utils.checkpoint`` when gradients are enabled and
        ``self.gradient_checkpointing`` is set; otherwise calls blocks directly.
        """
        use_checkpointing = (
            torch.is_grad_enabled() and self.gradient_checkpointing
        )
        ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False}

        def create_custom_forward(module):
            def custom_forward(*inputs):
                return module(*inputs)

            return custom_forward

        # The attributes are looked up by name right before each group is
        # iterated, which preserves the original evaluation order.
        for group_name in ("transformer_blocks", "single_transformer_blocks"):
            for block in getattr(self, group_name):
                if use_checkpointing:
                    hidden_states, encoder_hidden_states = (
                        torch.utils.checkpoint.checkpoint(
                            create_custom_forward(block),
                            hidden_states,
                            encoder_hidden_states,
                            *args,
                            **kwargs,
                            **ckpt_kwargs,
                        )
                    )
                else:
                    hidden_states, encoder_hidden_states = block(
                        hidden_states, encoder_hidden_states, *args, **kwargs
                    )

        return hidden_states, encoder_hidden_states

    transformer.call_transformer_blocks = call_transformer_blocks.__get__(
        transformer
    )

    transformer._is_pruned = True

    return transformer
256
+
257
+
258
def apply_db_prune_on_pipe(
    pipe: DiffusionPipeline,
    *,
    shallow_patch: bool = False,
    residual_diff_threshold=0.06,
    downsample_factor=1,
    warmup_steps=0,
    max_cached_steps=-1,
    **kwargs,
):
    """Enable DBPrune for a HunyuanVideo pipeline.

    The pipeline *class* ``__call__`` is wrapped so each invocation runs inside
    a fresh prune context built from the collected settings. Unless
    ``shallow_patch`` is true, ``pipe.transformer`` is patched as well.

    Args:
        pipe: Pipeline whose class (and optionally transformer) is patched.
        shallow_patch: When true, skip patching ``pipe.transformer``.
        residual_diff_threshold: Default forwarded to the prune context.
        downsample_factor: Default forwarded to the prune context.
        warmup_steps: Default forwarded to the prune context.
        max_cached_steps: Default forwarded to the prune context.
        **kwargs: Extra options handed to ``prune_context.collect_prune_kwargs``.

    Returns:
        The same ``pipe`` object.
    """
    prune_defaults = {
        "residual_diff_threshold": residual_diff_threshold,
        "downsample_factor": downsample_factor,
        "warmup_steps": warmup_steps,
        "max_cached_steps": max_cached_steps,
    }
    cache_kwargs, kwargs = prune_context.collect_prune_kwargs(
        default_attrs=prune_defaults,
        **kwargs,
    )

    already_patched = getattr(pipe, "_is_pruned", False)
    if not already_patched:
        original_call = pipe.__class__.__call__

        @functools.wraps(original_call)
        def new_call(self, *call_args, **call_kwargs):
            # A new context is created for every pipeline invocation.
            context = prune_context.create_prune_context(**cache_kwargs)
            with prune_context.prune_context(context):
                return original_call(self, *call_args, **call_kwargs)

        # The flag and the wrapper are set on the class, not the instance.
        pipe.__class__.__call__ = new_call
        pipe.__class__._is_pruned = True

    if shallow_patch:
        return pipe

    # NOTE(review): whatever collect_prune_kwargs leaves in `kwargs` is
    # forwarded here - confirm the callee accepts those keywords.
    apply_db_prune_on_transformer(pipe.transformer, **kwargs)
    return pipe
@@ -0,0 +1,99 @@
1
+ # Adapted from: https://github.com/chengzeyi/ParaAttention/tree/main/src/para_attn/first_block_cache/wan.py
2
+
3
+ import functools
4
+ import unittest
5
+
6
+ import torch
7
+ from diffusers import DiffusionPipeline, WanTransformer3DModel
8
+
9
+ from cache_dit.cache_factory.dynamic_block_prune import prune_context
10
+
11
+
12
def apply_db_prune_on_transformer(
    transformer: WanTransformer3DModel,
):
    """Patch a Wan transformer so its blocks run through DBPrune.

    ``transformer.blocks`` is wrapped in a single
    ``prune_context.DBPrunedTransformerBlocks`` module (created with
    ``return_hidden_states_only=True``). ``transformer.forward`` is replaced
    by a wrapper that swaps this wrapper in as ``blocks`` for the duration of
    each call and then delegates to the original forward.

    The patch is applied at most once per instance (guarded by the
    ``_is_pruned`` attribute).

    Args:
        transformer: The transformer instance to patch in place.

    Returns:
        The same ``transformer`` object.
    """
    # `import unittest` does not load the `unittest.mock` submodule; import it
    # explicitly instead of relying on some other package having done so.
    from unittest import mock

    if getattr(transformer, "_is_pruned", False):
        return transformer

    blocks = torch.nn.ModuleList(
        [
            prune_context.DBPrunedTransformerBlocks(
                transformer.blocks,
                transformer=transformer,
                return_hidden_states_only=True,
            )
        ]
    )

    # Bound method captured before patching, so `self` is already supplied.
    original_forward = transformer.forward

    @functools.wraps(transformer.__class__.forward)
    def new_forward(
        self,
        *args,
        **kwargs,
    ):
        with mock.patch.object(
            self,
            "blocks",
            blocks,
        ):
            return original_forward(
                *args,
                **kwargs,
            )

    transformer.forward = new_forward.__get__(transformer)

    transformer._is_pruned = True

    return transformer
51
+
52
+
53
def apply_cache_on_pipe(
    pipe: DiffusionPipeline,
    *,
    shallow_patch: bool = False,
    residual_diff_threshold=0.03,
    downsample_factor=1,
    warmup_steps=0,
    max_cached_steps=-1,
    **kwargs,
):
    """Enable DBPrune for a Wan pipeline.

    The pipeline *class* ``__call__`` is wrapped so each invocation runs inside
    a fresh prune context built from the collected settings. Unless
    ``shallow_patch`` is true, ``pipe.transformer`` is patched as well.

    SLG options (``slg_layers``, ``slg_start``, ``slg_end``) are not supported
    by this adapter yet and are therefore not part of the signature.

    Args:
        pipe: Pipeline whose class (and optionally transformer) is patched.
        shallow_patch: When true, skip patching ``pipe.transformer``.
        residual_diff_threshold: Default forwarded to the prune context.
        downsample_factor: Default forwarded to the prune context.
        warmup_steps: Default forwarded to the prune context.
        max_cached_steps: Default forwarded to the prune context.
        **kwargs: Extra options handed to ``prune_context.collect_prune_kwargs``;
            ``num_inference_steps`` is read from here (default 50).

    Returns:
        The same ``pipe`` object.
    """
    cache_kwargs, kwargs = prune_context.collect_prune_kwargs(
        default_attrs={
            "residual_diff_threshold": residual_diff_threshold,
            "downsample_factor": downsample_factor,
            "num_inference_steps": kwargs.get("num_inference_steps", 50),
            "warmup_steps": warmup_steps,
            "max_cached_steps": max_cached_steps,
        },
        **kwargs,
    )
    if not getattr(pipe, "_is_pruned", False):
        original_call = pipe.__class__.__call__

        @functools.wraps(original_call)
        def new_call(self, *args, **kwargs):
            # A new context is created for every pipeline invocation.
            with prune_context.prune_context(
                prune_context.create_prune_context(
                    **cache_kwargs,
                )
            ):
                return original_call(self, *args, **kwargs)

        # The flag and the wrapper are set on the class, not the instance.
        pipe.__class__.__call__ = new_call
        pipe.__class__._is_pruned = True

    if not shallow_patch:
        # NOTE(review): whatever collect_prune_kwargs leaves in `kwargs` is
        # forwarded here - confirm the callee accepts those keywords.
        apply_db_prune_on_transformer(pipe.transformer, **kwargs)

    return pipe


# Alias using the `apply_db_prune_on_*` naming that this module's transformer
# entry point already follows; the original name stays available.
# NOTE(review): confirm which of the two names the adapter dispatcher looks up.
apply_db_prune_on_pipe = apply_cache_on_pipe
@@ -16,6 +16,8 @@ def apply_fb_cache_on_transformer(transformer, *args, **kwargs):
16
16
  adapter_name = "cogvideox"
17
17
  elif transformer_cls_name.startswith("Wan"):
18
18
  adapter_name = "wan"
19
+ elif transformer_cls_name.startswith("HunyuanVideo"):
20
+ adapter_name = "hunyuan_video"
19
21
  else:
20
22
  raise ValueError(
21
23
  f"Unknown transformer class name: {transformer_cls_name}"
@@ -40,6 +42,8 @@ def apply_fb_cache_on_pipe(pipe: DiffusionPipeline, *args, **kwargs):
40
42
  adapter_name = "cogvideox"
41
43
  elif pipe_cls_name.startswith("Wan"):
42
44
  adapter_name = "wan"
45
+ elif pipe_cls_name.startswith("HunyuanVideo"):
46
+ adapter_name = "hunyuan_video"
43
47
  else:
44
48
  raise ValueError(f"Unknown pipeline class name: {pipe_cls_name}")
45
49
 
@@ -0,0 +1,295 @@
1
+ # Adapted from: https://github.com/chengzeyi/ParaAttention/blob/main/src/para_attn/first_block_cache/diffusers_adapters/hunyuan_video.py
2
+ import functools
3
+ import unittest
4
+ from typing import Any, Dict, Optional, Union
5
+
6
+ import torch
7
+ from diffusers import DiffusionPipeline, HunyuanVideoTransformer3DModel
8
+ from diffusers.models.modeling_outputs import Transformer2DModelOutput
9
+ from diffusers.utils import (
10
+ scale_lora_layers,
11
+ unscale_lora_layers,
12
+ USE_PEFT_BACKEND,
13
+ )
14
+
15
+ from cache_dit.cache_factory.first_block_cache import cache_context
16
+ from cache_dit.logger import init_logger
17
+
18
# Optional dependency: prefer para_attn's SparseKVAttnMode; when para_attn is
# missing, define a same-named context manager that does nothing so callers
# can still write `with SparseKVAttnMode():`.
try:
    from para_attn.para_attn_interface import SparseKVAttnMode
except ImportError:

    class SparseKVAttnMode:
        """Placeholder context manager used when ``para_attn`` is absent."""

        def __enter__(self):
            return None

        def __exit__(self, exc_type, exc_value, traceback):
            # A None result lets exceptions from the managed block propagate.
            return None

    def is_sparse_kv_attn_available():
        """Report that ``para_attn`` could not be imported."""
        return False

else:

    def is_sparse_kv_attn_available():
        """Report that ``SparseKVAttnMode`` came from ``para_attn``."""
        return True
35
+
36
+
37
+ logger = init_logger(__name__) # pylint: disable=invalid-name
38
+
39
+
40
def apply_cache_on_transformer(
    transformer: HunyuanVideoTransformer3DModel,
):
    """Patch a HunyuanVideo transformer so its blocks run through the cache.

    The entries of ``transformer.transformer_blocks`` and
    ``transformer.single_transformer_blocks`` are concatenated and wrapped in a
    single ``cache_context.CachedTransformerBlocks`` module.
    ``transformer.forward`` is replaced by a wrapper that, for the duration of
    each call, swaps that wrapper in as ``transformer_blocks`` and an empty
    ``ModuleList`` in as ``single_transformer_blocks``. A
    ``call_transformer_blocks`` method is also attached to the instance.

    The patch is applied at most once per instance (guarded by the
    ``_is_cached`` attribute).

    Args:
        transformer: The transformer instance to patch in place.

    Returns:
        The same ``transformer`` object.
    """
    # `import unittest` does not load the `unittest.mock` submodule; import it
    # explicitly instead of relying on some other package having done so.
    from unittest import mock

    if getattr(transformer, "_is_cached", False):
        return transformer

    cached_transformer_blocks = torch.nn.ModuleList(
        [
            cache_context.CachedTransformerBlocks(
                transformer.transformer_blocks
                + transformer.single_transformer_blocks,
                transformer=transformer,
            )
        ]
    )
    # Every real block already lives inside the wrapper above, so the
    # single-block list is replaced by an empty one while forward runs.
    dummy_single_transformer_blocks = torch.nn.ModuleList()

    # Bound method captured before patching; used for the parallelized path.
    original_forward = transformer.forward

    @functools.wraps(transformer.__class__.forward)
    def new_forward(
        self,
        hidden_states: torch.Tensor,
        timestep: torch.LongTensor,
        encoder_hidden_states: torch.Tensor,
        encoder_attention_mask: torch.Tensor,
        pooled_projections: torch.Tensor,
        guidance: torch.Tensor = None,
        attention_kwargs: Optional[Dict[str, Any]] = None,
        return_dict: bool = True,
        **kwargs,
    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
        with (
            mock.patch.object(
                self,
                "transformer_blocks",
                cached_transformer_blocks,
            ),
            mock.patch.object(
                self,
                "single_transformer_blocks",
                dummy_single_transformer_blocks,
            ),
        ):
            if getattr(self, "_is_parallelized", False):
                # Delegate to the pre-patch forward; it sees the swapped-in
                # block lists because the patches are still active.
                return original_forward(
                    hidden_states,
                    timestep,
                    encoder_hidden_states,
                    encoder_attention_mask,
                    pooled_projections,
                    guidance=guidance,
                    attention_kwargs=attention_kwargs,
                    return_dict=return_dict,
                    **kwargs,
                )

            scale_was_passed = False
            if attention_kwargs is not None:
                attention_kwargs = attention_kwargs.copy()
                # Record this before popping: once "scale" has been removed,
                # the non-PEFT warning below could never trigger.
                scale_was_passed = attention_kwargs.get("scale") is not None
                lora_scale = attention_kwargs.pop("scale", 1.0)
            else:
                lora_scale = 1.0

            if USE_PEFT_BACKEND:
                # weight the lora layers by setting `lora_scale` for each PEFT layer
                scale_lora_layers(self, lora_scale)
            elif scale_was_passed:
                logger.warning(
                    "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective."
                )

            try:
                # hidden_states is unpacked as a 5-D tensor; the channel
                # count is not needed below.
                batch_size, _, num_frames, height, width = hidden_states.shape
                p, p_t = self.config.patch_size, self.config.patch_size_t
                post_patch_num_frames = num_frames // p_t
                post_patch_height = height // p
                post_patch_width = width // p

                # 1. RoPE
                image_rotary_emb = self.rope(hidden_states)

                # 2. Conditional embeddings
                temb = self.time_text_embed(
                    timestep, guidance, pooled_projections
                )
                hidden_states = self.x_embedder(hidden_states)
                encoder_hidden_states = self.context_embedder(
                    encoder_hidden_states, timestep, encoder_attention_mask
                )

                # 3. Attention mask preparation
                latent_sequence_length = hidden_states.shape[1]
                latent_attention_mask = torch.ones(
                    batch_size,
                    1,
                    latent_sequence_length,
                    device=hidden_states.device,
                    dtype=torch.bool,
                )  # [B, 1, N]
                attention_mask = torch.cat(
                    [
                        latent_attention_mask,
                        encoder_attention_mask.unsqueeze(1).to(torch.bool),
                    ],
                    dim=-1,
                )  # [B, 1, N + M]

                with SparseKVAttnMode():
                    # 4. Transformer blocks
                    hidden_states, encoder_hidden_states = (
                        self.call_transformer_blocks(
                            hidden_states,
                            encoder_hidden_states,
                            temb,
                            attention_mask,
                            image_rotary_emb,
                        )
                    )

                # 5. Output projection
                hidden_states = self.norm_out(hidden_states, temb)
                hidden_states = self.proj_out(hidden_states)

                # Un-patchify: split out the patch dims, interleave them with
                # the post-patch frame/height/width dims, then merge pairs.
                hidden_states = hidden_states.reshape(
                    batch_size,
                    post_patch_num_frames,
                    post_patch_height,
                    post_patch_width,
                    -1,
                    p_t,
                    p,
                    p,
                )
                hidden_states = hidden_states.permute(0, 4, 1, 5, 2, 6, 3, 7)
                hidden_states = (
                    hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3)
                )

                hidden_states = hidden_states.to(timestep.dtype)
            finally:
                if USE_PEFT_BACKEND:
                    # remove `lora_scale` from each PEFT layer, even when the
                    # forward pass raised, so the scaling does not leak into
                    # later calls
                    unscale_lora_layers(self, lora_scale)

            if not return_dict:
                return (hidden_states,)

            return Transformer2DModelOutput(sample=hidden_states)

    transformer.forward = new_forward.__get__(transformer)

    def call_transformer_blocks(
        self, hidden_states, encoder_hidden_states, *args, **kwargs
    ):
        """Run every block of both block lists in order.

        Uses ``torch.utils.checkpoint`` when gradients are enabled and
        ``self.gradient_checkpointing`` is set; otherwise calls blocks directly.
        """
        use_checkpointing = (
            torch.is_grad_enabled() and self.gradient_checkpointing
        )
        ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False}

        def create_custom_forward(module):
            def custom_forward(*inputs):
                return module(*inputs)

            return custom_forward

        # The attributes are looked up by name right before each group is
        # iterated, which preserves the original evaluation order.
        for group_name in ("transformer_blocks", "single_transformer_blocks"):
            for block in getattr(self, group_name):
                if use_checkpointing:
                    hidden_states, encoder_hidden_states = (
                        torch.utils.checkpoint.checkpoint(
                            create_custom_forward(block),
                            hidden_states,
                            encoder_hidden_states,
                            *args,
                            **kwargs,
                            **ckpt_kwargs,
                        )
                    )
                else:
                    hidden_states, encoder_hidden_states = block(
                        hidden_states, encoder_hidden_states, *args, **kwargs
                    )

        return hidden_states, encoder_hidden_states

    transformer.call_transformer_blocks = call_transformer_blocks.__get__(
        transformer
    )

    transformer._is_cached = True

    return transformer
256
+
257
+
258
def apply_cache_on_pipe(
    pipe: DiffusionPipeline,
    *,
    shallow_patch: bool = False,
    residual_diff_threshold=0.06,
    downsample_factor=1,
    warmup_steps=0,
    max_cached_steps=-1,
    **kwargs,
):
    """Enable first-block caching for a HunyuanVideo pipeline.

    The pipeline *class* ``__call__`` is wrapped so each invocation runs inside
    a fresh cache context built from the collected settings. Unless
    ``shallow_patch`` is true, ``pipe.transformer`` is patched as well.

    Args:
        pipe: Pipeline whose class (and optionally transformer) is patched.
        shallow_patch: When true, skip patching ``pipe.transformer``.
        residual_diff_threshold: Default forwarded to the cache context.
        downsample_factor: Default forwarded to the cache context.
        warmup_steps: Default forwarded to the cache context.
        max_cached_steps: Default forwarded to the cache context.
        **kwargs: Extra options handed to ``cache_context.collect_cache_kwargs``.

    Returns:
        The same ``pipe`` object.
    """
    cache_defaults = {
        "residual_diff_threshold": residual_diff_threshold,
        "downsample_factor": downsample_factor,
        "warmup_steps": warmup_steps,
        "max_cached_steps": max_cached_steps,
    }
    cache_kwargs, kwargs = cache_context.collect_cache_kwargs(
        default_attrs=cache_defaults,
        **kwargs,
    )

    already_patched = getattr(pipe, "_is_cached", False)
    if not already_patched:
        original_call = pipe.__class__.__call__

        @functools.wraps(original_call)
        def new_call(self, *call_args, **call_kwargs):
            # A new context is created for every pipeline invocation.
            context = cache_context.create_cache_context(**cache_kwargs)
            with cache_context.cache_context(context):
                return original_call(self, *call_args, **call_kwargs)

        # The flag and the wrapper are set on the class, not the instance.
        pipe.__class__.__call__ = new_call
        pipe.__class__._is_cached = True

    if shallow_patch:
        return pipe

    # NOTE(review): whatever collect_cache_kwargs leaves in `kwargs` is
    # forwarded here - confirm the callee accepts those keywords.
    apply_cache_on_transformer(pipe.transformer, **kwargs)
    return pipe