optimum-rbln 0.9.3__py3-none-any.whl → 0.9.3rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. optimum/rbln/__init__.py +0 -12
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +2 -4
  4. optimum/rbln/diffusers/__init__.py +0 -12
  5. optimum/rbln/diffusers/configurations/__init__.py +0 -3
  6. optimum/rbln/diffusers/configurations/models/__init__.py +0 -2
  7. optimum/rbln/diffusers/configurations/pipelines/__init__.py +0 -3
  8. optimum/rbln/diffusers/models/__init__.py +3 -17
  9. optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -1
  10. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -3
  11. optimum/rbln/diffusers/models/autoencoders/vae.py +8 -27
  12. optimum/rbln/diffusers/models/controlnet.py +1 -16
  13. optimum/rbln/diffusers/models/transformers/prior_transformer.py +2 -16
  14. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +1 -16
  15. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +1 -14
  16. optimum/rbln/diffusers/models/unets/__init__.py +0 -1
  17. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +1 -17
  18. optimum/rbln/diffusers/pipelines/__init__.py +0 -4
  19. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -20
  20. optimum/rbln/modeling.py +45 -20
  21. optimum/rbln/modeling_base.py +1 -0
  22. optimum/rbln/transformers/configuration_generic.py +27 -0
  23. optimum/rbln/transformers/modeling_attention_utils.py +109 -242
  24. optimum/rbln/transformers/modeling_generic.py +61 -2
  25. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +2 -28
  26. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +5 -68
  27. optimum/rbln/transformers/models/bart/modeling_bart.py +2 -23
  28. optimum/rbln/transformers/models/bert/modeling_bert.py +1 -86
  29. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +15 -42
  30. optimum/rbln/transformers/models/clip/modeling_clip.py +2 -40
  31. optimum/rbln/transformers/models/colpali/modeling_colpali.py +44 -5
  32. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +1 -6
  33. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +2 -6
  34. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +9 -17
  35. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +12 -36
  36. optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +0 -17
  37. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +0 -24
  38. optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -17
  39. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +5 -3
  40. optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +8 -24
  41. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +5 -3
  42. optimum/rbln/transformers/models/llava/modeling_llava.py +24 -36
  43. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +4 -2
  44. optimum/rbln/transformers/models/opt/modeling_opt.py +2 -2
  45. optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +1 -1
  46. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +1 -13
  47. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +3 -2
  48. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +3 -2
  49. optimum/rbln/transformers/models/resnet/configuration_resnet.py +0 -17
  50. optimum/rbln/transformers/models/resnet/modeling_resnet.py +0 -73
  51. optimum/rbln/transformers/models/roberta/modeling_roberta.py +0 -33
  52. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +4 -2
  53. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +10 -34
  54. optimum/rbln/transformers/models/siglip/modeling_siglip.py +1 -17
  55. optimum/rbln/transformers/models/swin/modeling_swin.py +1 -14
  56. optimum/rbln/transformers/models/t5/modeling_t5.py +2 -2
  57. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +2 -16
  58. optimum/rbln/transformers/models/vit/modeling_vit.py +0 -19
  59. optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +3 -15
  60. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +8 -60
  61. optimum/rbln/transformers/models/whisper/generation_whisper.py +14 -48
  62. optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -2
  63. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -43
  64. optimum/rbln/transformers/utils/rbln_quantization.py +0 -9
  65. optimum/rbln/utils/depreacate_utils.py +16 -0
  66. optimum/rbln/utils/hub.py +3 -14
  67. optimum/rbln/utils/runtime_utils.py +0 -32
  68. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/METADATA +2 -2
  69. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/RECORD +72 -79
  70. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/WHEEL +1 -1
  71. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +0 -67
  72. optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +0 -59
  73. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +0 -114
  74. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +0 -275
  75. optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +0 -201
  76. optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +0 -15
  77. optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +0 -46
  78. optimum/rbln/utils/deprecation.py +0 -213
  79. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/entry_points.txt +0 -0
  80. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/licenses/LICENSE +0 -0
--- a/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py
+++ /dev/null
@@ -1,275 +0,0 @@
-# Copyright 2025 Rebellions Inc. All rights reserved.
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-
-# http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import TYPE_CHECKING, Dict, List, Tuple, Union
-
-import rebel
-import torch  # noqa: I001
-from diffusers import AutoencoderKLTemporalDecoder
-from diffusers.models.autoencoders.vae import DecoderOutput
-from diffusers.models.modeling_outputs import AutoencoderKLOutput
-from transformers import PretrainedConfig
-
-from ....configuration_utils import RBLNCompileConfig
-from ....modeling import RBLNModel
-from ....utils.logging import get_logger
-from ...configurations import RBLNAutoencoderKLTemporalDecoderConfig
-from ...modeling_diffusers import RBLNDiffusionMixin
-from .vae import (
-    DiagonalGaussianDistribution,
-    RBLNRuntimeVAEDecoder,
-    RBLNRuntimeVAEEncoder,
-    _VAEEncoder,
-    _VAETemporalDecoder,
-)
-
-
-if TYPE_CHECKING:
-    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig, PreTrainedModel
-
-    from ...modeling_diffusers import RBLNDiffusionMixin, RBLNDiffusionMixinConfig
-
-logger = get_logger(__name__)
-
-
-class RBLNAutoencoderKLTemporalDecoder(RBLNModel):
-    auto_model_class = AutoencoderKLTemporalDecoder
-    hf_library_name = "diffusers"
-    _rbln_config_class = RBLNAutoencoderKLTemporalDecoderConfig
-
-    def __post_init__(self, **kwargs):
-        super().__post_init__(**kwargs)
-
-        if self.rbln_config.uses_encoder:
-            self.encoder = RBLNRuntimeVAEEncoder(runtime=self.model[0], main_input_name="x")
-        self.decoder = RBLNRuntimeVAEDecoder(runtime=self.model[-1], main_input_name="z")
-        self.image_size = self.rbln_config.image_size
-
-    @classmethod
-    def _wrap_model_if_needed(
-        cls, model: torch.nn.Module, rbln_config: RBLNAutoencoderKLTemporalDecoderConfig
-    ) -> torch.nn.Module:
-        decoder_model = _VAETemporalDecoder(model)
-        decoder_model.num_frames = rbln_config.decode_chunk_size
-        decoder_model.eval()
-
-        if rbln_config.uses_encoder:
-            encoder_model = _VAEEncoder(model)
-            encoder_model.eval()
-            return encoder_model, decoder_model
-        else:
-            return decoder_model
-
-    @classmethod
-    def get_compiled_model(
-        cls, model, rbln_config: RBLNAutoencoderKLTemporalDecoderConfig
-    ) -> Dict[str, rebel.RBLNCompiledModel]:
-        compiled_models = {}
-        if rbln_config.uses_encoder:
-            encoder_model, decoder_model = cls._wrap_model_if_needed(model, rbln_config)
-            enc_compiled_model = cls.compile(
-                encoder_model,
-                rbln_compile_config=rbln_config.compile_cfgs[0],
-                create_runtimes=rbln_config.create_runtimes,
-                device=rbln_config.device_map["encoder"],
-            )
-            compiled_models["encoder"] = enc_compiled_model
-        else:
-            decoder_model = cls._wrap_model_if_needed(model, rbln_config)
-        dec_compiled_model = cls.compile(
-            decoder_model,
-            rbln_compile_config=rbln_config.compile_cfgs[-1],
-            create_runtimes=rbln_config.create_runtimes,
-            device=rbln_config.device_map["decoder"],
-        )
-        compiled_models["decoder"] = dec_compiled_model
-
-        return compiled_models
-
-    @classmethod
-    def get_vae_sample_size(
-        cls,
-        pipe: "RBLNDiffusionMixin",
-        rbln_config: RBLNAutoencoderKLTemporalDecoderConfig,
-        return_vae_scale_factor: bool = False,
-    ) -> Tuple[int, int]:
-        sample_size = rbln_config.sample_size
-        if hasattr(pipe, "vae_scale_factor"):
-            vae_scale_factor = pipe.vae_scale_factor
-        else:
-            if hasattr(pipe.vae.config, "block_out_channels"):
-                vae_scale_factor = 2 ** (len(pipe.vae.config.block_out_channels) - 1)
-            else:
-                vae_scale_factor = 8  # vae image processor default value 8 (int)
-
-        if sample_size is None:
-            sample_size = pipe.unet.config.sample_size
-            if isinstance(sample_size, int):
-                sample_size = (sample_size, sample_size)
-            sample_size = (sample_size[0] * vae_scale_factor, sample_size[1] * vae_scale_factor)
-
-        if return_vae_scale_factor:
-            return sample_size, vae_scale_factor
-        else:
-            return sample_size
-
-    @classmethod
-    def update_rbln_config_using_pipe(
-        cls, pipe: "RBLNDiffusionMixin", rbln_config: "RBLNDiffusionMixinConfig", submodule_name: str
-    ) -> "RBLNDiffusionMixinConfig":
-        rbln_config.vae.sample_size, rbln_config.vae.vae_scale_factor = cls.get_vae_sample_size(
-            pipe, rbln_config.vae, return_vae_scale_factor=True
-        )
-
-        if rbln_config.vae.num_frames is None:
-            if hasattr(pipe.unet.config, "num_frames"):
-                rbln_config.vae.num_frames = pipe.unet.config.num_frames
-            else:
-                raise ValueError("num_frames should be specified in unet config.json")
-
-        if rbln_config.vae.decode_chunk_size is None:
-            rbln_config.vae.decode_chunk_size = rbln_config.vae.num_frames
-
-        def chunk_frame(num_frames, decode_chunk_size):
-            # get closest divisor to num_frames
-            divisors = [i for i in range(1, num_frames) if num_frames % i == 0]
-            closest = min(divisors, key=lambda x: abs(x - decode_chunk_size))
-            if decode_chunk_size != closest:
-                logger.warning(
-                    f"To ensure successful model compilation and prevent device OOM, {decode_chunk_size} is set to {closest}."
-                )
-            return closest
-
-        decode_chunk_size = chunk_frame(rbln_config.vae.num_frames, rbln_config.vae.decode_chunk_size)
-        rbln_config.vae.decode_chunk_size = decode_chunk_size
-        return rbln_config
-
-    @classmethod
-    def _update_rbln_config(
-        cls,
-        preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
-        model: "PreTrainedModel",
-        model_config: "PretrainedConfig",
-        rbln_config: RBLNAutoencoderKLTemporalDecoderConfig,
-    ) -> RBLNAutoencoderKLTemporalDecoderConfig:
-        if rbln_config.sample_size is None:
-            rbln_config.sample_size = model_config.sample_size
-
-        if rbln_config.vae_scale_factor is None:
-            if hasattr(model_config, "block_out_channels"):
-                rbln_config.vae_scale_factor = 2 ** (len(model_config.block_out_channels) - 1)
-            else:
-                # vae image processor default value 8 (int)
-                rbln_config.vae_scale_factor = 8
-
-        compile_cfgs = []
-        if rbln_config.uses_encoder:
-            vae_enc_input_info = [
-                (
-                    "x",
-                    [
-                        rbln_config.batch_size,
-                        model_config.in_channels,
-                        rbln_config.sample_size[0],
-                        rbln_config.sample_size[1],
-                    ],
-                    "float32",
-                )
-            ]
-            compile_cfgs.append(RBLNCompileConfig(compiled_model_name="encoder", input_info=vae_enc_input_info))
-
-        decode_batch_size = rbln_config.batch_size * rbln_config.decode_chunk_size
-        vae_dec_input_info = [
-            (
-                "z",
-                [
-                    decode_batch_size,
-                    model_config.latent_channels,
-                    rbln_config.latent_sample_size[0],
-                    rbln_config.latent_sample_size[1],
-                ],
-                "float32",
-            )
-        ]
-        compile_cfgs.append(RBLNCompileConfig(compiled_model_name="decoder", input_info=vae_dec_input_info))
-
-        rbln_config.set_compile_cfgs(compile_cfgs)
-        return rbln_config
-
-    @classmethod
-    def _create_runtimes(
-        cls,
-        compiled_models: List[rebel.RBLNCompiledModel],
-        rbln_config: RBLNAutoencoderKLTemporalDecoderConfig,
-    ) -> List[rebel.Runtime]:
-        if len(compiled_models) == 1:
-            # decoder
-            expected_models = ["decoder"]
-        else:
-            expected_models = ["encoder", "decoder"]
-
-        if any(model_name not in rbln_config.device_map for model_name in expected_models):
-            cls._raise_missing_compiled_file_error(expected_models)
-
-        device_vals = [rbln_config.device_map[model_name] for model_name in expected_models]
-        return [
-            rebel.Runtime(
-                compiled_model,
-                tensor_type="pt",
-                device=device_val,
-                activate_profiler=rbln_config.activate_profiler,
-                timeout=rbln_config.timeout,
-            )
-            for compiled_model, device_val in zip(compiled_models, device_vals)
-        ]
-
-    def encode(
-        self, x: torch.FloatTensor, return_dict: bool = True
-    ) -> Union[AutoencoderKLOutput, Tuple[DiagonalGaussianDistribution]]:
-        """
-        Encode an input image into a latent representation.
-
-        Args:
-            x: The input image to encode.
-            return_dict:
-                Whether to return output as a dictionary. Defaults to True.
-
-        Returns:
-            The latent representation or AutoencoderKLOutput if return_dict=True
-        """
-        posterior = self.encoder.encode(x)
-
-        if not return_dict:
-            return (posterior,)
-
-        return AutoencoderKLOutput(latent_dist=posterior)
-
-    def decode(self, z: torch.FloatTensor, return_dict: bool = True) -> torch.FloatTensor:
-        """
-        Decode a latent representation into a video.
-
-        Args:
-            z: The latent representation to decode.
-            return_dict:
-                Whether to return output as a dictionary. Defaults to True.
-
-        Returns:
-            The decoded video or DecoderOutput if return_dict=True
-        """
-        decoded = self.decoder.decode(z)
-
-        if not return_dict:
-            return (decoded,)
-
-        return DecoderOutput(sample=decoded)
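
Migration note: the removed update_rbln_config_using_pipe above snaps decode_chunk_size to the nearest proper divisor of num_frames, because the compiled decoder runs with a static input of batch_size * decode_chunk_size latent frames. A minimal standalone sketch of that selection rule; the name closest_divisor and the example values are illustrative, not part of the package:

def closest_divisor(num_frames: int, decode_chunk_size: int) -> int:
    # Proper divisors of num_frames, mirroring range(1, num_frames) in the
    # removed code, so num_frames itself is never excluded from tiling.
    divisors = [i for i in range(1, num_frames) if num_frames % i == 0]
    return min(divisors, key=lambda d: abs(d - decode_chunk_size))

# With num_frames=14 (the base SVD checkpoint), proper divisors are 1, 2, 7.
assert closest_divisor(14, 6) == 7  # a requested chunk of 6 is snapped to 7
assert closest_divisor(14, 2) == 2  # an exact divisor is kept as-is

On a tie, min() keeps the first, i.e. smaller, divisor it encounters in the ascending list.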
--- a/optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py
+++ /dev/null
@@ -1,201 +0,0 @@
-# Copyright 2025 Rebellions Inc. All rights reserved.
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-
-# http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
-
-import torch
-from diffusers.models.unets.unet_spatio_temporal_condition import (
-    UNetSpatioTemporalConditionModel,
-    UNetSpatioTemporalConditionOutput,
-)
-from transformers import PretrainedConfig
-
-from ....configuration_utils import RBLNCompileConfig
-from ....modeling import RBLNModel
-from ....utils.logging import get_logger
-from ...configurations import RBLNUNetSpatioTemporalConditionModelConfig
-from ...modeling_diffusers import RBLNDiffusionMixin, RBLNDiffusionMixinConfig
-
-
-if TYPE_CHECKING:
-    from transformers import AutoFeatureExtractor, AutoProcessor, PreTrainedModel
-
-logger = get_logger(__name__)
-
-
-class _UNet_STCM(torch.nn.Module):
-    def __init__(self, unet: "UNetSpatioTemporalConditionModel"):
-        super().__init__()
-        self.unet = unet
-
-    def forward(
-        self,
-        sample: torch.Tensor,
-        timestep: Union[torch.Tensor, float, int],
-        encoder_hidden_states: torch.Tensor,
-        added_time_ids: torch.Tensor,
-    ) -> torch.Tensor:
-        unet_out = self.unet(
-            sample=sample,
-            timestep=timestep,
-            encoder_hidden_states=encoder_hidden_states,
-            added_time_ids=added_time_ids,
-            return_dict=False,
-        )
-        return unet_out
-
-
-class RBLNUNetSpatioTemporalConditionModel(RBLNModel):
-    hf_library_name = "diffusers"
-    auto_model_class = UNetSpatioTemporalConditionModel
-    _rbln_config_class = RBLNUNetSpatioTemporalConditionModelConfig
-    output_class = UNetSpatioTemporalConditionOutput
-    output_key = "sample"
-
-    def __post_init__(self, **kwargs):
-        super().__post_init__(**kwargs)
-        self.in_features = self.rbln_config.in_features
-        if self.in_features is not None:
-
-            @dataclass
-            class LINEAR1:
-                in_features: int
-
-            @dataclass
-            class ADDEMBEDDING:
-                linear_1: LINEAR1
-
-            self.add_embedding = ADDEMBEDDING(LINEAR1(self.in_features))
-
-    @classmethod
-    def _wrap_model_if_needed(
-        cls, model: torch.nn.Module, rbln_config: RBLNUNetSpatioTemporalConditionModelConfig
-    ) -> torch.nn.Module:
-        return _UNet_STCM(model).eval()
-
-    @classmethod
-    def get_unet_sample_size(
-        cls,
-        pipe: RBLNDiffusionMixin,
-        rbln_config: RBLNUNetSpatioTemporalConditionModelConfig,
-        image_size: Optional[Tuple[int, int]] = None,
-    ) -> Union[int, Tuple[int, int]]:
-        scale_factor = pipe.vae_scale_factor
-
-        if image_size is None:
-            vae_sample_size = pipe.vae.config.sample_size
-            if isinstance(vae_sample_size, int):
-                vae_sample_size = (vae_sample_size, vae_sample_size)
-
-            sample_size = (
-                vae_sample_size[0] // scale_factor,
-                vae_sample_size[1] // scale_factor,
-            )
-        else:
-            sample_size = (image_size[0] // scale_factor, image_size[1] // scale_factor)
-        return sample_size
-
-    @classmethod
-    def update_rbln_config_using_pipe(
-        cls, pipe: RBLNDiffusionMixin, rbln_config: "RBLNDiffusionMixinConfig", submodule_name: str
-    ) -> Dict[str, Any]:
-        rbln_config.unet.sample_size = cls.get_unet_sample_size(
-            pipe, rbln_config.unet, image_size=rbln_config.image_size
-        )
-        return rbln_config
-
-    @classmethod
-    def _update_rbln_config(
-        cls,
-        preprocessors: Union["AutoFeatureExtractor", "AutoProcessor"],
-        model: "PreTrainedModel",
-        model_config: "PretrainedConfig",
-        rbln_config: RBLNUNetSpatioTemporalConditionModelConfig,
-    ) -> RBLNUNetSpatioTemporalConditionModelConfig:
-        if rbln_config.num_frames is None:
-            rbln_config.num_frames = model_config.num_frames
-
-        if rbln_config.sample_size is None:
-            rbln_config.sample_size = model_config.sample_size
-
-        input_info = [
-            (
-                "sample",
-                [
-                    rbln_config.batch_size,
-                    rbln_config.num_frames,
-                    model_config.in_channels,
-                    rbln_config.sample_size[0],
-                    rbln_config.sample_size[1],
-                ],
-                "float32",
-            ),
-            ("timestep", [], "float32"),
-            ("encoder_hidden_states", [rbln_config.batch_size, 1, model_config.cross_attention_dim], "float32"),
-            ("added_time_ids", [rbln_config.batch_size, 3], "float32"),
-        ]
-
-        if hasattr(model_config, "addition_time_embed_dim"):
-            rbln_config.in_features = model_config.projection_class_embeddings_input_dim
-
-        rbln_compile_config = RBLNCompileConfig(input_info=input_info)
-        rbln_config.set_compile_cfgs([rbln_compile_config])
-
-        return rbln_config
-
-    @property
-    def compiled_batch_size(self):
-        return self.rbln_config.compile_cfgs[0].input_info[0][1][0]
-
-    def forward(
-        self,
-        sample: torch.Tensor,
-        timestep: Union[torch.Tensor, float, int],
-        encoder_hidden_states: torch.Tensor,
-        added_time_ids: torch.Tensor,
-        return_dict: bool = True,
-        **kwargs,
-    ) -> Union[UNetSpatioTemporalConditionOutput, Tuple]:
-        """
-        Forward pass for the RBLN-optimized UNetSpatioTemporalConditionModel.
-
-        Args:
-            sample (torch.Tensor): The noisy input tensor with the following shape `(batch, channel, height, width)`.
-            timestep (Union[torch.Tensor, float, int]): The number of timesteps to denoise an input.
-            encoder_hidden_states (torch.Tensor): The encoder hidden states.
-            added_time_ids (torch.Tensor): A tensor containing additional sinusoidal embeddings and added to the time embeddings.
-            return_dict (bool): Whether or not to return a [`~diffusers.models.unets.unet_spatio_temporal_condition.UNetSpatioTemporalConditionOutput`] instead of a plain tuple.
-
-        Returns:
-            (Union[`~diffusers.models.unets.unet_spatio_temporal_condition.UNetSpatioTemporalConditionOutput`], Tuple)
-        """
-        sample_batch_size = sample.size()[0]
-        compiled_batch_size = self.compiled_batch_size
-        if sample_batch_size != compiled_batch_size and (
-            sample_batch_size * 2 == compiled_batch_size or sample_batch_size == compiled_batch_size * 2
-        ):
-            raise ValueError(
-                f"Mismatch between UNet's runtime batch size ({sample_batch_size}) and compiled batch size ({compiled_batch_size}). "
-                "This may be caused by the 'guidance scale' parameter, which doubles the runtime batch size in Stable Diffusion. "
-                "Adjust the batch size during compilation or modify the 'guidance scale' to match the compiled batch size.\n\n"
-                "For details, see: https://docs.rbln.ai/software/optimum/model_api.html#stable-diffusion"
-            )
-        return super().forward(
-            sample.contiguous(),
-            timestep.float(),
-            encoder_hidden_states,
-            added_time_ids,
-            return_dict=return_dict,
-        )
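
The removed forward above also guards against the most common shape error with static NPU graphs: classifier-free guidance doubling the runtime batch relative to the batch the UNet was compiled for. A standalone sketch of just that check; the function name and message are illustrative, not the package's API:

def check_batch_vs_compiled(runtime_bs: int, compiled_bs: int) -> None:
    # Flags only the 2x mismatch in either direction, which is the signature
    # of guidance scale doubling (or halving) the batch; other mismatches
    # fall through and surface later as runtime shape errors.
    if runtime_bs != compiled_bs and (
        runtime_bs * 2 == compiled_bs or runtime_bs == compiled_bs * 2
    ):
        raise ValueError(
            f"Runtime batch size {runtime_bs} != compiled batch size {compiled_bs}; "
            "recompile with the doubled batch or adjust the guidance scale."
        )

check_batch_vs_compiled(2, 2)  # passes: shapes agree
# check_batch_vs_compiled(1, 2) would raise: CFG doubled the compiled batch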
--- a/optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright 2025 Rebellions Inc. All rights reserved.
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-
-# http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .pipeline_stable_video_diffusion import RBLNStableVideoDiffusionPipeline
--- a/optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright 2025 Rebellions Inc. All rights reserved.
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-
-# http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from diffusers import StableVideoDiffusionPipeline
-
-from ....utils.logging import get_logger
-from ...configurations import RBLNStableVideoDiffusionPipelineConfig
-from ...modeling_diffusers import RBLNDiffusionMixin
-
-
-logger = get_logger(__name__)
-
-
-class RBLNStableVideoDiffusionPipeline(RBLNDiffusionMixin, StableVideoDiffusionPipeline):
-    """
-    RBLN-accelerated implementation of Stable Video Diffusion pipeline for image-to-video generation.
-
-    This pipeline compiles Stable Video Diffusion models to run efficiently on RBLN NPUs, enabling high-performance
-    inference for generating videos from images with optimized memory usage and throughput.
-    """
-
-    original_class = StableVideoDiffusionPipeline
-    _rbln_config_class = RBLNStableVideoDiffusionPipelineConfig
-    _submodules = ["image_encoder", "unet", "vae"]
-
-    def handle_additional_kwargs(self, **kwargs):
-        compiled_num_frames = self.unet.rbln_config.num_frames
-        if compiled_num_frames is not None:
-            kwargs["num_frames"] = compiled_num_frames
-
-        compiled_decode_chunk_size = self.vae.rbln_config.decode_chunk_size
-        if compiled_decode_chunk_size is not None:
-            kwargs["decode_chunk_size"] = compiled_decode_chunk_size
-        return kwargs
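
Finally, note that the removed handle_additional_kwargs silently overrides call-time arguments with the values its submodules were compiled for, since the NPU graphs are static. A rough sketch of that behavior in isolation; the helper name and example values are stand-ins, not the real pipeline objects:

def pin_to_compiled(kwargs: dict, compiled_num_frames, compiled_decode_chunk_size) -> dict:
    # Whatever the caller passed, the compiled graph dictates these two values.
    if compiled_num_frames is not None:
        kwargs["num_frames"] = compiled_num_frames
    if compiled_decode_chunk_size is not None:
        kwargs["decode_chunk_size"] = compiled_decode_chunk_size
    return kwargs

print(pin_to_compiled({"num_frames": 25}, 14, 7))
# -> {'num_frames': 14, 'decode_chunk_size': 7}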