optimum-rbln 0.8.0.post2__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package as published to their public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the registry.
- optimum/rbln/__init__.py +24 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +45 -33
- optimum/rbln/diffusers/__init__.py +21 -1
- optimum/rbln/diffusers/configurations/__init__.py +4 -0
- optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +9 -2
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +84 -0
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +4 -2
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +9 -2
- optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +70 -0
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +4 -2
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +9 -2
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +9 -2
- optimum/rbln/diffusers/configurations/pipelines/__init__.py +1 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +29 -9
- optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +114 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +28 -12
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +18 -6
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +13 -6
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +12 -6
- optimum/rbln/diffusers/modeling_diffusers.py +72 -65
- optimum/rbln/diffusers/models/__init__.py +4 -0
- optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +17 -1
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +219 -0
- optimum/rbln/diffusers/models/autoencoders/vae.py +45 -8
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +17 -1
- optimum/rbln/diffusers/models/controlnet.py +14 -8
- optimum/rbln/diffusers/models/transformers/__init__.py +1 -0
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +10 -0
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +321 -0
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -0
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +11 -1
- optimum/rbln/diffusers/pipelines/__init__.py +10 -0
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +1 -4
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/cosmos/__init__.py +17 -0
- optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +102 -0
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +455 -0
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +98 -0
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +98 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +48 -27
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +7 -0
- optimum/rbln/modeling.py +71 -37
- optimum/rbln/modeling_base.py +63 -109
- optimum/rbln/transformers/__init__.py +41 -47
- optimum/rbln/transformers/configuration_generic.py +16 -13
- optimum/rbln/transformers/modeling_generic.py +21 -22
- optimum/rbln/transformers/modeling_rope_utils.py +5 -2
- optimum/rbln/transformers/models/__init__.py +54 -4
- optimum/rbln/transformers/models/{wav2vec2/configuration_wav2vec.py → audio_spectrogram_transformer/__init__.py} +2 -4
- optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +21 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +28 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +35 -12
- optimum/rbln/transformers/models/bart/bart_architecture.py +14 -1
- optimum/rbln/transformers/models/bart/configuration_bart.py +12 -2
- optimum/rbln/transformers/models/bart/modeling_bart.py +16 -7
- optimum/rbln/transformers/models/bert/configuration_bert.py +18 -3
- optimum/rbln/transformers/models/bert/modeling_bert.py +24 -0
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +15 -3
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +50 -4
- optimum/rbln/transformers/models/clip/configuration_clip.py +15 -5
- optimum/rbln/transformers/models/clip/modeling_clip.py +38 -13
- optimum/rbln/transformers/models/colpali/__init__.py +2 -0
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +221 -0
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +68 -0
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +383 -0
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +111 -14
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +102 -35
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +253 -195
- optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
- optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +24 -0
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +27 -0
- optimum/rbln/transformers/models/dpt/configuration_dpt.py +6 -1
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +6 -1
- optimum/rbln/transformers/models/exaone/configuration_exaone.py +24 -1
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +5 -1
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +66 -5
- optimum/rbln/transformers/models/gemma/configuration_gemma.py +24 -1
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +5 -1
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +49 -0
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +3 -3
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +18 -250
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +89 -244
- optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +4 -1
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +6 -1
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +12 -2
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +41 -4
- optimum/rbln/transformers/models/llama/configuration_llama.py +24 -1
- optimum/rbln/transformers/models/llama/modeling_llama.py +49 -0
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +10 -2
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +32 -4
- optimum/rbln/transformers/models/midm/configuration_midm.py +24 -1
- optimum/rbln/transformers/models/midm/midm_architecture.py +6 -1
- optimum/rbln/transformers/models/midm/modeling_midm.py +66 -5
- optimum/rbln/transformers/models/mistral/configuration_mistral.py +24 -1
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +62 -4
- optimum/rbln/transformers/models/opt/configuration_opt.py +4 -1
- optimum/rbln/transformers/models/opt/modeling_opt.py +10 -0
- optimum/rbln/transformers/models/opt/opt_architecture.py +7 -1
- optimum/rbln/transformers/models/phi/configuration_phi.py +24 -1
- optimum/rbln/transformers/models/phi/modeling_phi.py +49 -0
- optimum/rbln/transformers/models/phi/phi_architecture.py +1 -1
- optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +24 -1
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -4
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +31 -3
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +54 -25
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +6 -4
- optimum/rbln/transformers/models/resnet/__init__.py +23 -0
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +25 -0
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +26 -0
- optimum/rbln/transformers/models/roberta/__init__.py +24 -0
- optimum/rbln/transformers/{configuration_alias.py → models/roberta/configuration_roberta.py} +12 -28
- optimum/rbln/transformers/{modeling_alias.py → models/roberta/modeling_roberta.py} +14 -28
- optimum/rbln/transformers/models/seq2seq/__init__.py +1 -1
- optimum/rbln/transformers/models/seq2seq/{configuration_seq2seq2.py → configuration_seq2seq.py} +2 -2
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +7 -3
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +41 -3
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +10 -0
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +69 -21
- optimum/rbln/transformers/models/t5/configuration_t5.py +12 -2
- optimum/rbln/transformers/models/t5/modeling_t5.py +56 -8
- optimum/rbln/transformers/models/t5/t5_architecture.py +5 -1
- optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/__init__.py +1 -1
- optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/configuration_time_series_transformer.py +9 -2
- optimum/rbln/transformers/models/{time_series_transformers/modeling_time_series_transformers.py → time_series_transformer/modeling_time_series_transformer.py} +20 -11
- optimum/rbln/transformers/models/vit/__init__.py +19 -0
- optimum/rbln/transformers/models/vit/configuration_vit.py +24 -0
- optimum/rbln/transformers/models/vit/modeling_vit.py +25 -0
- optimum/rbln/transformers/models/wav2vec2/__init__.py +1 -1
- optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +26 -0
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +10 -1
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +41 -17
- optimum/rbln/transformers/models/xlm_roberta/__init__.py +16 -2
- optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +15 -2
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +12 -3
- optimum/rbln/utils/model_utils.py +20 -0
- optimum/rbln/utils/runtime_utils.py +49 -1
- optimum/rbln/utils/submodule.py +6 -8
- {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1.dist-info}/METADATA +6 -6
- optimum_rbln-0.8.1.dist-info/RECORD +211 -0
- optimum_rbln-0.8.0.post2.dist-info/RECORD +0 -184
- /optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/time_series_transformers_architecture.py +0 -0
- {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/diffusers/configurations/models/configuration_vq_model.py

@@ -12,12 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNVQModelConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLN VQModel models, used in Kandinsky.
+
+    This class inherits from RBLNModelConfig and provides specific configuration options
+    for VQModel, which acts similarly to a VAE but uses vector quantization.
+    """
+
     def __init__(
         self,
         batch_size: Optional[int] = None,
@@ -26,7 +33,7 @@ class RBLNVQModelConfig(RBLNModelConfig):
         vqmodel_scale_factor: Optional[float] = None,  # TODO: rename to scaling_factor
         in_channels: Optional[int] = None,
         latent_channels: Optional[int] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
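A minimal sketch of constructing this config, for orientation; the argument names come from the signature above, while the concrete values and the import path (the `configurations.models` package re-exports the class, as the Kandinsky config imports below suggest) are illustrative assumptions:

```python
from optimum.rbln.diffusers.configurations.models import RBLNVQModelConfig

# Illustrative values only; every argument is optional and defaults to None.
movq_config = RBLNVQModelConfig(
    batch_size=2,
    in_channels=3,
    latent_channels=4,
)
```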
optimum/rbln/diffusers/configurations/pipelines/__init__.py

@@ -4,6 +4,7 @@ from .configuration_controlnet import (
     RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig,
     RBLNStableDiffusionXLControlNetPipelineConfig,
 )
+from .configuration_cosmos import RBLNCosmosTextToWorldPipelineConfig, RBLNCosmosVideoToWorldPipelineConfig
 from .configuration_kandinsky2_2 import (
     RBLNKandinskyV22CombinedPipelineConfig,
     RBLNKandinskyV22Img2ImgCombinedPipelineConfig,
optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py

@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNCLIPTextModelConfig, RBLNCLIPTextModelWithProjectionConfig
 from ..models import RBLNAutoencoderKLConfig, RBLNControlNetModelConfig, RBLNUNet2DConditionModelConfig
 
 
-class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
+class RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
     submodules = ["text_encoder", "unet", "vae", "controlnet"]
     _vae_uses_encoder = False
 
@@ -38,7 +38,7 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -138,15 +138,27 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
         return self.vae.sample_size
 
 
-class RBLNStableDiffusionControlNetPipelineConfig(_RBLNStableDiffusionControlNetPipelineBaseConfig):
+class RBLNStableDiffusionControlNetPipelineConfig(RBLNStableDiffusionControlNetPipelineBaseConfig):
+    """
+    Configuration for Stable Diffusion ControlNet pipeline.
+    """
+
     _vae_uses_encoder = False
 
 
-class RBLNStableDiffusionControlNetImg2ImgPipelineConfig(_RBLNStableDiffusionControlNetPipelineBaseConfig):
+class RBLNStableDiffusionControlNetImg2ImgPipelineConfig(RBLNStableDiffusionControlNetPipelineBaseConfig):
+    """
+    Configuration for Stable Diffusion ControlNet image-to-image pipeline.
+    """
+
     _vae_uses_encoder = True
 
 
-class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
+class RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
+    """
+    Base configuration for Stable Diffusion XL ControlNet pipelines.
+    """
+
     submodules = ["text_encoder", "text_encoder_2", "unet", "vae", "controlnet"]
     _vae_uses_encoder = False
 
@@ -166,7 +178,7 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -272,9 +284,17 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
         return self.vae.sample_size
 
 
-class RBLNStableDiffusionXLControlNetPipelineConfig(_RBLNStableDiffusionXLControlNetPipelineBaseConfig):
+class RBLNStableDiffusionXLControlNetPipelineConfig(RBLNStableDiffusionXLControlNetPipelineBaseConfig):
+    """
+    Configuration for Stable Diffusion XL ControlNet pipeline.
+    """
+
     _vae_uses_encoder = False
 
 
-class RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig(_RBLNStableDiffusionXLControlNetPipelineBaseConfig):
+class RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig(RBLNStableDiffusionXLControlNetPipelineBaseConfig):
+    """
+    Configuration for Stable Diffusion XL ControlNet image-to-image pipeline.
+    """
+
     _vae_uses_encoder = True
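Only the internal base classes were renamed here (the leading underscore was dropped); the user-facing config classes keep their names. A minimal sketch of constructing one of them, using argument names from the constructor shown above; the values are illustrative assumptions:

```python
from optimum.rbln.diffusers.configurations.pipelines.configuration_controlnet import (
    RBLNStableDiffusionControlNetPipelineConfig,
)

# image_size and guidance_scale appear in the constructor above; values are examples.
config = RBLNStableDiffusionControlNetPipelineConfig(
    image_size=(512, 512),
    guidance_scale=7.5,
)
```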
optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py (new file)

@@ -0,0 +1,114 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, Optional
+
+from ....configuration_utils import RBLNModelConfig
+from ....transformers import RBLNT5EncoderModelConfig
+from ....utils.logging import get_logger
+from ...pipelines.cosmos.cosmos_guardrail import RBLNCosmosSafetyCheckerConfig
+from ..models import RBLNAutoencoderKLCosmosConfig, RBLNCosmosTransformer3DModelConfig
+
+
+logger = get_logger(__name__)
+
+
+class RBLNCosmosPipelineBaseConfig(RBLNModelConfig):
+    submodules = ["text_encoder", "transformer", "vae", "safety_checker"]
+    _vae_uses_encoder = False
+
+    def __init__(
+        self,
+        text_encoder: Optional[RBLNT5EncoderModelConfig] = None,
+        transformer: Optional[RBLNCosmosTransformer3DModelConfig] = None,
+        vae: Optional[RBLNAutoencoderKLCosmosConfig] = None,
+        safety_checker: Optional[RBLNCosmosSafetyCheckerConfig] = None,
+        *,
+        batch_size: Optional[int] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
+        num_frames: Optional[int] = None,
+        fps: Optional[int] = None,
+        max_seq_len: Optional[int] = None,
+        **kwargs: Dict[str, Any],
+    ):
+        """
+        Args:
+            text_encoder (Optional[RBLNT5EncoderModelConfig]): Configuration for the text encoder component.
+                Initialized as RBLNT5EncoderModelConfig if not provided.
+            transformer (Optional[RBLNCosmosTransformer3DModelConfig]): Configuration for the Transformer model component.
+                Initialized as RBLNCosmosTransformer3DModelConfig if not provided.
+            vae (Optional[RBLNAutoencoderKLCosmosConfig]): Configuration for the VAE model component.
+                Initialized as RBLNAutoencoderKLCosmosConfig if not provided.
+            safety_checker (Optional[RBLNCosmosSafetyCheckerConfig]): Configuration for the safety checker component.
+                Initialized as RBLNCosmosSafetyCheckerConfig if not provided.
+            batch_size (Optional[int]): Batch size for inference, applied to all submodules.
+            height (Optional[int]): Height of the generated videos.
+            width (Optional[int]): Width of the generated videos.
+            num_frames (Optional[int]): The number of frames in the generated video.
+            fps (Optional[int]): The frames per second of the generated video.
+            max_seq_len (Optional[int]): Maximum sequence length supported by the model.
+            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        """
+        super().__init__(**kwargs)
+
+        self.text_encoder = self.init_submodule_config(
+            RBLNT5EncoderModelConfig, text_encoder, batch_size=batch_size, max_seq_len=max_seq_len
+        )
+        self.transformer = self.init_submodule_config(
+            RBLNCosmosTransformer3DModelConfig,
+            transformer,
+            batch_size=batch_size,
+            max_seq_len=max_seq_len,
+            height=height,
+            width=width,
+            num_frames=num_frames,
+            fps=fps,
+        )
+        self.vae = self.init_submodule_config(
+            RBLNAutoencoderKLCosmosConfig,
+            vae,
+            batch_size=batch_size,
+            uses_encoder=self.__class__._vae_uses_encoder,
+            height=height,
+            width=width,
+            num_frames=num_frames,
+        )
+        self.safety_checker = self.init_submodule_config(
+            RBLNCosmosSafetyCheckerConfig,
+            safety_checker,
+            batch_size=batch_size,
+            height=height,
+            width=width,
+        )
+
+    @property
+    def batch_size(self):
+        return self.vae.batch_size
+
+    @property
+    def max_seq_len(self):
+        return self.text_encoder.max_seq_len
+
+
+class RBLNCosmosTextToWorldPipelineConfig(RBLNCosmosPipelineBaseConfig):
+    """Config for Cosmos Text2World Pipeline"""
+
+    _vae_uses_encoder = False
+
+
+class RBLNCosmosVideoToWorldPipelineConfig(RBLNCosmosPipelineBaseConfig):
+    """Config for Cosmos Video2World Pipeline"""
+
+    _vae_uses_encoder = True
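The Cosmos pipeline configs are new in 0.8.1. A minimal sketch of constructing the text-to-world variant, using the constructor arguments and properties defined above; the concrete values are assumptions, not package defaults:

```python
from optimum.rbln.diffusers.configurations.pipelines.configuration_cosmos import (
    RBLNCosmosTextToWorldPipelineConfig,
)

config = RBLNCosmosTextToWorldPipelineConfig(
    batch_size=1,      # applied to every submodule
    height=704,        # height of the generated video (illustrative)
    width=1280,        # width of the generated video (illustrative)
    num_frames=121,    # frames per generated video (illustrative)
    fps=30,            # frames per second (illustrative)
    max_seq_len=512,   # shared by the T5 text encoder and the transformer
)

# Both properties are resolved from submodule configs, as defined above.
print(config.batch_size)   # comes from config.vae
print(config.max_seq_len)  # comes from config.text_encoder
```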
optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNCLIPTextModelWithProjectionConfig, RBLNCLIPVisionModelWithProjectionConfig
@@ -20,7 +20,7 @@ from ..models import RBLNUNet2DConditionModelConfig, RBLNVQModelConfig
 from ..models.configuration_prior_transformer import RBLNPriorTransformerConfig
 
 
-class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
+class RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
     submodules = ["unet", "movq"]
     _movq_uses_encoder = False
 
@@ -37,7 +37,7 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
         img_width: Optional[int] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -117,19 +117,27 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
         return self.movq.sample_size
 
 
-class RBLNKandinskyV22PipelineConfig(_RBLNKandinskyV22PipelineBaseConfig):
+class RBLNKandinskyV22PipelineConfig(RBLNKandinskyV22PipelineBaseConfig):
+    """Configuration class for the Kandinsky V2.2 text-to-image decoder pipeline."""
+
     _movq_uses_encoder = False
 
 
-class RBLNKandinskyV22Img2ImgPipelineConfig(_RBLNKandinskyV22PipelineBaseConfig):
+class RBLNKandinskyV22Img2ImgPipelineConfig(RBLNKandinskyV22PipelineBaseConfig):
+    """Configuration class for the Kandinsky V2.2 image-to-image decoder pipeline."""
+
     _movq_uses_encoder = True
 
 
-class RBLNKandinskyV22InpaintPipelineConfig(_RBLNKandinskyV22PipelineBaseConfig):
+class RBLNKandinskyV22InpaintPipelineConfig(RBLNKandinskyV22PipelineBaseConfig):
+    """Configuration class for the Kandinsky V2.2 inpainting decoder pipeline."""
+
     _movq_uses_encoder = True
 
 
 class RBLNKandinskyV22PriorPipelineConfig(RBLNModelConfig):
+    """Configuration class for the Kandinsky V2.2 Prior pipeline."""
+
     submodules = ["text_encoder", "image_encoder", "prior"]
 
     def __init__(
@@ -140,7 +148,7 @@ class RBLNKandinskyV22PriorPipelineConfig(RBLNModelConfig):
         *,
         batch_size: Optional[int] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Initialize a configuration for Kandinsky 2.2 prior pipeline optimized for RBLN NPU.
@@ -194,7 +202,9 @@ class RBLNKandinskyV22PriorPipelineConfig(RBLNModelConfig):
         return self.image_encoder.image_size
 
 
-class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
+class RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
+    """Base configuration class for Kandinsky V2.2 combined pipelines."""
+
     submodules = ["prior_pipe", "decoder_pipe"]
     _decoder_pipe_cls = RBLNKandinskyV22PipelineConfig
 
@@ -216,7 +226,7 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
         prior_text_encoder: Optional[RBLNCLIPTextModelWithProjectionConfig] = None,
         unet: Optional[RBLNUNet2DConditionModelConfig] = None,
         movq: Optional[RBLNVQModelConfig] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Initialize a configuration for combined Kandinsky 2.2 pipelines optimized for RBLN NPU.
@@ -325,13 +335,19 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
         return self.decoder_pipe.movq
 
 
-class RBLNKandinskyV22CombinedPipelineConfig(_RBLNKandinskyV22CombinedPipelineBaseConfig):
+class RBLNKandinskyV22CombinedPipelineConfig(RBLNKandinskyV22CombinedPipelineBaseConfig):
+    """Configuration class for the Kandinsky V2.2 combined text-to-image pipeline."""
+
     _decoder_pipe_cls = RBLNKandinskyV22PipelineConfig
 
 
-class RBLNKandinskyV22InpaintCombinedPipelineConfig(_RBLNKandinskyV22CombinedPipelineBaseConfig):
+class RBLNKandinskyV22InpaintCombinedPipelineConfig(RBLNKandinskyV22CombinedPipelineBaseConfig):
+    """Configuration class for the Kandinsky V2.2 combined inpainting pipeline."""
+
     _decoder_pipe_cls = RBLNKandinskyV22InpaintPipelineConfig
 
 
-class RBLNKandinskyV22Img2ImgCombinedPipelineConfig(_RBLNKandinskyV22CombinedPipelineBaseConfig):
+class RBLNKandinskyV22Img2ImgCombinedPipelineConfig(RBLNKandinskyV22CombinedPipelineBaseConfig):
+    """Configuration class for the Kandinsky V2.2 combined image-to-image pipeline."""
+
     _decoder_pipe_cls = RBLNKandinskyV22Img2ImgPipelineConfig
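For the Kandinsky prior pipeline config, which gains a class docstring and typed `**kwargs` here, a minimal construction sketch; the argument names are taken from the signature above, the values are illustrative assumptions:

```python
from optimum.rbln.diffusers.configurations.pipelines.configuration_kandinsky2_2 import (
    RBLNKandinskyV22PriorPipelineConfig,
)

prior_config = RBLNKandinskyV22PriorPipelineConfig(
    batch_size=1,
    guidance_scale=4.0,  # illustrative value
)
```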
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py

@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNCLIPTextModelConfig
 from ..models import RBLNAutoencoderKLConfig, RBLNUNet2DConditionModelConfig
 
 
-class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
+class RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
     submodules = ["text_encoder", "unet", "vae"]
     _vae_uses_encoder = False
 
@@ -37,7 +37,7 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -128,13 +128,25 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
         return self.vae.sample_size
 
 
-class RBLNStableDiffusionPipelineConfig(_RBLNStableDiffusionPipelineBaseConfig):
+class RBLNStableDiffusionPipelineConfig(RBLNStableDiffusionPipelineBaseConfig):
+    """
+    Configuration for Stable Diffusion pipeline.
+    """
+
     _vae_uses_encoder = False
 
 
-class RBLNStableDiffusionImg2ImgPipelineConfig(_RBLNStableDiffusionPipelineBaseConfig):
+class RBLNStableDiffusionImg2ImgPipelineConfig(RBLNStableDiffusionPipelineBaseConfig):
+    """
+    Configuration for Stable Diffusion image-to-image pipeline.
+    """
+
     _vae_uses_encoder = True
 
 
-class RBLNStableDiffusionInpaintPipelineConfig(_RBLNStableDiffusionPipelineBaseConfig):
+class RBLNStableDiffusionInpaintPipelineConfig(RBLNStableDiffusionPipelineBaseConfig):
+    """
+    Configuration for Stable Diffusion inpainting pipeline.
+    """
+
     _vae_uses_encoder = True
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py

@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNCLIPTextModelWithProjectionConfig, RBLNT5EncoderModelConfig
 from ..models import RBLNAutoencoderKLConfig, RBLNSD3Transformer2DModelConfig
 
 
-class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
+class RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
     submodules = ["transformer", "text_encoder", "text_encoder_2", "text_encoder_3", "vae"]
     _vae_uses_encoder = False
 
@@ -40,7 +40,7 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
         height: Optional[int] = None,
         width: Optional[int] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -111,6 +111,7 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
             text_encoder_3,
             batch_size=batch_size,
             max_seq_len=max_seq_len,
+            model_input_names=["input_ids"],
         )
         self.transformer = self.init_submodule_config(
             RBLNSD3Transformer2DModelConfig,
@@ -153,13 +154,19 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
         return self.vae.sample_size
 
 
-class RBLNStableDiffusion3PipelineConfig(_RBLNStableDiffusion3PipelineBaseConfig):
+class RBLNStableDiffusion3PipelineConfig(RBLNStableDiffusion3PipelineBaseConfig):
+    """Config for SD3 Text2Img Pipeline"""
+
     _vae_uses_encoder = False
 
 
-class RBLNStableDiffusion3Img2ImgPipelineConfig(_RBLNStableDiffusion3PipelineBaseConfig):
+class RBLNStableDiffusion3Img2ImgPipelineConfig(RBLNStableDiffusion3PipelineBaseConfig):
+    """Config for SD3 Img2Img Pipeline"""
+
     _vae_uses_encoder = True
 
 
-class RBLNStableDiffusion3InpaintPipelineConfig(_RBLNStableDiffusion3PipelineBaseConfig):
+class RBLNStableDiffusion3InpaintPipelineConfig(RBLNStableDiffusion3PipelineBaseConfig):
+    """Config for SD3 Inpainting Pipeline"""
+
     _vae_uses_encoder = True
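Note the new `model_input_names=["input_ids"]` forwarded to the `text_encoder_3` (T5) submodule config. A minimal sketch of constructing the SD3 pipeline config, assuming `max_seq_len` is the constructor argument forwarded in the hunk above; the values are illustrative:

```python
from optimum.rbln.diffusers.configurations.pipelines.configuration_stable_diffusion_3 import (
    RBLNStableDiffusion3PipelineConfig,
)

config = RBLNStableDiffusion3PipelineConfig(
    height=1024,         # illustrative
    width=1024,          # illustrative
    guidance_scale=7.0,  # illustrative
    max_seq_len=256,     # forwarded to text_encoder_3 along with model_input_names=["input_ids"]
)
```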
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py

@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNCLIPTextModelConfig, RBLNCLIPTextModelWithProjectionConfig
 from ..models import RBLNAutoencoderKLConfig, RBLNUNet2DConditionModelConfig
 
 
-class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
+class RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
     submodules = ["text_encoder", "text_encoder_2", "unet", "vae"]
     _vae_uses_encoder = False
 
@@ -38,7 +38,7 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -134,13 +134,19 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
         return self.vae.sample_size
 
 
-class RBLNStableDiffusionXLPipelineConfig(_RBLNStableDiffusionXLPipelineBaseConfig):
+class RBLNStableDiffusionXLPipelineConfig(RBLNStableDiffusionXLPipelineBaseConfig):
+    """Config for SDXL Text2Img Pipeline"""
+
     _vae_uses_encoder = False
 
 
-class RBLNStableDiffusionXLImg2ImgPipelineConfig(_RBLNStableDiffusionXLPipelineBaseConfig):
+class RBLNStableDiffusionXLImg2ImgPipelineConfig(RBLNStableDiffusionXLPipelineBaseConfig):
+    """Config for SDXL Img2Img Pipeline"""
+
     _vae_uses_encoder = True
 
 
-class RBLNStableDiffusionXLInpaintPipelineConfig(_RBLNStableDiffusionXLPipelineBaseConfig):
+class RBLNStableDiffusionXLInpaintPipelineConfig(RBLNStableDiffusionXLPipelineBaseConfig):
+    """Config for SDXL Inpainting Pipeline"""
+
     _vae_uses_encoder = True