optimum-rbln 0.8.0.post2__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. optimum/rbln/__init__.py +24 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +45 -33
  4. optimum/rbln/diffusers/__init__.py +21 -1
  5. optimum/rbln/diffusers/configurations/__init__.py +4 -0
  6. optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
  7. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +9 -2
  8. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +84 -0
  9. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +4 -2
  10. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +9 -2
  11. optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +70 -0
  12. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +4 -2
  13. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +9 -2
  14. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +9 -2
  15. optimum/rbln/diffusers/configurations/pipelines/__init__.py +1 -0
  16. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +29 -9
  17. optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +114 -0
  18. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +28 -12
  19. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +18 -6
  20. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +13 -6
  21. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +12 -6
  22. optimum/rbln/diffusers/modeling_diffusers.py +72 -65
  23. optimum/rbln/diffusers/models/__init__.py +4 -0
  24. optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
  25. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +17 -1
  26. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +219 -0
  27. optimum/rbln/diffusers/models/autoencoders/vae.py +45 -8
  28. optimum/rbln/diffusers/models/autoencoders/vq_model.py +17 -1
  29. optimum/rbln/diffusers/models/controlnet.py +14 -8
  30. optimum/rbln/diffusers/models/transformers/__init__.py +1 -0
  31. optimum/rbln/diffusers/models/transformers/prior_transformer.py +10 -0
  32. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +321 -0
  33. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -0
  34. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +11 -1
  35. optimum/rbln/diffusers/pipelines/__init__.py +10 -0
  36. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +1 -4
  37. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -0
  38. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -0
  39. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +7 -0
  40. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +7 -0
  41. optimum/rbln/diffusers/pipelines/cosmos/__init__.py +17 -0
  42. optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +102 -0
  43. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +455 -0
  44. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +98 -0
  45. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +98 -0
  46. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +7 -0
  47. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +48 -27
  48. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +7 -0
  49. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +7 -0
  50. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -0
  51. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +7 -0
  52. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +7 -0
  53. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +7 -0
  54. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -0
  55. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -0
  56. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -0
  57. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +7 -0
  58. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -0
  59. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +7 -0
  60. optimum/rbln/modeling.py +71 -37
  61. optimum/rbln/modeling_base.py +63 -109
  62. optimum/rbln/transformers/__init__.py +41 -47
  63. optimum/rbln/transformers/configuration_generic.py +16 -13
  64. optimum/rbln/transformers/modeling_generic.py +21 -22
  65. optimum/rbln/transformers/modeling_rope_utils.py +5 -2
  66. optimum/rbln/transformers/models/__init__.py +54 -4
  67. optimum/rbln/transformers/models/{wav2vec2/configuration_wav2vec.py → audio_spectrogram_transformer/__init__.py} +2 -4
  68. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +21 -0
  69. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +28 -0
  70. optimum/rbln/transformers/models/auto/auto_factory.py +35 -12
  71. optimum/rbln/transformers/models/bart/bart_architecture.py +14 -1
  72. optimum/rbln/transformers/models/bart/configuration_bart.py +12 -2
  73. optimum/rbln/transformers/models/bart/modeling_bart.py +16 -7
  74. optimum/rbln/transformers/models/bert/configuration_bert.py +18 -3
  75. optimum/rbln/transformers/models/bert/modeling_bert.py +24 -0
  76. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +15 -3
  77. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +50 -4
  78. optimum/rbln/transformers/models/clip/configuration_clip.py +15 -5
  79. optimum/rbln/transformers/models/clip/modeling_clip.py +38 -13
  80. optimum/rbln/transformers/models/colpali/__init__.py +2 -0
  81. optimum/rbln/transformers/models/colpali/colpali_architecture.py +221 -0
  82. optimum/rbln/transformers/models/colpali/configuration_colpali.py +68 -0
  83. optimum/rbln/transformers/models/colpali/modeling_colpali.py +383 -0
  84. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +111 -14
  85. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +102 -35
  86. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +253 -195
  87. optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
  88. optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +24 -0
  89. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +27 -0
  90. optimum/rbln/transformers/models/dpt/configuration_dpt.py +6 -1
  91. optimum/rbln/transformers/models/dpt/modeling_dpt.py +6 -1
  92. optimum/rbln/transformers/models/exaone/configuration_exaone.py +24 -1
  93. optimum/rbln/transformers/models/exaone/exaone_architecture.py +5 -1
  94. optimum/rbln/transformers/models/exaone/modeling_exaone.py +66 -5
  95. optimum/rbln/transformers/models/gemma/configuration_gemma.py +24 -1
  96. optimum/rbln/transformers/models/gemma/gemma_architecture.py +5 -1
  97. optimum/rbln/transformers/models/gemma/modeling_gemma.py +49 -0
  98. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +3 -3
  99. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +18 -250
  100. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +89 -244
  101. optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +4 -1
  102. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +6 -1
  103. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +12 -2
  104. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +41 -4
  105. optimum/rbln/transformers/models/llama/configuration_llama.py +24 -1
  106. optimum/rbln/transformers/models/llama/modeling_llama.py +49 -0
  107. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +10 -2
  108. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +32 -4
  109. optimum/rbln/transformers/models/midm/configuration_midm.py +24 -1
  110. optimum/rbln/transformers/models/midm/midm_architecture.py +6 -1
  111. optimum/rbln/transformers/models/midm/modeling_midm.py +66 -5
  112. optimum/rbln/transformers/models/mistral/configuration_mistral.py +24 -1
  113. optimum/rbln/transformers/models/mistral/modeling_mistral.py +62 -4
  114. optimum/rbln/transformers/models/opt/configuration_opt.py +4 -1
  115. optimum/rbln/transformers/models/opt/modeling_opt.py +10 -0
  116. optimum/rbln/transformers/models/opt/opt_architecture.py +7 -1
  117. optimum/rbln/transformers/models/phi/configuration_phi.py +24 -1
  118. optimum/rbln/transformers/models/phi/modeling_phi.py +49 -0
  119. optimum/rbln/transformers/models/phi/phi_architecture.py +1 -1
  120. optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +24 -1
  121. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -4
  122. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +31 -3
  123. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +54 -25
  124. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +6 -4
  125. optimum/rbln/transformers/models/resnet/__init__.py +23 -0
  126. optimum/rbln/transformers/models/resnet/configuration_resnet.py +25 -0
  127. optimum/rbln/transformers/models/resnet/modeling_resnet.py +26 -0
  128. optimum/rbln/transformers/models/roberta/__init__.py +24 -0
  129. optimum/rbln/transformers/{configuration_alias.py → models/roberta/configuration_roberta.py} +12 -28
  130. optimum/rbln/transformers/{modeling_alias.py → models/roberta/modeling_roberta.py} +14 -28
  131. optimum/rbln/transformers/models/seq2seq/__init__.py +1 -1
  132. optimum/rbln/transformers/models/seq2seq/{configuration_seq2seq2.py → configuration_seq2seq.py} +2 -2
  133. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +7 -3
  134. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +41 -3
  135. optimum/rbln/transformers/models/siglip/configuration_siglip.py +10 -0
  136. optimum/rbln/transformers/models/siglip/modeling_siglip.py +69 -21
  137. optimum/rbln/transformers/models/t5/configuration_t5.py +12 -2
  138. optimum/rbln/transformers/models/t5/modeling_t5.py +56 -8
  139. optimum/rbln/transformers/models/t5/t5_architecture.py +5 -1
  140. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/__init__.py +1 -1
  141. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/configuration_time_series_transformer.py +9 -2
  142. optimum/rbln/transformers/models/{time_series_transformers/modeling_time_series_transformers.py → time_series_transformer/modeling_time_series_transformer.py} +20 -11
  143. optimum/rbln/transformers/models/vit/__init__.py +19 -0
  144. optimum/rbln/transformers/models/vit/configuration_vit.py +24 -0
  145. optimum/rbln/transformers/models/vit/modeling_vit.py +25 -0
  146. optimum/rbln/transformers/models/wav2vec2/__init__.py +1 -1
  147. optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +26 -0
  148. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
  149. optimum/rbln/transformers/models/whisper/configuration_whisper.py +10 -1
  150. optimum/rbln/transformers/models/whisper/modeling_whisper.py +41 -17
  151. optimum/rbln/transformers/models/xlm_roberta/__init__.py +16 -2
  152. optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +15 -2
  153. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +12 -3
  154. optimum/rbln/utils/model_utils.py +20 -0
  155. optimum/rbln/utils/runtime_utils.py +49 -1
  156. optimum/rbln/utils/submodule.py +6 -8
  157. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1.dist-info}/METADATA +6 -6
  158. optimum_rbln-0.8.1.dist-info/RECORD +211 -0
  159. optimum_rbln-0.8.0.post2.dist-info/RECORD +0 -184
  160. /optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/time_series_transformers_architecture.py +0 -0
  161. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1.dist-info}/WHEEL +0 -0
  162. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py CHANGED
@@ -38,6 +38,13 @@ from .pipeline_kandinsky2_2_prior import RBLNKandinskyV22PriorPipeline
 
 
 class RBLNKandinskyV22CombinedPipeline(RBLNDiffusionMixin, KandinskyV22CombinedPipeline):
+    """
+    RBLN-accelerated implementation of Kandinsky 2.2 combined pipeline for end-to-end text-to-image generation.
+
+    This pipeline compiles both prior and decoder Kandinsky 2.2 models to run efficiently on RBLN NPUs, enabling
+    high-performance inference for complete text-to-image generation with distinctive artistic style.
+    """
+
     original_class = KandinskyV22CombinedPipeline
     _rbln_config_class = RBLNKandinskyV22CombinedPipelineConfig
     _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22Pipeline}
@@ -46,15 +53,15 @@ class RBLNKandinskyV22CombinedPipeline(RBLNDiffusionMixin, KandinskyV22CombinedP
 
     def __init__(
         self,
-        unet: "UNet2DConditionModel",
-        scheduler: "DDPMScheduler",
-        movq: "VQModel",
-        prior_prior: "PriorTransformer",
-        prior_image_encoder: "CLIPVisionModelWithProjection",
-        prior_text_encoder: "CLIPTextModelWithProjection",
-        prior_tokenizer: "CLIPTokenizer",
-        prior_scheduler: "UnCLIPScheduler",
-        prior_image_processor: "CLIPImageProcessor",
+        unet: UNet2DConditionModel,
+        scheduler: DDPMScheduler,
+        movq: VQModel,
+        prior_prior: PriorTransformer,
+        prior_image_encoder: CLIPVisionModelWithProjection,
+        prior_text_encoder: CLIPTextModelWithProjection,
+        prior_tokenizer: CLIPTokenizer,
+        prior_scheduler: UnCLIPScheduler,
+        prior_image_processor: CLIPImageProcessor,
     ):
         RBLNDiffusionMixin.__init__(self)
         super(KandinskyV22CombinedPipeline, self).__init__()
@@ -90,6 +97,13 @@ class RBLNKandinskyV22CombinedPipeline(RBLNDiffusionMixin, KandinskyV22CombinedP
 
 
 class RBLNKandinskyV22Img2ImgCombinedPipeline(RBLNDiffusionMixin, KandinskyV22Img2ImgCombinedPipeline):
+    """
+    RBLN-accelerated implementation of Kandinsky 2.2 combined pipeline for end-to-end image-to-image generation.
+
+    This pipeline compiles both prior and decoder Kandinsky 2.2 models to run efficiently on RBLN NPUs, enabling
+    high-performance inference for complete image-to-image transformation with distinctive artistic style.
+    """
+
     original_class = KandinskyV22Img2ImgCombinedPipeline
     _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22Img2ImgPipeline}
     _submodules = ["prior_image_encoder", "prior_text_encoder", "prior_prior", "unet", "movq"]
@@ -97,15 +111,15 @@ class RBLNKandinskyV22Img2ImgCombinedPipeline(RBLNDiffusionMixin, KandinskyV22Im
 
     def __init__(
         self,
-        unet: "UNet2DConditionModel",
-        scheduler: "DDPMScheduler",
-        movq: "VQModel",
-        prior_prior: "PriorTransformer",
-        prior_image_encoder: "CLIPVisionModelWithProjection",
-        prior_text_encoder: "CLIPTextModelWithProjection",
-        prior_tokenizer: "CLIPTokenizer",
-        prior_scheduler: "UnCLIPScheduler",
-        prior_image_processor: "CLIPImageProcessor",
+        unet: UNet2DConditionModel,
+        scheduler: DDPMScheduler,
+        movq: VQModel,
+        prior_prior: PriorTransformer,
+        prior_image_encoder: CLIPVisionModelWithProjection,
+        prior_text_encoder: CLIPTextModelWithProjection,
+        prior_tokenizer: CLIPTokenizer,
+        prior_scheduler: UnCLIPScheduler,
+        prior_image_processor: CLIPImageProcessor,
     ):
         RBLNDiffusionMixin.__init__(self)
         super(KandinskyV22Img2ImgCombinedPipeline, self).__init__()
@@ -141,6 +155,13 @@ class RBLNKandinskyV22Img2ImgCombinedPipeline(RBLNDiffusionMixin, KandinskyV22Im
 
 
 class RBLNKandinskyV22InpaintCombinedPipeline(RBLNDiffusionMixin, KandinskyV22InpaintCombinedPipeline):
+    """
+    RBLN-accelerated implementation of Kandinsky 2.2 combined pipeline for end-to-end image inpainting.
+
+    This pipeline compiles both prior and decoder Kandinsky 2.2 models to run efficiently on RBLN NPUs, enabling
+    high-performance inference for complete image inpainting with distinctive artistic style and seamless integration.
+    """
+
     original_class = KandinskyV22InpaintCombinedPipeline
     _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22InpaintPipeline}
     _submodules = ["prior_image_encoder", "prior_text_encoder", "prior_prior", "unet", "movq"]
@@ -148,15 +169,15 @@ class RBLNKandinskyV22InpaintCombinedPipeline(RBLNDiffusionMixin, KandinskyV22In
 
     def __init__(
         self,
-        unet: "UNet2DConditionModel",
-        scheduler: "DDPMScheduler",
-        movq: "VQModel",
-        prior_prior: "PriorTransformer",
-        prior_image_encoder: "CLIPVisionModelWithProjection",
-        prior_text_encoder: "CLIPTextModelWithProjection",
-        prior_tokenizer: "CLIPTokenizer",
-        prior_scheduler: "UnCLIPScheduler",
-        prior_image_processor: "CLIPImageProcessor",
+        unet: UNet2DConditionModel,
+        scheduler: DDPMScheduler,
+        movq: VQModel,
+        prior_prior: PriorTransformer,
+        prior_image_encoder: CLIPVisionModelWithProjection,
+        prior_text_encoder: CLIPTextModelWithProjection,
+        prior_tokenizer: CLIPTokenizer,
+        prior_scheduler: UnCLIPScheduler,
+        prior_image_processor: CLIPImageProcessor,
     ):
         RBLNDiffusionMixin.__init__(self)
        super(KandinskyV22InpaintCombinedPipeline, self).__init__()
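For orientation, here is a minimal usage sketch of the combined pipeline compiled above. This is illustrative only: the checkpoint ID, prompt, and output handling are placeholders, not part of this diff; the `export=True` pattern follows the `RBLNModel` docstring shown under `optimum/rbln/modeling.py` below.

```python
# Sketch only (not from this release): compile the Kandinsky 2.2 combined
# pipeline for an RBLN NPU, then run end-to-end text-to-image generation.
from optimum.rbln import RBLNKandinskyV22CombinedPipeline

pipe = RBLNKandinskyV22CombinedPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder",  # placeholder checkpoint ID
    export=True,  # convert and compile the PyTorch weights for the NPU
)
image = pipe(prompt="a red panda reading a book").images[0]
image.save("panda.png")
```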
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNKandinskyV22Img2ImgPipeline(RBLNDiffusionMixin, KandinskyV22Img2ImgPipeline):
+    """
+    RBLN-accelerated implementation of Kandinsky 2.2 pipeline for image-to-image generation.
+
+    This pipeline compiles Kandinsky 2.2 models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for transforming input images with distinctive artistic style and enhanced visual fidelity.
+    """
+
     original_class = KandinskyV22Img2ImgPipeline
     _rbln_config_class = RBLNKandinskyV22Img2ImgPipelineConfig
     _submodules = ["unet", "movq"]
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNKandinskyV22InpaintPipeline(RBLNDiffusionMixin, KandinskyV22InpaintPipeline):
+    """
+    RBLN-accelerated implementation of Kandinsky 2.2 pipeline for image inpainting.
+
+    This pipeline compiles Kandinsky 2.2 models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for filling masked regions with distinctive artistic style and seamless content integration.
+    """
+
     original_class = KandinskyV22InpaintPipeline
     _rbln_config_class = RBLNKandinskyV22InpaintPipelineConfig
     _submodules = ["unet", "movq"]
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNKandinskyV22PriorPipeline(RBLNDiffusionMixin, KandinskyV22PriorPipeline):
+    """
+    RBLN-accelerated implementation of Kandinsky 2.2 prior pipeline for text and image embedding generation.
+
+    This pipeline compiles Kandinsky 2.2 prior models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for generating image embeddings from text prompts and image inputs for downstream generation tasks.
+    """
+
     original_class = KandinskyV22PriorPipeline
     _rbln_config_class = RBLNKandinskyV22PriorPipelineConfig
     _submodules = ["text_encoder", "image_encoder", "prior"]
optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py CHANGED
@@ -20,6 +20,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNStableDiffusionPipeline(RBLNDiffusionMixin, StableDiffusionPipeline):
+    """
+    RBLN-accelerated implementation of Stable Diffusion pipeline for text-to-image generation.
+
+    This pipeline compiles Stable Diffusion models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for generating images from text prompts with optimized memory usage and throughput.
+    """
+
     original_class = StableDiffusionPipeline
     _rbln_config_class = RBLNStableDiffusionPipelineConfig
     _submodules = ["vae", "text_encoder", "unet"]
optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNStableDiffusionImg2ImgPipeline(RBLNDiffusionMixin, StableDiffusionImg2ImgPipeline):
+    """
+    RBLN-accelerated implementation of Stable Diffusion pipeline for image-to-image generation.
+
+    This pipeline compiles Stable Diffusion models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for transforming input images based on text prompts with controlled strength and guidance.
+    """
+
     original_class = StableDiffusionImg2ImgPipeline
     _rbln_config_class = RBLNStableDiffusionImg2ImgPipelineConfig
     _submodules = ["text_encoder", "unet", "vae"]
optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNStableDiffusionInpaintPipeline(RBLNDiffusionMixin, StableDiffusionInpaintPipeline):
+    """
+    RBLN-accelerated implementation of Stable Diffusion pipeline for image inpainting.
+
+    This pipeline compiles Stable Diffusion models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for filling masked regions of images based on text prompts with seamless integration.
+    """
+
     original_class = StableDiffusionInpaintPipeline
     _rbln_config_class = RBLNStableDiffusionInpaintPipelineConfig
     _submodules = ["text_encoder", "unet", "vae"]
optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNStableDiffusion3Pipeline(RBLNDiffusionMixin, StableDiffusion3Pipeline):
+    """
+    RBLN-accelerated implementation of Stable Diffusion 3 pipeline for advanced text-to-image generation.
+
+    This pipeline compiles Stable Diffusion 3 models to run efficiently on RBLN NPUs, enabling high-performance
+    inference with improved text understanding, enhanced image quality, and superior prompt adherence.
+    """
+
     original_class = StableDiffusion3Pipeline
     _rbln_config_class = RBLNStableDiffusion3PipelineConfig
     _submodules = ["transformer", "text_encoder_3", "text_encoder", "text_encoder_2", "vae"]
optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNStableDiffusion3Img2ImgPipeline(RBLNDiffusionMixin, StableDiffusion3Img2ImgPipeline):
+    """
+    RBLN-accelerated implementation of Stable Diffusion 3 pipeline for advanced image-to-image generation.
+
+    This pipeline compiles Stable Diffusion 3 models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for transforming input images with superior text understanding and enhanced visual quality.
+    """
+
     original_class = StableDiffusion3Img2ImgPipeline
     _rbln_config_class = RBLNStableDiffusion3Img2ImgPipelineConfig
     _submodules = ["transformer", "text_encoder_3", "text_encoder", "text_encoder_2", "vae"]
optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNStableDiffusion3InpaintPipeline(RBLNDiffusionMixin, StableDiffusion3InpaintPipeline):
+    """
+    RBLN-accelerated implementation of Stable Diffusion 3 pipeline for advanced image inpainting.
+
+    This pipeline compiles Stable Diffusion 3 models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for filling masked regions with superior text understanding and seamless content generation.
+    """
+
     original_class = StableDiffusion3InpaintPipeline
     _rbln_config_class = RBLNStableDiffusion3InpaintPipelineConfig
     _submodules = ["transformer", "text_encoder_3", "text_encoder", "text_encoder_2", "vae"]
optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNStableDiffusionXLPipeline(RBLNDiffusionMixin, StableDiffusionXLPipeline):
+    """
+    RBLN-accelerated implementation of Stable Diffusion XL pipeline for high-resolution text-to-image generation.
+
+    This pipeline compiles Stable Diffusion XL models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for generating high-quality images with enhanced detail and improved prompt adherence.
+    """
+
     original_class = StableDiffusionXLPipeline
     _rbln_config_class = RBLNStableDiffusionXLPipelineConfig
     _submodules = ["text_encoder", "text_encoder_2", "unet", "vae"]
optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNStableDiffusionXLImg2ImgPipeline(RBLNDiffusionMixin, StableDiffusionXLImg2ImgPipeline):
+    """
+    RBLN-accelerated implementation of Stable Diffusion XL pipeline for high-resolution image-to-image generation.
+
+    This pipeline compiles Stable Diffusion XL models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for transforming input images with enhanced quality and detail preservation.
+    """
+
     original_class = StableDiffusionXLImg2ImgPipeline
     _rbln_config_class = RBLNStableDiffusionXLImg2ImgPipelineConfig
     _submodules = ["text_encoder", "text_encoder_2", "unet", "vae"]
optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py CHANGED
@@ -19,6 +19,13 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 
 
 class RBLNStableDiffusionXLInpaintPipeline(RBLNDiffusionMixin, StableDiffusionXLInpaintPipeline):
+    """
+    RBLN-accelerated implementation of Stable Diffusion XL pipeline for high-resolution image inpainting.
+
+    This pipeline compiles Stable Diffusion XL models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for filling masked regions with enhanced quality and seamless blending capabilities.
+    """
+
     original_class = StableDiffusionXLInpaintPipeline
     _rbln_config_class = RBLNStableDiffusionXLInpaintPipelineConfig
     _submodules = ["text_encoder", "text_encoder_2", "unet", "vae"]
optimum/rbln/modeling.py CHANGED
@@ -14,7 +14,7 @@
 
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import TYPE_CHECKING, Dict, List, Optional, Union, get_args, get_origin, get_type_hints
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, get_args, get_origin, get_type_hints
 
 import rebel
 import torch
@@ -35,27 +35,12 @@ logger = get_logger(__name__)
 
 
 class RBLNModel(RBLNBaseModel):
-    """
-    A class that inherits from RBLNBaseModel for models consisting of a single `torch.nn.Module`.
-
-    This class supports all the functionality of RBLNBaseModel, including loading and saving models using
-    the `from_pretrained` and `save_pretrained` methods, compiling PyTorch models for execution on RBLN NPU
-    devices.
-
-    Example:
-        ```python
-        model = RBLNModel.from_pretrained("model_id", export=True, rbln_npu="npu_name")
-        outputs = model(**inputs)
-        ```
-    """
-
     _output_class = None
 
     @classmethod
     def update_kwargs(cls, kwargs):
-        """
-        Update user-given kwargs to get proper pytorch model.
-        """
+        # Update user-given kwargs to get proper pytorch model.
+
         return kwargs
 
     @classmethod
@@ -66,10 +51,9 @@ class RBLNModel(RBLNBaseModel):
         subfolder: str,
         rbln_config: RBLNModelConfig,
     ):
-        """
-        If you are unavoidably running on a CPU rather than an RBLN device,
-        store the torch tensor, weight, etc. in this function.
-        """
+        # If you are unavoidably running on a CPU rather than an RBLN device,
+        # store the torch tensor, weight, etc. in this function.
+        pass
 
     @classmethod
     def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
@@ -80,7 +64,12 @@ class RBLNModel(RBLNBaseModel):
     def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
         model = cls.wrap_model_if_needed(model, rbln_config)
         rbln_compile_config = rbln_config.compile_cfgs[0]
-        compiled_model = cls.compile(model, rbln_compile_config=rbln_compile_config)
+        compiled_model = cls.compile(
+            model,
+            rbln_compile_config=rbln_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
+        )
         return compiled_model
 
     @classmethod
@@ -88,11 +77,32 @@
         cls,
         model: "PreTrainedModel",
         config: Optional[PretrainedConfig] = None,
-        rbln_config: Optional[RBLNModelConfig] = None,
+        rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
         subfolder: str = "",
-        **kwargs,
-    ):
+        **kwargs: Dict[str, Any],
+    ) -> "RBLNModel":
+        """
+        Converts and compiles a pre-trained HuggingFace library model into a RBLN model.
+        This method performs the actual model conversion and compilation process.
+
+        Args:
+            model: The PyTorch model to be compiled. The object must be an instance of the HuggingFace transformers PreTrainedModel class.
+            rbln_config: Configuration for RBLN model compilation and runtime. This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
+                For detailed configuration options, see the specific model's configuration class documentation.
+
+            kwargs: Additional keyword arguments. Arguments with the prefix 'rbln_' are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
+
+        The method performs the following steps:
+
+        1. Compiles the PyTorch model into an optimized RBLN graph
+        2. Configures the model for the specified NPU device
+        3. Creates the necessary runtime objects if requested
+        4. Saves the compiled model and configurations
+
+        Returns:
+            A RBLN model instance ready for inference on RBLN NPU devices.
+        """
         preprocessors = kwargs.pop("preprocessors", [])
         rbln_config, kwargs = cls.prepare_rbln_config(rbln_config=rbln_config, **kwargs)
 
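To make the argument routing described in this docstring concrete, a hedged example (the model ID and NPU name are placeholders; `export=True` and `rbln_npu` come from the class docstring removed above):

```python
# Sketch only: kwargs prefixed with 'rbln_' are routed into rbln_config;
# the remaining kwargs are forwarded to the HuggingFace loader.
from optimum.rbln import RBLNModel

model = RBLNModel.from_pretrained(
    "model_id",           # placeholder HuggingFace checkpoint
    export=True,          # convert and compile instead of loading precompiled artifacts
    rbln_npu="npu_name",  # routed to rbln_config (placeholder NPU name)
    revision="main",      # routed to the HuggingFace library
)
```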
@@ -232,7 +242,38 @@
             for compiled_model in compiled_models
         ]
 
-    def forward(self, *args, return_dict: Optional[bool] = None, **kwargs):
+    def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Dict[str, Any]) -> Any:
+        """
+        Defines the forward pass of the RBLN model, providing a drop-in replacement for HuggingFace PreTrainedModel.
+
+        This method executes the compiled RBLN model on RBLN NPU devices while maintaining full compatibility
+        with HuggingFace transformers and diffusers APIs. The RBLNModel can be used as a direct substitute
+        for any HuggingFace nn.Module/PreTrainedModel, enabling seamless integration into existing workflows.
+
+        Args:
+            *args: Variable length argument list containing model inputs. The format matches the original
+                HuggingFace model's forward method signature (e.g., input_ids, attention_mask for
+                transformers models, or sample, timestep for diffusers models).
+            return_dict:
+                Whether to return outputs as a dictionary-like object or as a tuple. When `None`:
+                - For transformers models: Uses `self.config.use_return_dict` (typically `True`)
+                - For diffusers models: Defaults to `True`
+            **kwargs: Arbitrary keyword arguments containing additional model inputs and parameters,
+                matching the original HuggingFace model's interface.
+
+        Returns:
+            Model outputs in the same format as the original HuggingFace model.
+
+            - If `return_dict=True`: Returns a dictionary-like object (e.g., BaseModelOutput,
+              CausalLMOutput) with named fields such as `logits`, `hidden_states`, etc.
+            - If `return_dict=False`: Returns a tuple containing the raw model outputs.
+
+        Note:
+            - This method maintains the exact same interface as the original HuggingFace model's forward method
+            - The compiled model runs on RBLN NPU hardware for accelerated inference
+            - All HuggingFace model features (generation, attention patterns, etc.) are preserved
+            - Can be used directly in HuggingFace pipelines, transformers.Trainer, and other workflows
+        """
         if self.hf_library_name == "transformers":
             return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         else:
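A short hedged sketch of the `return_dict` contract described in this docstring, reusing `model` from the previous sketch (token values and shapes are illustrative):

```python
# Sketch only: forward() mirrors the original HuggingFace signature.
import torch

inputs = {
    "input_ids": torch.tensor([[101, 2023, 102]]),         # illustrative token IDs
    "attention_mask": torch.ones(1, 3, dtype=torch.long),
}

out = model(**inputs)  # dict-like output; falls back to config.use_return_dict when return_dict is None
first = out[0]         # ModelOutput objects also support index access to their fields

raw = model(**inputs, return_dict=False)  # plain tuple of the same tensors
first = raw[0]
```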
@@ -246,12 +287,7 @@
 
     @classmethod
     def get_hf_output_class(cls):
-        """
-        Dynamically gets the output class from the corresponding HuggingFace model class.
-
-        Returns:
-            type: The appropriate output class from transformers or diffusers
-        """
+        # Dynamically gets the output class from the corresponding HuggingFace model class.
         if cls._output_class:
             return cls._output_class
 
@@ -278,10 +314,8 @@
         return BaseModelOutput
 
     def _prepare_output(self, output, return_dict):
-        """
-        Prepare model output based on return_dict flag.
-        This method can be overridden by subclasses to provide task-specific output handling.
-        """
+        # Prepare model output based on return_dict flag.
+        # This method can be overridden by subclasses to provide task-specific output handling.
         tuple_output = (output,) if not isinstance(output, (tuple, list)) else tuple(output)
         if not return_dict:
             return tuple_output