optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as published in the public registry.
Files changed (197)
  1. optimum/rbln/__init__.py +116 -9
  2. optimum/rbln/__version__.py +16 -3
  3. optimum/rbln/cli.py +660 -0
  4. optimum/rbln/configuration_utils.py +171 -43
  5. optimum/rbln/diffusers/__init__.py +19 -0
  6. optimum/rbln/diffusers/configurations/__init__.py +3 -0
  7. optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
  8. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +3 -3
  9. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +1 -1
  10. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
  11. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +3 -3
  12. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +4 -4
  13. optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +12 -4
  14. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +9 -4
  15. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +3 -3
  16. optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
  17. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +3 -3
  18. optimum/rbln/diffusers/configurations/pipelines/__init__.py +3 -0
  19. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +35 -19
  20. optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +14 -11
  21. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -20
  22. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +13 -9
  23. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +17 -13
  24. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +17 -10
  25. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
  26. optimum/rbln/diffusers/modeling_diffusers.py +33 -18
  27. optimum/rbln/diffusers/models/__init__.py +4 -0
  28. optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
  29. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +32 -3
  30. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +32 -6
  31. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
  32. optimum/rbln/diffusers/models/autoencoders/vae.py +27 -8
  33. optimum/rbln/diffusers/models/autoencoders/vq_model.py +32 -3
  34. optimum/rbln/diffusers/models/controlnet.py +16 -1
  35. optimum/rbln/diffusers/models/transformers/prior_transformer.py +17 -3
  36. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +26 -3
  37. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +23 -2
  38. optimum/rbln/diffusers/models/unets/__init__.py +1 -0
  39. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +23 -4
  40. optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
  41. optimum/rbln/diffusers/pipelines/__init__.py +15 -5
  42. optimum/rbln/diffusers/pipelines/auto_pipeline.py +307 -0
  43. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +20 -0
  44. optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +23 -12
  45. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +16 -46
  46. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +31 -1
  47. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +31 -1
  48. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -6
  49. optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
  50. optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
  51. optimum/rbln/modeling.py +50 -24
  52. optimum/rbln/modeling_base.py +116 -35
  53. optimum/rbln/ops/attn.py +158 -0
  54. optimum/rbln/ops/flash_attn.py +166 -0
  55. optimum/rbln/ops/kv_cache_update.py +5 -0
  56. optimum/rbln/ops/linear.py +7 -0
  57. optimum/rbln/transformers/__init__.py +100 -0
  58. optimum/rbln/transformers/configuration_generic.py +7 -32
  59. optimum/rbln/transformers/modeling_attention_utils.py +385 -0
  60. optimum/rbln/transformers/modeling_generic.py +48 -65
  61. optimum/rbln/transformers/modeling_outputs.py +37 -0
  62. optimum/rbln/transformers/models/__init__.py +93 -30
  63. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +28 -2
  64. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +68 -5
  65. optimum/rbln/transformers/models/auto/__init__.py +2 -0
  66. optimum/rbln/transformers/models/auto/auto_factory.py +92 -17
  67. optimum/rbln/transformers/models/auto/modeling_auto.py +45 -0
  68. optimum/rbln/transformers/models/bart/bart_architecture.py +2 -7
  69. optimum/rbln/transformers/models/bart/configuration_bart.py +2 -0
  70. optimum/rbln/transformers/models/bart/modeling_bart.py +23 -2
  71. optimum/rbln/transformers/models/bert/bert_architecture.py +16 -0
  72. optimum/rbln/transformers/models/bert/modeling_bert.py +93 -4
  73. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +42 -11
  74. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +135 -44
  75. optimum/rbln/transformers/models/clip/configuration_clip.py +21 -7
  76. optimum/rbln/transformers/models/clip/modeling_clip.py +183 -27
  77. optimum/rbln/transformers/models/colpali/colpali_architecture.py +3 -6
  78. optimum/rbln/transformers/models/colpali/configuration_colpali.py +37 -21
  79. optimum/rbln/transformers/models/colpali/modeling_colpali.py +82 -104
  80. optimum/rbln/transformers/models/colqwen2/__init__.py +2 -0
  81. optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +233 -0
  82. optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +74 -0
  83. optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +446 -0
  84. optimum/rbln/transformers/models/decoderonly/__init__.py +3 -2
  85. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +114 -37
  86. optimum/rbln/transformers/models/decoderonly/configuration_lora.py +411 -0
  87. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +323 -316
  88. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +508 -0
  89. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +119 -0
  90. optimum/rbln/transformers/models/decoderonly/lora_architecture.py +204 -0
  91. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +486 -892
  92. optimum/rbln/transformers/models/depth_anything/__init__.py +16 -0
  93. optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +24 -0
  94. optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +42 -0
  95. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +24 -0
  96. optimum/rbln/transformers/models/dpt/modeling_dpt.py +17 -0
  97. optimum/rbln/transformers/models/exaone/modeling_exaone.py +42 -4
  98. optimum/rbln/transformers/models/gemma/__init__.py +2 -2
  99. optimum/rbln/transformers/models/gemma/configuration_gemma.py +9 -1
  100. optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -4
  101. optimum/rbln/transformers/models/gemma/modeling_gemma.py +22 -1
  102. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +49 -14
  103. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +12 -2
  104. optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +245 -0
  105. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +212 -504
  106. optimum/rbln/transformers/models/gpt2/__init__.py +2 -2
  107. optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +31 -3
  108. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +10 -8
  109. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +18 -1
  110. optimum/rbln/transformers/models/grounding_dino/__init__.py +10 -0
  111. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +92 -0
  112. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +599 -0
  113. optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +1048 -0
  114. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +35 -7
  115. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +29 -32
  116. optimum/rbln/transformers/models/llama/__init__.py +2 -2
  117. optimum/rbln/transformers/models/llama/configuration_llama.py +9 -1
  118. optimum/rbln/transformers/models/llama/modeling_llama.py +22 -1
  119. optimum/rbln/transformers/models/llava/__init__.py +16 -0
  120. optimum/rbln/transformers/models/llava/configuration_llava.py +72 -0
  121. optimum/rbln/transformers/models/llava/modeling_llava.py +490 -0
  122. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +21 -6
  123. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +234 -376
  124. optimum/rbln/transformers/models/midm/midm_architecture.py +4 -1
  125. optimum/rbln/transformers/models/midm/modeling_midm.py +42 -4
  126. optimum/rbln/transformers/models/mistral/__init__.py +2 -2
  127. optimum/rbln/transformers/models/mistral/configuration_mistral.py +9 -1
  128. optimum/rbln/transformers/models/mistral/mistral_architecture.py +1 -1
  129. optimum/rbln/transformers/models/mistral/modeling_mistral.py +26 -3
  130. optimum/rbln/transformers/models/opt/__init__.py +2 -2
  131. optimum/rbln/transformers/models/opt/configuration_opt.py +8 -1
  132. optimum/rbln/transformers/models/opt/modeling_opt.py +29 -17
  133. optimum/rbln/transformers/models/opt/opt_architecture.py +4 -4
  134. optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
  135. optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +38 -0
  136. optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +71 -0
  137. optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +161 -0
  138. optimum/rbln/transformers/models/phi/__init__.py +2 -2
  139. optimum/rbln/transformers/models/phi/configuration_phi.py +9 -1
  140. optimum/rbln/transformers/models/phi/modeling_phi.py +10 -1
  141. optimum/rbln/transformers/models/phi/phi_architecture.py +11 -7
  142. optimum/rbln/transformers/models/pixtral/__init__.py +16 -0
  143. optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
  144. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +322 -0
  145. optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
  146. optimum/rbln/transformers/models/qwen2/__init__.py +2 -2
  147. optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +9 -1
  148. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +27 -1
  149. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +21 -6
  150. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +15 -22
  151. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +28 -7
  152. optimum/rbln/transformers/models/qwen2_vl/__init__.py +19 -0
  153. optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +88 -0
  154. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +513 -0
  155. optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +165 -0
  156. optimum/rbln/transformers/models/qwen3/__init__.py +16 -0
  157. optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +71 -0
  158. optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +133 -0
  159. optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +31 -0
  160. optimum/rbln/transformers/models/resnet/configuration_resnet.py +17 -0
  161. optimum/rbln/transformers/models/resnet/modeling_resnet.py +73 -0
  162. optimum/rbln/transformers/models/roberta/modeling_roberta.py +33 -0
  163. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +21 -16
  164. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +60 -13
  165. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -2
  166. optimum/rbln/transformers/models/siglip/__init__.py +2 -6
  167. optimum/rbln/transformers/models/siglip/configuration_siglip.py +1 -1
  168. optimum/rbln/transformers/models/siglip/modeling_siglip.py +21 -16
  169. optimum/rbln/transformers/models/swin/__init__.py +16 -0
  170. optimum/rbln/transformers/models/swin/configuration_swin.py +42 -0
  171. optimum/rbln/transformers/models/swin/modeling_swin.py +354 -0
  172. optimum/rbln/transformers/models/t5/configuration_t5.py +2 -0
  173. optimum/rbln/transformers/models/t5/modeling_t5.py +2 -2
  174. optimum/rbln/transformers/models/t5/t5_architecture.py +8 -1
  175. optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +3 -3
  176. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +22 -16
  177. optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +7 -1
  178. optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
  179. optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +15 -3
  180. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +61 -8
  181. optimum/rbln/transformers/models/whisper/configuration_whisper.py +12 -13
  182. optimum/rbln/transformers/models/whisper/generation_whisper.py +62 -6
  183. optimum/rbln/transformers/models/whisper/modeling_whisper.py +32 -5
  184. optimum/rbln/transformers/models/xlm_roberta/__init__.py +2 -8
  185. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +43 -0
  186. optimum/rbln/transformers/utils/rbln_quantization.py +400 -75
  187. optimum/rbln/transformers/utils/rbln_runtime_wrapper.py +79 -0
  188. optimum/rbln/utils/deprecation.py +213 -0
  189. optimum/rbln/utils/hub.py +22 -50
  190. optimum/rbln/utils/runtime_utils.py +85 -17
  191. optimum/rbln/utils/submodule.py +31 -9
  192. {optimum_rbln-0.8.2a0.dist-info → optimum_rbln-0.9.3.dist-info}/METADATA +8 -7
  193. optimum_rbln-0.9.3.dist-info/RECORD +264 -0
  194. {optimum_rbln-0.8.2a0.dist-info → optimum_rbln-0.9.3.dist-info}/WHEEL +1 -1
  195. optimum_rbln-0.9.3.dist-info/entry_points.txt +2 -0
  196. optimum_rbln-0.8.2a0.dist-info/RECORD +0 -211
  197. {optimum_rbln-0.8.2a0.dist-info → optimum_rbln-0.9.3.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py CHANGED
@@ -33,9 +33,9 @@ if is_cosmos_guardrail_available():
     from cosmos_guardrail import CosmosSafetyChecker
     from cosmos_guardrail.cosmos_guardrail import (
         COSMOS_GUARDRAIL_CHECKPOINT,
-        Aegis,
         Blocklist,
         GuardrailRunner,
+        LlamaGuard3,
         ModelConfig,
         RetinaFaceFilter,
         SafetyClassifier,
@@ -55,7 +55,7 @@ else:
 
     COSMOS_GUARDRAIL_CHECKPOINT = None
 
-    class Aegis(FailToImportCosmosGuardrail): ...
+    class LlamaGuard3(FailToImportCosmosGuardrail): ...
 
     class Blocklist(FailToImportCosmosGuardrail): ...
 
@@ -127,25 +127,13 @@ class RBLNSigLIPEncoder(SigLIPEncoder):
 
             # We don't use RBLNSiglipModel, but we need to override get_image_features to return pooler_output
             self.model = RBLNSiglipVisionModel.from_pretrained(
-                self.checkpoint_dir,
-                rbln_device=rbln_config.siglip_encoder.device,
-                rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
-                rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
+                self.checkpoint_dir, rbln_config=rbln_config.siglip_encoder
             )
         else:
             super().__init__(model_name, checkpoint_id)
             model = self.model
             del self.model
-            self.model = RBLNSiglipVisionModel.from_model(
-                model,
-                rbln_device=rbln_config.siglip_encoder.device,
-                rbln_image_size=rbln_config.siglip_encoder.image_size,
-                rbln_npu=rbln_config.siglip_encoder.npu,
-                rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
-                rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
-            )
+            self.model = RBLNSiglipVisionModel.from_model(model, rbln_config=rbln_config.siglip_encoder)
         self.rbln_config = rbln_config
 
         # Override get_image_features to return pooler_output
@@ -324,47 +312,31 @@ class RBLNVideoContentSafetyFilter(VideoContentSafetyFilter):
         self.encoder.save_pretrained(checkpoint_id)
 
 
-class RBLNAegis(Aegis):
+class RBLNLlamaGuard3(LlamaGuard3):
     def __init__(
         self,
        checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
-        base_model_id: str = "meta-llama/LlamaGuard-7b",
-        aegis_adapter: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
+        base_model_id: str = "meta-llama/Llama-Guard-3-8B",
        rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
    ) -> None:
        if is_compiled_dir(checkpoint_id):
            torch.nn.Module.__init__(self)
-            cache_dir = pathlib.Path(checkpoint_id) / "aegis"
+            cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
            self.tokenizer = AutoTokenizer.from_pretrained(cache_dir)
-            self.model = RBLNAutoModelForCausalLM.from_pretrained(
-                cache_dir,
-                rbln_device=rbln_config.aegis.device,
-                rbln_create_runtimes=rbln_config.aegis.create_runtimes,
-                rbln_activate_profiler=rbln_config.aegis.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
-            )
+            self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.llamaguard3)
 
        else:
-            super().__init__(checkpoint_id, base_model_id, aegis_adapter)
-            model = self.model.merge_and_unload() # peft merge
+            super().__init__(checkpoint_id, base_model_id)
+            model = self.model
            del self.model
-
-            self.model = RBLNAutoModelForCausalLM.from_model(
-                model,
-                rbln_tensor_parallel_size=4,
-                rbln_device=rbln_config.aegis.device,
-                rbln_create_runtimes=rbln_config.aegis.create_runtimes,
-                rbln_npu=rbln_config.aegis.npu,
-                rbln_activate_profiler=rbln_config.aegis.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
-            )
+            self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.llamaguard3)
 
        self.rbln_config = rbln_config
        self.dtype = torch.bfloat16
        self.device = torch.device("cpu")
 
    def save_pretrained(self, checkpoint_id: str):
-        cache_dir = pathlib.Path(checkpoint_id) / "aegis"
+        cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
        self.model.save_pretrained(cache_dir)
        self.tokenizer.save_pretrained(cache_dir)
 
@@ -377,8 +349,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
     def __init__(
         self,
         checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
-        aegis_model_id: str = "meta-llama/LlamaGuard-7b",
-        aegis_adapter_id: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
+        llamaguard_model_id: str = "meta-llama/Llama-Guard-3-8B",
         rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
     ) -> None:
         torch.nn.Module.__init__(self)
@@ -395,10 +366,9 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
         self.text_guardrail = GuardrailRunner(
             safety_models=[
                 Blocklist(COSMOS_GUARDRAIL_CHECKPOINT), # Changed since it cannot be saved
-                RBLNAegis(
+                RBLNLlamaGuard3(
                     checkpoint_id=checkpoint_id,
-                    base_model_id=aegis_model_id,
-                    aegis_adapter=aegis_adapter_id,
+                    base_model_id=llamaguard_model_id,
                     rbln_config=rbln_config,
                 ),
             ]
@@ -413,7 +383,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
 
     def save_pretrained(self, save_dir: str):
         for text_safety_models in self.text_guardrail.safety_models:
-            if isinstance(text_safety_models, RBLNAegis):
+            if isinstance(text_safety_models, RBLNLlamaGuard3):
                 text_safety_models.save_pretrained(save_dir)
 
         for video_safety_models in self.video_guardrail.safety_models:
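The recurring change in this file (and in the modeling.py hunks below) is that per-option rbln_* keyword arguments are collapsed into a single rbln_config= argument carrying a sub-model configuration. A minimal sketch contrasting the two calling styles, assuming RBLNAutoModelForCausalLM is importable from optimum.rbln and that a plain dict is accepted for rbln_config (as the from_model docstring later in this diff states); the path and option values are illustrative:

from optimum.rbln import RBLNAutoModelForCausalLM

# 0.8.x style: each runtime option is its own rbln_* keyword argument.
model = RBLNAutoModelForCausalLM.from_pretrained(
    "compiled/llamaguard3",      # illustrative path to a compiled model directory
    rbln_device=0,
    rbln_create_runtimes=True,
    rbln_activate_profiler=False,
)

# 0.9.x style: the same options travel inside one rbln_config object (or dict),
# mirroring rbln_config=rbln_config.llamaguard3 in RBLNLlamaGuard3 above.
model = RBLNAutoModelForCausalLM.from_pretrained(
    "compiled/llamaguard3",
    rbln_config={"device": 0, "create_runtimes": True, "activate_profiler": False},
)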
optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py CHANGED
@@ -87,8 +87,38 @@ class RBLNCosmosTextToWorldPipeline(RBLNDiffusionMixin, CosmosTextToWorldPipelin
         export: bool = False,
         safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
         rbln_config: Dict[str, Any] = {},
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
+        """
+        Load a pretrained diffusion pipeline from a model checkpoint, with optional compilation for RBLN NPUs.
+
+        This method has two distinct operating modes:
+        - When `export=True`: Takes a PyTorch-based diffusion model, compiles it for RBLN NPUs, and loads the compiled model
+        - When `export=False`: Loads an already compiled RBLN model from `model_id` without recompilation
+
+        It supports various diffusion pipelines including Stable Diffusion, Kandinsky, ControlNet, and other diffusers-based models.
+
+        Args:
+            model_id (`str`):
+                The model ID or path to the pretrained model to load. Can be either:
+
+                - A model ID from the HuggingFace Hub
+                - A local path to a saved model directory
+            export:
+                If True, takes a PyTorch model from `model_id` and compiles it for RBLN NPU execution.
+                If False, loads an already compiled RBLN model from `model_id` without recompilation.
+            safety_checker:
+                Optional custom safety checker to use instead of the default one. Only used when `export=True`.
+            rbln_config:
+                Configuration options for RBLN compilation. Can include settings for specific submodules
+                such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
+                pipeline being compiled.
+            kwargs:
+                Additional arguments to pass to the underlying diffusion pipeline constructor or the
+                RBLN compilation process. These may include parameters specific to individual submodules
+                or the particular diffusion pipeline being used.
+        """
+
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
         if safety_checker is None and export:
             safety_checker = RBLNCosmosSafetyChecker(rbln_config=rbln_config.safety_checker)
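The docstring added here describes the two modes of from_pretrained. A hedged usage sketch; the checkpoint ID and the submodule keys inside rbln_config are illustrative rather than taken from this diff, and the class is assumed to be re-exported from optimum.rbln like the other RBLN pipelines:

from optimum.rbln import RBLNCosmosTextToWorldPipeline

# export=True: compile the PyTorch pipeline for RBLN NPUs, then persist the compiled artifacts.
pipe = RBLNCosmosTextToWorldPipeline.from_pretrained(
    "nvidia/Cosmos-1.0-Diffusion-7B-Text2World",  # illustrative checkpoint
    export=True,
    rbln_config={"transformer": {"device": 0}},   # illustrative per-submodule options
)
pipe.save_pretrained("cosmos-text2world-rbln")

# export=False (default): reload the already compiled pipeline without recompiling.
pipe = RBLNCosmosTextToWorldPipeline.from_pretrained("cosmos-text2world-rbln")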
optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py CHANGED
@@ -87,8 +87,38 @@ class RBLNCosmosVideoToWorldPipeline(RBLNDiffusionMixin, CosmosVideoToWorldPipel
         export: bool = False,
         safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
         rbln_config: Dict[str, Any] = {},
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
+        """
+        Load a pretrained diffusion pipeline from a model checkpoint, with optional compilation for RBLN NPUs.
+
+        This method has two distinct operating modes:
+        - When `export=True`: Takes a PyTorch-based diffusion model, compiles it for RBLN NPUs, and loads the compiled model
+        - When `export=False`: Loads an already compiled RBLN model from `model_id` without recompilation
+
+        It supports various diffusion pipelines including Stable Diffusion, Kandinsky, ControlNet, and other diffusers-based models.
+
+        Args:
+            model_id (`str`):
+                The model ID or path to the pretrained model to load. Can be either:
+
+                - A model ID from the HuggingFace Hub
+                - A local path to a saved model directory
+            export:
+                If True, takes a PyTorch model from `model_id` and compiles it for RBLN NPU execution.
+                If False, loads an already compiled RBLN model from `model_id` without recompilation.
+            safety_checker:
+                Optional custom safety checker to use instead of the default one. Only used when `export=True`.
+            rbln_config:
+                Configuration options for RBLN compilation. Can include settings for specific submodules
+                such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
+                pipeline being compiled.
+            kwargs:
+                Additional arguments to pass to the underlying diffusion pipeline constructor or the
+                RBLN compilation process. These may include parameters specific to individual submodules
+                or the particular diffusion pipeline being used.
+        """
+
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
         if safety_checker is None and export:
             safety_checker = RBLNCosmosSafetyChecker(rbln_config=rbln_config.safety_checker)
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py CHANGED
@@ -22,12 +22,7 @@ from diffusers import (
     UNet2DConditionModel,
     VQModel,
 )
-from transformers import (
-    CLIPImageProcessor,
-    CLIPTextModelWithProjection,
-    CLIPTokenizer,
-    CLIPVisionModelWithProjection,
-)
+from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection
 
 from ...configurations import RBLNKandinskyV22CombinedPipelineConfig
 from ...modeling_diffusers import RBLNDiffusionMixin
optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py ADDED
@@ -0,0 +1,15 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .pipeline_stable_video_diffusion import RBLNStableVideoDiffusionPipeline
optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py ADDED
@@ -0,0 +1,46 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from diffusers import StableVideoDiffusionPipeline
+
+from ....utils.logging import get_logger
+from ...configurations import RBLNStableVideoDiffusionPipelineConfig
+from ...modeling_diffusers import RBLNDiffusionMixin
+
+
+logger = get_logger(__name__)
+
+
+class RBLNStableVideoDiffusionPipeline(RBLNDiffusionMixin, StableVideoDiffusionPipeline):
+    """
+    RBLN-accelerated implementation of Stable Video Diffusion pipeline for image-to-video generation.
+
+    This pipeline compiles Stable Video Diffusion models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for generating videos from images with optimized memory usage and throughput.
+    """
+
+    original_class = StableVideoDiffusionPipeline
+    _rbln_config_class = RBLNStableVideoDiffusionPipelineConfig
+    _submodules = ["image_encoder", "unet", "vae"]
+
+    def handle_additional_kwargs(self, **kwargs):
+        compiled_num_frames = self.unet.rbln_config.num_frames
+        if compiled_num_frames is not None:
+            kwargs["num_frames"] = compiled_num_frames
+
+        compiled_decode_chunk_size = self.vae.rbln_config.decode_chunk_size
+        if compiled_decode_chunk_size is not None:
+            kwargs["decode_chunk_size"] = compiled_decode_chunk_size
+        return kwargs
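A hedged usage sketch for the new pipeline; the checkpoint ID and input image are illustrative, and the class is assumed to be re-exported from optimum.rbln like the other RBLN pipelines:

from diffusers.utils import load_image
from optimum.rbln import RBLNStableVideoDiffusionPipeline

# Compile once with export=True, then reload the saved artifacts later with the default export=False.
pipe = RBLNStableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",  # illustrative checkpoint
    export=True,
)

image = load_image("input_frame.png")  # illustrative conditioning image
frames = pipe(image).frames[0]

Note that handle_additional_kwargs above pins num_frames and decode_chunk_size to the values the unet and vae were compiled with, so values passed at call time are overridden when the compiled configuration specifies them.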
optimum/rbln/modeling.py CHANGED
@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, get_args, ge
 import rebel
 import torch
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
-from transformers import AutoConfig, PretrainedConfig
+from transformers import PretrainedConfig
 from transformers.modeling_outputs import BaseModelOutput
 
 from .configuration_utils import DEFAULT_COMPILED_MODEL_NAME, RBLNModelConfig
@@ -35,8 +35,6 @@ logger = get_logger(__name__)
 
 
 class RBLNModel(RBLNBaseModel):
-    _output_class = None
-
     @classmethod
     def update_kwargs(cls, kwargs):
         # Update user-given kwargs to get proper pytorch model.
@@ -56,13 +54,16 @@ class RBLNModel(RBLNBaseModel):
         pass
 
     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
         # Wrap the model if needed.
         return model
 
     @classmethod
     def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
-        model = cls.wrap_model_if_needed(model, rbln_config)
+        if rbln_config._allow_no_compile_cfgs:
+            return {}
+
+        model = cls._wrap_model_if_needed(model, rbln_config)
         rbln_compile_config = rbln_config.compile_cfgs[0]
         compiled_model = cls.compile(
             model,
@@ -72,6 +73,22 @@ class RBLNModel(RBLNBaseModel):
         )
         return compiled_model
 
+    @classmethod
+    def _update_rbln_config(
+        cls,
+        preprocessors: Optional[Any],
+        model: Optional["PreTrainedModel"] = None,
+        model_config: Optional["PretrainedConfig"] = None,
+        rbln_config: Optional[RBLNModelConfig] = None,
+    ) -> RBLNModelConfig:
+        # Default implementation: return config as-is
+        # Subclasses should override to set compile_cfgs if needed
+        return rbln_config
+
+    @classmethod
+    def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"):
+        return model
+
     @classmethod
     def from_model(
         cls,
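The two classmethods added here are override points for subclasses: _update_rbln_config finalizes the configuration (typically filling compile_cfgs), and _reconstruct_model_if_needed lets a subclass normalize the incoming PyTorch model before compilation. A hedged sketch of the second hook; the subclass name is hypothetical, and the PEFT merge mirrors what the removed RBLNAegis code used to do inline:

from optimum.rbln import RBLNModel

class MyRBLNModel(RBLNModel):
    @classmethod
    def _reconstruct_model_if_needed(cls, model):
        # Illustrative override: if a PEFT-wrapped model is passed in, merge the adapters
        # so that a plain PreTrainedModel reaches the compiler.
        if hasattr(model, "merge_and_unload"):
            model = model.merge_and_unload()
        return model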
@@ -80,18 +97,20 @@ class RBLNModel(RBLNBaseModel):
         rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
         subfolder: str = "",
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ) -> "RBLNModel":
         """
         Converts and compiles a pre-trained HuggingFace library model into a RBLN model.
         This method performs the actual model conversion and compilation process.
 
         Args:
-            model: The PyTorch model to be compiled. The object must be an instance of the HuggingFace transformers PreTrainedModel class.
-            rbln_config: Configuration for RBLN model compilation and runtime. This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
+            model (PreTrainedModel): The PyTorch model to be compiled.
+                The object must be an instance of the HuggingFace transformers PreTrainedModel class.
+            config (Optional[PretrainedConfig]): The configuration object associated with the model.
+            rbln_config (Optional[Union[RBLNModelConfig, Dict]]): Configuration for RBLN model compilation and runtime.
+                This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                 For detailed configuration options, see the specific model's configuration class documentation.
-
-            kwargs: Additional keyword arguments. Arguments with the prefix 'rbln_' are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
+            kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
 
         The method performs the following steps:
 
@@ -101,8 +120,10 @@ class RBLNModel(RBLNBaseModel):
         4. Saves the compiled model and configurations
 
         Returns:
-            A RBLN model instance ready for inference on RBLN NPU devices.
+            (RBLNModel): A RBLN model instance ready for inference on RBLN NPU devices.
         """
+
+        model = cls._reconstruct_model_if_needed(model)
         preprocessors = kwargs.pop("preprocessors", [])
         rbln_config, kwargs = cls.prepare_rbln_config(rbln_config=rbln_config, **kwargs)
 
@@ -121,9 +142,6 @@ class RBLNModel(RBLNBaseModel):
         # Save configs
         if config is None:
             config = model.config
-        # remote_config
-        if hasattr(config, "auto_map") and "AutoConfig" in config.auto_map:
-            config = AutoConfig.from_pretrained(config._name_or_path, **kwargs)
 
         if hasattr(model, "can_generate") and model.can_generate():
             import json
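A hedged sketch of the from_model path described in this docstring; the checkpoint and output directory are illustrative, and some model families may require additional rbln_config fields at compile time:

from transformers import AutoModelForCausalLM
from optimum.rbln import RBLNAutoModelForCausalLM

# Start from an already instantiated PyTorch model...
pt_model = AutoModelForCausalLM.from_pretrained("gpt2")  # illustrative checkpoint

# ...and hand it to from_model for compilation. rbln_-prefixed kwargs are folded into
# rbln_config, so these two calls select the target device in equivalent ways.
rbln_model = RBLNAutoModelForCausalLM.from_model(pt_model, rbln_device=0)
# rbln_model = RBLNAutoModelForCausalLM.from_model(pt_model, rbln_config={"device": 0})

rbln_model.save_pretrained("gpt2-rbln")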
@@ -149,6 +167,7 @@ class RBLNModel(RBLNBaseModel):
                 model=model,
                 model_save_dir=save_dir,
                 rbln_config=rbln_config,
+                preprocessors=preprocessors,
                 **kwargs,
             )
         else:
@@ -211,6 +230,7 @@ class RBLNModel(RBLNBaseModel):
         **kwargs,
     ) -> "PreTrainedModel":
         kwargs = cls.update_kwargs(kwargs)
+
         return cls.get_hf_class().from_pretrained(
             model_id,
             subfolder=subfolder,
@@ -229,6 +249,9 @@ class RBLNModel(RBLNBaseModel):
         compiled_models: List[rebel.RBLNCompiledModel],
         rbln_config: RBLNModelConfig,
     ) -> List[rebel.Runtime]:
+        if len(rbln_config.compile_cfgs) == 0:
+            return []
+
         if DEFAULT_COMPILED_MODEL_NAME not in rbln_config.device_map:
             cls._raise_missing_compiled_file_error([DEFAULT_COMPILED_MODEL_NAME])
 
@@ -238,35 +261,38 @@ class RBLNModel(RBLNBaseModel):
                 tensor_type="pt",
                 device=rbln_config.device_map[DEFAULT_COMPILED_MODEL_NAME],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             )
             for compiled_model in compiled_models
         ]
 
-    def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Dict[str, Any]) -> Any:
+    def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Any) -> Any:
         """
-        Defines the forward pass of the RBLN model, providing a drop-in replacement for HuggingFace PreTrainedModel.
+        Defines the forward pass of `RBLNModel`. The interface mirrors HuggingFace conventions so it can act as a drop-in
+        replacement in many cases.
 
-        This method executes the compiled RBLN model on RBLN NPU devices while maintaining full compatibility
-        with HuggingFace transformers and diffusers APIs. The RBLNModel can be used as a direct substitute
-        for any HuggingFace nn.Module/PreTrainedModel, enabling seamless integration into existing workflows.
+        This method executes the compiled RBLN model on RBLN NPU devices while remaining fully compatible with Hugging Face
+        Transformers and Diffusers APIs. In practice, `RBLNModel` can replace models built on `torch.nn.Module` — including
+        `transformers.PreTrainedModel` implementations and Diffusers components based on `diffusers.ModelMixin` enabling
+        seamless integration into existing workflows.
 
         Args:
-            *args: Variable length argument list containing model inputs. The format matches the original
+            args: Variable length argument list containing model inputs. The format matches the original
                 HuggingFace model's forward method signature (e.g., input_ids, attention_mask for
                 transformers models, or sample, timestep for diffusers models).
             return_dict:
                 Whether to return outputs as a dictionary-like object or as a tuple. When `None`:
                 - For transformers models: Uses `self.config.use_return_dict` (typically `True`)
                 - For diffusers models: Defaults to `True`
-            **kwargs: Arbitrary keyword arguments containing additional model inputs and parameters,
+            kwargs: Arbitrary keyword arguments containing additional model inputs and parameters,
                 matching the original HuggingFace model's interface.
 
         Returns:
            Model outputs in the same format as the original HuggingFace model.
 
-            - If `return_dict=True`: Returns a dictionary-like object (e.g., BaseModelOutput,
+            If `return_dict=True`, Returns a dictionary-like object (e.g., BaseModelOutput,
                CausalLMOutput) with named fields such as `logits`, `hidden_states`, etc.
-            - If `return_dict=False`: Returns a tuple containing the raw model outputs.
+            If `return_dict=False`, Returns a tuple containing the raw model outputs.
 
         Note:
            - This method maintains the exact same interface as the original HuggingFace model's forward method
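A hedged sketch of the drop-in usage this docstring describes; RBLNBertModel and the checkpoint are assumptions used for illustration, and some classes may need extra rbln_config fields at compile time:

from transformers import AutoTokenizer
from optimum.rbln import RBLNBertModel  # assumed export; any RBLNModel subclass behaves the same way

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")            # illustrative checkpoint
model = RBLNBertModel.from_pretrained("bert-base-uncased", export=True)   # compile for the NPU

inputs = tokenizer("RBLN NPUs run compiled models", return_tensors="pt")
outputs = model(**inputs)                      # dict-like output (e.g. BaseModelOutput)
hidden = outputs.last_hidden_state
as_tuple = model(**inputs, return_dict=False)  # same call, tuple output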
@@ -288,7 +314,7 @@ class RBLNModel(RBLNBaseModel):
     @classmethod
     def get_hf_output_class(cls):
         # Dynamically gets the output class from the corresponding HuggingFace model class.
-        if cls._output_class:
+        if "_output_class" in cls.__dict__ and cls._output_class is not None:
             return cls._output_class
 
         hf_class = cls.get_hf_class()
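Together with the removal of the `_output_class = None` default earlier in this file, the new condition only honors an `_output_class` that a class declares itself; an inherited value no longer short-circuits the dynamic lookup. A minimal, package-independent sketch of the difference:

class Base:
    _output_class = None

class Child(Base):
    pass

class CustomChild(Base):
    _output_class = dict  # stand-in for a real output class

# Old check (`if cls._output_class:`) walks the MRO, so any truthy inherited value decides
# the branch. New check: only a value defined directly on the class, and not None, wins.
for cls in (Base, Child, CustomChild):
    print(cls.__name__, "_output_class" in cls.__dict__ and cls._output_class is not None)
# Base False / Child False / CustomChild True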