optimum-rbln 0.8.1a6__py3-none-any.whl → 0.8.1a7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. optimum/rbln/__version__.py +2 -2
  2. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +0 -4
  3. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +0 -2
  4. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +0 -4
  5. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +0 -4
  6. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +0 -4
  7. optimum/rbln/diffusers/modeling_diffusers.py +16 -18
  8. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +6 -1
  9. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +13 -3
  10. optimum/rbln/diffusers/models/autoencoders/vq_model.py +6 -1
  11. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +70 -14
  12. optimum/rbln/modeling.py +38 -2
  13. optimum/rbln/modeling_base.py +18 -2
  14. optimum/rbln/transformers/modeling_generic.py +3 -3
  15. optimum/rbln/transformers/models/bart/configuration_bart.py +12 -2
  16. optimum/rbln/transformers/models/bart/modeling_bart.py +16 -7
  17. optimum/rbln/transformers/models/bert/configuration_bert.py +18 -3
  18. optimum/rbln/transformers/models/bert/modeling_bert.py +24 -0
  19. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +13 -1
  20. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +15 -0
  21. optimum/rbln/transformers/models/clip/configuration_clip.py +12 -2
  22. optimum/rbln/transformers/models/clip/modeling_clip.py +27 -1
  23. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +22 -20
  24. optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +6 -1
  25. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +8 -0
  26. optimum/rbln/transformers/models/dpt/configuration_dpt.py +6 -1
  27. optimum/rbln/transformers/models/dpt/modeling_dpt.py +6 -1
  28. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +5 -3
  29. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +8 -0
  30. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +16 -0
  31. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +8 -0
  32. optimum/rbln/transformers/models/resnet/configuration_resnet.py +6 -1
  33. optimum/rbln/transformers/models/resnet/modeling_resnet.py +5 -1
  34. optimum/rbln/transformers/models/roberta/configuration_roberta.py +12 -2
  35. optimum/rbln/transformers/models/roberta/modeling_roberta.py +16 -0
  36. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +6 -2
  37. optimum/rbln/transformers/models/siglip/configuration_siglip.py +7 -0
  38. optimum/rbln/transformers/models/siglip/modeling_siglip.py +7 -0
  39. optimum/rbln/transformers/models/t5/configuration_t5.py +12 -2
  40. optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +7 -0
  41. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +6 -2
  42. optimum/rbln/transformers/models/vit/configuration_vit.py +6 -1
  43. optimum/rbln/transformers/models/vit/modeling_vit.py +7 -1
  44. optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +7 -0
  45. optimum/rbln/transformers/models/whisper/configuration_whisper.py +7 -0
  46. optimum/rbln/transformers/models/whisper/modeling_whisper.py +6 -2
  47. optimum/rbln/utils/runtime_utils.py +46 -1
  48. {optimum_rbln-0.8.1a6.dist-info → optimum_rbln-0.8.1a7.dist-info}/METADATA +1 -1
  49. {optimum_rbln-0.8.1a6.dist-info → optimum_rbln-0.8.1a7.dist-info}/RECORD +51 -51
  50. {optimum_rbln-0.8.1a6.dist-info → optimum_rbln-0.8.1a7.dist-info}/WHEEL +0 -0
  51. {optimum_rbln-0.8.1a6.dist-info → optimum_rbln-0.8.1a7.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/clip/modeling_clip.py

@@ -43,6 +43,13 @@ class _TextEncoder(torch.nn.Module):
 
 
 class RBLNCLIPTextModel(RBLNModel):
+    """
+    RBLN optimized CLIP text encoder model.
+
+    This class provides hardware-accelerated inference for CLIP text encoders
+    on RBLN devices, supporting text encoding for multimodal tasks.
+    """
+
     @classmethod
     def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPTextModelConfig) -> torch.nn.Module:
         return _TextEncoder(model).eval()
@@ -95,7 +102,12 @@ class RBLNCLIPTextModel(RBLNModel):
 
 
 class RBLNCLIPTextModelWithProjection(RBLNCLIPTextModel):
-    pass
+    """
+    RBLN optimized CLIP text encoder model with projection layer.
+
+    This class extends RBLNCLIPTextModel with a projection layer for
+    multimodal embedding alignment tasks.
+    """
 
 
 class _VisionEncoder(torch.nn.Module):
@@ -109,6 +121,13 @@ class _VisionEncoder(torch.nn.Module):
 
 
 class RBLNCLIPVisionModel(RBLNModel):
+    """
+    RBLN optimized CLIP vision encoder model.
+
+    This class provides hardware-accelerated inference for CLIP vision encoders
+    on RBLN devices, supporting image encoding for multimodal tasks.
+    """
+
     @classmethod
     def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module:
         return _VisionEncoder(model).eval()
@@ -182,6 +201,13 @@ class RBLNCLIPVisionModel(RBLNModel):
 
 
 class RBLNCLIPVisionModelWithProjection(RBLNCLIPVisionModel):
+    """
+    RBLN optimized CLIP vision encoder model with projection layer.
+
+    This class extends RBLNCLIPVisionModel with a projection layer for
+    multimodal embedding alignment tasks.
+    """
+
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py

@@ -78,7 +78,7 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
             torch.ones(1, 1, self.rbln_config.prefill_chunk_size, self.rbln_config.prefill_chunk_size), diagonal=1
         )
 
-    def get_block_tables(self, cache_position: torch.Tensor, batch_idx: int = None):
+    def get_block_tables(self, cache_position: torch.Tensor, batch_idx: int = None) -> torch.Tensor:
         """
         Manages and returns the KV cache block tables.
         Updates the block tables based on the given cache_position, allocating new blocks or reusing existing ones as needed.
@@ -88,7 +88,7 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
             batch_idx (int, optional): Specific batch index, used when phase is 'prefill'.
 
         Returns:
-            torch.Tensor: Updated block tables.
+            Updated block tables.
         """
 
         NO_BLOCKS_ERROR = (
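The get_block_tables docstring above describes paged KV-cache bookkeeping: block tables map sequence positions to fixed-size cache blocks, allocated on demand and reused across calls. A toy sketch of that idea (hypothetical names and logic, not the optimum-rbln implementation):

import torch

# Minimal illustration of paged KV-cache block-table bookkeeping,
# assuming a fixed pool of cache blocks shared across the batch.
class ToyBlockTableManager:
    def __init__(self, num_blocks: int, block_size: int):
        self.block_size = block_size
        self.free_blocks = list(range(num_blocks))
        self.tables: dict[int, list[int]] = {}  # batch_idx -> allocated block ids

    def get_block_tables(self, cache_position: torch.Tensor, batch_idx: int = 0) -> torch.Tensor:
        # Number of blocks needed to cover positions [0, max_position].
        needed = int(cache_position.max().item()) // self.block_size + 1
        table = self.tables.setdefault(batch_idx, [])
        while len(table) < needed:  # allocate new blocks on demand
            if not self.free_blocks:
                raise RuntimeError("No available blocks for KV cache.")
            table.append(self.free_blocks.pop())
        return torch.tensor(table, dtype=torch.int32)

mgr = ToyBlockTableManager(num_blocks=8, block_size=128)
print(mgr.get_block_tables(torch.arange(200)))   # allocates two blocks
print(mgr.get_block_tables(torch.tensor([300]))) # reuses them and extends by one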
@@ -458,6 +458,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
     This class serves as the foundation for various decoder-only architectures like GPT, LLaMA, etc.
 
     The class provides core functionality for:
+
     1. Converting pre-trained transformer models to RBLN-optimized format
     2. Handling the compilation process for RBLN devices
     3. Managing inference operations for causal language modeling
@@ -532,7 +533,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
     @classmethod
     def save_torch_artifacts(
         cls,
-        model: "PreTrainedModel",
+        model: PreTrainedModel,
         save_dir_path: Path,
         subfolder: str,
         rbln_config: RBLNDecoderOnlyModelForCausalLMConfig,
@@ -566,7 +567,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
     def get_quantized_model(
         cls,
         model_id: str,
-        config: Optional["PretrainedConfig"] = None,
+        config: Optional[PretrainedConfig] = None,
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
@@ -605,16 +606,15 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         return model
 
     def __getattr__(self, __name: str) -> Any:
-        """
-        Special method to delegate attribute access to the original Huggingface LM class.
-        This method is called when an attribute is not found in the current instance's dictionary.
-        It enables transparent access to the original model's attributes and methods while maintaining
-        proper method binding.
-
-        The method implements a delegation pattern that:
-        1. For methods: Creates a wrapper that properly binds 'self' to method calls
-        2. For other attributes: Returns them directly from the original class
-        """
+        # Special method to delegate attribute access to the original Huggingface LM class.
+        # This method is called when an attribute is not found in the current instance's dictionary.
+        # It enables transparent access to the original model's attributes and methods while maintaining
+        # proper method binding.
+
+        # The method implements a delegation pattern that:
+
+        # 1. For methods: Creates a wrapper that properly binds 'self' to method calls
+        # 2. For other attributes: Returns them directly from the original class
 
         def redirect(func):
             return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)
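The comments above describe a standard attribute-delegation idiom. A self-contained sketch of the same pattern, using hypothetical classes unrelated to the package:

from typing import Any

class Original:
    label = "original"

    def greet(self, name: str) -> str:
        return f"hello {name}"

class Wrapper:
    # Called only when normal attribute lookup fails on the wrapper instance.
    def __getattr__(self, name: str) -> Any:
        attr = getattr(Original, name)
        if callable(attr):
            # Bind the wrapper as 'self' so the borrowed method sees our state.
            return lambda *args, **kwargs: attr(self, *args, **kwargs)
        return attr

w = Wrapper()
print(w.greet("rbln"))  # "hello rbln": Original.greet is called with w bound as self
print(w.label)          # "original": plain attributes pass through directly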
@@ -627,7 +627,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
     @classmethod
     def get_pytorch_model(
         cls, *args, rbln_config: Optional[RBLNDecoderOnlyModelForCausalLMConfig] = None, **kwargs
-    ) -> "PreTrainedModel":
+    ) -> PreTrainedModel:
         if rbln_config and rbln_config.quantization:
             model = cls.get_quantized_model(*args, **kwargs)
         else:
@@ -636,7 +636,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         return model
 
     @classmethod
-    def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: "RBLNDecoderOnlyModelForCausalLMConfig"):
+    def wrap_model_if_needed(cls, model: PreTrainedModel, rbln_config: "RBLNDecoderOnlyModelForCausalLMConfig"):
         wrapper_cfg = {
             "max_seq_len": rbln_config.max_seq_len,
             "attn_impl": rbln_config.attn_impl,
@@ -654,7 +654,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
 
     @classmethod
     @torch.inference_mode()
-    def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNDecoderOnlyModelForCausalLMConfig):
+    def get_compiled_model(cls, model: PreTrainedModel, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig):
         wrapped_model = cls.wrap_model_if_needed(model, rbln_config)
 
         rbln_compile_configs = rbln_config.compile_cfgs
@@ -679,9 +679,11 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
             quantization.maybe_set_quantization_env()
             original_linear = torch.nn.functional.linear
             torch.nn.functional.linear = torch.ops.rbln_custom_ops.linear
-            compiled_model = RBLNModel.compile(
+            compiled_model = cls.compile(
                 wrapped_model,
                 compile_config,
+                create_runtimes=rbln_config.create_runtimes,
+                device=rbln_config.device,
                 example_inputs=example_inputs,
                 compile_context=compile_context,
            )
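This hunk is one instance of a change repeated throughout the release: static RBLNModel.compile(...) / super().compile(...) calls become cls.compile(...), with create_runtimes and device forwarded from the rbln_config. Calling through cls keeps subclass overrides of compile in effect; a minimal illustration with made-up classes (not the optimum-rbln API):

class Base:
    @classmethod
    def compile(cls, graph, **opts):
        # Shared compile entry point; subclasses may specialize it.
        return f"compiled {graph} with {sorted(opts)} via {cls.__name__}"

    @classmethod
    def get_compiled_model(cls, graph, create_runtimes=True, device=0):
        # Dispatching through cls (not Base) lets subclass overrides run,
        # and device placement options flow into compilation itself.
        return cls.compile(graph, create_runtimes=create_runtimes, device=device)

class Specialized(Base):
    @classmethod
    def compile(cls, graph, **opts):
        return "specialized: " + super().compile(graph, **opts)

print(Base.get_compiled_model("g"))         # uses Base.compile
print(Specialized.get_compiled_model("g"))  # picks up Specialized.compile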
@@ -973,8 +975,8 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
     def _update_rbln_config(
         cls,
         preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
-        model: Optional["PreTrainedModel"] = None,
-        model_config: Optional["PretrainedConfig"] = None,
+        model: Optional[PreTrainedModel] = None,
+        model_config: Optional[PretrainedConfig] = None,
         rbln_config: Optional[RBLNDecoderOnlyModelForCausalLMConfig] = None,
     ) -> RBLNDecoderOnlyModelForCausalLMConfig:
         if rbln_config.max_seq_len is None:
optimum/rbln/transformers/models/distilbert/configuration_distilbert.py

@@ -16,4 +16,9 @@ from ...configuration_generic import RBLNModelForQuestionAnsweringConfig
 
 
 class RBLNDistilBertForQuestionAnsweringConfig(RBLNModelForQuestionAnsweringConfig):
-    ""
+    """
+    Configuration class for RBLNDistilBertForQuestionAnswering.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized DistilBERT models for question answering tasks.
+    """
optimum/rbln/transformers/models/distilbert/modeling_distilbert.py

@@ -16,4 +16,12 @@ from ...modeling_generic import RBLNModelForQuestionAnswering
 
 
 class RBLNDistilBertForQuestionAnswering(RBLNModelForQuestionAnswering):
+    """
+    RBLN optimized DistilBERT model for question answering tasks.
+
+    This class provides hardware-accelerated inference for DistilBERT models
+    on RBLN devices, supporting extractive question answering tasks where
+    the model predicts start and end positions of answers in text.
+    """
+
     rbln_model_input_names = ["input_ids", "attention_mask"]
optimum/rbln/transformers/models/dpt/configuration_dpt.py

@@ -16,4 +16,9 @@ from ...configuration_generic import RBLNModelForDepthEstimationConfig
 
 
 class RBLNDPTForDepthEstimationConfig(RBLNModelForDepthEstimationConfig):
-    pass
+    """
+    Configuration class for RBLNDPTForDepthEstimation.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized DPT (Dense Prediction Transformer) models for depth estimation tasks.
+    """
optimum/rbln/transformers/models/dpt/modeling_dpt.py

@@ -17,4 +17,9 @@ from ...modeling_generic import RBLNModelForDepthEstimation
 
 
 class RBLNDPTForDepthEstimation(RBLNModelForDepthEstimation):
-    pass
+    """
+    RBLN optimized DPT model for depth estimation tasks.
+
+    This class provides hardware-accelerated inference for DPT (Dense Prediction Transformer)
+    models on RBLN devices, supporting monocular depth estimation from single images.
+    """
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py

@@ -326,7 +326,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
         attention_mask: torch.Tensor,
         position_ids: torch.Tensor,
         token_type_ids: Optional[torch.Tensor] = None,
-    ):
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, int, torch.Tensor]:
         """
         Pads inputs, attention_mask, and position_ids so image token groups (256 tokens with token_type_ids == 1)
         start at multiples of prefill_chunk_size (256). Returns padded tensors and total padded length.
@@ -338,7 +338,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
             token_type_ids: (1, seq_len) tensor, 0 for text, 1 for image.
 
         Returns:
-            Tuple: (inputs_padded, attention_mask_padded, position_ids_padded, padded_len, token_type_ids_padded).
+            (inputs_padded, attention_mask_padded, position_ids_padded, padded_len, token_type_ids_padded).
         """
 
         if token_type_ids is None:
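The alignment rule in the docstring above is plain modular arithmetic: each image-token group is shifted so it begins at the next multiple of prefill_chunk_size. A small sketch of that computation (illustrative only, not the Gemma3 code):

# Illustrative padding math: shift an image-token group that starts at
# position `start` so it begins at the next multiple of the chunk size.
CHUNK = 256  # prefill_chunk_size assumed in the docstring above

def left_pad_needed(start: int, chunk: int = CHUNK) -> int:
    # 0 if already aligned, otherwise pad up to the next chunk boundary.
    return (-start) % chunk

print(left_pad_needed(0))    # 0   -> already aligned
print(left_pad_needed(100))  # 156 -> group moves from position 100 to 256
print(left_pad_needed(256))  # 0   -> boundary positions need no padding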
@@ -816,9 +816,11 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
             quantization.maybe_set_quantization_env()
             original_linear = torch.nn.functional.linear
             torch.nn.functional.linear = torch.ops.rbln_custom_ops.linear
-            compiled_model = RBLNModel.compile(
+            compiled_model = cls.compile(
                 wrapped_model,
                 compile_config,
+                create_runtimes=rbln_config.create_runtimes,
+                device=rbln_config.device,
                 example_inputs=example_inputs,
                 compile_context=compile_context,
             )
optimum/rbln/transformers/models/llava_next/configuration_llava_next.py

@@ -18,6 +18,14 @@ from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNLlavaNextForConditionalGenerationConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNLlavaNextForConditionalGeneration.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized LLaVA-Next models for multimodal conditional generation tasks
+    that combine vision and language processing capabilities.
+    """
+
     submodules = ["vision_tower", "language_model"]
 
     def __init__(
optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py

@@ -19,6 +19,14 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
 
 
 class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausalLMConfig):
+    """
+    Configuration class for RBLNQwen2_5_VLForConditionalGeneration.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Qwen2.5-VL models for multimodal conditional generation tasks
+    that combine vision and language processing capabilities.
+    """
+
     submodules = ["visual"]
 
     def __init__(
@@ -37,6 +45,14 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
 
 
 class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNQwen2_5_VisionTransformerPretrainedModel.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Qwen2.5-VL vision transformer models with window-based attention
+    mechanisms for processing images and videos.
+    """
+
     def __init__(self, max_seq_lens: Union[int, List[int]] = None, **kwargs: Dict[str, Any]):
         """
         Args:
optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py

@@ -54,6 +54,14 @@ if TYPE_CHECKING:
 
 
 class RBLNQwen2_5_VisionTransformerPretrainedModel(RBLNModel):
+    """
+    RBLN optimized Qwen2.5-VL vision transformer model.
+
+    This class provides hardware-accelerated inference for Qwen2.5-VL vision transformers
+    on RBLN devices, supporting image and video encoding for multimodal vision-language tasks
+    with window-based attention mechanisms.
+    """
+
     auto_model_class = None
 
     def __post_init__(self, **kwargs):
optimum/rbln/transformers/models/resnet/configuration_resnet.py

@@ -17,4 +17,9 @@ from ...configuration_generic import RBLNModelForImageClassificationConfig
 
 
 class RBLNResNetForImageClassificationConfig(RBLNModelForImageClassificationConfig):
-    ""
+    """
+    Configuration class for RBLNResNetForImageClassification.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized ResNet models for image classification tasks.
+    """
optimum/rbln/transformers/models/resnet/modeling_resnet.py

@@ -18,5 +18,9 @@ from ...modeling_generic import RBLNModelForImageClassification
 
 class RBLNResNetForImageClassification(RBLNModelForImageClassification):
     """
-    ResNet model for image classification tasks on RBLN NPU.
+    RBLN optimized ResNet model for image classification tasks.
+
+    This class provides hardware-accelerated inference for ResNet models
+    on RBLN devices, supporting image classification with convolutional neural networks
+    designed for computer vision tasks.
     """
optimum/rbln/transformers/models/roberta/configuration_roberta.py

@@ -16,8 +16,18 @@ from ...configuration_generic import RBLNModelForMaskedLMConfig, RBLNModelForSeq
 
 
 class RBLNRobertaForMaskedLMConfig(RBLNModelForMaskedLMConfig):
-    ""
+    """
+    Configuration class for RBLNRobertaForMaskedLM.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized RoBERTa models for masked language modeling tasks.
+    """
 
 
 class RBLNRobertaForSequenceClassificationConfig(RBLNModelForSequenceClassificationConfig):
-    ""
+    """
+    Configuration class for RBLNRobertaForSequenceClassification.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized RoBERTa models for sequence classification tasks.
+    """
optimum/rbln/transformers/models/roberta/modeling_roberta.py

@@ -16,8 +16,24 @@ from ...modeling_generic import RBLNModelForMaskedLM, RBLNModelForSequenceClassi
 
 
 class RBLNRobertaForMaskedLM(RBLNModelForMaskedLM):
+    """
+    RBLN optimized RoBERTa model for masked language modeling tasks.
+
+    This class provides hardware-accelerated inference for RoBERTa models
+    on RBLN devices, supporting masked language modeling tasks such as
+    token prediction and text completion.
+    """
+
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
 
class RBLNRobertaForSequenceClassification(RBLNModelForSequenceClassification):
+    """
+    RBLN optimized RoBERTa model for sequence classification tasks.
+
+    This class provides hardware-accelerated inference for RoBERTa models
+    on RBLN devices, supporting text classification tasks such as sentiment analysis,
+    topic classification, and other sequence-level prediction tasks.
+    """
+
     rbln_model_input_names = ["input_ids", "attention_mask"]
optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py

@@ -161,16 +161,20 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
             if "key_value_states" in name:
                 context.mark_static_address(tensor)
 
-        compiled_encoder = super().compile(
+        compiled_encoder = cls.compile(
             wrapped_model.encoder,
             enc_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
             example_inputs=enc_example_inputs,
             compile_context=context,
         )
 
-        compiled_decoder = super().compile(
+        compiled_decoder = cls.compile(
             wrapped_model.decoder,
             dec_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
             example_inputs=dec_example_inputs,
             compile_context=context,
         )
optimum/rbln/transformers/models/siglip/configuration_siglip.py

@@ -18,6 +18,13 @@ from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNSiglipVisionModelConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNSiglipVisionModel.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized SigLIP vision models for image encoding in multimodal tasks.
+    """
+
     def __init__(
         self,
         batch_size: Optional[int] = None,
optimum/rbln/transformers/models/siglip/modeling_siglip.py

@@ -58,6 +58,13 @@ class _SiglipVisionModel(torch.nn.Module):
 
 
 class RBLNSiglipVisionModel(RBLNModel):
+    """
+    RBLN optimized SigLIP vision model.
+
+    This class provides hardware-accelerated inference for SigLIP vision models
+    on RBLN devices, supporting image encoding for multimodal vision-language tasks.
+    """
+
     @classmethod
     def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSiglipVisionModelConfig) -> torch.nn.Module:
         wrapper_cfg = {
optimum/rbln/transformers/models/t5/configuration_t5.py

@@ -17,8 +17,18 @@ from ..seq2seq import RBLNModelForSeq2SeqLMConfig
 
 
 class RBLNT5EncoderModelConfig(RBLNTransformerEncoderForFeatureExtractionConfig):
-    pass
+    """
+    Configuration class for RBLNT5EncoderModel.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized T5 encoder models for feature extraction tasks.
+    """
 
 
 class RBLNT5ForConditionalGenerationConfig(RBLNModelForSeq2SeqLMConfig):
-    pass
+    """
+    Configuration class for RBLNT5ForConditionalGeneration.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized T5 models for conditional text generation tasks.
+    """
optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py

@@ -4,6 +4,13 @@ from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNTimeSeriesTransformerForPredictionConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNTimeSeriesTransformerForPrediction.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Time Series Transformer models for time series forecasting tasks.
+    """
+
     def __init__(
         self,
         batch_size: Optional[int] = None,
optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py

@@ -194,15 +194,19 @@ class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
             if "key_value_states" in name:
                 context.mark_static_address(tensor)
 
-        compiled_decoder = super().compile(
+        compiled_decoder = cls.compile(
             wrapped_model.decoder,
             dec_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
             example_inputs=dec_example_inputs,
             compile_context=context,
         )
-        compiled_encoder = super().compile(
+        compiled_encoder = cls.compile(
             wrapped_model.encoder,
             enc_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
             example_inputs=enc_example_inputs,
             compile_context=context,
         )
optimum/rbln/transformers/models/vit/configuration_vit.py

@@ -16,4 +16,9 @@ from ...configuration_generic import RBLNModelForImageClassificationConfig
 
 
 class RBLNViTForImageClassificationConfig(RBLNModelForImageClassificationConfig):
-    ""
+    """
+    Configuration class for RBLNViTForImageClassification.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Vision Transformer (ViT) models for image classification tasks.
+    """
optimum/rbln/transformers/models/vit/modeling_vit.py

@@ -16,4 +16,10 @@ from ...modeling_generic import RBLNModelForImageClassification
 
 
 class RBLNViTForImageClassification(RBLNModelForImageClassification):
-    ""
+    """
+    RBLN optimized Vision Transformer (ViT) model for image classification tasks.
+
+    This class provides hardware-accelerated inference for Vision Transformer models
+    on RBLN devices, supporting image classification with transformer-based architectures
+    that process images as sequences of patches.
+    """
optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py

@@ -16,4 +16,11 @@ from ...configuration_generic import RBLNModelForMaskedLMConfig
 
 
 class RBLNWav2Vec2ForCTCConfig(RBLNModelForMaskedLMConfig):
+    """
+    Configuration class for RBLNWav2Vec2ForCTC.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Wav2Vec2 models for Connectionist Temporal Classification (CTC) tasks.
+    """
+
     rbln_model_input_names = ["input_values"]
optimum/rbln/transformers/models/whisper/configuration_whisper.py

@@ -24,6 +24,13 @@ logger = get_logger()
 
 
 class RBLNWhisperForConditionalGenerationConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNWhisperForConditionalGeneration.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Whisper models for speech recognition and transcription tasks.
+    """
+
     def __init__(
         self,
         batch_size: int = None,
optimum/rbln/transformers/models/whisper/modeling_whisper.py

@@ -230,15 +230,19 @@ class RBLNWhisperForConditionalGeneration(RBLNModel, RBLNWhisperGenerationMixin)
             if "key_value_states" in name:
                 context.mark_static_address(tensor)
 
-        compiled_encoder = super().compile(
+        compiled_encoder = cls.compile(
             wrapped_model.encoder,
             enc_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
             example_inputs=enc_example_inputs,
             compile_context=context,
         )
-        compiled_decoder = super().compile(
+        compiled_decoder = cls.compile(
             wrapped_model.decoder,
             dec_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
             example_inputs=dec_example_inputs,
             compile_context=context,
         )
optimum/rbln/utils/runtime_utils.py

@@ -13,12 +13,57 @@
 # limitations under the License.
 
 import threading
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional, Union
 
 import rebel
 import torch
 
 
+def tp_and_devices_are_ok(
+    tensor_parallel_size: Optional[int] = None,
+    device: Optional[Union[int, List[int]]] = None,
+    npu: Optional[str] = None,
+) -> Optional[str]:
+    if tensor_parallel_size is None:
+        tensor_parallel_size = 1
+
+    if rebel.device_count() < tensor_parallel_size:
+        return (
+            f"Tensor parallel size {tensor_parallel_size} is greater than "
+            f"the number of available devices {rebel.device_count()}."
+        )
+
+    if device is None:
+        device = list(range(tensor_parallel_size))
+    elif isinstance(device, int):
+        device = [device]
+    elif isinstance(device, list):
+        if any(not isinstance(d, int) for d in device):
+            return "Device must be a(n) (list of) integer(s)."
+        if len(device) != tensor_parallel_size:
+            return (
+                f"The number of devices ({len(device)}) does not match tensor parallel size ({tensor_parallel_size})."
+            )
+    else:
+        return f"Invalid device: {device}"
+
+    for device_id in device:
+        if device_id < 0:  # if any device is dummy device, skip it
+            return None
+        if rebel.get_npu_name(device_id) is None:
+            return (
+                f"Device {device_id} is not a valid NPU device. Please check your NPU status with 'rbln-stat' command."
+            )
+
+    if npu is not None:
+        for device_id in device:
+            npu_name = rebel.get_npu_name(device_id)
+            if npu_name != npu:
+                return f"Device {device_id} ({npu_name}) is not on the same NPU as {npu}."
+
+    return None
+
+
 class RBLNPytorchRuntime:
     mandatory_members = []
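The new tp_and_devices_are_ok helper validates tensor-parallel size, device lists, and NPU names, returning None on success or an error string describing the first problem found. A plausible call pattern (the wrapper below is hypothetical; only the import target comes from this diff):

from optimum.rbln.utils.runtime_utils import tp_and_devices_are_ok

def check_runtime_request(tensor_parallel_size, device, npu=None):
    # The validator reports problems as strings rather than raising,
    # so the caller decides whether to raise, log, or fall back.
    error = tp_and_devices_are_ok(
        tensor_parallel_size=tensor_parallel_size,
        device=device,
        npu=npu,
    )
    if error is not None:
        raise ValueError(error)

# e.g. a 4-way tensor-parallel runtime pinned to devices 0-3:
check_runtime_request(tensor_parallel_size=4, device=[0, 1, 2, 3])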
{optimum_rbln-0.8.1a6.dist-info → optimum_rbln-0.8.1a7.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.1a6
+Version: 0.8.1a7
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai