optimum-rbln 0.9.3__py3-none-any.whl → 0.9.3rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. optimum/rbln/__init__.py +0 -12
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +2 -4
  4. optimum/rbln/diffusers/__init__.py +0 -12
  5. optimum/rbln/diffusers/configurations/__init__.py +0 -3
  6. optimum/rbln/diffusers/configurations/models/__init__.py +0 -2
  7. optimum/rbln/diffusers/configurations/pipelines/__init__.py +0 -3
  8. optimum/rbln/diffusers/models/__init__.py +3 -17
  9. optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -1
  10. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -3
  11. optimum/rbln/diffusers/models/autoencoders/vae.py +8 -27
  12. optimum/rbln/diffusers/models/controlnet.py +1 -16
  13. optimum/rbln/diffusers/models/transformers/prior_transformer.py +2 -16
  14. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +1 -16
  15. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +1 -14
  16. optimum/rbln/diffusers/models/unets/__init__.py +0 -1
  17. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +1 -17
  18. optimum/rbln/diffusers/pipelines/__init__.py +0 -4
  19. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -20
  20. optimum/rbln/modeling.py +45 -20
  21. optimum/rbln/modeling_base.py +1 -0
  22. optimum/rbln/transformers/configuration_generic.py +27 -0
  23. optimum/rbln/transformers/modeling_attention_utils.py +109 -242
  24. optimum/rbln/transformers/modeling_generic.py +61 -2
  25. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +2 -28
  26. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +5 -68
  27. optimum/rbln/transformers/models/bart/modeling_bart.py +2 -23
  28. optimum/rbln/transformers/models/bert/modeling_bert.py +1 -86
  29. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +15 -42
  30. optimum/rbln/transformers/models/clip/modeling_clip.py +2 -40
  31. optimum/rbln/transformers/models/colpali/modeling_colpali.py +44 -5
  32. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +1 -6
  33. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +2 -6
  34. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +9 -17
  35. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +12 -36
  36. optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +0 -17
  37. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +0 -24
  38. optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -17
  39. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +5 -3
  40. optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +8 -24
  41. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +5 -3
  42. optimum/rbln/transformers/models/llava/modeling_llava.py +24 -36
  43. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +4 -2
  44. optimum/rbln/transformers/models/opt/modeling_opt.py +2 -2
  45. optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +1 -1
  46. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +1 -13
  47. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +3 -2
  48. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +3 -2
  49. optimum/rbln/transformers/models/resnet/configuration_resnet.py +0 -17
  50. optimum/rbln/transformers/models/resnet/modeling_resnet.py +0 -73
  51. optimum/rbln/transformers/models/roberta/modeling_roberta.py +0 -33
  52. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +4 -2
  53. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +10 -34
  54. optimum/rbln/transformers/models/siglip/modeling_siglip.py +1 -17
  55. optimum/rbln/transformers/models/swin/modeling_swin.py +1 -14
  56. optimum/rbln/transformers/models/t5/modeling_t5.py +2 -2
  57. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +2 -16
  58. optimum/rbln/transformers/models/vit/modeling_vit.py +0 -19
  59. optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +3 -15
  60. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +8 -60
  61. optimum/rbln/transformers/models/whisper/generation_whisper.py +14 -48
  62. optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -2
  63. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -43
  64. optimum/rbln/transformers/utils/rbln_quantization.py +0 -9
  65. optimum/rbln/utils/depreacate_utils.py +16 -0
  66. optimum/rbln/utils/hub.py +3 -14
  67. optimum/rbln/utils/runtime_utils.py +0 -32
  68. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/METADATA +2 -2
  69. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/RECORD +72 -79
  70. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/WHEEL +1 -1
  71. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +0 -67
  72. optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +0 -59
  73. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +0 -114
  74. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +0 -275
  75. optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +0 -201
  76. optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +0 -15
  77. optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +0 -46
  78. optimum/rbln/utils/deprecation.py +0 -213
  79. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/entry_points.txt +0 -0
  80. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py CHANGED
@@ -20,9 +20,8 @@ import rebel
 import torch
 from rebel.compile_context import CompileContext
 from transformers import AutoModelForSeq2SeqLM, PretrainedConfig, PreTrainedModel
-from transformers.generation.configuration_utils import GenerationConfig
 from transformers.generation.utils import GenerationMixin
-from transformers.modeling_outputs import BaseModelOutput, ModelOutput, Seq2SeqLMOutput
+from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput

 from ....configuration_utils import RBLNCompileConfig
 from ....modeling import RBLNModel
@@ -34,7 +33,7 @@ from .configuration_seq2seq import RBLNModelForSeq2SeqLMConfig
 logger = get_logger(__name__)

 if TYPE_CHECKING:
-    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, GenerationConfig, PretrainedConfig


 class RBLNRuntimeEncoder(RBLNPytorchRuntime):
@@ -141,7 +140,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, GenerationMixin, ABC):
     @classmethod
     @torch.inference_mode()
     def get_compiled_model(cls, model: PreTrainedModel, rbln_config: RBLNModelForSeq2SeqLMConfig):
-        wrapped_model = cls._wrap_model_if_needed(model, rbln_config)
+        wrapped_model = cls.wrap_model_if_needed(model, rbln_config)

         enc_compile_config = rbln_config.compile_cfgs[0]
         dec_compile_config = rbln_config.compile_cfgs[1]
@@ -222,6 +221,12 @@ class RBLNModelForSeq2SeqLM(RBLNModel, GenerationMixin, ABC):
             model_config, "max_position_embeddings", None
         )

+        pad_token_id = getattr(model_config, "pad_token_id", None)
+        pad_token_id = pad_token_id or getattr(model_config, "bos_token_id", None)
+        pad_token_id = pad_token_id or getattr(model_config, "eos_token_id", None)
+        pad_token_id = pad_token_id or -1
+        rbln_config.pad_token_id = pad_token_id
+
         if rbln_config.enc_max_seq_len is None:
             enc_max_seq_len = max_position_embeddings
             for tokenizer in preprocessors:
@@ -427,7 +432,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, GenerationMixin, ABC):
             inputs_tensor = torch.nn.functional.pad(
                 inputs_tensor,
                 (0, self.rbln_config.enc_max_seq_len - input_len),
-                value=self.config.pad_token_id,
+                value=self.rbln_config.pad_token_id,
             )
             model_kwargs["attention_mask"] = torch.nn.functional.pad(
                 model_kwargs["attention_mask"], (0, self.rbln_config.enc_max_seq_len - input_len)
@@ -446,32 +451,3 @@ class RBLNModelForSeq2SeqLM(RBLNModel, GenerationMixin, ABC):
         model_kwargs["encoder_outputs"] = encoder(**encoder_kwargs, block_tables=block_tables)

         return model_kwargs
-
-    def generate(
-        self,
-        input_ids: torch.LongTensor,
-        attention_mask: Optional[torch.LongTensor] = None,
-        generation_config: Optional[GenerationConfig] = None,
-        **kwargs,
-    ) -> Union[ModelOutput, torch.LongTensor]:
-        """
-        The generate function is utilized in its standard form as in the HuggingFace transformers library. User can use this function to generate text from the model.
-        Check the [HuggingFace transformers documentation](https://huggingface.co/docs/transformers/v4.57.1/en/main_classes/text_generation#transformers.GenerationMixin.generate) for more details.
-
-        Args:
-            input_ids (torch.LongTensor): The input ids to the model.
-            attention_mask (torch.LongTensor, optional): The attention mask to the model.
-            generation_config (GenerationConfig, optional): The generation configuration to be used as base parametrization for the generation call. **kwargs passed to generate matching the attributes of generation_config will override them.
-                If generation_config is not provided, the default will be used, which had the following loading priority: 1) from the generation_config.json model file, if it exists; 2) from the model configuration.
-                Please note that unspecified parameters will inherit [GenerationConfig](https://huggingface.co/docs/transformers/v4.57.1/en/main_classes/text_generation#transformers.GenerationConfig)’s default values.
-            kwargs (dict[str, Any], optional): Additional arguments passed to the generate function. See the HuggingFace transformers documentation for more details.
-
-        Returns:
-            Generates sequences of token ids for models with a language modeling head.
-        """
-        if generation_config is not None:
-            kwargs["generation_config"] = generation_config
-        if attention_mask is not None:
-            kwargs["attention_mask"] = attention_mask
-
-        return super().generate(input_ids, **kwargs)
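Two behavioral notes on the seq2seq changes above: the explicit `generate()` override is gone, so calls now flow straight through `GenerationMixin.generate`, and encoder padding now uses a `pad_token_id` resolved once at compile-config time rather than reading `self.config.pad_token_id` at runtime. A minimal sketch of the new fallback chain (the `SimpleNamespace` stands in for a HuggingFace model config):

```python
from types import SimpleNamespace

# Stand-in for a HuggingFace PretrainedConfig with no pad token defined.
model_config = SimpleNamespace(pad_token_id=None, bos_token_id=0, eos_token_id=2)

pad_token_id = getattr(model_config, "pad_token_id", None)
pad_token_id = pad_token_id or getattr(model_config, "bos_token_id", None)
pad_token_id = pad_token_id or getattr(model_config, "eos_token_id", None)
pad_token_id = pad_token_id or -1

# `or` treats a token id of 0 as missing, so bos_token_id=0 is skipped
# here and eos_token_id=2 wins; the final fallback is -1.
print(pad_token_id)  # 2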
optimum/rbln/transformers/models/siglip/modeling_siglip.py CHANGED
@@ -66,9 +66,7 @@ class RBLNSiglipVisionModel(RBLNModel):
     _tp_support = False

     @classmethod
-    def _wrap_model_if_needed(
-        cls, model: torch.nn.Module, rbln_config: RBLNSiglipVisionModelConfig
-    ) -> torch.nn.Module:
+    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSiglipVisionModelConfig) -> torch.nn.Module:
         wrapper_cfg = {
             "interpolate_pos_encoding": rbln_config.interpolate_pos_encoding,
             "output_hidden_states": rbln_config.output_hidden_states,
@@ -124,20 +122,6 @@ class RBLNSiglipVisionModel(RBLNModel):
         interpolate_pos_encoding: bool = False,
         **kwargs: Any,
     ) -> Union[Tuple, BaseModelOutputWithPooling]:
-        """
-        Forward pass for the RBLN-optimized SigLIP vision model.
-
-        Args:
-            pixel_values (torch.FloatTensor of shape (batch_size, num_channels, image_size, image_size), optional): The tensors corresponding to the input images. Pixel values can be obtained using ViTImageProcessor. See ViTImageProcessor.call() for details (processor_class uses ViTImageProcessor for processing images).
-            return_dict (bool, optional): Whether or not to return a ModelOutput instead of a plain tuple.
-            output_attentions (bool, optional): Whether or not to return the attentions tensors of all attention layers. See attentions under returned tensors for more detail.
-            output_hidden_states (bool, optional): Whether or not to return the hidden states of all layers. See hidden_states under returned tensors for more detail.
-            interpolate_pos_encoding (bool, defaults to False): Whether to interpolate the pre-trained position encodings.
-
-        Returns:
-            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPooling object.
-        """
-
         output_attentions = output_attentions if output_attentions is not None else self.rbln_config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.rbln_config.output_hidden_states
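As in the SigLIP hunk above (and the Swin, T5, time-series, Wav2Vec2, and Whisper hunks below), the model-wrapping hook loses its leading underscore: subclasses now override `wrap_model_if_needed` instead of `_wrap_model_if_needed`. A hedged sketch of what an override looks like; the wrapper class and the config argument here are hypothetical stand-ins:

```python
import torch


class _MyWrapper(torch.nn.Module):
    """Hypothetical wrapper that pins the forward signature for compilation."""

    def __init__(self, model: torch.nn.Module):
        super().__init__()
        self.model = model

    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
        return self.model(pixel_values)


class MyRBLNModel:  # stands in for an RBLNModel subclass
    @classmethod
    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config) -> torch.nn.Module:
        # The renamed public hook: return the torch.nn.Module to be compiled.
        return _MyWrapper(model).eval()
```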
optimum/rbln/transformers/models/swin/modeling_swin.py CHANGED
@@ -203,7 +203,7 @@ class _SwinBackbone(torch.nn.Module):

 class RBLNSwinBackbone(RBLNModel):
     @classmethod
-    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSwinBackboneConfig) -> torch.nn.Module:
+    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSwinBackboneConfig) -> torch.nn.Module:
         for layer in model.encoder.layers:
             for block in layer.blocks:
                 block.get_attn_mask = types.MethodType(get_attn_mask, block)
@@ -278,19 +278,6 @@ class RBLNSwinBackbone(RBLNModel):
         output_hidden_states: bool = None,
         **kwargs,
     ) -> Union[Tuple, BackboneOutput]:
-        """
-        Forward pass for the RBLN-optimized Swin backbone model.
-
-        Args:
-            pixel_values (torch.FloatTensor of shape (batch_size, num_channels, image_size, image_size), optional): The tensors corresponding to the input images. Pixel values can be obtained using ViTImageProcessor. See ViTImageProcessor.call() for details (processor_class uses ViTImageProcessor for processing images).
-            return_dict (bool, optional): Whether or not to return a ModelOutput instead of a plain tuple.
-            output_attentions (bool, optional): Whether or not to return the attentions tensors of all attention layers. See attentions under returned tensors for more detail.
-            output_hidden_states (bool, optional): Whether or not to return the hidden states of all layers. See hidden_states under returned tensors for more detail.
-
-        Returns:
-            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BackboneOutput object.
-        """
-
         if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
             logger.warning(
                 f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}."
optimum/rbln/transformers/models/t5/modeling_t5.py CHANGED
@@ -68,7 +68,7 @@ class RBLNT5EncoderModel(RBLNTransformerEncoderForFeatureExtraction):
     output_class = BaseModelOutputWithPastAndCrossAttentions

     @classmethod
-    def _wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5EncoderModelConfig):
+    def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5EncoderModelConfig):
         return T5EncoderWrapper(model)

     @classmethod
@@ -113,7 +113,7 @@ class RBLNT5ForConditionalGeneration(RBLNModelForSeq2SeqLM):
     support_causal_attn = False

     @classmethod
-    def _wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5ForConditionalGenerationConfig):
+    def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5ForConditionalGenerationConfig):
         return T5Wrapper(
             model, enc_max_seq_len=rbln_config.enc_max_seq_len, dec_max_seq_len=rbln_config.dec_max_seq_len
         )
optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py CHANGED
@@ -153,7 +153,7 @@ class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
         return redirect(val)

     @classmethod
-    def _wrap_model_if_needed(
+    def wrap_model_if_needed(
         self, model: "PreTrainedModel", rbln_config: RBLNTimeSeriesTransformerForPredictionConfig
     ):
         return TimeSeriesTransformersWrapper(model, rbln_config.num_parallel_samples)
@@ -161,7 +161,7 @@ class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
     @classmethod
     @torch.inference_mode()
     def get_compiled_model(cls, model, rbln_config: RBLNTimeSeriesTransformerForPredictionConfig):
-        wrapped_model = cls._wrap_model_if_needed(model, rbln_config)
+        wrapped_model = cls.wrap_model_if_needed(model, rbln_config)

         enc_compile_config = rbln_config.compile_cfgs[0]
         dec_compile_config = rbln_config.compile_cfgs[1]
@@ -353,20 +353,6 @@ class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
         static_real_features: Optional[torch.Tensor] = None,
         **kwargs,
     ) -> SampleTSPredictionOutput:
-        """
-        Generate pass for the RBLN-optimized Time Series Transformer model for time series forecasting.
-
-        Args:
-            past_values (torch.FloatTensor of shape (batch_size, sequence_length) or (batch_size, sequence_length, input_size)): Past values of the time series, that serve as context in order to predict the future.
-            past_time_features (torch.FloatTensor of shape (batch_size, sequence_length, num_features)): Required time features, which the model internally will add to past_values.
-            future_time_features (torch.FloatTensor of shape (batch_size, prediction_length, num_features)): Required time features for the prediction window, which the model internally will add to future_values.
-            past_observed_mask (torch.BoolTensor of shape (batch_size, sequence_length) or (batch_size, sequence_length, input_size), optional): Boolean mask to indicate which past_values were observed and which were missing.
-            static_categorical_features (torch.LongTensor of shape (batch_size, number of static categorical features), optional): Optional static categorical features for which the model will learn an embedding, which it will add to the values of the time series.
-            static_real_features (torch.FloatTensor of shape (batch_size, number of static real features), optional): Optional static real features which the model will add to the values of the time series.
-
-        Returns:
-            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a SampleTSPredictionOutput object.
-        """
         self.validate_batch_size(**{k: v for k, v in locals().items() if isinstance(v, torch.Tensor)})

         outputs = self.encoder(
optimum/rbln/transformers/models/vit/modeling_vit.py CHANGED
@@ -12,11 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Tuple, Union
-
-import torch
-from transformers.modeling_outputs import ImageClassifierOutput
-
 from ...modeling_generic import RBLNModelForImageClassification


@@ -28,17 +23,3 @@ class RBLNViTForImageClassification(RBLNModelForImageClassification):
     on RBLN devices, supporting image classification with transformer-based architectures
     that process images as sequences of patches.
     """
-
-    def forward(self, pixel_values: torch.Tensor, **kwargs) -> Union[ImageClassifierOutput, Tuple]:
-        """
-        Forward pass for the RBLN-optimized Vision Transformer model for image classification.
-
-        Args:
-            pixel_values (torch.FloatTensor of shape (batch_size, channels, height, width)):
-                The tensors corresponding to the input images.
-
-        Returns:
-            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns an ImageClassifierOutput object.
-
-        """
-        return super().forward(pixel_values, **kwargs)
optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py CHANGED
@@ -12,12 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Any, Optional
+from ...configuration_generic import RBLNModelForMaskedLMConfig

-from ....configuration_utils import RBLNModelConfig

-
-class RBLNWav2Vec2ForCTCConfig(RBLNModelConfig):
+class RBLNWav2Vec2ForCTCConfig(RBLNModelForMaskedLMConfig):
     """
     Configuration class for RBLNWav2Vec2ForCTC.

@@ -25,14 +23,4 @@ class RBLNWav2Vec2ForCTCConfig(RBLNModelConfig):
     RBLN-optimized Wav2Vec2 models for Connectionist Temporal Classification (CTC) tasks.
     """

-    def __init__(
-        self,
-        max_seq_len: Optional[int] = None,
-        batch_size: Optional[int] = None,
-        **kwargs: Any,
-    ):
-        super().__init__(**kwargs)
-        self.max_seq_len = max_seq_len
-        self.batch_size = batch_size or 1
-        if not isinstance(self.batch_size, int) or self.batch_size < 0:
-            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+    rbln_model_input_names = ["input_values"]
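With the base class switched to `RBLNModelForMaskedLMConfig`, the hand-written `__init__` (and its `batch_size` check) is inherited rather than duplicated, and the config only pins the model input name. A sketch of usage, assuming the superclass keeps `max_seq_len` and `batch_size` constructor arguments and the class is still exported at the package root as in previous releases:

```python
# Assumption: RBLNModelForMaskedLMConfig accepts the same max_seq_len and
# batch_size keyword arguments that the removed __init__ did.
from optimum.rbln import RBLNWav2Vec2ForCTCConfig

config = RBLNWav2Vec2ForCTCConfig(max_seq_len=160_000, batch_size=1)
assert config.rbln_model_input_names == ["input_values"]
```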
optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py CHANGED
@@ -13,21 +13,13 @@
 # limitations under the License.


-from typing import TYPE_CHECKING, Optional, Union
-
 import torch
-from transformers import AutoModelForCTC, Wav2Vec2Config, Wav2Vec2ForCTC
-from transformers.modeling_outputs import CausalLMOutput
+from transformers import AutoModelForMaskedLM, Wav2Vec2ForCTC

-from ....configuration_utils import RBLNCompileConfig
-from ....modeling import RBLNModel
+from ...modeling_generic import RBLNModelForMaskedLM
 from .configuration_wav2vec2 import RBLNWav2Vec2ForCTCConfig


-if TYPE_CHECKING:
-    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel
-
-
 class _Wav2Vec2(torch.nn.Module):
     def __init__(self, model: "Wav2Vec2ForCTC"):
         super().__init__()
@@ -38,10 +30,13 @@ class _Wav2Vec2(torch.nn.Module):
         return self.model.lm_head(output[0])


-class RBLNWav2Vec2ForCTC(RBLNModel):
+class RBLNWav2Vec2ForCTC(RBLNModelForMaskedLM):
     """
     Wav2Vec2 Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).

+    This model inherits from [`RBLNModelForMaskedLM`]. Check the superclass documentation for the generic methods the
+    library implements for all its model.
+
     It implements the methods to convert a pre-trained Wav2Vec2 model into a RBLN Wav2Vec2 model by:
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
@@ -49,56 +44,9 @@ class RBLNWav2Vec2ForCTC(RBLNModel):
     """

     main_input_name = "input_values"
-    auto_model_class = AutoModelForCTC
+    auto_model_class = AutoModelForMaskedLM
     rbln_dtype = "float32"

     @classmethod
-    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNWav2Vec2ForCTCConfig) -> torch.nn.Module:
+    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNWav2Vec2ForCTCConfig) -> torch.nn.Module:
         return _Wav2Vec2(model).eval()
-
-    @classmethod
-    def _update_rbln_config(
-        cls,
-        preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
-        model: Optional["PreTrainedModel"] = None,
-        model_config: "Wav2Vec2Config" = None,
-        rbln_config: Optional[RBLNWav2Vec2ForCTCConfig] = None,
-    ) -> RBLNWav2Vec2ForCTCConfig:
-        if rbln_config.max_seq_len is None:
-            for tokenizer in preprocessors:
-                if hasattr(tokenizer, "model_max_length"):
-                    rbln_config.max_seq_len = tokenizer.model_max_length
-                    break
-            if rbln_config.max_seq_len is None:
-                raise ValueError("`rbln_max_seq_len` should be specified!")
-
-        rbln_compile_config = RBLNCompileConfig(
-            input_info=[
-                (
-                    "input_values",
-                    [
-                        rbln_config.batch_size,
-                        rbln_config.max_seq_len,
-                    ],
-                    "float32",
-                )
-            ]
-        )
-
-        rbln_config.set_compile_cfgs([rbln_compile_config])
-        return rbln_config
-
-    def forward(
-        self, input_values: torch.Tensor, return_dict: Optional[bool] = None, **kwargs
-    ) -> Union[CausalLMOutput, tuple]:
-        """
-        Forward pass for the RBLN-optimized Wav2Vec2 model for Connectionist Temporal Classification (CTC).
-
-        Args:
-            input_values (torch.FloatTensor of shape (batch_size, sequence_length)): Float values of input raw speech waveform. Values can be obtained by loading a .flac or .wav audio file into an array of type List[float] or a numpy.ndarray, e.g. via the soundfile library (pip install soundfile). To prepare the array into input_values, the AutoProcessor should be used for padding and conversion into a tensor of type torch.FloatTensor.
-            return_dict (bool, optional): Whether or not to return a ModelOutput instead of a plain tuple.
-
-        Returns:
-            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a CausalLMOutput object.
-        """
-        return super().forward(input_values=input_values, return_dict=return_dict, **kwargs)
optimum/rbln/transformers/models/whisper/generation_whisper.py CHANGED
@@ -31,63 +31,29 @@ Generation utilities for Whisper.
 Modified from `transformers.models.whisper.generation_whisper.py`
 """

-from typing import Any, Dict, Optional, Union
-
 import torch
 import transformers
 from packaging import version
 from transformers import GenerationMixin
-from transformers.generation.configuration_utils import GenerationConfig
-from transformers.modeling_outputs import ModelOutput
 from transformers.models.whisper.generation_whisper import WhisperGenerationMixin


 class RBLNWhisperGenerationMixin(WhisperGenerationMixin, GenerationMixin):
-    def generate(
-        self,
-        input_features: Optional[torch.Tensor] = None,
-        attention_mask: Optional[torch.Tensor] = None,
-        generation_config: Optional[GenerationConfig] = None,
-        return_segments: Optional[bool] = None,
-        return_timestamps: Optional[bool] = None,
-        return_token_timestamps: Optional[bool] = None,
-        **kwargs,
-    ) -> Union[ModelOutput, Dict[str, Any], torch.LongTensor]:
-        """
-        The generate function is utilized in its standard form as in the HuggingFace transformers library. User can use this function to generate text from the model.
-        Check the [HuggingFace transformers documentation](https://huggingface.co/docs/transformers/v4.57.1/en/model_doc/whisper#transformers.WhisperForConditionalGeneration.generate) for more details.
-
-        Args:
-            input_features(torch.Tensor, optional): The input features to the model.
-            attention_mask(torch.Tensor, optional): Attention mask needs to be passed when doing long-form transcription using a batch size > 1.
-            generation_config(GenerationConfig, optional): The generation configuration to be used as base parametrization for the generation call. **kwargs passed to generate matching the attributes of generation_config will override them.
-                If generation_config is not provided, the default will be used, which had the following loading priority: 1) from the generation_config.json model file, if it exists; 2) from the model configuration.
-                Please note that unspecified parameters will inherit [GenerationConfig](https://huggingface.co/docs/transformers/v4.57.1/en/main_classes/text_generation#transformers.GenerationConfig)’s default values.
-            return_segments(bool, optional): Whether to return segments.
-            return_timestamps(bool, optional): Whether to return the timestamps with the text. For audios longer than 30 seconds, it is necessary to set return_timestamps=True.
-            return_token_timestamps(bool, optional): Whether to return token timestamps.
-            kwargs(dict[str, Any], optional): Additional arguments passed to the generate function.
-
-        Returns:
-            Transcribes or translates log-mel input features to a sequence of auto-regressively generated token ids.
-        """
-        if kwargs.get("num_beams", None) is not None:
-            if kwargs.get("num_beams") != 1:
-                raise ValueError(
-                    "Beam search is not supported in RBLNWhisperGenerationMixin. "
-                    "Received num_beams={num_beams}, but only num_beams=1 is allowed. "
-                    "Please set num_beams=1 for greedy search or adjust your configuration."
-                )
-
-        return super().generate(
-            input_features,
-            attention_mask=attention_mask,
-            generation_config=generation_config,
-            return_segments=return_segments,
-            return_timestamps=return_timestamps,
-            return_token_timestamps=return_token_timestamps,
-            **kwargs,
+    def generate(self, *args, generation_config=None, **kwargs):
+        num_beams = kwargs.get(
+            "num_beams",
+            generation_config.num_beams
+            if hasattr(generation_config, "num_beams") and generation_config.num_beams is not None
+            else 1,
         )
+        if num_beams > 1:
+            raise ValueError(
+                f"Beam search is not supported in RBLNWhisperGenerationMixin. "
+                f"Received num_beams={num_beams}, but only num_beams=1 is allowed. "
+                f"Please set num_beams=1 for greedy search or adjust your configuration."
+            )
+
+        return super().generate(*args, **kwargs)

     def _postprocess_outputs(
         self,
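The rewritten guard now also honors `num_beams` carried on a `GenerationConfig`, not just the `num_beams` kwarg; when `generation_config` is `None`, `hasattr` returns False and the default of 1 applies. A standalone sketch of the same logic:

```python
# Sketch of the new guard; generation_config may be None or any object with
# a num_beams attribute (e.g. transformers.GenerationConfig).
def check_num_beams(generation_config=None, **kwargs):
    num_beams = kwargs.get(
        "num_beams",
        generation_config.num_beams
        if hasattr(generation_config, "num_beams") and generation_config.num_beams is not None
        else 1,
    )
    if num_beams > 1:
        raise ValueError(f"Beam search is not supported; got num_beams={num_beams}.")


check_num_beams()              # OK: defaults to greedy search
check_num_beams(num_beams=1)   # OK
# check_num_beams(num_beams=4) # raises ValueError
```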
optimum/rbln/transformers/models/whisper/modeling_whisper.py CHANGED
@@ -203,7 +203,7 @@ class RBLNWhisperForConditionalGeneration(RBLNModel, RBLNWhisperGenerationMixin)
         raise NotImplementedError

     @classmethod
-    def _wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNWhisperForConditionalGenerationConfig):
+    def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNWhisperForConditionalGenerationConfig):
         return WhisperWrapper(
             model,
             use_attention_mask=rbln_config.use_attention_mask,
@@ -213,7 +213,7 @@ class RBLNWhisperForConditionalGeneration(RBLNModel, RBLNWhisperGenerationMixin)
     @classmethod
     @torch.inference_mode()
     def get_compiled_model(cls, model, rbln_config: RBLNWhisperForConditionalGenerationConfig):
-        wrapped_model = cls._wrap_model_if_needed(model, rbln_config)
+        wrapped_model = cls.wrap_model_if_needed(model, rbln_config)

         enc_compile_config = rbln_config.compile_cfgs[0]
         dec_compile_config = rbln_config.compile_cfgs[1]
optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py CHANGED
@@ -12,11 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Optional, Union
-
-import torch
-from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions, SequenceClassifierOutput
-
 from ...modeling_generic import RBLNModelForSequenceClassification, RBLNTransformerEncoderForFeatureExtraction


@@ -25,25 +20,6 @@ class RBLNXLMRobertaModel(RBLNTransformerEncoderForFeatureExtraction):
     XLM-RoBERTa base model optimized for RBLN NPU.
     """

-    def forward(
-        self,
-        input_ids: Optional[torch.Tensor] = None,
-        attention_mask: Optional[torch.Tensor] = None,
-        **kwargs,
-    ) -> Union[BaseModelOutputWithPoolingAndCrossAttentions, tuple]:
-        """
-        Forward pass for the RBLN-optimized XLM-RoBERTa base model.
-
-        Args:
-            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
-            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
-
-        Returns:
-            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPoolingAndCrossAttentions object.
-        """
-
-        return super().forward(input_ids, attention_mask, **kwargs)
-

 class RBLNXLMRobertaForSequenceClassification(RBLNModelForSequenceClassification):
     """
@@ -51,22 +27,3 @@ class RBLNXLMRobertaForSequenceClassification(RBLNModelForSequenceClassification
     """

     rbln_model_input_names = ["input_ids", "attention_mask"]
-
-    def forward(
-        self,
-        input_ids: Optional[torch.LongTensor] = None,
-        attention_mask: Optional[torch.FloatTensor] = None,
-        **kwargs,
-    ) -> Union[SequenceClassifierOutput, tuple]:
-        """
-        Forward pass for the RBLN-optimized XLM-RoBERTa model for sequence classification.
-
-        Args:
-            input_ids (torch.LongTensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
-            attention_mask (torch.FloatTensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
-
-        Returns:
-            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a SequenceClassifierOutput object.
-        """
-
-        return super().forward(input_ids, attention_mask, **kwargs)
optimum/rbln/transformers/utils/rbln_quantization.py CHANGED
@@ -123,15 +123,6 @@ class RBLNQuantizationConfig(RBLNSerializableConfigProtocol):
         if self.RBLN_QUANT_BITS_ENV in os.environ:
             os.environ.pop(self.RBLN_QUANT_BITS_ENV)

-    @property
-    def nbits_per_param(self) -> int:
-        if self.weights in ["int4", "fp4"]:
-            return 4
-        elif self.weights in ["int8", "fp8"]:
-            return 8
-        else:
-            raise ValueError(f"Invalid weights: {self.weights}")
-

 class QuantizedLayerFactory:
     def __init__(self, quantization_config: RBLNQuantizationConfig):
optimum/rbln/utils/depreacate_utils.py ADDED
@@ -0,0 +1,16 @@
+from typing import Optional
+
+import rebel
+
+from .logging import get_logger
+
+
+logger = get_logger(__name__)
+
+
+def warn_deprecated_npu(npu: Optional[str] = None):
+    npu = npu or rebel.get_npu_name()
+    if npu == "RBLN-CA02":
+        logger.warning_once(
+            "Support for the RBLN-CA02 device is provided only up to optimum-rbln v0.8.0 and has reached end of life.",
+        )
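The new module (note the `depreacate_utils` spelling, which matches the file list above) exposes a single helper that warns once when end-of-life silicon is detected. A usage sketch; the call sites inside the package are not shown in this diff:

```python
from optimum.rbln.utils.depreacate_utils import warn_deprecated_npu

# Explicit NPU name: emits the one-time warning for the end-of-life device.
warn_deprecated_npu("RBLN-CA02")

# No argument: falls back to rebel.get_npu_name() for the attached device.
warn_deprecated_npu()
```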
optimum/rbln/utils/hub.py CHANGED
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import json
 from pathlib import Path
 from typing import List, Optional, Union

@@ -68,25 +67,15 @@ def validate_files(
     location: str,
 ):
     """Validate the presence and count of required files."""
+    if len(files) == 0:
+        raise FileNotFoundError(f"Could not find any rbln model file in {location}")
+
     if len(config_files) == 0:
         raise FileNotFoundError(f"Could not find `rbln_config.json` file in {location}")

     if len(config_files) > 1:
         raise FileExistsError(f"Multiple rbln_config.json files found in {location}. This is not expected.")

-    try:
-        with open(config_files[0], "r") as f:
-            config_data = json.load(f)
-            compile_cfgs = config_data.get("_compile_cfgs", [])
-            if len(compile_cfgs) == 0:
-                # If compile_cfgs is empty, we don't need .rbln files
-                return
-    except (json.JSONDecodeError, KeyError, OSError):
-        pass
-
-    if len(files) == 0:
-        raise FileNotFoundError(f"Could not find any rbln model file in {location}")
-

 def _get_huggingface_token(token: Union[bool, str]) -> str:
     if isinstance(token, str):
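Behaviorally, `validate_files` now fails fast when no `.rbln` artifact is present: the escape hatch that parsed `rbln_config.json` and skipped the check when `_compile_cfgs` was empty is removed. A sketch of the resulting behavior, assuming the function keeps its `files`, `config_files`, and `location` parameters:

```python
from optimum.rbln.utils.hub import validate_files

# With no compiled model files, the check now raises immediately, even if
# rbln_config.json declares an empty _compile_cfgs list.
try:
    validate_files(files=[], config_files=["rbln_config.json"], location="./model")
except FileNotFoundError as exc:
    print(exc)  # Could not find any rbln model file in ./model
```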
optimum/rbln/utils/runtime_utils.py CHANGED
@@ -20,38 +20,6 @@ import rebel
 import torch


-def get_available_dram(npu: Optional[str] = None) -> int:
-    """
-    Get the available DRAM size of the specified NPU.
-
-    Args:
-        npu : Optional[str], default=None
-            The NPU to get the available DRAM size.
-            If None, the function will attempt to retrieve through `ensure_valid_npu()`
-
-    Returns:
-        int
-            The available DRAM size in bytes.
-    """
-    if npu is None:
-        if not rebel.npu_is_available(0):
-            raise RuntimeError("No NPU is available to get available DRAM size.")
-
-        npu = rebel.get_npu_name(0)
-
-    if npu.startswith("RBLN-CR"):
-        # TODO(jongho): Assuming 4 chiplets.
-        DRAM_NBYTES = 144 * 2**30
-        SYS_DRAM_NBYTES = 4 * 2**30
-    elif npu.startswith("RBLN-CA"):
-        DRAM_NBYTES = 16 * 2**30
-        SYS_DRAM_NBYTES = 288 * 2**20
-    else:
-        raise ValueError(f"Unknown npu name: {npu}")
-
-    return DRAM_NBYTES - SYS_DRAM_NBYTES
-
-
 def normalize_npu(npu: str) -> str:
     """Normalize the NPU string by removing the form factor."""
     match = re.match(r"(RBLN-CA|RBLN-CR)(\d+)", npu)
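`get_available_dram` is removed with no replacement in this diff; callers that relied on it would need their own estimate. For reference, the figures the deleted code reported:

```python
# Figures reproduced from the removed helper above:
#   RBLN-CR: 144 GiB total minus 4 GiB reserved for the system
#   RBLN-CA: 16 GiB total minus 288 MiB reserved for the system
CR_AVAILABLE = 144 * 2**30 - 4 * 2**30   # 150_323_855_360 bytes
CA_AVAILABLE = 16 * 2**30 - 288 * 2**20  # 16_877_879_296 bytes
```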
{optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.3rc0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.9.3
+Version: 0.9.3rc0
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
@@ -24,7 +24,7 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: <3.14,>=3.9
 Requires-Dist: accelerate>=1.0.1
-Requires-Dist: diffusers==0.35.2
+Requires-Dist: diffusers==0.35.1
 Requires-Dist: packaging>=24.1
 Requires-Dist: torch==2.8.0
 Requires-Dist: torchaudio<=2.8.0