PyPI - optimum-rbln - Versions diffs - 0.8.2a4__py3-none-any.whl → 0.9.3rc0__py3-none-any.whl - Mend

optimum-rbln 0.8.2a4__py3-none-any.whl → 0.9.3rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (167) hide show

optimum/rbln/ops/attn.py CHANGED Viewed

@@ -53,6 +53,45 @@ def paged_attn_decode_fake(
     return torch.empty_like(q)
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_attn_decode_kv_fp8",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_attn_decode_kv_fp8(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+) -> Tensor:
+    return torch.empty_like(q)
+@paged_attn_decode_kv_fp8.register_fake
+def paged_attn_decode_kv_fp8_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+) -> Tensor:
+    return torch.empty_like(q)
 @torch.library.custom_op(
     "rbln_custom_ops::paged_attn_prefill",
     mutates_args=(["kcache", "vcache"]),
@@ -112,6 +151,45 @@ def paged_attn_prefill_fake(
     return torch.empty_like(q)
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_attn_prefill_kv_fp8",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_attn_prefill_kv_fp8(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+) -> Tensor:
+    return torch.empty_like(q)
+@paged_attn_prefill_kv_fp8.register_fake
+def paged_attn_prefill_kv_fp8_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+) -> Tensor:
+    return torch.empty_like(q)
 @torch.library.custom_op(
     "rbln_custom_ops::paged_causal_attn_decode",
     mutates_args=(["kcache", "vcache"]),
@@ -236,6 +314,86 @@ def paged_causal_attn_prefill_fake(
     return torch.empty_like(q)
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_causal_attn_decode_kv_fp8",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_causal_attn_decode_kv_fp8(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+    mask: Optional[Tensor] = None,
+) -> Tensor:
+    return torch.empty_like(q)
+@paged_causal_attn_decode_kv_fp8.register_fake
+def paged_causal_attn_decode_kv_fp8_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+    mask: Optional[Tensor] = None,
+) -> Tensor:
+    return torch.empty_like(q)
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_causal_attn_prefill_kv_fp8",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_causal_attn_prefill_kv_fp8(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    is_bidirectional: bool,
+    k_scale: Tensor,
+    v_scale: Tensor,
+    mask: Optional[Tensor] = None,
+) -> Tensor:
+    return torch.empty_like(q)
+@paged_causal_attn_prefill_kv_fp8.register_fake
+def paged_causal_attn_prefill_kv_fp8_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    is_bidirectional: bool,
+    k_scale: Tensor,
+    v_scale: Tensor,
+    mask: Optional[Tensor] = None,
+) -> Tensor:
+    return torch.empty_like(q)
 @torch.library.custom_op(
     "rbln_custom_ops::paged_add_softmax_attn_decode",
     mutates_args=(["kcache", "vcache"]),

optimum/rbln/ops/flash_attn.py CHANGED Viewed

@@ -59,6 +59,47 @@ def paged_flash_attn_decode_fake(
     return torch.empty_like(q)
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_flash_attn_decode_kv_fp8",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_flash_attn_decode_kv_fp8(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+) -> Tensor:
+    return torch.empty_like(q)
+@paged_flash_attn_decode_kv_fp8.register_fake
+def paged_flash_attn_decode_kv_fp8_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+) -> Tensor:
+    return torch.empty_like(q)
 @torch.library.custom_op(
     "rbln_custom_ops::paged_flash_attn_prefill",
     mutates_args=(["kcache", "vcache"]),
@@ -100,6 +141,47 @@ def paged_flash_attn_prefill_fake(
     return torch.empty_like(q)
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_flash_attn_prefill_kv_fp8",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_flash_attn_prefill_kv_fp8(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+) -> Tensor:
+    return torch.empty_like(q)
+@paged_flash_attn_prefill_kv_fp8.register_fake
+def paged_flash_attn_prefill_kv_fp8_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+) -> Tensor:
+    return torch.empty_like(q)
 @torch.library.custom_op(
     "rbln_custom_ops::paged_flash_causal_attn_decode",
     mutates_args=(["kcache", "vcache"]),
@@ -141,6 +223,47 @@ def paged_flash_causal_attn_decode_fake(
     return torch.empty_like(q)
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_flash_causal_attn_decode_kv_fp8",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_flash_causal_attn_decode_kv_fp8(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+    mask: Optional[Tensor] = None,
+) -> Tensor:
+    return torch.empty_like(q)
+@paged_flash_causal_attn_decode_kv_fp8.register_fake
+def paged_flash_causal_attn_decode_kv_fp8_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+    k_scale: Tensor,
+    v_scale: Tensor,
+    mask: Optional[Tensor] = None,
+) -> Tensor:
+    return torch.empty_like(q)
 @torch.library.custom_op(
     "rbln_custom_ops::paged_flash_causal_attn_prefill",
     mutates_args=(["kcache", "vcache"]),
@@ -182,3 +305,46 @@ def paged_flash_causal_attn_prefill_fake(
     mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_flash_causal_attn_prefill_kv_fp8",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_flash_causal_attn_prefill_kv_fp8(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+    is_bidirectional: bool,
+    k_scale: Tensor,
+    v_scale: Tensor,
+    mask: Optional[Tensor] = None,
+) -> Tensor:
+    return torch.empty_like(q)
+@paged_flash_causal_attn_prefill_kv_fp8.register_fake
+def paged_flash_causal_attn_prefill_kv_fp8_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+    is_bidirectional: bool,
+    k_scale: Tensor,
+    v_scale: Tensor,
+    mask: Optional[Tensor] = None,
+) -> Tensor:
+    return torch.empty_like(q)

optimum/rbln/ops/kv_cache_update.py CHANGED Viewed

@@ -22,3 +22,8 @@ def rbln_cache_update(cache: Tensor, state: Tensor, position: Tensor, axis: Tens
     # This operation is designed to perform in-place updates directly on the device without needing to transfer the cache back to the host.
     # The `position` parameter specifies the start index for the update along the specified axis, allowing flexible updates to any part of the cache tensor.
     return torch.empty_like(cache)
+@rbln_cache_update.register_fake
+def rbln_cache_update_fake(cache: Tensor, state: Tensor, position: Tensor, axis: Tensor) -> Tensor:
+    return torch.empty_like(cache)

optimum/rbln/ops/linear.py CHANGED Viewed

@@ -23,3 +23,10 @@ def linear(input: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tens
     output_shape = list(input.shape[:-1])
     output_shape += [weight.shape[0]]
     return torch.empty(size=output_shape, dtype=input.dtype, device=input.device, requires_grad=input.requires_grad)
+@linear.register_fake
+def linear_fake(input: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tensor:
+    output_shape = list(input.shape[:-1])
+    output_shape += [weight.shape[0]]
+    return torch.empty(size=output_shape, dtype=input.dtype, device=input.device, requires_grad=input.requires_grad)

optimum/rbln/transformers/__init__.py CHANGED Viewed

@@ -34,6 +34,8 @@ _import_structure = {
         "RBLNAutoModelForSequenceClassification",
         "RBLNAutoModelForSpeechSeq2Seq",
         "RBLNAutoModelForVision2Seq",
+        "RBLNAutoModelForTextEncoding",
+        "RBLNAutoModelForZeroShotObjectDetection",
         "RBLNBartForConditionalGeneration",
         "RBLNBartForConditionalGenerationConfig",
         "RBLNBartModel",
@@ -52,6 +54,8 @@ _import_structure = {
         "RBLNBlip2VisionModelConfig",
         "RBLNColPaliForRetrieval",
         "RBLNColPaliForRetrievalConfig",
+        "RBLNColQwen2ForRetrieval",
+        "RBLNColQwen2ForRetrievalConfig",
         "RBLNCLIPTextModel",
         "RBLNCLIPTextModelConfig",
         "RBLNCLIPTextModelWithProjection",
@@ -62,12 +66,18 @@ _import_structure = {
         "RBLNCLIPVisionModelWithProjectionConfig",
         "RBLNDecoderOnlyModelForCausalLM",
         "RBLNDecoderOnlyModelForCausalLMConfig",
+        "RBLNDecoderOnlyModelConfig",
+        "RBLNDecoderOnlyModel",
         "RBLNDistilBertForQuestionAnswering",
         "RBLNDistilBertForQuestionAnsweringConfig",
         "RBLNDPTForDepthEstimation",
         "RBLNDPTForDepthEstimationConfig",
+        "RBLNDepthAnythingForDepthEstimation",
+        "RBLNDepthAnythingForDepthEstimationConfig",
         "RBLNExaoneForCausalLM",
         "RBLNExaoneForCausalLMConfig",
+        "RBLNGemmaModel",
+        "RBLNGemmaModelConfig",
         "RBLNGemma3ForCausalLM",
         "RBLNGemma3ForCausalLMConfig",
         "RBLNGemma3ForConditionalGeneration",
@@ -76,26 +86,60 @@ _import_structure = {
         "RBLNGemmaForCausalLMConfig",
         "RBLNGPT2LMHeadModel",
         "RBLNGPT2LMHeadModelConfig",
+        "RBLNGPT2Model",
+        "RBLNGPT2ModelConfig",
+        "RBLNGroundingDinoDecoder",
+        "RBLNGroundingDinoDecoderConfig",
+        "RBLNGroundingDinoForObjectDetection",
+        "RBLNGroundingDinoForObjectDetectionConfig",
+        "RBLNGroundingDinoEncoder",
+        "RBLNGroundingDinoEncoderConfig",
         "RBLNIdefics3ForConditionalGeneration",
         "RBLNIdefics3ForConditionalGenerationConfig",
         "RBLNIdefics3VisionTransformer",
         "RBLNIdefics3VisionTransformerConfig",
         "RBLNLlamaForCausalLM",
         "RBLNLlamaForCausalLMConfig",
+        "RBLNLlavaForConditionalGeneration",
+        "RBLNLlavaForConditionalGenerationConfig",
+        "RBLNLlamaModel",
+        "RBLNLlamaModelConfig",
+        "RBLNOPTForCausalLM",
+        "RBLNOPTForCausalLMConfig",
+        "RBLNPegasusForConditionalGeneration",
+        "RBLNPegasusForConditionalGenerationConfig",
+        "RBLNPegasusModel",
+        "RBLNPegasusModelConfig",
         "RBLNLlavaNextForConditionalGeneration",
         "RBLNLlavaNextForConditionalGenerationConfig",
+        "RBLNLoRAAdapterConfig",
+        "RBLNLoRAConfig",
         "RBLNMidmLMHeadModel",
         "RBLNMidmLMHeadModelConfig",
         "RBLNMistralForCausalLM",
         "RBLNMistralForCausalLMConfig",
+        "RBLNMistralModel",
+        "RBLNMistralModelConfig",
         "RBLNOPTForCausalLM",
         "RBLNOPTForCausalLMConfig",
+        "RBLNOPTModel",
+        "RBLNOPTModelConfig",
         "RBLNPhiForCausalLM",
         "RBLNPhiForCausalLMConfig",
+        "RBLNPixtralVisionModelConfig",
+        "RBLNPixtralVisionModel",
+        "RBLNPhiModel",
+        "RBLNPhiModelConfig",
         "RBLNQwen2_5_VisionTransformerPretrainedModel",
         "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
         "RBLNQwen2_5_VLForConditionalGeneration",
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
+        "RBLNQwen2VisionTransformerPretrainedModel",
+        "RBLNQwen2VisionTransformerPretrainedModelConfig",
+        "RBLNQwen2VLForConditionalGeneration",
+        "RBLNQwen2VLForConditionalGenerationConfig",
+        "RBLNQwen2Model",
+        "RBLNQwen2ModelConfig",
         "RBLNQwen2ForCausalLM",
         "RBLNQwen2ForCausalLMConfig",
         "RBLNQwen3ForCausalLM",
@@ -110,6 +154,8 @@ _import_structure = {
         "RBLNRobertaForSequenceClassificationConfig",
         "RBLNSiglipVisionModel",
         "RBLNSiglipVisionModelConfig",
+        "RBLNSwinBackbone",
+        "RBLNSwinBackboneConfig",
         "RBLNT5EncoderModel",
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGeneration",
@@ -145,7 +191,9 @@ if TYPE_CHECKING:
         RBLNAutoModelForSeq2SeqLM,
         RBLNAutoModelForSequenceClassification,
         RBLNAutoModelForSpeechSeq2Seq,
+        RBLNAutoModelForTextEncoding,
         RBLNAutoModelForVision2Seq,
+        RBLNAutoModelForZeroShotObjectDetection,
         RBLNBartForConditionalGeneration,
         RBLNBartForConditionalGenerationConfig,
         RBLNBartModel,
@@ -170,8 +218,16 @@ if TYPE_CHECKING:
         RBLNCLIPVisionModelConfig,
         RBLNCLIPVisionModelWithProjection,
         RBLNCLIPVisionModelWithProjectionConfig,
+        RBLNColPaliForRetrieval,
+        RBLNColPaliForRetrievalConfig,
+        RBLNColQwen2ForRetrieval,
+        RBLNColQwen2ForRetrievalConfig,
+        RBLNDecoderOnlyModel,
+        RBLNDecoderOnlyModelConfig,
         RBLNDecoderOnlyModelForCausalLM,
         RBLNDecoderOnlyModelForCausalLMConfig,
+        RBLNDepthAnythingForDepthEstimation,
+        RBLNDepthAnythingForDepthEstimationConfig,
         RBLNDistilBertForQuestionAnswering,
         RBLNDistilBertForQuestionAnsweringConfig,
         RBLNDPTForDepthEstimation,
@@ -184,30 +240,64 @@ if TYPE_CHECKING:
         RBLNGemma3ForConditionalGenerationConfig,
         RBLNGemmaForCausalLM,
         RBLNGemmaForCausalLMConfig,
+        RBLNGemmaModel,
+        RBLNGemmaModelConfig,
         RBLNGPT2LMHeadModel,
         RBLNGPT2LMHeadModelConfig,
+        RBLNGPT2Model,
+        RBLNGPT2ModelConfig,
+        RBLNGroundingDinoDecoder,
+        RBLNGroundingDinoDecoderConfig,
+        RBLNGroundingDinoEncoder,
+        RBLNGroundingDinoEncoderConfig,
+        RBLNGroundingDinoForObjectDetection,
+        RBLNGroundingDinoForObjectDetectionConfig,
         RBLNIdefics3ForConditionalGeneration,
         RBLNIdefics3ForConditionalGenerationConfig,
         RBLNIdefics3VisionTransformer,
         RBLNIdefics3VisionTransformerConfig,
         RBLNLlamaForCausalLM,
         RBLNLlamaForCausalLMConfig,
+        RBLNLlamaModel,
+        RBLNLlamaModelConfig,
+        RBLNLlavaForConditionalGeneration,
+        RBLNLlavaForConditionalGenerationConfig,
         RBLNLlavaNextForConditionalGeneration,
         RBLNLlavaNextForConditionalGenerationConfig,
+        RBLNLoRAAdapterConfig,
+        RBLNLoRAConfig,
         RBLNMidmLMHeadModel,
         RBLNMidmLMHeadModelConfig,
         RBLNMistralForCausalLM,
         RBLNMistralForCausalLMConfig,
+        RBLNMistralModel,
+        RBLNMistralModelConfig,
         RBLNOPTForCausalLM,
         RBLNOPTForCausalLMConfig,
+        RBLNOPTModel,
+        RBLNOPTModelConfig,
+        RBLNPegasusForConditionalGeneration,
+        RBLNPegasusForConditionalGenerationConfig,
+        RBLNPegasusModel,
+        RBLNPegasusModelConfig,
         RBLNPhiForCausalLM,
         RBLNPhiForCausalLMConfig,
+        RBLNPhiModel,
+        RBLNPhiModelConfig,
+        RBLNPixtralVisionModel,
+        RBLNPixtralVisionModelConfig,
         RBLNQwen2_5_VisionTransformerPretrainedModel,
         RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
+        RBLNQwen2Model,
+        RBLNQwen2ModelConfig,
+        RBLNQwen2VisionTransformerPretrainedModel,
+        RBLNQwen2VisionTransformerPretrainedModelConfig,
+        RBLNQwen2VLForConditionalGeneration,
+        RBLNQwen2VLForConditionalGenerationConfig,
         RBLNQwen3ForCausalLM,
         RBLNQwen3ForCausalLMConfig,
         RBLNQwen3Model,
@@ -220,6 +310,8 @@ if TYPE_CHECKING:
         RBLNRobertaForSequenceClassificationConfig,
         RBLNSiglipVisionModel,
         RBLNSiglipVisionModelConfig,
+        RBLNSwinBackbone,
+        RBLNSwinBackboneConfig,
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
         RBLNT5ForConditionalGeneration,

optimum/rbln/transformers/configuration_generic.py CHANGED Viewed

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, List, Optional, Tuple, Union
 from ..configuration_utils import RBLNModelConfig
@@ -25,7 +25,8 @@ class RBLNTransformerEncoderConfig(RBLNModelConfig):
         max_seq_len: Optional[int] = None,
         batch_size: Optional[int] = None,
         model_input_names: Optional[List[str]] = None,
-        **kwargs: Dict[str, Any],
+        model_input_shapes: Optional[List[Tuple[int, int]]] = None,
+        **kwargs: Any,
     ):
         """
         Args:
@@ -33,7 +34,7 @@ class RBLNTransformerEncoderConfig(RBLNModelConfig):
             batch_size (Optional[int]): The batch size for inference. Defaults to 1.
             model_input_names (Optional[List[str]]): Names of the input tensors for the model.
                 Defaults to class-specific rbln_model_input_names if not provided.
-            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
         Raises:
             ValueError: If batch_size is not a positive integer.
@@ -45,6 +46,7 @@ class RBLNTransformerEncoderConfig(RBLNModelConfig):
             raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
         self.model_input_names = model_input_names or self.rbln_model_input_names
+        self.model_input_shapes = model_input_shapes
 class RBLNImageModelConfig(RBLNModelConfig):
@@ -52,14 +54,14 @@ class RBLNImageModelConfig(RBLNModelConfig):
         self,
         image_size: Optional[Union[int, Tuple[int, int]]] = None,
         batch_size: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
             image_size (Optional[Union[int, Tuple[int, int]]]): The size of input images.
                 Can be an integer for square images or a tuple (height, width).
             batch_size (Optional[int]): The batch size for inference. Defaults to 1.
-            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
         Raises:
             ValueError: If batch_size is not a positive integer.
@@ -124,14 +126,14 @@ class RBLNModelForAudioClassificationConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         max_length: Optional[int] = None,
         num_mel_bins: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
             batch_size (Optional[int]): The batch size for inference. Defaults to 1.
             max_length (Optional[int]): Maximum length of the audio input in time dimension.
             num_mel_bins (Optional[int]): Number of Mel frequency bins for audio processing.
-            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
         Raises:
             ValueError: If batch_size is not a positive integer.

optimum-rbln 0.8.2a4__py3-none-any.whl → 0.9.3rc0__py3-none-any.whl

optimum-rbln 0.8.2a4__py3-none-any.whl → 0.9.3rc0__py3-none-any.whl