optimum-rbln 0.8.2a7__py3-none-any.whl → 0.8.3a0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of optimum-rbln has been flagged as potentially problematic; consult the registry's advisory for this release for details before upgrading.
- optimum/rbln/__init__.py +8 -9
- optimum/rbln/__version__.py +16 -3
- optimum/rbln/configuration_utils.py +4 -4
- optimum/rbln/diffusers/__init__.py +1 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +2 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +3 -3
- optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +2 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +4 -4
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +2 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +2 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +2 -2
- optimum/rbln/diffusers/modeling_diffusers.py +1 -1
- optimum/rbln/diffusers/models/__init__.py +3 -13
- optimum/rbln/diffusers/pipelines/__init__.py +1 -5
- optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +11 -6
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +14 -18
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -1
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -6
- optimum/rbln/modeling.py +2 -2
- optimum/rbln/modeling_base.py +12 -4
- optimum/rbln/ops/attn.py +158 -0
- optimum/rbln/ops/flash_attn.py +166 -0
- optimum/rbln/transformers/__init__.py +6 -0
- optimum/rbln/transformers/configuration_generic.py +4 -4
- optimum/rbln/transformers/modeling_generic.py +1 -4
- optimum/rbln/transformers/modeling_outputs.py +37 -0
- optimum/rbln/transformers/models/__init__.py +10 -16
- optimum/rbln/transformers/models/auto/__init__.py +1 -0
- optimum/rbln/transformers/models/auto/modeling_auto.py +7 -0
- optimum/rbln/transformers/models/bart/bart_architecture.py +1 -3
- optimum/rbln/transformers/models/bart/configuration_bart.py +2 -0
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +2 -2
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +1 -5
- optimum/rbln/transformers/models/clip/configuration_clip.py +3 -3
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +1 -4
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +2 -2
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +2 -10
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +43 -174
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +102 -93
- optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +450 -0
- optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +88 -0
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +297 -987
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -4
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +9 -0
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +14 -3
- optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +217 -0
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +58 -257
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +2 -0
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +2 -2
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +3 -9
- optimum/rbln/transformers/models/llama/modeling_llama.py +12 -3
- optimum/rbln/transformers/models/llava/configuration_llava.py +2 -2
- optimum/rbln/transformers/models/llava/modeling_llava.py +53 -14
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +6 -16
- optimum/rbln/transformers/models/opt/modeling_opt.py +2 -30
- optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +4 -0
- optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +2 -0
- optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +1 -3
- optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +2 -2
- optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +1 -4
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +3 -3
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +6 -15
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +4 -7
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +77 -3
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +1 -4
- optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +19 -2
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +20 -1
- optimum/rbln/transformers/models/siglip/__init__.py +2 -6
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +2 -2
- optimum/rbln/transformers/models/t5/configuration_t5.py +2 -0
- optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +2 -2
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -14
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +10 -2
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +20 -1
- optimum/rbln/transformers/models/xlm_roberta/__init__.py +2 -8
- optimum/rbln/transformers/utils/rbln_quantization.py +249 -46
- optimum/rbln/utils/runtime_utils.py +3 -3
- {optimum_rbln-0.8.2a7.dist-info → optimum_rbln-0.8.3a0.dist-info}/METADATA +1 -1
- {optimum_rbln-0.8.2a7.dist-info → optimum_rbln-0.8.3a0.dist-info}/RECORD +90 -86
- {optimum_rbln-0.8.2a7.dist-info → optimum_rbln-0.8.3a0.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.2a7.dist-info → optimum_rbln-0.8.3a0.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/modeling.py
CHANGED
|
@@ -78,7 +78,7 @@ class RBLNModel(RBLNBaseModel):
|
|
|
78
78
|
rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
|
|
79
79
|
model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
|
|
80
80
|
subfolder: str = "",
|
|
81
|
-
**kwargs:
|
|
81
|
+
**kwargs: Any,
|
|
82
82
|
) -> "RBLNModel":
|
|
83
83
|
"""
|
|
84
84
|
Converts and compiles a pre-trained HuggingFace library model into a RBLN model.
|
|
@@ -241,7 +241,7 @@ class RBLNModel(RBLNBaseModel):
|
|
|
241
241
|
for compiled_model in compiled_models
|
|
242
242
|
]
|
|
243
243
|
|
|
244
|
-
def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs:
|
|
244
|
+
def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Any) -> Any:
|
|
245
245
|
"""
|
|
246
246
|
Defines the forward pass of the RBLN model, providing a drop-in replacement for HuggingFace PreTrainedModel.
|
|
247
247
|
|
optimum/rbln/modeling_base.py
CHANGED
|
@@ -348,7 +348,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
|
|
|
348
348
|
model_id: Union[str, Path],
|
|
349
349
|
export: bool = False,
|
|
350
350
|
rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
|
|
351
|
-
**kwargs:
|
|
351
|
+
**kwargs: Any,
|
|
352
352
|
) -> "RBLNBaseModel":
|
|
353
353
|
"""
|
|
354
354
|
The `from_pretrained()` function is utilized in its standard form as in the HuggingFace transformers library.
|
|
@@ -523,10 +523,18 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
|
|
|
523
523
|
# First copy everything to a temporary directory
|
|
524
524
|
shutil.copytree(real_save_dir, tmp_dir)
|
|
525
525
|
|
|
526
|
-
# If everything succeeded,
|
|
526
|
+
# If everything succeeded, move files to target directory
|
|
527
527
|
if os.path.exists(save_directory_path):
|
|
528
|
-
|
|
529
|
-
|
|
528
|
+
# Move files from tmp_dir to existing directory (overwrite existing files)
|
|
529
|
+
for item in os.listdir(tmp_dir):
|
|
530
|
+
src_path = os.path.join(tmp_dir, item)
|
|
531
|
+
dst_path = os.path.join(save_directory_path, item)
|
|
532
|
+
shutil.move(src_path, dst_path)
|
|
533
|
+
# Clean up empty tmp_dir
|
|
534
|
+
os.rmdir(tmp_dir)
|
|
535
|
+
else:
|
|
536
|
+
# If target doesn't exist, just rename tmp_dir to target
|
|
537
|
+
os.rename(tmp_dir, save_directory_path)
|
|
530
538
|
|
|
531
539
|
except Exception as e:
|
|
532
540
|
# Clean up the temporary directory if anything fails
|
optimum/rbln/ops/attn.py
CHANGED
|
@@ -53,6 +53,45 @@ def paged_attn_decode_fake(
|
|
|
53
53
|
return torch.empty_like(q)
|
|
54
54
|
|
|
55
55
|
|
|
56
|
+
@torch.library.custom_op(
|
|
57
|
+
"rbln_custom_ops::paged_attn_decode_kv_fp8",
|
|
58
|
+
mutates_args=(["kcache", "vcache"]),
|
|
59
|
+
)
|
|
60
|
+
def paged_attn_decode_kv_fp8(
|
|
61
|
+
q: Tensor,
|
|
62
|
+
k: Tensor,
|
|
63
|
+
v: Tensor,
|
|
64
|
+
mask: Tensor,
|
|
65
|
+
kcache: Tensor,
|
|
66
|
+
vcache: Tensor,
|
|
67
|
+
seq: Tensor,
|
|
68
|
+
scale: Tensor,
|
|
69
|
+
block_table: Tensor,
|
|
70
|
+
block_size: int,
|
|
71
|
+
k_scale: Tensor,
|
|
72
|
+
v_scale: Tensor,
|
|
73
|
+
) -> Tensor:
|
|
74
|
+
return torch.empty_like(q)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@paged_attn_decode_kv_fp8.register_fake
|
|
78
|
+
def paged_attn_decode_kv_fp8_fake(
|
|
79
|
+
q: Tensor,
|
|
80
|
+
k: Tensor,
|
|
81
|
+
v: Tensor,
|
|
82
|
+
mask: Tensor,
|
|
83
|
+
kcache: Tensor,
|
|
84
|
+
vcache: Tensor,
|
|
85
|
+
seq: Tensor,
|
|
86
|
+
scale: Tensor,
|
|
87
|
+
block_table: Tensor,
|
|
88
|
+
block_size: int,
|
|
89
|
+
k_scale: Tensor,
|
|
90
|
+
v_scale: Tensor,
|
|
91
|
+
) -> Tensor:
|
|
92
|
+
return torch.empty_like(q)
|
|
93
|
+
|
|
94
|
+
|
|
56
95
|
@torch.library.custom_op(
|
|
57
96
|
"rbln_custom_ops::paged_attn_prefill",
|
|
58
97
|
mutates_args=(["kcache", "vcache"]),
|
|
@@ -112,6 +151,45 @@ def paged_attn_prefill_fake(
|
|
|
112
151
|
return torch.empty_like(q)
|
|
113
152
|
|
|
114
153
|
|
|
154
|
+
@torch.library.custom_op(
|
|
155
|
+
"rbln_custom_ops::paged_attn_prefill_kv_fp8",
|
|
156
|
+
mutates_args=(["kcache", "vcache"]),
|
|
157
|
+
)
|
|
158
|
+
def paged_attn_prefill_kv_fp8(
|
|
159
|
+
q: Tensor,
|
|
160
|
+
k: Tensor,
|
|
161
|
+
v: Tensor,
|
|
162
|
+
mask: Tensor,
|
|
163
|
+
kcache: Tensor,
|
|
164
|
+
vcache: Tensor,
|
|
165
|
+
seq: Tensor,
|
|
166
|
+
scale: Tensor,
|
|
167
|
+
block_table: Tensor,
|
|
168
|
+
block_size: int,
|
|
169
|
+
k_scale: Tensor,
|
|
170
|
+
v_scale: Tensor,
|
|
171
|
+
) -> Tensor:
|
|
172
|
+
return torch.empty_like(q)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@paged_attn_prefill_kv_fp8.register_fake
|
|
176
|
+
def paged_attn_prefill_kv_fp8_fake(
|
|
177
|
+
q: Tensor,
|
|
178
|
+
k: Tensor,
|
|
179
|
+
v: Tensor,
|
|
180
|
+
mask: Tensor,
|
|
181
|
+
kcache: Tensor,
|
|
182
|
+
vcache: Tensor,
|
|
183
|
+
seq: Tensor,
|
|
184
|
+
scale: Tensor,
|
|
185
|
+
block_table: Tensor,
|
|
186
|
+
block_size: int,
|
|
187
|
+
k_scale: Tensor,
|
|
188
|
+
v_scale: Tensor,
|
|
189
|
+
) -> Tensor:
|
|
190
|
+
return torch.empty_like(q)
|
|
191
|
+
|
|
192
|
+
|
|
115
193
|
@torch.library.custom_op(
|
|
116
194
|
"rbln_custom_ops::paged_causal_attn_decode",
|
|
117
195
|
mutates_args=(["kcache", "vcache"]),
|
|
@@ -236,6 +314,86 @@ def paged_causal_attn_prefill_fake(
|
|
|
236
314
|
return torch.empty_like(q)
|
|
237
315
|
|
|
238
316
|
|
|
317
|
+
@torch.library.custom_op(
|
|
318
|
+
"rbln_custom_ops::paged_causal_attn_decode_kv_fp8",
|
|
319
|
+
mutates_args=(["kcache", "vcache"]),
|
|
320
|
+
)
|
|
321
|
+
def paged_causal_attn_decode_kv_fp8(
|
|
322
|
+
q: Tensor,
|
|
323
|
+
k: Tensor,
|
|
324
|
+
v: Tensor,
|
|
325
|
+
kcache: Tensor,
|
|
326
|
+
vcache: Tensor,
|
|
327
|
+
seq: Tensor,
|
|
328
|
+
scale: Tensor,
|
|
329
|
+
block_table: Tensor,
|
|
330
|
+
block_size: int,
|
|
331
|
+
k_scale: Tensor,
|
|
332
|
+
v_scale: Tensor,
|
|
333
|
+
mask: Optional[Tensor] = None,
|
|
334
|
+
) -> Tensor:
|
|
335
|
+
return torch.empty_like(q)
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
@paged_causal_attn_decode_kv_fp8.register_fake
|
|
339
|
+
def paged_causal_attn_decode_kv_fp8_fake(
|
|
340
|
+
q: Tensor,
|
|
341
|
+
k: Tensor,
|
|
342
|
+
v: Tensor,
|
|
343
|
+
kcache: Tensor,
|
|
344
|
+
vcache: Tensor,
|
|
345
|
+
seq: Tensor,
|
|
346
|
+
scale: Tensor,
|
|
347
|
+
block_table: Tensor,
|
|
348
|
+
block_size: int,
|
|
349
|
+
k_scale: Tensor,
|
|
350
|
+
v_scale: Tensor,
|
|
351
|
+
mask: Optional[Tensor] = None,
|
|
352
|
+
) -> Tensor:
|
|
353
|
+
return torch.empty_like(q)
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@torch.library.custom_op(
|
|
357
|
+
"rbln_custom_ops::paged_causal_attn_prefill_kv_fp8",
|
|
358
|
+
mutates_args=(["kcache", "vcache"]),
|
|
359
|
+
)
|
|
360
|
+
def paged_causal_attn_prefill_kv_fp8(
|
|
361
|
+
q: Tensor,
|
|
362
|
+
k: Tensor,
|
|
363
|
+
v: Tensor,
|
|
364
|
+
kcache: Tensor,
|
|
365
|
+
vcache: Tensor,
|
|
366
|
+
seq: Tensor,
|
|
367
|
+
scale: Tensor,
|
|
368
|
+
block_table: Tensor,
|
|
369
|
+
block_size: int,
|
|
370
|
+
is_bidirectional: bool,
|
|
371
|
+
k_scale: Tensor,
|
|
372
|
+
v_scale: Tensor,
|
|
373
|
+
mask: Optional[Tensor] = None,
|
|
374
|
+
) -> Tensor:
|
|
375
|
+
return torch.empty_like(q)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
@paged_causal_attn_prefill_kv_fp8.register_fake
|
|
379
|
+
def paged_causal_attn_prefill_kv_fp8_fake(
|
|
380
|
+
q: Tensor,
|
|
381
|
+
k: Tensor,
|
|
382
|
+
v: Tensor,
|
|
383
|
+
kcache: Tensor,
|
|
384
|
+
vcache: Tensor,
|
|
385
|
+
seq: Tensor,
|
|
386
|
+
scale: Tensor,
|
|
387
|
+
block_table: Tensor,
|
|
388
|
+
block_size: int,
|
|
389
|
+
is_bidirectional: bool,
|
|
390
|
+
k_scale: Tensor,
|
|
391
|
+
v_scale: Tensor,
|
|
392
|
+
mask: Optional[Tensor] = None,
|
|
393
|
+
) -> Tensor:
|
|
394
|
+
return torch.empty_like(q)
|
|
395
|
+
|
|
396
|
+
|
|
239
397
|
@torch.library.custom_op(
|
|
240
398
|
"rbln_custom_ops::paged_add_softmax_attn_decode",
|
|
241
399
|
mutates_args=(["kcache", "vcache"]),
|
optimum/rbln/ops/flash_attn.py
CHANGED
|
@@ -59,6 +59,47 @@ def paged_flash_attn_decode_fake(
|
|
|
59
59
|
return torch.empty_like(q)
|
|
60
60
|
|
|
61
61
|
|
|
62
|
+
@torch.library.custom_op(
|
|
63
|
+
"rbln_custom_ops::paged_flash_attn_decode_kv_fp8",
|
|
64
|
+
mutates_args=(["kcache", "vcache"]),
|
|
65
|
+
)
|
|
66
|
+
def paged_flash_attn_decode_kv_fp8(
|
|
67
|
+
q: Tensor,
|
|
68
|
+
k: Tensor,
|
|
69
|
+
v: Tensor,
|
|
70
|
+
mask: Tensor,
|
|
71
|
+
kcache: Tensor,
|
|
72
|
+
vcache: Tensor,
|
|
73
|
+
seq: Tensor,
|
|
74
|
+
scale: Tensor,
|
|
75
|
+
block_table: Tensor,
|
|
76
|
+
block_size: int,
|
|
77
|
+
partition: int,
|
|
78
|
+
k_scale: Tensor,
|
|
79
|
+
v_scale: Tensor,
|
|
80
|
+
) -> Tensor:
|
|
81
|
+
return torch.empty_like(q)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@paged_flash_attn_decode_kv_fp8.register_fake
|
|
85
|
+
def paged_flash_attn_decode_kv_fp8_fake(
|
|
86
|
+
q: Tensor,
|
|
87
|
+
k: Tensor,
|
|
88
|
+
v: Tensor,
|
|
89
|
+
mask: Tensor,
|
|
90
|
+
kcache: Tensor,
|
|
91
|
+
vcache: Tensor,
|
|
92
|
+
seq: Tensor,
|
|
93
|
+
scale: Tensor,
|
|
94
|
+
block_table: Tensor,
|
|
95
|
+
block_size: int,
|
|
96
|
+
partition: int,
|
|
97
|
+
k_scale: Tensor,
|
|
98
|
+
v_scale: Tensor,
|
|
99
|
+
) -> Tensor:
|
|
100
|
+
return torch.empty_like(q)
|
|
101
|
+
|
|
102
|
+
|
|
62
103
|
@torch.library.custom_op(
|
|
63
104
|
"rbln_custom_ops::paged_flash_attn_prefill",
|
|
64
105
|
mutates_args=(["kcache", "vcache"]),
|
|
@@ -100,6 +141,47 @@ def paged_flash_attn_prefill_fake(
|
|
|
100
141
|
return torch.empty_like(q)
|
|
101
142
|
|
|
102
143
|
|
|
144
|
+
@torch.library.custom_op(
|
|
145
|
+
"rbln_custom_ops::paged_flash_attn_prefill_kv_fp8",
|
|
146
|
+
mutates_args=(["kcache", "vcache"]),
|
|
147
|
+
)
|
|
148
|
+
def paged_flash_attn_prefill_kv_fp8(
|
|
149
|
+
q: Tensor,
|
|
150
|
+
k: Tensor,
|
|
151
|
+
v: Tensor,
|
|
152
|
+
mask: Tensor,
|
|
153
|
+
kcache: Tensor,
|
|
154
|
+
vcache: Tensor,
|
|
155
|
+
seq: Tensor,
|
|
156
|
+
scale: Tensor,
|
|
157
|
+
block_table: Tensor,
|
|
158
|
+
block_size: int,
|
|
159
|
+
partition: int,
|
|
160
|
+
k_scale: Tensor,
|
|
161
|
+
v_scale: Tensor,
|
|
162
|
+
) -> Tensor:
|
|
163
|
+
return torch.empty_like(q)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@paged_flash_attn_prefill_kv_fp8.register_fake
|
|
167
|
+
def paged_flash_attn_prefill_kv_fp8_fake(
|
|
168
|
+
q: Tensor,
|
|
169
|
+
k: Tensor,
|
|
170
|
+
v: Tensor,
|
|
171
|
+
mask: Tensor,
|
|
172
|
+
kcache: Tensor,
|
|
173
|
+
vcache: Tensor,
|
|
174
|
+
seq: Tensor,
|
|
175
|
+
scale: Tensor,
|
|
176
|
+
block_table: Tensor,
|
|
177
|
+
block_size: int,
|
|
178
|
+
partition: int,
|
|
179
|
+
k_scale: Tensor,
|
|
180
|
+
v_scale: Tensor,
|
|
181
|
+
) -> Tensor:
|
|
182
|
+
return torch.empty_like(q)
|
|
183
|
+
|
|
184
|
+
|
|
103
185
|
@torch.library.custom_op(
|
|
104
186
|
"rbln_custom_ops::paged_flash_causal_attn_decode",
|
|
105
187
|
mutates_args=(["kcache", "vcache"]),
|
|
@@ -141,6 +223,47 @@ def paged_flash_causal_attn_decode_fake(
|
|
|
141
223
|
return torch.empty_like(q)
|
|
142
224
|
|
|
143
225
|
|
|
226
|
+
@torch.library.custom_op(
|
|
227
|
+
"rbln_custom_ops::paged_flash_causal_attn_decode_kv_fp8",
|
|
228
|
+
mutates_args=(["kcache", "vcache"]),
|
|
229
|
+
)
|
|
230
|
+
def paged_flash_causal_attn_decode_kv_fp8(
|
|
231
|
+
q: Tensor,
|
|
232
|
+
k: Tensor,
|
|
233
|
+
v: Tensor,
|
|
234
|
+
kcache: Tensor,
|
|
235
|
+
vcache: Tensor,
|
|
236
|
+
seq: Tensor,
|
|
237
|
+
scale: Tensor,
|
|
238
|
+
block_table: Tensor,
|
|
239
|
+
block_size: int,
|
|
240
|
+
partition: int,
|
|
241
|
+
k_scale: Tensor,
|
|
242
|
+
v_scale: Tensor,
|
|
243
|
+
mask: Optional[Tensor] = None,
|
|
244
|
+
) -> Tensor:
|
|
245
|
+
return torch.empty_like(q)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
@paged_flash_causal_attn_decode_kv_fp8.register_fake
|
|
249
|
+
def paged_flash_causal_attn_decode_kv_fp8_fake(
|
|
250
|
+
q: Tensor,
|
|
251
|
+
k: Tensor,
|
|
252
|
+
v: Tensor,
|
|
253
|
+
kcache: Tensor,
|
|
254
|
+
vcache: Tensor,
|
|
255
|
+
seq: Tensor,
|
|
256
|
+
scale: Tensor,
|
|
257
|
+
block_table: Tensor,
|
|
258
|
+
block_size: int,
|
|
259
|
+
partition: int,
|
|
260
|
+
k_scale: Tensor,
|
|
261
|
+
v_scale: Tensor,
|
|
262
|
+
mask: Optional[Tensor] = None,
|
|
263
|
+
) -> Tensor:
|
|
264
|
+
return torch.empty_like(q)
|
|
265
|
+
|
|
266
|
+
|
|
144
267
|
@torch.library.custom_op(
|
|
145
268
|
"rbln_custom_ops::paged_flash_causal_attn_prefill",
|
|
146
269
|
mutates_args=(["kcache", "vcache"]),
|
|
@@ -182,3 +305,46 @@ def paged_flash_causal_attn_prefill_fake(
|
|
|
182
305
|
mask: Optional[Tensor] = None,
|
|
183
306
|
) -> Tensor:
|
|
184
307
|
return torch.empty_like(q)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
@torch.library.custom_op(
|
|
311
|
+
"rbln_custom_ops::paged_flash_causal_attn_prefill_kv_fp8",
|
|
312
|
+
mutates_args=(["kcache", "vcache"]),
|
|
313
|
+
)
|
|
314
|
+
def paged_flash_causal_attn_prefill_kv_fp8(
|
|
315
|
+
q: Tensor,
|
|
316
|
+
k: Tensor,
|
|
317
|
+
v: Tensor,
|
|
318
|
+
kcache: Tensor,
|
|
319
|
+
vcache: Tensor,
|
|
320
|
+
seq: Tensor,
|
|
321
|
+
scale: Tensor,
|
|
322
|
+
block_table: Tensor,
|
|
323
|
+
block_size: int,
|
|
324
|
+
partition: int,
|
|
325
|
+
is_bidirectional: bool,
|
|
326
|
+
k_scale: Tensor,
|
|
327
|
+
v_scale: Tensor,
|
|
328
|
+
mask: Optional[Tensor] = None,
|
|
329
|
+
) -> Tensor:
|
|
330
|
+
return torch.empty_like(q)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
@paged_flash_causal_attn_prefill_kv_fp8.register_fake
|
|
334
|
+
def paged_flash_causal_attn_prefill_kv_fp8_fake(
|
|
335
|
+
q: Tensor,
|
|
336
|
+
k: Tensor,
|
|
337
|
+
v: Tensor,
|
|
338
|
+
kcache: Tensor,
|
|
339
|
+
vcache: Tensor,
|
|
340
|
+
seq: Tensor,
|
|
341
|
+
scale: Tensor,
|
|
342
|
+
block_table: Tensor,
|
|
343
|
+
block_size: int,
|
|
344
|
+
partition: int,
|
|
345
|
+
is_bidirectional: bool,
|
|
346
|
+
k_scale: Tensor,
|
|
347
|
+
v_scale: Tensor,
|
|
348
|
+
mask: Optional[Tensor] = None,
|
|
349
|
+
) -> Tensor:
|
|
350
|
+
return torch.empty_like(q)
|
|
@@ -34,6 +34,7 @@ _import_structure = {
|
|
|
34
34
|
"RBLNAutoModelForSequenceClassification",
|
|
35
35
|
"RBLNAutoModelForSpeechSeq2Seq",
|
|
36
36
|
"RBLNAutoModelForVision2Seq",
|
|
37
|
+
"RBLNAutoModelForTextEncoding",
|
|
37
38
|
"RBLNBartForConditionalGeneration",
|
|
38
39
|
"RBLNBartForConditionalGenerationConfig",
|
|
39
40
|
"RBLNBartModel",
|
|
@@ -62,6 +63,8 @@ _import_structure = {
|
|
|
62
63
|
"RBLNCLIPVisionModelWithProjectionConfig",
|
|
63
64
|
"RBLNDecoderOnlyModelForCausalLM",
|
|
64
65
|
"RBLNDecoderOnlyModelForCausalLMConfig",
|
|
66
|
+
"RBLNDecoderOnlyModelConfig",
|
|
67
|
+
"RBLNDecoderOnlyModel",
|
|
65
68
|
"RBLNDistilBertForQuestionAnswering",
|
|
66
69
|
"RBLNDistilBertForQuestionAnsweringConfig",
|
|
67
70
|
"RBLNDPTForDepthEstimation",
|
|
@@ -169,6 +172,7 @@ if TYPE_CHECKING:
|
|
|
169
172
|
RBLNAutoModelForSeq2SeqLM,
|
|
170
173
|
RBLNAutoModelForSequenceClassification,
|
|
171
174
|
RBLNAutoModelForSpeechSeq2Seq,
|
|
175
|
+
RBLNAutoModelForTextEncoding,
|
|
172
176
|
RBLNAutoModelForVision2Seq,
|
|
173
177
|
RBLNBartForConditionalGeneration,
|
|
174
178
|
RBLNBartForConditionalGenerationConfig,
|
|
@@ -196,6 +200,8 @@ if TYPE_CHECKING:
|
|
|
196
200
|
RBLNCLIPVisionModelWithProjectionConfig,
|
|
197
201
|
RBLNColPaliForRetrieval,
|
|
198
202
|
RBLNColPaliForRetrievalConfig,
|
|
203
|
+
RBLNDecoderOnlyModel,
|
|
204
|
+
RBLNDecoderOnlyModelConfig,
|
|
199
205
|
RBLNDecoderOnlyModelForCausalLM,
|
|
200
206
|
RBLNDecoderOnlyModelForCausalLMConfig,
|
|
201
207
|
RBLNDistilBertForQuestionAnswering,
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import Any,
|
|
15
|
+
from typing import Any, List, Optional, Tuple, Union
|
|
16
16
|
|
|
17
17
|
from ..configuration_utils import RBLNModelConfig
|
|
18
18
|
|
|
@@ -25,7 +25,7 @@ class RBLNTransformerEncoderConfig(RBLNModelConfig):
|
|
|
25
25
|
max_seq_len: Optional[int] = None,
|
|
26
26
|
batch_size: Optional[int] = None,
|
|
27
27
|
model_input_names: Optional[List[str]] = None,
|
|
28
|
-
**kwargs:
|
|
28
|
+
**kwargs: Any,
|
|
29
29
|
):
|
|
30
30
|
"""
|
|
31
31
|
Args:
|
|
@@ -52,7 +52,7 @@ class RBLNImageModelConfig(RBLNModelConfig):
|
|
|
52
52
|
self,
|
|
53
53
|
image_size: Optional[Union[int, Tuple[int, int]]] = None,
|
|
54
54
|
batch_size: Optional[int] = None,
|
|
55
|
-
**kwargs:
|
|
55
|
+
**kwargs: Any,
|
|
56
56
|
):
|
|
57
57
|
"""
|
|
58
58
|
Args:
|
|
@@ -124,7 +124,7 @@ class RBLNModelForAudioClassificationConfig(RBLNModelConfig):
|
|
|
124
124
|
batch_size: Optional[int] = None,
|
|
125
125
|
max_length: Optional[int] = None,
|
|
126
126
|
num_mel_bins: Optional[int] = None,
|
|
127
|
-
**kwargs:
|
|
127
|
+
**kwargs: Any,
|
|
128
128
|
):
|
|
129
129
|
"""
|
|
130
130
|
Args:
|
|
@@ -34,10 +34,7 @@ from transformers import (
|
|
|
34
34
|
AutoModelForTextEncoding,
|
|
35
35
|
PretrainedConfig,
|
|
36
36
|
)
|
|
37
|
-
from transformers.modeling_outputs import
|
|
38
|
-
BaseModelOutput,
|
|
39
|
-
QuestionAnsweringModelOutput,
|
|
40
|
-
)
|
|
37
|
+
from transformers.modeling_outputs import BaseModelOutput, QuestionAnsweringModelOutput
|
|
41
38
|
|
|
42
39
|
from ..configuration_utils import RBLNCompileConfig
|
|
43
40
|
from ..modeling import RBLNModel
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Optional, Tuple
|
|
17
|
+
|
|
18
|
+
import torch
|
|
19
|
+
from transformers.modeling_outputs import ModelOutput
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
|
|
23
|
+
class RBLNDecoderOnlyOutput(ModelOutput):
|
|
24
|
+
logits: torch.FloatTensor = None
|
|
25
|
+
generate_idx: torch.Tensor = None
|
|
26
|
+
padded_cache_lengths: int = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
|
|
30
|
+
class RBLNGemma3ForCausalLMOutput(RBLNDecoderOnlyOutput):
|
|
31
|
+
attention_mask: Optional[torch.Tensor] = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
|
|
35
|
+
class RBLNSeq2SeqTSDecoderOutput(ModelOutput):
|
|
36
|
+
last_hidden_states: torch.FloatTensor = None
|
|
37
|
+
params: Tuple[torch.FloatTensor] = None
|
|
@@ -36,6 +36,7 @@ _import_structure = {
|
|
|
36
36
|
"RBLNAutoModelForSpeechSeq2Seq",
|
|
37
37
|
"RBLNAutoModelForVision2Seq",
|
|
38
38
|
"RBLNAutoModelForImageTextToText",
|
|
39
|
+
"RBLNAutoModelForTextEncoding",
|
|
39
40
|
],
|
|
40
41
|
"bart": [
|
|
41
42
|
"RBLNBartForConditionalGeneration",
|
|
@@ -84,6 +85,8 @@ _import_structure = {
|
|
|
84
85
|
"RBLNQwen2_5_VLForConditionalGenerationConfig",
|
|
85
86
|
],
|
|
86
87
|
"decoderonly": [
|
|
88
|
+
"RBLNDecoderOnlyModelConfig",
|
|
89
|
+
"RBLNDecoderOnlyModel",
|
|
87
90
|
"RBLNDecoderOnlyModelForCausalLM",
|
|
88
91
|
"RBLNDecoderOnlyModelForCausalLMConfig",
|
|
89
92
|
],
|
|
@@ -160,10 +163,7 @@ _import_structure = {
|
|
|
160
163
|
}
|
|
161
164
|
|
|
162
165
|
if TYPE_CHECKING:
|
|
163
|
-
from .audio_spectrogram_transformer import
|
|
164
|
-
RBLNASTForAudioClassification,
|
|
165
|
-
RBLNASTForAudioClassificationConfig,
|
|
166
|
-
)
|
|
166
|
+
from .audio_spectrogram_transformer import RBLNASTForAudioClassification, RBLNASTForAudioClassificationConfig
|
|
167
167
|
from .auto import (
|
|
168
168
|
RBLNAutoModel,
|
|
169
169
|
RBLNAutoModelForAudioClassification,
|
|
@@ -177,6 +177,7 @@ if TYPE_CHECKING:
|
|
|
177
177
|
RBLNAutoModelForSeq2SeqLM,
|
|
178
178
|
RBLNAutoModelForSequenceClassification,
|
|
179
179
|
RBLNAutoModelForSpeechSeq2Seq,
|
|
180
|
+
RBLNAutoModelForTextEncoding,
|
|
180
181
|
RBLNAutoModelForVision2Seq,
|
|
181
182
|
)
|
|
182
183
|
from .bart import (
|
|
@@ -211,22 +212,15 @@ if TYPE_CHECKING:
|
|
|
211
212
|
RBLNCLIPVisionModelWithProjection,
|
|
212
213
|
RBLNCLIPVisionModelWithProjectionConfig,
|
|
213
214
|
)
|
|
214
|
-
from .colpali import
|
|
215
|
-
RBLNColPaliForRetrieval,
|
|
216
|
-
RBLNColPaliForRetrievalConfig,
|
|
217
|
-
)
|
|
215
|
+
from .colpali import RBLNColPaliForRetrieval, RBLNColPaliForRetrievalConfig
|
|
218
216
|
from .decoderonly import (
|
|
217
|
+
RBLNDecoderOnlyModel,
|
|
218
|
+
RBLNDecoderOnlyModelConfig,
|
|
219
219
|
RBLNDecoderOnlyModelForCausalLM,
|
|
220
220
|
RBLNDecoderOnlyModelForCausalLMConfig,
|
|
221
221
|
)
|
|
222
|
-
from .distilbert import
|
|
223
|
-
|
|
224
|
-
RBLNDistilBertForQuestionAnsweringConfig,
|
|
225
|
-
)
|
|
226
|
-
from .dpt import (
|
|
227
|
-
RBLNDPTForDepthEstimation,
|
|
228
|
-
RBLNDPTForDepthEstimationConfig,
|
|
229
|
-
)
|
|
222
|
+
from .distilbert import RBLNDistilBertForQuestionAnswering, RBLNDistilBertForQuestionAnsweringConfig
|
|
223
|
+
from .dpt import RBLNDPTForDepthEstimation, RBLNDPTForDepthEstimationConfig
|
|
230
224
|
from .exaone import RBLNExaoneForCausalLM, RBLNExaoneForCausalLMConfig
|
|
231
225
|
from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig, RBLNGemmaModel, RBLNGemmaModelConfig
|
|
232
226
|
from .gemma3 import (
|
|
@@ -35,6 +35,8 @@ from transformers.models.auto.modeling_auto import (
|
|
|
35
35
|
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES,
|
|
36
36
|
MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
|
|
37
37
|
MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES,
|
|
38
|
+
MODEL_FOR_TEXT_ENCODING_MAPPING,
|
|
39
|
+
MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES,
|
|
38
40
|
MODEL_FOR_VISION_2_SEQ_MAPPING,
|
|
39
41
|
MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES,
|
|
40
42
|
MODEL_MAPPING,
|
|
@@ -115,3 +117,8 @@ class RBLNAutoModelForImageClassification(_BaseAutoModelClass):
|
|
|
115
117
|
class RBLNAutoModelForQuestionAnswering(_BaseAutoModelClass):
|
|
116
118
|
_model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
|
|
117
119
|
_model_mapping_names = MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class RBLNAutoModelForTextEncoding(_BaseAutoModelClass):
|
|
123
|
+
_model_mapping = MODEL_FOR_TEXT_ENCODING_MAPPING
|
|
124
|
+
_model_mapping_names = MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES
|
|
@@ -16,9 +16,7 @@ from typing import Tuple
|
|
|
16
16
|
|
|
17
17
|
import torch
|
|
18
18
|
from torch import nn
|
|
19
|
-
from transformers.modeling_attn_mask_utils import
|
|
20
|
-
_prepare_4d_attention_mask,
|
|
21
|
-
)
|
|
19
|
+
from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask
|
|
22
20
|
from transformers.utils import logging
|
|
23
21
|
|
|
24
22
|
from ..seq2seq.seq2seq_architecture import (
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import Any,
|
|
15
|
+
from typing import Any, Optional
|
|
16
16
|
|
|
17
17
|
from ....configuration_utils import RBLNModelConfig
|
|
18
18
|
|
|
@@ -62,7 +62,7 @@ class RBLNBlip2ForConditionalGenerationConfig(RBLNModelConfig):
|
|
|
62
62
|
vision_model: Optional[RBLNModelConfig] = None,
|
|
63
63
|
qformer: Optional[RBLNModelConfig] = None,
|
|
64
64
|
language_model: Optional[RBLNModelConfig] = None,
|
|
65
|
-
**kwargs:
|
|
65
|
+
**kwargs: Any,
|
|
66
66
|
):
|
|
67
67
|
"""
|
|
68
68
|
Args:
|
|
@@ -35,11 +35,7 @@ from ....modeling import RBLNModel
|
|
|
35
35
|
logger = logging.get_logger(__name__)
|
|
36
36
|
|
|
37
37
|
if TYPE_CHECKING:
|
|
38
|
-
from transformers import
|
|
39
|
-
AutoFeatureExtractor,
|
|
40
|
-
AutoProcessor,
|
|
41
|
-
AutoTokenizer,
|
|
42
|
-
)
|
|
38
|
+
from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
|
|
43
39
|
|
|
44
40
|
|
|
45
41
|
class LoopProjector:
|