optimum-rbln 0.8.1rc0__py3-none-any.whl → 0.8.2__py3-none-any.whl
- optimum/rbln/__init__.py +58 -9
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +24 -5
- optimum/rbln/diffusers/configurations/models/__init__.py +1 -1
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +5 -3
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +2 -2
- optimum/rbln/diffusers/configurations/models/{configuration_cosmos_transformer.py → configuration_transformer_cosmos.py} +7 -2
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +2 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +3 -3
- optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +10 -6
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +4 -4
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +2 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +2 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +2 -2
- optimum/rbln/diffusers/modeling_diffusers.py +4 -5
- optimum/rbln/diffusers/models/__init__.py +3 -13
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +1 -0
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +1 -1
- optimum/rbln/diffusers/pipelines/__init__.py +1 -5
- optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +12 -4
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +4 -26
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +2 -2
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +2 -2
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -6
- optimum/rbln/modeling.py +4 -5
- optimum/rbln/modeling_base.py +18 -14
- optimum/rbln/ops/kv_cache_update.py +5 -0
- optimum/rbln/ops/linear.py +7 -0
- optimum/rbln/transformers/__init__.py +60 -0
- optimum/rbln/transformers/configuration_generic.py +4 -4
- optimum/rbln/transformers/modeling_attention_utils.py +252 -0
- optimum/rbln/transformers/modeling_generic.py +1 -4
- optimum/rbln/transformers/models/__init__.py +45 -30
- optimum/rbln/transformers/models/bart/bart_architecture.py +2 -7
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +2 -2
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +1 -5
- optimum/rbln/transformers/models/clip/configuration_clip.py +14 -3
- optimum/rbln/transformers/models/clip/modeling_clip.py +123 -28
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +1 -4
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +2 -2
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +2 -10
- optimum/rbln/transformers/models/decoderonly/__init__.py +2 -2
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +214 -45
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +323 -454
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +579 -362
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +17 -42
- optimum/rbln/transformers/models/gemma/__init__.py +2 -2
- optimum/rbln/transformers/models/gemma/configuration_gemma.py +9 -1
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +3 -44
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +22 -1
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +21 -9
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +9 -63
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +200 -292
- optimum/rbln/transformers/models/gpt2/__init__.py +2 -2
- optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +31 -3
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +19 -24
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +18 -1
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +2 -2
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +3 -9
- optimum/rbln/transformers/models/llama/__init__.py +2 -2
- optimum/rbln/transformers/models/llama/configuration_llama.py +9 -1
- optimum/rbln/transformers/models/llama/modeling_llama.py +22 -1
- optimum/rbln/transformers/models/llava/__init__.py +16 -0
- optimum/rbln/transformers/models/llava/configuration_llava.py +54 -0
- optimum/rbln/transformers/models/llava/modeling_llava.py +419 -0
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +20 -3
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +6 -16
- optimum/rbln/transformers/models/midm/midm_architecture.py +14 -22
- optimum/rbln/transformers/models/mistral/__init__.py +2 -2
- optimum/rbln/transformers/models/mistral/configuration_mistral.py +9 -1
- optimum/rbln/transformers/models/mistral/mistral_architecture.py +1 -1
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +26 -3
- optimum/rbln/transformers/models/opt/__init__.py +2 -2
- optimum/rbln/transformers/models/opt/configuration_opt.py +8 -1
- optimum/rbln/transformers/models/opt/modeling_opt.py +41 -1
- optimum/rbln/transformers/models/opt/opt_architecture.py +16 -25
- optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
- optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +34 -0
- optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +69 -0
- optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +161 -0
- optimum/rbln/transformers/models/phi/__init__.py +2 -2
- optimum/rbln/transformers/models/phi/configuration_phi.py +9 -1
- optimum/rbln/transformers/models/phi/modeling_phi.py +10 -1
- optimum/rbln/transformers/models/phi/phi_architecture.py +16 -22
- optimum/rbln/transformers/models/pixtral/__init__.py +16 -0
- optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
- optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +315 -0
- optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
- optimum/rbln/transformers/models/qwen2/__init__.py +2 -2
- optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +9 -1
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +27 -1
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +3 -3
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -15
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +1 -4
- optimum/rbln/transformers/models/qwen3/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +71 -0
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +133 -0
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +31 -0
- optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +2 -12
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +3 -1
- optimum/rbln/transformers/models/siglip/__init__.py +2 -6
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +2 -2
- optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +2 -2
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +3 -5
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +3 -12
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +8 -2
- optimum/rbln/transformers/models/xlm_roberta/__init__.py +2 -8
- optimum/rbln/utils/depreacate_utils.py +16 -0
- optimum/rbln/utils/hub.py +8 -47
- optimum/rbln/utils/runtime_utils.py +31 -5
- {optimum_rbln-0.8.1rc0.dist-info → optimum_rbln-0.8.2.dist-info}/METADATA +1 -1
- {optimum_rbln-0.8.1rc0.dist-info → optimum_rbln-0.8.2.dist-info}/RECORD +120 -103
- {optimum_rbln-0.8.1rc0.dist-info → optimum_rbln-0.8.2.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.1rc0.dist-info → optimum_rbln-0.8.2.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/qwen3/modeling_qwen3.py ADDED
@@ -0,0 +1,133 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING
+
+from transformers import PretrainedConfig
+
+from ....utils import logging
+from ...models.decoderonly import (
+    RBLNDecoderOnlyModel,
+    RBLNDecoderOnlyModelForCausalLM,
+    RBLNDecoderOnlyModelForCausalLMConfig,
+)
+from .qwen3_architecture import Qwen3Wrapper
+
+
+logger = logging.get_logger(__name__)
+
+if TYPE_CHECKING:
+    from transformers import PretrainedConfig
+
+
+class RBLNQwen3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
+    """
+    The Qwen3 Model transformer with a language modeling head (linear layer) on top.
+    This model inherits from [`RBLNDecoderOnlyModelForCausalLM`]. Check the superclass documentation for the generic methods the library implements for all its models.
+    A class to convert and run pre-trained transformers based Qwen3ForCausalLM model on RBLN devices.
+    It implements the methods to convert a pre-trained transformers Qwen3ForCausalLM model into a RBLN transformer model by:
+    - transferring the checkpoint weights of the original into an optimized RBLN graph,
+    - compiling the resulting graph using the RBLN compiler.
+    **Configuration:**
+    This model uses [`RBLNQwen3ForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
+    the `rbln_config` parameter should be an instance of [`RBLNQwen3ForCausalLMConfig`] or a dictionary conforming to its structure.
+    See the [`RBLNQwen3ForCausalLMConfig`] class for all available configuration options.
+    Examples:
+        ```python
+        from optimum.rbln import RBLNQwen3ForCausalLM
+        # Simple usage using rbln_* arguments
+        # `max_seq_len` is automatically inferred from the model config
+        model = RBLNQwen3ForCausalLM.from_pretrained(
+            "Qwen/Qwen3-4B",
+            export=True,
+            rbln_batch_size=1,
+            rbln_tensor_parallel_size=4,
+        )
+        # Using a config dictionary
+        rbln_config = {
+            "batch_size": 1,
+            "max_seq_len": 40_960,
+            "tensor_parallel_size": 4,
+            "kvcache_partition_len": 8192,
+        }
+        model = RBLNQwen3ForCausalLM.from_pretrained(
+            "Qwen/Qwen3-4B",
+            export=True,
+            rbln_config=rbln_config
+        )
+        # Using a RBLNQwen3ForCausalLMConfig instance (recommended for type checking)
+        from optimum.rbln import RBLNQwen3ForCausalLMConfig
+        config = RBLNQwen3ForCausalLMConfig(
+            batch_size=1,
+            max_seq_len=40_960,
+            tensor_parallel_size=4,
+            kvcache_partition_len=8192,
+        )
+        model = RBLNQwen3ForCausalLM.from_pretrained(
+            "Qwen/Qwen3-4B",
+            export=True,
+            rbln_config=config
+        )
+        ```
+    """
+
+    _decoder_wrapper_cls = Qwen3Wrapper
+
+    @classmethod
+    def _update_sliding_window_config(
+        cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
+    ):
+        # https://github.com/huggingface/transformers/issues/35896
+        # There seems to be a bug in transformers(v4.52.4). Therefore, similar to when attn_implementation is eager,
+        # we set all layers to use sliding window in this version. This should be updated once the bug is fixed.
+
+        rbln_config.cache_impl = "sliding_window"
+        rbln_config.sliding_window = model_config.sliding_window
+        rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
+        return rbln_config
+
+    def forward(self, *args, **kwargs):
+        kwargs["return_dict"] = True
+        return super().forward(*args, **kwargs)
+
+
+class RBLNQwen3Model(RBLNDecoderOnlyModel):
+    """
+    The bare Qwen3 Model outputting raw hidden-states without any specific head on top.
+    This model inherits from [`RBLNDecoderOnlyModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
+    A class to convert and run pre-trained transformers based Qwen3Model on RBLN devices.
+    It implements the methods to convert a pre-trained transformers Qwen3Model into a RBLN transformer model by:
+    - transferring the checkpoint weights of the original into an optimized RBLN graph,
+    - compiling the resulting graph using the RBLN compiler.
+    **Configuration:**
+    This model uses [`RBLNQwen3ModelConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
+    the `rbln_config` parameter should be an instance of [`RBLNQwen3ModelConfig`] or a dictionary conforming to its structure.
+    See the [`RBLNQwen3ModelConfig`] class for all available configuration options.
+    Examples:
+        ```python
+        from optimum.rbln import RBLNQwen3Model
+        # Simple usage using rbln_* arguments
+        # `max_seq_len` is automatically inferred from the model config
+        model = RBLNQwen3Model.from_pretrained(
+            "Qwen/Qwen3-Embedding-4B",
+            export=True,
+            rbln_batch_size=1,
+            rbln_max_seq_len=40_960,
+            rbln_tensor_parallel_size=4,
+            rbln_kvcache_partition_len=8192,
+        )
+        ```
+    """
+
+    _decoder_wrapper_cls = Qwen3Wrapper
+    _use_rotary_emb = True
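The docstrings above double as the usage documentation for the new Qwen3 support. For completeness, a minimal end-to-end inference sketch; hedged: the tokenizer setup and `generate` call follow the standard `transformers` API and are not part of this diff.

```python
from transformers import AutoTokenizer
from optimum.rbln import RBLNQwen3ForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B")
model = RBLNQwen3ForCausalLM.from_pretrained(
    "Qwen/Qwen3-4B",
    export=True,  # compile the checkpoint for the RBLN NPU
    rbln_batch_size=1,
    rbln_tensor_parallel_size=4,
)
inputs = tokenizer("What is an NPU?", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```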
optimum/rbln/transformers/models/qwen3/qwen3_architecture.py ADDED
@@ -0,0 +1,31 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from ..decoderonly.decoderonly_architecture import DecoderOnlyAttention, DecoderOnlyWrapper
+
+
+class Qwen3Wrapper(DecoderOnlyWrapper):
+    def get_rbln_attn_class(self):
+        return Qwen3Attention
+
+
+class Qwen3Attention(DecoderOnlyAttention):
+    def __post_init__(self):
+        self.k_proj = self._original_mod.k_proj
+        self.v_proj = self._original_mod.v_proj
+        self.q_proj = self._original_mod.q_proj
+        self.o_proj = self._original_mod.o_proj
+        self.q_norm = self._original_mod.q_norm
+        self.k_norm = self._original_mod.k_norm
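The only Qwen3-specific wiring is `__post_init__` binding the extra `q_norm`/`k_norm` submodules: unlike Qwen2, Qwen3 applies RMSNorm to the query and key heads before rotary embeddings. A hedged sketch of the computation those bound modules take part in; shapes and the helper name are illustrative, not from this diff.

```python
import torch

def project_qk(x: torch.Tensor, attn, head_dim: int):
    # Qwen3-style QK-norm: normalize over head_dim after reshaping to heads,
    # before rotary embeddings are applied (attn is a Qwen3Attention above).
    bsz, seq, _ = x.shape
    q = attn.q_norm(attn.q_proj(x).view(bsz, seq, -1, head_dim))
    k = attn.k_norm(attn.k_proj(x).view(bsz, seq, -1, head_dim))
    return q, k
```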
optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py CHANGED
@@ -12,9 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any,
-
-import rebel
+from typing import Any, Optional
 
 from ....configuration_utils import RBLNModelConfig
 from ....utils.logging import get_logger
@@ -31,7 +29,7 @@ class RBLNModelForSeq2SeqLMConfig(RBLNModelConfig):
         dec_max_seq_len: Optional[int] = None,
         use_attention_mask: Optional[bool] = None,
         pad_token_id: Optional[int] = None,
-        **kwargs:
+        **kwargs: Any,
     ):
         """
         Args:
@@ -39,7 +37,6 @@ class RBLNModelForSeq2SeqLMConfig(RBLNModelConfig):
             enc_max_seq_len (Optional[int]): Maximum sequence length for the encoder.
             dec_max_seq_len (Optional[int]): Maximum sequence length for the decoder.
             use_attention_mask (Optional[bool]): Whether to use attention masks during inference.
-                This is automatically set to True for RBLN-CA02 devices.
             pad_token_id (Optional[int]): The ID of the padding token in the vocabulary.
             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
@@ -55,12 +52,5 @@ class RBLNModelForSeq2SeqLMConfig(RBLNModelConfig):
         self.dec_max_seq_len = dec_max_seq_len
 
         self.use_attention_mask = use_attention_mask
-        npu = self.npu or rebel.get_npu_name()
-        if npu == "RBLN-CA02":
-            if self.use_attention_mask is False:
-                logger.warning("Attention mask should be used with RBLN-CA02. Setting use_attention_mask to True.")
-            self.use_attention_mask = True
-        else:
-            self.use_attention_mask = self.use_attention_mask or False
 
         self.pad_token_id = pad_token_id
optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py CHANGED
@@ -38,7 +38,7 @@ if TYPE_CHECKING:
 class RBLNRuntimeEncoder(RBLNPytorchRuntime):
     mandatory_members = ["main_input_name"]
 
-    def forward(self, *args: List[torch.Tensor], **kwargs:
+    def forward(self, *args: List[torch.Tensor], **kwargs: torch.Tensor):
         output = super().forward(*args, **kwargs)
         return BaseModelOutput(last_hidden_state=output)
 
@@ -327,12 +327,14 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
                 tensor_type="pt",
                 device=rbln_config.device_map["encoder"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             rebel.Runtime(
                 compiled_models[1],
                 tensor_type="pt",
                 device=rbln_config.device_map["decoder"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
         ]
 
optimum/rbln/transformers/models/siglip/__init__.py CHANGED
@@ -12,9 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .configuration_siglip import
-
-)
-from .modeling_siglip import (
-    RBLNSiglipVisionModel,
-)
+from .configuration_siglip import RBLNSiglipVisionModelConfig
+from .modeling_siglip import RBLNSiglipVisionModel
optimum/rbln/transformers/models/siglip/modeling_siglip.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
 
 import torch
 from transformers import SiglipVisionConfig, SiglipVisionModel
@@ -126,7 +126,7 @@ class RBLNSiglipVisionModel(RBLNModel):
         output_attentions: bool = None,
         output_hidden_states: bool = None,
         interpolate_pos_encoding: bool = False,
-        **kwargs:
+        **kwargs: Any,
     ) -> Union[Tuple, BaseModelOutputWithPooling]:
         if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
             logger.warning(
optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Any,
+from typing import Any, Optional
 
 from ....configuration_utils import RBLNModelConfig
 
@@ -17,7 +17,7 @@ class RBLNTimeSeriesTransformerForPredictionConfig(RBLNModelConfig):
         enc_max_seq_len: Optional[int] = None,
         dec_max_seq_len: Optional[int] = None,
         num_parallel_samples: Optional[int] = None,
-        **kwargs:
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py CHANGED
@@ -30,11 +30,7 @@ from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, Union
 import rebel
 import torch
 from rebel.compile_context import CompileContext
-from transformers import
-    PretrainedConfig,
-    TimeSeriesTransformerForPrediction,
-    TimeSeriesTransformerModel,
-)
+from transformers import PretrainedConfig, TimeSeriesTransformerForPrediction, TimeSeriesTransformerModel
 from transformers.modeling_outputs import ModelOutput, SampleTSPredictionOutput, Seq2SeqTSModelOutput
 from transformers.modeling_utils import no_init_weights
 
@@ -331,12 +327,14 @@ class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
                 tensor_type="pt",
                 device=rbln_config.device_map["encoder"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             rebel.Runtime(
                 compiled_models[1],
                 tensor_type="pt",
                 device=rbln_config.device_map["decoder"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
         ]
 
optimum/rbln/transformers/models/whisper/configuration_whisper.py CHANGED
@@ -12,9 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any
-
-import rebel
+from typing import Any
 
 from ....configuration_utils import RBLNModelConfig
 from ....utils.logging import get_logger
@@ -38,14 +36,13 @@ class RBLNWhisperForConditionalGenerationConfig(RBLNModelConfig):
         use_attention_mask: bool = None,
         enc_max_seq_len: int = None,
         dec_max_seq_len: int = None,
-        **kwargs:
+        **kwargs: Any,
     ):
         """
         Args:
             batch_size (int, optional): The batch size for inference. Defaults to 1.
             token_timestamps (bool, optional): Whether to output token timestamps during generation. Defaults to False.
             use_attention_mask (bool, optional): Whether to use attention masks during inference. This is automatically
-                set to True for RBLN-CA02 devices.
             enc_max_seq_len (int, optional): Maximum sequence length for the encoder.
             dec_max_seq_len (int, optional): Maximum sequence length for the decoder.
             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
@@ -64,10 +61,4 @@ class RBLNWhisperForConditionalGenerationConfig(RBLNModelConfig):
         self.dec_max_seq_len = dec_max_seq_len
 
         self.use_attention_mask = use_attention_mask
-
-        if npu == "RBLN-CA02":
-            if self.use_attention_mask is False:
-                logger.warning("Attention mask should be used with RBLN-CA02. Setting use_attention_mask to True.")
-            self.use_attention_mask = True
-        else:
-            self.use_attention_mask = self.use_attention_mask or False
+        self.use_attention_mask = self.use_attention_mask or False
optimum/rbln/transformers/models/whisper/modeling_whisper.py CHANGED
@@ -46,7 +46,7 @@ if TYPE_CHECKING:
 class RBLNRuntimeEncoder(RBLNPytorchRuntime):
     mandatory_members = ["main_input_name"]
 
-    def forward(self, *args: List[torch.Tensor], **kwargs:
+    def forward(self, *args: List[torch.Tensor], **kwargs: torch.Tensor):
         output = super().forward(*args, **kwargs)
         return BaseModelOutput(last_hidden_state=output)
 
@@ -73,6 +73,7 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
         decoder_input_ids: torch.Tensor = None,
         decoder_attention_mask: torch.Tensor = None,
         cache_position: torch.Tensor = None,
+        block_tables: torch.Tensor = None,
     ):
         inputs_bsz = decoder_input_ids.shape[0]
         padded_bsz = self.batch_size - inputs_bsz
@@ -89,11 +90,14 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
             )
             decoder_attention_mask[b_idx, : decoding_step + 1] = 1
 
+        if block_tables is None:
+            block_tables = self.default_block_tables
+
         outputs = super().forward(
             decoder_input_ids,
             decoder_attention_mask if self.use_attention_mask else None,
             cache_position,
-            block_tables=
+            block_tables=block_tables,
         )
 
         if isinstance(outputs, torch.Tensor):
@@ -345,12 +349,14 @@ class RBLNWhisperForConditionalGeneration(RBLNModel, RBLNWhisperGenerationMixin)
                 tensor_type="pt",
                 device=rbln_config.device_map["encoder"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             rebel.Runtime(
                 compiled_models[1],
                 tensor_type="pt",
                 device=rbln_config.device_map["decoder"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
         ]
 
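Both runtime constructions above now forward a `timeout` value from the RBLN config; the same change is applied to the seq2seq and time-series models. A hedged usage sketch: `rbln_timeout` follows the documented `rbln_*` kwarg convention, so treat the exact kwarg name as an assumption rather than an API guarantee.

```python
from optimum.rbln import RBLNWhisperForConditionalGeneration

model = RBLNWhisperForConditionalGeneration.from_pretrained(
    "openai/whisper-small",
    export=True,
    rbln_timeout=120,  # assumed to be forwarded to each rebel.Runtime as timeout=
)
```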
optimum/rbln/transformers/models/xlm_roberta/__init__.py CHANGED
@@ -12,14 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .configuration_xlm_roberta import
-
-    RBLNXLMRobertaModelConfig,
-)
-from .modeling_xlm_roberta import (
-    RBLNXLMRobertaForSequenceClassification,
-    RBLNXLMRobertaModel,
-)
+from .configuration_xlm_roberta import RBLNXLMRobertaForSequenceClassificationConfig, RBLNXLMRobertaModelConfig
+from .modeling_xlm_roberta import RBLNXLMRobertaForSequenceClassification, RBLNXLMRobertaModel
 
 
 __all__ = [
optimum/rbln/utils/depreacate_utils.py ADDED
@@ -0,0 +1,16 @@
+from typing import Optional
+
+import rebel
+
+from .logging import get_logger
+
+
+logger = get_logger(__name__)
+
+
+def warn_deprecated_npu(npu: Optional[str] = None):
+    npu = npu or rebel.get_npu_name()
+    if npu == "RBLN-CA02":
+        logger.warning_once(
+            "Support for the RBLN-CA02 device is provided only up to optimum-rbln v0.8.0 and has reached end of life.",
+        )
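This new helper centralizes the RBLN-CA02 handling that was deleted from the seq2seq and Whisper configuration classes above: instead of silently forcing `use_attention_mask` per config, the library now emits a one-time end-of-life warning. A short usage sketch (call sites are illustrative; the import path matches the new file):

```python
from optimum.rbln.utils.depreacate_utils import warn_deprecated_npu

warn_deprecated_npu()             # resolves the NPU via rebel.get_npu_name()
warn_deprecated_npu("RBLN-CA02")  # logs the end-of-life warning once
```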
optimum/rbln/utils/hub.py CHANGED
@@ -12,59 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
 from pathlib import Path
 from typing import List, Optional, Union
 
-from huggingface_hub import HfApi,
-
-
-class PushToHubMixin:
-    def push_to_hub(
-        self,
-        save_directory: str,
-        repository_id: str,
-        private: Optional[bool] = None,
-        use_auth_token: Union[bool, str] = True,
-    ) -> str:
-        huggingface_token = _get_huggingface_token(use_auth_token)
-        api = HfApi()
-
-        api.create_repo(
-            token=huggingface_token,
-            repo_id=repository_id,
-            exist_ok=True,
-            private=private,
-        )
-        for path, subdirs, files in os.walk(save_directory):
-            for name in files:
-                local_file_path = os.path.join(path, name)
-                _, hub_file_path = os.path.split(local_file_path)
-                # FIXME: when huggingface_hub fixes the return of upload_file
-                try:
-                    api.upload_file(
-                        token=huggingface_token,
-                        repo_id=f"{repository_id}",
-                        path_or_fileobj=os.path.join(os.getcwd(), local_file_path),
-                        path_in_repo=hub_file_path,
-                    )
-                except KeyError:
-                    pass
-                except NameError:
-                    pass
+from huggingface_hub import HfApi, get_token, hf_hub_download
 
 
 def pull_compiled_model_from_hub(
     model_id: Union[str, Path],
     subfolder: str,
-
+    token: Union[bool, str],
     revision: Optional[str],
     cache_dir: Optional[str],
     force_download: bool,
     local_files_only: bool,
 ) -> Path:
     """Pull model files from the HuggingFace Hub."""
-    huggingface_token = _get_huggingface_token(
+    huggingface_token = _get_huggingface_token(token)
     repo_files = list(
         map(
             Path,
@@ -87,7 +51,7 @@ def pull_compiled_model_from_hub(
             repo_id=model_id,
             filename=filename,
             subfolder=subfolder,
-
+            token=token,
             revision=revision,
             cache_dir=cache_dir,
             force_download=force_download,
@@ -113,10 +77,7 @@ def validate_files(
         raise FileExistsError(f"Multiple rbln_config.json files found in {location}. This is not expected.")
 
 
-def _get_huggingface_token(
-    if isinstance(
-        return
-
-        return HfFolder.get_token()
-    else:
-        raise ValueError("`use_auth_token` must be provided to interact with the HuggingFace Hub.")
+def _get_huggingface_token(token: Union[bool, str]) -> str:
+    if isinstance(token, str):
+        return token
+    return get_token()
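Token resolution is now delegated to `huggingface_hub.get_token` instead of the removed `HfFolder` path, and a non-string token no longer raises. A hedged sketch of the resulting behavior (token values are placeholders):

```python
from optimum.rbln.utils.hub import _get_huggingface_token

assert _get_huggingface_token("hf_example") == "hf_example"  # explicit string token wins
_get_huggingface_token(True)  # falls back to the token stored by `hf auth login`, or None
```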
optimum/rbln/utils/runtime_utils.py CHANGED
@@ -12,13 +12,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import re
 import threading
-from typing import Any,
+from typing import Any, List, Optional, Union
 
 import rebel
 import torch
 
 
+def normalize_npu(npu: str) -> str:
+    """Normalize the NPU string by removing the form factor."""
+    match = re.match(r"(RBLN-CA|RBLN-CR)(\d+)", npu)
+    if match:
+        prefix, num = match.groups()
+        if len(num) == 1:
+            # Convert "RBLN-CAx" → "RBLN-CA0"
+            # (e.g., "RBLN-CA2" -> "RBLN-CA0")
+            npu = f"{prefix}0"
+        elif len(num) == 2:
+            # Strip form factor (e.g., "RBLN-CA15" → "RBLN-CA1")
+            npu = f"{prefix}{num[:-1]}"
+    return npu
+
+
 def tp_and_devices_are_ok(
     tensor_parallel_size: Optional[int] = None,
     device: Optional[Union[int, List[int]]] = None,
@@ -58,7 +74,7 @@ def tp_and_devices_are_ok(
     if npu is not None:
         for device_id in device:
             npu_name = rebel.get_npu_name(device_id)
-            if npu_name != npu:
+            if normalize_npu(npu_name) != normalize_npu(npu):
                 return f"Device {device_id} ({npu_name}) is not on the same NPU as {npu}."
 
     return None
@@ -78,7 +94,7 @@ class RBLNPytorchRuntime:
     def __call__(self, *args: Any, **kwds: Any) -> Any:
        return self.forward(*args, **kwds)
 
-    def forward(self, *args: List["torch.Tensor"], **kwargs:
+    def forward(self, *args: List["torch.Tensor"], **kwargs: "torch.Tensor"):
         # filtering useless args or kwarg such as None.
         args = list(filter(lambda arg: isinstance(arg, torch.Tensor), args))
         kwargs = dict(filter(lambda kwarg: isinstance(kwarg[1], torch.Tensor) or kwarg[0] == "out", kwargs.items()))
@@ -126,7 +142,7 @@ class UnavailableRuntime:
         """Returns an iterator with self as the only item."""
         return iter([self])
 
-    def forward(self, *args: List["torch.Tensor"], **kwargs:
+    def forward(self, *args: List["torch.Tensor"], **kwargs: "torch.Tensor"):
         """Raises a detailed RuntimeError explaining why inference cannot be performed."""
         raise RuntimeError(
             "Cannot perform inference: RBLN runtime is not available.\n\n"
@@ -147,13 +163,20 @@ class ContextRblnConfig:
     _local = threading.local()
 
     def __init__(
-        self,
+        self,
+        device=None,
+        device_map=None,
+        create_runtimes=None,
+        optimize_host_mem=None,
+        activate_profiler=None,
+        timeout=None,
     ):
         self.device = device
         self.device_map = device_map
         self.create_runtimes = create_runtimes
         self.optimize_host_mem = optimize_host_mem
         self.activate_profiler = activate_profiler
+        self.timeout = timeout
 
     def __enter__(self):
         self._local.device = self.device
@@ -161,6 +184,7 @@ class ContextRblnConfig:
         self._local.create_runtimes = self.create_runtimes
         self._local.optimize_host_memory = self.optimize_host_mem
         self._local.activate_profiler = self.activate_profiler
+        self._local.timeout = self.timeout
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
@@ -169,6 +193,7 @@ class ContextRblnConfig:
         self._local.create_runtimes = None
         self._local.optimize_host_memory = None
         self._local.activate_profiler = None
+        self._local.timeout = None
 
     @classmethod
     def get_current_context(cls):
@@ -178,4 +203,5 @@ class ContextRblnConfig:
             "create_runtimes": getattr(cls._local, "create_runtimes", None),
             "optimize_host_memory": getattr(cls._local, "optimize_host_memory", None),
             "activate_profiler": getattr(cls._local, "activate_profiler", None),
+            "timeout": getattr(cls._local, "timeout", None),
         }
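`normalize_npu` makes the device-compatibility check in `tp_and_devices_are_ok` compare NPU generations rather than exact part numbers. Its behavior, derived directly from the code above:

```python
normalize_npu("RBLN-CA02")  # -> "RBLN-CA0" (form-factor digit stripped)
normalize_npu("RBLN-CA12")  # -> "RBLN-CA1"
normalize_npu("RBLN-CA2")   # -> "RBLN-CA0" (single digit maps to generation 0)
normalize_npu("RBLN-CR18")  # -> "RBLN-CR1"
```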
{optimum_rbln-0.8.1rc0.dist-info → optimum_rbln-0.8.2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.1rc0
+Version: 0.8.2
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai