optimum-rbln 0.8.2a1__py3-none-any.whl → 0.8.2a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of optimum-rbln might be problematic.
- optimum/rbln/__init__.py +8 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +16 -1
- optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +3 -0
- optimum/rbln/diffusers/modeling_diffusers.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +1 -0
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +1 -1
- optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +10 -2
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +4 -30
- optimum/rbln/modeling.py +1 -0
- optimum/rbln/transformers/__init__.py +8 -0
- optimum/rbln/transformers/models/__init__.py +2 -0
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +7 -0
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +3 -0
- optimum/rbln/transformers/models/qwen3/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +71 -0
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +377 -0
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +275 -0
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +2 -0
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +2 -0
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -0
- optimum/rbln/utils/runtime_utils.py +28 -2
- {optimum_rbln-0.8.2a1.dist-info → optimum_rbln-0.8.2a2.dist-info}/METADATA +1 -1
- {optimum_rbln-0.8.2a1.dist-info → optimum_rbln-0.8.2a2.dist-info}/RECORD +28 -24
- {optimum_rbln-0.8.2a1.dist-info → optimum_rbln-0.8.2a2.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.2a1.dist-info → optimum_rbln-0.8.2a2.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__init__.py
CHANGED
@@ -110,6 +110,10 @@ _import_structure = {
         "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
         "RBLNQwen2_5_VLForConditionalGeneration",
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
+        "RBLNQwen3ForCausalLM",
+        "RBLNQwen3ForCausalLMConfig",
+        "RBLNQwen3Model",
+        "RBLNQwen3ModelConfig",
         "RBLNResNetForImageClassification",
         "RBLNResNetForImageClassificationConfig",
         "RBLNRobertaForMaskedLM",
@@ -357,6 +361,10 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
+        RBLNQwen3ForCausalLM,
+        RBLNQwen3ForCausalLMConfig,
+        RBLNQwen3Model,
+        RBLNQwen3ModelConfig,
         RBLNResNetForImageClassification,
         RBLNResNetForImageClassificationConfig,
         RBLNRobertaForMaskedLM,
optimum/rbln/__version__.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.8.2a1'
-__version_tuple__ = version_tuple = (0, 8, 2, 'a1')
+__version__ = version = '0.8.2a2'
+__version_tuple__ = version_tuple = (0, 8, 2, 'a2')
optimum/rbln/configuration_utils.py
CHANGED
@@ -147,7 +147,7 @@ class RBLNCompileConfig:
         return asdict(self)
 
 
-RUNTIME_KEYWORDS = ["create_runtimes", "optimize_host_memory", "device", "device_map", "activate_profiler"]
+RUNTIME_KEYWORDS = ["create_runtimes", "optimize_host_memory", "device", "device_map", "activate_profiler", "timeout"]
 CONFIG_MAPPING: Dict[str, Type["RBLNModelConfig"]] = {}
 
 
@@ -481,6 +481,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         "device",
         "device_map",
         "activate_profiler",
+        "timeout",
     ]
     submodules: List[str] = []
     subclass_non_save_attributes = []
@@ -561,6 +562,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         activate_profiler: Optional[bool] = None,
         npu: Optional[str] = None,
         tensor_parallel_size: Optional[int] = None,
+        timeout: Optional[int] = None,
         optimum_rbln_version: Optional[str] = None,
         _compile_cfgs: List[RBLNCompileConfig] = [],
         **kwargs: Dict[str, Any],
@@ -577,6 +579,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
             activate_profiler (Optional[bool]): Whether to activate the profiler for performance analysis.
             npu (Optional[str]): The NPU device name to use for compilation.
             tensor_parallel_size (Optional[int]): Size for tensor parallelism to distribute the model across devices.
+            timeout (Optional[int]): The timeout for the runtime in seconds. If it isn't provided, it will be set to 60 by default.
             optimum_rbln_version (Optional[str]): The optimum-rbln version used for this configuration.
             _compile_cfgs (List[RBLNCompileConfig]): List of compilation configurations for the model.
             **kwargs: Additional keyword arguments.
@@ -599,6 +602,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         self._runtime_options["device"] = device
         self._runtime_options["device_map"] = device_map
         self._runtime_options["activate_profiler"] = activate_profiler
+        self._runtime_options["timeout"] = timeout
 
         # Automatically pass npu, tensor_parallel_size to compile_cfgs
         self.npu = npu
@@ -838,3 +842,14 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
     @activate_profiler.setter
     def activate_profiler(self, activate_profiler: bool):
         self._runtime_options["activate_profiler"] = activate_profiler
+
+    @property
+    def timeout(self):
+        context = ContextRblnConfig.get_current_context()["timeout"]
+        if context is not None:
+            return context
+        return self._runtime_options["timeout"]
+
+    @timeout.setter
+    def timeout(self, timeout: int):
+        self._runtime_options["timeout"] = timeout
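Taken together, these configuration_utils.py changes make `timeout` a regular runtime option alongside `device` and `activate_profiler`, exposed through the new property/setter pair and forwarded to `rebel.Runtime` by the modeling code below. A minimal usage sketch, assuming the config classes exported in this release (the 60-second fallback is quoted from the docstring above, not verified independently):

```python
from optimum.rbln import RBLNQwen3ForCausalLMConfig

# Hypothetical illustration: pass the new runtime option when building a config.
# Per the docstring above, runtimes fall back to 60 seconds when timeout is None.
config = RBLNQwen3ForCausalLMConfig(batch_size=1, timeout=120)

# The new property setter updates _runtime_options["timeout"], which the
# modeling code forwards as rebel.Runtime(..., timeout=...).
config.timeout = 90
```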
optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py
CHANGED
@@ -52,6 +52,9 @@ class RBLNCosmosTransformer3DModelConfig(RBLNModelConfig):
         Raises:
             ValueError: If batch_size is not a positive integer.
         """
+        if kwargs.get("timeout") is None:
+            kwargs["timeout"] = 80
+
         super().__init__(**kwargs)
         self.batch_size = batch_size or 1
         self.num_frames = num_frames or 121
optimum/rbln/diffusers/modeling_diffusers.py
CHANGED
@@ -230,6 +230,7 @@ class RBLNDiffusionMixin:
             create_runtimes=rbln_config.create_runtimes,
             optimize_host_mem=rbln_config.optimize_host_memory,
             activate_profiler=rbln_config.activate_profiler,
+            timeout=rbln_config.timeout,
         ):
             model = super().from_pretrained(pretrained_model_name_or_path=model_id, **kwargs)
 
optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py
CHANGED
@@ -200,6 +200,7 @@ class RBLNAutoencoderKLCosmos(RBLNModel):
                 tensor_type="pt",
                 device=device_val,
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             )
             for compiled_model, device_val in zip(compiled_models, device_vals)
         ]
optimum/rbln/diffusers/models/transformers/transformer_cosmos.py
CHANGED
@@ -279,7 +279,7 @@ class RBLNCosmosTransformer3DModel(RBLNModel):
                 tensor_type="pt",
                 device=rbln_config.device_map[DEFAULT_COMPILED_MODEL_NAME],
                 activate_profiler=rbln_config.activate_profiler,
-                timeout=
+                timeout=rbln_config.timeout,
             )
             for compiled_model in compiled_models
         ]
optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py
CHANGED
@@ -15,7 +15,7 @@
 from typing import Any, Dict, Optional, Tuple
 
 from ....configuration_utils import RBLNAutoConfig, RBLNModelConfig
-from ....transformers import RBLNSiglipVisionModelConfig
+from ....transformers import RBLNLlamaForCausalLMConfig, RBLNSiglipVisionModelConfig
 
 
 class RBLNVideoSafetyModelConfig(RBLNModelConfig):
@@ -75,7 +75,15 @@ class RBLNCosmosSafetyCheckerConfig(RBLNModelConfig):
         if height is not None and width is not None:
             image_size = (height, width)
 
-
+        tensor_parallel_size = kwargs.get("tensor_parallel_size")
+
+        self.aegis = self.init_submodule_config(
+            RBLNLlamaForCausalLMConfig,
+            aegis,
+            batch_size=batch_size,
+            tensor_parallel_size=tensor_parallel_size,
+        )
+
         self.siglip_encoder = self.init_submodule_config(
             RBLNSiglipVisionModelConfig,
             siglip_encoder,
optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py
CHANGED
@@ -127,25 +127,13 @@ class RBLNSigLIPEncoder(SigLIPEncoder):
 
             # We don't use RBLNSiglipModel, but we need to override get_image_features to return pooler_output
             self.model = RBLNSiglipVisionModel.from_pretrained(
-                self.checkpoint_dir,
-                rbln_device=rbln_config.siglip_encoder.device,
-                rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
-                rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
+                self.checkpoint_dir, rbln_config=rbln_config.siglip_encoder
             )
         else:
             super().__init__(model_name, checkpoint_id)
             model = self.model
             del self.model
-            self.model = RBLNSiglipVisionModel.from_model(
-                model,
-                rbln_device=rbln_config.siglip_encoder.device,
-                rbln_image_size=rbln_config.siglip_encoder.image_size,
-                rbln_npu=rbln_config.siglip_encoder.npu,
-                rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
-                rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
-            )
+            self.model = RBLNSiglipVisionModel.from_model(model, rbln_config=rbln_config.siglip_encoder)
         self.rbln_config = rbln_config
 
         # Override get_image_features to return pooler_output
@@ -336,28 +324,14 @@ class RBLNAegis(Aegis):
             torch.nn.Module.__init__(self)
             cache_dir = pathlib.Path(checkpoint_id) / "aegis"
             self.tokenizer = AutoTokenizer.from_pretrained(cache_dir)
-            self.model = RBLNAutoModelForCausalLM.from_pretrained(
-                cache_dir,
-                rbln_device=rbln_config.aegis.device,
-                rbln_create_runtimes=rbln_config.aegis.create_runtimes,
-                rbln_activate_profiler=rbln_config.aegis.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
-            )
+            self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.aegis)
 
         else:
             super().__init__(checkpoint_id, base_model_id, aegis_adapter)
             model = self.model.merge_and_unload() # peft merge
             del self.model
 
-            self.model = RBLNAutoModelForCausalLM.from_model(
-                model,
-                rbln_tensor_parallel_size=4,
-                rbln_device=rbln_config.aegis.device,
-                rbln_create_runtimes=rbln_config.aegis.create_runtimes,
-                rbln_npu=rbln_config.aegis.npu,
-                rbln_activate_profiler=rbln_config.aegis.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
-            )
+            self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.aegis)
 
         self.rbln_config = rbln_config
         self.dtype = torch.bfloat16
optimum/rbln/modeling.py
CHANGED
@@ -238,6 +238,7 @@ class RBLNModel(RBLNBaseModel):
                 tensor_type="pt",
                 device=rbln_config.device_map[DEFAULT_COMPILED_MODEL_NAME],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             )
             for compiled_model in compiled_models
         ]
optimum/rbln/transformers/__init__.py
CHANGED
@@ -98,6 +98,10 @@ _import_structure = {
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
         "RBLNQwen2ForCausalLM",
         "RBLNQwen2ForCausalLMConfig",
+        "RBLNQwen3ForCausalLM",
+        "RBLNQwen3ForCausalLMConfig",
+        "RBLNQwen3Model",
+        "RBLNQwen3ModelConfig",
         "RBLNResNetForImageClassification",
         "RBLNResNetForImageClassificationConfig",
         "RBLNRobertaForMaskedLM",
@@ -204,6 +208,10 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
+        RBLNQwen3ForCausalLM,
+        RBLNQwen3ForCausalLMConfig,
+        RBLNQwen3Model,
+        RBLNQwen3ModelConfig,
         RBLNResNetForImageClassification,
         RBLNResNetForImageClassificationConfig,
         RBLNRobertaForMaskedLM,
optimum/rbln/transformers/models/__init__.py
CHANGED
@@ -113,6 +113,7 @@ _import_structure = {
     "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
     "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
     "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
+    "qwen3": ["RBLNQwen3ForCausalLM", "RBLNQwen3ForCausalLMConfig", "RBLNQwen3Model", "RBLNQwen3ModelConfig"],
     "resnet": ["RBLNResNetForImageClassification", "RBLNResNetForImageClassificationConfig"],
     "roberta": [
         "RBLNRobertaForMaskedLM",
@@ -241,6 +242,7 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
     )
+    from .qwen3 import RBLNQwen3ForCausalLM, RBLNQwen3ForCausalLMConfig, RBLNQwen3Model, RBLNQwen3ModelConfig
     from .resnet import RBLNResNetForImageClassification, RBLNResNetForImageClassificationConfig
     from .roberta import (
         RBLNRobertaForMaskedLM,
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py
CHANGED
@@ -1085,6 +1085,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                 tensor_type="pt",
                 device=rbln_config.device_map["prefill"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             *[
                 rebel.Runtime(
@@ -1092,6 +1093,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                     tensor_type="pt",
                     device=rbln_config.device_map[f"decoder_batch_{batch_size}"],
                     activate_profiler=rbln_config.activate_profiler,
+                    timeout=rbln_config.timeout,
                 )
                 for i, batch_size in enumerate(rbln_config.decoder_batch_sizes)
            ],
@@ -1190,6 +1192,11 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         if cache_position is None:
             logits = []
             inputs = inputs_embeds if inputs_embeds is not None else input_ids
+            # for only use forward
+            if generate_idx is None:
+                generate_idx = attention_mask.sum(dim=-1, keepdim=True).int()
+            if padded_cache_lengths is None:
+                padded_cache_lengths = torch.zeros_like(generate_idx)
             batch_size = inputs.shape[0]
             for b_idx in range(batch_size):
                 cache_position = torch.arange(0, generate_idx[b_idx].item(), dtype=torch.int32).unsqueeze(0)
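The `# for only use forward` block above lets the model be called through `forward` directly, without `generate`, by deriving per-sample prompt lengths from the attention mask when `generate_idx` is not supplied. A standalone PyTorch illustration of that default (made-up values, not taken from the release):

```python
import torch

# Two sequences padded to length 5; ones mark real tokens.
attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                               [1, 1, 1, 1, 1]])

# Default generate_idx: number of attended tokens per sample, shape (batch, 1).
generate_idx = attention_mask.sum(dim=-1, keepdim=True).int()   # tensor([[3], [5]])

# Default padded_cache_lengths: zeros of the same shape.
padded_cache_lengths = torch.zeros_like(generate_idx)           # tensor([[0], [0]])
```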
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py
CHANGED
@@ -884,12 +884,14 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
                 tensor_type="pt",
                 device=rbln_config.device_map["prefill"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             rebel.Runtime(
                 compiled_models[1],
                 tensor_type="pt",
                 device=rbln_config.device_map["image_prefill"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             *[
                 rebel.Runtime(
@@ -897,6 +899,7 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
                     tensor_type="pt",
                     device=rbln_config.device_map[f"decoder_batch_{batch_size}"],
                     activate_profiler=rbln_config.activate_profiler,
+                    timeout=rbln_config.timeout,
                 )
                 for i, batch_size in enumerate(rbln_config.decoder_batch_sizes)
             ],
optimum/rbln/transformers/models/qwen3/__init__.py
ADDED
@@ -0,0 +1,16 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_qwen3 import RBLNQwen3ForCausalLMConfig, RBLNQwen3ModelConfig
+from .modeling_qwen3 import RBLNQwen3ForCausalLM, RBLNQwen3Model
optimum/rbln/transformers/models/qwen3/configuration_qwen3.py
ADDED
@@ -0,0 +1,71 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
+
+
+class RBLNQwen3ForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
+    """
+    Configuration class for RBLN Qwen3 models.
+
+    This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
+
+    Example usage:
+    ```python
+    from optimum.rbln import RBLNQwen3ForCausalLM, RBLNQwen3ForCausalLMConfig
+
+    # Create a configuration object
+    config = RBLNQwen3ForCausalLMConfig(
+        batch_size=1,
+        max_seq_len=40960,
+        tensor_parallel_size=4,
+        kvcache_partition_len=16384
+    )
+
+    # Use the configuration with from_pretrained
+    model = RBLNQwen3ForCausalLM.from_pretrained(
+        "Qwen/Qwen3-4B",
+        export=True,
+        rbln_config=config
+    )
+    ```
+    """
+
+
+class RBLNQwen3ModelConfig(RBLNDecoderOnlyModelForCausalLMConfig):
+    """
+    Configuration class for RBLN Qwen3 models.
+
+    This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
+
+    Example usage:
+    ```python
+    from optimum.rbln import RBLNQwen3Model, RBLNQwen3ModelConfig
+
+    # Create a configuration object
+    config = RBLNQwen3ModelConfig(
+        batch_size=1,
+        max_seq_len=40960,
+        tensor_parallel_size=4,
+        kvcache_partition_len=16384
+    )
+
+    # Use the configuration with from_pretrained
+    model = RBLNQwen3Model.from_pretrained(
+        "Qwen/Qwen3-Embedding-4B",
+        export=True,
+        rbln_config=config
+    )
+    ```
+    """