PyPI - optimum-rbln - Versions diffs - 0.8.2a1__tar.gz → 0.8.2a2__tar.gz - Mend

optimum-rbln 0.8.2a1.tar.gz → 0.8.2a2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of optimum-rbln might be problematic. Click here for more details.

Files changed (286) hide show

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.2a1
+Version: 0.8.2a2
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/__init__.py RENAMED Viewed

@@ -110,6 +110,10 @@ _import_structure = {
         "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
         "RBLNQwen2_5_VLForConditionalGeneration",
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
+        "RBLNQwen3ForCausalLM",
+        "RBLNQwen3ForCausalLMConfig",
+        "RBLNQwen3Model",
+        "RBLNQwen3ModelConfig",
         "RBLNResNetForImageClassification",
         "RBLNResNetForImageClassificationConfig",
         "RBLNRobertaForMaskedLM",
@@ -357,6 +361,10 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
+        RBLNQwen3ForCausalLM,
+        RBLNQwen3ForCausalLMConfig,
+        RBLNQwen3Model,
+        RBLNQwen3ModelConfig,
         RBLNResNetForImageClassification,
         RBLNResNetForImageClassificationConfig,
         RBLNRobertaForMaskedLM,

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/__version__.py RENAMED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.8.2a1'
-__version_tuple__ = version_tuple = (0, 8, 2, 'a1')
+__version__ = version = '0.8.2a2'
+__version_tuple__ = version_tuple = (0, 8, 2, 'a2')

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/configuration_utils.py RENAMED Viewed

@@ -147,7 +147,7 @@ class RBLNCompileConfig:
         return asdict(self)
-RUNTIME_KEYWORDS = ["create_runtimes", "optimize_host_memory", "device", "device_map", "activate_profiler"]
+RUNTIME_KEYWORDS = ["create_runtimes", "optimize_host_memory", "device", "device_map", "activate_profiler", "timeout"]
 CONFIG_MAPPING: Dict[str, Type["RBLNModelConfig"]] = {}
@@ -481,6 +481,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         "device",
         "device_map",
         "activate_profiler",
+        "timeout",
     ]
     submodules: List[str] = []
     subclass_non_save_attributes = []
@@ -561,6 +562,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         activate_profiler: Optional[bool] = None,
         npu: Optional[str] = None,
         tensor_parallel_size: Optional[int] = None,
+        timeout: Optional[int] = None,
         optimum_rbln_version: Optional[str] = None,
         _compile_cfgs: List[RBLNCompileConfig] = [],
         **kwargs: Dict[str, Any],
@@ -577,6 +579,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
             activate_profiler (Optional[bool]): Whether to activate the profiler for performance analysis.
             npu (Optional[str]): The NPU device name to use for compilation.
             tensor_parallel_size (Optional[int]): Size for tensor parallelism to distribute the model across devices.
+            timeout (Optional[int]): The timeout for the runtime in seconds. If it isn't provided, it will be set to 60 by default.
             optimum_rbln_version (Optional[str]): The optimum-rbln version used for this configuration.
             _compile_cfgs (List[RBLNCompileConfig]): List of compilation configurations for the model.
             **kwargs: Additional keyword arguments.
@@ -599,6 +602,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         self._runtime_options["device"] = device
         self._runtime_options["device_map"] = device_map
         self._runtime_options["activate_profiler"] = activate_profiler
+        self._runtime_options["timeout"] = timeout
         # Automatically pass npu, tensor_parallel_size to compile_cfgs
         self.npu = npu
@@ -838,3 +842,14 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
     @activate_profiler.setter
     def activate_profiler(self, activate_profiler: bool):
         self._runtime_options["activate_profiler"] = activate_profiler
+    @property
+    def timeout(self):
+        context = ContextRblnConfig.get_current_context()["timeout"]
+        if context is not None:
+            return context
+        return self._runtime_options["timeout"]
+    @timeout.setter
+    def timeout(self, timeout: int):
+        self._runtime_options["timeout"] = timeout

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py RENAMED Viewed

@@ -52,6 +52,9 @@ class RBLNCosmosTransformer3DModelConfig(RBLNModelConfig):
         Raises:
             ValueError: If batch_size is not a positive integer.
         """
+        if kwargs.get("timeout") is None:
+            kwargs["timeout"] = 80
         super().__init__(**kwargs)
         self.batch_size = batch_size or 1
         self.num_frames = num_frames or 121

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/diffusers/modeling_diffusers.py RENAMED Viewed

@@ -230,6 +230,7 @@ class RBLNDiffusionMixin:
             create_runtimes=rbln_config.create_runtimes,
             optimize_host_mem=rbln_config.optimize_host_memory,
             activate_profiler=rbln_config.activate_profiler,
+            timeout=rbln_config.timeout,
         ):
             model = super().from_pretrained(pretrained_model_name_or_path=model_id, **kwargs)

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py RENAMED Viewed

@@ -209,6 +209,7 @@ class RBLNAutoencoderKL(RBLNModel):
                 tensor_type="pt",
                 device=device_val,
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             )
             for compiled_model, device_val in zip(compiled_models, device_vals)
         ]

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py RENAMED Viewed

@@ -200,6 +200,7 @@ class RBLNAutoencoderKLCosmos(RBLNModel):
                 tensor_type="pt",
                 device=device_val,
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             )
             for compiled_model, device_val in zip(compiled_models, device_vals)
         ]

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py RENAMED Viewed

@@ -165,6 +165,7 @@ class RBLNVQModel(RBLNModel):
                 tensor_type="pt",
                 device=device_val,
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             )
             for compiled_model, device_val in zip(compiled_models, device_vals)
         ]

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/diffusers/models/transformers/transformer_cosmos.py RENAMED Viewed

@@ -279,7 +279,7 @@ class RBLNCosmosTransformer3DModel(RBLNModel):
                 tensor_type="pt",
                 device=rbln_config.device_map[DEFAULT_COMPILED_MODEL_NAME],
                 activate_profiler=rbln_config.activate_profiler,
-                timeout=120,
+                timeout=rbln_config.timeout,
             )
             for compiled_model in compiled_models
         ]

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py RENAMED Viewed

@@ -15,7 +15,7 @@
 from typing import Any, Dict, Optional, Tuple
 from ....configuration_utils import RBLNAutoConfig, RBLNModelConfig
-from ....transformers import RBLNSiglipVisionModelConfig
+from ....transformers import RBLNLlamaForCausalLMConfig, RBLNSiglipVisionModelConfig
 class RBLNVideoSafetyModelConfig(RBLNModelConfig):
@@ -75,7 +75,15 @@ class RBLNCosmosSafetyCheckerConfig(RBLNModelConfig):
         if height is not None and width is not None:
             image_size = (height, width)
-        self.aegis = self.init_submodule_config(RBLNModelConfig, aegis)
+        tensor_parallel_size = kwargs.get("tensor_parallel_size")
+        self.aegis = self.init_submodule_config(
+            RBLNLlamaForCausalLMConfig,
+            aegis,
+            batch_size=batch_size,
+            tensor_parallel_size=tensor_parallel_size,
+        )
         self.siglip_encoder = self.init_submodule_config(
             RBLNSiglipVisionModelConfig,
             siglip_encoder,

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py RENAMED Viewed

@@ -127,25 +127,13 @@ class RBLNSigLIPEncoder(SigLIPEncoder):
             # We don't use RBLNSiglipModel, but we need to override get_image_features to return pooler_output
             self.model = RBLNSiglipVisionModel.from_pretrained(
-                self.checkpoint_dir,
-                rbln_device=rbln_config.siglip_encoder.device,
-                rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
-                rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
+                self.checkpoint_dir, rbln_config=rbln_config.siglip_encoder
             )
         else:
             super().__init__(model_name, checkpoint_id)
             model = self.model
             del self.model
-            self.model = RBLNSiglipVisionModel.from_model(
-                model,
-                rbln_device=rbln_config.siglip_encoder.device,
-                rbln_image_size=rbln_config.siglip_encoder.image_size,
-                rbln_npu=rbln_config.siglip_encoder.npu,
-                rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
-                rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
-            )
+            self.model = RBLNSiglipVisionModel.from_model(model, rbln_config=rbln_config.siglip_encoder)
         self.rbln_config = rbln_config
         # Override get_image_features to return pooler_output
@@ -336,28 +324,14 @@ class RBLNAegis(Aegis):
             torch.nn.Module.__init__(self)
             cache_dir = pathlib.Path(checkpoint_id) / "aegis"
             self.tokenizer = AutoTokenizer.from_pretrained(cache_dir)
-            self.model = RBLNAutoModelForCausalLM.from_pretrained(
-                cache_dir,
-                rbln_device=rbln_config.aegis.device,
-                rbln_create_runtimes=rbln_config.aegis.create_runtimes,
-                rbln_activate_profiler=rbln_config.aegis.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
-            )
+            self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.aegis)
         else:
             super().__init__(checkpoint_id, base_model_id, aegis_adapter)
             model = self.model.merge_and_unload()  # peft merge
             del self.model
-            self.model = RBLNAutoModelForCausalLM.from_model(
-                model,
-                rbln_tensor_parallel_size=4,
-                rbln_device=rbln_config.aegis.device,
-                rbln_create_runtimes=rbln_config.aegis.create_runtimes,
-                rbln_npu=rbln_config.aegis.npu,
-                rbln_activate_profiler=rbln_config.aegis.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
-            )
+            self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.aegis)
         self.rbln_config = rbln_config
         self.dtype = torch.bfloat16

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/modeling.py RENAMED Viewed

@@ -238,6 +238,7 @@ class RBLNModel(RBLNBaseModel):
                 tensor_type="pt",
                 device=rbln_config.device_map[DEFAULT_COMPILED_MODEL_NAME],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             )
             for compiled_model in compiled_models
         ]

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/transformers/__init__.py RENAMED Viewed

@@ -98,6 +98,10 @@ _import_structure = {
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
         "RBLNQwen2ForCausalLM",
         "RBLNQwen2ForCausalLMConfig",
+        "RBLNQwen3ForCausalLM",
+        "RBLNQwen3ForCausalLMConfig",
+        "RBLNQwen3Model",
+        "RBLNQwen3ModelConfig",
         "RBLNResNetForImageClassification",
         "RBLNResNetForImageClassificationConfig",
         "RBLNRobertaForMaskedLM",
@@ -204,6 +208,10 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
+        RBLNQwen3ForCausalLM,
+        RBLNQwen3ForCausalLMConfig,
+        RBLNQwen3Model,
+        RBLNQwen3ModelConfig,
         RBLNResNetForImageClassification,
         RBLNResNetForImageClassificationConfig,
         RBLNRobertaForMaskedLM,

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/transformers/models/__init__.py RENAMED Viewed

@@ -113,6 +113,7 @@ _import_structure = {
     "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
     "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
     "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
+    "qwen3": ["RBLNQwen3ForCausalLM", "RBLNQwen3ForCausalLMConfig", "RBLNQwen3Model", "RBLNQwen3ModelConfig"],
     "resnet": ["RBLNResNetForImageClassification", "RBLNResNetForImageClassificationConfig"],
     "roberta": [
         "RBLNRobertaForMaskedLM",
@@ -241,6 +242,7 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
     )
+    from .qwen3 import RBLNQwen3ForCausalLM, RBLNQwen3ForCausalLMConfig, RBLNQwen3Model, RBLNQwen3ModelConfig
     from .resnet import RBLNResNetForImageClassification, RBLNResNetForImageClassificationConfig
     from .roberta import (
         RBLNRobertaForMaskedLM,

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py RENAMED Viewed

@@ -1085,6 +1085,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                 tensor_type="pt",
                 device=rbln_config.device_map["prefill"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             *[
                 rebel.Runtime(
@@ -1092,6 +1093,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                     tensor_type="pt",
                     device=rbln_config.device_map[f"decoder_batch_{batch_size}"],
                     activate_profiler=rbln_config.activate_profiler,
+                    timeout=rbln_config.timeout,
                 )
                 for i, batch_size in enumerate(rbln_config.decoder_batch_sizes)
             ],
@@ -1190,6 +1192,11 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         if cache_position is None:
             logits = []
             inputs = inputs_embeds if inputs_embeds is not None else input_ids
+            # for only use forward
+            if generate_idx is None:
+                generate_idx = attention_mask.sum(dim=-1, keepdim=True).int()
+            if padded_cache_lengths is None:
+                padded_cache_lengths = torch.zeros_like(generate_idx)
             batch_size = inputs.shape[0]
             for b_idx in range(batch_size):
                 cache_position = torch.arange(0, generate_idx[b_idx].item(), dtype=torch.int32).unsqueeze(0)

{optimum_rbln-0.8.2a1 → optimum_rbln-0.8.2a2}/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py RENAMED Viewed

@@ -884,12 +884,14 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
                 tensor_type="pt",
                 device=rbln_config.device_map["prefill"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             rebel.Runtime(
                 compiled_models[1],
                 tensor_type="pt",
                 device=rbln_config.device_map["image_prefill"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             *[
                 rebel.Runtime(
@@ -897,6 +899,7 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
                     tensor_type="pt",
                     device=rbln_config.device_map[f"decoder_batch_{batch_size}"],
                     activate_profiler=rbln_config.activate_profiler,
+                    timeout=rbln_config.timeout,
                 )
                 for i, batch_size in enumerate(rbln_config.decoder_batch_sizes)
             ],

optimum_rbln-0.8.2a2/src/optimum/rbln/transformers/models/qwen3/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .configuration_qwen3 import RBLNQwen3ForCausalLMConfig, RBLNQwen3ModelConfig
+from .modeling_qwen3 import RBLNQwen3ForCausalLM, RBLNQwen3Model

optimum_rbln-0.8.2a2/src/optimum/rbln/transformers/models/qwen3/configuration_qwen3.py ADDED Viewed

@@ -0,0 +1,71 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
+class RBLNQwen3ForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
+    """
+    Configuration class for RBLN Qwen3 models.
+    This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
+    Example usage:
+    ```python
+    from optimum.rbln import RBLNQwen3ForCausalLM, RBLNQwen3ForCausalLMConfig
+    # Create a configuration object
+    config = RBLNQwen3ForCausalLMConfig(
+        batch_size=1,
+        max_seq_len=40960,
+        tensor_parallel_size=4,
+        kvcache_partition_len=16384
+    )
+    # Use the configuration with from_pretrained
+    model = RBLNQwen3ForCausalLM.from_pretrained(
+        "Qwen/Qwen3-4B",
+        export=True,
+        rbln_config=config
+    )
+    ```
+    """
+class RBLNQwen3ModelConfig(RBLNDecoderOnlyModelForCausalLMConfig):
+    """
+    Configuration class for RBLN Qwen3 models.
+    This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
+    Example usage:
+    ```python
+    from optimum.rbln import RBLNQwen3Model, RBLNQwen3ModelConfig
+    # Create a configuration object
+    config = RBLNQwen3ModelConfig(
+        batch_size=1,
+        max_seq_len=40960,
+        tensor_parallel_size=4,
+        kvcache_partition_len=16384
+    )
+    # Use the configuration with from_pretrained
+    model = RBLNQwen3Model.from_pretrained(
+        "Qwen/Qwen3-Embedding-4B",
+        export=True,
+        rbln_config=config
+    )
+    ```
+    """

optimum-rbln 0.8.2a1__tar.gz → 0.8.2a2__tar.gz

Potentially problematic release.

optimum-rbln 0.8.2a1.tar.gz → 0.8.2a2.tar.gz