optimum-rbln 0.8.1a2__tar.gz → 0.8.1a4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/PKG-INFO +2 -2
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/pyproject.toml +1 -1
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/__version__.py +2 -2
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +4 -1
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +2 -2
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +4 -30
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +1 -11
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +0 -43
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/tests/test_llm.py +2 -2
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/uv.lock +964 -964
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/pull_request_template.md +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/scripts/auto_code_review.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/scripts/validate_pr_checklist.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/version.yaml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/auto_code_review.yml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/check_code_quality.yml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/deploy-on-tag.yaml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/deploy.yaml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/pr-title-check.yaml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/pr_checklist_validator.yml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/rbln_check_compiler.yaml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/rbln_scheduled_test.yaml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/.gitignore +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/CODE_OF_CONDUCT.md +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/CONTRIBUTING.md +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/LICENSE +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/README.md +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/assets/rbln_logo.png +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/advanced/custom_class.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/audio-classification/run_ast_audio_classification.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/depth-estimation/run_dpt.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/image-classification/run_image_classification.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/image-classification/run_vit_image_classification.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/image-to-text/run_idefics3.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/question-answering/run_question_answering.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/speech-recognition/run_wav2vec2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/speech-recognition/run_whisper.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/text-classification/run_secureBERT.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/text-classification/run_t5_classification.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/text2text-generation/run_llama_peft.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/examples/time-series-forecasting/run_time_series_forecasting.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/scripts/uv-lock.sh +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/scripts/uv-sync.sh +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/configuration_utils.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/models/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/pipelines/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/modeling.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/modeling_base.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/ops/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/ops/attn.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/ops/flash_attn.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/ops/linear.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/ops/sliding_window_attn.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/configuration_generic.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/auto/auto_factory.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/bart/configuration_bart.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/bert/configuration_bert.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/blip_2/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/clip/configuration_clip.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/distilbert/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/dpt/configuration_dpt.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/exaone/configuration_exaone.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gemma/configuration_gemma.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gemma3/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/idefics3/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/llama/configuration_llama.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/midm/configuration_midm.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/mistral/configuration_mistral.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/opt/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/opt/configuration_opt.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/opt/modeling_opt.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/opt/opt_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/phi/configuration_phi.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/resnet/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/resnet/configuration_resnet.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/resnet/modeling_resnet.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/roberta/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/roberta/configuration_roberta.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/roberta/modeling_roberta.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/siglip/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/siglip/configuration_siglip.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/t5/configuration_t5.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/time_series_transformer/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/vit/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/vit/configuration_vit.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/vit/modeling_vit.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/whisper/configuration_whisper.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/utils/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/utils/decorator_utils.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/utils/hub.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/utils/import_utils.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/utils/logging.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/utils/model_utils.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/utils/runtime_utils.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/utils/save_utils.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/src/optimum/rbln/utils/submodule.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/tests/__init__.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/tests/psnr.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/tests/requirements_sdxl.txt +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/tests/run_stable_diffusion_xl_base.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/tests/test_base.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/tests/test_config.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/tests/test_diffusers.py +0 -0
- {optimum_rbln-0.8.1a2 → optimum_rbln-0.8.1a4}/tests/test_transformers.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.1a2
+Version: 0.8.1a4
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai

@@ -23,7 +23,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: <3.13,>=3.9
 Requires-Dist: accelerate>=1.0.1
-Requires-Dist: diffusers
+Requires-Dist: diffusers==0.34.0
 Requires-Dist: packaging>=24.1
 Requires-Dist: torch==2.6.0
 Requires-Dist: torchaudio<=2.6.0

src/optimum/rbln/__version__.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '0.8.1a2'
-__version_tuple__ = version_tuple = (0, 8, 1, 'a2')
+__version__ = version = '0.8.1a4'
+__version_tuple__ = version_tuple = (0, 8, 1, 'a4')

src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py
@@ -185,7 +185,10 @@ class RBLNUNet2DConditionModel(RBLNModel):
         rbln_config: RBLNUNet2DConditionModelConfig,
         image_size: Optional[Tuple[int, int]] = None,
     ) -> Tuple[int, int]:
-
+        if hasattr(pipe, "movq"):
+            scale_factor = 2 ** (len(pipe.movq.config.block_out_channels) - 1)
+        else:
+            scale_factor = pipe.vae_scale_factor

         if image_size is None:
             if "Img2Img" in pipe.__class__.__name__:
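
The added branch picks the spatial scale factor from the MoVQ decoder when the pipeline exposes one (as the Kandinsky 2.2 pipelines do) and falls back to the pipeline's vae_scale_factor otherwise. A minimal sketch of that selection in isolation follows; the pipe objects are SimpleNamespace stand-ins for illustration, not real diffusers pipelines.

    from types import SimpleNamespace

    def resolve_scale_factor(pipe) -> int:
        # MoVQ-based pipelines (e.g. Kandinsky 2.2) derive the downsampling
        # factor from the number of decoder block widths.
        if hasattr(pipe, "movq"):
            return 2 ** (len(pipe.movq.config.block_out_channels) - 1)
        # VAE-based pipelines already expose a precomputed scale factor.
        return pipe.vae_scale_factor

    # Illustrative stand-in objects, not actual pipeline classes.
    movq_pipe = SimpleNamespace(
        movq=SimpleNamespace(config=SimpleNamespace(block_out_channels=[128, 256, 256, 512]))
    )
    vae_pipe = SimpleNamespace(vae_scale_factor=8)

    print(resolve_scale_factor(movq_pipe))  # 8, i.e. 2 ** 3
    print(resolve_scale_factor(vae_pipe))   # 8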

src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import importlib
 import inspect
 from collections import deque
 from dataclasses import dataclass

@@ -124,23 +123,6 @@ class RBLNGemma3ForConditionalGeneration(RBLNModel):
     def can_generate(self):
         return True

-    @classmethod
-    def get_pytorch_model(cls, *args, **kwargs):
-        model = super().get_pytorch_model(*args, **kwargs)
-
-        with no_init_weights():
-            model_cls_name = model.model.language_model.__class__.__name__
-            causal_model_cls_name = model_cls_name.replace("TextModel", "ForCausalLM")
-            causal_model_cls = getattr(importlib.import_module("transformers"), causal_model_cls_name)
-            new_language_model = causal_model_cls(model.model.language_model.config)
-
-        new_language_model.lm_head = model.lm_head
-        new_language_model.model = model.model.language_model
-        model.model.language_model = new_language_model
-        model.lm_head = None
-        del model.lm_head
-        return model
-
     def __post_init__(self, **kwargs):
         self.vision_tower = LoopVisionTower(self.rbln_submodules[0])
         self.language_model = self.rbln_submodules[1]

@@ -559,7 +541,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
         (
             inputs,
             cache_position,
-
+            padded_attention_mask,
             out_buffers,
             position_ids,
             position_embed,

@@ -571,7 +553,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
         )
         if not is_external_block_tables:
             local_block_tables = torch.tensor([batch_idx], dtype=torch.int16)
-            self.dec_attn_mask[batch_idx : batch_idx + 1] =
+            self.dec_attn_mask[batch_idx : batch_idx + 1] = padded_attention_mask[:1]

         if self.rbln_config.use_attention_mask and self.rbln_config.use_position_ids:
             chunked_attention_mask = torch.zeros(1, self.rbln_config.max_seq_len, dtype=torch.float32)

@@ -587,18 +569,10 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
                 else None
             )

-            # Not used in Gemma3 yet.
             if self.rbln_config.use_attention_mask:
                 if self.rbln_config.use_position_ids:
-                    chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] =
-
-                    ]
-                else:
-                    # Update attention mask to ensure proper causal behavior
-                    if step >= self.rbln_config.prefill_chunk_size:
-                        chunked_attention_mask[:, :, :, step - self.rbln_config.prefill_chunk_size : step] = 1
-                    chunked_attention_mask[:, :, :, step : step + self.rbln_config.prefill_chunk_size] = (
-                        self.causal_mask
+                    chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] = (
+                        padded_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size]
                     )

             # Define query position
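
The rewritten prefill path above fills each chunk of the attention mask directly from the padded attention mask instead of reconstructing a causal pattern step by step. A small illustrative sketch of that chunk-wise copy follows; prefill_chunk_size, max_seq_len and the right-padding layout are assumptions made for the example, not values taken from the library.

    import torch

    max_seq_len = 16          # assumed compiled sequence length
    prefill_chunk_size = 4    # assumed prefill chunk size
    valid_len = 6             # number of real (non-padding) tokens

    # Right-padded mask for one sequence: 1 for real tokens, 0 for padding.
    padded_attention_mask = torch.zeros(1, max_seq_len)
    padded_attention_mask[0, :valid_len] = 1

    # Each prefill chunk of the full-length mask is copied from the matching
    # slice of the padded mask, mirroring the assignment in the hunk above.
    chunked_attention_mask = torch.zeros(1, max_seq_len, dtype=torch.float32)
    for step in range(0, max_seq_len, prefill_chunk_size):
        chunked_attention_mask[0, step : step + prefill_chunk_size] = padded_attention_mask[
            0, step : step + prefill_chunk_size
        ]

    print(chunked_attention_mask)
    # tensor([[1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])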

src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
@@ -28,7 +28,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
     Qwen2_5_VisionPatchEmbed,
     Qwen2_5_VisionRotaryEmbedding,
     Qwen2_5_VisionTransformerPretrainedModel,
-    Qwen2_5_VLModel,
     Qwen2_5_VLRotaryEmbedding,
 )


@@ -391,14 +390,6 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
     def can_generate(self):
         return True

-    @classmethod
-    def get_pytorch_model(cls, *args, **kwargs):
-        model = super().get_pytorch_model(*args, **kwargs)
-        model.model.lm_head = model.lm_head
-        model.lm_head = None
-        del model.lm_head
-        return model
-
     @classmethod
     def update_kwargs(cls, kwargs):
         kwargs.update(

@@ -540,8 +531,7 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
             vision_tokens = input_id[0][vision_start_indices + 1]
             image_nums = (vision_tokens == image_token_id).sum()
             video_nums = (vision_tokens == video_token_id).sum()
-            position_ids, rope_deltas =
-                self,
+            position_ids, rope_deltas = self.get_rope_index(
                 input_id,
                 image_grid_thw[image_idx : image_idx + image_nums] if image_grid_thw is not None else None,
                 video_grid_thw[video_idx : video_idx + video_nums] if video_grid_thw is not None else None,

src/optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py
@@ -3,14 +3,8 @@ from typing import Tuple

 import torch
 import torch.nn as nn
-from transformers import PreTrainedModel

 from ..decoderonly.decoderonly_architecture import (
-    DecoderOnlyAttention,
-    DecoderOnlyFlashAttention,
-    DecoderOnlyForCausalLM,
-    DecoderOnlyLayer,
-    DecoderOnlyModel,
     DecoderOnlyWrapper,
     apply_rotary_pos_emb,
 )

@@ -203,40 +197,3 @@ class Qwen2_5_VL_LanguageModelWrapper(DecoderOnlyWrapper):
             past_key_values,
             position_embeds,
         )
-
-    def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
-        new_layers = []
-
-        for layer in causal_lm.model.language_model.layers:
-            if self.attn_impl == "eager":
-                new_self_attn = DecoderOnlyAttention(
-                    layer.self_attn,
-                    self.use_attention_mask,
-                    self.use_position_ids,
-                    kvcache_block_size=self.kvcache_block_size,
-                )
-            elif self.attn_impl == "flash_attn":
-                new_self_attn = DecoderOnlyFlashAttention(
-                    layer.self_attn,
-                    kvcache_partition_len=self.kvcache_partition_len,
-                    kvcache_block_size=self.kvcache_block_size,
-                    use_attention_mask=self.use_attention_mask,
-                    use_position_ids=self.use_position_ids,
-                )
-            else:
-                raise NotImplementedError(f"Unknwon attn : {self.attn_impl}")
-
-            new_layer = DecoderOnlyLayer(layer, new_self_attn)
-            new_layers.append(new_layer)
-
-        new_model = DecoderOnlyModel(
-            causal_lm.model.language_model,
-            new_layers,
-            partition_len=self.kvcache_partition_len,
-            max_seq_len=max_seq_len,
-            kvcache_block_size=self.kvcache_block_size,
-            use_learned_pos_emb=self.use_learned_pos_emb,
-            sliding_window_layers=self.sliding_window_layers,
-        )
-        new_causal_lm = DecoderOnlyForCausalLM(causal_lm.model, new_model)
-        return new_causal_lm

tests/test_llm.py
@@ -67,7 +67,7 @@ class LLMTest:
 class TestQwen2Model(LLMTest.TestLLM):
     RBLN_CLASS = RBLNQwen2ForCausalLM
     HF_MODEL_ID = "Qwen/Qwen2-0.5B-Instruct"
-    EXPECTED_OUTPUT = " I am a
+    EXPECTED_OUTPUT = " I am a 20 year old girl from the United States. I have been studying English for"
     HF_CONFIG_KWARGS = {"max_position_embeddings": 1024}


@@ -108,7 +108,7 @@ class TestLlamaForCausalLM_Flash(LLMTest.TestLLM):
 class TestLlamaForCausalLM_Multibatch(TestLlamaForCausalLM):
     PROMPT = ["Who are you?", "What is the capital of France?", "What is the capital of Germany?"]
     EXPECTED_OUTPUT = [
-        "reress makefable R����
+        "reress makefable R���� noethetss0oss invetetet",
         "resget makeget makeichget makeichualichual#choolchool accngngngng",
         "resget makeget makeichget makeichualichual#choolchool accngngngng",
     ]