optimum-rbln 0.8.4a0__tar.gz → 0.8.4a2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of optimum-rbln might be problematic.
- optimum_rbln-0.8.4a2/.github/version.yaml +1 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/PKG-INFO +1 -1
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/__init__.py +8 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/__version__.py +2 -2
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/configuration_utils.py +10 -3
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/modeling_diffusers.py +15 -1
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/auto_pipeline.py +37 -4
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/modeling_base.py +40 -2
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/__init__.py +8 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/__init__.py +12 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/auto/auto_factory.py +28 -13
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +1 -1
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +110 -18
- optimum_rbln-0.8.4a2/src/optimum/rbln/transformers/models/qwen2_vl/__init__.py +19 -0
- optimum_rbln-0.8.4a2/src/optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +88 -0
- optimum_rbln-0.8.4a2/src/optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +506 -0
- optimum_rbln-0.8.4a2/src/optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +141 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +7 -1
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/tests/test_base.py +0 -6
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/tests/test_config.py +6 -6
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/tests/test_diffusers.py +1 -5
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/tests/test_llm.py +45 -0
- optimum_rbln-0.8.4a0/.github/version.yaml +0 -1
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/pull_request_template.md +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/scripts/auto_code_review.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/scripts/validate_docstrings.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/scripts/validate_pr_checklist.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/auto_code_review.yml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/check_code_quality.yml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/deploy-on-tag.yaml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/deploy.yaml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/pr-title-check.yaml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/pr_checklist_validator.yml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/rbln_check_compiler.yaml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/rbln_scheduled_test.yaml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.github/workflows/test-docstrings.yml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/.gitignore +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/CODE_OF_CONDUCT.md +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/CONTRIBUTING.md +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/LICENSE +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/README.md +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/assets/rbln_logo.png +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/advanced/custom_class.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/audio-classification/run_ast_audio_classification.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/depth-estimation/run_dpt.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/image-classification/run_image_classification.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/image-classification/run_vit_image_classification.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/image-to-text/run_idefics3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/question-answering/run_question_answering.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/speech-recognition/run_wav2vec2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/speech-recognition/run_whisper.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/text-classification/run_secureBERT.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/text-classification/run_t5_classification.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/text2text-generation/run_llama_peft.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/examples/time-series-forecasting/run_time_series_forecasting.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/pyproject.toml +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/scripts/uv-lock.sh +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/scripts/uv-sync.sh +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/models/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/pipelines/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/cosmos/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/modeling.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/ops/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/ops/attn.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/ops/flash_attn.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/ops/linear.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/ops/sliding_window_attn.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/configuration_generic.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/modeling_attention_utils.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/modeling_outputs.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/bart/configuration_bart.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/bert/bert_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/bert/configuration_bert.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/blip_2/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/clip/configuration_clip.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/colpali/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/colpali/colpali_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/colpali/configuration_colpali.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/depth_anything/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/distilbert/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/dpt/configuration_dpt.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/exaone/configuration_exaone.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gemma/configuration_gemma.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gemma3/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/grounding_dino/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/idefics3/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/llama/configuration_llama.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/llava/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/llava/configuration_llava.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/llava/modeling_llava.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/midm/configuration_midm.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/mistral/configuration_mistral.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/opt/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/opt/configuration_opt.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/opt/modeling_opt.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/opt/opt_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/pegasus/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/phi/configuration_phi.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/pixtral/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen3/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/resnet/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/resnet/configuration_resnet.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/resnet/modeling_resnet.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/roberta/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/roberta/configuration_roberta.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/roberta/modeling_roberta.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/siglip/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/siglip/configuration_siglip.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/swin/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/swin/configuration_swin.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/swin/modeling_swin.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/t5/configuration_t5.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/time_series_transformer/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/vit/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/vit/configuration_vit.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/vit/modeling_vit.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/whisper/configuration_whisper.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/utils/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/utils/decorator_utils.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/utils/depreacate_utils.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/utils/hub.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/utils/import_utils.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/utils/logging.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/utils/model_utils.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/utils/runtime_utils.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/utils/save_utils.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/utils/submodule.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/tests/__init__.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/tests/psnr.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/tests/requirements_sdxl.txt +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/tests/run_stable_diffusion_xl_base.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/tests/test_transformers.py +0 -0
- {optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/uv.lock +0 -0
optimum_rbln-0.8.4a2/.github/version.yaml
@@ -0,0 +1 @@
+rebel_compiler_version: 0.8.4.dev278+ge147357b
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.4a0
+Version: 0.8.4a2
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/__init__.py
@@ -148,6 +148,10 @@ _import_structure = {
         "RBLNQwen3ForCausalLMConfig",
         "RBLNQwen3Model",
         "RBLNQwen3ModelConfig",
+        "RBLNQwen2VisionTransformerPretrainedModel",
+        "RBLNQwen2VisionTransformerPretrainedModelConfig",
+        "RBLNQwen2VLForConditionalGeneration",
+        "RBLNQwen2VLForConditionalGenerationConfig",
         "RBLNResNetForImageClassification",
         "RBLNResNetForImageClassificationConfig",
         "RBLNRobertaForMaskedLM",
@@ -430,6 +434,10 @@ if TYPE_CHECKING:
         RBLNQwen2ForCausalLMConfig,
         RBLNQwen2Model,
         RBLNQwen2ModelConfig,
+        RBLNQwen2VisionTransformerPretrainedModel,
+        RBLNQwen2VisionTransformerPretrainedModelConfig,
+        RBLNQwen2VLForConditionalGeneration,
+        RBLNQwen2VLForConditionalGenerationConfig,
         RBLNQwen3ForCausalLM,
         RBLNQwen3ForCausalLMConfig,
         RBLNQwen3Model,
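The four names added here surface the new Qwen2-VL support at the package root. A hedged usage sketch: the checkpoint id is a placeholder, input preparation is elided, and the processor flow follows the usual transformers Qwen2-VL pattern rather than anything stated in this diff.

    # Sketch only: checkpoint id and generation arguments are placeholders.
    from transformers import AutoProcessor
    from optimum.rbln import RBLNQwen2VLForConditionalGeneration

    model = RBLNQwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
    processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
    # inputs = processor(text=..., images=..., return_tensors="pt")
    # output_ids = model.generate(**inputs)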
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/__version__.py
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
     commit_id: COMMIT_ID
     __commit_id__: COMMIT_ID

-__version__ = version = '0.8.4a0'
-__version_tuple__ = version_tuple = (0, 8, 4, 'a0')
+__version__ = version = '0.8.4a2'
+__version_tuple__ = version_tuple = (0, 8, 4, 'a2')

 __commit_id__ = commit_id = None
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/configuration_utils.py
@@ -248,9 +248,6 @@ class RBLNAutoConfig:
             if key[5:] not in RUNTIME_KEYWORDS and key[5:] not in cls.submodules
         }

-        if len(rbln_kwargs) > 0:
-            raise ValueError(f"Cannot set the following arguments: {list(rbln_kwargs.keys())}")
-
         # Process submodule's rbln_config
         for submodule in cls.submodules:
             if submodule not in config_file:
@@ -265,6 +262,16 @@ class RBLNAutoConfig:

         config_file.update(rbln_runtime_kwargs)

+        rbln_config = cls(**config_file)
+
+        if len(rbln_kwargs) > 0:
+            for key, value in rbln_kwargs.items():
+                if getattr(rbln_config, key) != value:
+                    raise ValueError(
+                        f"Cannot set the following arguments: {list(rbln_kwargs.keys())} "
+                        f"Since the value is already set to {getattr(rbln_config, key)}"
+                    )
+
         if return_unused_kwargs:
             return cls(**config_file), kwargs
         else:
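The effect of this change: rbln_* keyword arguments passed when loading a compiled config are no longer rejected outright; they only raise when they conflict with the value already stored in the config. A standalone sketch of that consistency check, not the library code itself (class, function, and key names below are invented for illustration):

    # Mirrors the check added above, outside the library.
    class _Cfg:
        def __init__(self, **kw):
            self.__dict__.update(kw)

    def apply_rbln_kwargs(saved: dict, rbln_kwargs: dict) -> _Cfg:
        cfg = _Cfg(**saved)
        for key, value in rbln_kwargs.items():
            if getattr(cfg, key) != value:
                raise ValueError(f"Cannot set {key}: already set to {getattr(cfg, key)}")
        return cfg

    apply_rbln_kwargs({"tensor_parallel_size": 4}, {"tensor_parallel_size": 4})    # accepted: matches saved value
    # apply_rbln_kwargs({"tensor_parallel_size": 4}, {"tensor_parallel_size": 8})  # still raises: conflicts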
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/modeling_diffusers.py
RENAMED
@@ -130,7 +130,7 @@ class RBLNDiffusionMixin:
         cls,
         model_id: str,
         *,
-        export: bool =
+        export: bool = None,
         model_save_dir: Optional[PathLike] = None,
         rbln_config: Dict[str, Any] = {},
         lora_ids: Optional[Union[str, List[str]]] = None,
@@ -181,6 +181,20 @@ class RBLNDiffusionMixin:
         """
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)

+        if export is None:
+            export = any(
+                not RBLNModel._is_compiled(
+                    model_id,
+                    token=kwargs.get("token"),
+                    revision=kwargs.get("revision"),
+                    force_download=kwargs.get("force_download", False),
+                    cache_dir=kwargs.get("cache_dir"),
+                    subfolder=submodule_name,
+                    local_files_only=kwargs.get("local_files_only", False),
+                )
+                for submodule_name in cls._submodules
+            )
+
         if export:
             # keep submodules if user passed any of them.
             passed_submodules = {
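With export now defaulting to None, a diffusion pipeline can be loaded the same way whether model_id points at a plain HuggingFace checkpoint or an already-compiled RBLN artifact; the flag is inferred per submodule via RBLNModel._is_compiled. A hedged usage sketch (model ids are placeholders and compile-time options are omitted):

    # Sketch only: ids are placeholders.
    from optimum.rbln import RBLNStableDiffusionPipeline

    # Uncompiled checkpoint -> export resolves to True, submodules are compiled.
    pipe = RBLNStableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    pipe.save_pretrained("./sd-rbln")

    # Already-compiled directory -> export resolves to False, artifacts are just loaded.
    pipe = RBLNStableDiffusionPipeline.from_pretrained("./sd-rbln")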
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/diffusers/pipelines/auto_pipeline.py
RENAMED
@@ -14,7 +14,8 @@


 import importlib
-from
+from pathlib import Path
+from typing import Type, Union

 from diffusers.models.controlnets import ControlNetUnionModel
 from diffusers.pipelines.auto_pipeline import (
@@ -42,7 +43,13 @@ class RBLNAutoPipelineBase:
     _model_mapping_names = None

     @classmethod
-    def get_rbln_cls(cls, pretrained_model_name_or_path, export=
+    def get_rbln_cls(cls, pretrained_model_name_or_path: Union[str, Path], export: bool = None, **kwargs):
+        if isinstance(pretrained_model_name_or_path, Path):
+            pretrained_model_name_or_path = pretrained_model_name_or_path.as_posix()
+
+        if export is None:
+            export = not cls._is_compiled_pipeline(pretrained_model_name_or_path, **kwargs)
+
         if export:
             hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
             rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
@@ -66,7 +73,7 @@ class RBLNAutoPipelineBase:
         return rbln_cls

     @classmethod
-    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
+    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path: Union[str, Path], **kwargs):
         """
         Retrieve the path to the compiled model directory for a given RBLN model.

@@ -86,10 +93,36 @@ class RBLNAutoPipelineBase:

         return model_index_config["_class_name"]

+    @classmethod
+    def _is_compiled_pipeline(
+        cls,
+        pretrained_model_name_or_path: Union[str, Path],
+        cache_dir=None,
+        force_download=False,
+        proxies=None,
+        token=None,
+        local_files_only=False,
+        revision=None,
+        **kwargs,
+    ):
+        config: dict = cls.load_config(
+            pretrained_model_name_or_path,
+            cache_dir=cache_dir,
+            force_download=force_download,
+            proxies=proxies,
+            token=token,
+            local_files_only=local_files_only,
+            revision=revision,
+        )
+        for value in config.values():
+            if isinstance(value, list) and len(value) > 0 and value[0] == "optimum.rbln":
+                return True
+        return False
+
     @classmethod
     def infer_hf_model_class(
         cls,
-        pretrained_model_or_path,
+        pretrained_model_or_path: Union[str, Path],
         cache_dir=None,
         force_download=False,
         proxies=None,
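_is_compiled_pipeline decides by reading model_index.json: diffusers records each pipeline component as a [library, class] pair, so the pipeline counts as compiled when any component's library is "optimum.rbln". A standalone sketch of that check (the sample dicts are invented for illustration):

    # Reproduces the model_index.json test added above, outside the class.
    def looks_compiled(model_index: dict) -> bool:
        return any(
            isinstance(v, list) and len(v) > 0 and v[0] == "optimum.rbln"
            for v in model_index.values()
        )

    print(looks_compiled({"unet": ["optimum.rbln", "RBLNUNet2DConditionModel"]}))  # True
    print(looks_compiled({"unet": ["diffusers", "UNet2DConditionModel"]}))         # False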
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/modeling_base.py
@@ -343,11 +343,37 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         rbln_config, kwargs = config_cls.initialize_from_kwargs(rbln_config, **kwargs)
         return rbln_config, kwargs

+    @classmethod
+    def _is_compiled(
+        cls,
+        model_id: Union[str, Path],
+        token: Optional[Union[bool, str]] = None,
+        revision: Optional[str] = None,
+        force_download: bool = False,
+        cache_dir: Optional[str] = None,
+        subfolder: str = "",
+        local_files_only: bool = False,
+    ) -> bool:
+        # Check if the model is already compiled.
+        try:
+            cls._load_compiled_model_dir(
+                model_id=model_id,
+                token=token,
+                revision=revision,
+                force_download=force_download,
+                cache_dir=cache_dir,
+                subfolder=subfolder,
+                local_files_only=local_files_only,
+            )
+            return True
+        except (FileNotFoundError, KeyError):
+            return False
+
     @classmethod
     def from_pretrained(
         cls: Type["RBLNBaseModel"],
         model_id: Union[str, Path],
-        export: bool =
+        export: bool = None,
         rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
         **kwargs: Any,
     ) -> "RBLNBaseModel":
@@ -357,7 +383,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

         Args:
             model_id: The model id of the pre-trained model to be loaded. It can be downloaded from the HuggingFace model hub or a local path, or a model id of a compiled model using the RBLN Compiler.
-            export: A boolean flag to indicate whether the model should be compiled.
+            export: A boolean flag to indicate whether the model should be compiled. If None, it will be determined based on the existence of the compiled model files in the model_id.
             rbln_config: Configuration for RBLN model compilation and runtime. This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                 For detailed configuration options, see the specific model's configuration class documentation.

@@ -369,6 +395,18 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

         if isinstance(model_id, Path):
             model_id = model_id.as_posix()
+
+        if export is None:
+            export = not cls._is_compiled(
+                model_id=model_id,
+                token=kwargs.get("token"),
+                revision=kwargs.get("revision"),
+                force_download=kwargs.get("force_download", False),
+                cache_dir=kwargs.get("cache_dir"),
+                subfolder=kwargs.get("subfolder", ""),
+                local_files_only=kwargs.get("local_files_only", False),
+            )
+
         from_pretrained_method = cls._export if export else cls._from_pretrained
         return from_pretrained_method(model_id=model_id, **kwargs, rbln_config=rbln_config)
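In practice the export flag can usually be dropped: _is_compiled probes for an existing compiled-model directory (via _load_compiled_model_dir) and from_pretrained only falls back to compiling when none is found. A hedged usage sketch, with a placeholder model id and all rbln_config options omitted:

    # Sketch only: ids are placeholders; compile options are omitted.
    from optimum.rbln import RBLNLlamaForCausalLM

    # First run: no compiled artifacts, so export resolves to True and compilation happens.
    model = RBLNLlamaForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
    model.save_pretrained("./llama-rbln")

    # Later runs: the directory already holds compiled files, so export resolves to False.
    model = RBLNLlamaForCausalLM.from_pretrained("./llama-rbln")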
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/__init__.py
@@ -130,6 +130,10 @@ _import_structure = {
         "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
         "RBLNQwen2_5_VLForConditionalGeneration",
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
+        "RBLNQwen2VisionTransformerPretrainedModel",
+        "RBLNQwen2VisionTransformerPretrainedModelConfig",
+        "RBLNQwen2VLForConditionalGeneration",
+        "RBLNQwen2VLForConditionalGenerationConfig",
         "RBLNQwen2Model",
         "RBLNQwen2ModelConfig",
         "RBLNQwen2ForCausalLM",
@@ -282,6 +286,10 @@ if TYPE_CHECKING:
         RBLNQwen2ForCausalLMConfig,
         RBLNQwen2Model,
         RBLNQwen2ModelConfig,
+        RBLNQwen2VisionTransformerPretrainedModel,
+        RBLNQwen2VisionTransformerPretrainedModelConfig,
+        RBLNQwen2VLForConditionalGeneration,
+        RBLNQwen2VLForConditionalGenerationConfig,
         RBLNQwen3ForCausalLM,
         RBLNQwen3ForCausalLMConfig,
         RBLNQwen3Model,
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/__init__.py
RENAMED
@@ -85,6 +85,12 @@ _import_structure = {
         "RBLNQwen2_5_VLForConditionalGeneration",
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
     ],
+    "qwen2_vl": [
+        "RBLNQwen2VisionTransformerPretrainedModel",
+        "RBLNQwen2VisionTransformerPretrainedModelConfig",
+        "RBLNQwen2VLForConditionalGeneration",
+        "RBLNQwen2VLForConditionalGenerationConfig",
+    ],
     "decoderonly": [
         "RBLNDecoderOnlyModelConfig",
         "RBLNDecoderOnlyModel",
@@ -281,6 +287,12 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
     )
+    from .qwen2_vl import (
+        RBLNQwen2VisionTransformerPretrainedModel,
+        RBLNQwen2VisionTransformerPretrainedModelConfig,
+        RBLNQwen2VLForConditionalGeneration,
+        RBLNQwen2VLForConditionalGenerationConfig,
+    )
    from .qwen3 import RBLNQwen3ForCausalLM, RBLNQwen3ForCausalLMConfig, RBLNQwen3Model, RBLNQwen3ModelConfig
    from .resnet import RBLNResNetForImageClassification, RBLNResNetForImageClassificationConfig
    from .roberta import (
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/auto/auto_factory.py
@@ -14,9 +14,10 @@
 import importlib
 import inspect
 import warnings
-from
+from pathlib import Path
+from typing import Any, Type, Union

-from transformers import AutoConfig, PretrainedConfig
+from transformers import AutoConfig, PretrainedConfig, PreTrainedModel
 from transformers.dynamic_module_utils import get_class_from_dynamic_module
 from transformers.models.auto.auto_factory import _get_model_class

@@ -43,10 +44,10 @@ class _BaseAutoModelClass:
     @classmethod
     def get_rbln_cls(
         cls,
-        pretrained_model_name_or_path,
-        *args,
-        export=
-        **kwargs,
+        pretrained_model_name_or_path: Union[str, Path],
+        *args: Any,
+        export: bool = None,
+        **kwargs: Any,
     ):
         """
         Determine the appropriate RBLN model class based on the given model ID and configuration.
@@ -59,6 +60,20 @@ class _BaseAutoModelClass:
         Returns:
             RBLNBaseModel: The corresponding RBLN model class.
         """
+        if isinstance(pretrained_model_name_or_path, Path):
+            pretrained_model_name_or_path = pretrained_model_name_or_path.as_posix()
+
+        if export is None:
+            export = not RBLNBaseModel._is_compiled(
+                model_id=pretrained_model_name_or_path,
+                token=kwargs.get("token"),
+                revision=kwargs.get("revision"),
+                force_download=kwargs.get("force_download", False),
+                cache_dir=kwargs.get("cache_dir"),
+                subfolder=kwargs.get("subfolder", ""),
+                local_files_only=kwargs.get("local_files_only", False),
+            )
+
         if export:
             hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
             rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
@@ -85,9 +100,9 @@ class _BaseAutoModelClass:
     @classmethod
     def infer_hf_model_class(
         cls,
-        pretrained_model_name_or_path,
-        *args,
-        **kwargs,
+        pretrained_model_name_or_path: Union[str, Path],
+        *args: Any,
+        **kwargs: Any,
     ):
         """
         Infer the HuggingFace model class based on the configuration or model name.
@@ -140,7 +155,7 @@ class _BaseAutoModelClass:
         return model_class

     @classmethod
-    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
+    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path: Union[str, Path], **kwargs):
         """
         Retrieve the path to the compiled model directory for a given RBLN model.

@@ -163,17 +178,17 @@ class _BaseAutoModelClass:
         return rbln_config.rbln_model_cls_name

     @classmethod
-    def from_pretrained(cls, model_id, *args, **kwargs):
+    def from_pretrained(cls, model_id: Union[str, Path], *args, **kwargs):
         rbln_cls = cls.get_rbln_cls(model_id, *args, **kwargs)
         return rbln_cls.from_pretrained(model_id, *args, **kwargs)

     @classmethod
-    def from_model(cls, model, *args, **kwargs):
+    def from_model(cls, model: PreTrainedModel, *args, **kwargs):
         rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
         return rbln_cls.from_model(model, *args, **kwargs)

     @staticmethod
-    def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
+    def register(rbln_cls: Type[RBLNBaseModel], exist_ok: bool = False):
         """
         Register a new RBLN model class.

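Together with the base-class change, the auto factory can now route a single path to either compilation or plain loading without the caller choosing. A hedged sketch: the path is a placeholder, and RBLNAutoModelForCausalLM is assumed to be one of the auto classes exported from modeling_auto.py (it is not shown in this diff).

    # Sketch only: the path is a placeholder.
    from optimum.rbln import RBLNAutoModelForCausalLM

    # Resolves the concrete RBLN class from the checkpoint config, then compiles or
    # loads depending on whether compiled artifacts already exist at the path.
    model = RBLNAutoModelForCausalLM.from_pretrained("./checkpoint-or-compiled-dir")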
{optimum_rbln-0.8.4a0 → optimum_rbln-0.8.4a2}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py
@@ -579,7 +579,7 @@ class DecoderOnlyAttention(nn.Module):
         )
         self.head_dim = self._original_mod.head_dim
         self._phase = "prefill"
-        self.scale = torch.tensor(self.get_attn_scale())
+        self.scale = torch.nn.Parameter(torch.tensor(self.get_attn_scale()))
         self.quantization = rbln_config.quantization

         if hasattr(self._original_mod, "num_key_value_heads"):
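A plain torch.tensor attribute is not registered with its nn.Module, so it is ignored by state_dict() and by .to()-style moves, whereas an nn.Parameter is tracked by the module machinery; that is the observable difference this one-line change introduces (the actual motivation is not stated in the diff). A minimal illustration with an invented module, not the RBLN attention class:

    import torch

    class Toy(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.plain = torch.tensor(0.125)                      # unregistered attribute
            self.param = torch.nn.Parameter(torch.tensor(0.125))  # registered with the module

    m = Toy()
    print("plain" in m.state_dict())  # False: invisible to state_dict / .to() / module traversal
    print("param" in m.state_dict())  # True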
@@ -11,6 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
import math
|
|
14
15
|
from functools import wraps
|
|
15
16
|
from typing import TYPE_CHECKING, List, Optional, Tuple
|
|
16
17
|
|
|
@@ -20,7 +21,6 @@ from torch import Tensor
|
|
|
20
21
|
from transformers.models.grounding_dino.modeling_grounding_dino import (
|
|
21
22
|
GroundingDinoDecoder,
|
|
22
23
|
GroundingDinoEncoder,
|
|
23
|
-
get_sine_pos_embed,
|
|
24
24
|
)
|
|
25
25
|
|
|
26
26
|
|
|
@@ -33,31 +33,46 @@ def monkey_patch():
|
|
|
33
33
|
GroundingDinoBiMultiHeadAttention,
|
|
34
34
|
GroundingDinoEncoderLayer,
|
|
35
35
|
GroundingDinoMultiscaleDeformableAttention,
|
|
36
|
+
MultiScaleDeformableAttention,
|
|
36
37
|
)
|
|
37
38
|
|
|
38
39
|
original_forward = GroundingDinoMultiscaleDeformableAttention.forward
|
|
39
40
|
original_bi_multihead_attention_forward = GroundingDinoBiMultiHeadAttention.forward
|
|
40
41
|
original_encoder_layer_forward = GroundingDinoEncoderLayer.forward
|
|
42
|
+
original_multiscale_deform_attn = MultiScaleDeformableAttention.forward
|
|
41
43
|
|
|
42
44
|
# Patch the methods with the custom implementations
|
|
43
45
|
GroundingDinoMultiscaleDeformableAttention.forward = _GroundingDinoMultiscaleDeformableAttention.forward
|
|
44
46
|
GroundingDinoBiMultiHeadAttention.forward = _GroundingDinoBiMultiHeadAttention.forward
|
|
45
47
|
GroundingDinoEncoderLayer.forward = _GroundingDinoEncoderLayer.forward
|
|
48
|
+
MultiScaleDeformableAttention.forward = _MultiScaleDeformableAttention.forward
|
|
46
49
|
|
|
47
|
-
return (
|
|
50
|
+
return (
|
|
51
|
+
original_forward,
|
|
52
|
+
original_bi_multihead_attention_forward,
|
|
53
|
+
original_encoder_layer_forward,
|
|
54
|
+
original_multiscale_deform_attn,
|
|
55
|
+
)
|
|
48
56
|
|
|
49
57
|
|
|
50
|
-
def restore_monkey_patch(
|
|
58
|
+
def restore_monkey_patch(
|
|
59
|
+
original_forward,
|
|
60
|
+
original_bi_multihead_attention_forward,
|
|
61
|
+
original_encoder_layer_forward,
|
|
62
|
+
original_multiscale_deform_attn,
|
|
63
|
+
):
|
|
51
64
|
from transformers.models.grounding_dino.modeling_grounding_dino import (
|
|
52
65
|
GroundingDinoBiMultiHeadAttention,
|
|
53
66
|
GroundingDinoEncoderLayer,
|
|
54
67
|
GroundingDinoMultiscaleDeformableAttention,
|
|
68
|
+
MultiScaleDeformableAttention,
|
|
55
69
|
)
|
|
56
70
|
|
|
57
71
|
# Restore the original methods
|
|
58
72
|
GroundingDinoMultiscaleDeformableAttention.forward = original_forward
|
|
59
73
|
GroundingDinoBiMultiHeadAttention.forward = original_bi_multihead_attention_forward
|
|
60
74
|
GroundingDinoEncoderLayer.forward = original_encoder_layer_forward
|
|
75
|
+
MultiScaleDeformableAttention.forward = original_multiscale_deform_attn
|
|
61
76
|
|
|
62
77
|
|
|
63
78
|
def monkey_patch_decorator(func):
|
|
@@ -76,6 +91,30 @@ def monkey_patch_decorator(func):
     return wrapper


+def get_sine_pos_embed(
+    pos_tensor: torch.Tensor, num_pos_feats: int = 128, temperature: int = 10000, exchange_xy: bool = True
+) -> Tensor:
+    scale = 2 * math.pi
+    dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos_tensor.device)
+    dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode="floor") / num_pos_feats)
+
+    scaled_pos = pos_tensor.unsqueeze(-1) * scale / dim_t
+    reshaped_pos = scaled_pos.view(*scaled_pos.shape[:-1], -1, 2)
+    sin_chunk, cos_chunk = torch.split(reshaped_pos, 1, dim=-1)
+    sin_embed = sin_chunk.squeeze(-1).sin()
+    cos_embed = cos_chunk.squeeze(-1).cos()
+
+    pos_embed = torch.stack((sin_embed, cos_embed), dim=-1).flatten(-2)
+
+    if exchange_xy and pos_tensor.shape[-1] >= 2:
+        swapped_embeds = torch.cat([pos_embed[..., 1:2, :], pos_embed[..., 0:1, :], pos_embed[..., 2:, :]], dim=-2)
+        pos_embed = swapped_embeds
+
+    position_embeddings = pos_embed.flatten(start_dim=-2)
+
+    return position_embeddings
+
+
 class _GroundingDinoEncoder(torch.nn.Module):
     def __init__(self, model: "GroundingDinoEncoder", rbln_config: "RBLNGroundingDinoEncoderConfig"):
         super().__init__()
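get_sine_pos_embed is now defined locally (the corresponding transformers import was dropped in an earlier hunk), rebuilt from split/stack/flatten primitives, presumably because the upstream helper contains constructs that trace poorly for RBLN compilation. Functionally it still maps each coordinate to num_pos_feats interleaved sine/cosine features, optionally swapping the x/y blocks. A quick shape check using the helper defined above (illustrative):

import torch

pos = torch.rand(1, 4)                        # e.g. (x, y, w, h) box coordinates in [0, 1]
emb = get_sine_pos_embed(pos, num_pos_feats=128)
print(emb.shape)                              # torch.Size([1, 512]): 4 coords x 128 features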
@@ -357,10 +396,16 @@ class _GroundingDinoMultiscaleDeformableAttention(torch.nn.Module):
         batch_size, num_queries, _ = hidden_states.shape
         batch_size, sequence_length, _ = encoder_hidden_states.shape
         # Ignore copy
-        if
-
-
+        if torch.compiler.is_exporting():
+            torch._check(
+                (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum().item() == sequence_length,
+                "Make sure to align the spatial shapes with the sequence length of the encoder hidden states",
             )
+        else:
+            if (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() != sequence_length:
+                raise ValueError(
+                    "Make sure to align the spatial shapes with the sequence length of the encoder hidden states"
+                )

         value = self.value_proj(encoder_hidden_states)
         if attention_mask is not None:
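The shape guard above is rewritten to stay traceable: under export it is recorded via torch._check instead of raising through Python control flow, while eager execution keeps the original ValueError. This assumes a PyTorch recent enough to provide torch.compiler.is_exporting() and torch._check. The general pattern, reduced to a standalone sketch:

import torch

def check_seq_len(spatial_shapes: torch.Tensor, sequence_length: int) -> None:
    expected = (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum()
    if torch.compiler.is_exporting():
        # Recorded as a symbolic assertion rather than data-dependent Python branching.
        torch._check(expected.item() == sequence_length)
    elif expected != sequence_length:
        raise ValueError("spatial shapes do not match the encoder sequence length")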
@@ -380,16 +425,20 @@ class _GroundingDinoMultiscaleDeformableAttention(torch.nn.Module):
         # batch_size, num_queries, n_heads, n_levels, n_points, 2
         num_coordinates = reference_points.shape[-1]
         if num_coordinates == 2:
-            offset_normalizer = torch.stack([spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
-
-                reference_points[:, :, None, :, None, :]
+            offset_normalizer = 0.5 * torch.stack([spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
+            sampling_grids = (
+                2 * reference_points[:, :, None, :, None, :]
+                - 1
                 + sampling_offsets / offset_normalizer[None, None, None, :, None, :]
             )
         elif num_coordinates == 4:
-
-
-
-
+            ref_points_xy, ref_points_wh = torch.split(reference_points, 2, dim=-1)
+            ref_points_xy = ref_points_xy[:, :, None, :, None, :]
+            ref_points_wh = ref_points_wh[:, :, None, :, None, :]
+            ref_points_grids = 2 * ref_points_xy - 1
+            offset_grids = sampling_offsets / self.n_points * ref_points_wh
+            sampling_grids = ref_points_grids + offset_grids
+
         else:
             raise ValueError(f"Last dim of reference_points must be 2 or 4, but got {reference_points.shape[-1]}")

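The hunk above folds the [0, 1] to [-1, 1] grid conversion into the attention module itself, so the deformable-attention kernel (see the next hunk, which now passes sampling_grids rather than sampling locations) receives grid_sample-ready coordinates. For the 2-coordinate case the rewrite appears algebraically identical to the reference formulation, which is why the normalizer picks up the 0.5 factor: 2 * (ref + off / norm) - 1 == 2 * ref - 1 + off / (0.5 * norm). A tiny numeric check (illustrative):

import torch

ref = torch.rand(3)                               # reference points in [0, 1]
off = torch.randn(3)                              # sampling offsets
norm = torch.tensor([64.0, 32.0, 16.0])
old_grid = 2 * (ref + off / norm) - 1             # location first, grid conversion later
new_grid = 2 * ref - 1 + off / (0.5 * norm)       # grid computed directly
assert torch.allclose(old_grid, new_grid)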
@@ -398,7 +447,7 @@ class _GroundingDinoMultiscaleDeformableAttention(torch.nn.Module):
             spatial_shapes,
             spatial_shapes_list,
             level_start_index,
-
+            sampling_grids,
             attention_weights,
             self.im2col_step,
         )
@@ -450,15 +499,14 @@ class _GroundingDinoBiMultiHeadAttention(torch.nn.Module):
         # # Do not increase -50000/50000, data type half has quite limited range
         attn_weights = torch.clamp(attn_weights, min=-50000, max=50000)

-        attn_weights_transposed = attn_weights.transpose(1, 2)
         # RBLN FIX: max_values from scalar to vector
-        text_attn_weights =
-            0
-        ].repeat(1, 1, tgt_len)
+        text_attn_weights = attn_weights - torch.max(attn_weights, dim=1, keepdim=True)[0].repeat(1, tgt_len, 1)

         # # Do not increase -50000/50000, data type half has quite limited range
         text_attn_weights = torch.clamp(text_attn_weights, min=-50000, max=50000)

+        text_attn_weights = text_attn_weights.transpose(1, 2)
+
         # mask vision for language
         if vision_attention_mask is not None:
             # RBLN FIX: bool tensor to float tensor
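In the hunk above, the existing "max_values from scalar to vector" fix is reworked so the max is subtracted on the untransposed attn_weights (max over dim=1, broadcast via repeat) and the transpose of the text-attention matrix is deferred until after clamping; this appears equivalent, with the max still taken over the vision-token dimension. The subtraction itself is the standard shift trick for a numerically stable softmax, illustrated standalone:

import torch

x = torch.tensor([1000.0, 1001.0, 1002.0])
naive = torch.exp(x) / torch.exp(x).sum()               # exp overflows -> nan
shifted = x - x.max()
stable = torch.exp(shifted) / torch.exp(shifted).sum()  # finite, same softmax result
print(naive)   # tensor([nan, nan, nan])
print(stable)  # tensor([0.0900, 0.2447, 0.6652])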
@@ -505,3 +553,47 @@ class _GroundingDinoBiMultiHeadAttention(torch.nn.Module):
         text_attn_output = self.out_text_proj(text_attn_output)

         return (vision_attn_output, vision_attn_weights), (text_attn_output, text_attn_weights)
+
+
+class _MultiScaleDeformableAttention(torch.nn.Module):
+    def forward(
+        self,
+        value: Tensor,
+        value_spatial_shapes: Tensor,
+        value_spatial_shapes_list: List[Tuple],
+        level_start_index: Tensor,
+        sampling_grids: Tensor,
+        attention_weights: Tensor,
+        im2col_step: int,
+    ):
+        batch_size, _, num_heads, hidden_dim = value.shape
+        _, num_queries, num_heads, num_levels, num_points, _ = sampling_grids.shape
+        value_list = value.split([height * width for height, width in value_spatial_shapes_list], dim=1)
+        sampling_value_list = []
+        sampling_grids_list = [t.squeeze(3) for t in torch.split(sampling_grids, 1, dim=3)]
+        for level_id, (height, width) in enumerate(value_spatial_shapes_list):
+            value_l_ = (
+                value_list[level_id].permute(0, 2, 3, 1).reshape(batch_size * num_heads, hidden_dim, height, width)
+            )
+            sampling_grid_l_ = sampling_grids_list[level_id].transpose(1, 2).flatten(0, 1)
+            sampling_value_l_ = torch.nn.functional.grid_sample(
+                value_l_,
+                sampling_grid_l_,
+                mode="bilinear",
+                padding_mode="zeros",
+                align_corners=False,
+            )
+            sampling_value_list.append(sampling_value_l_)
+
+        sampling_values = torch.cat(sampling_value_list, dim=-1)
+        attention_weights_prep = attention_weights.transpose(1, 2)
+        values_permuted = sampling_values.permute(0, 2, 3, 1)
+
+        weights_for_matmul = attention_weights_prep.reshape(
+            batch_size * num_heads, num_queries, 1, num_levels * num_points
+        )
+        output_before_permute = torch.matmul(weights_for_matmul, values_permuted)
+        output_before_view = output_before_permute.squeeze(2).permute(0, 2, 1)
+        output = output_before_view.reshape(batch_size, num_heads * hidden_dim, num_queries)
+
+        return output.transpose(1, 2).contiguous()
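The new _MultiScaleDeformableAttention above is a pure-PyTorch multi-scale deformable attention kernel built from grid_sample and a single matmul, consuming the pre-normalized sampling_grids produced earlier (value_spatial_shapes and level_start_index are accepted for signature compatibility but unused). A shape-only smoke test; the import path is the file this diff modifies, and the dummy shapes are illustrative:

import torch
from optimum.rbln.transformers.models.grounding_dino.grounding_dino_architecture import (
    _MultiScaleDeformableAttention,
)

B, H, D, Q, P = 1, 8, 32, 10, 4                    # batch, heads, head_dim, queries, points
shapes = [(32, 32), (16, 16)]                      # two feature levels
L, S = len(shapes), sum(h * w for h, w in shapes)

value = torch.rand(B, S, H, D)
grids = torch.rand(B, Q, H, L, P, 2) * 2 - 1       # already in [-1, 1]
weights = torch.rand(B, Q, H, L, P).softmax(-1)

out = _MultiScaleDeformableAttention().forward(
    value, None, shapes, None, grids, weights, im2col_step=64
)
print(out.shape)                                   # torch.Size([1, 10, 256]) == (B, Q, H * D)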
@@ -0,0 +1,19 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_qwen2_vl import (
+    RBLNQwen2VisionTransformerPretrainedModelConfig,
+    RBLNQwen2VLForConditionalGenerationConfig,
+)
+from .modeling_qwen2_vl import RBLNQwen2VisionTransformerPretrainedModel, RBLNQwen2VLForConditionalGeneration