optimum-rbln 0.9.2a3__tar.gz → 0.9.2a5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of optimum-rbln might be problematic. See the package registry's advisory page for more details.
- optimum_rbln-0.9.2a5/.github/version.yaml +1 -0
- optimum_rbln-0.9.2a5/.github/workflows/rbln_scheduled_test.yaml +186 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/rbln_trigger_on_pr.yaml +1 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/PKG-INFO +5 -5
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/pyproject.toml +4 -4
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/__init__.py +4 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/__version__.py +2 -2
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/configuration_utils.py +3 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/modeling.py +71 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/__init__.py +4 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/modeling_generic.py +23 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/__init__.py +4 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +65 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +1 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +34 -0
- optimum_rbln-0.9.2a5/src/optimum/rbln/transformers/models/decoderonly/configuration_lora.py +411 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +100 -20
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +33 -0
- optimum_rbln-0.9.2a5/src/optimum/rbln/transformers/models/decoderonly/lora_architecture.py +204 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +79 -4
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +12 -2
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +31 -3
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +9 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +4 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +2 -4
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/llava/modeling_llava.py +2 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +2 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +4 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +5 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +0 -9
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +2 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +2 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +3 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +15 -5
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -1
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/tests/test_llm.py +90 -87
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/uv.lock +1432 -1424
- optimum_rbln-0.9.2a3/.github/version.yaml +0 -1
- optimum_rbln-0.9.2a3/.github/workflows/rbln_scheduled_test.yaml +0 -61
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/pull_request_template.md +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/renovate.json +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/scripts/auto_code_review.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/scripts/validate_docstrings.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/scripts/validate_pr_checklist.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/auto_code_review.yml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/auto_dependency_bot.yml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/check_code_quality.yml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/deploy-on-tag.yaml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/deploy.yaml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/pr-title-check.yaml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/pr_checklist_validator.yml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/rbln_check_compiler.yaml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.github/workflows/test-docstrings.yml +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/.gitignore +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/CODE_OF_CONDUCT.md +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/CONTRIBUTING.md +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/LICENSE +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/README.md +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/assets/rbln_logo.png +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/advanced/custom_class.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/audio-classification/run_ast_audio_classification.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/depth-estimation/run_dpt.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/image-classification/run_image_classification.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/image-classification/run_vit_image_classification.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/image-to-text/run_idefics3.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/question-answering/run_question_answering.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/speech-recognition/run_wav2vec2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/speech-recognition/run_whisper.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/text-classification/run_secureBERT.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/text-classification/run_t5_classification.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/text2text-generation/run_llama_peft.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/examples/time-series-forecasting/run_time_series_forecasting.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/scripts/uv-lock.sh +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/scripts/uv-sync.sh +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/models/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/pipelines/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/auto_pipeline.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/cosmos/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/modeling_base.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/ops/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/ops/attn.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/ops/flash_attn.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/ops/linear.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/ops/sliding_window_attn.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/configuration_generic.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/modeling_attention_utils.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/modeling_outputs.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/auto/auto_factory.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/bart/configuration_bart.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/bert/bert_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/bert/configuration_bert.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/blip_2/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/clip/configuration_clip.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/colpali/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/colpali/colpali_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/colpali/configuration_colpali.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/depth_anything/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/distilbert/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/dpt/configuration_dpt.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/exaone/configuration_exaone.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gemma/configuration_gemma.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gemma3/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/grounding_dino/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/idefics3/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/llama/configuration_llama.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/llava/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/llava/configuration_llava.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/midm/configuration_midm.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/mistral/configuration_mistral.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/opt/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/opt/configuration_opt.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/opt/modeling_opt.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/opt/opt_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/pegasus/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/phi/configuration_phi.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/pixtral/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2_vl/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen3/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/resnet/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/resnet/configuration_resnet.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/resnet/modeling_resnet.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/roberta/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/roberta/configuration_roberta.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/roberta/modeling_roberta.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/siglip/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/siglip/configuration_siglip.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/swin/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/swin/configuration_swin.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/swin/modeling_swin.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/t5/configuration_t5.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/time_series_transformer/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/vit/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/vit/configuration_vit.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/vit/modeling_vit.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/whisper/configuration_whisper.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/utils/rbln_runtime_wrapper.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/utils/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/utils/decorator_utils.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/utils/depreacate_utils.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/utils/hub.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/utils/import_utils.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/utils/logging.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/utils/model_utils.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/utils/runtime_utils.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/utils/save_utils.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/utils/submodule.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/tests/__init__.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/tests/psnr.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/tests/requirements_sdxl.txt +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/tests/run_stable_diffusion_xl_base.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/tests/test_base.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/tests/test_config.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/tests/test_diffusers.py +0 -0
- {optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/tests/test_transformers.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
rebel_compiler_version: 0.9.2.dev155+g1ab49983.prod
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
name: Optimum-rbln / Scheduled Test
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
schedule:
|
|
5
|
+
# Run every day at 2am (17:00 UTC, 2:00am KST)
|
|
6
|
+
- cron: '0 17 * * *'
|
|
7
|
+
|
|
8
|
+
env:
|
|
9
|
+
HF_USER_ID: ${{ secrets.HF_USER_ID }}
|
|
10
|
+
HF_AUTH_TOKEN: ${{ secrets.HF_AUTH_TOKEN }}
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
check-code-quality:
|
|
14
|
+
uses: ./.github/workflows/check_code_quality.yml
|
|
15
|
+
|
|
16
|
+
test-docstrings:
|
|
17
|
+
uses: ./.github/workflows/test-docstrings.yml
|
|
18
|
+
with:
|
|
19
|
+
test_all_files: true
|
|
20
|
+
|
|
21
|
+
load-version:
|
|
22
|
+
runs-on: rebel-k8s-runner
|
|
23
|
+
outputs:
|
|
24
|
+
compiler_version: ${{ steps.get_version.outputs.compiler_version }}
|
|
25
|
+
steps:
|
|
26
|
+
- name: Checkout code
|
|
27
|
+
uses: actions/checkout@v3
|
|
28
|
+
|
|
29
|
+
- name: Get compiler version
|
|
30
|
+
id: get_version
|
|
31
|
+
run: |
|
|
32
|
+
VERSION=$(grep rebel_compiler_version .github/version.yaml | cut -d ':' -f2 | tr -d ' ')
|
|
33
|
+
echo "compiler_version=$VERSION" >> $GITHUB_OUTPUT
|
|
34
|
+
|
|
35
|
+
check-compiler:
|
|
36
|
+
needs: load-version
|
|
37
|
+
uses: ./.github/workflows/rbln_check_compiler.yaml
|
|
38
|
+
with:
|
|
39
|
+
compiler_version: ${{ needs.load-version.outputs.compiler_version }}
|
|
40
|
+
secrets: inherit
|
|
41
|
+
|
|
42
|
+
optimum-rbln-pytest:
|
|
43
|
+
needs: [load-version, check-compiler]
|
|
44
|
+
if: ${{ needs.check-compiler.outputs.compiler_version_check == 'true' }}
|
|
45
|
+
uses: ./.github/workflows/rbln_optimum_pytest.yaml
|
|
46
|
+
with:
|
|
47
|
+
ref: main
|
|
48
|
+
rebel_compiler_version: ${{ needs.check-compiler.outputs.compiler_version }}
|
|
49
|
+
test_level: "full"
|
|
50
|
+
enable_hf_hub_tests: true
|
|
51
|
+
fail_fast: false
|
|
52
|
+
secrets: inherit
|
|
53
|
+
|
|
54
|
+
optimum-rbln-inference-test:
|
|
55
|
+
needs: check-compiler
|
|
56
|
+
if: ${{ needs.check-compiler.outputs.compiler_version_check == 'true' }}
|
|
57
|
+
uses: ./.github/workflows/rbln_optimum_inference_test.yaml
|
|
58
|
+
with:
|
|
59
|
+
ref: main
|
|
60
|
+
rebel_compiler_version: ${{ needs.check-compiler.outputs.compiler_version }}
|
|
61
|
+
secrets: inherit
|
|
62
|
+
|
|
63
|
+
summary_and_report:
|
|
64
|
+
needs: [load-version, check-compiler, optimum-rbln-pytest]
|
|
65
|
+
if: always()
|
|
66
|
+
runs-on: rebel-k8s-runner
|
|
67
|
+
steps:
|
|
68
|
+
- name: Get failed test details
|
|
69
|
+
id: get_failed_tests
|
|
70
|
+
if: needs.optimum-rbln-pytest.result == 'failure'
|
|
71
|
+
run: |
|
|
72
|
+
# Get the workflow run jobs
|
|
73
|
+
JOBS=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
|
|
74
|
+
"https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs")
|
|
75
|
+
|
|
76
|
+
# Extract failed pytest job names step by step for readability
|
|
77
|
+
FAILED_JOB_NAMES=$(echo "$JOBS" | jq -r '.jobs[] | select(.conclusion == "failure" and (.name | contains("Pytest"))) | .name')
|
|
78
|
+
# Remove "Pytest (...)" wrapper to get the test name
|
|
79
|
+
FAILED_TEST_NAMES=$(echo "$FAILED_JOB_NAMES" | sed 's/Pytest (\(.*\))/\1/')
|
|
80
|
+
# Join names with comma and space
|
|
81
|
+
FAILED_TESTS=$(echo "$FAILED_TEST_NAMES" | tr '\n' ', ' | sed 's/,$//')
|
|
82
|
+
|
|
83
|
+
if [ -z "$FAILED_TESTS" ]; then
|
|
84
|
+
echo "failed_tests=Unknown" >> $GITHUB_OUTPUT
|
|
85
|
+
else
|
|
86
|
+
echo "failed_tests=$FAILED_TESTS" >> $GITHUB_OUTPUT
|
|
87
|
+
fi
|
|
88
|
+
|
|
89
|
+
- name: Determine test results
|
|
90
|
+
id: test_results
|
|
91
|
+
run: |
|
|
92
|
+
# Determine overall status
|
|
93
|
+
if [ "${{ needs.optimum-rbln-pytest.result }}" == "success" ]; then
|
|
94
|
+
echo "pytest_status=✅ Success - All tests passed" >> $GITHUB_OUTPUT
|
|
95
|
+
echo "pytest_emoji=✅" >> $GITHUB_OUTPUT
|
|
96
|
+
elif [ "${{ needs.optimum-rbln-pytest.result }}" == "failure" ]; then
|
|
97
|
+
FAILED="${{ steps.get_failed_tests.outputs.failed_tests }}"
|
|
98
|
+
if [ -n "$FAILED" ] && [ "$FAILED" != "Unknown" ]; then
|
|
99
|
+
echo "pytest_status=❌ Failed - Tests: \`$FAILED\`" >> $GITHUB_OUTPUT
|
|
100
|
+
else
|
|
101
|
+
echo "pytest_status=❌ Failed" >> $GITHUB_OUTPUT
|
|
102
|
+
fi
|
|
103
|
+
echo "pytest_emoji=❌" >> $GITHUB_OUTPUT
|
|
104
|
+
elif [ "${{ needs.optimum-rbln-pytest.result }}" == "skipped" ]; then
|
|
105
|
+
echo "pytest_status=⏭️ Skipped" >> $GITHUB_OUTPUT
|
|
106
|
+
echo "pytest_emoji=⏭️" >> $GITHUB_OUTPUT
|
|
107
|
+
else
|
|
108
|
+
echo "pytest_status=⚠️ Cancelled" >> $GITHUB_OUTPUT
|
|
109
|
+
echo "pytest_emoji=⚠️" >> $GITHUB_OUTPUT
|
|
110
|
+
fi
|
|
111
|
+
|
|
112
|
+
# Determine compiler check status
|
|
113
|
+
if [ "${{ needs.check-compiler.result }}" == "success" ]; then
|
|
114
|
+
echo "compiler_status=✅ Available" >> $GITHUB_OUTPUT
|
|
115
|
+
else
|
|
116
|
+
echo "compiler_status=❌ Not Available" >> $GITHUB_OUTPUT
|
|
117
|
+
fi
|
|
118
|
+
|
|
119
|
+
- name: Notify Slack
|
|
120
|
+
if: always()
|
|
121
|
+
run: |
|
|
122
|
+
# Determine overall workflow status emoji
|
|
123
|
+
if [ "${{ needs.optimum-rbln-pytest.result }}" == "success" ]; then
|
|
124
|
+
title="✅ Optimum-RBLN Scheduled Pytest Results"
|
|
125
|
+
elif [ "${{ needs.check-compiler.result }}" != "success" ]; then
|
|
126
|
+
title="⚠️ Optimum-RBLN Scheduled Pytest Results - Compiler Check Failed"
|
|
127
|
+
else
|
|
128
|
+
title="❌ Optimum-RBLN Scheduled Pytest Results"
|
|
129
|
+
fi
|
|
130
|
+
|
|
131
|
+
commit="*Commit*\n<https://github.com/rebellions-sw/optimum-rbln/commit/${{github.sha}}|${{github.sha}}>"
|
|
132
|
+
action_link="*CI Report*\n<https://github.com/rebellions-sw/optimum-rbln/actions/runs/${{ github.run_id }}|View Details>"
|
|
133
|
+
compiler_version="${{ needs.load-version.outputs.compiler_version }}"
|
|
134
|
+
|
|
135
|
+
payload=$(jq -n \
|
|
136
|
+
--arg channel "${{ secrets.SLACK_CI_REPORTER_CHANNEL }}" \
|
|
137
|
+
--arg title "$title" \
|
|
138
|
+
--arg commit "$commit" \
|
|
139
|
+
--arg action_link "$action_link" \
|
|
140
|
+
--arg compiler_version "$compiler_version" \
|
|
141
|
+
--arg compiler_status "${{ steps.test_results.outputs.compiler_status }}" \
|
|
142
|
+
--arg pytest_status "${{ steps.test_results.outputs.pytest_status }}" \
|
|
143
|
+
'{
|
|
144
|
+
channel: $channel,
|
|
145
|
+
text: "Optimum-RBLN Scheduled Test Results",
|
|
146
|
+
blocks: [
|
|
147
|
+
{
|
|
148
|
+
type: "header",
|
|
149
|
+
text: {
|
|
150
|
+
type: "plain_text",
|
|
151
|
+
text: $title
|
|
152
|
+
}
|
|
153
|
+
},
|
|
154
|
+
{
|
|
155
|
+
type: "section",
|
|
156
|
+
fields: [
|
|
157
|
+
{ type: "mrkdwn", text: $commit },
|
|
158
|
+
{ type: "mrkdwn", text: $action_link }
|
|
159
|
+
]
|
|
160
|
+
},
|
|
161
|
+
{
|
|
162
|
+
type: "section",
|
|
163
|
+
fields: [
|
|
164
|
+
{ type: "mrkdwn", text: "*Compiler Version*" },
|
|
165
|
+
{ type: "mrkdwn", text: ("`" + $compiler_version + "`") }
|
|
166
|
+
]
|
|
167
|
+
},
|
|
168
|
+
{ type: "divider" },
|
|
169
|
+
{
|
|
170
|
+
type: "section",
|
|
171
|
+
fields: [
|
|
172
|
+
{ type: "mrkdwn", text: "*Compiler Check*" },
|
|
173
|
+
{ type: "mrkdwn", text: $compiler_status }
|
|
174
|
+
]
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
type: "section",
|
|
178
|
+
fields: [
|
|
179
|
+
{ type: "mrkdwn", text: "*Pytest Results*" },
|
|
180
|
+
{ type: "mrkdwn", text: $pytest_status }
|
|
181
|
+
]
|
|
182
|
+
}
|
|
183
|
+
]
|
|
184
|
+
}'
|
|
185
|
+
)
|
|
186
|
+
curl -X POST -H 'Authorization: Bearer ${{ secrets.SLACK_BOT_USER_OAUTH_ACCESS_TOKEN }}' -H 'Content-type: application/json; charset=utf-8' --data "$payload" https://slack.com/api/chat.postMessage
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: optimum-rbln
|
|
3
|
-
Version: 0.9.
|
|
3
|
+
Version: 0.9.2a5
|
|
4
4
|
Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
|
|
5
5
|
Project-URL: Homepage, https://rebellions.ai
|
|
6
6
|
Project-URL: Documentation, https://docs.rbln.ai
|
|
@@ -26,10 +26,10 @@ Requires-Python: <3.14,>=3.9
|
|
|
26
26
|
Requires-Dist: accelerate>=1.0.1
|
|
27
27
|
Requires-Dist: diffusers==0.35.1
|
|
28
28
|
Requires-Dist: packaging>=24.1
|
|
29
|
-
Requires-Dist: torch==2.
|
|
30
|
-
Requires-Dist: torchaudio<=2.
|
|
31
|
-
Requires-Dist: torchvision<=0.
|
|
32
|
-
Requires-Dist: transformers==4.
|
|
29
|
+
Requires-Dist: torch==2.8.0
|
|
30
|
+
Requires-Dist: torchaudio<=2.8.0
|
|
31
|
+
Requires-Dist: torchvision<=0.23.0
|
|
32
|
+
Requires-Dist: transformers==4.57.1
|
|
33
33
|
Description-Content-Type: text/markdown
|
|
34
34
|
|
|
35
35
|
|
|
@@ -29,11 +29,11 @@ classifiers = [
|
|
|
29
29
|
]
|
|
30
30
|
keywords = ["transformers", "diffusers", "inference", "rbln", "atom", "rebel"]
|
|
31
31
|
dependencies = [
|
|
32
|
-
"torch==2.
|
|
33
|
-
"torchaudio<=2.
|
|
34
|
-
"torchvision<=0.
|
|
32
|
+
"torch==2.8.0",
|
|
33
|
+
"torchaudio<=2.8.0",
|
|
34
|
+
"torchvision<=0.23.0",
|
|
35
35
|
"accelerate>=1.0.1",
|
|
36
|
-
"transformers==4.
|
|
36
|
+
"transformers==4.57.1",
|
|
37
37
|
"diffusers==0.35.1",
|
|
38
38
|
"packaging>=24.1",
|
|
39
39
|
]
|
|
@@ -118,6 +118,8 @@ _import_structure = {
|
|
|
118
118
|
"RBLNLlavaForConditionalGenerationConfig",
|
|
119
119
|
"RBLNLlavaNextForConditionalGeneration",
|
|
120
120
|
"RBLNLlavaNextForConditionalGenerationConfig",
|
|
121
|
+
"RBLNLoRAAdapterConfig",
|
|
122
|
+
"RBLNLoRAConfig",
|
|
121
123
|
"RBLNMidmLMHeadModel",
|
|
122
124
|
"RBLNMidmLMHeadModelConfig",
|
|
123
125
|
"RBLNMistralModel",
|
|
@@ -406,6 +408,8 @@ if TYPE_CHECKING:
|
|
|
406
408
|
RBLNLlavaForConditionalGenerationConfig,
|
|
407
409
|
RBLNLlavaNextForConditionalGeneration,
|
|
408
410
|
RBLNLlavaNextForConditionalGenerationConfig,
|
|
411
|
+
RBLNLoRAAdapterConfig,
|
|
412
|
+
RBLNLoRAConfig,
|
|
409
413
|
RBLNMidmLMHeadModel,
|
|
410
414
|
RBLNMidmLMHeadModelConfig,
|
|
411
415
|
RBLNMistralForCausalLM,
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.9.
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 9, 2, '
|
|
31
|
+
__version__ = version = '0.9.2a5'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 9, 2, 'a5')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -41,6 +41,9 @@ TypeInputInfo = List[Tuple[str, Tuple[int], str]]
|
|
|
41
41
|
class RBLNSerializableConfigProtocol(Protocol):
|
|
42
42
|
def _prepare_for_serialization(self) -> Dict[str, Any]: ...
|
|
43
43
|
|
|
44
|
+
def __repr__(self) -> str:
|
|
45
|
+
return f"{self.__class__.__name__}({self._prepare_for_serialization()})"
|
|
46
|
+
|
|
44
47
|
|
|
45
48
|
@dataclass
|
|
46
49
|
class RBLNCompileConfig:
|
|
@@ -34,6 +34,49 @@ if TYPE_CHECKING:
|
|
|
34
34
|
logger = get_logger(__name__)
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
def _get_dtype(
|
|
38
|
+
cls,
|
|
39
|
+
dtype: Optional[Union[str, torch.dtype, dict]],
|
|
40
|
+
config: PretrainedConfig,
|
|
41
|
+
) -> tuple[PretrainedConfig, Optional[torch.dtype], Optional[torch.dtype]]:
|
|
42
|
+
dtype_orig = None
|
|
43
|
+
|
|
44
|
+
if dtype is not None:
|
|
45
|
+
if isinstance(dtype, str):
|
|
46
|
+
if dtype == "auto":
|
|
47
|
+
if hasattr(config, "dtype") and config.dtype is not None:
|
|
48
|
+
dtype = config.dtype
|
|
49
|
+
else:
|
|
50
|
+
dtype = torch.get_default_dtype()
|
|
51
|
+
elif hasattr(torch, dtype):
|
|
52
|
+
dtype = getattr(torch, dtype)
|
|
53
|
+
config.dtype = dtype
|
|
54
|
+
elif isinstance(dtype, torch.dtype):
|
|
55
|
+
config.dtype = dtype
|
|
56
|
+
elif isinstance(dtype, dict):
|
|
57
|
+
for key, curr_dtype in dtype.items():
|
|
58
|
+
if hasattr(config, key):
|
|
59
|
+
value = getattr(config, key)
|
|
60
|
+
curr_dtype = curr_dtype if not isinstance(curr_dtype, str) else getattr(torch, curr_dtype)
|
|
61
|
+
value.dtype = curr_dtype
|
|
62
|
+
# main torch dtype for modules that aren't part of any sub-config
|
|
63
|
+
dtype = dtype.get("")
|
|
64
|
+
dtype = dtype if not isinstance(dtype, str) else getattr(torch, dtype)
|
|
65
|
+
config.dtype = dtype
|
|
66
|
+
if dtype is None:
|
|
67
|
+
dtype = torch.float32
|
|
68
|
+
else:
|
|
69
|
+
raise ValueError(f"Invalid dtype: {dtype}")
|
|
70
|
+
|
|
71
|
+
dtype_orig = cls._set_default_dtype(dtype)
|
|
72
|
+
else:
|
|
73
|
+
# Use default dtype
|
|
74
|
+
default_dtype = torch.get_default_dtype()
|
|
75
|
+
config.dtype = default_dtype
|
|
76
|
+
|
|
77
|
+
return config, dtype, dtype_orig
|
|
78
|
+
|
|
79
|
+
|
|
37
80
|
class RBLNModel(RBLNBaseModel):
|
|
38
81
|
@classmethod
|
|
39
82
|
def update_kwargs(cls, kwargs):
|
|
@@ -206,10 +249,37 @@ class RBLNModel(RBLNBaseModel):
|
|
|
206
249
|
trust_remote_code: bool = False,
|
|
207
250
|
# Some rbln-config should be applied before loading torch module (i.e. quantized llm)
|
|
208
251
|
rbln_config: Optional[RBLNModelConfig] = None,
|
|
252
|
+
dtype: Optional[Union[str, torch.dtype, dict]] = None,
|
|
209
253
|
**kwargs,
|
|
210
254
|
) -> "PreTrainedModel":
|
|
211
255
|
kwargs = cls.update_kwargs(kwargs)
|
|
212
|
-
|
|
256
|
+
|
|
257
|
+
hf_class = cls.get_hf_class()
|
|
258
|
+
|
|
259
|
+
if dtype is not None:
|
|
260
|
+
config = hf_class.config_class.from_pretrained(
|
|
261
|
+
model_id,
|
|
262
|
+
subfolder=subfolder,
|
|
263
|
+
revision=revision,
|
|
264
|
+
cache_dir=cache_dir,
|
|
265
|
+
use_auth_token=use_auth_token,
|
|
266
|
+
local_files_only=local_files_only,
|
|
267
|
+
force_download=force_download,
|
|
268
|
+
trust_remote_code=trust_remote_code,
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
config, processed_dtype, dtype_orig = _get_dtype(
|
|
272
|
+
cls=hf_class,
|
|
273
|
+
dtype=dtype,
|
|
274
|
+
config=config,
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
kwargs["torch_dtype"] = processed_dtype
|
|
278
|
+
|
|
279
|
+
if dtype_orig is not None:
|
|
280
|
+
hf_class._set_default_dtype(dtype_orig)
|
|
281
|
+
|
|
282
|
+
return hf_class.from_pretrained(
|
|
213
283
|
model_id,
|
|
214
284
|
subfolder=subfolder,
|
|
215
285
|
revision=revision,
|
|
@@ -110,6 +110,8 @@ _import_structure = {
|
|
|
110
110
|
"RBLNPegasusModelConfig",
|
|
111
111
|
"RBLNLlavaNextForConditionalGeneration",
|
|
112
112
|
"RBLNLlavaNextForConditionalGenerationConfig",
|
|
113
|
+
"RBLNLoRAAdapterConfig",
|
|
114
|
+
"RBLNLoRAConfig",
|
|
113
115
|
"RBLNMidmLMHeadModel",
|
|
114
116
|
"RBLNMidmLMHeadModelConfig",
|
|
115
117
|
"RBLNMistralForCausalLM",
|
|
@@ -258,6 +260,8 @@ if TYPE_CHECKING:
|
|
|
258
260
|
RBLNLlavaForConditionalGenerationConfig,
|
|
259
261
|
RBLNLlavaNextForConditionalGeneration,
|
|
260
262
|
RBLNLlavaNextForConditionalGenerationConfig,
|
|
263
|
+
RBLNLoRAAdapterConfig,
|
|
264
|
+
RBLNLoRAConfig,
|
|
261
265
|
RBLNMidmLMHeadModel,
|
|
262
266
|
RBLNMidmLMHeadModelConfig,
|
|
263
267
|
RBLNMistralForCausalLM,
|
{optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/modeling_generic.py
RENAMED
|
@@ -23,6 +23,7 @@ different model architectures.
|
|
|
23
23
|
import inspect
|
|
24
24
|
from typing import TYPE_CHECKING, Optional, Union
|
|
25
25
|
|
|
26
|
+
from torch import nn
|
|
26
27
|
from transformers import (
|
|
27
28
|
AutoModel,
|
|
28
29
|
AutoModelForAudioClassification,
|
|
@@ -57,6 +58,28 @@ class RBLNTransformerEncoder(RBLNModel):
|
|
|
57
58
|
rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
|
|
58
59
|
rbln_dtype = "int64"
|
|
59
60
|
|
|
61
|
+
@classmethod
|
|
62
|
+
def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNTransformerEncoderConfig) -> nn.Module:
|
|
63
|
+
class TransformerEncoderWrapper(nn.Module):
|
|
64
|
+
# Parameters to disable for RBLN compilation
|
|
65
|
+
DISABLED_PARAMS = {"return_dict", "use_cache"}
|
|
66
|
+
|
|
67
|
+
def __init__(self, model: "PreTrainedModel", rbln_config: RBLNTransformerEncoderConfig):
|
|
68
|
+
super().__init__()
|
|
69
|
+
self.model = model
|
|
70
|
+
self.rbln_config = rbln_config
|
|
71
|
+
self._forward_signature = inspect.signature(model.forward)
|
|
72
|
+
|
|
73
|
+
def forward(self, *args, **kwargs):
|
|
74
|
+
# Disable parameters that are not compatible with RBLN compilation
|
|
75
|
+
for param_name in self.DISABLED_PARAMS:
|
|
76
|
+
if param_name in self._forward_signature.parameters:
|
|
77
|
+
kwargs[param_name] = False
|
|
78
|
+
|
|
79
|
+
return self.model(*args, **kwargs)
|
|
80
|
+
|
|
81
|
+
return TransformerEncoderWrapper(model, rbln_config).eval()
|
|
82
|
+
|
|
60
83
|
@classmethod
|
|
61
84
|
def _update_rbln_config(
|
|
62
85
|
cls,
|
|
@@ -208,7 +231,6 @@ class RBLNModelForQuestionAnswering(RBLNTransformerEncoder):
|
|
|
208
231
|
|
|
209
232
|
def _prepare_output(self, output, return_dict):
|
|
210
233
|
# Prepare QuestionAnswering specific output format.
|
|
211
|
-
|
|
212
234
|
start_logits, end_logits = output
|
|
213
235
|
|
|
214
236
|
if not return_dict:
|
{optimum_rbln-0.9.2a3 → optimum_rbln-0.9.2a5}/src/optimum/rbln/transformers/models/__init__.py
RENAMED
|
@@ -96,6 +96,8 @@ _import_structure = {
|
|
|
96
96
|
"RBLNDecoderOnlyModel",
|
|
97
97
|
"RBLNDecoderOnlyModelForCausalLM",
|
|
98
98
|
"RBLNDecoderOnlyModelForCausalLMConfig",
|
|
99
|
+
"RBLNLoRAAdapterConfig",
|
|
100
|
+
"RBLNLoRAConfig",
|
|
99
101
|
],
|
|
100
102
|
"depth_anything": ["RBLNDepthAnythingForDepthEstimationConfig", "RBLNDepthAnythingForDepthEstimation"],
|
|
101
103
|
"dpt": [
|
|
@@ -239,6 +241,8 @@ if TYPE_CHECKING:
|
|
|
239
241
|
RBLNDecoderOnlyModelConfig,
|
|
240
242
|
RBLNDecoderOnlyModelForCausalLM,
|
|
241
243
|
RBLNDecoderOnlyModelForCausalLMConfig,
|
|
244
|
+
RBLNLoRAAdapterConfig,
|
|
245
|
+
RBLNLoRAConfig,
|
|
242
246
|
)
|
|
243
247
|
from .depth_anything import RBLNDepthAnythingForDepthEstimation, RBLNDepthAnythingForDepthEstimationConfig
|
|
244
248
|
from .distilbert import RBLNDistilBertForQuestionAnswering, RBLNDistilBertForQuestionAnsweringConfig
|
|
@@ -31,6 +31,7 @@ from transformers.utils import logging
|
|
|
31
31
|
from ....configuration_utils import RBLNCompileConfig, RBLNModelConfig
|
|
32
32
|
from ....modeling import RBLNModel
|
|
33
33
|
from ...utils.rbln_runtime_wrapper import LoopProcessor
|
|
34
|
+
from ..decoderonly.generation_decoderonly import RBLNDecoderOnlyGenerationMixin
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
logger = logging.get_logger(__name__)
|
|
@@ -265,7 +266,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
|
|
|
265
266
|
)
|
|
266
267
|
|
|
267
268
|
|
|
268
|
-
class RBLNBlip2ForConditionalGeneration(RBLNModel):
|
|
269
|
+
class RBLNBlip2ForConditionalGeneration(RBLNModel, RBLNDecoderOnlyGenerationMixin):
|
|
269
270
|
"""
|
|
270
271
|
RBLNBlip2ForConditionalGeneration is a multi-modal model that integrates vision and language processing capabilities,
|
|
271
272
|
optimized for RBLN NPUs. It is designed for conditional generation tasks that involve both image and text inputs.
|
|
@@ -433,3 +434,66 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
|
|
|
433
434
|
)
|
|
434
435
|
|
|
435
436
|
return inputs_embeds
|
|
437
|
+
|
|
438
|
+
@torch.no_grad()
|
|
439
|
+
def generate(
|
|
440
|
+
self,
|
|
441
|
+
pixel_values: torch.FloatTensor,
|
|
442
|
+
input_ids: Optional[torch.LongTensor] = None,
|
|
443
|
+
attention_mask: Optional[torch.LongTensor] = None,
|
|
444
|
+
inputs_embeds: Optional[torch.FloatTensor] = None,
|
|
445
|
+
interpolate_pos_encoding: bool = False,
|
|
446
|
+
**generate_kwargs,
|
|
447
|
+
) -> torch.LongTensor:
|
|
448
|
+
batch_size = pixel_values.shape[0]
|
|
449
|
+
image_embeds = self.vision_model(
|
|
450
|
+
pixel_values,
|
|
451
|
+
return_dict=True,
|
|
452
|
+
interpolate_pos_encoding=interpolate_pos_encoding,
|
|
453
|
+
).last_hidden_state
|
|
454
|
+
image_attention_mask = torch.ones(image_embeds.size()[:-1], dtype=torch.long, device=image_embeds.device)
|
|
455
|
+
|
|
456
|
+
query_tokens = self.query_tokens.expand(image_embeds.shape[0], -1, -1)
|
|
457
|
+
query_outputs = self.qformer(
|
|
458
|
+
query_embeds=query_tokens,
|
|
459
|
+
encoder_hidden_states=image_embeds,
|
|
460
|
+
encoder_attention_mask=image_attention_mask,
|
|
461
|
+
return_dict=True,
|
|
462
|
+
)
|
|
463
|
+
query_output = query_outputs.last_hidden_state
|
|
464
|
+
|
|
465
|
+
if query_output.dtype != image_embeds.dtype:
|
|
466
|
+
query_output = query_output.to(image_embeds.dtype)
|
|
467
|
+
|
|
468
|
+
language_model_inputs = self.language_projection(query_output)
|
|
469
|
+
|
|
470
|
+
if inputs_embeds is None:
|
|
471
|
+
if input_ids is None:
|
|
472
|
+
image_tokens = [self.config.image_token_index] * self.config.num_query_tokens
|
|
473
|
+
start_tokens = image_tokens + [self.config.text_config.bos_token_id]
|
|
474
|
+
input_ids = torch.tensor([start_tokens], dtype=torch.long, device=image_embeds.device)
|
|
475
|
+
input_ids = input_ids.repeat(batch_size, 1)
|
|
476
|
+
inputs_embeds = self.get_input_embeddings()(input_ids)
|
|
477
|
+
|
|
478
|
+
if attention_mask is None:
|
|
479
|
+
attention_mask = torch.ones_like(input_ids)
|
|
480
|
+
|
|
481
|
+
if input_ids is None:
|
|
482
|
+
special_image_mask = inputs_embeds == self.get_input_embeddings()(
|
|
483
|
+
torch.tensor(self.config.image_token_id, dtype=torch.long, device=inputs_embeds.device)
|
|
484
|
+
)
|
|
485
|
+
special_image_mask = special_image_mask.all(-1)
|
|
486
|
+
else:
|
|
487
|
+
special_image_mask = input_ids == self.config.image_token_id
|
|
488
|
+
|
|
489
|
+
special_image_mask = special_image_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
|
|
490
|
+
language_model_inputs = language_model_inputs.to(inputs_embeds.device, inputs_embeds.dtype)
|
|
491
|
+
inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, language_model_inputs)
|
|
492
|
+
|
|
493
|
+
inputs = {"inputs_embeds": inputs_embeds, "attention_mask": attention_mask}
|
|
494
|
+
if not self.language_model.config.is_encoder_decoder:
|
|
495
|
+
inputs["input_ids"] = input_ids
|
|
496
|
+
|
|
497
|
+
outputs = self.language_model.generate(**inputs, **generate_kwargs)
|
|
498
|
+
|
|
499
|
+
return outputs
|
|
@@ -23,4 +23,5 @@ from ....ops import (
|
|
|
23
23
|
paged_flash_causal_attn_prefill,
|
|
24
24
|
)
|
|
25
25
|
from .configuration_decoderonly import RBLNDecoderOnlyModelConfig, RBLNDecoderOnlyModelForCausalLMConfig
|
|
26
|
+
from .configuration_lora import RBLNLoRAAdapterConfig, RBLNLoRAConfig
|
|
26
27
|
from .modeling_decoderonly import RBLNDecoderOnlyModel, RBLNDecoderOnlyModelForCausalLM
|
|
@@ -17,6 +17,7 @@ from typing import Any, Dict, List, Literal, Optional, Union, get_args
|
|
|
17
17
|
from ....configuration_utils import RBLNModelConfig
|
|
18
18
|
from ....utils.logging import get_logger
|
|
19
19
|
from ...utils.rbln_quantization import RBLNQuantizationConfig
|
|
20
|
+
from .configuration_lora import RBLNLoRAConfig
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
logger = get_logger()
|
|
@@ -48,6 +49,7 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
|
|
|
48
49
|
kvcache_partition_len: Optional[int] = None,
|
|
49
50
|
kvcache_block_size: Optional[int] = None,
|
|
50
51
|
quantization: Optional[Union[Dict[str, Any], RBLNQuantizationConfig]] = None,
|
|
52
|
+
lora_config: Optional[Union[Dict[str, Any], RBLNLoRAConfig]] = None,
|
|
51
53
|
prefill_chunk_size: Optional[int] = None,
|
|
52
54
|
kvcache_num_blocks: Optional[int] = None,
|
|
53
55
|
decoder_batch_sizes: Optional[List[int]] = None,
|
|
@@ -80,6 +82,12 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
|
|
|
80
82
|
kvcache_block_size (Optional[int]): Sets the size (in number of tokens) of each block
|
|
81
83
|
in the PagedAttention KV cache. See the "KV Cache Block Size (`kvcache_block_size`)"
|
|
82
84
|
section below for details.
|
|
85
|
+
quantization (Optional[Dict[str, Any]]): Configuration dictionary for applying model
|
|
86
|
+
quantization. Specifies format, etc.
|
|
87
|
+
lora_config (Optional[Union[Dict[str, Any], RBLNLoRAConfig]]): Configuration for LoRA
|
|
88
|
+
(Low-Rank Adaptation) settings when using (multi-)LoRA support. Can be provided as
|
|
89
|
+
a dictionary or an RBLNLoRAConfig instance. When provided, enables LoRA functionality
|
|
90
|
+
for the model compilation. Defaults to None (no LoRA).
|
|
83
91
|
prefill_chunk_size (Optional[int]): The chunk size used during the prefill phase for
|
|
84
92
|
processing input sequences. Defaults to 128. Must be a positive integer
|
|
85
93
|
divisible by 64. Affects prefill performance and memory usage.
|
|
@@ -185,6 +193,26 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
|
|
|
185
193
|
if self.quantization and isinstance(self.quantization, dict):
|
|
186
194
|
self.quantization = RBLNQuantizationConfig(**self.quantization)
|
|
187
195
|
|
|
196
|
+
self.lora_config = lora_config
|
|
197
|
+
if self.lora_config and isinstance(self.lora_config, dict):
|
|
198
|
+
self.lora_config = RBLNLoRAConfig(**self.lora_config)
|
|
199
|
+
|
|
200
|
+
# Validate LoRA adapters if LoRA is enabled
|
|
201
|
+
if self.lora_config is not None:
|
|
202
|
+
validation_results = self.lora_config.validate_adapter_weights()
|
|
203
|
+
failed_adapters = [adapter_id for adapter_id, is_valid in validation_results.items() if not is_valid]
|
|
204
|
+
|
|
205
|
+
if failed_adapters:
|
|
206
|
+
raise ValueError(
|
|
207
|
+
f"Some LoRA adapters failed validation and may not be accessible at compile time: {failed_adapters}. "
|
|
208
|
+
"Please ensure all adapter weights are available and properly formatted."
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
logger.info(
|
|
212
|
+
f"LoRA configuration initialized with {self.lora_config.num_adapters} adapters: "
|
|
213
|
+
f"{self.lora_config.adapter_ids}. Max rank: {self.lora_config.max_lora_rank}"
|
|
214
|
+
)
|
|
215
|
+
|
|
188
216
|
self.attn_impl = attn_impl
|
|
189
217
|
self.kvcache_partition_len = kvcache_partition_len
|
|
190
218
|
self.kvcache_block_size = kvcache_block_size
|
|
@@ -204,6 +232,7 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
|
|
|
204
232
|
if self.logits_to_keep is not None and self.logits_to_keep > 1:
|
|
205
233
|
raise NotImplementedError("`logits_to_keep` > 1 is currently not supported for RBLN models.")
|
|
206
234
|
|
|
235
|
+
self.decoder_batch_sizes = None
|
|
207
236
|
if "decode" in self.phases:
|
|
208
237
|
self.decoder_batch_sizes = decoder_batch_sizes
|
|
209
238
|
if self.decoder_batch_sizes is None:
|
|
@@ -243,6 +272,11 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
|
|
|
243
272
|
def use_multiple_decoder(self) -> bool:
|
|
244
273
|
return isinstance(self.decoder_batch_sizes, list) and len(self.decoder_batch_sizes) > 1
|
|
245
274
|
|
|
275
|
+
@property
|
|
276
|
+
def use_lora(self):
|
|
277
|
+
"""Check if LoRA is enabled for this configuration."""
|
|
278
|
+
return self.lora_config is not None
|
|
279
|
+
|
|
246
280
|
@property
|
|
247
281
|
def can_generate(self) -> bool:
|
|
248
282
|
return "decode" in self.phases
|