optimum-rbln 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as published to their public registry. It is provided for informational purposes only and reflects the packages exactly as released.
Files changed (103)
  1. optimum/rbln/__init__.py +41 -38
  2. optimum/rbln/__version__.py +16 -1
  3. optimum/rbln/diffusers/__init__.py +26 -2
  4. optimum/rbln/{modeling_diffusers.py → diffusers/modeling_diffusers.py} +97 -126
  5. optimum/rbln/diffusers/models/__init__.py +36 -3
  6. optimum/rbln/{transformers/generation → diffusers/models/autoencoders}/__init__.py +1 -2
  7. optimum/rbln/diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +73 -61
  8. optimum/rbln/diffusers/models/autoencoders/vae.py +83 -0
  9. optimum/rbln/diffusers/models/controlnet.py +54 -14
  10. optimum/rbln/diffusers/models/transformers/__init__.py +24 -0
  11. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +203 -0
  12. optimum/rbln/diffusers/models/unets/__init__.py +24 -0
  13. optimum/rbln/diffusers/models/{unet_2d_condition.py → unets/unet_2d_condition.py} +82 -22
  14. optimum/rbln/diffusers/pipelines/__init__.py +23 -2
  15. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +13 -33
  16. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +17 -2
  17. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +18 -2
  18. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +18 -2
  19. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +18 -2
  20. optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  21. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -2
  22. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -13
  23. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +31 -0
  24. optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +26 -0
  25. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +31 -0
  26. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +31 -0
  27. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +31 -0
  28. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +24 -0
  29. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +15 -8
  30. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +15 -8
  31. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +31 -0
  32. optimum/rbln/modeling.py +238 -0
  33. optimum/rbln/modeling_base.py +186 -760
  34. optimum/rbln/modeling_config.py +31 -7
  35. optimum/rbln/ops/__init__.py +26 -0
  36. optimum/rbln/ops/attn.py +221 -0
  37. optimum/rbln/ops/flash_attn.py +70 -0
  38. optimum/rbln/ops/kv_cache_update.py +69 -0
  39. optimum/rbln/transformers/__init__.py +20 -2
  40. optimum/rbln/{modeling_alias.py → transformers/modeling_alias.py} +5 -1
  41. optimum/rbln/transformers/modeling_generic.py +385 -0
  42. optimum/rbln/transformers/models/auto/__init__.py +23 -0
  43. optimum/rbln/transformers/models/auto/auto_factory.py +117 -23
  44. optimum/rbln/transformers/models/auto/modeling_auto.py +36 -12
  45. optimum/rbln/transformers/models/bart/__init__.py +0 -1
  46. optimum/rbln/transformers/models/bart/bart_architecture.py +107 -464
  47. optimum/rbln/transformers/models/bart/modeling_bart.py +10 -9
  48. optimum/rbln/transformers/models/bert/modeling_bert.py +3 -6
  49. optimum/rbln/transformers/models/clip/modeling_clip.py +8 -25
  50. optimum/rbln/transformers/models/decoderonly/__init__.py +0 -10
  51. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +775 -514
  52. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +128 -260
  53. optimum/rbln/transformers/models/dpt/modeling_dpt.py +1 -1
  54. optimum/rbln/transformers/models/exaone/exaone_architecture.py +60 -45
  55. optimum/rbln/transformers/models/exaone/modeling_exaone.py +4 -2
  56. optimum/rbln/transformers/models/gemma/gemma_architecture.py +33 -104
  57. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +50 -238
  58. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +3 -2
  59. optimum/rbln/transformers/models/llama/llama_architecture.py +0 -1
  60. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +3 -75
  61. optimum/rbln/transformers/models/midm/midm_architecture.py +84 -238
  62. optimum/rbln/transformers/models/midm/modeling_midm.py +5 -6
  63. optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -1
  64. optimum/rbln/transformers/models/phi/phi_architecture.py +60 -261
  65. optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -1
  66. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +58 -103
  67. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +498 -0
  68. optimum/rbln/transformers/models/t5/__init__.py +0 -1
  69. optimum/rbln/transformers/models/t5/modeling_t5.py +106 -5
  70. optimum/rbln/transformers/models/t5/t5_architecture.py +106 -448
  71. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
  72. optimum/rbln/transformers/models/whisper/generation_whisper.py +42 -0
  73. optimum/rbln/transformers/models/whisper/modeling_whisper.py +78 -55
  74. optimum/rbln/transformers/models/whisper/whisper_architecture.py +219 -312
  75. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +3 -35
  76. optimum/rbln/transformers/utils/rbln_quantization.py +120 -4
  77. optimum/rbln/utils/decorator_utils.py +51 -11
  78. optimum/rbln/utils/hub.py +131 -0
  79. optimum/rbln/utils/import_utils.py +22 -1
  80. optimum/rbln/utils/logging.py +37 -0
  81. optimum/rbln/utils/model_utils.py +52 -0
  82. optimum/rbln/utils/runtime_utils.py +10 -4
  83. optimum/rbln/utils/save_utils.py +17 -0
  84. optimum/rbln/utils/submodule.py +137 -0
  85. optimum_rbln-0.2.0.dist-info/METADATA +117 -0
  86. optimum_rbln-0.2.0.dist-info/RECORD +114 -0
  87. {optimum_rbln-0.1.13.dist-info → optimum_rbln-0.2.0.dist-info}/WHEEL +1 -1
  88. optimum_rbln-0.2.0.dist-info/licenses/LICENSE +288 -0
  89. optimum/rbln/transformers/cache_utils.py +0 -107
  90. optimum/rbln/transformers/generation/streamers.py +0 -139
  91. optimum/rbln/transformers/generation/utils.py +0 -397
  92. optimum/rbln/transformers/models/exaone/hf_hub_cached/configuration_exaone.py +0 -181
  93. optimum/rbln/transformers/models/exaone/hf_hub_cached/modeling_exaone.py +0 -1725
  94. optimum/rbln/transformers/models/midm/hf_hub_cached/configuration_midm.py +0 -22
  95. optimum/rbln/transformers/models/midm/hf_hub_cached/midm_bitext_tokenization.py +0 -304
  96. optimum/rbln/transformers/models/midm/hf_hub_cached/modeling_midm.py +0 -1469
  97. optimum/rbln/transformers/models/midm/hf_hub_cached/rotary_position_embedding.py +0 -98
  98. optimum/rbln/utils/context.py +0 -58
  99. optimum/rbln/utils/timer_utils.py +0 -43
  100. optimum_rbln-0.1.13.dist-info/METADATA +0 -120
  101. optimum_rbln-0.1.13.dist-info/RECORD +0 -107
  102. optimum_rbln-0.1.13.dist-info/entry_points.txt +0 -4
  103. optimum_rbln-0.1.13.dist-info/licenses/LICENSE +0 -201
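
The dominant change in this release is structural: modeling_base.py shrinks by 760 lines, split into the new modeling.py (+238) and transformers/modeling_generic.py (+385), while the custom generation utilities (cache_utils.py, generation/streamers.py, generation/utils.py) are deleted outright. Downstream code that imports by internal path needs updating; a hedged sketch inferred from the import hunks below, not from official release notes:

    # 0.1.13 internal path (removed in 0.2.0):
    # from optimum.rbln.modeling_base import RBLNModel

    # 0.2.0 path, matching the rewritten relative imports ("from ....modeling import RBLNModel"):
    from optimum.rbln.modeling import RBLNModel

    # The top-level re-export is presumably the stable spelling across both versions:
    from optimum.rbln import RBLNModel

The representative hunks below are from optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py (item 52) unless noted otherwise.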
@@ -20,45 +20,34 @@
 # are the intellectual property of Rebellions Inc. and may not be
 # copied, modified, or distributed without prior written permission
 # from Rebellions Inc.
-import functools
-import glob
+
 import inspect
-import os
 from dataclasses import dataclass
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
 
 import rebel
 import torch
-import transformers
-from safetensors.torch import load_file
+from rebel.compile_context import CompileContext
 from transformers import AutoConfig, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel
 from transformers.modeling_utils import no_init_weights
 from transformers.utils import ModelOutput
 
-from ....modeling_base import RBLNModel
-from ....modeling_config import DEFAULT_COMPILED_MODEL_NAME, RBLNCompileConfig, RBLNConfig
+from ....modeling import RBLNModel
+from ....modeling_config import RBLNCompileConfig, RBLNConfig
 from ....utils.logging import get_logger
 from ....utils.runtime_utils import RBLNPytorchRuntime
-from ....utils.timer_utils import rbln_timer
-from .decoderonly_architecture import DecoderOnlyWrapper
+from ...utils.rbln_quantization import QuantizationManager
+from .decoderonly_architecture import (
+    DecoderOnlyWrapper,
+    validate_attention_method,
+)
 
 
 logger = get_logger()
 
 if TYPE_CHECKING:
-    from transformers import (
-        AutoFeatureExtractor,
-        AutoProcessor,
-        AutoTokenizer,
-        PretrainedConfig,
-    )
-
-SUPPORTED_QUANTIZATIONS = {
-    "rbln": [
-        "w4a16",
-    ],
-}
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig
 
 
 class RBLNRuntimeModel(RBLNPytorchRuntime):
@@ -70,32 +59,21 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
         inputs_embeds: torch.Tensor,
         attention_mask: torch.Tensor,
         cache_position: torch.Tensor,
-        batch_position: torch.Tensor,
-        query_idx: torch.Tensor,
         **kwargs,
     ):
         if inputs_embeds is None:
             inp = input_ids
             if self.embed_tokens is not None:
                 inp = self.embed_tokens(inp)
-
-            return super().forward(
-                inp,
-                attention_mask,
-                cache_position,
-                batch_position,
-                query_idx,
-                **kwargs,
-            )
         else:
-            return super().forward(
-                inputs_embeds,
-                attention_mask,
-                cache_position,
-                batch_position,
-                query_idx,
-                **kwargs,
-            )
+            inp = inputs_embeds
+
+        return super().forward(
+            inp,
+            attention_mask,
+            cache_position,
+            **kwargs,
+        )
 
 
 @dataclass
@@ -127,24 +105,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
     main_input_name = "input_ids"
     auto_model_class = AutoModelForCausalLM
     _decoder_wrapper_cls = DecoderOnlyWrapper
-    _original_cls = None
-
-    @classmethod
-    @property
-    def original_cls(cls):
-        """
-        Lazily loads and caches the corresponding Hugging Face model class.
-        Removes 'RBLN' prefix from the class name to get the original class name
-        (e.g., RBLNLlamaForCausalLM -> LlamaForCausalLM) and imports it from
-        the transformers module.
-
-        Returns:
-            type: The original Hugging Face model class
-        """
-        if cls._original_cls is None:
-            hf_original_cls_name = cls.__name__[4:]
-            cls._original_cls = getattr(transformers, hf_original_cls_name)
-        return cls._original_cls
+    _use_rotary_emb = True
 
     def __post_init__(self, **kwargs):
         self.batch_size = self.rbln_config.model_cfg["batch_size"]
@@ -203,6 +164,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
     def get_quantized_model(
         cls,
         model_id: str,
+        config: Optional[PretrainedConfig] = None,
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
@@ -212,57 +174,28 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         trust_remote_code: bool = False,
         **kwargs,
     ):
-        from ...utils.rbln_quantization import update_layers_to_quantized
+        from ...utils.rbln_quantization import prepare_model_for_quantization
 
         kwargs = cls.update_kwargs(kwargs)
 
-        config = AutoConfig.from_pretrained(
-            model_id,
-            use_auth_token=use_auth_token,
-            revision=revision,
-            force_download=force_download,
-            cache_dir=cache_dir,
-            trust_remote_code=trust_remote_code,
-            **kwargs,
-        )
+        if config is None:
+            config = AutoConfig.from_pretrained(
+                model_id,
+                use_auth_token=use_auth_token,
+                revision=revision,
+                force_download=force_download,
+                cache_dir=cache_dir,
+                trust_remote_code=trust_remote_code,
+                **kwargs,
+            )
 
         with no_init_weights():
             model = AutoModelForCausalLM.from_config(config)
 
-        update_layers_to_quantized(model)
-
-        n_layer = kwargs.get("num_hidden_layers", None)
-        cls._load_weights_directly_to_model(model, model_id, n_layer)
+        prepare_model_for_quantization(model, model_id, kwargs.get("num_hidden_layers"))
 
         return model
 
-    def _load_weights_directly_to_model(model, model_id, n_layer=None):
-        """
-        Load safetensor file data directly into the model, filtering by layer if n_layer is provided.
-        """
-
-        model_params = dict(model.named_parameters(recurse=True))
-        model_buffers = dict(model.named_buffers(recurse=True))
-        safetensor_files = glob.glob(f"{model_id}/*.safetensors")
-
-        target_layers = list(range(n_layer)) if n_layer is not None else None
-
-        for safetensor_file in safetensor_files:
-            file_data = load_file(safetensor_file)
-            for key, value in file_data.items():
-                if target_layers is not None:
-                    parts = key.split(".")
-
-                    if len(parts) > 2 and parts[2].isdigit() and (int(parts[2]) not in target_layers):
-                        continue
-
-                if key in model_params:
-                    model_params[key].data.copy_(value)
-                elif key in model_buffers:
-                    model_buffers[key].data.copy_(value)
-
-        return 0
-
     def __getattr__(self, __name: str) -> Any:
         """
         Special method to delegate attribute access to the original Huggingface LM class.
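
Review note: the inline safetensors loader removed above is consolidated into prepare_model_for_quantization (rbln_quantization.py grows by 120 lines, item 76). A minimal sketch of what that helper presumably does, reconstructed from the removed _load_weights_directly_to_model; only the helper's name and call site are confirmed by the diff, the body here is an assumption:

    import glob
    from safetensors.torch import load_file

    def prepare_model_for_quantization_sketch(model, model_id, n_layer=None):
        # Presumably the helper also swaps nn.Linear modules for quantized ones first
        # (the role of the removed update_layers_to_quantized), then loads weights.
        params = dict(model.named_parameters(recurse=True))
        buffers = dict(model.named_buffers(recurse=True))
        target_layers = set(range(n_layer)) if n_layer is not None else None
        for path in glob.glob(f"{model_id}/*.safetensors"):
            for key, value in load_file(path).items():
                parts = key.split(".")
                # Skip weights of layers beyond a truncated depth, as the old loader did.
                if target_layers is not None and len(parts) > 2 and parts[2].isdigit() and int(parts[2]) not in target_layers:
                    continue
                if key in params:
                    params[key].data.copy_(value)
                elif key in buffers:
                    buffers[key].data.copy_(value)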
@@ -278,7 +211,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         def redirect(func):
             return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)
 
-        val = getattr(self.original_cls, __name)
+        val = getattr(self.hf_class, __name, None) or getattr(PreTrainedModel, __name)
         if isinstance(val, Callable) and "self" in set(inspect.signature(val).parameters):
             return redirect(val)
         return val
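
Review note: attribute delegation now resolves through hf_class, with PreTrainedModel as a fallback for attributes the concrete HF class does not define. The original_cls property removed earlier suggests hf_class is derived by name; a hedged sketch of that resolution (the real implementation lives elsewhere, likely utils/model_utils.py, item 81, and may differ):

    import transformers

    def resolve_hf_class(rbln_cls_name: str):
        # Assumption: strip the "RBLN" prefix and look the result up in transformers,
        # e.g. "RBLNLlamaForCausalLM" -> transformers.LlamaForCausalLM.
        return getattr(transformers, rbln_cls_name.removeprefix("RBLN"), None)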
@@ -295,61 +228,12 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
 
         return model
 
-    def validate_quantization_config(quantize_config):
-        if quantize_config is not None:
-            q_format = quantize_config.get("format")
-            q_precision = quantize_config.get("precision")
-
-            if q_format not in SUPPORTED_QUANTIZATIONS:
-                raise ValueError(
-                    f"Invalid quantization format: {q_format}. "
-                    f"Supported formats are: {list(SUPPORTED_QUANTIZATIONS.keys())}"
-                )
-
-            if q_precision not in SUPPORTED_QUANTIZATIONS[q_format]:
-                raise ValueError(
-                    f"Invalid precision: {q_precision} for format: {q_format}. "
-                    f"Supported precisions are: {SUPPORTED_QUANTIZATIONS[q_format]}"
-                )
-
-        return quantize_config
-
-    @classmethod
-    def set_quantize_env(cls, quantize_config):
-        RBLN_QUANT_BITS_ENV = "RBLN_QUANT_BITS"
-        quantize_config = cls.validate_quantization_config(quantize_config)
-        if quantize_config is not None:
-            q_precision = quantize_config.get("precision")
-            quant_bits = q_precision.split("w")[1].split("a")[0]
-            os.environ[RBLN_QUANT_BITS_ENV] = quant_bits
-            return RBLN_QUANT_BITS_ENV
-        return None
-
-    @classmethod
-    def reset_quantize_env(cls, env_var_name):
-        if env_var_name is not None and env_var_name in os.environ:
-            del os.environ[env_var_name]
-
-    @classmethod
-    def manage_quantize_env(cls, func):
-        @functools.wraps(func)
-        def wrapper(*args, **kwargs):
-            quantize_config = kwargs.get("quantize_config")
-            quantize_env_var = cls.set_quantize_env(quantize_config)
-            try:
-                return func(*args, **kwargs)
-            finally:
-                cls.reset_quantize_env(quantize_env_var)
-
-        return wrapper
-
     @classmethod
     def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
         wrapper_cfg = {"max_seq_len": rbln_config.model_cfg["max_seq_len"]}
-
-        # If the model wrapper supports rbln-custom-flash-attention
-        if "kvcache_partition_len" in inspect.signature(cls._decoder_wrapper_cls.__init__).parameters:
-            wrapper_cfg["kvcache_partition_len"] = rbln_config.model_cfg.get("kvcache_partition_len")
+        wrapper_cfg["attn_impl"] = rbln_config.model_cfg.get("attn_impl")
+        wrapper_cfg["kvcache_partition_len"] = rbln_config.model_cfg.get("kvcache_partition_len")
+        wrapper_cfg["use_rotary_emb"] = cls._use_rotary_emb
 
         return cls._decoder_wrapper_cls(model, **wrapper_cfg).eval()
 
@@ -359,69 +243,46 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         wrapped_model = cls.wrap_model_if_needed(model, rbln_config)
 
         rbln_compile_configs = rbln_config.compile_cfgs
-        prefill_rbln_compile_config = rbln_compile_configs[0]
-        dec_rbln_compile_config = rbln_compile_configs[1]
-
-        @rbln_timer("JIT trace")
-        def get_scripted_model():
-            # This function is nested to dealloc the example inputs before compilation.
-            # FIXME: 3rd dummy_input(batch_idx) should be fill zero to compile flash_attn.
-            prefill_example_inputs = prefill_rbln_compile_config.get_dummy_inputs(fill=0)
-            dec_example_inputs = dec_rbln_compile_config.get_dummy_inputs(fill=0)
-
-            prefill_scripted_model = torch.jit.trace(
-                wrapped_model, prefill_example_inputs, check_trace=False, _store_inputs=False
-            )
-            dec_scripted_model = torch.jit.trace(
-                wrapped_model, dec_example_inputs, check_trace=False, _store_inputs=False
-            )
-            return prefill_scripted_model, dec_scripted_model
+        prefill_compile_config = rbln_compile_configs[0]
+        dec_compile_config = rbln_compile_configs[1]
 
-        prefill_scripted_model, dec_scripted_model = get_scripted_model()
+        context = CompileContext(use_weight_sharing=True)
 
-        @rbln_timer("Model conversion")
-        def scripted_model_to_ir():
-            prefill_ir = rebel.torchscript_to_ir(
-                prefill_scripted_model,
-                input_names=[v[0] for v in prefill_rbln_compile_config.input_info],
-            )
-            dec_ir = rebel.torchscript_to_ir(
-                dec_scripted_model,
-                input_names=[v[0] for v in dec_rbln_compile_config.input_info],
-            )
-            return prefill_ir, dec_ir
-
-        prefill_ir, dec_ir = scripted_model_to_ir()
-        # Caching prefill_decoder/decoder I/O
-        cache_index_offset = 5
-        connections = [
-            (prefill_ir.outputs[1 + i], prefill_ir.inputs[cache_index_offset + i])
-            for i in range(model.config.num_hidden_layers * 2)
-        ]
+        # Here we use meta tensor, for the memory efficiency.
+        meta_tensor_names = [name for name, _, _ in prefill_compile_config.input_info if "past_key_values" in name]
+        prefill_example_inputs = prefill_compile_config.get_dummy_inputs(fill=0, meta_tensor_names=meta_tensor_names)
+
+        # Mark static tensors (self kv states)
+        static_tensors = {}
+        for (name, _, _), tensor in zip(prefill_compile_config.input_info, prefill_example_inputs):
+            if "past_key_values" in name:
+                static_tensors[name] = tensor
+                context.mark_static_address(tensor)
+
+        dec_example_inputs = dec_compile_config.get_dummy_inputs(fill=0, static_tensors=static_tensors)
 
-        # Extract quantize_config from rbln_config
         quantize_config = rbln_config.model_cfg.get("quantization", None)
 
-        @cls.manage_quantize_env
+        @QuantizationManager.with_quantization_env
         def compile_model(*args, **kwargs):
-            # Remove quantize_config from kwargs
-            kwargs.pop("quantize_config", None)
-
-            # Call rebel.compile with the updated kwargs
-            return rebel.compile(*args, **kwargs)
-
-        compiled_model = compile_model(
-            prefill_ir,
-            dec_ir,
-            connections=connections,
-            fusion=prefill_rbln_compile_config.fusion,
-            npu=prefill_rbln_compile_config.npu,
-            tensor_parallel_size=prefill_rbln_compile_config.tensor_parallel_size,
-            use_weight_sharing=True,
-            quantize_config=quantize_config,
-        )
+            wrapped_model.phase = "prefill"
+            compiled_prefill = RBLNModel.compile(
+                wrapped_model,
+                prefill_compile_config,
+                example_inputs=prefill_example_inputs,
+                compile_context=context,
+            )
 
-        return compiled_model
+            wrapped_model.phase = "decode"
+            compiled_decoder = RBLNModel.compile(
+                wrapped_model,
+                dec_compile_config,
+                example_inputs=dec_example_inputs,
+                compile_context=context,
+            )
+            return {"prefill": compiled_prefill, "decoder": compiled_decoder}
+
+        return compile_model(quantize_config=quantize_config)
 
     @classmethod
     def _get_rbln_config(
@@ -432,10 +293,10 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
     ) -> RBLNConfig:
         rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
         rbln_batch_size = rbln_kwargs.get("batch_size", None)
-        rbln_quantization = rbln_kwargs.get("quantization", None)
         rbln_use_inputs_embeds = rbln_kwargs.get("use_inputs_embeds", None)
-
-        rbln_quantization = cls.validate_quantization_config(rbln_quantization)
+        rbln_attn_impl = rbln_kwargs.get("attn_impl", None)
+        rbln_kvcache_partition_len = rbln_kwargs.get("kvcache_partition_len", None)
+        rbln_quantization = QuantizationManager.validate_quantization_config(rbln_kwargs.get("quantization", None))
 
         prefill_chunk_size = 128
         if rbln_max_seq_len is None:
@@ -444,9 +305,16 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
             )
         if rbln_max_seq_len is None:
             raise ValueError("`rbln_max_seq_len` should be specified.")
+
         rbln_batch_size = 1 if rbln_batch_size is None else rbln_batch_size
         rbln_use_inputs_embeds = False if rbln_use_inputs_embeds is None else rbln_use_inputs_embeds
 
+        rbln_attn_impl, rbln_kvcache_partition_len = validate_attention_method(
+            rbln_attn_impl=rbln_attn_impl,
+            rbln_kvcache_partition_len=rbln_kvcache_partition_len,
+            rbln_max_seq_len=rbln_max_seq_len,
+        )
+
         num_attention_heads = getattr(model_config, "n_head", None) or getattr(model_config, "num_attention_heads")
         num_key_value_heads = getattr(model_config, "num_key_value_heads", None) or num_attention_heads
         num_hidden_layers = getattr(model_config, "n_layer", None) or getattr(model_config, "num_hidden_layers")
@@ -472,9 +340,14 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                 [batch_size, query_length],
                 "int32",
             ),
-            ("batch_position", [], "int16"),
-            ("query_idx", [], "int16"),
         ]
+        if query_length > 1:
+            input_info.extend(
+                [
+                    ("batch_position", [], "int16"),
+                    ("query_position", [], "int16"),
+                ]
+            )
 
         input_info.extend(
             [
@@ -507,12 +380,12 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
             hidden_size=hidden_size,
         )
 
-        prefill_rbln_compile_config = RBLNCompileConfig(input_info=prefill_input_info)
-        dec_rbln_compile_config = RBLNCompileConfig(input_info=dec_input_info)
+        prefill_compile_config = RBLNCompileConfig(compiled_model_name="prefill", input_info=prefill_input_info)
+        dec_compile_config = RBLNCompileConfig(compiled_model_name="decoder", input_info=dec_input_info)
 
         rbln_config = RBLNConfig(
             rbln_cls=cls.__name__,
-            compile_cfgs=[prefill_rbln_compile_config, dec_rbln_compile_config],
+            compile_cfgs=[prefill_compile_config, dec_compile_config],
             rbln_kwargs=rbln_kwargs,
         )
@@ -522,6 +395,8 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                 "batch_size": rbln_batch_size,
                 "prefill_chunk_size": prefill_chunk_size,
                 "use_inputs_embeds": rbln_use_inputs_embeds,
+                "kvcache_partition_len": rbln_kvcache_partition_len,
+                "attn_impl": rbln_attn_impl,
             }
         )
 
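Review note: attn_impl and kvcache_partition_len now travel through model_cfg, making the attention backend a load-time option validated by validate_attention_method. A hedged usage sketch; the rbln_-prefixed kwarg names mirror the rbln_kwargs lookups above, and the "flash_attn" value is inferred from the new ops/flash_attn.py module rather than from documented behavior:

    from optimum.rbln import RBLNLlamaForCausalLM

    model = RBLNLlamaForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        export=True,                      # compile from the Hugging Face checkpoint
        rbln_max_seq_len=8192,
        rbln_batch_size=1,
        rbln_attn_impl="flash_attn",      # assumption: checked by validate_attention_method
        rbln_kvcache_partition_len=4096,  # assumption: KV-cache partition size for flash attention
    )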
@@ -532,12 +407,21 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
 
     @classmethod
     def _create_runtimes(
-        cls, compiled_models: List[rebel.RBLNCompiledModel], rbln_device_map: Dict[str, int]
+        cls,
+        compiled_models: List[rebel.RBLNCompiledModel],
+        rbln_device_map: Dict[str, int],
+        activate_profiler: Optional[bool] = None,
     ) -> List[rebel.Runtime]:
-        device_val = rbln_device_map[DEFAULT_COMPILED_MODEL_NAME]
+        if any(model_name not in rbln_device_map for model_name in ["prefill", "decoder"]):
+            cls._raise_missing_compiled_file_error(["prefill", "decoder"])
+
         return [
-            compiled_models[0].create_runtime(input_info_index=0, tensor_type="pt", device=device_val),
-            compiled_models[0].create_runtime(input_info_index=1, tensor_type="pt", device=device_val),
+            compiled_models[0].create_runtime(
+                tensor_type="pt", device=rbln_device_map["prefill"], activate_profiler=activate_profiler
+            ),
+            compiled_models[1].create_runtime(
+                tensor_type="pt", device=rbln_device_map["decoder"], activate_profiler=activate_profiler
+            ),
         ]
 
     def get_decoder(self):
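
Review note: runtimes are now addressed by compiled-model name rather than a single DEFAULT_COMPILED_MODEL_NAME entry plus input_info_index, and missing artifacts fail fast via _raise_missing_compiled_file_error. Illustratively (device ids are hypothetical):

    # 0.1.13: one map entry -> two runtimes carved out of compiled_models[0]
    # 0.2.0:  one device entry per named compiled model
    rbln_device_map = {"prefill": 0, "decoder": 0}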
@@ -610,8 +494,6 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         cache_position: Optional[torch.Tensor] = None,
         attention_mask: Optional[torch.LongTensor] = None,
         generate_idx: Optional[torch.Tensor] = None,
-        # from llava_next forward args
-        batch_idx: Optional[int] = None,
         **kwargs,
     ) -> Tuple[torch.FloatTensor]:
         # prefll
@@ -633,7 +515,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                     input_ids=input_tensor if inputs_embeds is None else None,
                     inputs_embeds=input_tensor if inputs_embeds is not None else None,
                     cache_position=cache_position,
-                    batch_idx=b_idx if batch_idx is None else batch_idx,  # Llava-next prefill
+                    batch_idx=b_idx,
                 )
                 logits.append(logit)
             logits = torch.cat(logits, dim=0)
@@ -671,12 +553,16 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                 ],
                 dtype=torch.float32,
                 device="cpu",
-            ),
-            torch.empty(size=[], dtype=torch.int16, device="cpu"),
+            )
         ]
 
         input_tensors = inputs_embeds if inputs_embeds is not None else input_ids
         query_length = input_tensors.shape[1]
+        if query_length > self.max_seq_len:
+            raise ValueError(
+                f"Input length ({query_length}) exceeds the maximum allowed sequence length ({self.max_seq_len})."
+            )
+
         _attention_mask = self.prefill_attention_mask.clone()
 
         for step in range(0, query_length, self.prefill_chunk_size):
@@ -709,15 +595,15 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
             _attention_mask[:, :, :, step - self.prefill_chunk_size : step] = 1
             _attention_mask[:, :, :, step : step + self.prefill_chunk_size] = self.causal_mask
 
-            query_idx = (query_length - 1) % self.prefill_chunk_size
+            query_position = (query_length - 1) % self.prefill_chunk_size
 
-            logits, _ = self.prefill_decoder(
+            logits = self.prefill_decoder(
                 input_ids=_input_tensors.contiguous() if inputs_embeds is None else None,
                 inputs_embeds=_input_tensors.contiguous() if inputs_embeds is not None else None,
                 attention_mask=_attention_mask.contiguous(),
                 cache_position=_cache_position.contiguous(),
                 batch_position=torch.tensor(batch_idx, dtype=torch.int16),
-                query_idx=torch.tensor(query_idx, dtype=torch.int16),
+                query_position=torch.tensor(query_position, dtype=torch.int16),
                 out=out_buffers,
             )
@@ -734,48 +620,30 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         cache_position: torch.Tensor = None,
     ) -> torch.FloatTensor:
         input_tensors = inputs_embeds if inputs_embeds is not None else input_ids
+        if input_tensors is None:
+            raise ValueError("Either `input_ids` or `inputs_embeds` must be provided.")
 
         batch_size = input_tensors.shape[0]
+        if batch_size != self.batch_size:
+            raise RuntimeError(
+                f"Batch size mismatch: got {batch_size}, expected {self.batch_size} (compiled batch size)."
+            )
+
+        if batch_size != cache_position.shape[0]:
+            raise RuntimeError(f"Cache position size mismatch: got {cache_position.shape[0]}, expected {batch_size}.")
 
         for b_idx in range(batch_size):
             decoding_step = cache_position[b_idx].item()
+            if not (0 <= decoding_step < self.dec_attn_mask.shape[-1]):
+                raise ValueError(
+                    f"Decoding step {decoding_step} out of bounds for attention mask with shape {self.dec_attn_mask.shape}."
+                )
             self.dec_attn_mask[b_idx, :, :, decoding_step] = 1
-
-        logits, _ = self.decoder(
+        logits = self.decoder(
             input_ids=input_tensors.contiguous() if inputs_embeds is None else None,
             inputs_embeds=input_tensors.contiguous() if inputs_embeds is not None else None,
             attention_mask=self.dec_attn_mask.contiguous(),
             cache_position=cache_position.contiguous(),
-            batch_position=torch.tensor(0, dtype=torch.int16),
-            query_idx=torch.tensor(0, dtype=torch.int16),
         )
 
         return logits
-
-    def vllm_forward(
-        self,
-        input_ids: torch.LongTensor = None,
-        inputs_embeds: torch.Tensor = None,
-        cache_position: torch.Tensor = None,
-        batch_idx: Optional[int] = None,
-        **kwargs,
-    ) -> Tuple[torch.FloatTensor]:
-        # prefll
-        if cache_position.shape[-1] > 1:
-            logits = self._forward_prefill(
-                input_ids=input_ids,
-                inputs_embeds=inputs_embeds,
-                cache_position=cache_position,
-                batch_idx=batch_idx,
-            )
-        # decoder
-        else:
-            logits = self._forward_decoder(
-                input_ids=input_ids,
-                inputs_embeds=inputs_embeds,
-                cache_position=cache_position,
-            )
-
-        return RBLNDecoderOnlyOutput(
-            logits=logits,
-        )
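
Review note: vllm_forward is removed without a deprecation shim. The unified forward() earlier in this file already dispatches on cache_position (length > 1 selects the chunked prefill path, otherwise single-step decode) and presumably wraps the logits in RBLNDecoderOnlyOutput, so serving integrations are expected to call forward() directly.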
optimum/rbln/transformers/models/dpt/modeling_dpt.py (item 53):
@@ -27,7 +27,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Union
 from transformers import AutoModelForDepthEstimation
 from transformers.modeling_outputs import DepthEstimatorOutput
 
-from ....modeling_base import RBLNModel
+from ....modeling import RBLNModel
 from ....modeling_config import RBLNCompileConfig, RBLNConfig
 