PyPI - optimum-rbln - Versions diffs - 0.7.3a4__py3-none-any.whl → 0.7.3a6__py3-none-any.whl - Mend

optimum-rbln 0.7.3a4__py3-none-any.whl → 0.7.3a6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

optimum/rbln/__init__.py CHANGED Viewed

@@ -78,9 +78,13 @@ _import_structure = {
         "RBLNAutoencoderKL",
         "RBLNControlNetModel",
         "RBLNPriorTransformer",
+        "RBLNKandinskyV22CombinedPipeline",
+        "RBLNKandinskyV22Img2ImgCombinedPipeline",
         "RBLNKandinskyV22InpaintCombinedPipeline",
         "RBLNKandinskyV22InpaintPipeline",
+        "RBLNKandinskyV22Img2ImgPipeline",
         "RBLNKandinskyV22PriorPipeline",
+        "RBLNKandinskyV22Pipeline",
         "RBLNStableDiffusionPipeline",
         "RBLNStableDiffusionXLPipeline",
         "RBLNUNet2DConditionModel",
@@ -107,8 +111,12 @@ if TYPE_CHECKING:
         RBLNAutoencoderKL,
         RBLNControlNetModel,
         RBLNDiffusionMixin,
+        RBLNKandinskyV22CombinedPipeline,
+        RBLNKandinskyV22Img2ImgCombinedPipeline,
+        RBLNKandinskyV22Img2ImgPipeline,
         RBLNKandinskyV22InpaintCombinedPipeline,
         RBLNKandinskyV22InpaintPipeline,
+        RBLNKandinskyV22Pipeline,
         RBLNKandinskyV22PriorPipeline,
         RBLNMultiControlNetModel,
         RBLNPriorTransformer,

optimum/rbln/__version__.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.7.3a4'
-__version_tuple__ = version_tuple = (0, 7, 3, 'a4')
+__version__ = version = '0.7.3a6'
+__version_tuple__ = version_tuple = (0, 7, 3, 'a6')

optimum/rbln/diffusers/__init__.py CHANGED Viewed

@@ -24,9 +24,13 @@ ALL_IMPORTABLE_CLASSES.update(LOADABLE_CLASSES["optimum.rbln"])
 _import_structure = {
     "pipelines": [
+        "RBLNKandinskyV22CombinedPipeline",
+        "RBLNKandinskyV22Img2ImgCombinedPipeline",
         "RBLNKandinskyV22InpaintCombinedPipeline",
         "RBLNKandinskyV22InpaintPipeline",
+        "RBLNKandinskyV22Img2ImgPipeline",
         "RBLNKandinskyV22PriorPipeline",
+        "RBLNKandinskyV22Pipeline",
         "RBLNStableDiffusionPipeline",
         "RBLNStableDiffusionXLPipeline",
         "RBLNStableDiffusionImg2ImgPipeline",
@@ -66,8 +70,12 @@ if TYPE_CHECKING:
         RBLNVQModel,
     )
     from .pipelines import (
+        RBLNKandinskyV22CombinedPipeline,
+        RBLNKandinskyV22Img2ImgCombinedPipeline,
+        RBLNKandinskyV22Img2ImgPipeline,
         RBLNKandinskyV22InpaintCombinedPipeline,
         RBLNKandinskyV22InpaintPipeline,
+        RBLNKandinskyV22Pipeline,
         RBLNKandinskyV22PriorPipeline,
         RBLNMultiControlNetModel,
         RBLNStableDiffusion3Img2ImgPipeline,

optimum/rbln/diffusers/models/autoencoders/vq_model.py CHANGED Viewed

@@ -90,9 +90,17 @@ class RBLNVQModel(RBLNModel):
         model_config: "PretrainedConfig",
         rbln_kwargs: Dict[str, Any] = {},
     ) -> RBLNConfig:
-        batch_size = rbln_kwargs.get("batch_size") or 1
-        height = rbln_kwargs.get("img_height") or 512
-        width = rbln_kwargs.get("img_width") or 512
+        batch_size = rbln_kwargs.get("batch_size")
+        if batch_size is None:
+            batch_size = 1
+        height = rbln_kwargs.get("img_height")
+        if height is None:
+            height = 512
+        width = rbln_kwargs.get("img_width")
+        if width is None:
+            width = 512
         if hasattr(model_config, "block_out_channels"):
             scale_factor = 2 ** (len(model_config.block_out_channels) - 1)

optimum/rbln/diffusers/models/unets/unet_2d_condition.py CHANGED Viewed

@@ -176,15 +176,22 @@ class RBLNUNet2DConditionModel(RBLNModel):
             raise ValueError("Both image height and image width must be given or not given")
         elif image_size[0] is None and image_size[1] is None:
             if rbln_config["img2img_pipeline"]:
-                # In case of img2img, sample size of unet is determined by vae encoder.
-                vae_sample_size = pipe.vae.config.sample_size
-                if isinstance(vae_sample_size, int):
-                    sample_size = vae_sample_size // scale_factor
-                else:
-                    sample_size = (
-                        vae_sample_size[0] // scale_factor,
-                        vae_sample_size[1] // scale_factor,
+                if hasattr(pipe, "vae"):
+                    # In case of img2img, sample size of unet is determined by vae encoder.
+                    vae_sample_size = pipe.vae.config.sample_size
+                    if isinstance(vae_sample_size, int):
+                        sample_size = vae_sample_size // scale_factor
+                    else:
+                        sample_size = (
+                            vae_sample_size[0] // scale_factor,
+                            vae_sample_size[1] // scale_factor,
+                        )
+                elif hasattr(pipe, "movq"):
+                    logger.warning(
+                        "RBLN config 'img_height' and 'img_width' should have been provided for this pipeline. "
+                        "Both variable will be set 512 by default."
                     )
+                    sample_size = (512 // scale_factor, 512 // scale_factor)
             else:
                 sample_size = pipe.unet.config.sample_size
         else:

optimum/rbln/diffusers/pipelines/__init__.py CHANGED Viewed

@@ -26,9 +26,13 @@ _import_structure = {
         "RBLNStableDiffusionXLControlNetPipeline",
     ],
     "kandinsky2_2": [
+        "RBLNKandinskyV22CombinedPipeline",
+        "RBLNKandinskyV22Img2ImgCombinedPipeline",
         "RBLNKandinskyV22InpaintCombinedPipeline",
         "RBLNKandinskyV22InpaintPipeline",
+        "RBLNKandinskyV22Img2ImgPipeline",
         "RBLNKandinskyV22PriorPipeline",
+        "RBLNKandinskyV22Pipeline",
     ],
     "stable_diffusion": [
         "RBLNStableDiffusionImg2ImgPipeline",
@@ -55,8 +59,12 @@ if TYPE_CHECKING:
         RBLNStableDiffusionXLControlNetPipeline,
     )
     from .kandinsky2_2 import (
+        RBLNKandinskyV22CombinedPipeline,
+        RBLNKandinskyV22Img2ImgCombinedPipeline,
+        RBLNKandinskyV22Img2ImgPipeline,
         RBLNKandinskyV22InpaintCombinedPipeline,
         RBLNKandinskyV22InpaintPipeline,
+        RBLNKandinskyV22Pipeline,
         RBLNKandinskyV22PriorPipeline,
     )
     from .stable_diffusion import (

optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py CHANGED Viewed

@@ -12,6 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .pipeline_kandinsky2_2_combined import RBLNKandinskyV22InpaintCombinedPipeline
+from .pipeline_kandinsky2_2 import RBLNKandinskyV22Pipeline
+from .pipeline_kandinsky2_2_combined import (
+    RBLNKandinskyV22CombinedPipeline,
+    RBLNKandinskyV22Img2ImgCombinedPipeline,
+    RBLNKandinskyV22InpaintCombinedPipeline,
+)
+from .pipeline_kandinsky2_2_img2img import RBLNKandinskyV22Img2ImgPipeline
 from .pipeline_kandinsky2_2_inpaint import RBLNKandinskyV22InpaintPipeline
 from .pipeline_kandinsky2_2_prior import RBLNKandinskyV22PriorPipeline

optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py ADDED Viewed

@@ -0,0 +1,25 @@
+# Copyright 2024 Rebellions Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from diffusers import KandinskyV22Pipeline
+from ...modeling_diffusers import RBLNDiffusionMixin
+class RBLNKandinskyV22Pipeline(RBLNDiffusionMixin, KandinskyV22Pipeline):
+    original_class = KandinskyV22Pipeline
+    _submodules = ["unet", "movq"]
+    def get_compiled_image_size(self):
+        return self.movq.image_size

optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py CHANGED Viewed

@@ -14,6 +14,8 @@
 from diffusers import (
     DDPMScheduler,
+    KandinskyV22CombinedPipeline,
+    KandinskyV22Img2ImgCombinedPipeline,
     KandinskyV22InpaintCombinedPipeline,
     PriorTransformer,
     UnCLIPScheduler,
@@ -28,10 +30,114 @@ from transformers import (
 )
 from ...modeling_diffusers import RBLNDiffusionMixin
+from .pipeline_kandinsky2_2 import RBLNKandinskyV22Pipeline
+from .pipeline_kandinsky2_2_img2img import RBLNKandinskyV22Img2ImgPipeline
 from .pipeline_kandinsky2_2_inpaint import RBLNKandinskyV22InpaintPipeline
 from .pipeline_kandinsky2_2_prior import RBLNKandinskyV22PriorPipeline
+class RBLNKandinskyV22CombinedPipeline(RBLNDiffusionMixin, KandinskyV22CombinedPipeline):
+    original_class = KandinskyV22CombinedPipeline
+    _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22Pipeline}
+    _submodules = ["prior_pipe", "decoder_pipe"]
+    _prefix = {"prior_pipe": "prior_"}
+    def __init__(
+        self,
+        unet: "UNet2DConditionModel",
+        scheduler: "DDPMScheduler",
+        movq: "VQModel",
+        prior_prior: "PriorTransformer",
+        prior_image_encoder: "CLIPVisionModelWithProjection",
+        prior_text_encoder: "CLIPTextModelWithProjection",
+        prior_tokenizer: "CLIPTokenizer",
+        prior_scheduler: "UnCLIPScheduler",
+        prior_image_processor: "CLIPImageProcessor",
+    ):
+        RBLNDiffusionMixin.__init__(self)
+        super(KandinskyV22CombinedPipeline, self).__init__()
+        self.register_modules(
+            unet=unet,
+            scheduler=scheduler,
+            movq=movq,
+            prior_prior=prior_prior,
+            prior_image_encoder=prior_image_encoder,
+            prior_text_encoder=prior_text_encoder,
+            prior_tokenizer=prior_tokenizer,
+            prior_scheduler=prior_scheduler,
+            prior_image_processor=prior_image_processor,
+        )
+        self.prior_pipe = RBLNKandinskyV22PriorPipeline(
+            prior=prior_prior,
+            image_encoder=prior_image_encoder,
+            text_encoder=prior_text_encoder,
+            tokenizer=prior_tokenizer,
+            scheduler=prior_scheduler,
+            image_processor=prior_image_processor,
+        )
+        self.decoder_pipe = RBLNKandinskyV22Pipeline(
+            unet=unet,
+            scheduler=scheduler,
+            movq=movq,
+        )
+    def get_compiled_image_size(self):
+        return self.movq.image_size
+class RBLNKandinskyV22Img2ImgCombinedPipeline(RBLNDiffusionMixin, KandinskyV22Img2ImgCombinedPipeline):
+    original_class = KandinskyV22Img2ImgCombinedPipeline
+    _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22Img2ImgPipeline}
+    _submodules = ["prior_pipe", "decoder_pipe"]
+    _prefix = {"prior_pipe": "prior_"}
+    def __init__(
+        self,
+        unet: "UNet2DConditionModel",
+        scheduler: "DDPMScheduler",
+        movq: "VQModel",
+        prior_prior: "PriorTransformer",
+        prior_image_encoder: "CLIPVisionModelWithProjection",
+        prior_text_encoder: "CLIPTextModelWithProjection",
+        prior_tokenizer: "CLIPTokenizer",
+        prior_scheduler: "UnCLIPScheduler",
+        prior_image_processor: "CLIPImageProcessor",
+    ):
+        RBLNDiffusionMixin.__init__(self)
+        super(KandinskyV22Img2ImgCombinedPipeline, self).__init__()
+        self.register_modules(
+            unet=unet,
+            scheduler=scheduler,
+            movq=movq,
+            prior_prior=prior_prior,
+            prior_image_encoder=prior_image_encoder,
+            prior_text_encoder=prior_text_encoder,
+            prior_tokenizer=prior_tokenizer,
+            prior_scheduler=prior_scheduler,
+            prior_image_processor=prior_image_processor,
+        )
+        self.prior_pipe = RBLNKandinskyV22PriorPipeline(
+            prior=prior_prior,
+            image_encoder=prior_image_encoder,
+            text_encoder=prior_text_encoder,
+            tokenizer=prior_tokenizer,
+            scheduler=prior_scheduler,
+            image_processor=prior_image_processor,
+        )
+        self.decoder_pipe = RBLNKandinskyV22Img2ImgPipeline(
+            unet=unet,
+            scheduler=scheduler,
+            movq=movq,
+        )
+    def get_compiled_image_size(self):
+        return self.movq.image_size
 class RBLNKandinskyV22InpaintCombinedPipeline(RBLNDiffusionMixin, KandinskyV22InpaintCombinedPipeline):
     original_class = KandinskyV22InpaintCombinedPipeline
     _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22InpaintPipeline}

optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py ADDED Viewed

@@ -0,0 +1,25 @@
+# Copyright 2024 Rebellions Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from diffusers import KandinskyV22Img2ImgPipeline
+from ...modeling_diffusers import RBLNDiffusionMixin
+class RBLNKandinskyV22Img2ImgPipeline(RBLNDiffusionMixin, KandinskyV22Img2ImgPipeline):
+    original_class = KandinskyV22Img2ImgPipeline
+    _submodules = ["unet", "movq"]
+    def get_compiled_image_size(self):
+        return self.movq.image_size

optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py CHANGED Viewed

@@ -20,3 +20,6 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 class RBLNKandinskyV22InpaintPipeline(RBLNDiffusionMixin, KandinskyV22InpaintPipeline):
     original_class = KandinskyV22InpaintPipeline
     _submodules = ["unet", "movq"]
+    def get_compiled_image_size(self):
+        return self.movq.image_size

optimum/rbln/modeling_base.py CHANGED Viewed

@@ -282,15 +282,6 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
             **kwargs,
         )
-    @classmethod
-    def _check_compiled_models(
-        cls, compiled_models: Dict[str, rebel.RBLNCompiledModel], rbln_config: RBLNConfig, config: "PretrainedConfig"
-    ):
-        # check compiled model can create runtimes.
-        # this logic currently only works in LLM
-        # fail when LLM model using Paged Attention can't guarantee max sequence length
-        pass
     @classmethod
     def _from_compiled_models(
         cls,
@@ -305,8 +296,6 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         if isinstance(model_save_dir, str):
             model_save_dir = Path(model_save_dir)
-        cls._check_compiled_models(compiled_models=rbln_compiled_models, rbln_config=rbln_config, config=config)
         # FIXME:: Should we convert it?
         compiled_model_names = [cfg.compiled_model_name for cfg in rbln_config.compile_cfgs]
         rbln_compiled_models = [rbln_compiled_models[cm_name] for cm_name in compiled_model_names]

optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py CHANGED Viewed

@@ -98,9 +98,9 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
         """
         NO_BLOCKS_ERROR = (
-            "No memory blocks are available for allocation."
-            "The generate() API cannot complete this inference task because Paged Attention is not fully supported by optimum-rbln."
-            "This is supported by vllm-rbln (see: https://docs.rbln.ai/software/model_serving/vllm_support/vllm-rbln.html)."
+            "No memory blocks are available for allocation. "
+            "The generate() API cannot complete this inference task because Paged Attention is not fully supported by optimum-rbln. "
+            "This is supported by vllm-rbln (see: https://docs.rbln.ai/software/model_serving/vllm_support/vllm-rbln.html). "
             "Using vllm-rbln should fix this issue and enhance inference performance."
         )
@@ -575,59 +575,58 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         nbits_per_param: int,
         n_model_params: int,
     ) -> int:
-        num_attention_heads = getattr(config, "n_head", None) or getattr(config, "num_attention_heads")
-        num_layers = getattr(config, "n_layer", None) or getattr(config, "num_hidden_layers")
-        head_dim = getattr(config, "head_dim", None) or config.hidden_size // num_attention_heads
-        vocab_size = config.vocab_size
-        hidden_size = getattr(config, "n_embd", None) or getattr(config, "hidden_size")
-        num_key_value_heads = getattr(config, "num_key_value_heads", None) or num_attention_heads
-        TARGET_DRAM_LIMIT = int(tensor_parallel_size * 15.7 * 2**30)  # 16GB # TODO(jongho): 더 정확한 값
         def align(x: int, nbytes: int) -> int:
             return int(math.ceil(x / nbytes) * nbytes)
         def align_2MB(x: int) -> int:
             return align(x, 2 * 1024 * 1024)
-        def get_kernel_size() -> int:
-            # TODO: Implement
-            lm_heads_params = align(vocab_size, 64) * hidden_size
-            lm_heads_nbytes = (
-                align_2MB(lm_heads_params * nbits_per_param // 8 / tensor_parallel_size) * tensor_parallel_size
-            )
+        num_attention_heads = getattr(config, "n_head", None) or getattr(config, "num_attention_heads")
+        num_layers = getattr(config, "n_layer", None) or getattr(config, "num_hidden_layers")
+        head_dim = getattr(config, "head_dim", None) or config.hidden_size // num_attention_heads
+        vocab_size = config.vocab_size
+        hidden_size = getattr(config, "n_embd", None) or getattr(config, "hidden_size")
+        num_key_value_heads = getattr(config, "num_key_value_heads", None) or num_attention_heads
-            params = n_model_params - lm_heads_params
-            layer_nbytes = (
-                align_2MB(params * nbits_per_param // 8 / num_layers / tensor_parallel_size)
-                * num_layers
-                * tensor_parallel_size
-            )
+        # TODO(jongho): Update if target npu is REBEL.
+        ATOM_DRAM_NBYTES = 16 * 2**30
+        ATOM_SYS_DRAM_NBYTES = 288 * 2**20
+        available_dram = tensor_parallel_size * (ATOM_DRAM_NBYTES - ATOM_SYS_DRAM_NBYTES)
-            return layer_nbytes + lm_heads_nbytes
+        # Get estimated kernel size (approximated)
+        lm_heads_params = align(vocab_size, 64) * hidden_size
+        lm_heads_nbytes = (
+            align_2MB(lm_heads_params * nbits_per_param // 8 / tensor_parallel_size) * tensor_parallel_size
+        )
+        params = n_model_params - lm_heads_params
+        layer_nbytes = (
+            align_2MB(params * nbits_per_param // 8 / num_layers / tensor_parallel_size)
+            * num_layers
+            * tensor_parallel_size
+        )
+        kernel_size = layer_nbytes + lm_heads_nbytes
-        available_dram = TARGET_DRAM_LIMIT - get_kernel_size()
+        available_dram -= kernel_size
-        buffer = 2**30  # 1GB
-        if tensor_parallel_size <= 2:
+        # TODO: Accurate buffer estimation
+        buffer = 2**30  # 1GB Buffer
+        if tensor_parallel_size <= 4:
             buffer /= 4
         available_dram -= buffer
-        def get_nbytes_per_block() -> int:
-            return (
-                align_2MB(
-                    kvcache_block_size
-                    * head_dim
-                    * math.ceil(num_key_value_heads / tensor_parallel_size)  # Shard
-                    * 2  # (fp16)
-                )
-                * num_layers
-                * 2  # (k, v)
-                * tensor_parallel_size
+        # Estimate nbytes per a single kvcache block
+        nbytes_per_block = (
+            align_2MB(
+                kvcache_block_size
+                * head_dim
+                * math.ceil(num_key_value_heads / tensor_parallel_size)  # Shard
+                * 2  # (fp16)
             )
-        nbytes_per_block = get_nbytes_per_block()
+            * num_layers
+            * 2  # (k, v)
+            * tensor_parallel_size
+        )
         n_blocks = available_dram // nbytes_per_block
         return n_blocks, nbytes_per_block
@@ -685,27 +684,28 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
             else:
                 rbln_kvcache_block_size = rbln_kvcache_partition_len
-        max_num_blocks, nbytes_per_block = cls.get_maximum_num_blocks(
-            config=model_config,
-            tensor_parallel_size=rbln_kwargs.get("tensor_parallel_size", 1),
-            kvcache_block_size=rbln_kvcache_block_size,
-            nbits_per_param=16 if rbln_quantization is None else 4,  # TODO(jongho): FIX Ad-hoc
-            n_model_params=rbln_kwargs["n_model_params"],
-        )
-        model_num_blocks = (rbln_max_seq_len // rbln_kvcache_block_size) * rbln_batch_size
-        rbln_kvcache_num_blocks = min(model_num_blocks, max_num_blocks)
+        rbln_kvcache_num_blocks = (rbln_max_seq_len // rbln_kvcache_block_size) * rbln_batch_size
+        if rbln_attn_impl == "flash_attn":
+            max_num_blocks, _ = cls.get_maximum_num_blocks(
+                config=model_config,
+                tensor_parallel_size=rbln_kwargs.get("tensor_parallel_size", 1),
+                kvcache_block_size=rbln_kvcache_block_size,
+                nbits_per_param=16 if rbln_quantization is None else 4,  # TODO(jongho): FIX Ad-hoc
+                n_model_params=rbln_kwargs["n_model_params"],
+            )
+            rbln_kvcache_num_blocks = min(rbln_kvcache_num_blocks, max_num_blocks)
-        required_blocks = rbln_max_seq_len // rbln_kvcache_block_size + 1
-        if rbln_kvcache_num_blocks < required_blocks:
-            rbln_kvcache_num_blocks = required_blocks
+            required_blocks = rbln_max_seq_len // rbln_kvcache_block_size + 1
+            if rbln_kvcache_num_blocks < required_blocks:
+                rbln_kvcache_num_blocks = required_blocks
-        logger.info(f"[KVCache] Compiling with num_blocks: {rbln_kvcache_num_blocks}")
+            logger.info(f"[KVCache] Compiling with num_blocks: {rbln_kvcache_num_blocks}")
-        if rbln_kvcache_num_blocks < rbln_batch_size:
-            raise RuntimeError(
-                f"Batch size ({rbln_batch_size}) exceeds available KV cache blocks ({rbln_kvcache_num_blocks}). "
-                "Ensure the number of blocks is at least equal to the batch size."
-            )
+            if rbln_kvcache_num_blocks < rbln_batch_size:
+                raise RuntimeError(
+                    f"Batch size ({rbln_batch_size}) exceeds available KV cache blocks ({rbln_kvcache_num_blocks}). "
+                    "Ensure the number of blocks is at least equal to the batch size."
+                )
         num_attention_heads = getattr(model_config, "n_head", None) or getattr(model_config, "num_attention_heads")
         num_key_value_heads = getattr(model_config, "num_key_value_heads", None) or num_attention_heads
@@ -805,9 +805,6 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                 "kvcache_block_size": rbln_kvcache_block_size,
                 "attn_impl": rbln_attn_impl,
                 "kvcache_num_blocks": rbln_kvcache_num_blocks,
-                "model_num_blocks": model_num_blocks,
-                "max_num_blocks": max_num_blocks,
-                "nbytes_per_block": nbytes_per_block,
             }
         )

optimum/rbln/transformers/models/phi/phi_architecture.py CHANGED Viewed

@@ -92,7 +92,7 @@ class PhiLayer(DecoderOnlyLayer):
         hidden_states = self.get_pre_attention_layernorm()(hidden_states)
-        attn_outputs, present_key_values = self.self_attn(
+        attn_output = self.self_attn(
             hidden_states=hidden_states,
             attention_mask=attention_mask,
             seq_positions=seq_positions,
@@ -104,9 +104,9 @@ class PhiLayer(DecoderOnlyLayer):
         feed_forward_hidden_states = self._original_mod.mlp(hidden_states)
-        hidden_states = attn_outputs + feed_forward_hidden_states + residual
+        hidden_states = attn_output + feed_forward_hidden_states + residual
-        return hidden_states, present_key_values
+        return hidden_states
 class PhiModel(DecoderOnlyModel):

{optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.7.3a4
+Version: 0.7.3a6
 Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai

{optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a6.dist-info}/RECORD RENAMED Viewed

@@ -1,31 +1,33 @@
-optimum/rbln/__init__.py,sha256=eHi15YM3989AcX52jka9rUmgAtlp1PHqMNwBEdOfuu8,6554
-optimum/rbln/__version__.py,sha256=MLlg_138GxyhciEP0ZB5dPN8vriXkicRnaZiwqygxOY,519
+optimum/rbln/__init__.py,sha256=ZDzXcl-oAcYJhKjJMpotjbTih9awo7HzUb6T3MUEP6Q,6894
+optimum/rbln/__version__.py,sha256=9voT1MrnPHKvqTeiZK8bNEZcPseZOq7N_U5etptnmTE,519
 optimum/rbln/modeling.py,sha256=nJsAs5zs--VVOYGFjYNpqfxYIemJIK4Lr0WEzlDLdP0,8390
-optimum/rbln/modeling_base.py,sha256=Ow73GVJF1N5cDFO8_rgirtGj1wC-cXBDyqXHW5PCybA,22270
+optimum/rbln/modeling_base.py,sha256=dNCL-BhrWCpuOVkZaj8-MW567Tf4lLo3p3Z3ldjWJfU,21779
 optimum/rbln/modeling_config.py,sha256=7104bxmrvKW4Q6XTruQayiIGl8GHDFmPkJ3cknMIInE,11335
-optimum/rbln/diffusers/__init__.py,sha256=pOyoXv3-JRzTBSwPKbgLS9H6F2K9dJdReEmpGhcLQYU,3283
+optimum/rbln/diffusers/__init__.py,sha256=Hq87CbtiCy85YmK2SB-OmUyfv77oe3j4bsTenTRnu6w,3623
 optimum/rbln/diffusers/modeling_diffusers.py,sha256=zqVNgH9oeOx2iNE7VsW_FinVf4s6G5Idyh4TKz7XJJg,21116
 optimum/rbln/diffusers/models/__init__.py,sha256=mkCvJyH1KcwrsUvYSq_bVC79oOfyqtBSFDyPS1_48wA,1478
 optimum/rbln/diffusers/models/controlnet.py,sha256=EM_HlzCdaZdnnK0oGpY2fQeigPqHhlwh4NHCzlmoumI,10512
 optimum/rbln/diffusers/models/autoencoders/__init__.py,sha256=dg17ZTUsiqTcbIaEE4fqew9uRbao0diQ21PXvRKIqKg,679
 optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py,sha256=rCbC32bJnfXtsLdVvNVVHpRAkCYy6jeCSwIZ-JSReWk,9220
 optimum/rbln/diffusers/models/autoencoders/vae.py,sha256=gB9HR7Bf7wpIXLv-Js4Pc3oyWRlqEe4cms4sI2AJicY,4380
-optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=GunIau02_-lodYZBzd0ktJSNRT5axEFIZxSAfj2Mlyo,5974
+optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=b36QqPbayjApKivceQVVyQxHyR1ZOZ1ffuGgdALEPTQ,6117
 optimum/rbln/diffusers/models/transformers/__init__.py,sha256=V8rSR7WzHs-i8Cwb_MNxhY2NFbwPgxu24vGtkwl-6tk,706
 optimum/rbln/diffusers/models/transformers/prior_transformer.py,sha256=VG9cQo-_eppDvQSW1q1euAGBt1socUHetN_fIN2u1iU,6169
 optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=n_krmMgiRxWrG--567PNpk58EG_X7x7H4gidIkRvwjo,7308
 optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
-optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=xHnBzFrm7aNaolxrsotbjo9GkbNiNdTleXQoeqGLlhg,15540
-optimum/rbln/diffusers/pipelines/__init__.py,sha256=DAsM4eNks3hEY-bsUKSxRKmgwUWDGDlw82gfplSOdO8,2800
+optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=QIjVWQQf8KBn5rU7lvipdm3gNBxZl7l6HCAj7p5FjLU,15977
+optimum/rbln/diffusers/pipelines/__init__.py,sha256=5KLZ5LrpMzBya2e_3_PvEoPwG24U8JMexfw_ygZREKc,3140
 optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
 optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=JWKtnZYBIfgmbAo0SLFIvHBQCv2BPSFNvpcdjG4GUOY,4113
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=dGdw5cwJLS4CLv6IHskk5ZCcPgS7UDuHKbfOZ8ojNUs,35187
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py,sha256=7xCiXrH4ToCTHohVGFXqO7_f9G8HShYaHgZxoMZARkQ,33664
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py,sha256=Gzt2wg4dgFg0TV3Bu0cs8Xru3wVrxWUxxgciwZ-QKLE,44755
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py,sha256=RfwxNX_zQWFtvvFQJ5bt3qtHbdYdQV_3XLHm9WYCKOs,46084
-optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py,sha256=YFqA76_XiMNxPwqotbHug2kd7jCbOXOu5NlxG2hbaVs,808
-optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py,sha256=9szfe1NvOr1mgDnSPZvBGq1b65RElUrqLVhuErY3Dmw,2962
-optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py,sha256=WxBbHAZSAKDSWhFerPvUlIhhWEsejW4NmhwmWX-_b54,856
+optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py,sha256=I4YQq2HfA3xONbWsdJ870IEJPyLWeCDDG-UCJsu9YO8,1035
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py,sha256=aNFGOjth8tDvPrjYLbRWrkHr6p-8AFgcQx1Qay1fw70,904
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py,sha256=unqFDviA7dnx0yuo8L8tXVj2mjFYCPm7C9dcpdWBICc,6882
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py,sha256=fEs-WgJqWs5zvuCkKb7MuZokH9Mi6q-0DOEKxzfWxzo,932
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py,sha256=Ad2ZYCXaMiYpB0mz-8X1CGhILxrVbt7rRIXt6IPwYBM,932
 optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py,sha256=Mf7tzrXetwCgt7LuXfkX-CX1hltLgNZdwF9bHxAbDJM,874
 optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py,sha256=gz6CbP4T6w8XH3PIGRIJXTmKFsChJIkwcAEAsiR5Ydg,830
 optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py,sha256=DgRLzO9HxtgE1jICmHoHaqeVXM4Ih-5uo2JqNMAPMcc,876
@@ -60,7 +62,7 @@ optimum/rbln/transformers/models/clip/__init__.py,sha256=H9vuBwrmFO0-CqZhXUrKF-u
 optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=NiSm7bHs4SReHDUr53BBWSX0Y8bkKOeUSpsBDrp8YDw,6628
 optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=pDogsdpJKKB5rqnVFrRjwfhUvOSV-jZ3oARMsqSvOOQ,665
 optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=7OIKteJLKNxOLOg0w3lLOM7TxZovQn4jkglI9wRkrtQ,40609
-optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=W9HnxJoTz78Wc4X5Q3sMSHhMTSa7-9uQCFlnqNVozvA,38932
+optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=uGdPGcFrWm2gAwFLjfBiALwFsl49VGCReVi4NUfOPxM,38898
 optimum/rbln/transformers/models/dpt/__init__.py,sha256=gP1tkR3XMNlHq1GT87ugIVvb2o_1eAUg1JaniXjy1Lw,651
 optimum/rbln/transformers/models/dpt/modeling_dpt.py,sha256=ZsS2SOiqcA4azULB-WFEMQZbgIoOyVUKqVKqrw_tWzA,3430
 optimum/rbln/transformers/models/exaone/__init__.py,sha256=zYH_5tVa8-juEdsOIky7I33WSC3Zuhoq1upI0OHYeVw,859
@@ -85,7 +87,7 @@ optimum/rbln/transformers/models/mistral/mistral_architecture.py,sha256=_aU8TE_t
 optimum/rbln/transformers/models/mistral/modeling_mistral.py,sha256=7nrddoBIHf8S12LZWBUpotnvG3gND11vMQda9yYXJ-s,1560
 optimum/rbln/transformers/models/phi/__init__.py,sha256=mZLt1M7BbYEvSon5UlkniMUPa15SfjZFdw0kMSAF3VA,644
 optimum/rbln/transformers/models/phi/modeling_phi.py,sha256=j-6Pqd5rR2JE8I1pnKFlCi4nW5Dv3wZjoPWxohissoo,1516
-optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=rBQjr6MOYBo1i5yLekMSR81TzYlHrHAA30kyKDdR7ww,4132
+optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=TueyqmjPXWmOPOxBm4dIFyd0X3iV1jgw0U6c26iCAPk,4090
 optimum/rbln/transformers/models/qwen2/__init__.py,sha256=RAMWc21W_2I6DH9xBjeNxPECmAcTrbKhSIefq3Lass0,648
 optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=9-aFDvjMzPNUyGOz0qo33RE18bUFGYZ3Wt_68zb5uJY,1530
 optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
@@ -114,7 +116,7 @@ optimum/rbln/utils/model_utils.py,sha256=DfD_Z2qvZHqcddXqnzTM1AN8khanj3-DXK2lJvV
 optimum/rbln/utils/runtime_utils.py,sha256=5-DYniyP59nx-mrrbi7AqA77L85b4Cm5oLpaxidSyss,3699
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=oZoGrItB8WqY4i-K9WJPlLlcLohc1YGB9OHB8_XZw3A,4071
-optimum_rbln-0.7.3a4.dist-info/METADATA,sha256=8VNTOVgsgFtcFUuZ9VEeRQfC2LEB60OFmW92hlJo8V8,5300
-optimum_rbln-0.7.3a4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-optimum_rbln-0.7.3a4.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-optimum_rbln-0.7.3a4.dist-info/RECORD,,
+optimum_rbln-0.7.3a6.dist-info/METADATA,sha256=TGw8TCIfBQ9RWlzxf5JI16Zoy-xoEodnBO8m6SKXBsk,5300
+optimum_rbln-0.7.3a6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.7.3a6.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.7.3a6.dist-info/RECORD,,

{optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a6.dist-info}/WHEEL RENAMED Viewed

File without changes

{optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a6.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

optimum-rbln 0.7.3a4__py3-none-any.whl → 0.7.3a6__py3-none-any.whl

optimum-rbln 0.7.3a4__py3-none-any.whl → 0.7.3a6__py3-none-any.whl