PyPI - optimum-rbln - Versions diffs - 0.8.3a0__py3-none-any.whl → 0.8.3a2__py3-none-any.whl - Mend

optimum-rbln 0.8.3a0 (py3-none-any.whl) → 0.8.3a2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of optimum-rbln might be problematic. More details are available on the original Mend page.

Files changed (11)

optimum/rbln/__init__.py CHANGED

@@ -169,6 +169,9 @@ _import_structure = {
         "RBLNAutoencoderKLConfig",
         "RBLNAutoencoderKLCosmos",
         "RBLNAutoencoderKLCosmosConfig",
+        "RBLNAutoPipelineForImage2Image",
+        "RBLNAutoPipelineForInpainting",
+        "RBLNAutoPipelineForText2Image",
         "RBLNControlNetModel",
         "RBLNControlNetModelConfig",
         "RBLNCosmosTextToWorldPipeline",
@@ -238,6 +241,9 @@ if TYPE_CHECKING:
         RBLNAutoencoderKLConfig,
         RBLNAutoencoderKLCosmos,
         RBLNAutoencoderKLCosmosConfig,
+        RBLNAutoPipelineForImage2Image,
+        RBLNAutoPipelineForInpainting,
+        RBLNAutoPipelineForText2Image,
         RBLNControlNetModel,
         RBLNControlNetModelConfig,
         RBLNCosmosSafetyChecker,

optimum/rbln/__version__.py CHANGED

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.8.3a0'
-__version_tuple__ = version_tuple = (0, 8, 3, 'a0')
+__version__ = version = '0.8.3a2'
+__version_tuple__ = version_tuple = (0, 8, 3, 'a2')
 __commit_id__ = commit_id = None

optimum/rbln/diffusers/__init__.py CHANGED

@@ -59,6 +59,9 @@ _import_structure = {
         "RBLNVQModelConfig",
     ],
     "pipelines": [
+        "RBLNAutoPipelineForImage2Image",
+        "RBLNAutoPipelineForInpainting",
+        "RBLNAutoPipelineForText2Image",
         "RBLNCosmosTextToWorldPipeline",
         "RBLNCosmosVideoToWorldPipeline",
         "RBLNCosmosSafetyChecker",
@@ -144,6 +147,9 @@ if TYPE_CHECKING:
         RBLNVQModel,
     )
     from .pipelines import (
+        RBLNAutoPipelineForImage2Image,
+        RBLNAutoPipelineForInpainting,
+        RBLNAutoPipelineForText2Image,
         RBLNCosmosSafetyChecker,
         RBLNCosmosTextToWorldPipeline,
         RBLNCosmosVideoToWorldPipeline,

optimum/rbln/diffusers/pipelines/__init__.py CHANGED

@@ -18,6 +18,11 @@ from transformers.utils import _LazyModule
 _import_structure = {
+    "auto_pipeline": [
+        "RBLNAutoPipelineForImage2Image",
+        "RBLNAutoPipelineForInpainting",
+        "RBLNAutoPipelineForText2Image",
+    ],
     "controlnet": [
         "RBLNMultiControlNetModel",
         "RBLNStableDiffusionControlNetImg2ImgPipeline",
@@ -56,6 +61,11 @@ _import_structure = {
     ],
 }
 if TYPE_CHECKING:
+    from .auto_pipeline import (
+        RBLNAutoPipelineForImage2Image,
+        RBLNAutoPipelineForInpainting,
+        RBLNAutoPipelineForText2Image,
+    )
     from .controlnet import (
         RBLNMultiControlNetModel,
         RBLNStableDiffusionControlNetImg2ImgPipeline,

optimum/rbln/diffusers/pipelines/auto_pipeline.py ADDED

@@ -0,0 +1,237 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import importlib
+from typing import Type
+from diffusers.models.controlnets import ControlNetUnionModel
+from diffusers.pipelines.auto_pipeline import (
+    AUTO_IMAGE2IMAGE_PIPELINES_MAPPING,
+    AUTO_INPAINT_PIPELINES_MAPPING,
+    AUTO_TEXT2IMAGE_PIPELINES_MAPPING,
+    AutoPipelineForImage2Image,
+    AutoPipelineForInpainting,
+    AutoPipelineForText2Image,
+    _get_task_class,
+)
+from huggingface_hub.utils import validate_hf_hub_args
+from optimum.rbln.modeling_base import RBLNBaseModel
+from optimum.rbln.utils.model_utils import (
+    MODEL_MAPPING,
+    convert_hf_to_rbln_model_name,
+    convert_rbln_to_hf_model_name,
+    get_rbln_model_cls,
+)
+class RBLNAutoPipelineBase:
+    _model_mapping = None
+    _model_mapping_names = None
+    @classmethod
+    def get_rbln_cls(cls, pretrained_model_name_or_path, export=True, **kwargs):
+        if export:
+            hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
+            rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
+        else:
+            rbln_class_name = cls.get_rbln_model_cls_name(pretrained_model_name_or_path, **kwargs)
+            if convert_rbln_to_hf_model_name(rbln_class_name) not in cls._model_mapping_names:
+                raise ValueError(
+                    f"The architecture '{rbln_class_name}' is not supported by the `{cls.__name__}.from_pretrained()` method. "
+                    "Please use the `from_pretrained()` method of the appropriate class to load this model, "
+                    f"or directly use '{rbln_class_name}.from_pretrained()`."
+                )
+        try:
+            rbln_cls = get_rbln_model_cls(rbln_class_name)
+        except AttributeError as e:
+            raise AttributeError(
+                f"Class '{rbln_class_name}' not found in 'optimum.rbln' module for model ID '{pretrained_model_name_or_path}'. "
+                "Ensure that the class name is correctly mapped and available in the 'optimum.rbln' module."
+            ) from e
+        return rbln_cls
+    @classmethod
+    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
+        """
+        Retrieve the path to the compiled model directory for a given RBLN model.
+        Args:
+            pretrained_model_name_or_path (str): Identifier of the model.
+        Returns:
+            str: Path to the compiled model directory.
+        """
+        model_index_config = cls.load_config(pretrained_model_name_or_path)
+        if "_class_name" not in model_index_config:
+            raise ValueError(
+                "The `_class_name` field is missing from model_index_config. This is unexpected and should be reported as an issue. "
+                "Please use the `from_pretrained()` method of the appropriate class to load this model."
+            )
+        return model_index_config["_class_name"]
+    @classmethod
+    def infer_hf_model_class(
+        cls,
+        pretrained_model_or_path,
+        cache_dir=None,
+        force_download=False,
+        proxies=None,
+        token=None,
+        local_files_only=False,
+        revision=None,
+        **kwargs,
+    ):
+        config = cls.load_config(
+            pretrained_model_or_path,
+            cache_dir=cache_dir,
+            force_download=force_download,
+            proxies=proxies,
+            token=token,
+            local_files_only=local_files_only,
+            revision=revision,
+        )
+        pipeline_key_name = cls.get_pipeline_key_name(config, **kwargs)
+        pipeline_cls = _get_task_class(cls._model_mapping, pipeline_key_name)
+        return pipeline_cls
+    @classmethod
+    def get_pipeline_key_name(cls, config, **kwargs):
+        orig_class_name = config["_class_name"]
+        if "ControlPipeline" in orig_class_name:
+            to_replace = "ControlPipeline"
+        else:
+            to_replace = "Pipeline"
+        if "controlnet" in kwargs:
+            if isinstance(kwargs["controlnet"], ControlNetUnionModel):
+                orig_class_name = config["_class_name"].replace(to_replace, "ControlNetUnionPipeline")
+            else:
+                orig_class_name = config["_class_name"].replace(to_replace, "ControlNetPipeline")
+        if "enable_pag" in kwargs:
+            enable_pag = kwargs.pop("enable_pag")
+            if enable_pag:
+                orig_class_name = orig_class_name.replace(to_replace, "PAGPipeline")
+        return orig_class_name
+    @classmethod
+    @validate_hf_hub_args
+    def from_pretrained(cls, model_id, **kwargs):
+        rbln_cls = cls.get_rbln_cls(model_id, **kwargs)
+        return rbln_cls.from_pretrained(model_id, **kwargs)
+    @classmethod
+    def from_model(cls, model, **kwargs):
+        rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
+        return rbln_cls.from_model(model, **kwargs)
+    @staticmethod
+    def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
+        """
+        Register a new RBLN model class.
+        Args:
+            rbln_cls (Type[RBLNBaseModel]): The RBLN model class to register.
+            exist_ok (bool): Whether to allow registering an already registered model.
+        """
+        if not issubclass(rbln_cls, RBLNBaseModel):
+            raise ValueError("`rbln_cls` must be a subclass of RBLNBaseModel.")
+        native_cls = getattr(importlib.import_module("optimum.rbln"), rbln_cls.__name__, None)
+        if rbln_cls.__name__ in MODEL_MAPPING or native_cls is not None:
+            if not exist_ok:
+                raise ValueError(f"Model for {rbln_cls.__name__} already registered.")
+        MODEL_MAPPING[rbln_cls.__name__] = rbln_cls
+class RBLNAutoPipelineForText2Image(RBLNAutoPipelineBase, AutoPipelineForText2Image):
+    _model_mapping = AUTO_TEXT2IMAGE_PIPELINES_MAPPING
+    _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_TEXT2IMAGE_PIPELINES_MAPPING.items()}
+class RBLNAutoPipelineForImage2Image(RBLNAutoPipelineBase, AutoPipelineForImage2Image):
+    _model_mapping = AUTO_IMAGE2IMAGE_PIPELINES_MAPPING
+    _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.items()}
+    @classmethod
+    def get_pipeline_key_name(cls, config, **kwargs):
+        orig_class_name = config["_class_name"]
+        # the `orig_class_name` can be:
+        # `- *Pipeline` (for regular text-to-image checkpoint)
+        #  - `*ControlPipeline` (for Flux tools specific checkpoint)
+        # `- *Img2ImgPipeline` (for refiner checkpoint)
+        if "Img2Img" in orig_class_name:
+            to_replace = "Img2ImgPipeline"
+        elif "ControlPipeline" in orig_class_name:
+            to_replace = "ControlPipeline"
+        else:
+            to_replace = "Pipeline"
+        if "controlnet" in kwargs:
+            if isinstance(kwargs["controlnet"], ControlNetUnionModel):
+                orig_class_name = orig_class_name.replace(to_replace, "ControlNetUnion" + to_replace)
+            else:
+                orig_class_name = orig_class_name.replace(to_replace, "ControlNet" + to_replace)
+        if "enable_pag" in kwargs:
+            enable_pag = kwargs.pop("enable_pag")
+            if enable_pag:
+                orig_class_name = orig_class_name.replace(to_replace, "PAG" + to_replace)
+        if to_replace == "ControlPipeline":
+            orig_class_name = orig_class_name.replace(to_replace, "ControlImg2ImgPipeline")
+        return orig_class_name
+class RBLNAutoPipelineForInpainting(RBLNAutoPipelineBase, AutoPipelineForInpainting):
+    _model_mapping = AUTO_INPAINT_PIPELINES_MAPPING
+    _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_INPAINT_PIPELINES_MAPPING.items()}
+    @classmethod
+    def get_pipeline_key_name(cls, config, **kwargs):
+        orig_class_name = config["_class_name"]
+        # The `orig_class_name`` can be:
+        # `- *InpaintPipeline` (for inpaint-specific checkpoint)
+        #  - `*ControlPipeline` (for Flux tools specific checkpoint)
+        #  - or *Pipeline (for regular text-to-image checkpoint)
+        if "Inpaint" in orig_class_name:
+            to_replace = "InpaintPipeline"
+        elif "ControlPipeline" in orig_class_name:
+            to_replace = "ControlPipeline"
+        else:
+            to_replace = "Pipeline"
+        if "controlnet" in kwargs:
+            if isinstance(kwargs["controlnet"], ControlNetUnionModel):
+                orig_class_name = orig_class_name.replace(to_replace, "ControlNetUnion" + to_replace)
+            else:
+                orig_class_name = orig_class_name.replace(to_replace, "ControlNet" + to_replace)
+        if "enable_pag" in kwargs:
+            enable_pag = kwargs.pop("enable_pag")
+            if enable_pag:
+                orig_class_name = orig_class_name.replace(to_replace, "PAG" + to_replace)
+        if to_replace == "ControlPipeline":
+            orig_class_name = orig_class_name.replace(to_replace, "ControlInpaintPipeline")
+        return orig_class_name

optimum/rbln/transformers/models/t5/t5_architecture.py CHANGED

@@ -126,7 +126,14 @@ class T5Decoder(Seq2SeqDecoder):
         b_size = attention_mask.shape[0]
         batch_decoder_position_bias = []
         for i in range(b_size):
-            batch_position_bias = self._dec_position_bias[:, :, cache_position[i][0]].unsqueeze(2)
+            if torch.compiler.is_exporting():
+                cache_pos = cache_position[i][0].item()
+                torch._check_is_size(cache_pos)
+                torch._check(cache_pos >= 0)
+                torch._check(cache_pos < self._dec_position_bias.shape[2])
+            else:
+                cache_pos = cache_position[i][0]
+            batch_position_bias = torch.select(self._dec_position_bias, dim=2, index=cache_pos).unsqueeze(2)
             batch_decoder_position_bias.append(batch_position_bias)
         position_bias = torch.cat(batch_decoder_position_bias, dim=0)

optimum/rbln/transformers/utils/rbln_quantization.py CHANGED

@@ -13,9 +13,8 @@
 # limitations under the License.
 import glob
-import json
 import os
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 import torch
 from huggingface_hub import hf_hub_download, list_repo_files
@@ -30,10 +29,31 @@ from ...utils.logging import get_logger
 logger = get_logger()
+# Constants
+QUANTIZED_WEIGHTS = {
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "gate_proj",
+    "up_proj",
+    "down_proj",
+}
+# Common alias sets seen in community checkpoints
+VARIANT_ALIASES: Dict[str, List[str]] = {
+    "weight_scale": ["weight_scale", "scales", "w_scale", "scale"],
+    "input_scale": ["input_scale", "act_scale", "activation_scale", "a_scale"],
+    "kv_scale": ["kv_scale", "kv_scales"],
+    "k_scale": ["k_scale", "k_scales"],
+    "v_scale": ["v_scale", "v_scales"],
+}
 class RBLNQuantizationConfig(RBLNSerializableConfigProtocol):
     SUPPORTED_FORMATS = ["rbln"]
-    SUPPORTED_WEIGHTS = ["int4", "fp8", "fp16"]
-    SUPPORTED_ACTIVATIONS = ["fp8", "fp16"]
+    SUPPORTED_WEIGHTS = ["int4", "int8", "fp8", "fp16"]
+    SUPPORTED_ACTIVATIONS = ["int8", "fp8", "fp16"]
     SUPPORTED_KVCACHES = ["fp8", "fp16"]
     RBLN_QUANT_BITS_ENV = "RBLN_QUANT_BITS"
@@ -64,7 +84,6 @@ class RBLNQuantizationConfig(RBLNSerializableConfigProtocol):
         self.weights = weights or "fp16"
         self.activations = activations or "fp16"
         self.kv_caches = kv_caches or "fp16"
         self._validate()
     def _validate(self):
@@ -105,7 +124,7 @@ class QuantizedLayerFactory:
         self.quantization_config = quantization_config
     def create_linear(self, layer: Linear) -> Linear:
-        if self.quantization_config.weights == "int4":
+        if self.quantization_config.weights in ["int4", "int8"]:
             return self.create_qlinear(layer)
         elif self.quantization_config.weights == "fp8":
             return self.create_fp8linear(layer)
@@ -119,18 +138,6 @@ class QuantizedLayerFactory:
         return create_fp8linear(layer, self.quantization_config)
-# Constants
-QUANTIZED_WEIGHTS = {
-    "q_proj",
-    "k_proj",
-    "v_proj",
-    "o_proj",
-    "gate_proj",
-    "up_proj",
-    "down_proj",
-}
 def prepare_model_for_quantization(
     model: torch.nn.Module,
     model_id: str,
@@ -146,8 +153,8 @@ def prepare_model_for_quantization(
     Prepare the model for quantization by updating specified linear layers to quantized (qlinear) layers.
     """
-    # 1. Load weight files and safetensors.index.json
-    safetensor_files, index_data = load_weight_files_and_index(
+    # 1. Load weight files
+    safetensor_files = load_weight_files(
         model_id,
         use_auth_token=use_auth_token,
         revision=revision,
@@ -156,43 +163,34 @@ def prepare_model_for_quantization(
         local_files_only=local_files_only,
     )
-    # 2. Determine format from safetensors.index.json
-    determined_format = determine_format_from_index(index_data)
-    # 3. Update linear layers based on the determined format
+    # 2. Update linear layers based on the quantization config
     update_layers_to_quantize(model, rbln_quantization)
-    # 4. Load weights into model parameters
+    # 3. Load weights into model parameters
     load_weights_from_files(
         model,
         safetensor_files,
         n_layer,
         rbln_quantization=rbln_quantization,
-        determined_format=determined_format,
     )
     return model
-def load_weight_files_and_index(
+def load_weight_files(
     model_id: str,
     use_auth_token: Optional[Union[bool, str]] = None,
     revision: Optional[str] = None,
     cache_dir: Optional[str] = None,
     force_download: bool = False,
     local_files_only: bool = False,
-) -> tuple[list[str], Optional[Dict]]:
+) -> list[str]:
     """
-    Load safetensor file data directly into the model, filtering by layer if n_layer is provided.
+    Discover and download safetensors files for the given model id.
     """
-    index_data = None
     if os.path.isdir(model_id):
         safetensor_files = glob.glob(f"{model_id}/*.safetensors")
-        index_path = os.path.join(model_id, "model.safetensors.index.json")
-        if os.path.exists(index_path):
-            with open(index_path, "r") as f:
-                index_data = json.load(f)
     else:
         try:
             # List all files in the repository
@@ -213,20 +211,6 @@ def load_weight_files_and_index(
                         local_files_only=local_files_only,
                     )
                     safetensor_files.append(downloaded_file)
-                elif file == "model.safetensors.index.json":
-                    # Download the index file
-                    index_file = hf_hub_download(
-                        repo_id=model_id,
-                        filename=file,
-                        revision=revision,
-                        token=use_auth_token,
-                        cache_dir=cache_dir,
-                        force_download=force_download,
-                        local_files_only=local_files_only,
-                    )
-                    with open(index_file, "r") as f:
-                        index_data = json.load(f)
         except Exception as e:
             logger.error(f"Failed to download safetensors files from Hugging Face Hub: {e}")
             raise e
@@ -234,32 +218,7 @@ def load_weight_files_and_index(
     if not safetensor_files:
         raise FileNotFoundError(f"No safetensors files found for model_id: {model_id}")
-    return safetensor_files, index_data
-def determine_format_from_index(index_data: Optional[Dict]) -> str:
-    """
-    Determine the quantization format from safetensors.index.json data.
-    Args:
-        index_data: The loaded safetensors.index.json content
-    Returns:
-        str: The determined format string
-    """
-    if index_data is None:
-        raise ValueError("safetensors.index.json not found")
-    if "weight_map" not in index_data:
-        raise ValueError("weight_map not found in safetensors.index.json")
-    if any("self_attn.k_proj.k_scale" in key for key in index_data["weight_map"]):
-        return "tensorrt"
-    elif any("self_attn.kv_scale" in key for key in index_data["weight_map"]):
-        return "quark"
-    elif any("weight_scale" in key or "input_scale" in key for key in index_data["weight_map"]):
-        return "default"
-    else:
-        raise ValueError("Unknown quantization format of the index data of weight map.")
+    return safetensor_files
 def update_layers_to_quantize(
@@ -283,12 +242,139 @@ def update_layers_to_quantize(
         logger.debug(f"Updated the following linear layers to quantized layers:\n {{{', '.join(processed_layers)}}}")
+def _last_segment(key: str) -> str:
+    parts = key.split(".")
+    return parts[-1]
+def _replace_last_with(key: str, new_tail: str) -> str:
+    parts = key.split(".")
+    return ".".join(parts[:-1] + new_tail.split("."))
+def _matches_any_alias(key: str, kind: str) -> bool:
+    tail = _last_segment(key)
+    return tail in VARIANT_ALIASES.get(kind, [])
+def _reduce_to_scalar(t: torch.Tensor) -> torch.Tensor:
+    if t.ndim == 0:
+        return t
+    return t.reshape(-1).amax()
+def _coerce_per_out_channel_scale(scale: torch.Tensor, out_features: int) -> torch.Tensor:
+    s = scale
+    if s.ndim == 0:
+        # scalar -> expand to [out_features, 1]
+        return s.reshape(1, 1).expand(out_features, 1).contiguous()
+    if s.ndim == 1:
+        if s.numel() == 1:
+            return s.reshape(1, 1).expand(out_features, 1).contiguous()
+        if s.numel() == out_features:
+            return s.reshape(out_features, 1).contiguous()
+        # fallback: reduce to scalar then expand
+        v = _reduce_to_scalar(s)
+        return v.reshape(1, 1).expand(out_features, 1).contiguous()
+    if s.ndim == 2:
+        if s.shape == (out_features, 1):
+            return s.contiguous()
+        if s.shape == (1, out_features):
+            return s.transpose(0, 1).contiguous()
+        # fallback: reduce to [out_features] on non-out dims if possible
+        if s.shape[0] == out_features:
+            v = s
+            while v.ndim > 2:
+                v = v.amax(dim=-1)
+            if v.shape[-1] != 1:
+                v = v.amax(dim=-1, keepdim=True)
+            return v.contiguous()
+        # otherwise reduce to scalar then expand
+        v = _reduce_to_scalar(s)
+        return v.reshape(1, 1).expand(out_features, 1).contiguous()
+    # high-rank: reduce to scalar then expand
+    v = _reduce_to_scalar(s)
+    return v.reshape(1, 1).expand(out_features, 1).contiguous()
+def _kv_split_items(base_key: str, tensor: torch.Tensor) -> List[Tuple[str, torch.Tensor]]:
+    # base_key is the original key whose last token was 'kv_scale'
+    # We produce keys with 'k_proj.k_scale' and 'v_proj.v_scale'
+    if tensor.ndim == 1 and tensor.numel() >= 2:
+        tk, tv = tensor[0], tensor[1]
+    elif tensor.ndim == 2 and tensor.shape[0] >= 2 and tensor.shape[1] == 1:
+        tk, tv = tensor[0, 0], tensor[1, 0]
+    else:
+        tk = tv = tensor
+    k_key = _replace_last_with(base_key, "k_proj.k_scale")
+    v_key = _replace_last_with(base_key, "v_proj.v_scale")
+    return [(k_key, tk), (v_key, tv)]
+def canonicalize_checkpoint_items(
+    model: torch.nn.Module,
+    items: Iterable[Tuple[str, torch.Tensor]],
+    rbln_quantization: Optional[RBLNQuantizationConfig],
+) -> List[Tuple[str, torch.Tensor]]:
+    params = dict(model.named_parameters(recurse=True))
+    results: List[Tuple[str, torch.Tensor]] = []
+    for key, value in items:
+        t = value
+        # Normalize weight scale variants
+        if _matches_any_alias(key, "weight_scale"):
+            # rename last token to the canonical weight scale key
+            target_key = _replace_last_with(key, "weight_scale")
+            # Determine associated weight param to infer shape
+            weight_key = _replace_last_with(target_key, "weight")
+            out_features = None
+            if weight_key in params:
+                wshape = params[weight_key].shape
+                if len(wshape) == 2:
+                    out_features = int(wshape[0])
+            if rbln_quantization.weights in ["int4", "int8"] and out_features is not None:
+                t = _coerce_per_out_channel_scale(t.to(torch.float32), out_features)
+            elif rbln_quantization.weights == "fp8":
+                # Use a conservative scalar scale to ensure broadcastability
+                t = _reduce_to_scalar(t.to(torch.float32))
+            else:
+                t = t.to(torch.float32)
+            results.append((target_key, t))
+            continue
+        # Normalize input/activation scale variants
+        if _matches_any_alias(key, "input_scale"):
+            target_key = _replace_last_with(key, "input_scale")
+            t = _reduce_to_scalar(t.to(torch.float32))
+            results.append((target_key, t))
+            continue
+        # KV scale handling
+        if _matches_any_alias(key, "kv_scale"):
+            # For quark-like formats, expand to k/v
+            kv_items = _kv_split_items(key, t.to(torch.float32))
+            for k2, v2 in kv_items:
+                results.append((k2, v2))
+            continue
+        if _matches_any_alias(key, "k_scale") or _matches_any_alias(key, "v_scale"):
+            results.append((key, t.to(torch.float32)))
+            continue
+        # Default: passthrough
+        results.append((key, t))
+    return results
 def load_weights_from_files(
     model: torch.nn.Module,
     safetensor_files: list[str],
     n_layer: Optional[int] = None,
     rbln_quantization: Optional[RBLNQuantizationConfig] = None,
-    determined_format: Optional[str] = None,
 ):
     """
     Load safetensor file data directly into the model from provided safetensor files,
@@ -308,33 +394,43 @@ def load_weights_from_files(
     for safetensor_file in safetensor_files:
         file_data = load_file(safetensor_file)
-        for key, value in file_data.items():
-            loaded_input_scale = loaded_input_scale or "input_scale" in key
-            loaded_weight_scale = loaded_weight_scale or "weight_scale" in key
-            loaded_kv_scale = loaded_kv_scale or any(scale in key for scale in ["kv_scale", "k_scale", "v_scale"])
+        # Normalize all (key, tensor) pairs to the internal schema
+        normalized_items = canonicalize_checkpoint_items(
+            model=model,
+            items=file_data.items(),
+            rbln_quantization=rbln_quantization,
+        )
+        for key, value in normalized_items:
+            # Track which types of scales were observed (post-normalization)
+            if key.endswith("input_scale"):
+                loaded_input_scale = True
+            if key.endswith("weight_scale"):
+                loaded_weight_scale = True
+            if key.endswith("k_scale") or key.endswith("v_scale"):
+                loaded_kv_scale = True
+            # Filter by layer index if requested
             if target_layers is not None:
                 parts = key.split(".")
                 if len(parts) > 2 and parts[2].isdigit() and (int(parts[2]) not in target_layers):
                     continue
+            # Copy into parameters or buffers
             if key in model_params:
+                # Ensure dtype compatibility
+                if model_params[key].dtype != value.dtype:
+                    value = value.to(model_params[key].dtype)
                 model_params[key].data.copy_(value)
             elif key in model_buffers:
+                if model_buffers[key].dtype != value.dtype:
+                    value = value.to(model_buffers[key].dtype)
                 model_buffers[key].data.copy_(value)
-            elif "kv_scale" in key and determined_format == "quark":
-                if rbln_quantization.kv_caches == "fp8":
-                    model_params[key.replace("kv_scale", "k_proj.k_scale")].data.copy_(value)
-                    model_params[key.replace("kv_scale", "v_proj.v_scale")].data.copy_(value)
-                else:
-                    unloaded_keys.append(key)
             else:
                 unloaded_keys.append(key)
     if len(unloaded_keys) > 0:
         logger.warning(f"There are unexpected parameters/buffers on the checkpoint: {unloaded_keys}")
     if not loaded_input_scale and rbln_quantization.activations == "fp8":
         raise ValueError(
             "No input_scale found in the checkpoint. Did you use the correct quantization config? "
@@ -391,16 +487,17 @@ def create_qlinear(layer: Linear, rbln_quantization: RBLNQuantizationConfig) ->
     """
     def qlinear_forward(self, inputs: torch.Tensor) -> torch.Tensor:
-        if inputs.dtype != self.scales.dtype:
-            raise TypeError(f"Expected input dtype {self.scales.dtype}, but got {inputs.dtype}")
+        weight_scale = self.weight_scale
+        if inputs.dtype != weight_scale.dtype:
+            raise TypeError(f"Expected input dtype {weight_scale.dtype}, but got {inputs.dtype}")
         w_fp = self.weight.type(inputs.dtype)
-        w_fp *= self.scales.view(-1, 1)
+        w_fp *= weight_scale.view(-1, 1)
         return F.linear(inputs, w_fp, self.bias)
     # Convert weight to int8 and add scale parameter
     layer.weight = Parameter(layer.weight.to(torch.int8), requires_grad=False)
-    layer.scales = Parameter(torch.ones(layer.out_features, dtype=torch.float32), requires_grad=False)
+    layer.weight_scale = Parameter(torch.ones(layer.out_features, 1, dtype=torch.float32), requires_grad=False)
     layer.forward = lambda inputs: qlinear_forward(layer, inputs)
     return layer

{optimum_rbln-0.8.3a0.dist-info → optimum_rbln-0.8.3a2.dist-info}/METADATA RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.3a0
+Version: 0.8.3a2
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai

{optimum_rbln-0.8.3a0.dist-info → optimum_rbln-0.8.3a2.dist-info}/RECORD RENAMED

@@ -1,9 +1,9 @@
-optimum/rbln/__init__.py,sha256=i3WWddZ0okF5dQN3B_2wfM7NTnQ37lwdD2udzjMRGH8,17140
-optimum/rbln/__version__.py,sha256=1jEsQwW1wBFWk7T3YA4ed9DazJ1mgUtA1pZZvNgBZpc,712
+optimum/rbln/__init__.py,sha256=YhaBhcyu6BgoJrprUogLGAmiBaHayvg6Tjm6PpfJETw,17382
+optimum/rbln/__version__.py,sha256=LoGi14U0L2os-fSHKgBIGeByegJLodfXKteGMBVsCEc,712
 optimum/rbln/configuration_utils.py,sha256=xneqnRWSUVROqpzbTrBACex42-L9zwo3eSjfHjFuhv4,33072
 optimum/rbln/modeling.py,sha256=0CMQnpVvW9evNrTFHM2XFbNpRY1HkbFzYJ5sRyYFq0o,14293
 optimum/rbln/modeling_base.py,sha256=gHfqIO6lKT8smkUthUuRHnbITpxHpnDeBPT8iTeasCk,24575
-optimum/rbln/diffusers/__init__.py,sha256=cvyJaFRU1sP1WeRjWrxMOm-5vr0c4X-TD8eqQ21XIgc,6990
+optimum/rbln/diffusers/__init__.py,sha256=1tgU_xWA42BmInqu9bBz_5R_E9TGhhK3mI06YlaiTLg,7232
 optimum/rbln/diffusers/modeling_diffusers.py,sha256=TAuMb7PSMjNwK7mh5ItE_CtAEgYeZKI27XkFFmxjHlQ,19902
 optimum/rbln/diffusers/configurations/__init__.py,sha256=vMRnPY4s-Uju43xP038D2EA18X_mhy2YfsZVpSU-VoA,1322
 optimum/rbln/diffusers/configurations/models/__init__.py,sha256=7q95gtgDzCeIBogGw8SLQoHT4Wch7vpLJVF2UQovuoo,567
@@ -35,7 +35,8 @@ optimum/rbln/diffusers/models/transformers/transformer_cosmos.py,sha256=UQ_R7RVJ
 optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=yF7sS0QvawowpV9hR5GeT8DaE8CCp3mj1njHHd9cKTc,6630
 optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
 optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=v3WS9EGKROE_QClXrxC7rmRko1BspAvAbeIfh83LK88,15832
-optimum/rbln/diffusers/pipelines/__init__.py,sha256=Ft1i48HP3wVi5t7PpIPNhL-bcxpLfwyZ5kuaTECAx1A,3392
+optimum/rbln/diffusers/pipelines/__init__.py,sha256=r8mu21102cKXdkG1II9tpfpUS6wuyren2oK9y_MptZY,3703
+optimum/rbln/diffusers/pipelines/auto_pipeline.py,sha256=oGZWXfj82w695D2NiYUitgoWiwP2Z4PlgA3q6hoOKww,9502
 optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
 optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=3S9dogIHW8Bqg5kIlCudhCQG-4g3FcdOPEWhBOf7CJA,4059
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=G96bh4D9Cu-w4F9gZBQF6wNzhJQv9kvI34ZFsuEDjSw,35714
@@ -195,7 +196,7 @@ optimum/rbln/transformers/models/siglip/modeling_siglip.py,sha256=1TyRaxmhp6mg6U
 optimum/rbln/transformers/models/t5/__init__.py,sha256=R1Q8Z1vaIdx4rDjeCmm_ZMSgewWaqaI0l93AHwewtew,818
 optimum/rbln/transformers/models/t5/configuration_t5.py,sha256=nqDbibqykeeWn1TlKk6LmCn-DawTVudMMuBn2c2jds8,1362
 optimum/rbln/transformers/models/t5/modeling_t5.py,sha256=pdAWBLVknTzbma0Ij-VQ2Qve-frPjxL-AwMyU-zouPY,5123
-optimum/rbln/transformers/models/t5/t5_architecture.py,sha256=X_9X4QRhkiiMrwFHv3mzER3yGmF9oQ2U-HdH6jbwVmw,9824
+optimum/rbln/transformers/models/t5/t5_architecture.py,sha256=DlJNrGk35NTBhcp76PEhiyfs5yuUoDWKvMhfe4_puIE,10171
 optimum/rbln/transformers/models/time_series_transformer/__init__.py,sha256=xJaFWQawlwtv4H5tVFcY1pxLYzjHtMAlLq6nXysdkN8,1243
 optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py,sha256=MO-T4pcsea4EOmYeeg0tosUH6w76azqIPyV8Em8CMqw,1621
 optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py,sha256=8orxM-LbShCt2jC8Uyx43cSxWN1CGxamS58pKPjvzxs,17167
@@ -215,7 +216,7 @@ optimum/rbln/transformers/models/xlm_roberta/__init__.py,sha256=O3o2KzJ8Li3QhB7G
 optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py,sha256=wHRpGTXL9khYqSkKL1IgA7__6_lt9QpOz9tHumjK7fo,1260
 optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=EZd3flRUEE38DYtdqEnG70LV7fHhkamRZV51xrVyjYI,1093
 optimum/rbln/transformers/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-optimum/rbln/transformers/utils/rbln_quantization.py,sha256=PQY46_Yq_ic6n8F_RsZSumdFNd_NGKHfVNHNxDbVia0,17578
+optimum/rbln/transformers/utils/rbln_quantization.py,sha256=ARngdvRmeVoOphUU3Md9kT6zS5HDrYdEFYljJwaAaio,21020
 optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
 optimum/rbln/utils/decorator_utils.py,sha256=xu-TrsNi33SRC2a7DBsyoo6-pEQxWKZPZSmM9QlDe2Y,3745
 optimum/rbln/utils/depreacate_utils.py,sha256=uKxl3ENUCNaZXPnaDQvNxrH8hUIWdBWfZH6BM7ZV__4,385
@@ -226,7 +227,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
 optimum/rbln/utils/runtime_utils.py,sha256=R6uXDbeJP03-FWdd4vthNe2D4aCra5n12E3WB1ifiGM,7933
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=w5mgPgncI740gVKMu3S-69DGNdUSI0bTZxegQGcZ98Y,5011
-optimum_rbln-0.8.3a0.dist-info/METADATA,sha256=pv8AVPfkvMkms_pTvelG637GLOE0DdTIsCfJSLMSjfQ,5299
-optimum_rbln-0.8.3a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-optimum_rbln-0.8.3a0.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-optimum_rbln-0.8.3a0.dist-info/RECORD,,
+optimum_rbln-0.8.3a2.dist-info/METADATA,sha256=KAOx0J5beZebrxsAf9AsklRO43eTWaw222WX1iInnpk,5299
+optimum_rbln-0.8.3a2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.8.3a2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.8.3a2.dist-info/RECORD,,

{optimum_rbln-0.8.3a0.dist-info → optimum_rbln-0.8.3a2.dist-info}/WHEEL RENAMED Viewed

File without changes

{optimum_rbln-0.8.3a0.dist-info → optimum_rbln-0.8.3a2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

optimum-rbln 0.8.3a0__py3-none-any.whl → 0.8.3a2__py3-none-any.whl

Potentially problematic release.

optimum-rbln 0.8.3a0__py3-none-any.whl → 0.8.3a2__py3-none-any.whl