optimum-rbln 0.7.4a9__py3-none-any.whl → 0.7.5a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. optimum/rbln/__init__.py +21 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +11 -7
  4. optimum/rbln/diffusers/models/controlnet.py +1 -1
  5. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +1 -1
  6. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +1 -1
  7. optimum/rbln/modeling.py +7 -5
  8. optimum/rbln/ops/__init__.py +1 -0
  9. optimum/rbln/ops/attn.py +10 -0
  10. optimum/rbln/ops/flash_attn.py +8 -0
  11. optimum/rbln/ops/sliding_window_attn.py +111 -0
  12. optimum/rbln/transformers/__init__.py +22 -3
  13. optimum/rbln/transformers/models/__init__.py +23 -0
  14. optimum/rbln/transformers/models/blip_2/__init__.py +20 -0
  15. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +93 -0
  16. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +298 -0
  17. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +42 -6
  18. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +81 -77
  19. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +251 -135
  20. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +11 -7
  21. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +4 -4
  22. optimum/rbln/transformers/models/opt/__init__.py +16 -0
  23. optimum/rbln/transformers/models/opt/configuration_opt.py +19 -0
  24. optimum/rbln/transformers/models/opt/modeling_opt.py +78 -0
  25. optimum/rbln/transformers/models/opt/opt_architecture.py +74 -0
  26. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +16 -10
  27. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +35 -52
  28. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -0
  29. optimum/rbln/transformers/models/siglip/__init__.py +20 -0
  30. optimum/rbln/transformers/models/siglip/configuration_siglip.py +66 -0
  31. optimum/rbln/transformers/models/siglip/modeling_siglip.py +146 -0
  32. optimum/rbln/transformers/models/whisper/whisper_architecture.py +1 -0
  33. optimum/rbln/transformers/utils/rbln_quantization.py +121 -72
  34. optimum/rbln/utils/import_utils.py +23 -6
  35. optimum/rbln/utils/submodule.py +13 -1
  36. {optimum_rbln-0.7.4a9.dist-info → optimum_rbln-0.7.5a1.dist-info}/METADATA +1 -1
  37. {optimum_rbln-0.7.4a9.dist-info → optimum_rbln-0.7.5a1.dist-info}/RECORD +39 -28
  38. {optimum_rbln-0.7.4a9.dist-info → optimum_rbln-0.7.5a1.dist-info}/WHEEL +0 -0
  39. {optimum_rbln-0.7.4a9.dist-info → optimum_rbln-0.7.5a1.dist-info}/licenses/LICENSE +0 -0
@@ -313,6 +313,7 @@ class WhisperSelfAttention(WhisperAttention):
313
313
  args["mask"] = attention_mask.unsqueeze(2)
314
314
  attn_output = torch.ops.rbln_custom_ops.paged_attn_decode(**args)
315
315
  else:
316
+ args["mask"] = None
316
317
  attn_output = torch.ops.rbln_custom_ops.paged_causal_attn_decode(**args)
317
318
 
318
319
  attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
@@ -12,94 +12,82 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import functools
16
15
  import glob
17
16
  import os
18
- from typing import Any, Callable, Dict, Optional
17
+ from typing import Any, Dict, Optional, Union
19
18
 
20
19
  import torch
21
20
  from safetensors.torch import load_file
22
21
  from torch.nn import Linear, Parameter
23
22
  from torch.nn import functional as F
24
23
 
24
+ from ...configuration_utils import RBLNSerializableConfigProtocol
25
25
  from ...utils.logging import get_logger
26
26
 
27
27
 
28
28
  logger = get_logger()
29
29
 
30
- SUPPORTED_QUANTIZATIONS: Dict[str, list[str]] = {
31
- "rbln": ["w4a16"],
32
- }
33
30
 
31
+ class RBLNQuantizationConfig(RBLNSerializableConfigProtocol):
32
+ SUPPORTED_FORMATS = ["rbln"]
33
+ SUPPORTED_WEIGHTS = ["int4", "fp16"]
34
+ SUPPORTED_ACTIVATIONS = ["fp16"]
34
35
 
35
- class QuantizationManager:
36
36
  # The RBLN_QUANT_BITS environment variable defines the precision of each layer during the graph compilation process.
37
37
  # It specifies the quantization bit depth. For instance, setting RBLN_QUANT_BITS=4 will apply 4-bit precision for quantization.
38
38
  RBLN_QUANT_BITS_ENV = "RBLN_QUANT_BITS"
39
39
 
40
- @staticmethod
41
- def _raise_invalid_config_error(
42
- key: str, value: str, valid_values: list[str], context: Optional[str] = None
43
- ) -> None:
44
- context_info = f" for {context}" if context else ""
45
- valid_values_str = ", ".join(valid_values)
46
- raise ValueError(f"Invalid {key}: {value}{context_info}. Supported values are: {valid_values_str}")
47
-
48
- @staticmethod
49
- def validate_quantization_config(quantize_config: Optional[dict]) -> Optional[dict]:
50
- if not quantize_config:
51
- return None
52
-
53
- q_format = quantize_config.get("format")
54
- q_precision = quantize_config.get("precision")
55
-
56
- if q_format not in SUPPORTED_QUANTIZATIONS:
57
- QuantizationManager._raise_invalid_config_error(
58
- "quantization format", q_format, list(SUPPORTED_QUANTIZATIONS.keys())
40
+ def __init__(
41
+ self,
42
+ format: Optional[str] = None,
43
+ precision: Optional[str] = None,
44
+ weights: Optional[str] = None,
45
+ activations: Optional[str] = None,
46
+ ):
47
+ self.format = format
48
+ if precision is not None:
49
+ logger.warning("The `precision` argument is deprecated. Use `weights` and `activations` instead.")
50
+ if any(precision_arg is not None for precision_arg in (weights, activations)):
51
+ raise ValueError("`precision` and `weights` or `activations` cannot be set at the same time.")
52
+
53
+ if precision == "w4a16":
54
+ weights = "int4"
55
+ activations = "fp16"
56
+ else:
57
+ raise ValueError(f"Invalid precision: {precision}")
58
+
59
+ self.weights = weights or "fp16"
60
+ self.activations = activations or "fp16"
61
+ self._validate()
62
+
63
+ def _validate(self):
64
+ if self.format not in self.SUPPORTED_FORMATS:
65
+ raise ValueError(f"Invalid format: {self.format}, supported formats are: {self.SUPPORTED_FORMATS}")
66
+ if self.weights not in self.SUPPORTED_WEIGHTS:
67
+ raise ValueError(f"Invalid weights: {self.weights}, supported weights are: {self.SUPPORTED_WEIGHTS}")
68
+ if self.activations not in self.SUPPORTED_ACTIVATIONS:
69
+ raise ValueError(
70
+ f"Invalid activations: {self.activations}, supported activations are: {self.SUPPORTED_ACTIVATIONS}"
59
71
  )
72
+ if self.weights == "fp16" and self.activations == "fp16":
73
+ raise ValueError("weights and activations cannot be both fp16. It is meaningless.")
60
74
 
61
- if q_precision not in SUPPORTED_QUANTIZATIONS[q_format]:
62
- QuantizationManager._raise_invalid_config_error(
63
- "precision", q_precision, SUPPORTED_QUANTIZATIONS[q_format], q_format
64
- )
75
+ def _prepare_for_serialization(self) -> Dict[str, Any]:
76
+ return {
77
+ "format": self.format,
78
+ "weights": self.weights,
79
+ "activations": self.activations,
80
+ }
65
81
 
66
- return quantize_config
67
-
68
- @classmethod
69
- def _set_env_var(cls, name: str, value: str) -> None:
70
- os.environ[name] = value
71
-
72
- @classmethod
73
- def _unset_env_var(cls, name: str) -> None:
74
- os.environ.pop(name, None)
75
-
76
- @classmethod
77
- def set_quantization_env(cls, quantize_config: Optional[dict]) -> Optional[str]:
78
- quantize_config = cls.validate_quantization_config(quantize_config)
79
- if quantize_config:
80
- q_precision: str = quantize_config["precision"]
81
- quant_bits = q_precision.split("w")[1].split("a")[0]
82
- cls._set_env_var(cls.RBLN_QUANT_BITS_ENV, quant_bits)
83
- return cls.RBLN_QUANT_BITS_ENV
84
- return None
85
-
86
- @classmethod
87
- def reset_quantization_env(cls, env_var_name: Optional[str]) -> None:
88
- if env_var_name:
89
- cls._unset_env_var(env_var_name)
90
-
91
- @classmethod
92
- def with_quantization_env(cls, func: Callable) -> Callable:
93
- @functools.wraps(func)
94
- def wrapper(*args, **kwargs):
95
- quantize_config = kwargs.get("quantize_config")
96
- quantize_env_var = cls.set_quantization_env(quantize_config)
97
- try:
98
- return func(*args, **kwargs)
99
- finally:
100
- cls.reset_quantization_env(quantize_env_var)
101
-
102
- return wrapper
82
+ def maybe_set_quantization_env(self):
83
+ quant_bits = None
84
+ if self.weights == "int4":
85
+ quant_bits = "4"
86
+ os.environ[self.RBLN_QUANT_BITS_ENV] = quant_bits
87
+
88
+ def maybe_reset_quantization_env(self):
89
+ if self.RBLN_QUANT_BITS_ENV in os.environ:
90
+ os.environ.pop(self.RBLN_QUANT_BITS_ENV)
103
91
 
104
92
 
105
93
  # Constants
@@ -114,12 +102,31 @@ QUANTIZED_WEIGHTS = {
114
102
  }
115
103
 
116
104
 
117
- def prepare_model_for_quantization(model: torch.nn.Module, model_id: str, n_layer: Optional[int] = None) -> None:
105
+ def prepare_model_for_quantization(
106
+ model: torch.nn.Module,
107
+ model_id: str,
108
+ n_layer: Optional[int] = None,
109
+ use_auth_token: Optional[Union[bool, str]] = None,
110
+ revision: Optional[str] = None,
111
+ cache_dir: Optional[str] = None,
112
+ force_download: bool = False,
113
+ local_files_only: bool = False,
114
+ ) -> torch.nn.Module:
118
115
  """
119
116
  Prepare the model for quantization by updating specified linear layers to quantized (qlinear) layers.
120
117
  """
121
118
  update_layers_to_quantize(model)
122
- load_weights(model, model_id, n_layer)
119
+ load_weights(
120
+ model,
121
+ model_id,
122
+ n_layer,
123
+ use_auth_token=use_auth_token,
124
+ revision=revision,
125
+ cache_dir=cache_dir,
126
+ force_download=force_download,
127
+ local_files_only=local_files_only,
128
+ )
129
+ return model
123
130
 
124
131
 
125
132
  def update_layers_to_quantize(module: torch.nn.Module) -> None:
@@ -140,18 +147,57 @@ def update_layers_to_quantize(module: torch.nn.Module) -> None:
140
147
  logger.debug(f"Updated the following linear layers to quantized layers:\n {{{', '.join(processed_layers)}}}")
141
148
 
142
149
 
143
- def load_weights(model, model_id, n_layer=None):
150
+ def load_weights(
151
+ model,
152
+ model_id,
153
+ n_layer=None,
154
+ use_auth_token=None,
155
+ revision=None,
156
+ cache_dir=None,
157
+ force_download=False,
158
+ local_files_only=False,
159
+ ):
144
160
  """
145
161
  Load safetensor file data directly into the model, filtering by layer if n_layer is provided.
146
162
  """
147
- logger.debug("Loading the quantized weights into the CPU.") # TODO(jongho): remove.
148
163
 
149
164
  model_params = dict(model.named_parameters(recurse=True))
150
165
  model_buffers = dict(model.named_buffers(recurse=True))
151
- safetensor_files = glob.glob(f"{model_id}/*.safetensors")
166
+
167
+ if os.path.isdir(model_id):
168
+ safetensor_files = glob.glob(f"{model_id}/*.safetensors")
169
+ else:
170
+ from huggingface_hub import hf_hub_download, list_repo_files
171
+
172
+ try:
173
+ # List all files in the repository
174
+ repo_files = list_repo_files(model_id, revision=revision, token=use_auth_token)
175
+ # Filter for safetensors files
176
+ safetensor_files = []
177
+
178
+ for file in repo_files:
179
+ if file.endswith(".safetensors"):
180
+ # Download the safetensors file
181
+ downloaded_file = hf_hub_download(
182
+ repo_id=model_id,
183
+ filename=file,
184
+ revision=revision,
185
+ token=use_auth_token,
186
+ cache_dir=cache_dir,
187
+ force_download=force_download,
188
+ local_files_only=local_files_only,
189
+ )
190
+ safetensor_files.append(downloaded_file)
191
+ except Exception as e:
192
+ logger.error(f"Failed to download safetensors files from Hugging Face Hub: {e}")
193
+ raise e
194
+
195
+ if not safetensor_files:
196
+ raise FileNotFoundError(f"No safetensors files found for model_id: {model_id}")
152
197
 
153
198
  target_layers = list(range(n_layer)) if n_layer is not None else None
154
199
 
200
+ unloaded_keys = []
155
201
  for safetensor_file in safetensor_files:
156
202
  file_data = load_file(safetensor_file)
157
203
  for key, value in file_data.items():
@@ -165,8 +211,11 @@ def load_weights(model, model_id, n_layer=None):
165
211
  model_params[key].data.copy_(value)
166
212
  elif key in model_buffers:
167
213
  model_buffers[key].data.copy_(value)
214
+ else:
215
+ unloaded_keys.append(key)
168
216
 
169
- logger.debug("Loaded the quantized weights into the CPU.")
217
+ if len(unloaded_keys) > 0:
218
+ logger.warning(f"There are unexpected parameters/buffers on the checkpoint: {unloaded_keys}")
170
219
 
171
220
 
172
221
  def is_target_for_qlinear_replacement(layer_name: str, layer: torch.nn.Module) -> bool:
@@ -144,10 +144,27 @@ def check_version_compats() -> None:
144
144
  except importlib.metadata.PackageNotFoundError:
145
145
  warnings.warn(f"optimum-rbln requires {compat.package_name} to be installed.", ImportWarning)
146
146
  continue
147
+ # For versions 0.7.2 and above, don't show warning for rebel-compiler if base versions match
147
148
 
148
- if not Version(compat.min_version) <= Version(dep_version) < Version(compat.max_version):
149
- warnings.warn(
150
- f"optimum-rbln v{my_version} is compatible to {compat.package_name} v{compat.min_version} to v{compat.max_version}. (you are currently using v{dep_version})\n"
151
- "Please refer to our SDK release notes at https://docs.rbln.ai/about_atom/release_note.html",
152
- ImportWarning,
153
- )
149
+ if compat.package_name == "rebel-compiler":
150
+ # For optimum-rbln versions 0.7.2 and above, suppress the warning if the base versions of
151
+ # optimum-rbln and rebel-compiler match (e.g., 0.7.x with 0.7.y).
152
+ if (
153
+ Version(my_version) >= Version("0.7.2")
154
+ and Version(my_version).base_version == Version(dep_version).base_version
155
+ ):
156
+ continue
157
+ else:
158
+ warnings.warn(
159
+ f"Version mismatch detected: optimum-rbln v{my_version} and {compat.package_name} v{dep_version} have different base versions. "
160
+ f"For optimal performance and compatibility, please ensure both packages share the same major and minor version numbers. "
161
+ "Please refer to our SDK release notes at https://docs.rbln.ai/about_atom/release_note.html",
162
+ ImportWarning,
163
+ )
164
+ else:
165
+ if not Version(compat.min_version) <= Version(dep_version) < Version(compat.max_version):
166
+ warnings.warn(
167
+ f"optimum-rbln v{my_version} is compatible to {compat.package_name} v{compat.min_version} to v{compat.max_version}. (you are currently using v{dep_version})\n"
168
+ "Please refer to our SDK release notes at https://docs.rbln.ai/about_atom/release_note.html",
169
+ ImportWarning,
170
+ )
@@ -13,8 +13,11 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import importlib
16
+ from pathlib import Path
16
17
  from typing import TYPE_CHECKING, Any, Dict, List, Type
17
18
 
19
+ from transformers import PretrainedConfig
20
+
18
21
  from ..configuration_utils import RBLNModelConfig
19
22
 
20
23
 
@@ -38,6 +41,10 @@ class SubModulesMixin:
38
41
  for submodule_meta, submodule in zip(self._rbln_submodules, rbln_submodules):
39
42
  setattr(self, submodule_meta["name"], submodule)
40
43
 
44
+ @classmethod
45
+ def _update_submodule_config(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
46
+ return rbln_config
47
+
41
48
  @classmethod
42
49
  def _export_submodules_from_model(
43
50
  cls, model: "PreTrainedModel", model_save_dir: str, rbln_config: RBLNModelConfig, **kwargs
@@ -62,6 +69,8 @@ class SubModulesMixin:
62
69
  submodule_rbln_config = submodule_rbln_config_class(**submodule_rbln_config)
63
70
  setattr(rbln_config, submodule_name, submodule_rbln_config)
64
71
 
72
+ submodule_rbln_config = submodule_cls._update_submodule_config(model, submodule_rbln_config)
73
+
65
74
  rbln_submodule = submodule_cls.from_model(
66
75
  model=torch_submodule,
67
76
  config=torch_submodule.config,
@@ -90,9 +99,12 @@ class SubModulesMixin:
90
99
  importlib.import_module("optimum.rbln"), submodule_rbln_config.rbln_model_cls_name
91
100
  )
92
101
 
102
+ json_file_path = Path(model_save_dir) / submodule_name / "config.json"
103
+ config = PretrainedConfig.from_json_file(json_file_path)
104
+
93
105
  rbln_submodule = submodule_cls._from_pretrained(
94
106
  model_id=model_save_dir,
95
- config=None,
107
+ config=config,
96
108
  subfolder=submodule_name,
97
109
  rbln_config=submodule_rbln_config,
98
110
  **kwargs,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: optimum-rbln
3
- Version: 0.7.4a9
3
+ Version: 0.7.5a1
4
4
  Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
5
5
  Project-URL: Homepage, https://rebellions.ai
6
6
  Project-URL: Documentation, https://docs.rbln.ai
@@ -1,7 +1,7 @@
1
- optimum/rbln/__init__.py,sha256=c2whRR6XkelNLlH1MwAKYMoaBEhmGxSQFrhfKS1JC-I,13186
2
- optimum/rbln/__version__.py,sha256=GuANUTgAHXrhQ4V27YBqp-zK5jY_U9soaBj4Ef2cU3A,519
3
- optimum/rbln/configuration_utils.py,sha256=rOXG9_ptYwzLlDIebd-CoiT3i6xaZl5IVPplLIrUyoE,31106
4
- optimum/rbln/modeling.py,sha256=qDXB69Oq0jx9hfONebDiSNe2_DgKYhnAGLTbGAtwYVw,9677
1
+ optimum/rbln/__init__.py,sha256=MXUvNNwbZhrPpxjpDSIV5fCQIT_rcNaLPstQgUYj4gE,13970
2
+ optimum/rbln/__version__.py,sha256=mPJDzmeS5WyCHJnuvAR_w6Lpxp2T9tocDUBPoe9Zwoo,519
3
+ optimum/rbln/configuration_utils.py,sha256=gvAjRFEGw5rnSoH0IoyuLrE4fkxtk3DN1pikqrN_Rpk,31277
4
+ optimum/rbln/modeling.py,sha256=4Xwi3ovWDHOOqxUDH_ZgsgTuea8Kyg25D9s81zVYpr0,9669
5
5
  optimum/rbln/modeling_base.py,sha256=iQKw2IORu1cN6sOK0xeBVrhatt-ZPeinT_v6l2FnGRw,24173
6
6
  optimum/rbln/diffusers/__init__.py,sha256=XL6oKPHbPCV6IVCw3fu0-M9mD2KO_x6unx5kJdAtpVY,6180
7
7
  optimum/rbln/diffusers/modeling_diffusers.py,sha256=bPyP5RMbOFLb2DfEAuLVp7hTuQWJvWid7El72wGmFrY,19535
@@ -20,16 +20,16 @@ optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.p
20
20
  optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py,sha256=54NTvVur7RADGgjGwO33s76dgKQ4zVNvmFl68rQFapw,6370
21
21
  optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py,sha256=H0hqsqpAfqb9gBIK5KsfUf9gX9cTnggK9Nt2aqfzeIM,5528
22
22
  optimum/rbln/diffusers/models/__init__.py,sha256=mkCvJyH1KcwrsUvYSq_bVC79oOfyqtBSFDyPS1_48wA,1478
23
- optimum/rbln/diffusers/models/controlnet.py,sha256=m2hHKrom1ladsDO4bTSZ7o_bIRrLRpzv7XBI2BlesxY,10224
23
+ optimum/rbln/diffusers/models/controlnet.py,sha256=kzDbUckGlioor8t0kKBvwi-hzNaG15XluEzTa7xZs1Q,10292
24
24
  optimum/rbln/diffusers/models/autoencoders/__init__.py,sha256=dg17ZTUsiqTcbIaEE4fqew9uRbao0diQ21PXvRKIqKg,679
25
25
  optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py,sha256=qjReFNmuQEjnHjoI8f7ClAp_rRTA1vIk-3dwxJi6s7U,7905
26
26
  optimum/rbln/diffusers/models/autoencoders/vae.py,sha256=_fyFco2697uT1zo_P_fGML-_zqZw2sUQp3tRRjA5pg4,4172
27
27
  optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=DC8Nee8_BabGhagJgpCUDhA-oaTpZMg-lCVzXJ6dNEw,6134
28
28
  optimum/rbln/diffusers/models/transformers/__init__.py,sha256=V8rSR7WzHs-i8Cwb_MNxhY2NFbwPgxu24vGtkwl-6tk,706
29
29
  optimum/rbln/diffusers/models/transformers/prior_transformer.py,sha256=d7CYmm88lozepqXjmrFr4qsQ-lRE_10wQRwnenMSflU,4989
30
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=qBdk8fBxOnlRywUDKMab1G4YpFDVnMOkTyhKW9LPops,6503
30
+ optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=ywWiRAYQ0wfKMMZBrJ9C34GBwIu92F5QXSG8qi7Cr6s,6579
31
31
  optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
32
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=bFFidMPZ2JgZf5tQVi2WNrDtT_yRDyqzNlkZ-wvQm7M,15481
32
+ optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=dyrez3vS0_YSxTVwcjlSijDJhe6XchdsLsCFv74HiFQ,15555
33
33
  optimum/rbln/diffusers/pipelines/__init__.py,sha256=5KLZ5LrpMzBya2e_3_PvEoPwG24U8JMexfw_ygZREKc,3140
34
34
  optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
35
35
  optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=Cv__E0Boc6TSOIv8TdXVE821zIiPG4MVI_lnaGSqquk,4102
@@ -55,18 +55,19 @@ optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256=9iIMZYvp
55
55
  optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=9GQIzBSgBae2kE9esGycg7WqNwO8TGF5c97rMx9934Y,1029
56
56
  optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=BtzoyVd6Np5BPu3-OyXjbVMM8tl2ARF3HeFZab9NLXU,1071
57
57
  optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py,sha256=7_ezUBCaH24e25VS19vrhJHBvmEOFnBpgfslpjL6aT4,1071
58
- optimum/rbln/ops/__init__.py,sha256=rSz6mfC0aGbNYjMaNSsOZSPYxPRenW8DWbNpAkjTfAc,703
59
- optimum/rbln/ops/attn.py,sha256=x02yFLk7FcONFqfow0ROmVy9fmxo5Pw0SPCiDY3AZNg,9012
60
- optimum/rbln/ops/flash_attn.py,sha256=NmCqUdMTzgJ4sbYGj8IWXJEsLWvbuCMponR01w5DK6w,4121
58
+ optimum/rbln/ops/__init__.py,sha256=SPepB2VbmvEgathWAs_oCbDOPVyBhPey3wZX2X6dIBM,738
59
+ optimum/rbln/ops/attn.py,sha256=lFAypLsboLq9p74y3n-x7iwYTjaKdCZGTKLgvFWkXTI,9481
60
+ optimum/rbln/ops/flash_attn.py,sha256=z39DJZSk94630ueoOCkiybxR5gzvNR-SRADHs0F6pzU,4346
61
61
  optimum/rbln/ops/kv_cache_update.py,sha256=HjnHBR-oFrJQibsVnkYb0P5_-wEma8jl0mkjkylwakU,1270
62
62
  optimum/rbln/ops/linear.py,sha256=1_7Hg-9wXxhu97fqPobotLQx17k7VPeSSL91_9Z7EDg,1018
63
- optimum/rbln/transformers/__init__.py,sha256=P89UOclQWiLgNkH90GXdnwWD2492O2tusM-fZApfBNg,8084
63
+ optimum/rbln/ops/sliding_window_attn.py,sha256=EQrV_yRGc5z6kvwEsAcLP028bJWkQg2UPI3xubt9skU,3487
64
+ optimum/rbln/transformers/__init__.py,sha256=M9hEzEWIwLdkS-T0-sMof8BtYeFH-NAj4iX73mB3Rj8,8767
64
65
  optimum/rbln/transformers/configuration_alias.py,sha256=qFVfg6ohsR7a6b-CBgxjBUPDrk9MyiJwtO8AQah_RTU,1505
65
66
  optimum/rbln/transformers/configuration_generic.py,sha256=XIiZ1-5p1CMHhG7Sr2qR4SLYKcYw9aph7eGlga3Opx0,5056
66
67
  optimum/rbln/transformers/modeling_alias.py,sha256=yx7FnZQWAnrWzivaO5hI7T6i-fyLzt2tMIXG2oDNbPo,1657
67
68
  optimum/rbln/transformers/modeling_generic.py,sha256=nT_lytAILkYtwBVJKxXg0dxmh0UpjGYO6zOdLoMs1uU,12891
68
69
  optimum/rbln/transformers/modeling_rope_utils.py,sha256=3zwkhYUyTZhxCJUSmwCc88iiY1TppRWEY9ShwUqNB2k,14293
69
- optimum/rbln/transformers/models/__init__.py,sha256=72eMPN5UYGJ9P5gnJ2yi25cGdX1jV7viTOKmsX2OqBg,7221
70
+ optimum/rbln/transformers/models/__init__.py,sha256=rofipeYNupozWru5PfSV9BudskognIUuNRIo7z-TEKE,8039
70
71
  optimum/rbln/transformers/models/auto/__init__.py,sha256=GvGbb3ZpMv-h6euXeZ42jSizoOfrL2O1uvpAnfKxYEo,1034
71
72
  optimum/rbln/transformers/models/auto/auto_factory.py,sha256=Uf5rCUoxec2qhIAwbAeZNZN4NIMFaLurSB1EdI79lwA,7044
72
73
  optimum/rbln/transformers/models/auto/modeling_auto.py,sha256=Un9qoqdy3dO8JBza_bTJF_6_fRVNM9QisihSgTRFI-o,3933
@@ -77,13 +78,16 @@ optimum/rbln/transformers/models/bart/modeling_bart.py,sha256=q42gQ42Ldkfrw_rv0_
77
78
  optimum/rbln/transformers/models/bert/__init__.py,sha256=86FuGRBLw315_Roa9D5OUx6Ku2PM0DqSPZ-YSqbF-io,806
78
79
  optimum/rbln/transformers/models/bert/configuration_bert.py,sha256=NIlBRn-zrnNirkEfJ4Uv2TZRIBL761PLJ9-cZaPyzpg,1017
79
80
  optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=XxsRhBhexZ2w3mRCvKl73pIyGdqcFR1RrOKG7h4EAyk,1223
81
+ optimum/rbln/transformers/models/blip_2/__init__.py,sha256=L01gPXcUCa8Vg-bcng20vZvBIN_jlqCzwUSFuq0QOag,855
82
+ optimum/rbln/transformers/models/blip_2/configuration_blip_2.py,sha256=Dh_gbeF46Tg3DKK4lq9DRblweI3B7XZHE2PlxO8qStU,3662
83
+ optimum/rbln/transformers/models/blip_2/modeling_blip_2.py,sha256=4-EWHRau363-YoZQcTfLXqm97IsAs3-Uya2L1IVGfxE,10830
80
84
  optimum/rbln/transformers/models/clip/__init__.py,sha256=TLeXDqcFK6M6v9x7Xr64kBbqGu3hFHM7p754dQ8UVQc,938
81
85
  optimum/rbln/transformers/models/clip/configuration_clip.py,sha256=wgfZeVvcVdSzrN9tcnt7DKJQ0NLR0REvW7MyUXyv2Bg,2976
82
86
  optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=UslcDN6otyQ_psou7F_YcdK5vCImEtgIdcbwmexSfOM,7256
83
87
  optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=vQYZDDdoddwA7yKc5zzrq2Zs9sax-0p8rNF_aYfF4bk,1006
84
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=b1W7zS0MUmeDd048bLp5AkZMrWd3LIhHaVy8NvlwdCw,4116
85
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=NG2tKC3gT57r34PYKgU0evZHctEHzJGRrk2FOjLyK7Q,41748
86
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=5o2m_xPVjfCovP_jcW8E17sSKkLqcVblr4mFLbv-VDU,42991
88
+ optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=WQlHUjcMuD_3BPLo1Ytzz-xAI-8J9BYfr7dEzmpLVks,6389
89
+ optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=89I4A0WEdY0bOX_iGeG5qS7Cd2RPbwvdgYjfZIhcUy4,42035
90
+ optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=iaB1lgElTaCfq5-HY5mbOcbQ8CICyTONckJf25RPjpU,48565
87
91
  optimum/rbln/transformers/models/dpt/__init__.py,sha256=Nzep9mlzKyL1kV726IBqY8DnLp1DkH9JzFeknWSRhok,714
88
92
  optimum/rbln/transformers/models/dpt/configuration_dpt.py,sha256=4fW6bzVhaAxym4wGV3F785rvUOoWPyw_gdEMqB08Leg,755
89
93
  optimum/rbln/transformers/models/dpt/modeling_dpt.py,sha256=oKLX7MQZvfk1QB8wOtcdi7AmZH2fOIVbypa9A3RA9MI,733
@@ -101,14 +105,14 @@ optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=1IxqHmB-GlH2Dv
101
105
  optimum/rbln/transformers/models/gpt2/modeling_gpt2.py,sha256=qBDanUk_O-HtOIVCA4IE3FYyCsnL9xIDK00vft-0caw,1490
102
106
  optimum/rbln/transformers/models/idefics3/__init__.py,sha256=ulxE7HEfXsNJhd25J9Fvi6vggo9aZH9sLKJjWB6LlzQ,814
103
107
  optimum/rbln/transformers/models/idefics3/configuration_idefics3.py,sha256=sM0pXsvkxcpDXagoKlqwKdBAcNdayB9KlWdYC9xlyDU,1889
104
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py,sha256=Rr9BJDyoOqJFQ8dJV78QU4Tjjhhj3aqRk05JcDqFv6Y,17904
108
+ optimum/rbln/transformers/models/idefics3/modeling_idefics3.py,sha256=oSF08arqwjoodq9_A304jhRlQbk_FEIkR4EhJqiNAzk,18062
105
109
  optimum/rbln/transformers/models/llama/__init__.py,sha256=knxvRkPx8x6-WOxqSq_PlaKYD-9F9Q8dh7r095Esey0,708
106
110
  optimum/rbln/transformers/models/llama/configuration_llama.py,sha256=B9gr4pTn9yiv3-8DIk0P7_AQdIHEc7SuLaH9gZAmP8E,773
107
111
  optimum/rbln/transformers/models/llama/llama_architecture.py,sha256=S7MCPfyjG5eUqgaS-QNBB0ApUD6wnb5fR0RHq7k7-pA,728
108
112
  optimum/rbln/transformers/models/llama/modeling_llama.py,sha256=Z3iony7icoFhRQ11MAuFx9UF03uJCsvJQZ6bxHXlrgk,1530
109
113
  optimum/rbln/transformers/models/llava_next/__init__.py,sha256=kDXKr7wMkp1XqE__DER2B8kQF_NYMxhzsQS5ytGg56I,752
110
114
  optimum/rbln/transformers/models/llava_next/configuration_llava_next.py,sha256=QPreWZyohwRL23GOyvoAfKtk5UNg7IJ_Y_pNfUDe7cU,1838
111
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py,sha256=xOXc1XUIK4oLSFvAq7Q0lxiOLlDFMbFdOcg5JvLnVkI,25979
115
+ optimum/rbln/transformers/models/llava_next/modeling_llava_next.py,sha256=Igq_VCcOgSxltzm7eV9F5MiHS-vyDAp4DIswH80q76c,26022
112
116
  optimum/rbln/transformers/models/midm/__init__.py,sha256=IC3FETwgYinbp3wDj7tp4zIHJhbqM-c6GfTRdYcMNj8,913
113
117
  optimum/rbln/transformers/models/midm/configuration_midm.py,sha256=Kv5g5dIsBrhGcZ2_pFUOPNB80np4Xiw0wPH1IZm1PHI,772
114
118
  optimum/rbln/transformers/models/midm/midm_architecture.py,sha256=357iviqQkzI0s_lU_teH1sVOChNRDUABe3GA0HuhZZY,5444
@@ -117,6 +121,10 @@ optimum/rbln/transformers/models/mistral/__init__.py,sha256=9FE64bCYfSIyrBkRcwlq
117
121
  optimum/rbln/transformers/models/mistral/configuration_mistral.py,sha256=zCHZnmjTXs7UKYIIRR2IYjccROm5ODXfonWploITNxg,775
118
122
  optimum/rbln/transformers/models/mistral/mistral_architecture.py,sha256=_aU8TE_tdvfo0K7QpgTlz_d0qwk4O82dl9268lPL16E,733
119
123
  optimum/rbln/transformers/models/mistral/modeling_mistral.py,sha256=7nrddoBIHf8S12LZWBUpotnvG3gND11vMQda9yYXJ-s,1560
124
+ optimum/rbln/transformers/models/opt/__init__.py,sha256=w0v8GzbzlR5_4yL851njGDSJgX89TrYxrHnpNfMHZEI,700
125
+ optimum/rbln/transformers/models/opt/configuration_opt.py,sha256=1BIxum4m6An5dsYpw1Kmf-QIHSGQK8uDjMd2hUkRvzE,771
126
+ optimum/rbln/transformers/models/opt/modeling_opt.py,sha256=A40krCtNLOWDex4tHx894wBTcWhyrsQBtGgZZDDc3fg,3005
127
+ optimum/rbln/transformers/models/opt/opt_architecture.py,sha256=7ANZ1aIK0HZ8HPrAl22V1Hc0B-8gBigsa4VNMh9HVk8,2592
120
128
  optimum/rbln/transformers/models/phi/__init__.py,sha256=uqQb-sO1HXuaju2hfo7qJHk_IWhnptY-qFjNjK_uOc0,700
121
129
  optimum/rbln/transformers/models/phi/configuration_phi.py,sha256=9Mk06ZNymGnC2P0eiU02oguH1KFKxL-LYtMJSNqpmmo,771
122
130
  optimum/rbln/transformers/models/phi/modeling_phi.py,sha256=j-6Pqd5rR2JE8I1pnKFlCi4nW5Dv3wZjoPWxohissoo,1516
@@ -127,12 +135,15 @@ optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=9-aFDvjMzPNUyGOz
127
135
  optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
128
136
  optimum/rbln/transformers/models/qwen2_5_vl/__init__.py,sha256=rAW3DKQUzGL6EMwa5r1iLu94yhpiZpk6zfoD7TtYXrc,865
129
137
  optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py,sha256=dPcGNaLwJf61PIvVbyt-lvBflp_dvK0hubhNoA3len0,3123
130
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=sKsDAiMGWtbquTw6_PITK4ijL0aHBZqgs2nPObEiFN8,24951
131
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=YRy7Ylm-UQLovt5BmxhayJMKzF3rj0_HIc4tUXaiPO0,7474
138
+ optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=etE0y5UlNpNcRZO_68TAYK1Wg2TsqZUiM0L2qeTZacs,25144
139
+ optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=OR-tTu8uzeFryenohoqRwgIVqw5zM7dcsZHwKL_jD0A,7232
132
140
  optimum/rbln/transformers/models/seq2seq/__init__.py,sha256=6WKstWiS1kW0oFDn_jyrKMW5QEJAWkmsSRAaadNedDM,715
133
141
  optimum/rbln/transformers/models/seq2seq/configuration_seq2seq2.py,sha256=vSNP1eILfL32cbiLOAD58Ocz6lk3hYFnhIRLDVqlSoI,2624
134
142
  optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py,sha256=7MN6CNVyYg4JsULYXDfQ_KkDd3w-1TXuxndSrM-CX8w,16980
135
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py,sha256=w5pMnWbJhgYmpl5NT_vJEzcb4RfEbHfalJ371IL3wp8,18685
143
+ optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py,sha256=i2YXcusNvWjqYmS3ZvrWJiilovKIlfozB92fqDrtwq8,18729
144
+ optimum/rbln/transformers/models/siglip/__init__.py,sha256=39MdhvWLZU0_9julQtJYVEiWI4csPrryS9krMauEA3s,730
145
+ optimum/rbln/transformers/models/siglip/configuration_siglip.py,sha256=tXzkTXeFamOwRnafJMYmjIPDODKwM6xNsW-UTf5cH38,2625
146
+ optimum/rbln/transformers/models/siglip/modeling_siglip.py,sha256=hvffh55rYxHx7CLHe5TR_5nfCGUhVd3dUmCwUasLjwE,5941
136
147
  optimum/rbln/transformers/models/t5/__init__.py,sha256=R1Q8Z1vaIdx4rDjeCmm_ZMSgewWaqaI0l93AHwewtew,818
137
148
  optimum/rbln/transformers/models/t5/configuration_t5.py,sha256=cOa2NAAocNSSECTtLpOsVVLfn4-7l7K7jz3nf6CSNCs,912
138
149
  optimum/rbln/transformers/models/t5/modeling_t5.py,sha256=cnN76IAw6QaJN8Oodfk5Y4Dte6m84Fy_CRgnqquzzTI,2990
@@ -148,22 +159,22 @@ optimum/rbln/transformers/models/whisper/__init__.py,sha256=ErquiUlYycSYPsDcq9Iw
148
159
  optimum/rbln/transformers/models/whisper/configuration_whisper.py,sha256=-Su7pbkg3gkYTf-ECRJyxkpD3JtUJX4y5Mfml8tJJBI,2612
149
160
  optimum/rbln/transformers/models/whisper/generation_whisper.py,sha256=GIHTca3b1VtW81kp7BzKQ7f77c2t9OsEsbZetripgDo,4582
150
161
  optimum/rbln/transformers/models/whisper/modeling_whisper.py,sha256=7VcOjxV3ZAHBYFDLiSc6wVJ3PZLqzmbBAIkQ4JVfpNM,17538
151
- optimum/rbln/transformers/models/whisper/whisper_architecture.py,sha256=zEwfn8DDTbt2TN7lHKMZG9JXZc5WdW9Cp8mH4OVfo3s,13949
162
+ optimum/rbln/transformers/models/whisper/whisper_architecture.py,sha256=fKUbAMIl20o6EBMVcLg9TDSsJ1FDp8NKcl4jT9RWCEM,13981
152
163
  optimum/rbln/transformers/models/xlm_roberta/__init__.py,sha256=mFeFTiF6xY_n1glUsvZ420uKKwkeVOS5Oag0L6E4Qs8,718
153
164
  optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py,sha256=XBICzoxTaZ8DhBtr8qos2tDJar4jvCiKOZwXGImlTAw,779
154
165
  optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=rro7kH_BLDIlKSBRUfeuX-qQu6MeO5SzbVcf0AbFxEk,757
155
166
  optimum/rbln/transformers/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
- optimum/rbln/transformers/utils/rbln_quantization.py,sha256=gwBVHf97sQgPNmGa0wq87E8mPyrtXYhMnO4X4sKp3c8,7639
167
+ optimum/rbln/transformers/utils/rbln_quantization.py,sha256=um0N2ZruU_lNibo6rlzgwHAI2_8QOrYE7W7pA1qfXKM,9396
157
168
  optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
158
169
  optimum/rbln/utils/decorator_utils.py,sha256=xu-TrsNi33SRC2a7DBsyoo6-pEQxWKZPZSmM9QlDe2Y,3745
159
170
  optimum/rbln/utils/hub.py,sha256=Z_R9Ic9VAew8bUmlaAlxZf5JGMDBivHvvFRI557pILY,4196
160
- optimum/rbln/utils/import_utils.py,sha256=uMldLJmDVMj5uHvxBfb96uV29bfGEDvlksLY26GOHAs,4389
171
+ optimum/rbln/utils/import_utils.py,sha256=fpOERIIxXm-cDYGn1NN6c7aWDPQYVitPQW2MiyZ9NEY,5471
161
172
  optimum/rbln/utils/logging.py,sha256=VKKBmlQSdg6iZCGmAXaWYiW67K84jyp1QJhLQSSjPPE,3453
162
173
  optimum/rbln/utils/model_utils.py,sha256=V2kFpUe2aqVzLwbpztD8JOVFQqRHncvIWwJbgnUPr4E,1274
163
174
  optimum/rbln/utils/runtime_utils.py,sha256=LoKNK3AQNV_BSScstIZWjICkJf265MnUgy360BOocVI,5454
164
175
  optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
165
- optimum/rbln/utils/submodule.py,sha256=TtcH3OLctFd2Dosc-zNMGZ8xOXKKUfE91dLQ1v09E8Q,4636
166
- optimum_rbln-0.7.4a9.dist-info/METADATA,sha256=s2GSC8Y7NFqtsFxZFbkYzQdMRBuyzA6zjKeON7ov8G0,5299
167
- optimum_rbln-0.7.4a9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
168
- optimum_rbln-0.7.4a9.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
169
- optimum_rbln-0.7.4a9.dist-info/RECORD,,
176
+ optimum/rbln/utils/submodule.py,sha256=ZfI7e3YzbjbbBW4Yjfucj8NygEsukfIkaJi3PtwHrhc,5105
177
+ optimum_rbln-0.7.5a1.dist-info/METADATA,sha256=r1huaKWrPkBuoc7Zm743oYmueJxP7g5hlz9Ywtxf-r8,5299
178
+ optimum_rbln-0.7.5a1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
179
+ optimum_rbln-0.7.5a1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
180
+ optimum_rbln-0.7.5a1.dist-info/RECORD,,