optimum-rbln 0.9.4a2__py3-none-any.whl → 0.10.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. optimum/rbln/__init__.py +44 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +230 -67
  4. optimum/rbln/diffusers/models/controlnet.py +2 -2
  5. optimum/rbln/diffusers/models/transformers/prior_transformer.py +2 -2
  6. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +2 -2
  7. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -2
  8. optimum/rbln/diffusers/pipelines/auto_pipeline.py +2 -3
  9. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +3 -12
  10. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +2 -4
  11. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +1 -3
  12. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +1 -3
  13. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +2 -2
  14. optimum/rbln/modeling_base.py +11 -10
  15. optimum/rbln/ops/__init__.py +1 -0
  16. optimum/rbln/ops/attn.py +10 -0
  17. optimum/rbln/ops/flash_attn.py +8 -0
  18. optimum/rbln/ops/moe.py +180 -0
  19. optimum/rbln/ops/sliding_window_attn.py +9 -0
  20. optimum/rbln/transformers/__init__.py +44 -0
  21. optimum/rbln/transformers/modeling_attention_utils.py +124 -222
  22. optimum/rbln/transformers/modeling_outputs.py +25 -0
  23. optimum/rbln/transformers/modeling_rope_utils.py +78 -42
  24. optimum/rbln/transformers/models/__init__.py +38 -0
  25. optimum/rbln/transformers/models/auto/auto_factory.py +3 -3
  26. optimum/rbln/transformers/models/bart/bart_architecture.py +24 -24
  27. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +7 -2
  28. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +1 -1
  29. optimum/rbln/transformers/models/colpali/colpali_architecture.py +14 -20
  30. optimum/rbln/transformers/models/colpali/configuration_colpali.py +12 -17
  31. optimum/rbln/transformers/models/colpali/modeling_colpali.py +66 -182
  32. optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +40 -23
  33. optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +107 -371
  34. optimum/rbln/transformers/models/decoderonly/__init__.py +2 -0
  35. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +144 -17
  36. optimum/rbln/transformers/models/decoderonly/configuration_lora.py +1 -1
  37. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +122 -48
  38. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +5 -7
  39. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +120 -128
  40. optimum/rbln/transformers/models/detr/__init__.py +23 -0
  41. optimum/rbln/transformers/models/detr/configuration_detr.py +38 -0
  42. optimum/rbln/transformers/models/detr/modeling_detr.py +53 -0
  43. optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -36
  44. optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -1
  45. optimum/rbln/transformers/models/gemma2/__init__.py +16 -0
  46. optimum/rbln/transformers/models/gemma2/configuration_gemma2.py +45 -0
  47. optimum/rbln/transformers/models/gemma2/gemma2_architecture.py +83 -0
  48. optimum/rbln/transformers/models/gemma2/modeling_gemma2.py +101 -0
  49. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +2 -7
  50. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +16 -18
  51. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +5 -177
  52. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +8 -34
  53. optimum/rbln/transformers/models/gpt_oss/__init__.py +16 -0
  54. optimum/rbln/transformers/models/gpt_oss/configuration_gpt_oss.py +42 -0
  55. optimum/rbln/transformers/models/gpt_oss/gpt_oss_architecture.py +122 -0
  56. optimum/rbln/transformers/models/gpt_oss/modeling_gpt_oss.py +168 -0
  57. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +8 -5
  58. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +6 -4
  59. optimum/rbln/transformers/models/llava/modeling_llava.py +0 -1
  60. optimum/rbln/transformers/models/midm/midm_architecture.py +29 -22
  61. optimum/rbln/transformers/models/mixtral/__init__.py +16 -0
  62. optimum/rbln/transformers/models/mixtral/configuration_mixtral.py +38 -0
  63. optimum/rbln/transformers/models/mixtral/mixtral_architecture.py +76 -0
  64. optimum/rbln/transformers/models/mixtral/modeling_mixtral.py +68 -0
  65. optimum/rbln/transformers/models/opt/opt_architecture.py +1 -44
  66. optimum/rbln/transformers/models/paligemma/__init__.py +16 -0
  67. optimum/rbln/transformers/models/paligemma/configuration_paligemma.py +129 -0
  68. optimum/rbln/transformers/models/paligemma/modeling_paligemma.py +564 -0
  69. optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +24 -24
  70. optimum/rbln/transformers/models/phi/phi_architecture.py +13 -21
  71. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +9 -5
  72. optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +6 -1
  73. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +13 -1
  74. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +271 -122
  75. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +43 -39
  76. optimum/rbln/transformers/models/qwen2_moe/__init__.py +16 -0
  77. optimum/rbln/transformers/models/qwen2_moe/configuration_qwen2_moe.py +38 -0
  78. optimum/rbln/transformers/models/qwen2_moe/modeling_qwen2_moe.py +68 -0
  79. optimum/rbln/transformers/models/qwen2_moe/qwen2_moe_architecture.py +94 -0
  80. optimum/rbln/transformers/models/qwen2_vl/__init__.py +6 -1
  81. optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +13 -1
  82. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +263 -105
  83. optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +26 -34
  84. optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +7 -7
  85. optimum/rbln/transformers/models/qwen3_moe/__init__.py +16 -0
  86. optimum/rbln/transformers/models/qwen3_moe/configuration_qwen3_moe.py +38 -0
  87. optimum/rbln/transformers/models/qwen3_moe/modeling_qwen3_moe.py +68 -0
  88. optimum/rbln/transformers/models/qwen3_moe/qwen3_moe_architecture.py +100 -0
  89. optimum/rbln/transformers/models/resnet/configuration_resnet.py +10 -4
  90. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +14 -12
  91. optimum/rbln/transformers/models/siglip/modeling_siglip.py +4 -18
  92. optimum/rbln/transformers/models/swin/configuration_swin.py +1 -6
  93. optimum/rbln/transformers/models/t5/t5_architecture.py +15 -16
  94. optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -3
  95. optimum/rbln/transformers/models/whisper/generation_whisper.py +8 -8
  96. optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -3
  97. optimum/rbln/transformers/utils/rbln_quantization.py +20 -12
  98. optimum/rbln/utils/deprecation.py +78 -1
  99. optimum/rbln/utils/hub.py +93 -2
  100. optimum/rbln/utils/import_utils.py +16 -1
  101. optimum/rbln/utils/runtime_utils.py +12 -8
  102. optimum/rbln/utils/submodule.py +24 -0
  103. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/METADATA +6 -6
  104. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/RECORD +107 -81
  105. optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +0 -233
  106. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/WHEEL +0 -0
  107. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/entry_points.txt +0 -0
  108. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/utils/hub.py CHANGED
@@ -16,7 +16,8 @@ import json
16
16
  from pathlib import Path
17
17
  from typing import List, Optional, Union
18
18
 
19
- from huggingface_hub import HfApi, get_token, hf_hub_download
19
+ from huggingface_hub import HfApi, get_token, hf_hub_download, try_to_load_from_cache
20
+ from huggingface_hub.errors import LocalEntryNotFoundError
20
21
 
21
22
 
22
23
  def pull_compiled_model_from_hub(
@@ -29,6 +30,97 @@ def pull_compiled_model_from_hub(
29
30
  local_files_only: bool,
30
31
  ) -> Path:
31
32
  """Pull model files from the HuggingFace Hub."""
33
+ config_filename = "rbln_config.json" if subfolder == "" else f"{subfolder}/rbln_config.json"
34
+
35
+ # Try to find config file in cache first.
36
+ config_cache_path = try_to_load_from_cache(
37
+ repo_id=str(model_id),
38
+ filename=config_filename,
39
+ revision=revision,
40
+ cache_dir=cache_dir,
41
+ )
42
+
43
+ # If config is cached and we're not forcing download, try to use cached files
44
+ if config_cache_path and isinstance(config_cache_path, str) and not force_download:
45
+ config_path = Path(config_cache_path)
46
+ if config_path.exists():
47
+ cache_dir_path = config_path.parent
48
+
49
+ # Look for .rbln files in the same directory
50
+ pattern_rbln = "*.rbln"
51
+ rbln_files = list(cache_dir_path.glob(pattern_rbln))
52
+
53
+ # Validate files found in cache
54
+ rbln_config_filenames = [config_path] if config_path.exists() else []
55
+ validate_files(rbln_files, rbln_config_filenames, f"cached repository {model_id}")
56
+
57
+ # If local_files_only is True, return cached directory without API call
58
+ if local_files_only:
59
+ return cache_dir_path
60
+
61
+ # If local_files_only is False, ensure all files are downloaded
62
+ # Download config file (will use cache if available, download if missing)
63
+ rbln_config_cache_path = hf_hub_download(
64
+ repo_id=model_id,
65
+ filename=config_filename,
66
+ token=token,
67
+ revision=revision,
68
+ cache_dir=cache_dir,
69
+ force_download=force_download,
70
+ local_files_only=False,
71
+ )
72
+ cache_dir_path = Path(rbln_config_cache_path).parent
73
+
74
+ # Download all .rbln files found in cache (hf_hub_download will use cache if available)
75
+ for rbln_file in rbln_files:
76
+ filename = rbln_file.name if subfolder == "" else f"{subfolder}/{rbln_file.name}"
77
+ try:
78
+ hf_hub_download(
79
+ repo_id=model_id,
80
+ filename=filename,
81
+ token=token,
82
+ revision=revision,
83
+ cache_dir=cache_dir,
84
+ force_download=force_download,
85
+ local_files_only=False,
86
+ )
87
+ except LocalEntryNotFoundError:
88
+ # File might not exist in repo, skip it
89
+ pass
90
+
91
+ # Note: We skip the API call here since we're using cached files
92
+ # If there are additional files in the repo that aren't cached,
93
+ # they won't be downloaded.
94
+ # If the user needs all files, they should use force_download=True
95
+ return cache_dir_path
96
+
97
+ # If local_files_only is True and config not found in cache, try to download with local_files_only
98
+ if local_files_only:
99
+ try:
100
+ rbln_config_cache_path = hf_hub_download(
101
+ repo_id=model_id,
102
+ filename=config_filename,
103
+ token=token,
104
+ revision=revision,
105
+ cache_dir=cache_dir,
106
+ force_download=force_download,
107
+ local_files_only=True,
108
+ )
109
+ cache_dir_path = Path(rbln_config_cache_path).parent
110
+ rbln_files = list(cache_dir_path.glob("*.rbln"))
111
+ rbln_config_filenames = [Path(rbln_config_cache_path)] if Path(rbln_config_cache_path).exists() else []
112
+ validate_files(rbln_files, rbln_config_filenames, f"cached repository {model_id}")
113
+ return cache_dir_path
114
+ except LocalEntryNotFoundError as err:
115
+ raise FileNotFoundError(
116
+ f"Could not find compiled model files for {model_id} in local cache. "
117
+ f"Set local_files_only=False to download from HuggingFace Hub."
118
+ ) from err
119
+
120
+ # List files from repository. This only happens when:
121
+ # 1. Config is not cached, OR
122
+ # 2. force_download=True, OR
123
+ # 3. local_files_only=False and we need to discover all files in the repo
32
124
  huggingface_token = _get_huggingface_token(token)
33
125
  repo_files = list(
34
126
  map(
@@ -51,7 +143,6 @@ def pull_compiled_model_from_hub(
51
143
  rbln_config_cache_path = hf_hub_download(
52
144
  repo_id=model_id,
53
145
  filename=filename,
54
- subfolder=subfolder,
55
146
  token=token,
56
147
  revision=revision,
57
148
  cache_dir=cache_dir,
optimum/rbln/utils/import_utils.py CHANGED
@@ -136,7 +136,22 @@ def is_rbln_available() -> bool:
136
136
 
137
137
  def check_version_compats() -> None:
138
138
  warnings.filterwarnings(action="always", category=ImportWarning, module="optimum.rbln")
139
- my_version = importlib.metadata.version("optimum-rbln")
139
+ try:
140
+ my_version = importlib.metadata.version("optimum-rbln")
141
+ except importlib.metadata.PackageNotFoundError:
142
+ # Common dev case: running from source (e.g. PYTHONPATH=src) without installing the package.
143
+ # package metadata doesn't exist, so fall back to the in-repo version file.
144
+ try:
145
+ from optimum.rbln.__version__ import __version__ as my_version # type: ignore
146
+ except Exception:
147
+ warnings.warn(
148
+ "Could not determine optimum-rbln version (package metadata missing). "
149
+ "If you are running from source, consider `pip install -e .` to install metadata.",
150
+ ImportWarning,
151
+ stacklevel=2,
152
+ )
153
+ return
154
+
140
155
  target_version = list(filter(lambda v: Version(my_version) >= Version(v), RBLN_VERSION_COMPATS.keys()))[0]
141
156
  for compat in RBLN_VERSION_COMPATS[target_version]:
142
157
  try:
optimum/rbln/utils/runtime_utils.py CHANGED
@@ -20,6 +20,10 @@ import rebel
20
20
  import torch
21
21
 
22
22
 
23
+ def is_compiler_supports_buffer_resize() -> bool:
24
+ return hasattr(rebel.RBLNCompiledModel, "exp_multiply_buffer_size")
25
+
26
+
23
27
  def get_available_dram(npu: Optional[str] = None) -> int:
24
28
  """
25
29
  Get the available DRAM size of the specified NPU.
@@ -75,12 +79,6 @@ def tp_and_devices_are_ok(
75
79
  if tensor_parallel_size is None:
76
80
  tensor_parallel_size = 1
77
81
 
78
- if rebel.device_count() < tensor_parallel_size:
79
- return (
80
- f"Tensor parallel size {tensor_parallel_size} is greater than "
81
- f"the number of available devices {rebel.device_count()}."
82
- )
83
-
84
82
  if device is None:
85
83
  device = list(range(tensor_parallel_size))
86
84
  elif isinstance(device, int):
@@ -100,9 +98,15 @@ def tp_and_devices_are_ok(
100
98
  return None
101
99
  if rebel.get_npu_name(device_id) is None:
102
100
  return (
103
- f"Device {device_id} is not a valid NPU device. Please check your NPU status with 'rbln-stat' command."
101
+ f"Device {device_id} is not a valid NPU device. Please check your NPU status with 'rbln-smi' command."
104
102
  )
105
103
 
104
+ if rebel.device_count() < tensor_parallel_size:
105
+ return (
106
+ f"Tensor parallel size {tensor_parallel_size} is greater than "
107
+ f"the number of available devices {rebel.device_count()}."
108
+ )
109
+
106
110
  if npu is not None:
107
111
  for device_id in device:
108
112
  npu_name = rebel.get_npu_name(device_id)
@@ -181,7 +185,7 @@ class UnavailableRuntime:
181
185
  "This model was loaded with create_runtimes=False. To use this model for inference:\n"
182
186
  "1. Load the model with runtime creation enabled:\n"
183
187
  " model = RBLNModel.from_pretrained(..., rbln_create_runtimes=True)\n"
184
- "2. Ensure your NPU hardware is properly configured (check with 'rbln-stat' command)\n"
188
+ "2. Ensure your NPU hardware is properly configured (check with 'rbln-smi' command)\n"
185
189
  "3. If you're on a machine without NPU hardware, you need to transfer the model files\n"
186
190
  " to a compatible system with NPU support."
187
191
  )
optimum/rbln/utils/submodule.py CHANGED
@@ -61,12 +61,25 @@ class SubModulesMixin:
61
61
  ):
62
62
  return rbln_config
63
63
 
64
+ @classmethod
65
+ def _update_submodule_rbln_config(
66
+ cls,
67
+ submodule_name: str,
68
+ submodule_cls: Type["RBLNModel"],
69
+ model: "PreTrainedModel",
70
+ submodule_config: PretrainedConfig,
71
+ submodule_rbln_config: RBLNModelConfig,
72
+ preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
73
+ ):
74
+ return submodule_rbln_config
75
+
64
76
  @classmethod
65
77
  def _export_submodules_from_model(
66
78
  cls, model: "PreTrainedModel", model_save_dir: str, rbln_config: RBLNModelConfig, **kwargs
67
79
  ) -> List["RBLNModel"]:
68
80
  rbln_submodules = []
69
81
  submodule_prefix = getattr(cls, "_rbln_submodule_prefix", None)
82
+ submodule_postfix = getattr(cls, "_rbln_submodule_postfix", None)
70
83
  preprocessors = kwargs.pop("preprocessors", [])
71
84
 
72
85
  for submodule in cls._rbln_submodules:
@@ -74,6 +87,9 @@ class SubModulesMixin:
74
87
  if submodule_prefix is not None:
75
88
  torch_submodule: PreTrainedModel = getattr(model, submodule_prefix)
76
89
  torch_submodule = getattr(torch_submodule, submodule_name)
90
+ elif submodule_postfix is not None:
91
+ torch_submodule: PreTrainedModel = getattr(model, submodule_name)
92
+ torch_submodule = getattr(torch_submodule, submodule_postfix)
77
93
  else:
78
94
  torch_submodule: PreTrainedModel = getattr(model, submodule_name)
79
95
 
@@ -92,6 +108,14 @@ class SubModulesMixin:
92
108
  filtered_kwargs["cls_name"] = submodule_config_cls.__name__
93
109
  submodule_rbln_config = submodule_config_cls(**filtered_kwargs)
94
110
 
111
+ submodule_rbln_config = cls._update_submodule_rbln_config(
112
+ submodule_name=submodule_name,
113
+ submodule_cls=submodule_cls,
114
+ model=model,
115
+ submodule_config=torch_submodule.config,
116
+ submodule_rbln_config=submodule_rbln_config,
117
+ preprocessors=preprocessors,
118
+ )
95
119
  setattr(rbln_config, submodule_name, submodule_rbln_config)
96
120
  submodule_rbln_config = submodule_cls._update_submodule_config(model, submodule_rbln_config, preprocessors)
97
121
 
{optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/METADATA CHANGED
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: optimum-rbln
3
- Version: 0.9.4a2
3
+ Version: 0.10.0.post1
4
4
  Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
5
5
  Project-URL: Homepage, https://rebellions.ai
6
6
  Project-URL: Documentation, https://docs.rbln.ai
7
- Project-URL: Repository, https://github.com/rebellions-sw/optimum-rbln
7
+ Project-URL: Repository, https://github.com/rbln-sw/optimum-rbln
8
8
  Author-email: "Rebellions Inc." <support@rebellions.ai>
9
9
  License-Expression: Apache-2.0
10
10
  License-File: LICENSE
@@ -24,12 +24,12 @@ Classifier: Programming Language :: Python :: 3.13
24
24
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
25
  Requires-Python: <3.14,>=3.9
26
26
  Requires-Dist: accelerate>=1.0.1
27
- Requires-Dist: diffusers==0.35.2
27
+ Requires-Dist: diffusers==0.36.0
28
28
  Requires-Dist: packaging>=24.1
29
29
  Requires-Dist: torch==2.8.0
30
30
  Requires-Dist: torchaudio<=2.8.0
31
31
  Requires-Dist: torchvision<=0.23.0
32
- Requires-Dist: transformers==4.57.1
32
+ Requires-Dist: transformers==4.57.3
33
33
  Description-Content-Type: text/markdown
34
34
 
35
35
 
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
40
40
  <img src="assets/rbln_logo.png" width="60%"/>
41
41
 
42
42
  [![PyPI version](https://badge.fury.io/py/optimum-rbln.svg)](https://badge.fury.io/py/optimum-rbln)
43
- [![License](https://img.shields.io/github/license/rebellions-sw/optimum-rbln)](https://github.com/rebellions-sw/optimum-rbln/blob/main/LICENSE)
43
+ [![License](https://img.shields.io/github/license/rbln-sw/optimum-rbln)](https://github.com/rbln-sw/optimum-rbln/blob/main/LICENSE)
44
44
  [![Documentation](https://img.shields.io/badge/docs-available-brightgreen)](https://docs.rbln.ai/software/optimum/optimum_rbln.html)
45
45
  [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](CODE_OF_CONDUCT.md)
46
46
 
@@ -113,7 +113,7 @@ pip install optimum-rbln --extra-index-url https://download.pytorch.org/whl/cpu
113
113
  The below command installs `optimum-rbln` along with its dependencies.
114
114
 
115
115
  ```bash
116
- git clone https://github.com/rebellions-sw/optimum-rbln.git
116
+ git clone https://github.com/rbln-sw/optimum-rbln.git
117
117
  cd optimum-rbln
118
118
  ./scripts/uv-sync.sh
119
119
  ```