optimum-rbln 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. optimum/rbln/__init__.py +22 -12
  2. optimum/rbln/__version__.py +16 -1
  3. optimum/rbln/diffusers/__init__.py +22 -2
  4. optimum/rbln/diffusers/models/__init__.py +34 -3
  5. optimum/rbln/{transformers/generation → diffusers/models/autoencoders}/__init__.py +1 -2
  6. optimum/rbln/diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +44 -58
  7. optimum/rbln/diffusers/models/autoencoders/vae.py +84 -0
  8. optimum/rbln/diffusers/models/controlnet.py +54 -14
  9. optimum/rbln/diffusers/models/transformers/__init__.py +24 -0
  10. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +203 -0
  11. optimum/rbln/diffusers/models/unets/__init__.py +24 -0
  12. optimum/rbln/diffusers/models/{unet_2d_condition.py → unets/unet_2d_condition.py} +78 -16
  13. optimum/rbln/diffusers/pipelines/__init__.py +22 -2
  14. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +5 -26
  15. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +1 -0
  16. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +1 -0
  17. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +1 -0
  18. optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  19. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +1 -0
  20. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -11
  21. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +32 -0
  22. optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +26 -0
  23. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +32 -0
  24. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +32 -0
  25. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +32 -0
  26. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +1 -0
  27. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +14 -6
  28. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +14 -6
  29. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +32 -0
  30. optimum/rbln/modeling.py +572 -0
  31. optimum/rbln/modeling_alias.py +1 -1
  32. optimum/rbln/modeling_base.py +164 -758
  33. optimum/rbln/modeling_diffusers.py +51 -122
  34. optimum/rbln/transformers/__init__.py +0 -2
  35. optimum/rbln/transformers/models/auto/auto_factory.py +117 -23
  36. optimum/rbln/transformers/models/auto/modeling_auto.py +37 -12
  37. optimum/rbln/transformers/models/bart/modeling_bart.py +3 -6
  38. optimum/rbln/transformers/models/bert/modeling_bert.py +3 -6
  39. optimum/rbln/transformers/models/clip/modeling_clip.py +8 -25
  40. optimum/rbln/transformers/models/decoderonly/__init__.py +0 -3
  41. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +672 -412
  42. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +38 -155
  43. optimum/rbln/transformers/models/dpt/modeling_dpt.py +1 -1
  44. optimum/rbln/transformers/models/exaone/exaone_architecture.py +61 -45
  45. optimum/rbln/transformers/models/exaone/modeling_exaone.py +4 -2
  46. optimum/rbln/transformers/models/gemma/gemma_architecture.py +33 -104
  47. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +50 -238
  48. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +3 -2
  49. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +2 -75
  50. optimum/rbln/transformers/models/midm/midm_architecture.py +88 -242
  51. optimum/rbln/transformers/models/midm/modeling_midm.py +6 -6
  52. optimum/rbln/transformers/models/phi/phi_architecture.py +61 -261
  53. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +1 -46
  54. optimum/rbln/transformers/models/t5/modeling_t5.py +102 -4
  55. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
  56. optimum/rbln/transformers/models/whisper/modeling_whisper.py +1 -1
  57. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +3 -35
  58. optimum/rbln/transformers/utils/rbln_quantization.py +120 -3
  59. optimum/rbln/utils/decorator_utils.py +10 -6
  60. optimum/rbln/utils/hub.py +131 -0
  61. optimum/rbln/utils/import_utils.py +15 -1
  62. optimum/rbln/utils/model_utils.py +53 -0
  63. optimum/rbln/utils/runtime_utils.py +1 -1
  64. optimum/rbln/utils/submodule.py +114 -0
  65. optimum_rbln-0.1.15.dist-info/METADATA +106 -0
  66. {optimum_rbln-0.1.13.dist-info → optimum_rbln-0.1.15.dist-info}/RECORD +69 -66
  67. {optimum_rbln-0.1.13.dist-info → optimum_rbln-0.1.15.dist-info}/WHEEL +1 -1
  68. optimum/rbln/transformers/generation/streamers.py +0 -139
  69. optimum/rbln/transformers/generation/utils.py +0 -397
  70. optimum/rbln/transformers/models/exaone/hf_hub_cached/configuration_exaone.py +0 -181
  71. optimum/rbln/transformers/models/exaone/hf_hub_cached/modeling_exaone.py +0 -1725
  72. optimum/rbln/transformers/models/midm/hf_hub_cached/configuration_midm.py +0 -22
  73. optimum/rbln/transformers/models/midm/hf_hub_cached/midm_bitext_tokenization.py +0 -304
  74. optimum/rbln/transformers/models/midm/hf_hub_cached/modeling_midm.py +0 -1469
  75. optimum/rbln/transformers/models/midm/hf_hub_cached/rotary_position_embedding.py +0 -98
  76. optimum/rbln/utils/context.py +0 -58
  77. optimum_rbln-0.1.13.dist-info/METADATA +0 -120
  78. optimum_rbln-0.1.13.dist-info/entry_points.txt +0 -4
  79. {optimum_rbln-0.1.13.dist-info → optimum_rbln-0.1.15.dist-info}/licenses/LICENSE +0 -0
@@ -22,12 +22,95 @@
22
22
  # from Rebellions Inc.
23
23
 
24
24
 
25
- from typing import Any
25
+ import functools
26
+ import glob
27
+ import os
28
+ from typing import Any, Callable, Dict, Optional
26
29
 
27
30
  import torch
31
+ from safetensors.torch import load_file
28
32
  from torch.nn import Linear, Parameter
29
33
  from torch.nn import functional as F
30
34
 
35
+ from ...utils.logging import get_logger
36
+
37
+
38
+ logger = get_logger()
39
+
40
# Maps each supported quantization format to the precisions accepted for it.
SUPPORTED_QUANTIZATIONS: Dict[str, list[str]] = {
    "rbln": ["w4a16"],
}


class QuantizationManager:
    """
    Validates quantization configs and exposes the requested bit width through an
    environment variable for the duration of a call.
    """

    # The RBLN_QUANT_BITS environment variable defines the precision of each layer during the graph compilation process.
    # It specifies the quantization bit depth. For instance, setting RBLN_QUANT_BITS=4 will apply 4-bit precision for quantization.
    RBLN_QUANT_BITS_ENV = "RBLN_QUANT_BITS"

    @staticmethod
    def _raise_invalid_config_error(
        key: str, value: str, valid_values: list[str], context: Optional[str] = None
    ) -> None:
        """Raise a ValueError naming the rejected `value` and listing `valid_values`."""
        context_info = f" for {context}" if context else ""
        valid_values_str = ", ".join(valid_values)
        raise ValueError(f"Invalid {key}: {value}{context_info}. " f"Supported values are: {valid_values_str}")

    @staticmethod
    def validate_quantization_config(quantize_config: Optional[dict]) -> Optional[dict]:
        """
        Check `quantize_config` against SUPPORTED_QUANTIZATIONS.

        Returns:
            None when the config is empty or None, otherwise the same config object.

        Raises:
            ValueError: If the "format" or the "precision" entry is not supported.
        """
        if not quantize_config:
            return None

        q_format = quantize_config.get("format")
        q_precision = quantize_config.get("precision")

        if q_format not in SUPPORTED_QUANTIZATIONS:
            QuantizationManager._raise_invalid_config_error(
                "quantization format", q_format, list(SUPPORTED_QUANTIZATIONS.keys())
            )

        if q_precision not in SUPPORTED_QUANTIZATIONS[q_format]:
            QuantizationManager._raise_invalid_config_error(
                "precision", q_precision, SUPPORTED_QUANTIZATIONS[q_format], q_format
            )

        return quantize_config

    @classmethod
    def _set_env_var(cls, name: str, value: str) -> None:
        os.environ[name] = value

    @classmethod
    def _unset_env_var(cls, name: str) -> None:
        os.environ.pop(name, None)

    @classmethod
    def set_quantization_env(cls, quantize_config: Optional[dict]) -> Optional[str]:
        """
        Export the weight bit width of a valid config (e.g. "4" for "w4a16").

        Returns:
            The name of the environment variable that was set, or None when no
            config was given and nothing was exported.
        """
        quantize_config = cls.validate_quantization_config(quantize_config)
        if quantize_config:
            q_precision: str = quantize_config["precision"]
            # "w4a16" -> "4": the digits between the leading "w" and the "a".
            quant_bits = q_precision.split("w")[1].split("a")[0]
            cls._set_env_var(cls.RBLN_QUANT_BITS_ENV, quant_bits)
            return cls.RBLN_QUANT_BITS_ENV
        return None

    @classmethod
    def reset_quantization_env(cls, env_var_name: Optional[str]) -> None:
        """Remove `env_var_name` from the environment; a None name is a no-op."""
        if env_var_name:
            cls._unset_env_var(env_var_name)

    @classmethod
    def with_quantization_env(cls, func: Callable) -> Callable:
        """
        Decorate `func` so the quantization env var is set while it runs.

        The config is read from the `quantize_config` keyword argument only. After the
        call (including when it raises) the environment is put back the way it was:
        a value that existed beforehand is restored instead of being dropped.
        """

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            quantize_config = kwargs.get("quantize_config")
            # Capture the caller's value before it is overwritten.
            previous_value = os.environ.get(cls.RBLN_QUANT_BITS_ENV)
            quantize_env_var = cls.set_quantization_env(quantize_config)
            try:
                return func(*args, **kwargs)
            finally:
                if quantize_env_var is not None:
                    if previous_value is None:
                        cls.reset_quantization_env(quantize_env_var)
                    else:
                        cls._set_env_var(quantize_env_var, previous_value)

        return wrapper
113
+
31
114
 
32
115
  # Constants
33
116
  QUANTIZED_WEIGHTS = {
@@ -41,7 +124,15 @@ QUANTIZED_WEIGHTS = {
41
124
  }
42
125
 
43
126
 
44
- def update_layers_to_quantized(module: torch.nn.Module) -> None:
127
def prepare_model_for_quantization(model: torch.nn.Module, model_id: str, n_layer: Optional[int] = None) -> None:
    """
    Swap the targeted linear layers of `model` for quantized (qlinear) layers, then
    load the safetensors weights found under `model_id` into the converted model.
    """
    update_layers_to_quantize(module=model)
    load_weights(model=model, model_id=model_id, n_layer=n_layer)
133
+
134
+
135
+ def update_layers_to_quantize(module: torch.nn.Module) -> None:
45
136
  """
46
137
  Updates specified linear layers to quantized (qlinear) layers in the given module.
47
138
  """
@@ -54,7 +145,33 @@ def update_layers_to_quantized(module: torch.nn.Module) -> None:
54
145
  processed_layers.append(name)
55
146
 
56
147
  if processed_layers:
57
- print(f"Updated the following linear layers to quantized layers:\n {{{', '.join(processed_layers)}}}")
148
+ logger.debug(f"Updated the following linear layers to quantized layers:\n {{{', '.join(processed_layers)}}}")
149
+
150
+
151
def load_weights(model, model_id, n_layer=None):
    """
    Load safetensor file data directly into the model, filtering by layer if n_layer is provided.

    Args:
        model: Module whose parameters and buffers are overwritten in place.
        model_id: Local directory searched (non-recursively) for `*.safetensors` files.
        n_layer: When given, a tensor whose key has a numeric third dot-separated field
            is loaded only if that number is below `n_layer`.

    Tensors whose key matches neither a parameter nor a buffer of `model` are ignored.
    """
    model_params = dict(model.named_parameters(recurse=True))
    model_buffers = dict(model.named_buffers(recurse=True))

    # Escape the directory so glob metacharacters in the path ("[", "*", "?") are taken
    # literally, and sort so shards are always applied in the same order.
    safetensor_files = sorted(glob.glob(f"{glob.escape(str(model_id))}/*.safetensors"))

    for safetensor_file in safetensor_files:
        for key, value in load_file(safetensor_file).items():
            if n_layer is not None:
                parts = key.split(".")
                # Equivalent to "index not in range(n_layer)" without building a list.
                if len(parts) > 2 and parts[2].isdigit() and int(parts[2]) >= n_layer:
                    continue

            # Parameters take precedence over buffers; compare against None because
            # tensors must not be truth-tested.
            target = model_params.get(key)
            if target is None:
                target = model_buffers.get(key)
            if target is not None:
                target.data.copy_(value)
58
175
 
59
176
 
60
177
  def is_target_for_qlinear_replacement(layer_name: str, layer: torch.nn.Module) -> bool:
@@ -23,13 +23,17 @@ def remove_compile_time_kwargs(func):
23
23
  def wrapper(self, *args, **kwargs):
24
24
  height_exists = "height" in kwargs and kwargs["height"] is not None
25
25
  width_exists = "width" in kwargs and kwargs["width"] is not None
26
+ compiled_image_size = self.vae.image_size
26
27
  if height_exists or width_exists:
27
- logger.warning(
28
- "Image dimension parameters (`height`, `width`) will be ignored during inference. "
29
- "Image dimensions must be specified during model compilation using from_pretrained()."
30
- )
31
- kwargs.pop("width", None)
32
- kwargs.pop("height", None)
28
+ if kwargs["height"] == compiled_image_size[0] and kwargs["width"] == compiled_image_size[1]:
29
+ pass
30
+ else:
31
+ logger.warning(
32
+ "Image dimension parameters (`height`, `width`) will be ignored during inference. "
33
+ "Image dimensions must be specified during model compilation using from_pretrained()."
34
+ )
35
+ kwargs.pop("width", None)
36
+ kwargs.pop("height", None)
33
37
 
34
38
  if "cross_attention_kwargs" in kwargs:
35
39
  cross_attention_kwargs = kwargs.get("cross_attention_kwargs")
@@ -0,0 +1,131 @@
1
+ # Copyright 2024 Rebellions Inc.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # Portions of this software are licensed under the Apache License,
16
+ # Version 2.0. See the NOTICE file distributed with this work for
17
+ # additional information regarding copyright ownership.
18
+
19
+ # All other portions of this software, including proprietary code,
20
+ # are the intellectual property of Rebellions Inc. and may not be
21
+ # copied, modified, or distributed without prior written permission
22
+ # from Rebellions Inc.
23
+
24
+ import os
25
+ from pathlib import Path
26
+ from typing import List, Optional, Union
27
+
28
+ from huggingface_hub import HfApi, HfFolder, hf_hub_download
29
+
30
+
31
class PushToHubMixin:
    """Mixin that uploads a locally saved model directory to a Hugging Face Hub repository."""

    def push_to_hub(
        self,
        save_directory: str,
        repository_id: str,
        private: Optional[bool] = None,
        use_auth_token: Union[bool, str] = True,
    ) -> None:
        """
        Create `repository_id` if needed and upload every file under `save_directory`.

        Args:
            save_directory: Local directory that is walked recursively.
            repository_id: Target repository on the Hub.
            private: Forwarded to `create_repo`.
            use_auth_token: A token string, or True to use the locally stored token.

        Raises:
            ValueError: If `use_auth_token` is falsy.
        """
        huggingface_token = _get_huggingface_token(use_auth_token)
        api = HfApi()

        api.create_repo(
            token=huggingface_token,
            repo_id=repository_id,
            exist_ok=True,
            private=private,
        )
        for path, _subdirs, files in os.walk(save_directory):
            for name in files:
                local_file_path = os.path.join(path, name)
                # Keep the path relative to save_directory so files in subfolders stay in
                # their subfolder on the Hub and same-named files do not overwrite each other.
                hub_file_path = os.path.relpath(local_file_path, save_directory).replace(os.sep, "/")
                # FIXME: when huggingface_hub fixes the return of upload_file
                try:
                    api.upload_file(
                        token=huggingface_token,
                        repo_id=repository_id,
                        path_or_fileobj=os.path.abspath(local_file_path),
                        path_in_repo=hub_file_path,
                    )
                except (KeyError, NameError):
                    # Deliberate best-effort: see the FIXME above.
                    pass
64
+
65
+
66
def pull_compiled_model_from_hub(
    model_id: Union[str, Path],
    subfolder: str,
    use_auth_token: Optional[Union[bool, str]],
    revision: Optional[str],
    cache_dir: Optional[str],
    force_download: bool,
    local_files_only: bool,
) -> Path:
    """
    Pull model files from the Hugging Face Hub.

    Downloads every repository file located in the directory that holds
    `rbln_config.json` (the repository root, or `subfolder` when given) and returns
    the local directory containing the downloaded config.

    Raises:
        ValueError: If `use_auth_token` is falsy.
        FileNotFoundError: If no `*.rbln` file or no `rbln_config.json` is found.
        FileExistsError: If more than one `rbln_config.json` matches.
    """
    huggingface_token = _get_huggingface_token(use_auth_token)
    repo_files = [
        Path(repo_file)
        for repo_file in HfApi().list_repo_files(model_id, revision=revision, token=huggingface_token)
    ]

    pattern_rbln = "*.rbln" if subfolder == "" else f"{subfolder}/*.rbln"
    rbln_files = [p for p in repo_files if p.match(pattern_rbln)]

    pattern_config = "rbln_config.json" if subfolder == "" else f"{subfolder}/rbln_config.json"
    rbln_config_filenames = [p for p in repo_files if p.match(pattern_config)]

    validate_files(rbln_files, rbln_config_filenames, f"repository {model_id}")

    # validate_files guarantees exactly one config; everything that lives next to it
    # (or below it) belongs to the compiled model.
    config_repo_path = rbln_config_filenames[0]
    config_dir = config_repo_path.parent
    wanted_files = [p for p in repo_files if p.parent == config_dir or config_dir in p.parents]

    config_cache_path = None
    for repo_path in wanted_files:
        # `repo_path` is already the full path inside the repository, so `subfolder` is
        # not passed again: hf_hub_download would prefix it a second time.
        cached_path = hf_hub_download(
            repo_id=model_id,
            filename=repo_path.as_posix(),
            use_auth_token=use_auth_token,
            revision=revision,
            cache_dir=cache_dir,
            force_download=force_download,
            local_files_only=local_files_only,
        )
        if repo_path == config_repo_path:
            config_cache_path = cached_path

    # Anchor the result on the config file rather than on whichever file came last.
    return Path(config_cache_path).parent
107
+
108
+
109
def validate_files(
    files: List[Path],
    config_files: List[Path],
    location: str,
):
    """Ensure `location` provides at least one model file and exactly one config file."""
    if not files:
        raise FileNotFoundError(f"Could not find any rbln model file in {location}")

    config_count = len(config_files)
    if config_count == 0:
        raise FileNotFoundError(f"Could not find `rbln_config.json` file in {location}")
    if config_count > 1:
        raise FileExistsError(f"Multiple rbln_config.json files found in {location}. This is not expected.")
123
+
124
+
125
+ def _get_huggingface_token(use_auth_token: Union[bool, str]) -> str:
126
+ if isinstance(use_auth_token, str):
127
+ return use_auth_token
128
+ elif use_auth_token:
129
+ return HfFolder.get_token()
130
+ else:
131
+ raise ValueError("`use_auth_token` must be provided to interact with the Hugging Face Hub.")
@@ -37,11 +37,25 @@ class VersionCompat:
37
37
 
38
38
 
39
39
  RBLN_VERSION_COMPATS = {
40
+ "0.1.15": [
41
+ VersionCompat(
42
+ package_name="rebel-compiler",
43
+ min_version="0.6.2",
44
+ max_version="0.6.3",
45
+ ),
46
+ ],
47
+ "0.1.14": [
48
+ VersionCompat(
49
+ package_name="rebel-compiler",
50
+ min_version="0.6.2",
51
+ max_version="0.6.3",
52
+ ),
53
+ ],
40
54
  "0.1.13": [
41
55
  VersionCompat(
42
56
  package_name="rebel-compiler",
43
57
  min_version="0.6.0",
44
- max_version="0.6.1",
58
+ max_version="0.6.2",
45
59
  ),
46
60
  ],
47
61
  "0.1.12": [
@@ -0,0 +1,53 @@
1
+ # Copyright 2024 Rebellions Inc.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # Portions of this software are licensed under the Apache License,
16
+ # Version 2.0. See the NOTICE file distributed with this work for
17
+ # additional information regarding copyright ownership.
18
+
19
+ # All other portions of this software, including proprietary code,
20
+ # are the intellectual property of Rebellions Inc. and may not be
21
+ # copied, modified, or distributed without prior written permission
22
+ # from Rebellions Inc.
23
+
24
+
25
# Every RBLN model class name is its Hugging Face counterpart with this prefix.
RBLN_PREFIX = "RBLN"


def convert_hf_to_rbln_model_name(hf_model_name: str):
    """
    Build the RBLN class name for a Hugging Face model name.

    Args:
        hf_model_name (str): The Hugging Face model name.

    Returns:
        str: `hf_model_name` with the RBLN prefix prepended.
    """
    return "".join((RBLN_PREFIX, hf_model_name))


def convert_rbln_to_hf_model_name(rbln_model_name: str):
    """
    Recover the Hugging Face model name from an RBLN class name.

    Args:
        rbln_model_name (str): The RBLN model name.

    Returns:
        str: The name without one leading RBLN prefix; names that do not start
        with the prefix come back unchanged.
    """
    if rbln_model_name.startswith(RBLN_PREFIX):
        return rbln_model_name[len(RBLN_PREFIX):]
    return rbln_model_name
@@ -43,7 +43,7 @@ class RBLNPytorchRuntime:
43
43
  return self.forward(*args, **kwds)
44
44
 
45
45
  def forward(self, *args: List["torch.Tensor"], **kwargs: Dict[str, "torch.Tensor"]):
46
- # filtering uselss args or kwarg such as None.
46
+ # filtering useless args or kwarg such as None.
47
47
  args = list(filter(lambda arg: isinstance(arg, torch.Tensor), args))
48
48
  kwargs = dict(filter(lambda kwarg: isinstance(kwarg[1], torch.Tensor) or kwarg[0] == "out", kwargs.items()))
49
49
  output = self.runtime(*args, **kwargs)
@@ -0,0 +1,114 @@
1
+ import importlib
2
+ from pathlib import Path
3
+ from typing import TYPE_CHECKING, Any, Dict, List
4
+
5
+ from ..modeling_config import RBLNConfig
6
+
7
+
8
+ if TYPE_CHECKING:
9
+ from transformers import PreTrainedModel
10
+
11
+ from ..modeling_base import RBLNBaseModel
12
+
13
+
14
class SubModulesMixin:
    """
    Mixin for RBLN models that are built from separately handled submodules.

    Subclasses declare their submodules, in order, through `_rbln_submodules`:

    _rbln_submodules = [
        {"name": "vision_tower"},
        {"name": "language_model"},
    ]
    """

    _rbln_submodules: List[Dict[str, Any]] = []

    def __init__(
        self,
        *,
        rbln_submodules: List["RBLNBaseModel"] = [],
        **kwargs,
    ) -> None:
        """Attach each given submodule as an attribute named after its declaration."""
        # The default list is only read, never mutated, so sharing it between calls is harmless.
        # zip pairs declarations and instances positionally and stops at the shorter one.
        for submodule_meta, submodule in zip(self._rbln_submodules, rbln_submodules):
            setattr(self, submodule_meta["name"], submodule)

    @classmethod
    def _export_submodules_from_model(
        cls,
        model: "PreTrainedModel",
        model_save_dir: str,
        rbln_kwargs: Dict[str, Any],
        **kwargs,
    ) -> List["RBLNBaseModel"]:
        """
        Build one RBLN submodule per declared name from the attributes of `model`.

        The RBLN class is looked up in `optimum.rbln` as "RBLN" + the attribute's class
        name. A submodule's entry in `rbln_kwargs` is passed as its `rbln_config`.
        """
        rbln_submodules = []
        for submodule in cls._rbln_submodules:
            submodule_name = submodule["name"]
            torch_submodule: "PreTrainedModel" = getattr(model, submodule_name)
            cls_name = torch_submodule.__class__.__name__
            submodule_cls: "RBLNBaseModel" = getattr(importlib.import_module("optimum.rbln"), f"RBLN{cls_name}")

            # Use a per-submodule copy so one submodule's rbln_config never leaks
            # into a later submodule that has no entry in rbln_kwargs.
            submodule_kwargs = dict(kwargs)
            if submodule_name in rbln_kwargs:
                submodule_kwargs["rbln_config"] = rbln_kwargs[submodule_name]

            rbln_submodule = submodule_cls.from_model(
                model=torch_submodule,
                subfolder=submodule_name,
                model_save_dir=model_save_dir,
                **submodule_kwargs,
            )

            rbln_submodules.append(rbln_submodule)

        return rbln_submodules

    @classmethod
    def _load_submodules_from_compiled_models(
        cls,
        model_save_dir: str,
        rbln_kwargs: Dict[str, Any],
        **kwargs,
    ) -> List["RBLNBaseModel"]:
        """
        Load one RBLN submodule per declared name from `model_save_dir/<name>`.

        The class to instantiate is read from the "cls" entry of the submodule's
        saved RBLNConfig metadata and looked up in `optimum.rbln`.
        """
        rbln_submodules = []
        for submodule in cls._rbln_submodules:
            submodule_name = submodule["name"]

            # Per-submodule copy, for the same reason as in the export path.
            submodule_kwargs = dict(kwargs)
            if submodule_name in rbln_kwargs:
                submodule_kwargs["rbln_config"] = rbln_kwargs[submodule_name]

            # Get the class name so the constructor of the matching RBLN class can be called.
            submodule_rbln_config = RBLNConfig.load(Path(model_save_dir) / submodule_name)
            submodule_cls_name = submodule_rbln_config.meta["cls"]
            submodule_cls: "RBLNBaseModel" = getattr(importlib.import_module("optimum.rbln"), submodule_cls_name)

            rbln_submodule = submodule_cls._from_pretrained(
                model_id=model_save_dir,
                config=None,
                subfolder=submodule_name,
                **submodule_kwargs,
            )
            rbln_submodules.append(rbln_submodule)
        return rbln_submodules

    @classmethod
    def _load_submodules(
        cls,
        model_save_dir,
        rbln_kwargs,
        model=None,
        **kwargs,
    ):
        """Return the submodules, exporting from `model` when given, else loading compiled ones."""
        # Two ways:
        # 1. Compile from a PyTorch object
        # 2. Load from compiled files
        if model is not None:
            return cls._export_submodules_from_model(
                model=model,
                model_save_dir=model_save_dir,
                rbln_kwargs=rbln_kwargs,
                **kwargs,
            )

        return cls._load_submodules_from_compiled_models(
            model_save_dir=model_save_dir,
            rbln_kwargs=rbln_kwargs,
            **kwargs,
        )
@@ -0,0 +1,106 @@
1
+ Metadata-Version: 2.3
2
+ Name: optimum-rbln
3
+ Version: 0.1.15
4
+ Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
5
+ Project-URL: Homepage, https://rebellions.ai
6
+ Project-URL: Documentation, https://docs.rbln.ai
7
+ Author-email: "Rebellions Inc." <support@rebellions.ai>
8
+ License: Apache
9
+ Keywords: atom,diffusers,inference,rbln,rebel,transformers
10
+ Classifier: Development Status :: 2 - Pre-Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Education
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Operating System :: POSIX :: Linux
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: <3.13,>=3.9
23
+ Requires-Dist: accelerate>=1.0.1
24
+ Requires-Dist: diffusers<=0.31.0
25
+ Requires-Dist: einops>=0.8.0
26
+ Requires-Dist: halo>=0.0.31
27
+ Requires-Dist: packaging>=24.1
28
+ Requires-Dist: torch<=2.5.1
29
+ Requires-Dist: torchaudio<=2.5.1
30
+ Requires-Dist: torchvision<=0.20.1
31
+ Requires-Dist: transformers==4.45.2
32
+ Description-Content-Type: text/markdown
33
+
34
+
35
+ # Optimum RBLN
36
+
37
+ <div align="center">
38
+
39
+ <img src="assets/rbln_logo.png" width="60%"/>
40
+
41
+ [![PyPI version](https://badge.fury.io/py/optimum-rbln.svg)](https://badge.fury.io/py/optimum-rbln)
42
+ [![License](https://img.shields.io/github/license/rebellions-sw/optimum-rbln)](https://github.com/rebellions-sw/optimum-rbln/blob/main/LICENSE)
43
+
44
+ </div>
45
+
46
+ 🤗 Optimum RBLN provides an interface between Hugging Face libraries ([Transformers](https://huggingface.co/docs/transformers), [Diffusers](https://huggingface.co/docs/diffusers/index)) and RBLN Accelerators, including [ATOM](https://rebellions.ai/rebellions-product/rbln-ca25/) and [REBEL](https://rebellions.ai/rebellions-product/rebel/).
47
+
48
+ This library enables seamless integration between the Hugging Face ecosystem and RBLN's NPU acceleration through a comprehensive toolkit for model loading and inference across single- and multi-Accelerator environments. While we maintain a list of [officially validated models and tasks](https://docs.rbln.ai/software/optimum/optimum_rbln.html), users can easily adapt other models and tasks with minimal modifications.
49
+
50
+ ## Key Features
51
+
52
+ 🚀 **High Performance Inference**
53
+ - Optimized model execution on RBLN NPUs through RBLN SDK compilation
54
+ - Support for both single-NPU and multi-NPU inference
55
+ - Integrated with RBLN Runtime for optimal performance
56
+
57
+ 🔧 **Easy Integration**
58
+ - Seamless compatibility with the Hugging Face model hub
59
+ - Drop-in replacement for existing Hugging Face pipelines
60
+ - Minimal code changes required for NPU acceleration
61
+
62
+
63
+ ## Documentation
64
+
65
+ Check out [the documentation of Optimum RBLN](https://docs.rbln.ai/software/optimum/optimum_rbln.html) for more advanced usage.
66
+
67
+ ## Getting Started
68
+
69
+ ### Install from PyPI
70
+
71
+ To install the latest release of this package:
72
+
73
+ - Export environment variables to access the RBLN private PyPI.
74
+ ```bash
75
+ export REBEL_PYPI_USERNAME=<username>
76
+ export REBEL_PYPI_PASSWORD=<password>
77
+ ```
78
+
79
+ - Install optimum-rbln package:
80
+ ```bash
81
+ pip install --index-url https://pypi.rebellions.in/simple optimum-rbln
82
+ ```
83
+
84
+ ### Install from source
85
+
86
+ #### Prerequisites
87
+
88
+ - Install [uv](https://docs.astral.sh/uv/) (refer to [this link](https://docs.astral.sh/uv/getting-started/installation/) for detailed commands)
89
+
90
+ The below command installs optimum-rbln along with its dependencies.
91
+
92
+ ```bash
93
+ git clone https://github.com/rebellions-sw/optimum-rbln.git
94
+ cd optimum-rbln
95
+ ./scripts/uv-sync.sh
96
+ ```
97
+
98
+ To install a local rebel-compiler in editable mode in the uv environment:
99
+ ```bash
100
+ uv pip install -e /path/to/rebel_compiler/python
101
+ ```
102
+
103
+ ### Need Help?
104
+
105
+ - Join our [Developer Community](https://discuss.rebellions.ai/)
106
+ - Contact maintainers at [support@rebellions.ai](mailto:support@rebellions.ai)