optimum-rbln 0.9.3.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of optimum-rbln might be problematic. Click here for more details.
- optimum/rbln/__init__.py +505 -0
- optimum/rbln/__version__.py +34 -0
- optimum/rbln/cli.py +660 -0
- optimum/rbln/configuration_utils.py +968 -0
- optimum/rbln/diffusers/__init__.py +198 -0
- optimum/rbln/diffusers/configurations/__init__.py +37 -0
- optimum/rbln/diffusers/configurations/models/__init__.py +10 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +73 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +84 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +64 -0
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +59 -0
- optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +78 -0
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +63 -0
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +81 -0
- optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +74 -0
- optimum/rbln/diffusers/configurations/pipelines/__init__.py +34 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +316 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +117 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +363 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +156 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +176 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +159 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
- optimum/rbln/diffusers/modeling_diffusers.py +451 -0
- optimum/rbln/diffusers/models/__init__.py +64 -0
- optimum/rbln/diffusers/models/autoencoders/__init__.py +18 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +255 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +245 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
- optimum/rbln/diffusers/models/autoencoders/vae.py +178 -0
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +211 -0
- optimum/rbln/diffusers/models/controlnet.py +281 -0
- optimum/rbln/diffusers/models/transformers/__init__.py +17 -0
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +160 -0
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +344 -0
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +191 -0
- optimum/rbln/diffusers/models/unets/__init__.py +16 -0
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +408 -0
- optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
- optimum/rbln/diffusers/pipelines/__init__.py +113 -0
- optimum/rbln/diffusers/pipelines/auto_pipeline.py +307 -0
- optimum/rbln/diffusers/pipelines/controlnet/__init__.py +19 -0
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +139 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +669 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +640 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +825 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +837 -0
- optimum/rbln/diffusers/pipelines/cosmos/__init__.py +17 -0
- optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +113 -0
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +425 -0
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +128 -0
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +128 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +23 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +34 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +207 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +34 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +34 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +17 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +32 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +17 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +17 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
- optimum/rbln/modeling.py +364 -0
- optimum/rbln/modeling_base.py +637 -0
- optimum/rbln/ops/__init__.py +19 -0
- optimum/rbln/ops/attn.py +455 -0
- optimum/rbln/ops/flash_attn.py +350 -0
- optimum/rbln/ops/kv_cache_update.py +29 -0
- optimum/rbln/ops/linear.py +32 -0
- optimum/rbln/ops/sliding_window_attn.py +111 -0
- optimum/rbln/transformers/__init__.py +340 -0
- optimum/rbln/transformers/configuration_generic.py +120 -0
- optimum/rbln/transformers/modeling_attention_utils.py +385 -0
- optimum/rbln/transformers/modeling_generic.py +280 -0
- optimum/rbln/transformers/modeling_outputs.py +37 -0
- optimum/rbln/transformers/modeling_rope_utils.py +314 -0
- optimum/rbln/transformers/models/__init__.py +343 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +17 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +47 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +91 -0
- optimum/rbln/transformers/models/auto/__init__.py +31 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +267 -0
- optimum/rbln/transformers/models/auto/modeling_auto.py +162 -0
- optimum/rbln/transformers/models/bart/__init__.py +17 -0
- optimum/rbln/transformers/models/bart/bart_architecture.py +163 -0
- optimum/rbln/transformers/models/bart/configuration_bart.py +36 -0
- optimum/rbln/transformers/models/bart/modeling_bart.py +86 -0
- optimum/rbln/transformers/models/bert/__init__.py +16 -0
- optimum/rbln/transformers/models/bert/bert_architecture.py +16 -0
- optimum/rbln/transformers/models/bert/configuration_bert.py +46 -0
- optimum/rbln/transformers/models/bert/modeling_bert.py +148 -0
- optimum/rbln/transformers/models/blip_2/__init__.py +20 -0
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +115 -0
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +526 -0
- optimum/rbln/transformers/models/clip/__init__.py +26 -0
- optimum/rbln/transformers/models/clip/configuration_clip.py +103 -0
- optimum/rbln/transformers/models/clip/modeling_clip.py +384 -0
- optimum/rbln/transformers/models/colpali/__init__.py +2 -0
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +218 -0
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +84 -0
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +361 -0
- optimum/rbln/transformers/models/colqwen2/__init__.py +2 -0
- optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +233 -0
- optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +74 -0
- optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +446 -0
- optimum/rbln/transformers/models/decoderonly/__init__.py +27 -0
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +300 -0
- optimum/rbln/transformers/models/decoderonly/configuration_lora.py +411 -0
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +1224 -0
- optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +508 -0
- optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +119 -0
- optimum/rbln/transformers/models/decoderonly/lora_architecture.py +204 -0
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +823 -0
- optimum/rbln/transformers/models/depth_anything/__init__.py +16 -0
- optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +24 -0
- optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +42 -0
- optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
- optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +24 -0
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +51 -0
- optimum/rbln/transformers/models/dpt/__init__.py +16 -0
- optimum/rbln/transformers/models/dpt/configuration_dpt.py +24 -0
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +42 -0
- optimum/rbln/transformers/models/exaone/__init__.py +24 -0
- optimum/rbln/transformers/models/exaone/configuration_exaone.py +42 -0
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +77 -0
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +145 -0
- optimum/rbln/transformers/models/gemma/__init__.py +16 -0
- optimum/rbln/transformers/models/gemma/configuration_gemma.py +50 -0
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +27 -0
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +104 -0
- optimum/rbln/transformers/models/gemma3/__init__.py +16 -0
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +109 -0
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +170 -0
- optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +245 -0
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +611 -0
- optimum/rbln/transformers/models/gpt2/__init__.py +16 -0
- optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +50 -0
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +93 -0
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +55 -0
- optimum/rbln/transformers/models/grounding_dino/__init__.py +10 -0
- optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +92 -0
- optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +599 -0
- optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +1048 -0
- optimum/rbln/transformers/models/idefics3/__init__.py +16 -0
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +89 -0
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +497 -0
- optimum/rbln/transformers/models/llama/__init__.py +16 -0
- optimum/rbln/transformers/models/llama/configuration_llama.py +50 -0
- optimum/rbln/transformers/models/llama/llama_architecture.py +19 -0
- optimum/rbln/transformers/models/llama/modeling_llama.py +104 -0
- optimum/rbln/transformers/models/llava/__init__.py +16 -0
- optimum/rbln/transformers/models/llava/configuration_llava.py +72 -0
- optimum/rbln/transformers/models/llava/modeling_llava.py +490 -0
- optimum/rbln/transformers/models/llava_next/__init__.py +16 -0
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +69 -0
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +493 -0
- optimum/rbln/transformers/models/midm/__init__.py +24 -0
- optimum/rbln/transformers/models/midm/configuration_midm.py +42 -0
- optimum/rbln/transformers/models/midm/midm_architecture.py +144 -0
- optimum/rbln/transformers/models/midm/modeling_midm.py +144 -0
- optimum/rbln/transformers/models/mistral/__init__.py +16 -0
- optimum/rbln/transformers/models/mistral/configuration_mistral.py +50 -0
- optimum/rbln/transformers/models/mistral/mistral_architecture.py +19 -0
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +115 -0
- optimum/rbln/transformers/models/opt/__init__.py +16 -0
- optimum/rbln/transformers/models/opt/configuration_opt.py +29 -0
- optimum/rbln/transformers/models/opt/modeling_opt.py +102 -0
- optimum/rbln/transformers/models/opt/opt_architecture.py +74 -0
- optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
- optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +38 -0
- optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +71 -0
- optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +161 -0
- optimum/rbln/transformers/models/phi/__init__.py +16 -0
- optimum/rbln/transformers/models/phi/configuration_phi.py +50 -0
- optimum/rbln/transformers/models/phi/modeling_phi.py +92 -0
- optimum/rbln/transformers/models/phi/phi_architecture.py +115 -0
- optimum/rbln/transformers/models/pixtral/__init__.py +16 -0
- optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
- optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +322 -0
- optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
- optimum/rbln/transformers/models/qwen2/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +50 -0
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +123 -0
- optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +19 -0
- optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +19 -0
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +111 -0
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +636 -0
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +220 -0
- optimum/rbln/transformers/models/qwen2_vl/__init__.py +19 -0
- optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +88 -0
- optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +513 -0
- optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +165 -0
- optimum/rbln/transformers/models/qwen3/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +71 -0
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +133 -0
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +31 -0
- optimum/rbln/transformers/models/resnet/__init__.py +23 -0
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +42 -0
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +99 -0
- optimum/rbln/transformers/models/roberta/__init__.py +24 -0
- optimum/rbln/transformers/models/roberta/configuration_roberta.py +33 -0
- optimum/rbln/transformers/models/roberta/modeling_roberta.py +72 -0
- optimum/rbln/transformers/models/seq2seq/__init__.py +16 -0
- optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +71 -0
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +477 -0
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +527 -0
- optimum/rbln/transformers/models/siglip/__init__.py +16 -0
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +76 -0
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +199 -0
- optimum/rbln/transformers/models/swin/__init__.py +16 -0
- optimum/rbln/transformers/models/swin/configuration_swin.py +42 -0
- optimum/rbln/transformers/models/swin/modeling_swin.py +354 -0
- optimum/rbln/transformers/models/t5/__init__.py +17 -0
- optimum/rbln/transformers/models/t5/configuration_t5.py +36 -0
- optimum/rbln/transformers/models/t5/modeling_t5.py +130 -0
- optimum/rbln/transformers/models/t5/t5_architecture.py +264 -0
- optimum/rbln/transformers/models/time_series_transformer/__init__.py +26 -0
- optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +41 -0
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +435 -0
- optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +337 -0
- optimum/rbln/transformers/models/vit/__init__.py +19 -0
- optimum/rbln/transformers/models/vit/configuration_vit.py +24 -0
- optimum/rbln/transformers/models/vit/modeling_vit.py +44 -0
- optimum/rbln/transformers/models/wav2vec2/__init__.py +16 -0
- optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +38 -0
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +104 -0
- optimum/rbln/transformers/models/whisper/__init__.py +17 -0
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +72 -0
- optimum/rbln/transformers/models/whisper/generation_whisper.py +159 -0
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +475 -0
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +349 -0
- optimum/rbln/transformers/models/xlm_roberta/__init__.py +24 -0
- optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +32 -0
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +82 -0
- optimum/rbln/transformers/utils/__init__.py +0 -0
- optimum/rbln/transformers/utils/rbln_quantization.py +589 -0
- optimum/rbln/transformers/utils/rbln_runtime_wrapper.py +79 -0
- optimum/rbln/utils/__init__.py +16 -0
- optimum/rbln/utils/decorator_utils.py +86 -0
- optimum/rbln/utils/deprecation.py +213 -0
- optimum/rbln/utils/hub.py +94 -0
- optimum/rbln/utils/import_utils.py +170 -0
- optimum/rbln/utils/logging.py +110 -0
- optimum/rbln/utils/model_utils.py +63 -0
- optimum/rbln/utils/runtime_utils.py +249 -0
- optimum/rbln/utils/save_utils.py +102 -0
- optimum/rbln/utils/submodule.py +152 -0
- optimum_rbln-0.9.3.post1.dist-info/METADATA +124 -0
- optimum_rbln-0.9.3.post1.dist-info/RECORD +264 -0
- optimum_rbln-0.9.3.post1.dist-info/WHEEL +4 -0
- optimum_rbln-0.9.3.post1.dist-info/entry_points.txt +2 -0
- optimum_rbln-0.9.3.post1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import importlib
|
|
16
|
+
from typing import TYPE_CHECKING, Type
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from ..modeling import RBLNModel
|
|
21
|
+
|
|
22
|
+
# Prefix used for RBLN model class names
|
|
23
|
+
RBLN_PREFIX = "RBLN"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
MODEL_MAPPING = {}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def convert_hf_to_rbln_model_name(hf_model_name: str):
    """Build the RBLN-side class name for a HuggingFace model class.

    Args:
        hf_model_name (str): Name of the original HuggingFace model class.

    Returns:
        str: The same name with the ``RBLN`` prefix prepended.
    """
    rbln_name = RBLN_PREFIX + hf_model_name
    return rbln_name
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def convert_rbln_to_hf_model_name(rbln_model_name: str):
    """Recover the HuggingFace class name from an RBLN model class name.

    Args:
        rbln_model_name (str): The RBLN model name (usually ``RBLN``-prefixed).

    Returns:
        str: The name without the ``RBLN`` prefix; unchanged if the prefix
        is absent.
    """
    # Equivalent to str.removeprefix(RBLN_PREFIX): strip only a leading match.
    if rbln_model_name.startswith(RBLN_PREFIX):
        return rbln_model_name[len(RBLN_PREFIX):]
    return rbln_model_name
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_rbln_model_cls(cls_name: str) -> Type["RBLNModel"]:
    """Resolve an RBLN model class by its class name.

    Looks the name up on the ``optimum.rbln`` package first, then falls back
    to the dynamically registered ``MODEL_MAPPING``.

    Args:
        cls_name (str): Name of the RBLN model class (e.g. ``RBLNLlamaForCausalLM``).

    Returns:
        Type[RBLNModel]: The resolved model class.

    Raises:
        AttributeError: If the name is found in neither location.
    """
    package = importlib.import_module("optimum.rbln")
    resolved = getattr(package, cls_name, None)
    if resolved is not None:
        return resolved
    # Fall back to classes registered at runtime (not exported by the package).
    if cls_name in MODEL_MAPPING:
        return MODEL_MAPPING[cls_name]
    raise AttributeError(f"RBLNModel for {cls_name} not found.")
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
import threading
|
|
17
|
+
from typing import Any, List, Optional, Union
|
|
18
|
+
|
|
19
|
+
import rebel
|
|
20
|
+
import torch
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_available_dram(npu: Optional[str] = None) -> int:
    """
    Get the available DRAM size of the specified NPU.

    Args:
        npu : Optional[str], default=None
            The NPU product name (e.g. ``"RBLN-CA25"``). If None, the name of
            device 0 is queried via the ``rebel`` runtime.

    Returns:
        int
            The available DRAM size in bytes (total minus the system-reserved
            portion).

    Raises:
        RuntimeError: If ``npu`` is None and no NPU device is available.
        ValueError: If the NPU name is not a recognized product family.
    """
    if npu is None:
        # No name supplied: probe device 0 through the rebel runtime.
        if not rebel.npu_is_available(0):
            raise RuntimeError("No NPU is available to get available DRAM size.")
        npu = rebel.get_npu_name(0)

    if npu.startswith("RBLN-CR"):
        # TODO(jongho): Assuming 4 chiplets.
        total_nbytes = 144 * 2**30
        reserved_nbytes = 4 * 2**30
    elif npu.startswith("RBLN-CA"):
        total_nbytes = 16 * 2**30
        reserved_nbytes = 288 * 2**20
    else:
        raise ValueError(f"Unknown npu name: {npu}")

    return total_nbytes - reserved_nbytes
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def normalize_npu(npu: str) -> str:
    """Normalize an NPU product name by dropping the form-factor digit.

    Examples: ``"RBLN-CA2"`` -> ``"RBLN-CA0"``, ``"RBLN-CA15"`` -> ``"RBLN-CA1"``.
    Strings that do not match the known families (or have 3+ digits) are
    returned unchanged.
    """
    parsed = re.match(r"(RBLN-CA|RBLN-CR)(\d+)", npu)
    if parsed is None:
        return npu
    family, digits = parsed.groups()
    if len(digits) == 1:
        # Single-digit variants all normalize to generation 0.
        return f"{family}0"
    if len(digits) == 2:
        # Two digits: keep the generation, drop the trailing form-factor digit.
        return f"{family}{digits[0]}"
    return npu
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def tp_and_devices_are_ok(
    tensor_parallel_size: Optional[int] = None,
    device: Optional[Union[int, List[int]]] = None,
    npu: Optional[str] = None,
) -> Optional[str]:
    """Validate a tensor-parallel / device configuration against available NPUs.

    Args:
        tensor_parallel_size: Requested tensor parallel degree (defaults to 1).
        device: A device index, a list of device indices, or None to use
            devices ``0..tensor_parallel_size-1``.
        npu: Optional NPU product name every device must match (after
            normalization).

    Returns:
        None when the configuration is usable, otherwise a human-readable
        error message describing the first problem found.
    """
    if tensor_parallel_size is None:
        tensor_parallel_size = 1

    if rebel.device_count() < tensor_parallel_size:
        return (
            f"Tensor parallel size {tensor_parallel_size} is greater than "
            f"the number of available devices {rebel.device_count()}."
        )

    # Normalize `device` into a list of integer device ids.
    if device is None:
        devices = list(range(tensor_parallel_size))
    elif isinstance(device, int):
        devices = [device]
    elif isinstance(device, list):
        if not all(isinstance(d, int) for d in device):
            return "Device must be a(n) (list of) integer(s)."
        if len(device) != tensor_parallel_size:
            return (
                f"The number of devices ({len(device)}) does not match tensor parallel size ({tensor_parallel_size})."
            )
        devices = device
    else:
        return f"Invalid device: {device}"

    for device_id in devices:
        # A negative id marks a dummy device; the whole config is accepted
        # without further hardware checks (matches original behavior).
        if device_id < 0:
            return None
        if rebel.get_npu_name(device_id) is None:
            return (
                f"Device {device_id} is not a valid NPU device. Please check your NPU status with 'rbln-stat' command."
            )

    if npu is not None:
        # Every requested device must be the same (normalized) NPU product.
        for device_id in devices:
            npu_name = rebel.get_npu_name(device_id)
            if normalize_npu(npu_name) != normalize_npu(npu):
                return f"Device {device_id} ({npu_name}) is not on the same NPU as {npu}."

    return None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class RBLNPytorchRuntime:
    """Callable wrapper exposing a compiled ``rebel.Runtime`` with a
    torch-module-like interface (``__call__`` -> ``forward``)."""

    # Subclasses list attribute names that must be provided via kwargs.
    mandatory_members = []

    def __init__(self, runtime: rebel.Runtime, **kwargs) -> None:
        self.runtime = runtime
        for name, value in kwargs.items():
            setattr(self, name, value)
        # Fail fast if a subclass-required attribute was not supplied.
        for required in self.mandatory_members:
            if required not in kwargs:
                raise AttributeError(f"`{required}` should be assigned to {self.__class__.__name__} objects.")

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        return self.forward(*args, **kwds)

    def forward(self, *args: List["torch.Tensor"], **kwargs: "torch.Tensor"):
        # Drop non-tensor positional/keyword arguments (e.g. None placeholders);
        # the special "out" kwarg is always forwarded.
        tensor_args = [a for a in args if isinstance(a, torch.Tensor)]
        tensor_kwargs = {k: v for k, v in kwargs.items() if isinstance(v, torch.Tensor) or k == "out"}
        return self.runtime(*tensor_args, **tensor_kwargs)

    def __repr__(self) -> str:
        return repr(self.runtime)

    def parameters(self):
        # Dummy single parameter so code that inspects .parameters() (e.g. to
        # infer device/dtype) keeps working; the real weights live on the NPU.
        yield torch.tensor([1.0], dtype=torch.float32, device=torch.device("cpu"))
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class UnavailableRuntime:
    """
    A placeholder class used when model runtimes are not created.

    This class is returned by RBLNBaseModel._from_compiled_models when rbln_config.create_runtimes=False.
    It provides proper error messages when users attempt to use a model that was loaded without
    runtime creation.

    Usage:
    1. When compiling models on machines without NPU hardware
    2. When preparing models for later deployment
    3. When only model compilation is needed, not inference

    To use a model with runtimes, either:
    - Load the model with from_pretrained(..., rbln_create_runtimes=True)
    - Or set rbln_config={"create_runtimes": True} during loading
    """

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """Raises a RuntimeError when the model is called without runtimes."""
        # Fix: was `raise self.forward(...)`. `forward()` always raises itself,
        # so the outer `raise` was dead code — and if `forward` ever returned a
        # non-exception value, `raise` on it would produce a confusing TypeError.
        return self.forward(*args, **kwargs)

    def __len__(self) -> int:
        """Returns 0 since no runtimes are available."""
        return 0

    def __getitem__(self, idx: int) -> Any:
        """Returns self for any index, allowing iteration to work with appropriate errors."""
        return self

    def __iter__(self):
        """Returns an iterator with self as the only item."""
        return iter([self])

    def forward(self, *args: List["torch.Tensor"], **kwargs: "torch.Tensor"):
        """Raises a detailed RuntimeError explaining why inference cannot be performed."""
        raise RuntimeError(
            "Cannot perform inference: RBLN runtime is not available.\n\n"
            "This model was loaded with create_runtimes=False. To use this model for inference:\n"
            "1. Load the model with runtime creation enabled:\n"
            "   model = RBLNModel.from_pretrained(..., rbln_create_runtimes=True)\n"
            "2. Ensure your NPU hardware is properly configured (check with 'rbln-stat' command)\n"
            "3. If you're on a machine without NPU hardware, you need to transfer the model files\n"
            "   to a compatible system with NPU support."
        )

    def __repr__(self) -> str:
        """Returns a detailed string representation of the UnavailableRuntime."""
        return "<UnavailableRuntime: Model loaded without runtime creation (create_runtimes=False)>"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class ContextRblnConfig:
    """Context manager that temporarily overrides RBLN runtime options.

    Values are stored in thread-local storage so concurrent threads can hold
    different overrides. On exit, the previously stored values are restored.
    Only non-None constructor arguments override the current context.
    """

    # Option names managed by this context (drives snapshot/restore/query).
    _FIELDS = ("device", "device_map", "create_runtimes", "activate_profiler", "timeout")

    # Shared across instances; per-thread values live on this object.
    _local = threading.local()

    def __init__(
        self,
        device=None,
        device_map=None,
        create_runtimes=None,
        activate_profiler=None,
        timeout=None,
    ):
        self.device = device
        self.device_map = device_map
        self.create_runtimes = create_runtimes
        self.activate_profiler = activate_profiler
        self.timeout = timeout
        self._previous_context = None

    def __enter__(self):
        # Snapshot the current thread-local values so __exit__ can restore them.
        self._previous_context = {name: getattr(self._local, name, None) for name in self._FIELDS}

        # Apply only the options that were explicitly provided (non-None).
        for name in self._FIELDS:
            override = getattr(self, name)
            if override is not None:
                setattr(self._local, name, override)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self._previous_context is not None:
            for name in self._FIELDS:
                setattr(self._local, name, self._previous_context[name])

    @classmethod
    def get_current_context(cls):
        """Return the current thread's option values as a dict (None = unset)."""
        return {name: getattr(cls._local, name, None) for name in cls._FIELDS}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
16
|
+
|
|
17
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
18
|
+
# you may not use this file except in compliance with the License.
|
|
19
|
+
# You may obtain a copy of the License at:
|
|
20
|
+
|
|
21
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
22
|
+
|
|
23
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
24
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
25
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
26
|
+
# See the License for the specific language governing permissions and
|
|
27
|
+
# limitations under the License.
|
|
28
|
+
|
|
29
|
+
"""
|
|
30
|
+
Refer to huggingface/optimum/blob/4fdeea77d71e79451ba53e0c1f9d8f37e9704268/optimum/utils/save_utils.py
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import List, Union
|
|
35
|
+
|
|
36
|
+
from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
|
|
37
|
+
|
|
38
|
+
from .logging import get_logger
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
logger = get_logger(__name__)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def maybe_load_preprocessors(
    src_name_or_path: Union[str, Path], subfolder: str = "", trust_remote_code: bool = False
) -> List:
    """Best-effort loading of the preprocessors associated with a model.

    Tries, in order, `AutoTokenizer`, `AutoProcessor` and `AutoFeatureExtractor`
    against `src_name_or_path`; whichever ones load successfully are collected.

    Args:
        src_name_or_path (`Union[str, Path]`):
            Local model directory or Hugging Face Hub repo id.
        subfolder (`str`, defaults to `""`):
            Subfolder of the directory / repo where the preprocessor files live.
        trust_remote_code (`bool`, defaults to `False`):
            Whether to allow loading preprocessors that may execute remote code.

    Returns:
        `List`: the preprocessors that could be loaded (possibly empty).
    """
    preprocessors = []
    # Each Auto* class raises for models that do not ship that preprocessor
    # type; loading is deliberately best-effort, so failures are swallowed.
    for auto_cls in (AutoTokenizer, AutoProcessor, AutoFeatureExtractor):
        try:
            preprocessors.append(
                auto_cls.from_pretrained(
                    src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
                )
            )
        except Exception:
            pass
    return preprocessors
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def maybe_save_preprocessors(
    src_name_or_path: Union[str, Path],
    dest_dir: Union[str, Path],
    src_subfolder: str = "",
    trust_remote_code: bool = False,
):
    """Save the tokenizer, processor and feature extractor found for
    `src_name_or_path` into `dest_dir`.

    Args:
        src_name_or_path (`Union[str, Path]`):
            Local model directory or Hugging Face Hub repo id to load the
            preprocessors from.
        dest_dir (`Union[str, Path]`):
            The destination directory to save the preprocessors to. Created if
            it does not exist (its parent must already exist).
        src_subfolder (`str`, defaults to `""`):
            In case the preprocessor files are located inside a subfolder of the
            model directory / repo on the Hugging Face Hub, the subfolder name.
        trust_remote_code (`bool`, defaults to `False`):
            Whether to allow loading preprocessors that may execute arbitrary
            remote code. Use this option at your own risk.

    Returns:
        The list of preprocessors that were found and saved.
    """
    # Path(Path(...)) is a no-op, so the conversion can be unconditional.
    dest_dir = Path(dest_dir)

    dest_dir.mkdir(exist_ok=True)
    preprocessors = maybe_load_preprocessors(
        src_name_or_path, subfolder=src_subfolder, trust_remote_code=trust_remote_code
    )
    for preprocessor in preprocessors:
        preprocessor.save_pretrained(dest_dir)
    return preprocessors
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union
|
|
17
|
+
|
|
18
|
+
from transformers import PretrainedConfig
|
|
19
|
+
|
|
20
|
+
from ..configuration_utils import RBLNModelConfig, get_rbln_config_class
|
|
21
|
+
from ..utils.model_utils import get_rbln_model_cls
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel
|
|
26
|
+
|
|
27
|
+
from ..modeling import RBLNModel
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SubModulesMixin:
    """Mixin for RBLN models composed of independently compiled submodules.

    Subclasses declare their submodules via `_rbln_submodules`, e.g.::

        _rbln_submodules = [
            {"name": "vision_tower"},
            {"name": "language_model"},
        ]

    Each entry names an attribute that holds a compiled `RBLNModel`, and the
    mixin takes care of exporting them from a PyTorch model or loading them
    back from a compiled directory.
    """

    # Metadata describing the submodules of this model; overridden by subclasses.
    _rbln_submodules: List[Dict[str, Any]] = []

    def __init__(self, *, rbln_submodules: Optional[List["RBLNModel"]] = None, **kwargs) -> None:
        # Avoid a mutable default argument; None means "no submodules supplied".
        if rbln_submodules is None:
            rbln_submodules = []
        # Attach each compiled submodule under the attribute name declared in
        # `_rbln_submodules` (zip stops at the shorter of the two sequences).
        for submodule_meta, submodule in zip(self._rbln_submodules, rbln_submodules):
            setattr(self, submodule_meta["name"], submodule)

    @classmethod
    def _get_submodule_config_class(
        cls, cls_name: str, submodule_rbln_config: Dict[str, Any]
    ) -> Type[RBLNModelConfig]:
        """Resolve the RBLN config class for a submodule.

        An explicit "cls_name" entry in the dict-form config wins; otherwise
        the class is derived from the torch submodule's class name as
        `RBLN{cls_name}Config`.
        """
        if isinstance(submodule_rbln_config, dict) and "cls_name" in submodule_rbln_config:
            config_cls_name = submodule_rbln_config["cls_name"]
            return get_rbln_config_class(config_cls_name)
        return get_rbln_config_class(f"RBLN{cls_name}Config")

    @classmethod
    def _update_submodule_config(
        cls,
        model: "PreTrainedModel",
        rbln_config: RBLNModelConfig,
        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
    ):
        """Hook for subclasses to adjust a submodule's config before export.

        The default implementation returns the config unchanged.
        """
        return rbln_config

    @classmethod
    def _export_submodules_from_model(
        cls, model: "PreTrainedModel", model_save_dir: str, rbln_config: RBLNModelConfig, **kwargs
    ) -> List["RBLNModel"]:
        """Compile every declared submodule from a live PyTorch model.

        For each entry in `_rbln_submodules`: locate the torch submodule
        (optionally below `_rbln_submodule_prefix`), normalize its RBLN config
        into the proper config class, then compile it via `from_model` into a
        subfolder of `model_save_dir`.
        """
        rbln_submodules = []
        submodule_prefix = getattr(cls, "_rbln_submodule_prefix", None)
        preprocessors = kwargs.pop("preprocessors", [])

        for submodule in cls._rbln_submodules:
            submodule_name = submodule["name"]
            if submodule_prefix is not None:
                # Submodule hangs off an intermediate attribute (e.g. model.model.vision_tower).
                torch_submodule: PreTrainedModel = getattr(model, submodule_prefix)
                torch_submodule = getattr(torch_submodule, submodule_name)
            else:
                torch_submodule: PreTrainedModel = getattr(model, submodule_name)

            cls_name = torch_submodule.__class__.__name__
            submodule_cls: Type["RBLNModel"] = get_rbln_model_cls(f"RBLN{cls_name}")
            submodule_rbln_config = getattr(rbln_config, submodule_name) or {}
            submodule_config_cls = cls._get_submodule_config_class(cls_name, submodule_rbln_config)

            if isinstance(submodule_rbln_config, dict):
                # Dict-form config: keep only parameters the target config class accepts.
                filtered_kwargs = rbln_config.filter_parameters(submodule_config_cls, submodule_rbln_config)
                filtered_kwargs["cls_name"] = submodule_config_cls.__name__
                submodule_rbln_config = submodule_config_cls(**filtered_kwargs)
            elif not isinstance(submodule_rbln_config, submodule_config_cls):
                # Wrong config class: rebuild from its public attributes.
                config_dict = {k: v for k, v in submodule_rbln_config.__dict__.items() if not k.startswith("_")}
                filtered_kwargs = rbln_config.filter_parameters(submodule_config_cls, config_dict)
                filtered_kwargs["cls_name"] = submodule_config_cls.__name__
                submodule_rbln_config = submodule_config_cls(**filtered_kwargs)

            setattr(rbln_config, submodule_name, submodule_rbln_config)
            # Let the submodule class tweak its own config (default: no-op).
            submodule_rbln_config = submodule_cls._update_submodule_config(model, submodule_rbln_config, preprocessors)

            rbln_submodule = submodule_cls.from_model(
                model=torch_submodule,
                config=torch_submodule.config,
                subfolder=submodule_name,
                model_save_dir=model_save_dir,
                rbln_config=submodule_rbln_config,
                **kwargs,
            )

            rbln_submodules.append(rbln_submodule)

        return rbln_submodules

    @classmethod
    def _load_submodules_from_compiled_models(cls, model_save_dir: str, rbln_config: RBLNModelConfig, **kwargs):
        """Load every declared submodule from an already-compiled directory."""
        rbln_submodules = []

        for submodule in cls._rbln_submodules:
            submodule_name = submodule["name"]

            # Get cls name for call the constructor of the rbln class
            submodule_rbln_config = getattr(rbln_config, submodule_name)

            # RBLNModelConfig -> RBLNModel
            submodule_cls = get_rbln_model_cls(submodule_rbln_config.rbln_model_cls_name)

            json_file_path = Path(model_save_dir) / submodule_name / "config.json"
            config = PretrainedConfig.from_json_file(json_file_path)

            rbln_submodule = submodule_cls._from_pretrained(
                model_id=model_save_dir,
                config=config,
                subfolder=submodule_name,
                rbln_config=submodule_rbln_config,
                **kwargs,
            )

            # update submodule's rbln_config since it is updated in the from_pretrained method
            setattr(rbln_config, submodule_name, rbln_submodule.rbln_config)
            rbln_submodules.append(rbln_submodule)

        return rbln_submodules

    @classmethod
    def _load_submodules(cls, model_save_dir, rbln_config: RBLNModelConfig, model=None, **kwargs):
        """Dispatch submodule construction.

        Two ways:
        1. Compile from a live PyTorch object (when `model` is given).
        2. Load from previously compiled files (when `model` is None).
        """
        if model is not None:
            return cls._export_submodules_from_model(
                model=model, model_save_dir=model_save_dir, rbln_config=rbln_config, **kwargs
            )

        else:
            return cls._load_submodules_from_compiled_models(
                model_save_dir=model_save_dir, rbln_config=rbln_config, **kwargs
            )
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: optimum-rbln
|
|
3
|
+
Version: 0.9.3.post1
|
|
4
|
+
Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
|
|
5
|
+
Project-URL: Homepage, https://rebellions.ai
|
|
6
|
+
Project-URL: Documentation, https://docs.rbln.ai
|
|
7
|
+
Project-URL: Repository, https://github.com/rebellions-sw/optimum-rbln
|
|
8
|
+
Author-email: "Rebellions Inc." <support@rebellions.ai>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: atom,diffusers,inference,rbln,rebel,transformers
|
|
12
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Education
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
17
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
18
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
|
+
Requires-Python: <3.14,>=3.9
|
|
26
|
+
Requires-Dist: accelerate>=1.0.1
|
|
27
|
+
Requires-Dist: diffusers==0.35.2
|
|
28
|
+
Requires-Dist: packaging>=24.1
|
|
29
|
+
Requires-Dist: torch==2.8.0
|
|
30
|
+
Requires-Dist: torchaudio<=2.8.0
|
|
31
|
+
Requires-Dist: torchvision<=0.23.0
|
|
32
|
+
Requires-Dist: transformers==4.57.1
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Optimum RBLN
|
|
37
|
+
|
|
38
|
+
<div align="center">
|
|
39
|
+
|
|
40
|
+
<img src="assets/rbln_logo.png" width="60%"/>
|
|
41
|
+
|
|
42
|
+
[](https://badge.fury.io/py/optimum-rbln)
|
|
43
|
+
[](https://github.com/rebellions-sw/optimum-rbln/blob/main/LICENSE)
|
|
44
|
+
[](https://docs.rbln.ai/software/optimum/optimum_rbln.html)
|
|
45
|
+
[](CODE_OF_CONDUCT.md)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
</div>
|
|
49
|
+
|
|
50
|
+
🤗 Optimum RBLN provides an interface between HuggingFace libraries ([Transformers](https://huggingface.co/docs/transformers), [Diffusers](https://huggingface.co/docs/diffusers/index)) and RBLN NPUs, including [ATOM](https://rebellions.ai/rebellions-product/rbln-ca25/) and [REBEL](https://rebellions.ai/rebellions-product/rebel/).
|
|
51
|
+
|
|
52
|
+
This library enables seamless integration between the HuggingFace ecosystem and RBLN NPUs through a comprehensive toolkit for model loading and inference across single and multi-NPU environments. While we maintain a list of [officially validated models and tasks](https://docs.rbln.ai/software/optimum/optimum_rbln.html), users can easily adapt other models and tasks with minimal modifications.
|
|
53
|
+
|
|
54
|
+
## Key Features
|
|
55
|
+
|
|
56
|
+
🚀 **High Performance Inference**
|
|
57
|
+
- Optimized model execution on RBLN NPUs through RBLN SDK compilation
|
|
58
|
+
- Support for both single and multi-NPU inference
|
|
59
|
+
- Integrated with RBLN Runtime for optimal performance
|
|
60
|
+
|
|
61
|
+
🔧 **Easy Integration**
|
|
62
|
+
- Seamless compatibility with HuggingFace Model Hub
|
|
63
|
+
- Drop-in replacement for existing HuggingFace pipelines
|
|
64
|
+
- Minimal code changes required for NPU acceleration
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
## Seamless Replacement for Existing HuggingFace Code
|
|
68
|
+
|
|
69
|
+
```diff
|
|
70
|
+
- from diffusers import StableDiffusionXLPipeline
|
|
71
|
+
+ from optimum.rbln import RBLNStableDiffusionXLPipeline
|
|
72
|
+
|
|
73
|
+
# Load model
|
|
74
|
+
model_id = "stabilityai/stable-diffusion-xl-base-1.0"
|
|
75
|
+
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
|
|
76
|
+
- pipe = StableDiffusionXLPipeline.from_pretrained(model_id)
|
|
77
|
+
+ pipe = RBLNStableDiffusionXLPipeline.from_pretrained(model_id, export=True)
|
|
78
|
+
|
|
79
|
+
# Generate image
|
|
80
|
+
image = pipe(prompt).images[0]
|
|
81
|
+
|
|
82
|
+
# Save image result
|
|
83
|
+
image.save("image.png")
|
|
84
|
+
|
|
85
|
+
+ # (Optional) Save compiled artifacts to skip the compilation step in future runs
|
|
86
|
+
+ pipe.save_pretrained("compiled_sdxl")
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Documentation
|
|
90
|
+
|
|
91
|
+
Check out [the documentation of Optimum RBLN](https://docs.rbln.ai/software/optimum/optimum_rbln.html) for more advanced usage.
|
|
92
|
+
|
|
93
|
+
## Getting Started
|
|
94
|
+
|
|
95
|
+
> **Note:** The `rebel-compiler` library, which is required for running `optimum-rbln`, is only available for approved users. Please refer to the [installation guide](https://docs.rbln.ai/getting_started/installation_guide.html) for instructions on accessing and installing `rebel-compiler`.
|
|
96
|
+
|
|
97
|
+
### Install from PyPI
|
|
98
|
+
|
|
99
|
+
To install the latest release of this package:
|
|
100
|
+
```bash
|
|
101
|
+
pip install optimum-rbln
|
|
102
|
+
|
|
103
|
+
# CPU-only installation (recommended if you don't plan to use CUDA-enabled PyTorch)
|
|
104
|
+
pip install optimum-rbln --extra-index-url https://download.pytorch.org/whl/cpu
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Install from source
|
|
108
|
+
|
|
109
|
+
#### Prerequisites
|
|
110
|
+
|
|
111
|
+
- Install [uv](https://docs.astral.sh/uv/) (refer to [this link](https://docs.astral.sh/uv/getting-started/installation/) for detailed commands)
|
|
112
|
+
|
|
113
|
+
The command below installs `optimum-rbln` along with its dependencies.
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
git clone https://github.com/rebellions-sw/optimum-rbln.git
|
|
117
|
+
cd optimum-rbln
|
|
118
|
+
./scripts/uv-sync.sh
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Need Help?
|
|
122
|
+
|
|
123
|
+
- Join discussions and get answers in our [Developer Community](https://discuss.rebellions.ai/)
|
|
124
|
+
- Contact maintainers at [support@rebellions.ai](mailto:support@rebellions.ai)
|