optimum-rbln 0.8.2a4__py3-none-any.whl → 0.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +108 -9
- optimum/rbln/__version__.py +16 -3
- optimum/rbln/cli.py +660 -0
- optimum/rbln/configuration_utils.py +156 -43
- optimum/rbln/diffusers/__init__.py +19 -0
- optimum/rbln/diffusers/configurations/__init__.py +3 -0
- optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +3 -3
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +1 -1
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +3 -3
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +4 -4
- optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +9 -4
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +9 -4
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +3 -3
- optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +3 -3
- optimum/rbln/diffusers/configurations/pipelines/__init__.py +3 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +35 -19
- optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +14 -11
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -20
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +13 -9
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +17 -13
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +17 -10
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
- optimum/rbln/diffusers/modeling_diffusers.py +30 -14
- optimum/rbln/diffusers/models/__init__.py +4 -0
- optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +31 -3
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +31 -6
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
- optimum/rbln/diffusers/models/autoencoders/vae.py +27 -8
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +31 -3
- optimum/rbln/diffusers/models/controlnet.py +16 -1
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +17 -3
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +25 -2
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +23 -2
- optimum/rbln/diffusers/models/unets/__init__.py +1 -0
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +23 -4
- optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
- optimum/rbln/diffusers/pipelines/__init__.py +15 -5
- optimum/rbln/diffusers/pipelines/auto_pipeline.py +307 -0
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +20 -0
- optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +19 -16
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +14 -18
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +31 -1
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +31 -1
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -6
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
- optimum/rbln/modeling.py +48 -21
- optimum/rbln/modeling_base.py +99 -22
- optimum/rbln/ops/attn.py +158 -0
- optimum/rbln/ops/flash_attn.py +166 -0
- optimum/rbln/ops/kv_cache_update.py +5 -0
- optimum/rbln/ops/linear.py +7 -0
- optimum/rbln/transformers/__init__.py +92 -0
- optimum/rbln/transformers/configuration_generic.py +7 -32
- optimum/rbln/transformers/modeling_attention_utils.py +385 -0
- optimum/rbln/transformers/modeling_generic.py +48 -65
- optimum/rbln/transformers/modeling_outputs.py +37 -0
- optimum/rbln/transformers/models/__init__.py +91 -30
- optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +28 -2
- optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +68 -5
- optimum/rbln/transformers/models/auto/__init__.py +2 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +92 -17
- optimum/rbln/transformers/models/auto/modeling_auto.py +45 -0
- optimum/rbln/transformers/models/bart/bart_architecture.py +1 -3
- optimum/rbln/transformers/models/bart/configuration_bart.py +2 -0
- optimum/rbln/transformers/models/bart/modeling_bart.py +23 -2
- optimum/rbln/transformers/models/bert/bert_architecture.py +16 -0
- optimum/rbln/transformers/models/bert/modeling_bert.py +93 -4
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +42 -11
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +135 -44
- optimum/rbln/transformers/models/clip/configuration_clip.py +10 -7
- optimum/rbln/transformers/models/clip/modeling_clip.py +67 -6
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +3 -6
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +37 -21
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +82 -104
- optimum/rbln/transformers/models/colqwen2/__init__.py +2 -0
- optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +233 -0
- optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +74 -0
- optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +446 -0
- optimum/rbln/transformers/models/decoderonly/__init__.py +3 -2
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +114 -37
- optimum/rbln/transformers/models/decoderonly/configuration_lora.py +411 -0
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +318 -309
- optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +508 -0
- optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +119 -0
- optimum/rbln/transformers/models/decoderonly/lora_architecture.py +204 -0
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +485 -905
- optimum/rbln/transformers/models/depth_anything/__init__.py +16 -0
- optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +24 -0
- optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +42 -0
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +24 -0
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +17 -0
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +42 -4
- optimum/rbln/transformers/models/gemma/__init__.py +2 -2
- optimum/rbln/transformers/models/gemma/configuration_gemma.py +9 -1
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -4
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +22 -1
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +49 -13
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +12 -2
- optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +245 -0
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +201 -351
- optimum/rbln/transformers/models/gpt2/__init__.py +2 -2
- optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +31 -3
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +10 -8
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +18 -1
- optimum/rbln/transformers/models/grounding_dino/__init__.py +10 -0
- optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +92 -0
- optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +599 -0
- optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +1048 -0
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +35 -7
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +29 -32
- optimum/rbln/transformers/models/llama/__init__.py +2 -2
- optimum/rbln/transformers/models/llama/configuration_llama.py +9 -1
- optimum/rbln/transformers/models/llama/modeling_llama.py +22 -1
- optimum/rbln/transformers/models/llava/__init__.py +16 -0
- optimum/rbln/transformers/models/llava/configuration_llava.py +72 -0
- optimum/rbln/transformers/models/llava/modeling_llava.py +490 -0
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +15 -17
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +234 -376
- optimum/rbln/transformers/models/midm/midm_architecture.py +4 -1
- optimum/rbln/transformers/models/midm/modeling_midm.py +42 -4
- optimum/rbln/transformers/models/mistral/__init__.py +2 -2
- optimum/rbln/transformers/models/mistral/configuration_mistral.py +9 -1
- optimum/rbln/transformers/models/mistral/mistral_architecture.py +1 -1
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +26 -3
- optimum/rbln/transformers/models/opt/__init__.py +2 -2
- optimum/rbln/transformers/models/opt/configuration_opt.py +8 -1
- optimum/rbln/transformers/models/opt/modeling_opt.py +29 -17
- optimum/rbln/transformers/models/opt/opt_architecture.py +4 -4
- optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
- optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +38 -0
- optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +71 -0
- optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +161 -0
- optimum/rbln/transformers/models/phi/__init__.py +2 -2
- optimum/rbln/transformers/models/phi/configuration_phi.py +9 -1
- optimum/rbln/transformers/models/phi/modeling_phi.py +10 -1
- optimum/rbln/transformers/models/phi/phi_architecture.py +11 -7
- optimum/rbln/transformers/models/pixtral/__init__.py +16 -0
- optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
- optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +322 -0
- optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
- optimum/rbln/transformers/models/qwen2/__init__.py +2 -2
- optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +9 -1
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +27 -1
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +21 -6
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +15 -22
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +28 -7
- optimum/rbln/transformers/models/qwen2_vl/__init__.py +19 -0
- optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +88 -0
- optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +513 -0
- optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +165 -0
- optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +2 -2
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +86 -330
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +1 -245
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +17 -0
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +73 -0
- optimum/rbln/transformers/models/roberta/modeling_roberta.py +33 -0
- optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +21 -16
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +58 -13
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -2
- optimum/rbln/transformers/models/siglip/__init__.py +2 -6
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +1 -1
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +21 -16
- optimum/rbln/transformers/models/swin/__init__.py +16 -0
- optimum/rbln/transformers/models/swin/configuration_swin.py +42 -0
- optimum/rbln/transformers/models/swin/modeling_swin.py +354 -0
- optimum/rbln/transformers/models/t5/configuration_t5.py +2 -0
- optimum/rbln/transformers/models/t5/modeling_t5.py +2 -2
- optimum/rbln/transformers/models/t5/t5_architecture.py +8 -1
- optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +3 -3
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +20 -16
- optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +7 -1
- optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
- optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +15 -3
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +61 -8
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +12 -13
- optimum/rbln/transformers/models/whisper/generation_whisper.py +62 -6
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +30 -5
- optimum/rbln/transformers/models/xlm_roberta/__init__.py +2 -8
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +43 -0
- optimum/rbln/transformers/utils/rbln_quantization.py +400 -75
- optimum/rbln/transformers/utils/rbln_runtime_wrapper.py +79 -0
- optimum/rbln/utils/deprecation.py +213 -0
- optimum/rbln/utils/hub.py +14 -3
- optimum/rbln/utils/runtime_utils.py +60 -18
- optimum/rbln/utils/submodule.py +31 -9
- {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3.dist-info}/METADATA +8 -7
- optimum_rbln-0.9.3.dist-info/RECORD +264 -0
- {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3.dist-info}/WHEEL +1 -1
- optimum_rbln-0.9.3.dist-info/entry_points.txt +2 -0
- optimum_rbln-0.8.2a4.dist-info/RECORD +0 -215
- {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3.dist-info}/licenses/LICENSE +0 -0

optimum/rbln/transformers/models/__init__.py
@@ -36,6 +36,8 @@ _import_structure = {
         "RBLNAutoModelForSpeechSeq2Seq",
         "RBLNAutoModelForVision2Seq",
         "RBLNAutoModelForImageTextToText",
+        "RBLNAutoModelForTextEncoding",
+        "RBLNAutoModelForZeroShotObjectDetection",
     ],
     "bart": [
         "RBLNBartForConditionalGeneration",
@@ -73,6 +75,10 @@ _import_structure = {
         "RBLNColPaliForRetrieval",
         "RBLNColPaliForRetrievalConfig",
     ],
+    "colqwen2": [
+        "RBLNColQwen2ForRetrieval",
+        "RBLNColQwen2ForRetrievalConfig",
+    ],
     "distilbert": [
         "RBLNDistilBertForQuestionAnswering",
         "RBLNDistilBertForQuestionAnsweringConfig",
@@ -83,36 +89,60 @@ _import_structure = {
         "RBLNQwen2_5_VLForConditionalGeneration",
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
     ],
+    "qwen2_vl": [
+        "RBLNQwen2VisionTransformerPretrainedModel",
+        "RBLNQwen2VisionTransformerPretrainedModelConfig",
+        "RBLNQwen2VLForConditionalGeneration",
+        "RBLNQwen2VLForConditionalGenerationConfig",
+    ],
     "decoderonly": [
+        "RBLNDecoderOnlyModelConfig",
+        "RBLNDecoderOnlyModel",
         "RBLNDecoderOnlyModelForCausalLM",
         "RBLNDecoderOnlyModelForCausalLMConfig",
+        "RBLNLoRAAdapterConfig",
+        "RBLNLoRAConfig",
     ],
+    "depth_anything": ["RBLNDepthAnythingForDepthEstimationConfig", "RBLNDepthAnythingForDepthEstimation"],
     "dpt": [
         "RBLNDPTForDepthEstimation",
         "RBLNDPTForDepthEstimationConfig",
     ],
     "exaone": ["RBLNExaoneForCausalLM", "RBLNExaoneForCausalLMConfig"],
-    "gemma": ["RBLNGemmaForCausalLM", "RBLNGemmaForCausalLMConfig"],
+    "gemma": ["RBLNGemmaForCausalLM", "RBLNGemmaForCausalLMConfig", "RBLNGemmaModel", "RBLNGemmaModelConfig"],
     "gemma3": [
         "RBLNGemma3ForCausalLM",
         "RBLNGemma3ForCausalLMConfig",
         "RBLNGemma3ForConditionalGeneration",
         "RBLNGemma3ForConditionalGenerationConfig",
     ],
-    "gpt2": ["RBLNGPT2LMHeadModel", "RBLNGPT2LMHeadModelConfig"],
+    "gpt2": ["RBLNGPT2LMHeadModel", "RBLNGPT2LMHeadModelConfig", "RBLNGPT2Model", "RBLNGPT2ModelConfig"],
     "idefics3": [
         "RBLNIdefics3VisionTransformer",
         "RBLNIdefics3ForConditionalGeneration",
         "RBLNIdefics3ForConditionalGenerationConfig",
         "RBLNIdefics3VisionTransformerConfig",
     ],
-    "
-    "
+    "llava": ["RBLNLlavaForConditionalGeneration", "RBLNLlavaForConditionalGenerationConfig"],
+    "llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig", "RBLNLlamaModel", "RBLNLlamaModelConfig"],
+    "opt": ["RBLNOPTForCausalLM", "RBLNOPTForCausalLMConfig", "RBLNOPTModel", "RBLNOPTModelConfig"],
+    "pegasus": [
+        "RBLNPegasusForConditionalGeneration",
+        "RBLNPegasusModel",
+        "RBLNPegasusForConditionalGenerationConfig",
+        "RBLNPegasusModelConfig",
+    ],
     "llava_next": ["RBLNLlavaNextForConditionalGeneration", "RBLNLlavaNextForConditionalGenerationConfig"],
     "midm": ["RBLNMidmLMHeadModel", "RBLNMidmLMHeadModelConfig"],
-    "
-    "
-
+    "pixtral": ["RBLNPixtralVisionModel", "RBLNPixtralVisionModelConfig"],
+    "mistral": [
+        "RBLNMistralForCausalLM",
+        "RBLNMistralForCausalLMConfig",
+        "RBLNMistralModel",
+        "RBLNMistralModelConfig",
+    ],
+    "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig", "RBLNPhiModel", "RBLNPhiModelConfig"],
+    "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig", "RBLNQwen2Model", "RBLNQwen2ModelConfig"],
     "qwen3": ["RBLNQwen3ForCausalLM", "RBLNQwen3ForCausalLMConfig", "RBLNQwen3Model", "RBLNQwen3ModelConfig"],
     "resnet": ["RBLNResNetForImageClassification", "RBLNResNetForImageClassificationConfig"],
     "roberta": [
@@ -125,6 +155,10 @@ _import_structure = {
         "RBLNSiglipVisionModel",
         "RBLNSiglipVisionModelConfig",
     ],
+    "swin": [
+        "RBLNSwinBackbone",
+        "RBLNSwinBackboneConfig",
+    ],
     "time_series_transformer": [
         "RBLNTimeSeriesTransformerForPrediction",
         "RBLNTimeSeriesTransformerForPredictionConfig",
@@ -144,13 +178,18 @@ _import_structure = {
         "RBLNXLMRobertaForSequenceClassification",
         "RBLNXLMRobertaForSequenceClassificationConfig",
     ],
+    "grounding_dino": [
+        "RBLNGroundingDinoForObjectDetection",
+        "RBLNGroundingDinoForObjectDetectionConfig",
+        "RBLNGroundingDinoEncoder",
+        "RBLNGroundingDinoEncoderConfig",
+        "RBLNGroundingDinoDecoder",
+        "RBLNGroundingDinoDecoderConfig",
+    ],
 }
 
 if TYPE_CHECKING:
-    from .audio_spectrogram_transformer import (
-        RBLNASTForAudioClassification,
-        RBLNASTForAudioClassificationConfig,
-    )
+    from .audio_spectrogram_transformer import RBLNASTForAudioClassification, RBLNASTForAudioClassificationConfig
     from .auto import (
         RBLNAutoModel,
         RBLNAutoModelForAudioClassification,
@@ -164,7 +203,9 @@ if TYPE_CHECKING:
         RBLNAutoModelForSeq2SeqLM,
         RBLNAutoModelForSequenceClassification,
         RBLNAutoModelForSpeechSeq2Seq,
+        RBLNAutoModelForTextEncoding,
         RBLNAutoModelForVision2Seq,
+        RBLNAutoModelForZeroShotObjectDetection,
     )
     from .bart import (
         RBLNBartForConditionalGeneration,
@@ -198,50 +239,69 @@ if TYPE_CHECKING:
         RBLNCLIPVisionModelWithProjection,
         RBLNCLIPVisionModelWithProjectionConfig,
     )
-    from .colpali import (
-        RBLNColPaliForRetrieval,
-        RBLNColPaliForRetrievalConfig,
-    )
+    from .colpali import RBLNColPaliForRetrieval, RBLNColPaliForRetrievalConfig
+    from .colqwen2 import RBLNColQwen2ForRetrieval, RBLNColQwen2ForRetrievalConfig
     from .decoderonly import (
+        RBLNDecoderOnlyModel,
+        RBLNDecoderOnlyModelConfig,
         RBLNDecoderOnlyModelForCausalLM,
         RBLNDecoderOnlyModelForCausalLMConfig,
+        RBLNLoRAAdapterConfig,
+        RBLNLoRAConfig,
     )
-    from .distilbert import (
-        RBLNDistilBertForQuestionAnswering,
-        RBLNDistilBertForQuestionAnsweringConfig,
-    )
-    from .dpt import (
-        RBLNDPTForDepthEstimation,
-        RBLNDPTForDepthEstimationConfig,
-    )
+    from .depth_anything import RBLNDepthAnythingForDepthEstimation, RBLNDepthAnythingForDepthEstimationConfig
+    from .distilbert import RBLNDistilBertForQuestionAnswering, RBLNDistilBertForQuestionAnsweringConfig
+    from .dpt import RBLNDPTForDepthEstimation, RBLNDPTForDepthEstimationConfig
     from .exaone import RBLNExaoneForCausalLM, RBLNExaoneForCausalLMConfig
-    from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig
+    from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig, RBLNGemmaModel, RBLNGemmaModelConfig
     from .gemma3 import (
         RBLNGemma3ForCausalLM,
         RBLNGemma3ForCausalLMConfig,
         RBLNGemma3ForConditionalGeneration,
         RBLNGemma3ForConditionalGenerationConfig,
     )
-    from .gpt2 import RBLNGPT2LMHeadModel, RBLNGPT2LMHeadModelConfig
+    from .gpt2 import RBLNGPT2LMHeadModel, RBLNGPT2LMHeadModelConfig, RBLNGPT2Model, RBLNGPT2ModelConfig
+    from .grounding_dino import (
+        RBLNGroundingDinoDecoder,
+        RBLNGroundingDinoDecoderConfig,
+        RBLNGroundingDinoEncoder,
+        RBLNGroundingDinoEncoderConfig,
+        RBLNGroundingDinoForObjectDetection,
+        RBLNGroundingDinoForObjectDetectionConfig,
+    )
     from .idefics3 import (
         RBLNIdefics3ForConditionalGeneration,
         RBLNIdefics3ForConditionalGenerationConfig,
        RBLNIdefics3VisionTransformer,
         RBLNIdefics3VisionTransformerConfig,
     )
-    from .llama import RBLNLlamaForCausalLM, RBLNLlamaForCausalLMConfig
+    from .llama import RBLNLlamaForCausalLM, RBLNLlamaForCausalLMConfig, RBLNLlamaModel, RBLNLlamaModelConfig
+    from .llava import RBLNLlavaForConditionalGeneration, RBLNLlavaForConditionalGenerationConfig
     from .llava_next import RBLNLlavaNextForConditionalGeneration, RBLNLlavaNextForConditionalGenerationConfig
     from .midm import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
-    from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
-    from .opt import RBLNOPTForCausalLM, RBLNOPTForCausalLMConfig
-    from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
-    from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
+    from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig, RBLNMistralModel, RBLNMistralModelConfig
+    from .opt import RBLNOPTForCausalLM, RBLNOPTForCausalLMConfig, RBLNOPTModel, RBLNOPTModelConfig
+    from .pegasus import (
+        RBLNPegasusForConditionalGeneration,
+        RBLNPegasusForConditionalGenerationConfig,
+        RBLNPegasusModel,
+        RBLNPegasusModelConfig,
+    )
+    from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig, RBLNPhiModel, RBLNPhiModelConfig
+    from .pixtral import RBLNPixtralVisionModel, RBLNPixtralVisionModelConfig
+    from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig, RBLNQwen2Model, RBLNQwen2ModelConfig
     from .qwen2_5_vl import (
         RBLNQwen2_5_VisionTransformerPretrainedModel,
         RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
     )
+    from .qwen2_vl import (
+        RBLNQwen2VisionTransformerPretrainedModel,
+        RBLNQwen2VisionTransformerPretrainedModelConfig,
+        RBLNQwen2VLForConditionalGeneration,
+        RBLNQwen2VLForConditionalGenerationConfig,
+    )
     from .qwen3 import RBLNQwen3ForCausalLM, RBLNQwen3ForCausalLMConfig, RBLNQwen3Model, RBLNQwen3ModelConfig
     from .resnet import RBLNResNetForImageClassification, RBLNResNetForImageClassificationConfig
     from .roberta import (
@@ -251,6 +311,7 @@ if TYPE_CHECKING:
         RBLNRobertaForSequenceClassificationConfig,
     )
     from .siglip import RBLNSiglipVisionModel, RBLNSiglipVisionModelConfig
+    from .swin import RBLNSwinBackbone, RBLNSwinBackboneConfig
     from .t5 import (
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
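
The hunks above widen the public export surface of optimum.rbln.transformers.models: new architectures (ColQwen2, Qwen2-VL, Pegasus, Pixtral, Swin, Grounding DINO, Depth Anything), bare-backbone *Model variants next to the existing task heads, and new LoRA configuration objects. A minimal sketch of what this means for downstream imports, assuming these names are re-exported from the package root as in previous releases (the exact import path is an assumption, not shown in this diff):

# Sketch only: checks that the 0.9.3 exports resolve. Assumes root-level re-export.
from optimum.rbln import (
    RBLNColQwen2ForRetrieval,              # new ColQwen2 retrieval support
    RBLNQwen2VLForConditionalGeneration,   # new Qwen2-VL support
    RBLNLlamaModel,                        # new bare-backbone variant beside RBLNLlamaForCausalLM
    RBLNLoRAConfig,                        # new LoRA compile-time configuration
    RBLNGroundingDinoForObjectDetection,   # new zero-shot object detection support
)

print([cls.__name__ for cls in (RBLNColQwen2ForRetrieval, RBLNLlamaModel, RBLNLoRAConfig)])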

optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py
@@ -12,10 +12,36 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from
+from typing import Any, Optional
 
+from ....configuration_utils import RBLNModelConfig
+from ....utils.deprecation import deprecate_kwarg
 
-
+
+class RBLNASTForAudioClassificationConfig(RBLNModelConfig):
     """
     Configuration class for RBLNASTForAudioClassification.
     """
+
+    @deprecate_kwarg(old_name="num_mel_bins", version="0.10.0")
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        max_length: Optional[int] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            max_length (Optional[int]): Maximum length of the audio input in time dimension.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+        self.max_length = max_length
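
The new RBLNASTForAudioClassificationConfig shown above carries the static batch_size and max_length used at compile time and deprecates the old num_mel_bins kwarg (removal scheduled for 0.10.0 per the @deprecate_kwarg decorator). A small usage sketch; the concrete values are illustrative and the root-level import path is an assumption:

# Sketch: building the compile-time shape config by hand.
from optimum.rbln import RBLNASTForAudioClassificationConfig

cfg = RBLNASTForAudioClassificationConfig(batch_size=1, max_length=1024)
assert cfg.batch_size == 1 and cfg.max_length == 1024

# Equivalent when going through from_pretrained(): rbln_-prefixed kwargs are
# forwarded into this config (e.g. rbln_batch_size=1, rbln_max_length=1024).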

optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py
@@ -12,17 +12,80 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from
+from typing import TYPE_CHECKING, Optional
 
+import torch
+from transformers import AutoModelForAudioClassification
+from transformers.modeling_outputs import SequenceClassifierOutput
 
-
+from ....configuration_utils import RBLNCompileConfig
+from ....modeling import RBLNModel
+from .configuration_audio_spectrogram_transformer import RBLNASTForAudioClassificationConfig
+
+
+if TYPE_CHECKING:
+    from transformers import AutoFeatureExtractor, PretrainedConfig, PreTrainedModel
+
+
+class RBLNASTForAudioClassification(RBLNModel):
     """
     Audio Spectrogram Transformer model with an audio classification head on top (a linear layer on top of the pooled output) e.g. for datasets like AudioSet, Speech Commands v2.
-    This model inherits from [
+    This model inherits from [RBLNModelForAudioClassification]. Check the superclass documentation for the generic methods the library implements for all its models.
 
-    A class to convert and run pre-trained transformer-based
-    It implements the methods to convert a pre-trained transformers
+    A class to convert and run pre-trained transformer-based ASTForAudioClassification models on RBLN devices.
+    It implements the methods to convert a pre-trained transformers ASTForAudioClassification model into a RBLN transformer model by:
 
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN Compiler.
     """
+
+    auto_model_class = AutoModelForAudioClassification
+
+    @classmethod
+    def _update_rbln_config(
+        cls,
+        preprocessors: "AutoFeatureExtractor" = None,
+        model: Optional["PreTrainedModel"] = None,
+        model_config: "PretrainedConfig" = None,
+        rbln_config: Optional[RBLNASTForAudioClassificationConfig] = None,
+    ) -> RBLNASTForAudioClassificationConfig:
+        num_mel_bins = getattr(model_config, "num_mel_bins", None)
+
+        if rbln_config.max_length is None:
+            rbln_config.max_length = getattr(model_config, "max_length", None)
+            for feature_extractor in preprocessors:
+                if hasattr(feature_extractor, "max_length"):
+                    rbln_config.max_length = feature_extractor.max_length
+                    break
+
+        if rbln_config.max_length is None:
+            raise ValueError("max_length should be specified!")
+
+        input_info = [
+            (
+                "input_values",
+                [rbln_config.batch_size, rbln_config.max_length, num_mel_bins],
+                "float32",
+            ),
+        ]
+
+        rbln_config.set_compile_cfgs([RBLNCompileConfig(input_info=input_info)])
+        return rbln_config
+
+    def forward(self, input_values: torch.Tensor, **kwargs) -> SequenceClassifierOutput:
+        """
+        Forward pass for the RBLN-optimized Audio Spectrogram Transformer model for audio classification.
+
+        Args:
+            input_values (torch.FloatTensor of shape (batch_size, max_length, num_mel_bins)):
+                Float values mel features extracted from the raw audio waveform. Raw audio waveform can be obtained by
+                loading a .flac or .wav audio file into an array of type list[float], a numpy.ndarray or a torch.Tensor, *e.g.* via
+                the torchcodec library (pip install torchcodec) or the soundfile library (pip install soundfile).
+                To prepare the array into input_features, the [AutoFeatureExtractor] should be used for extracting the
+                mel features, padding and conversion into a tensor of type torch.FloatTensor.
+
+        Returns:
+            Returns a SequenceClassifierOutput object.
+        """
+
+        return super().forward(input_values, **kwargs)
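
Putting the class above to use: _update_rbln_config() derives the static (batch_size, max_length, num_mel_bins) input shape from the model config and feature extractor, so the typical flow is to compile once with export=True and then run the fixed-shape forward(). The checkpoint id and dummy waveform below are illustrative assumptions, and compilation itself requires an RBLN SDK/NPU environment:

# Sketch of export + inference for the RBLN AST classifier.
import torch
from transformers import AutoFeatureExtractor
from optimum.rbln import RBLNASTForAudioClassification

model_id = "MIT/ast-finetuned-audioset-10-10-0.4593"  # example HF checkpoint
model = RBLNASTForAudioClassification.from_pretrained(
    model_id,
    export=True,           # compile the HuggingFace checkpoint for the NPU
    rbln_max_length=1024,  # forwarded to RBLNASTForAudioClassificationConfig.max_length
)

extractor = AutoFeatureExtractor.from_pretrained(model_id)
waveform = torch.zeros(16000)  # one second of dummy audio at 16 kHz
inputs = extractor(waveform.numpy(), sampling_rate=16000, return_tensors="pt")
logits = model(inputs["input_values"]).logits  # SequenceClassifierOutput.logits
print(logits.shape)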

optimum/rbln/transformers/models/auto/auto_factory.py
@@ -14,13 +14,14 @@
 import importlib
 import inspect
 import warnings
-from
+from pathlib import Path
+from typing import Any, Dict, Optional, Type, Union
 
-from transformers import AutoConfig, PretrainedConfig
+from transformers import AutoConfig, PretrainedConfig, PreTrainedModel
 from transformers.dynamic_module_utils import get_class_from_dynamic_module
 from transformers.models.auto.auto_factory import _get_model_class
 
-from optimum.rbln.configuration_utils import RBLNAutoConfig
+from optimum.rbln.configuration_utils import RBLNAutoConfig, RBLNModelConfig
 from optimum.rbln.modeling_base import RBLNBaseModel
 from optimum.rbln.utils.model_utils import (
     MODEL_MAPPING,
@@ -43,10 +44,10 @@ class _BaseAutoModelClass:
     @classmethod
     def get_rbln_cls(
         cls,
-        pretrained_model_name_or_path,
-        *args,
-        export=
-        **kwargs,
+        pretrained_model_name_or_path: Union[str, Path],
+        *args: Any,
+        export: bool = None,
+        **kwargs: Any,
     ):
         """
         Determine the appropriate RBLN model class based on the given model ID and configuration.
@@ -59,6 +60,20 @@ class _BaseAutoModelClass:
         Returns:
             RBLNBaseModel: The corresponding RBLN model class.
         """
+        if isinstance(pretrained_model_name_or_path, Path):
+            pretrained_model_name_or_path = pretrained_model_name_or_path.as_posix()
+
+        if export is None:
+            export = not RBLNBaseModel._is_compiled(
+                model_id=pretrained_model_name_or_path,
+                token=kwargs.get("token"),
+                revision=kwargs.get("revision"),
+                force_download=kwargs.get("force_download", False),
+                cache_dir=kwargs.get("cache_dir"),
+                subfolder=kwargs.get("subfolder", ""),
+                local_files_only=kwargs.get("local_files_only", False),
+            )
+
         if export:
             hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
             rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
@@ -85,9 +100,9 @@ class _BaseAutoModelClass:
     @classmethod
     def infer_hf_model_class(
         cls,
-        pretrained_model_name_or_path,
-        *args,
-        **kwargs,
+        pretrained_model_name_or_path: Union[str, Path],
+        *args: Any,
+        **kwargs: Any,
     ):
         """
         Infer the HuggingFace model class based on the configuration or model name.
@@ -140,7 +155,7 @@ class _BaseAutoModelClass:
         return model_class
 
     @classmethod
-    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
+    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path: Union[str, Path], **kwargs):
         """
         Retrieve the path to the compiled model directory for a given RBLN model.
 
@@ -163,17 +178,77 @@ class _BaseAutoModelClass:
         return rbln_config.rbln_model_cls_name
 
     @classmethod
-    def from_pretrained(
-
-
+    def from_pretrained(
+        cls,
+        model_id: Union[str, Path],
+        export: bool = None,
+        rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
+        **kwargs,
+    ):
+        """
+        Load an RBLN-accelerated model from a pretrained checkpoint or a compiled RBLN artifact.
+
+        This convenience method determines the concrete `RBLN*` model class that matches the
+        underlying HuggingFace architecture and dispatches to that class's
+        `from_pretrained()` implementation. Depending on whether a compiled RBLN folder is
+        detected (or if `export=True` is passed), it will either:
+
+        - Compile from a HuggingFace checkpoint to an RBLN model
+        - Or load an already-compiled RBLN model directory/repository
+
+        Args:
+            model_id:
+                HF repo id or local path. For compiled models, this should point to a directory
+                (optionally under `subfolder`) that contains `*.rbln` files and `rbln_config.json`.
+            export:
+                Force compilation from a HuggingFace checkpoint. When `None`, this is inferred by
+                checking whether compiled artifacts exist at `model_id`.
+            rbln_config:
+                RBLN compilation/runtime configuration. May be provided as a dictionary or as an
+                instance of the specific model's config class (e.g., `RBLNLlamaForCausalLMConfig`).
+            kwargs: Additional keyword arguments.
+                - Arguments prefixed with `rbln_` are forwarded to the RBLN config.
+                - Remaining arguments are forwarded to the HuggingFace loader (e.g., `revision`,
+                  `token`, `trust_remote_code`, `cache_dir`, `subfolder`, `local_files_only`).
+
+        Returns:
+            An instantiated RBLN model ready for inference on RBLN NPUs.
+        """
+        rbln_cls = cls.get_rbln_cls(model_id, export=export, **kwargs)
+        return rbln_cls.from_pretrained(model_id, export=export, rbln_config=rbln_config, **kwargs)
 
     @classmethod
-    def from_model(
+    def from_model(
+        cls,
+        model: PreTrainedModel,
+        config: Optional[PretrainedConfig] = None,
+        rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
+        **kwargs: Any,
+    ) -> RBLNBaseModel:
+        """
+        Convert and compile an in-memory HuggingFace model into an RBLN model.
+
+        This method resolves the appropriate concrete `RBLN*` class from the input model's class
+        name (e.g., `LlamaForCausalLM` -> `RBLNLlamaForCausalLM`) and then delegates to that
+        class's `from_model()` implementation.
+
+        Args:
+            model: A HuggingFace model instance to convert.
+            config: The configuration object associated with the model.
+            rbln_config:
+                RBLN compilation/runtime configuration. May be provided as a dictionary or as an
+                instance of the specific model's config class.
+            kwargs: Additional keyword arguments.
+                - Arguments prefixed with `rbln_` are forwarded to the RBLN config.
+
+        Returns:
+            An instantiated RBLN model ready for inference on RBLN NPUs.
+        """
         rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
-        return rbln_cls.from_model(model,
+        return rbln_cls.from_model(model, config=config, rbln_config=rbln_config, **kwargs)
 
     @staticmethod
-    def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
+    def register(rbln_cls: Type[RBLNBaseModel], exist_ok: bool = False):
         """
         Register a new RBLN model class.
 
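
The docstrings above describe the new dispatch logic: get_rbln_cls() normalizes Path inputs and, when export is left at None, probes the target for compiled *.rbln artifacts via RBLNBaseModel._is_compiled() before choosing between compile-from-checkpoint and load-compiled. A usage sketch of that flow; the model id, output directory, and the rbln_tensor_parallel_size option are illustrative assumptions:

# Sketch of the compile-then-load round trip through the auto classes.
from optimum.rbln import RBLNAutoModelForCausalLM

# No compiled artifacts at this id, so export is inferred as True and the
# HuggingFace checkpoint is compiled for RBLN NPUs.
model = RBLNAutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",
    rbln_tensor_parallel_size=4,  # illustrative rbln_-prefixed compile option
)
model.save_pretrained("llama-3.1-8b-rbln")

# The directory now holds *.rbln files and rbln_config.json, so export is
# inferred as False and the compiled model is loaded directly.
model = RBLNAutoModelForCausalLM.from_pretrained("llama-3.1-8b-rbln")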

optimum/rbln/transformers/models/auto/modeling_auto.py
@@ -35,8 +35,12 @@ from transformers.models.auto.modeling_auto import (
     MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES,
     MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
     MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES,
+    MODEL_FOR_TEXT_ENCODING_MAPPING,
+    MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES,
     MODEL_FOR_VISION_2_SEQ_MAPPING,
     MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES,
+    MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING,
+    MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES,
     MODEL_MAPPING,
     MODEL_MAPPING_NAMES,
 )
@@ -53,65 +57,106 @@ MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.update(
 
 
 class RBLNAutoModel(_BaseAutoModelClass):
+    """Automatically detect all supported transformers models."""
+
     _model_mapping = MODEL_MAPPING
     _model_mapping_names = MODEL_MAPPING_NAMES
 
 
 class RBLNAutoModelForCTC(_BaseAutoModelClass):
+    """Automatically detect Connectionist Temporal Classification (CTC) head Models."""
+
     _model_mapping = MODEL_FOR_CTC_MAPPING
     _model_mapping_names = MODEL_FOR_CTC_MAPPING_NAMES
 
 
 class RBLNAutoModelForCausalLM(_BaseAutoModelClass):
+    """Automatically detect Casual Language Models."""
+
+    """"""
     _model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING
     _model_mapping_names = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
 
 
 class RBLNAutoModelForSeq2SeqLM(_BaseAutoModelClass):
+    """Automatically detect Sequence to Sequence Language Models."""
+
     _model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     _model_mapping_names = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
 
 
 class RBLNAutoModelForSpeechSeq2Seq(_BaseAutoModelClass):
+    """Automatically detect Sequence to Sequence Generation Models."""
+
     _model_mapping = MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING
     _model_mapping_names = MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES
 
 
 class RBLNAutoModelForDepthEstimation(_BaseAutoModelClass):
+    """Automatically detect Speech Sequence to Sequence Language Models."""
+
     _model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
     _model_mapping_names = MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES
 
 
 class RBLNAutoModelForSequenceClassification(_BaseAutoModelClass):
+    """Automatically detect Sequence Classification Models."""
+
     _model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
     _model_mapping_names = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES
 
 
 class RBLNAutoModelForVision2Seq(_BaseAutoModelClass):
+    """Automatically detect Vision to Sequence Generation Models."""
+
     _model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
     _model_mapping_names = MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES
 
 
 class RBLNAutoModelForImageTextToText(_BaseAutoModelClass):
+    """Automatically detect Image and Text to Text Generation Models."""
+
     _model_mapping = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING
     _model_mapping_names = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES
 
 
 class RBLNAutoModelForMaskedLM(_BaseAutoModelClass):
+    """Automatically detect Masked Lanuage Models."""
+
     _model_mapping = MODEL_FOR_MASKED_LM_MAPPING
     _model_mapping_names = MODEL_FOR_MASKED_LM_MAPPING_NAMES
 
 
 class RBLNAutoModelForAudioClassification(_BaseAutoModelClass):
+    """Automatically detect Audio Classification Models."""
+
     _model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
     _model_mapping_names = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES
 
 
 class RBLNAutoModelForImageClassification(_BaseAutoModelClass):
+    """Automatically detect Image Classification Models."""
+
     _model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
     _model_mapping_names = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES
 
 
 class RBLNAutoModelForQuestionAnswering(_BaseAutoModelClass):
+    """Automatically detect Question Answering Models."""
+
     _model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
     _model_mapping_names = MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
+
+
+class RBLNAutoModelForTextEncoding(_BaseAutoModelClass):
+    """Automatically detect Text Encoding Models."""
+
+    _model_mapping = MODEL_FOR_TEXT_ENCODING_MAPPING
+    _model_mapping_names = MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES
+
+
+class RBLNAutoModelForZeroShotObjectDetection(_BaseAutoModelClass):
+    """Automatically detect Zero Shot Object Detection Models."""
+
+    _model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING
+    _model_mapping_names = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES
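
The two auto classes added above wire the RBLN naming convention into transformers' zero-shot object detection and text encoding mappings, so Grounding DINO checkpoints (support for which is added in this release) can be resolved without naming the concrete class. A sketch, with an illustrative checkpoint id:

# Sketch: resolving the RBLN class through the new auto mapping.
from optimum.rbln import RBLNAutoModelForZeroShotObjectDetection

detector = RBLNAutoModelForZeroShotObjectDetection.from_pretrained(
    "IDEA-Research/grounding-dino-tiny",  # example checkpoint; compiled on first load
)
print(type(detector).__name__)  # expected: RBLNGroundingDinoForObjectDetection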

optimum/rbln/transformers/models/bart/bart_architecture.py
@@ -16,9 +16,7 @@ from typing import Tuple
 
 import torch
 from torch import nn
-from transformers.modeling_attn_mask_utils import (
-    _prepare_4d_attention_mask,
-)
+from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask
 from transformers.utils import logging
 
 from ..seq2seq.seq2seq_architecture import (