optimum-rbln 0.8.1a6__py3-none-any.whl → 0.8.1a7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +0 -4
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +0 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +0 -4
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +0 -4
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +0 -4
- optimum/rbln/diffusers/modeling_diffusers.py +16 -18
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +6 -1
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +13 -3
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +6 -1
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +70 -14
- optimum/rbln/modeling.py +38 -2
- optimum/rbln/modeling_base.py +18 -2
- optimum/rbln/transformers/modeling_generic.py +3 -3
- optimum/rbln/transformers/models/bart/configuration_bart.py +12 -2
- optimum/rbln/transformers/models/bart/modeling_bart.py +16 -7
- optimum/rbln/transformers/models/bert/configuration_bert.py +18 -3
- optimum/rbln/transformers/models/bert/modeling_bert.py +24 -0
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +13 -1
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +15 -0
- optimum/rbln/transformers/models/clip/configuration_clip.py +12 -2
- optimum/rbln/transformers/models/clip/modeling_clip.py +27 -1
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +22 -20
- optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +6 -1
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +8 -0
- optimum/rbln/transformers/models/dpt/configuration_dpt.py +6 -1
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +6 -1
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +5 -3
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +8 -0
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +16 -0
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +8 -0
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +6 -1
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +5 -1
- optimum/rbln/transformers/models/roberta/configuration_roberta.py +12 -2
- optimum/rbln/transformers/models/roberta/modeling_roberta.py +16 -0
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +6 -2
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +7 -0
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +7 -0
- optimum/rbln/transformers/models/t5/configuration_t5.py +12 -2
- optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +7 -0
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +6 -2
- optimum/rbln/transformers/models/vit/configuration_vit.py +6 -1
- optimum/rbln/transformers/models/vit/modeling_vit.py +7 -1
- optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +7 -0
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +7 -0
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +6 -2
- optimum/rbln/utils/runtime_utils.py +46 -1
- {optimum_rbln-0.8.1a6.dist-info → optimum_rbln-0.8.1a7.dist-info}/METADATA +1 -1
- {optimum_rbln-0.8.1a6.dist-info → optimum_rbln-0.8.1a7.dist-info}/RECORD +51 -51
- {optimum_rbln-0.8.1a6.dist-info → optimum_rbln-0.8.1a7.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.1a6.dist-info → optimum_rbln-0.8.1a7.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/clip/modeling_clip.py
CHANGED
@@ -43,6 +43,13 @@ class _TextEncoder(torch.nn.Module):
 
 
 class RBLNCLIPTextModel(RBLNModel):
+    """
+    RBLN optimized CLIP text encoder model.
+
+    This class provides hardware-accelerated inference for CLIP text encoders
+    on RBLN devices, supporting text encoding for multimodal tasks.
+    """
+
     @classmethod
     def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPTextModelConfig) -> torch.nn.Module:
         return _TextEncoder(model).eval()
@@ -95,7 +102,12 @@ class RBLNCLIPTextModel(RBLNModel):
 
 
 class RBLNCLIPTextModelWithProjection(RBLNCLIPTextModel):
-
+    """
+    RBLN optimized CLIP text encoder model with projection layer.
+
+    This class extends RBLNCLIPTextModel with a projection layer for
+    multimodal embedding alignment tasks.
+    """
 
 
 class _VisionEncoder(torch.nn.Module):
@@ -109,6 +121,13 @@ class _VisionEncoder(torch.nn.Module):
 
 
 class RBLNCLIPVisionModel(RBLNModel):
+    """
+    RBLN optimized CLIP vision encoder model.
+
+    This class provides hardware-accelerated inference for CLIP vision encoders
+    on RBLN devices, supporting image encoding for multimodal tasks.
+    """
+
     @classmethod
     def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module:
         return _VisionEncoder(model).eval()
@@ -182,6 +201,13 @@ class RBLNCLIPVisionModel(RBLNModel):
 
 
 class RBLNCLIPVisionModelWithProjection(RBLNCLIPVisionModel):
+    """
+    RBLN optimized CLIP vision encoder model with projection layer.
+
+    This class extends RBLNCLIPVisionModel with a projection layer for
+    multimodal embedding alignment tasks.
+    """
+
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
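Note: the four CLIP classes documented above are all loaded through the standard optimum-rbln `from_pretrained` flow. A minimal usage sketch, assuming the usual `export=True` compile-on-load API; the checkpoint id is illustrative, not taken from this diff:

import torch
from transformers import CLIPTokenizer
from optimum.rbln import RBLNCLIPTextModel

model_id = "openai/clip-vit-base-patch32"  # illustrative checkpoint
tokenizer = CLIPTokenizer.from_pretrained(model_id)

# export=True compiles the HuggingFace checkpoint for the RBLN NPU at load time.
model = RBLNCLIPTextModel.from_pretrained(model_id, export=True)

inputs = tokenizer(["a photo of a cat"], padding="max_length", return_tensors="pt")
with torch.no_grad():
    outputs = model(inputs.input_ids)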
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py
CHANGED
@@ -78,7 +78,7 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
             torch.ones(1, 1, self.rbln_config.prefill_chunk_size, self.rbln_config.prefill_chunk_size), diagonal=1
         )
 
-    def get_block_tables(self, cache_position: torch.Tensor, batch_idx: int = None):
+    def get_block_tables(self, cache_position: torch.Tensor, batch_idx: int = None) -> torch.Tensor:
        """
        Manages and returns the KV cache block tables.
        Updates the block tables based on the given cache_position, allocating new blocks or reusing existing ones as needed.
@@ -88,7 +88,7 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
            batch_idx (int, optional): Specific batch index, used when phase is 'prefill'.
 
        Returns:
-
+            Updated block tables.
        """
 
        NO_BLOCKS_ERROR = (
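Note: the block tables referenced here implement paged KV-cache management: the cache is a pool of fixed-size blocks, and each batch entry owns a row of block ids. A simplified sketch of the allocate-or-reuse behavior the docstring describes (illustrative only; class and parameter names are ours, not the RBLNRuntimeModel implementation):

import torch

class BlockTableSketch:
    def __init__(self, num_blocks: int, block_size: int, max_blocks_per_seq: int):
        self.block_size = block_size
        self.free_blocks = list(range(num_blocks))
        # One row of block ids per batch entry; -1 means "not allocated yet".
        self.tables = torch.full((1, max_blocks_per_seq), -1, dtype=torch.int16)

    def get_block_tables(self, cache_position: torch.Tensor, batch_idx: int = 0) -> torch.Tensor:
        # Allocate blocks up to the block covering the current cache position.
        needed = int(cache_position.max()) // self.block_size + 1
        row = self.tables[batch_idx]
        for i in range(needed):
            if row[i] == -1:  # reuse already-allocated blocks, allocate new ones as needed
                row[i] = self.free_blocks.pop(0)
        return row[:needed]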
@@ -458,6 +458,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
    This class serves as the foundation for various decoder-only architectures like GPT, LLaMA, etc.
 
    The class provides core functionality for:
+
    1. Converting pre-trained transformer models to RBLN-optimized format
    2. Handling the compilation process for RBLN devices
    3. Managing inference operations for causal language modeling
@@ -532,7 +533,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
    @classmethod
    def save_torch_artifacts(
        cls,
-        model:
+        model: PreTrainedModel,
        save_dir_path: Path,
        subfolder: str,
        rbln_config: RBLNDecoderOnlyModelForCausalLMConfig,
@@ -566,7 +567,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
    def get_quantized_model(
        cls,
        model_id: str,
-        config: Optional[
+        config: Optional[PretrainedConfig] = None,
        use_auth_token: Optional[Union[bool, str]] = None,
        revision: Optional[str] = None,
        force_download: bool = False,
@@ -605,16 +606,15 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
        return model
 
    def __getattr__(self, __name: str) -> Any:
-
-
-
-
-
-
-
-        1. For methods: Creates a wrapper that properly binds 'self' to method calls
-        2. For other attributes: Returns them directly from the original class
-        """
+        # Special method to delegate attribute access to the original Huggingface LM class.
+        # This method is called when an attribute is not found in the current instance's dictionary.
+        # It enables transparent access to the original model's attributes and methods while maintaining
+        # proper method binding.
+
+        # The method implements a delegation pattern that:
+
+        # 1. For methods: Creates a wrapper that properly binds 'self' to method calls
+        # 2. For other attributes: Returns them directly from the original class
 
        def redirect(func):
            return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)
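Note: the delegation pattern described by these comments can be shown in isolation. A self-contained sketch with hypothetical classes (the real code delegates to the original HuggingFace LM class, not the toy `Original` below):

from typing import Any

class Original:
    def greet(self, name: str) -> str:
        return f"hello {name}"

class Wrapper:
    def __getattr__(self, __name: str) -> Any:
        def redirect(func):
            # Bind 'self' (the wrapper) as the first positional argument.
            return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)

        val = getattr(Original, __name)
        if callable(val):
            return redirect(val)  # methods get 'self' bound through the wrapper
        return val                # plain attributes are returned directly

print(Wrapper().greet("rbln"))  # -> "hello rbln"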
@@ -627,7 +627,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
    @classmethod
    def get_pytorch_model(
        cls, *args, rbln_config: Optional[RBLNDecoderOnlyModelForCausalLMConfig] = None, **kwargs
-    ) ->
+    ) -> PreTrainedModel:
        if rbln_config and rbln_config.quantization:
            model = cls.get_quantized_model(*args, **kwargs)
        else:
@@ -636,7 +636,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
        return model
 
    @classmethod
-    def wrap_model_if_needed(cls, model:
+    def wrap_model_if_needed(cls, model: PreTrainedModel, rbln_config: "RBLNDecoderOnlyModelForCausalLMConfig"):
        wrapper_cfg = {
            "max_seq_len": rbln_config.max_seq_len,
            "attn_impl": rbln_config.attn_impl,
@@ -654,7 +654,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
 
    @classmethod
    @torch.inference_mode()
-    def get_compiled_model(cls, model:
+    def get_compiled_model(cls, model: PreTrainedModel, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig):
        wrapped_model = cls.wrap_model_if_needed(model, rbln_config)
 
        rbln_compile_configs = rbln_config.compile_cfgs
@@ -679,9 +679,11 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
            quantization.maybe_set_quantization_env()
            original_linear = torch.nn.functional.linear
            torch.nn.functional.linear = torch.ops.rbln_custom_ops.linear
-            compiled_model =
+            compiled_model = cls.compile(
                wrapped_model,
                compile_config,
+                create_runtimes=rbln_config.create_runtimes,
+                device=rbln_config.device,
                example_inputs=example_inputs,
                compile_context=compile_context,
            )
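Note: the functional change in this hunk is that `create_runtimes` and `device` now flow from the `rbln_config` into compilation, so device availability can be validated before runtimes are built. A sketch of how these fields are typically supplied by the user; the model id is illustrative, and the exact config keys are assumptions based on the fields referenced above:

from optimum.rbln import RBLNLlamaForCausalLM

model = RBLNLlamaForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",   # illustrative checkpoint, not from this diff
    export=True,
    rbln_config={
        "device": [0, 1, 2, 3],    # NPU ids, checked against tensor_parallel_size
        "create_runtimes": True,   # build runtimes right after compilation
        "tensor_parallel_size": 4,
    },
)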
@@ -973,8 +975,8 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
    def _update_rbln_config(
        cls,
        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
-        model: Optional[
-        model_config: Optional[
+        model: Optional[PreTrainedModel] = None,
+        model_config: Optional[PretrainedConfig] = None,
        rbln_config: Optional[RBLNDecoderOnlyModelForCausalLMConfig] = None,
    ) -> RBLNDecoderOnlyModelForCausalLMConfig:
        if rbln_config.max_seq_len is None:
optimum/rbln/transformers/models/distilbert/configuration_distilbert.py
CHANGED
@@ -16,4 +16,9 @@ from ...configuration_generic import RBLNModelForQuestionAnsweringConfig
 
 
 class RBLNDistilBertForQuestionAnsweringConfig(RBLNModelForQuestionAnsweringConfig):
-    ""
+    """
+    Configuration class for RBLNDistilBertForQuestionAnswering.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized DistilBERT models for question answering tasks.
+    """
optimum/rbln/transformers/models/distilbert/modeling_distilbert.py
CHANGED
@@ -16,4 +16,12 @@ from ...modeling_generic import RBLNModelForQuestionAnswering
 
 
 class RBLNDistilBertForQuestionAnswering(RBLNModelForQuestionAnswering):
+    """
+    RBLN optimized DistilBERT model for question answering tasks.
+
+    This class provides hardware-accelerated inference for DistilBERT models
+    on RBLN devices, supporting extractive question answering tasks where
+    the model predicts start and end positions of answers in text.
+    """
+
     rbln_model_input_names = ["input_ids", "attention_mask"]
optimum/rbln/transformers/models/dpt/configuration_dpt.py
CHANGED
@@ -16,4 +16,9 @@ from ...configuration_generic import RBLNModelForDepthEstimationConfig
 
 
 class RBLNDPTForDepthEstimationConfig(RBLNModelForDepthEstimationConfig):
-
+    """
+    Configuration class for RBLNDPTForDepthEstimation.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized DPT (Dense Prediction Transformer) models for depth estimation tasks.
+    """
optimum/rbln/transformers/models/dpt/modeling_dpt.py
CHANGED
@@ -17,4 +17,9 @@ from ...modeling_generic import RBLNModelForDepthEstimation
 
 
 class RBLNDPTForDepthEstimation(RBLNModelForDepthEstimation):
-
+    """
+    RBLN optimized DPT model for depth estimation tasks.
+
+    This class provides hardware-accelerated inference for DPT (Dense Prediction Transformer)
+    models on RBLN devices, supporting monocular depth estimation from single images.
+    """
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py
CHANGED
@@ -326,7 +326,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
        attention_mask: torch.Tensor,
        position_ids: torch.Tensor,
        token_type_ids: Optional[torch.Tensor] = None,
-    ):
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, int, torch.Tensor]:
        """
        Pads inputs, attention_mask, and position_ids so image token groups (256 tokens with token_type_ids == 1)
        start at multiples of prefill_chunk_size (256). Returns padded tensors and total padded length.
@@ -338,7 +338,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
            token_type_ids: (1, seq_len) tensor, 0 for text, 1 for image.
 
        Returns:
-
+            (inputs_padded, attention_mask_padded, position_ids_padded, padded_len, token_type_ids_padded).
        """
 
        if token_type_ids is None:
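Note: the alignment rule in this docstring reduces to padding each image token group so it starts on a prefill_chunk_size boundary. A toy calculation of the required padding (the helper name is ours; the real method pads the full tensors):

import torch

prefill_chunk_size = 256  # matches the 256-token image groups described above

def pad_offset(image_group_start: int) -> int:
    # Padding tokens needed so the group starts at a multiple of prefill_chunk_size.
    return -image_group_start % prefill_chunk_size

token_type_ids = torch.tensor([[0] * 300 + [1] * 256 + [0] * 10])
start = int((token_type_ids[0] == 1).nonzero()[0])  # image group starts at index 300
print(pad_offset(start))  # 212 -> after padding, the group starts at 512 (2 * 256)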
@@ -816,9 +816,11 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
            quantization.maybe_set_quantization_env()
            original_linear = torch.nn.functional.linear
            torch.nn.functional.linear = torch.ops.rbln_custom_ops.linear
-            compiled_model =
+            compiled_model = cls.compile(
                wrapped_model,
                compile_config,
+                create_runtimes=rbln_config.create_runtimes,
+                device=rbln_config.device,
                example_inputs=example_inputs,
                compile_context=compile_context,
            )
optimum/rbln/transformers/models/llava_next/configuration_llava_next.py
CHANGED
@@ -18,6 +18,14 @@ from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNLlavaNextForConditionalGenerationConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNLlavaNextForConditionalGeneration.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized LLaVA-Next models for multimodal conditional generation tasks
+    that combine vision and language processing capabilities.
+    """
+
     submodules = ["vision_tower", "language_model"]
 
     def __init__(
optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py
CHANGED
@@ -19,6 +19,14 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
 
 
 class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausalLMConfig):
+    """
+    Configuration class for RBLNQwen2_5_VLForConditionalGeneration.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Qwen2.5-VL models for multimodal conditional generation tasks
+    that combine vision and language processing capabilities.
+    """
+
     submodules = ["visual"]
 
     def __init__(
@@ -37,6 +45,14 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausalLMConfig):
 
 
 class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNQwen2_5_VisionTransformerPretrainedModel.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Qwen2.5-VL vision transformer models with window-based attention
+    mechanisms for processing images and videos.
+    """
+
     def __init__(self, max_seq_lens: Union[int, List[int]] = None, **kwargs: Dict[str, Any]):
         """
         Args:
optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
CHANGED
@@ -54,6 +54,14 @@ if TYPE_CHECKING:
 
 
 class RBLNQwen2_5_VisionTransformerPretrainedModel(RBLNModel):
+    """
+    RBLN optimized Qwen2.5-VL vision transformer model.
+
+    This class provides hardware-accelerated inference for Qwen2.5-VL vision transformers
+    on RBLN devices, supporting image and video encoding for multimodal vision-language tasks
+    with window-based attention mechanisms.
+    """
+
     auto_model_class = None
 
     def __post_init__(self, **kwargs):
optimum/rbln/transformers/models/resnet/configuration_resnet.py
CHANGED
@@ -17,4 +17,9 @@ from ...configuration_generic import RBLNModelForImageClassificationConfig
 
 
 class RBLNResNetForImageClassificationConfig(RBLNModelForImageClassificationConfig):
-    ""
+    """
+    Configuration class for RBLNResNetForImageClassification.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized ResNet models for image classification tasks.
+    """
optimum/rbln/transformers/models/resnet/modeling_resnet.py
CHANGED
@@ -18,5 +18,9 @@ from ...modeling_generic import RBLNModelForImageClassification
 
 
 class RBLNResNetForImageClassification(RBLNModelForImageClassification):
     """
-    ResNet model for image classification tasks
+    RBLN optimized ResNet model for image classification tasks.
+
+    This class provides hardware-accelerated inference for ResNet models
+    on RBLN devices, supporting image classification with convolutional neural networks
+    designed for computer vision tasks.
     """
optimum/rbln/transformers/models/roberta/configuration_roberta.py
CHANGED
@@ -16,8 +16,18 @@ from ...configuration_generic import RBLNModelForMaskedLMConfig, RBLNModelForSequenceClassificationConfig
 
 
 class RBLNRobertaForMaskedLMConfig(RBLNModelForMaskedLMConfig):
-    ""
+    """
+    Configuration class for RBLNRobertaForMaskedLM.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized RoBERTa models for masked language modeling tasks.
+    """
 
 
 class RBLNRobertaForSequenceClassificationConfig(RBLNModelForSequenceClassificationConfig):
-    ""
+    """
+    Configuration class for RBLNRobertaForSequenceClassification.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized RoBERTa models for sequence classification tasks.
+    """
optimum/rbln/transformers/models/roberta/modeling_roberta.py
CHANGED
@@ -16,8 +16,24 @@ from ...modeling_generic import RBLNModelForMaskedLM, RBLNModelForSequenceClassification
 
 
 class RBLNRobertaForMaskedLM(RBLNModelForMaskedLM):
+    """
+    RBLN optimized RoBERTa model for masked language modeling tasks.
+
+    This class provides hardware-accelerated inference for RoBERTa models
+    on RBLN devices, supporting masked language modeling tasks such as
+    token prediction and text completion.
+    """
+
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
 
 class RBLNRobertaForSequenceClassification(RBLNModelForSequenceClassification):
+    """
+    RBLN optimized RoBERTa model for sequence classification tasks.
+
+    This class provides hardware-accelerated inference for RoBERTa models
+    on RBLN devices, supporting text classification tasks such as sentiment analysis,
+    topic classification, and other sequence-level prediction tasks.
+    """
+
     rbln_model_input_names = ["input_ids", "attention_mask"]
optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py
CHANGED
@@ -161,16 +161,20 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
            if "key_value_states" in name:
                context.mark_static_address(tensor)
 
-        compiled_encoder =
+        compiled_encoder = cls.compile(
            wrapped_model.encoder,
            enc_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
            example_inputs=enc_example_inputs,
            compile_context=context,
        )
 
-        compiled_decoder =
+        compiled_decoder = cls.compile(
            wrapped_model.decoder,
            dec_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
            example_inputs=dec_example_inputs,
            compile_context=context,
        )
optimum/rbln/transformers/models/siglip/configuration_siglip.py
CHANGED
@@ -18,6 +18,13 @@ from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNSiglipVisionModelConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNSiglipVisionModel.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized SigLIP vision models for image encoding in multimodal tasks.
+    """
+
     def __init__(
         self,
         batch_size: Optional[int] = None,
optimum/rbln/transformers/models/siglip/modeling_siglip.py
CHANGED
@@ -58,6 +58,13 @@ class _SiglipVisionModel(torch.nn.Module):
 
 
 class RBLNSiglipVisionModel(RBLNModel):
+    """
+    RBLN optimized SigLIP vision model.
+
+    This class provides hardware-accelerated inference for SigLIP vision models
+    on RBLN devices, supporting image encoding for multimodal vision-language tasks.
+    """
+
     @classmethod
     def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSiglipVisionModelConfig) -> torch.nn.Module:
         wrapper_cfg = {
optimum/rbln/transformers/models/t5/configuration_t5.py
CHANGED
@@ -17,8 +17,18 @@ from ..seq2seq import RBLNModelForSeq2SeqLMConfig
 
 
 class RBLNT5EncoderModelConfig(RBLNTransformerEncoderForFeatureExtractionConfig):
-
+    """
+    Configuration class for RBLNT5EncoderModel.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized T5 encoder models for feature extraction tasks.
+    """
 
 
 class RBLNT5ForConditionalGenerationConfig(RBLNModelForSeq2SeqLMConfig):
-
+    """
+    Configuration class for RBLNT5ForConditionalGeneration.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized T5 models for conditional text generation tasks.
+    """
optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py
CHANGED
@@ -4,6 +4,13 @@ from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNTimeSeriesTransformerForPredictionConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNTimeSeriesTransformerForPrediction.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Time Series Transformer models for time series forecasting tasks.
+    """
+
     def __init__(
         self,
         batch_size: Optional[int] = None,
optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py
CHANGED
@@ -194,15 +194,19 @@ class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
            if "key_value_states" in name:
                context.mark_static_address(tensor)
 
-        compiled_decoder =
+        compiled_decoder = cls.compile(
            wrapped_model.decoder,
            dec_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
            example_inputs=dec_example_inputs,
            compile_context=context,
        )
-        compiled_encoder =
+        compiled_encoder = cls.compile(
            wrapped_model.encoder,
            enc_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
            example_inputs=enc_example_inputs,
            compile_context=context,
        )
optimum/rbln/transformers/models/vit/configuration_vit.py
CHANGED
@@ -16,4 +16,9 @@ from ...configuration_generic import RBLNModelForImageClassificationConfig
 
 
 class RBLNViTForImageClassificationConfig(RBLNModelForImageClassificationConfig):
-    ""
+    """
+    Configuration class for RBLNViTForImageClassification.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Vision Transformer (ViT) models for image classification tasks.
+    """
optimum/rbln/transformers/models/vit/modeling_vit.py
CHANGED
@@ -16,4 +16,10 @@ from ...modeling_generic import RBLNModelForImageClassification
 
 
 class RBLNViTForImageClassification(RBLNModelForImageClassification):
-    ""
+    """
+    RBLN optimized Vision Transformer (ViT) model for image classification tasks.
+
+    This class provides hardware-accelerated inference for Vision Transformer models
+    on RBLN devices, supporting image classification with transformer-based architectures
+    that process images as sequences of patches.
+    """
optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py
CHANGED
@@ -16,4 +16,11 @@ from ...configuration_generic import RBLNModelForMaskedLMConfig
 
 
 class RBLNWav2Vec2ForCTCConfig(RBLNModelForMaskedLMConfig):
+    """
+    Configuration class for RBLNWav2Vec2ForCTC.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Wav2Vec2 models for Connectionist Temporal Classification (CTC) tasks.
+    """
+
     rbln_model_input_names = ["input_values"]
optimum/rbln/transformers/models/whisper/configuration_whisper.py
CHANGED
@@ -24,6 +24,13 @@ logger = get_logger()
 
 
 class RBLNWhisperForConditionalGenerationConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNWhisperForConditionalGeneration.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Whisper models for speech recognition and transcription tasks.
+    """
+
     def __init__(
         self,
         batch_size: int = None,
optimum/rbln/transformers/models/whisper/modeling_whisper.py
CHANGED
@@ -230,15 +230,19 @@ class RBLNWhisperForConditionalGeneration(RBLNModel, RBLNWhisperGenerationMixin):
            if "key_value_states" in name:
                context.mark_static_address(tensor)
 
-        compiled_encoder =
+        compiled_encoder = cls.compile(
            wrapped_model.encoder,
            enc_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
            example_inputs=enc_example_inputs,
            compile_context=context,
        )
-        compiled_decoder =
+        compiled_decoder = cls.compile(
            wrapped_model.decoder,
            dec_compile_config,
+            create_runtimes=rbln_config.create_runtimes,
+            device=rbln_config.device,
            example_inputs=dec_example_inputs,
            compile_context=context,
        )
optimum/rbln/utils/runtime_utils.py
CHANGED
@@ -13,12 +13,57 @@
 # limitations under the License.
 
 import threading
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional, Union
 
 import rebel
 import torch
 
 
+def tp_and_devices_are_ok(
+    tensor_parallel_size: Optional[int] = None,
+    device: Optional[Union[int, List[int]]] = None,
+    npu: Optional[str] = None,
+) -> Optional[str]:
+    if tensor_parallel_size is None:
+        tensor_parallel_size = 1
+
+    if rebel.device_count() < tensor_parallel_size:
+        return (
+            f"Tensor parallel size {tensor_parallel_size} is greater than "
+            f"the number of available devices {rebel.device_count()}."
+        )
+
+    if device is None:
+        device = list(range(tensor_parallel_size))
+    elif isinstance(device, int):
+        device = [device]
+    elif isinstance(device, list):
+        if any(not isinstance(d, int) for d in device):
+            return "Device must be a(n) (list of) integer(s)."
+        if len(device) != tensor_parallel_size:
+            return (
+                f"The number of devices ({len(device)}) does not match tensor parallel size ({tensor_parallel_size})."
+            )
+    else:
+        return f"Invalid device: {device}"
+
+    for device_id in device:
+        if device_id < 0:  # if any device is dummy device, skip it
+            return None
+        if rebel.get_npu_name(device_id) is None:
+            return (
+                f"Device {device_id} is not a valid NPU device. Please check your NPU status with 'rbln-stat' command."
+            )
+
+    if npu is not None:
+        for device_id in device:
+            npu_name = rebel.get_npu_name(device_id)
+            if npu_name != npu:
+                return f"Device {device_id} ({npu_name}) is not on the same NPU as {npu}."
+
+    return None
+
+
 class RBLNPytorchRuntime:
     mandatory_members = []
 
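Note: the new `tp_and_devices_are_ok` helper returns an error string instead of raising, so callers decide how to fail. A usage sketch; the call pattern follows the signature added above, and the NPU name shown is illustrative:

from optimum.rbln.utils.runtime_utils import tp_and_devices_are_ok

error = tp_and_devices_are_ok(tensor_parallel_size=4, device=[0, 1, 2, 3], npu="RBLN-CA02")
if error is not None:
    raise ValueError(error)  # e.g. "Tensor parallel size 4 is greater than ..."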
{optimum_rbln-0.8.1a6.dist-info → optimum_rbln-0.8.1a7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.1a6
+Version: 0.8.1a7
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai