optimum-rbln 0.7.5a0__py3-none-any.whl → 0.7.5rc0__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (46)
  1. optimum/rbln/__init__.py +30 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +9 -4
  4. optimum/rbln/modeling.py +7 -5
  5. optimum/rbln/ops/__init__.py +1 -0
  6. optimum/rbln/ops/attn.py +10 -0
  7. optimum/rbln/ops/flash_attn.py +8 -0
  8. optimum/rbln/ops/sliding_window_attn.py +111 -0
  9. optimum/rbln/transformers/__init__.py +32 -3
  10. optimum/rbln/transformers/models/__init__.py +37 -0
  11. optimum/rbln/transformers/models/auto/__init__.py +1 -0
  12. optimum/rbln/transformers/models/auto/modeling_auto.py +7 -0
  13. optimum/rbln/transformers/models/blip_2/__init__.py +20 -0
  14. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +93 -0
  15. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +298 -0
  16. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +12 -6
  17. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +189 -90
  18. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +186 -95
  19. optimum/rbln/transformers/models/exaone/exaone_architecture.py +5 -1
  20. optimum/rbln/transformers/models/gemma/gemma_architecture.py +5 -1
  21. optimum/rbln/transformers/models/gemma3/__init__.py +16 -0
  22. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +69 -0
  23. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +446 -0
  24. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +1057 -0
  25. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +4 -1
  26. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +11 -7
  27. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +4 -4
  28. optimum/rbln/transformers/models/midm/midm_architecture.py +4 -1
  29. optimum/rbln/transformers/models/opt/__init__.py +16 -0
  30. optimum/rbln/transformers/models/opt/configuration_opt.py +19 -0
  31. optimum/rbln/transformers/models/opt/modeling_opt.py +80 -0
  32. optimum/rbln/transformers/models/opt/opt_architecture.py +77 -0
  33. optimum/rbln/transformers/models/phi/phi_architecture.py +4 -1
  34. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +18 -11
  35. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +35 -52
  36. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -0
  37. optimum/rbln/transformers/models/siglip/__init__.py +20 -0
  38. optimum/rbln/transformers/models/siglip/configuration_siglip.py +66 -0
  39. optimum/rbln/transformers/models/siglip/modeling_siglip.py +146 -0
  40. optimum/rbln/transformers/models/whisper/whisper_architecture.py +1 -0
  41. optimum/rbln/transformers/utils/rbln_quantization.py +121 -72
  42. optimum/rbln/utils/submodule.py +13 -1
  43. {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5rc0.dist-info}/METADATA +1 -1
  44. {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5rc0.dist-info}/RECORD +46 -31
  45. {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5rc0.dist-info}/WHEEL +0 -0
  46. {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5rc0.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__init__.py CHANGED
@@ -38,6 +38,7 @@ _import_structure = {
  "RBLNAutoModelForCTC",
  "RBLNAutoModelForDepthEstimation",
  "RBLNAutoModelForImageClassification",
+ "RBLNAutoModelForImageTextToText",
  "RBLNAutoModelForMaskedLM",
  "RBLNAutoModelForQuestionAnswering",
  "RBLNAutoModelForSeq2SeqLM",
@@ -54,6 +55,12 @@ _import_structure = {
  "RBLNBertForQuestionAnsweringConfig",
  "RBLNBertModel",
  "RBLNBertModelConfig",
+ "RBLNBlip2VisionModelConfig",
+ "RBLNBlip2VisionModel",
+ "RBLNBlip2QFormerModel",
+ "RBLNBlip2QFormerModelConfig",
+ "RBLNBlip2ForConditionalGeneration",
+ "RBLNBlip2ForConditionalGenerationConfig",
  "RBLNCLIPTextModel",
  "RBLNCLIPTextModelConfig",
  "RBLNCLIPTextModelWithProjection",
@@ -72,6 +79,10 @@ _import_structure = {
  "RBLNExaoneForCausalLMConfig",
  "RBLNGemmaForCausalLM",
  "RBLNGemmaForCausalLMConfig",
+ "RBLNGemma3ForCausalLM",
+ "RBLNGemma3ForCausalLMConfig",
+ "RBLNGemma3ForConditionalGeneration",
+ "RBLNGemma3ForConditionalGenerationConfig",
  "RBLNGPT2LMHeadModel",
  "RBLNGPT2LMHeadModelConfig",
  "RBLNIdefics3VisionTransformer",
@@ -80,6 +91,8 @@ _import_structure = {
  "RBLNIdefics3VisionTransformerConfig",
  "RBLNLlamaForCausalLM",
  "RBLNLlamaForCausalLMConfig",
+ "RBLNOPTForCausalLM",
+ "RBLNOPTForCausalLMConfig",
  "RBLNLlavaNextForConditionalGeneration",
  "RBLNLlavaNextForConditionalGenerationConfig",
  "RBLNMidmLMHeadModel",
@@ -100,6 +113,8 @@ _import_structure = {
  "RBLNRobertaForMaskedLMConfig",
  "RBLNRobertaForSequenceClassification",
  "RBLNRobertaForSequenceClassificationConfig",
+ "RBLNSiglipVisionModel",
+ "RBLNSiglipVisionModelConfig",
  "RBLNT5EncoderModel",
  "RBLNT5EncoderModelConfig",
  "RBLNT5ForConditionalGeneration",
@@ -249,6 +264,7 @@ if TYPE_CHECKING:
  RBLNAutoModelForCTC,
  RBLNAutoModelForDepthEstimation,
  RBLNAutoModelForImageClassification,
+ RBLNAutoModelForImageTextToText,
  RBLNAutoModelForMaskedLM,
  RBLNAutoModelForQuestionAnswering,
  RBLNAutoModelForSeq2SeqLM,
@@ -265,6 +281,12 @@ if TYPE_CHECKING:
  RBLNBertForQuestionAnsweringConfig,
  RBLNBertModel,
  RBLNBertModelConfig,
+ RBLNBlip2ForConditionalGeneration,
+ RBLNBlip2ForConditionalGenerationConfig,
+ RBLNBlip2QFormerModel,
+ RBLNBlip2QFormerModelConfig,
+ RBLNBlip2VisionModel,
+ RBLNBlip2VisionModelConfig,
  RBLNCLIPTextModel,
  RBLNCLIPTextModelConfig,
  RBLNCLIPTextModelWithProjection,
@@ -281,6 +303,10 @@ if TYPE_CHECKING:
  RBLNDPTForDepthEstimationConfig,
  RBLNExaoneForCausalLM,
  RBLNExaoneForCausalLMConfig,
+ RBLNGemma3ForCausalLM,
+ RBLNGemma3ForCausalLMConfig,
+ RBLNGemma3ForConditionalGeneration,
+ RBLNGemma3ForConditionalGenerationConfig,
  RBLNGemmaForCausalLM,
  RBLNGemmaForCausalLMConfig,
  RBLNGPT2LMHeadModel,
@@ -297,6 +323,8 @@ if TYPE_CHECKING:
  RBLNMidmLMHeadModelConfig,
  RBLNMistralForCausalLM,
  RBLNMistralForCausalLMConfig,
+ RBLNOPTForCausalLM,
+ RBLNOPTForCausalLMConfig,
  RBLNPhiForCausalLM,
  RBLNPhiForCausalLMConfig,
  RBLNQwen2_5_VisionTransformerPretrainedModel,
@@ -311,6 +339,8 @@ if TYPE_CHECKING:
  RBLNRobertaForMaskedLMConfig,
  RBLNRobertaForSequenceClassification,
  RBLNRobertaForSequenceClassificationConfig,
+ RBLNSiglipVisionModel,
+ RBLNSiglipVisionModelConfig,
  RBLNT5EncoderModel,
  RBLNT5EncoderModelConfig,
  RBLNT5ForConditionalGeneration,
optimum/rbln/__version__.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.7.5a0'
- __version_tuple__ = version_tuple = (0, 7, 5, 'a0')
+ __version__ = version = '0.7.5rc0'
+ __version_tuple__ = version_tuple = (0, 7, 5, 'rc0')
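Note on the version bump: under PEP 440, a0 is an alpha pre-release and rc0 is a release candidate, so 0.7.5rc0 sorts after 0.7.5a0 and still before the final 0.7.5. A quick sanity check, not part of this diff, assuming the packaging library is installed:

from packaging.version import Version

# PEP 440 pre-release ordering: alpha < release candidate < final release
assert Version("0.7.5a0") < Version("0.7.5rc0") < Version("0.7.5")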
optimum/rbln/configuration_utils.py CHANGED
@@ -17,7 +17,7 @@ import inspect
  import json
  from dataclasses import asdict, dataclass
  from pathlib import Path
- from typing import Any, Dict, List, Optional, Tuple, Type, Union
+ from typing import Any, Dict, List, Optional, Protocol, Tuple, Type, Union, runtime_checkable

  import torch

@@ -34,6 +34,11 @@ DEFAULT_MOD_NAME = "default"
  TypeInputInfo = List[Tuple[str, Tuple[int], str]]


+ @runtime_checkable
+ class RBLNSerializableConfigProtocol(Protocol):
+ def _prepare_for_serialization(self) -> Dict[str, Any]: ...
+
+
  @dataclass
  class RBLNCompileConfig:
  """
@@ -234,7 +239,7 @@ class RBLNAutoConfig:
  return cls(**config_file)


- class RBLNModelConfig:
+ class RBLNModelConfig(RBLNSerializableConfigProtocol):
  """Base configuration class for RBLN models that handles compilation settings, runtime options, and submodules.

  This class provides functionality for:
@@ -594,14 +599,14 @@ class RBLNModelConfig:
  )
  return rbln_model_cls

- def _prepare_for_serialization(self):
+ def _prepare_for_serialization(self) -> Dict[str, Any]:
  """
  Prepare the attributes map for serialization by converting nested RBLNModelConfig
  objects to their serializable form.
  """
  serializable_map = {}
  for key, value in self._attributes_map.items():
- if isinstance(value, RBLNModelConfig):
+ if isinstance(value, RBLNSerializableConfigProtocol):
  # Convert nested RBLNModelConfig to its serializable form
  serializable_map[key] = value._prepare_for_serialization()
  elif key == "_compile_cfgs":
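For context on the change above: because RBLNSerializableConfigProtocol is declared runtime_checkable, the isinstance() check in _prepare_for_serialization now matches any object that implements the method, not only RBLNModelConfig subclasses. A minimal, self-contained sketch of that idea (illustrative names, not code from the package):

from typing import Any, Dict, Protocol, runtime_checkable


@runtime_checkable
class Serializable(Protocol):
    def _prepare_for_serialization(self) -> Dict[str, Any]: ...


class DummyConfig:
    # no inheritance needed; having the method is enough to satisfy the protocol
    def _prepare_for_serialization(self) -> Dict[str, Any]:
        return {"kind": "dummy"}


def serialize(value: Any) -> Any:
    # structural check: True for any object exposing _prepare_for_serialization()
    if isinstance(value, Serializable):
        return value._prepare_for_serialization()
    return value


print(serialize(DummyConfig()))  # {'kind': 'dummy'}
print(serialize(42))             # 42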
optimum/rbln/modeling.py CHANGED
@@ -56,11 +56,7 @@ class RBLNModel(RBLNBaseModel):
  def update_kwargs(cls, kwargs):
  """
  Update user-given kwargs to get proper pytorch model.
-
- For example, `torchscript`=True should be set because torch.jit
- does not support `transformers` output instances as module output;
  """
- kwargs.update({"torchscript": True})
  return kwargs

  @classmethod
@@ -133,7 +129,6 @@ class RBLNModel(RBLNBaseModel):

  if not isinstance(config, PretrainedConfig): # diffusers config
  config = PretrainedConfig(**config)
- config.save_pretrained(save_dir_path / subfolder)

  # Save preprocessor
  for preprocessor in preprocessors:
@@ -155,6 +150,10 @@ class RBLNModel(RBLNBaseModel):
  preprocessors=preprocessors, model=model, model_config=config, rbln_config=rbln_config
  )

+ # torchscript should be True for jit to work
+ torchscript_backup = config.torchscript
+ config.torchscript = True
+
  compiled_model: Union[rebel.RBLNCompiledModel, Dict[str, rebel.RBLNCompiledModel]] = cls.get_compiled_model(
  model, rbln_config=rbln_config
  )
@@ -169,6 +168,9 @@ class RBLNModel(RBLNBaseModel):
  cm.save(save_dir_path / subfolder / f"{compiled_model_name}.rbln")
  rbln_config.save(save_dir_path / subfolder)

+ config.torchscript = torchscript_backup
+ config.save_pretrained(save_dir_path / subfolder)
+
  # Save torch artifacts (e.g. embedding matrix if needed.)
  cls.save_torch_artifacts(model, save_dir_path=save_dir_path, subfolder=subfolder, rbln_config=rbln_config)

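The modeling.py change above defers config.save_pretrained() until after compilation and temporarily forces config.torchscript = True while the model is traced (torch.jit cannot return transformers output objects), restoring the user's original value before the config is written out. A minimal sketch of that backup/restore idiom (hypothetical compile_fn; not the package's actual implementation, which assigns and restores inline):

from transformers import PretrainedConfig


def compile_with_torchscript(config: PretrainedConfig, compile_fn):
    # force tuple outputs only for the duration of tracing/compilation
    torchscript_backup = config.torchscript
    config.torchscript = True
    try:
        return compile_fn(config)
    finally:
        # restore before the config is saved next to the compiled artifacts
        config.torchscript = torchscript_backup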
optimum/rbln/ops/__init__.py CHANGED
@@ -16,3 +16,4 @@ from .attn import *
  from .flash_attn import *
  from .kv_cache_update import *
  from .linear import linear
+ from .sliding_window_attn import *
optimum/rbln/ops/attn.py CHANGED
@@ -12,6 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ from typing import Optional

  import torch
  from torch import Tensor
@@ -125,6 +126,7 @@ def paged_causal_attn_decode(
  scale: Tensor,
  block_table: Tensor,
  block_size: int,
+ mask: Optional[Tensor] = None,
  ) -> Tensor:
  """Defines the computation pattern for fused attention with KV cache updates.

@@ -147,6 +149,7 @@ def paged_causal_attn_decode(
  - scale: [] - Attention scale factor
  - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
  - block_size: [] - Number of tokens per block
+ - mask: [batch=1, max_seq_len] - attention mask when use position_ids

  Returns:
  Tensor: attn_output: [batch=1, n_heads, n_groups, 1, head_dim] - Attention output
@@ -165,6 +168,7 @@ def paged_causal_attn_decode_fake(
  scale: Tensor,
  block_table: Tensor,
  block_size: int,
+ mask: Optional[Tensor] = None,
  ) -> Tensor:
  return torch.empty_like(q)

@@ -183,6 +187,8 @@ def paged_causal_attn_prefill(
  scale: Tensor,
  block_table: Tensor,
  block_size: int,
+ is_bidirectional: bool,
+ mask: Optional[Tensor] = None,
  ) -> Tensor:
  """Defines the computation pattern for prefill phase attention with KV cache updates.

@@ -204,6 +210,8 @@ def paged_causal_attn_prefill(
  - scale: [] - Attention scale factor
  - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
  - block_size: [] - Number of tokens per block
+ - is_bidirectional: [] - Whether the attention is bidirectional at current sequence position
+ - mask: [batch=1, max_seq_len] - attention mask when use position_ids

  Returns:
  Tensor: attn_output: [batch=1, n_heads, n_groups, seq_len, head_dim] - Attention output
@@ -222,6 +230,8 @@ def paged_causal_attn_prefill_fake(
  scale: Tensor,
  block_table: Tensor,
  block_size: int,
+ is_bidirectional: bool,
+ mask: Optional[Tensor] = None,
  ) -> Tensor:
  return torch.empty_like(q)

optimum/rbln/ops/flash_attn.py CHANGED
@@ -12,6 +12,8 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ from typing import Optional
+
  import torch
  from torch import Tensor

@@ -113,6 +115,7 @@ def paged_flash_causal_attn_decode(
  block_table: Tensor,
  block_size: int,
  partition: int,
+ mask: Optional[Tensor] = None,
  ) -> Tensor:
  """Defines the computation pattern for fused causal flash attention with KV cache for decoding.

@@ -133,6 +136,7 @@ def paged_flash_causal_attn_decode_fake(
  block_table: Tensor,
  block_size: int,
  partition: int,
+ mask: Optional[Tensor] = None,
  ) -> Tensor:
  return torch.empty_like(q)

@@ -152,6 +156,8 @@ def paged_flash_causal_attn_prefill(
  block_table: Tensor,
  block_size: int,
  partition: int,
+ is_bidirectional: bool,
+ mask: Optional[Tensor] = None,
  ) -> Tensor:
  """Defines the computation pattern for fused causal flash attention with KV cache for prefill.

@@ -172,5 +178,7 @@ def paged_flash_causal_attn_prefill_fake(
  block_table: Tensor,
  block_size: int,
  partition: int,
+ is_bidirectional: bool,
+ mask: Optional[Tensor] = None,
  ) -> Tensor:
  return torch.empty_like(q)
optimum/rbln/ops/sliding_window_attn.py ADDED
@@ -0,0 +1,111 @@
+ # Copyright 2025 Rebellions Inc. All rights reserved.
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at:
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ import torch
+ from torch import Tensor
+
+
+ @torch.library.custom_op(
+ "rbln_custom_ops::paged_sliding_window_attn_prefill",
+ mutates_args=(["kcache", "vcache"]),
+ )
+ def paged_sliding_window_attn_prefill(
+ q: Tensor,
+ k: Tensor,
+ v: Tensor,
+ kcache: Tensor,
+ vcache: Tensor,
+ cache_seq_len: Tensor,
+ cache_offset: Tensor,
+ scale: Tensor,
+ block_table: Tensor,
+ block_size: int,
+ is_bidirectional: bool,
+ ) -> Tensor:
+ """Defines the computation pattern for prefill phase attention with KV cache updates.
+
+ IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
+ a single optimized NPU operation. It is NOT meant for CPU execution.
+
+ Key differences from decode pattern:
+ - Handles prefill phase with multiple input tokens
+ - Takes explicit batch index for continuous batching
+
+ Expected tensor shapes:
+ - q: [batch=1, n_heads, n_groups, seq_len, head_dim] - Query states for multiple tokens
+ - k: [batch=1, n_heads, 1, seq_len, head_dim] - Key states for current input
+ - v: [batch=1, n_heads, 1, seq_len, head_dim] - Value states for current input
+ - kcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Key cache
+ - vcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Value cache
+ - cache_seq_len: [] - the sequence length of the cached states that were seen by the model
+ - cache_offset: [] - The valid length in the combined sequence of the KV cache and the current projected key states.
+ - scale: [] - Attention scale factor
+ - is_bidirectional: [] - Whether the attention is bidirectional
+ Returns:
+ Tensor: attn_output: [batch=1, n_heads, n_groups, seq_len, head_dim] - Attention output
+ """
+ return torch.empty_like(q)
+
+
+ @paged_sliding_window_attn_prefill.register_fake
+ def paged_sliding_window_attn_prefill_fake(
+ q: Tensor,
+ k: Tensor,
+ v: Tensor,
+ kcache: Tensor,
+ vcache: Tensor,
+ cache_seq_len: Tensor,
+ cache_offset: Tensor,
+ scale: Tensor,
+ block_table: Tensor,
+ block_size: int,
+ is_bidirectional: bool,
+ ) -> Tensor:
+ return torch.empty_like(q)
+
+
+ @torch.library.custom_op(
+ "rbln_custom_ops::paged_sliding_window_attn_decode",
+ mutates_args=(["kcache", "vcache"]),
+ )
+ def paged_sliding_window_attn_decode(
+ q: Tensor,
+ k: Tensor,
+ v: Tensor,
+ kcache: Tensor,
+ vcache: Tensor,
+ cache_seq_len: Tensor,
+ cache_offset: Tensor,
+ scale: Tensor,
+ block_table: Tensor,
+ block_size: int,
+ ) -> Tensor:
+ return torch.empty_like(q)
+
+
+ @paged_sliding_window_attn_decode.register_fake
+ def paged_sliding_window_attn_decode_fake(
+ q: Tensor,
+ k: Tensor,
+ v: Tensor,
+ kcache: Tensor,
+ vcache: Tensor,
+ cache_seq_len: Tensor,
+ cache_offset: Tensor,
+ scale: Tensor,
+ block_table: Tensor,
+ block_size: int,
+ ) -> Tensor:
+ return torch.empty_like(q)
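A minimal sketch (not part of the wheel) of calling the newly registered op once optimum.rbln.ops has been imported from an installed build. Shapes follow the docstring above, dtypes are illustrative, and on CPU the eager body deliberately returns an uninitialized tensor, since the op only exists as a pattern for the RBLN compiler:

import torch

import optimum.rbln.ops  # noqa: F401  # importing registers the rbln_custom_ops::* ops

n_heads, n_groups, head_dim = 8, 4, 64
seq_len, max_seq_len, block_size = 16, 128, 64

q = torch.randn(1, n_heads, n_groups, seq_len, head_dim)
k = torch.randn(1, n_heads, 1, seq_len, head_dim)
v = torch.randn(1, n_heads, 1, seq_len, head_dim)
kcache = torch.zeros(1, n_heads, 1, max_seq_len, head_dim)
vcache = torch.zeros(1, n_heads, 1, max_seq_len, head_dim)
cache_seq_len = torch.tensor(0)       # tokens already present in the sliding-window cache
cache_offset = torch.tensor(seq_len)  # valid length after appending the new keys
scale = torch.tensor(head_dim ** -0.5)
block_table = torch.zeros(1, max_seq_len // block_size, dtype=torch.int32)

out = torch.ops.rbln_custom_ops.paged_sliding_window_attn_prefill(
    q, k, v, kcache, vcache, cache_seq_len, cache_offset,
    scale, block_table, block_size, True,  # is_bidirectional
)
assert out.shape == q.shape  # the eager stub returns torch.empty_like(q)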
optimum/rbln/transformers/__init__.py CHANGED
@@ -34,6 +34,7 @@ _import_structure = {
  "RBLNAutoModelForCTC",
  "RBLNAutoModelForDepthEstimation",
  "RBLNAutoModelForImageClassification",
+ "RBLNAutoModelForImageTextToText",
  "RBLNAutoModelForMaskedLM",
  "RBLNAutoModelForQuestionAnswering",
  "RBLNAutoModelForSeq2SeqLM",
@@ -50,6 +51,12 @@ _import_structure = {
  "RBLNBertForQuestionAnsweringConfig",
  "RBLNBertModel",
  "RBLNBertModelConfig",
+ "RBLNBlip2VisionModelConfig",
+ "RBLNBlip2VisionModel",
+ "RBLNBlip2QFormerModel",
+ "RBLNBlip2QFormerModelConfig",
+ "RBLNBlip2ForConditionalGeneration",
+ "RBLNBlip2ForConditionalGenerationConfig",
  "RBLNCLIPTextModel",
  "RBLNCLIPTextModelConfig",
  "RBLNCLIPTextModelWithProjection",
@@ -66,6 +73,10 @@ _import_structure = {
  "RBLNExaoneForCausalLMConfig",
  "RBLNGemmaForCausalLM",
  "RBLNGemmaForCausalLMConfig",
+ "RBLNGemma3ForCausalLM",
+ "RBLNGemma3ForCausalLMConfig",
+ "RBLNGemma3ForConditionalGeneration",
+ "RBLNGemma3ForConditionalGenerationConfig",
  "RBLNGPT2LMHeadModel",
  "RBLNGPT2LMHeadModelConfig",
  "RBLNIdefics3VisionTransformer",
@@ -74,6 +85,8 @@ _import_structure = {
  "RBLNIdefics3VisionTransformerConfig",
  "RBLNLlamaForCausalLM",
  "RBLNLlamaForCausalLMConfig",
+ "RBLNOPTForCausalLM",
+ "RBLNOPTForCausalLMConfig",
  "RBLNLlavaNextForConditionalGeneration",
  "RBLNLlavaNextForConditionalGenerationConfig",
  "RBLNMidmLMHeadModel",
@@ -88,17 +101,18 @@ _import_structure = {
  "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
  "RBLNQwen2_5_VLForConditionalGeneration",
  "RBLNQwen2_5_VLForConditionalGenerationConfig",
+ "RBLNSiglipVisionModel",
+ "RBLNSiglipVisionModelConfig",
  "RBLNT5EncoderModel",
  "RBLNT5EncoderModelConfig",
  "RBLNT5ForConditionalGeneration",
  "RBLNT5ForConditionalGenerationConfig",
+ "RBLNTimeSeriesTransformerForPrediction",
+ "RBLNTimeSeriesTransformerForPredictionConfig",
  "RBLNWav2Vec2ForCTC",
  "RBLNWav2Vec2ForCTCConfig",
  "RBLNWhisperForConditionalGeneration",
  "RBLNWhisperForConditionalGenerationConfig",
- "RBLNTimeSeriesTransformerForPrediction",
- "RBLNTimeSeriesTransformerForPredictionConfig",
- "RBLNLlavaNextForConditionalGeneration",
  "RBLNXLMRobertaModel",
  "RBLNXLMRobertaModelConfig",
  ],
@@ -139,6 +153,7 @@ if TYPE_CHECKING:
  RBLNAutoModelForCTC,
  RBLNAutoModelForDepthEstimation,
  RBLNAutoModelForImageClassification,
+ RBLNAutoModelForImageTextToText,
  RBLNAutoModelForMaskedLM,
  RBLNAutoModelForQuestionAnswering,
  RBLNAutoModelForSeq2SeqLM,
@@ -155,6 +170,12 @@ if TYPE_CHECKING:
  RBLNBertForQuestionAnsweringConfig,
  RBLNBertModel,
  RBLNBertModelConfig,
+ RBLNBlip2ForConditionalGeneration,
+ RBLNBlip2ForConditionalGenerationConfig,
+ RBLNBlip2QFormerModel,
+ RBLNBlip2QFormerModelConfig,
+ RBLNBlip2VisionModel,
+ RBLNBlip2VisionModelConfig,
  RBLNCLIPTextModel,
  RBLNCLIPTextModelConfig,
  RBLNCLIPTextModelWithProjection,
@@ -169,6 +190,10 @@ if TYPE_CHECKING:
  RBLNDPTForDepthEstimationConfig,
  RBLNExaoneForCausalLM,
  RBLNExaoneForCausalLMConfig,
+ RBLNGemma3ForCausalLM,
+ RBLNGemma3ForCausalLMConfig,
+ RBLNGemma3ForConditionalGeneration,
+ RBLNGemma3ForConditionalGenerationConfig,
  RBLNGemmaForCausalLM,
  RBLNGemmaForCausalLMConfig,
  RBLNGPT2LMHeadModel,
@@ -185,6 +210,8 @@ if TYPE_CHECKING:
  RBLNMidmLMHeadModelConfig,
  RBLNMistralForCausalLM,
  RBLNMistralForCausalLMConfig,
+ RBLNOPTForCausalLM,
+ RBLNOPTForCausalLMConfig,
  RBLNPhiForCausalLM,
  RBLNPhiForCausalLMConfig,
  RBLNQwen2_5_VisionTransformerPretrainedModel,
@@ -193,6 +220,8 @@ if TYPE_CHECKING:
  RBLNQwen2_5_VLForConditionalGenerationConfig,
  RBLNQwen2ForCausalLM,
  RBLNQwen2ForCausalLMConfig,
+ RBLNSiglipVisionModel,
+ RBLNSiglipVisionModelConfig,
  RBLNT5EncoderModel,
  RBLNT5EncoderModelConfig,
  RBLNT5ForConditionalGeneration,
optimum/rbln/transformers/models/__init__.py CHANGED
@@ -31,6 +31,7 @@ _import_structure = {
  "RBLNAutoModelForSequenceClassification",
  "RBLNAutoModelForSpeechSeq2Seq",
  "RBLNAutoModelForVision2Seq",
+ "RBLNAutoModelForImageTextToText",
  ],
  "bart": [
  "RBLNBartForConditionalGeneration",
@@ -46,6 +47,14 @@ _import_structure = {
  "RBLNBertForMaskedLM",
  "RBLNBertForMaskedLMConfig",
  ],
+ "blip_2": [
+ "RBLNBlip2VisionModelConfig",
+ "RBLNBlip2VisionModel",
+ "RBLNBlip2ForConditionalGeneration",
+ "RBLNBlip2ForConditionalGenerationConfig",
+ "RBLNBlip2QFormerModel",
+ "RBLNBlip2QFormerModelConfig",
+ ],
  "clip": [
  "RBLNCLIPTextModel",
  "RBLNCLIPTextModelConfig",
@@ -72,6 +81,12 @@ _import_structure = {
  ],
  "exaone": ["RBLNExaoneForCausalLM", "RBLNExaoneForCausalLMConfig"],
  "gemma": ["RBLNGemmaForCausalLM", "RBLNGemmaForCausalLMConfig"],
+ "gemma3": [
+ "RBLNGemma3ForCausalLM",
+ "RBLNGemma3ForCausalLMConfig",
+ "RBLNGemma3ForConditionalGeneration",
+ "RBLNGemma3ForConditionalGenerationConfig",
+ ],
  "gpt2": ["RBLNGPT2LMHeadModel", "RBLNGPT2LMHeadModelConfig"],
  "idefics3": [
  "RBLNIdefics3VisionTransformer",
@@ -80,11 +95,16 @@ _import_structure = {
  "RBLNIdefics3VisionTransformerConfig",
  ],
  "llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig"],
+ "opt": ["RBLNOPTForCausalLM", "RBLNOPTForCausalLMConfig"],
  "llava_next": ["RBLNLlavaNextForConditionalGeneration", "RBLNLlavaNextForConditionalGenerationConfig"],
  "midm": ["RBLNMidmLMHeadModel", "RBLNMidmLMHeadModelConfig"],
  "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
  "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
  "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
+ "siglip": [
+ "RBLNSiglipVisionModel",
+ "RBLNSiglipVisionModelConfig",
+ ],
  "time_series_transformers": [
  "RBLNTimeSeriesTransformerForPrediction",
  "RBLNTimeSeriesTransformerForPredictionConfig",
@@ -108,6 +128,7 @@ if TYPE_CHECKING:
  RBLNAutoModelForCTC,
  RBLNAutoModelForDepthEstimation,
  RBLNAutoModelForImageClassification,
+ RBLNAutoModelForImageTextToText,
  RBLNAutoModelForMaskedLM,
  RBLNAutoModelForQuestionAnswering,
  RBLNAutoModelForSeq2SeqLM,
@@ -129,6 +150,14 @@ if TYPE_CHECKING:
  RBLNBertModel,
  RBLNBertModelConfig,
  )
+ from .blip_2 import (
+ RBLNBlip2ForConditionalGeneration,
+ RBLNBlip2ForConditionalGenerationConfig,
+ RBLNBlip2QFormerModel,
+ RBLNBlip2QFormerModelConfig,
+ RBLNBlip2VisionModel,
+ RBLNBlip2VisionModelConfig,
+ )
  from .clip import (
  RBLNCLIPTextModel,
  RBLNCLIPTextModelConfig,
@@ -149,6 +178,12 @@ if TYPE_CHECKING:
  )
  from .exaone import RBLNExaoneForCausalLM, RBLNExaoneForCausalLMConfig
  from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig
+ from .gemma3 import (
+ RBLNGemma3ForCausalLM,
+ RBLNGemma3ForCausalLMConfig,
+ RBLNGemma3ForConditionalGeneration,
+ RBLNGemma3ForConditionalGenerationConfig,
+ )
  from .gpt2 import RBLNGPT2LMHeadModel, RBLNGPT2LMHeadModelConfig
  from .idefics3 import (
  RBLNIdefics3ForConditionalGeneration,
@@ -160,6 +195,7 @@ if TYPE_CHECKING:
  from .llava_next import RBLNLlavaNextForConditionalGeneration, RBLNLlavaNextForConditionalGenerationConfig
  from .midm import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
  from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
+ from .opt import RBLNOPTForCausalLM, RBLNOPTForCausalLMConfig
  from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
  from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
  from .qwen2_5_vl import (
@@ -168,6 +204,7 @@ if TYPE_CHECKING:
  RBLNQwen2_5_VLForConditionalGeneration,
  RBLNQwen2_5_VLForConditionalGenerationConfig,
  )
+ from .siglip import RBLNSiglipVisionModel, RBLNSiglipVisionModelConfig
  from .t5 import (
  RBLNT5EncoderModel,
  RBLNT5EncoderModelConfig,
optimum/rbln/transformers/models/auto/__init__.py CHANGED
@@ -19,6 +19,7 @@ from .modeling_auto import (
  RBLNAutoModelForCTC,
  RBLNAutoModelForDepthEstimation,
  RBLNAutoModelForImageClassification,
+ RBLNAutoModelForImageTextToText,
  RBLNAutoModelForMaskedLM,
  RBLNAutoModelForQuestionAnswering,
  RBLNAutoModelForSeq2SeqLM,
optimum/rbln/transformers/models/auto/modeling_auto.py CHANGED
@@ -23,6 +23,8 @@ from transformers.models.auto.modeling_auto import (
  MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES,
  MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
  MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
+ MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING,
+ MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES,
  MODEL_FOR_MASKED_LM_MAPPING,
  MODEL_FOR_MASKED_LM_MAPPING_NAMES,
  MODEL_FOR_QUESTION_ANSWERING_MAPPING,
@@ -90,6 +92,11 @@ class RBLNAutoModelForVision2Seq(_BaseAutoModelClass):
  _model_mapping_names = MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES


+ class RBLNAutoModelForImageTextToText(_BaseAutoModelClass):
+ _model_mapping = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING
+ _model_mapping_names = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES
+
+
  class RBLNAutoModelForMaskedLM(_BaseAutoModelClass):
  _model_mapping = MODEL_FOR_MASKED_LM_MAPPING
  _model_mapping_names = MODEL_FOR_MASKED_LM_MAPPING_NAMES
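With the mapping wired up, the new auto class is used like the existing RBLN auto classes. A hypothetical usage sketch, assuming the usual optimum-style export=True compile path and picking Salesforce/blip2-opt-2.7b as an example image-text-to-text checkpoint:

from optimum.rbln import RBLNAutoModelForImageTextToText

# Resolves the checkpoint through MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING and compiles it
# for RBLN NPUs on first load; afterwards it behaves like the matching RBLN model class
# (e.g. RBLNBlip2ForConditionalGeneration).
model = RBLNAutoModelForImageTextToText.from_pretrained(
    "Salesforce/blip2-opt-2.7b",
    export=True,
)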
optimum/rbln/transformers/models/blip_2/__init__.py ADDED
@@ -0,0 +1,20 @@
+ # Copyright 2025 Rebellions Inc. All rights reserved.
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at:
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from .configuration_blip_2 import (
+ RBLNBlip2ForConditionalGenerationConfig,
+ RBLNBlip2QFormerModelConfig,
+ RBLNBlip2VisionModelConfig,
+ )
+ from .modeling_blip_2 import RBLNBlip2ForConditionalGeneration, RBLNBlip2QFormerModel, RBLNBlip2VisionModel