optimum-rbln 0.7.5a0__py3-none-any.whl → 0.7.5a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. optimum/rbln/__init__.py +20 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +9 -4
  4. optimum/rbln/modeling.py +7 -5
  5. optimum/rbln/ops/__init__.py +1 -0
  6. optimum/rbln/ops/attn.py +10 -0
  7. optimum/rbln/ops/flash_attn.py +8 -0
  8. optimum/rbln/ops/sliding_window_attn.py +111 -0
  9. optimum/rbln/transformers/__init__.py +22 -3
  10. optimum/rbln/transformers/models/__init__.py +23 -0
  11. optimum/rbln/transformers/models/blip_2/__init__.py +20 -0
  12. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +93 -0
  13. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +298 -0
  14. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +12 -6
  15. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +81 -77
  16. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +160 -88
  17. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +11 -7
  18. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +4 -4
  19. optimum/rbln/transformers/models/opt/__init__.py +16 -0
  20. optimum/rbln/transformers/models/opt/configuration_opt.py +19 -0
  21. optimum/rbln/transformers/models/opt/modeling_opt.py +78 -0
  22. optimum/rbln/transformers/models/opt/opt_architecture.py +74 -0
  23. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +16 -10
  24. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +35 -52
  25. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -0
  26. optimum/rbln/transformers/models/siglip/__init__.py +20 -0
  27. optimum/rbln/transformers/models/siglip/configuration_siglip.py +66 -0
  28. optimum/rbln/transformers/models/siglip/modeling_siglip.py +146 -0
  29. optimum/rbln/transformers/models/whisper/whisper_architecture.py +1 -0
  30. optimum/rbln/transformers/utils/rbln_quantization.py +121 -72
  31. optimum/rbln/utils/submodule.py +13 -1
  32. {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5a1.dist-info}/METADATA +1 -1
  33. {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5a1.dist-info}/RECORD +35 -24
  34. {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5a1.dist-info}/WHEEL +0 -0
  35. {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5a1.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__init__.py CHANGED
@@ -54,6 +54,12 @@ _import_structure = {
         "RBLNBertForQuestionAnsweringConfig",
         "RBLNBertModel",
         "RBLNBertModelConfig",
+        "RBLNBlip2VisionModelConfig",
+        "RBLNBlip2VisionModel",
+        "RBLNBlip2QFormerModel",
+        "RBLNBlip2QFormerModelConfig",
+        "RBLNBlip2ForConditionalGeneration",
+        "RBLNBlip2ForConditionalGenerationConfig",
         "RBLNCLIPTextModel",
         "RBLNCLIPTextModelConfig",
         "RBLNCLIPTextModelWithProjection",
@@ -80,6 +86,8 @@ _import_structure = {
         "RBLNIdefics3VisionTransformerConfig",
         "RBLNLlamaForCausalLM",
         "RBLNLlamaForCausalLMConfig",
+        "RBLNOPTForCausalLM",
+        "RBLNOPTForCausalLMConfig",
         "RBLNLlavaNextForConditionalGeneration",
         "RBLNLlavaNextForConditionalGenerationConfig",
         "RBLNMidmLMHeadModel",
@@ -100,6 +108,8 @@ _import_structure = {
         "RBLNRobertaForMaskedLMConfig",
         "RBLNRobertaForSequenceClassification",
         "RBLNRobertaForSequenceClassificationConfig",
+        "RBLNSiglipVisionModel",
+        "RBLNSiglipVisionModelConfig",
         "RBLNT5EncoderModel",
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGeneration",
@@ -265,6 +275,12 @@ if TYPE_CHECKING:
         RBLNBertForQuestionAnsweringConfig,
         RBLNBertModel,
         RBLNBertModelConfig,
+        RBLNBlip2ForConditionalGeneration,
+        RBLNBlip2ForConditionalGenerationConfig,
+        RBLNBlip2QFormerModel,
+        RBLNBlip2QFormerModelConfig,
+        RBLNBlip2VisionModel,
+        RBLNBlip2VisionModelConfig,
         RBLNCLIPTextModel,
         RBLNCLIPTextModelConfig,
         RBLNCLIPTextModelWithProjection,
@@ -297,6 +313,8 @@ if TYPE_CHECKING:
         RBLNMidmLMHeadModelConfig,
         RBLNMistralForCausalLM,
         RBLNMistralForCausalLMConfig,
+        RBLNOPTForCausalLM,
+        RBLNOPTForCausalLMConfig,
         RBLNPhiForCausalLM,
         RBLNPhiForCausalLMConfig,
         RBLNQwen2_5_VisionTransformerPretrainedModel,
@@ -311,6 +329,8 @@ if TYPE_CHECKING:
         RBLNRobertaForMaskedLMConfig,
         RBLNRobertaForSequenceClassification,
         RBLNRobertaForSequenceClassificationConfig,
+        RBLNSiglipVisionModel,
+        RBLNSiglipVisionModelConfig,
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
         RBLNT5ForConditionalGeneration,
optimum/rbln/__version__.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '0.7.5a0'
-__version_tuple__ = version_tuple = (0, 7, 5, 'a0')
+__version__ = version = '0.7.5a1'
+__version_tuple__ = version_tuple = (0, 7, 5, 'a1')
optimum/rbln/configuration_utils.py CHANGED
@@ -17,7 +17,7 @@ import inspect
 import json
 from dataclasses import asdict, dataclass
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Dict, List, Optional, Protocol, Tuple, Type, Union, runtime_checkable

 import torch

@@ -34,6 +34,11 @@ DEFAULT_MOD_NAME = "default"
 TypeInputInfo = List[Tuple[str, Tuple[int], str]]


+@runtime_checkable
+class RBLNSerializableConfigProtocol(Protocol):
+    def _prepare_for_serialization(self) -> Dict[str, Any]: ...
+
+
 @dataclass
 class RBLNCompileConfig:
     """
@@ -234,7 +239,7 @@ class RBLNAutoConfig:
         return cls(**config_file)


-class RBLNModelConfig:
+class RBLNModelConfig(RBLNSerializableConfigProtocol):
     """Base configuration class for RBLN models that handles compilation settings, runtime options, and submodules.

     This class provides functionality for:
@@ -594,14 +599,14 @@
         )
         return rbln_model_cls

-    def _prepare_for_serialization(self):
+    def _prepare_for_serialization(self) -> Dict[str, Any]:
         """
         Prepare the attributes map for serialization by converting nested RBLNModelConfig
         objects to their serializable form.
         """
         serializable_map = {}
         for key, value in self._attributes_map.items():
-            if isinstance(value, RBLNModelConfig):
+            if isinstance(value, RBLNSerializableConfigProtocol):
                 # Convert nested RBLNModelConfig to its serializable form
                 serializable_map[key] = value._prepare_for_serialization()
             elif key == "_compile_cfgs":
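
Replacing the isinstance check against RBLNModelConfig with the new runtime_checkable protocol means any object that implements _prepare_for_serialization can participate in nested serialization, whatever its base class. A minimal sketch of how such a structural check behaves (CustomConfig is hypothetical, not part of the package):

    from typing import Any, Dict, Protocol, runtime_checkable

    @runtime_checkable
    class RBLNSerializableConfigProtocol(Protocol):
        def _prepare_for_serialization(self) -> Dict[str, Any]: ...

    class CustomConfig:  # hypothetical: does not inherit from RBLNModelConfig
        def _prepare_for_serialization(self) -> Dict[str, Any]:
            return {"batch_size": 1}

    # With @runtime_checkable, isinstance() checks structure (the method
    # exists), not ancestry, so this passes.
    assert isinstance(CustomConfig(), RBLNSerializableConfigProtocol)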
optimum/rbln/modeling.py CHANGED
@@ -56,11 +56,7 @@ class RBLNModel(RBLNBaseModel):
     def update_kwargs(cls, kwargs):
         """
         Update user-given kwargs to get proper pytorch model.
-
-        For example, `torchscript`=True should be set because torch.jit
-        does not support `transformers` output instances as module output;
         """
-        kwargs.update({"torchscript": True})
         return kwargs

     @classmethod
@@ -133,7 +129,6 @@

         if not isinstance(config, PretrainedConfig):  # diffusers config
             config = PretrainedConfig(**config)
-        config.save_pretrained(save_dir_path / subfolder)

         # Save preprocessor
         for preprocessor in preprocessors:
@@ -155,6 +150,10 @@
             preprocessors=preprocessors, model=model, model_config=config, rbln_config=rbln_config
         )

+        # torchscript should be True for jit to work
+        torchscript_backup = config.torchscript
+        config.torchscript = True
+
         compiled_model: Union[rebel.RBLNCompiledModel, Dict[str, rebel.RBLNCompiledModel]] = cls.get_compiled_model(
             model, rbln_config=rbln_config
         )
@@ -169,6 +168,9 @@
             cm.save(save_dir_path / subfolder / f"{compiled_model_name}.rbln")
         rbln_config.save(save_dir_path / subfolder)

+        config.torchscript = torchscript_backup
+        config.save_pretrained(save_dir_path / subfolder)
+
         # Save torch artifacts (e.g. embedding matrix if needed.)
         cls.save_torch_artifacts(model, save_dir_path=save_dir_path, subfolder=subfolder, rbln_config=rbln_config)

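Taken together, these modeling.py changes scope the torchscript flag to compilation only: it is set just before get_compiled_model, restored afterwards, and the config is saved to disk after the restore, so the serialized config no longer hard-codes torchscript=True. The flag matters because torch.jit.trace cannot handle transformers ModelOutput instances as module outputs; with config.torchscript=True the model returns plain tuples. A minimal sketch of that behavior with a stock transformers checkpoint (the model choice is illustrative):

    import torch
    from transformers import AutoConfig, AutoModel

    config = AutoConfig.from_pretrained("bert-base-uncased")
    config.torchscript = True  # forward() returns plain tuples instead of ModelOutput
    model = AutoModel.from_pretrained("bert-base-uncased", config=config)
    model.eval()

    dummy_input = torch.ones(1, 8, dtype=torch.long)
    # Tracing succeeds because outputs are tuples; with torchscript=False the
    # dict-like ModelOutput return type breaks torch.jit.trace.
    traced = torch.jit.trace(model, (dummy_input,))
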
optimum/rbln/ops/__init__.py CHANGED
@@ -16,3 +16,4 @@ from .attn import *
 from .flash_attn import *
 from .kv_cache_update import *
 from .linear import linear
+from .sliding_window_attn import *
optimum/rbln/ops/attn.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Optional

 import torch
 from torch import Tensor
@@ -125,6 +126,7 @@ def paged_causal_attn_decode(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for fused attention with KV cache updates.

@@ -147,6 +149,7 @@
     - scale: [] - Attention scale factor
     - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
     - block_size: [] - Number of tokens per block
+    - mask: [batch=1, max_seq_len] - attention mask when use position_ids

     Returns:
         Tensor: attn_output: [batch=1, n_heads, n_groups, 1, head_dim] - Attention output
@@ -165,6 +168,7 @@ def paged_causal_attn_decode_fake(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)

@@ -183,6 +187,8 @@ def paged_causal_attn_prefill(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for prefill phase attention with KV cache updates.

@@ -204,6 +210,8 @@
     - scale: [] - Attention scale factor
     - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
     - block_size: [] - Number of tokens per block
+    - is_bidirectional: [] - Whether the attention is bidirectional at current sequence position
+    - mask: [batch=1, max_seq_len] - attention mask when use position_ids

     Returns:
         Tensor: attn_output: [batch=1, n_heads, n_groups, seq_len, head_dim] - Attention output
@@ -222,6 +230,8 @@ def paged_causal_attn_prefill_fake(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)

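The new is_bidirectional flag lets the prefill op relax the causal constraint over part of the sequence (useful when a prompt prefix, such as image tokens in a vision-language model, should attend bidirectionally), while the optional mask supports layouts driven by position_ids. A toy illustration of the mask semantics only, not the NPU op's implementation (the helper below is hypothetical):

    import torch

    def toy_attention_mask(seq_len: int, bidirectional_until: int = 0) -> torch.Tensor:
        # Standard causal mask: token i may attend to tokens 0..i
        allowed = torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool))
        # Bidirectional prefix: tokens before the boundary also attend to
        # every other token in that prefix (prefix-LM style)
        allowed[:bidirectional_until, :bidirectional_until] = True
        return allowed

    print(toy_attention_mask(5))                         # purely causal
    print(toy_attention_mask(5, bidirectional_until=3))  # bidirectional prefix of 3
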
optimum/rbln/ops/flash_attn.py CHANGED
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Optional
+
 import torch
 from torch import Tensor

@@ -113,6 +115,7 @@ def paged_flash_causal_attn_decode(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for fused causal flash attention with KV cache for decoding.

@@ -133,6 +136,7 @@ def paged_flash_causal_attn_decode_fake(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)

@@ -152,6 +156,8 @@ def paged_flash_causal_attn_prefill(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for fused causal flash attention with KV cache for prefill.

@@ -172,5 +178,7 @@ def paged_flash_causal_attn_prefill_fake(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)
optimum/rbln/ops/sliding_window_attn.py ADDED
@@ -0,0 +1,111 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+from torch import Tensor
+
+
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_sliding_window_attn_prefill",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_sliding_window_attn_prefill(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    is_bidirectional: bool,
+) -> Tensor:
+    """Defines the computation pattern for prefill phase attention with KV cache updates.
+
+    IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
+    a single optimized NPU operation. It is NOT meant for CPU execution.
+
+    Key differences from decode pattern:
+    - Handles prefill phase with multiple input tokens
+    - Takes explicit batch index for continuous batching
+
+    Expected tensor shapes:
+    - q: [batch=1, n_heads, n_groups, seq_len, head_dim] - Query states for multiple tokens
+    - k: [batch=1, n_heads, 1, seq_len, head_dim] - Key states for current input
+    - v: [batch=1, n_heads, 1, seq_len, head_dim] - Value states for current input
+    - kcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Key cache
+    - vcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Value cache
+    - cache_seq_len: [] - the sequence length of the cached states that were seen by the model
+    - cache_offset: [] - The valid length in the combined sequence of the KV cache and the current projected key states.
+    - scale: [] - Attention scale factor
+    - is_bidirectional: [] - Whether the attention is bidirectional
+    Returns:
+        Tensor: attn_output: [batch=1, n_heads, n_groups, seq_len, head_dim] - Attention output
+    """
+    return torch.empty_like(q)
+
+
+@paged_sliding_window_attn_prefill.register_fake
+def paged_sliding_window_attn_prefill_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    is_bidirectional: bool,
+) -> Tensor:
+    return torch.empty_like(q)
+
+
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_sliding_window_attn_decode",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_sliding_window_attn_decode(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+) -> Tensor:
+    return torch.empty_like(q)
+
+
+@paged_sliding_window_attn_decode.register_fake
+def paged_sliding_window_attn_decode_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+) -> Tensor:
+    return torch.empty_like(q)
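
Both ops in this new file are compiler pattern stubs: the eager body just returns torch.empty_like(q), and the register_fake registration supplies a shape function so torch.compile / torch.export can trace graphs containing the op without executing a real kernel; the RBLN compiler then pattern-matches the op and lowers it to a fused NPU operation. A self-contained sketch of the same registration mechanics (the demo_ops namespace and scaled_copy op are hypothetical):

    import torch
    from torch import Tensor

    # Eager implementation; for the RBLN ops above this is a stub because the
    # op only exists to be pattern-matched by the compiler.
    @torch.library.custom_op("demo_ops::scaled_copy", mutates_args=())
    def scaled_copy(q: Tensor, scale: float) -> Tensor:
        return (q * scale).clone()

    # Shape-only "fake" kernel used while tracing or compiling.
    @scaled_copy.register_fake
    def _(q: Tensor, scale: float) -> Tensor:
        return torch.empty_like(q)

    out = torch.ops.demo_ops.scaled_copy(torch.ones(2, 3), 2.0)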
optimum/rbln/transformers/__init__.py CHANGED
@@ -50,6 +50,12 @@ _import_structure = {
         "RBLNBertForQuestionAnsweringConfig",
         "RBLNBertModel",
         "RBLNBertModelConfig",
+        "RBLNBlip2VisionModelConfig",
+        "RBLNBlip2VisionModel",
+        "RBLNBlip2QFormerModel",
+        "RBLNBlip2QFormerModelConfig",
+        "RBLNBlip2ForConditionalGeneration",
+        "RBLNBlip2ForConditionalGenerationConfig",
         "RBLNCLIPTextModel",
         "RBLNCLIPTextModelConfig",
         "RBLNCLIPTextModelWithProjection",
@@ -74,6 +80,8 @@ _import_structure = {
         "RBLNIdefics3VisionTransformerConfig",
         "RBLNLlamaForCausalLM",
         "RBLNLlamaForCausalLMConfig",
+        "RBLNOPTForCausalLM",
+        "RBLNOPTForCausalLMConfig",
         "RBLNLlavaNextForConditionalGeneration",
         "RBLNLlavaNextForConditionalGenerationConfig",
         "RBLNMidmLMHeadModel",
@@ -88,17 +96,18 @@ _import_structure = {
         "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
         "RBLNQwen2_5_VLForConditionalGeneration",
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
+        "RBLNSiglipVisionModel",
+        "RBLNSiglipVisionModelConfig",
         "RBLNT5EncoderModel",
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGeneration",
         "RBLNT5ForConditionalGenerationConfig",
+        "RBLNTimeSeriesTransformerForPrediction",
+        "RBLNTimeSeriesTransformerForPredictionConfig",
         "RBLNWav2Vec2ForCTC",
         "RBLNWav2Vec2ForCTCConfig",
         "RBLNWhisperForConditionalGeneration",
         "RBLNWhisperForConditionalGenerationConfig",
-        "RBLNTimeSeriesTransformerForPrediction",
-        "RBLNTimeSeriesTransformerForPredictionConfig",
-        "RBLNLlavaNextForConditionalGeneration",
         "RBLNXLMRobertaModel",
         "RBLNXLMRobertaModelConfig",
     ],
@@ -155,6 +164,12 @@ if TYPE_CHECKING:
         RBLNBertForQuestionAnsweringConfig,
         RBLNBertModel,
         RBLNBertModelConfig,
+        RBLNBlip2ForConditionalGeneration,
+        RBLNBlip2ForConditionalGenerationConfig,
+        RBLNBlip2QFormerModel,
+        RBLNBlip2QFormerModelConfig,
+        RBLNBlip2VisionModel,
+        RBLNBlip2VisionModelConfig,
         RBLNCLIPTextModel,
         RBLNCLIPTextModelConfig,
         RBLNCLIPTextModelWithProjection,
@@ -185,6 +200,8 @@ if TYPE_CHECKING:
         RBLNMidmLMHeadModelConfig,
         RBLNMistralForCausalLM,
         RBLNMistralForCausalLMConfig,
+        RBLNOPTForCausalLM,
+        RBLNOPTForCausalLMConfig,
         RBLNPhiForCausalLM,
         RBLNPhiForCausalLMConfig,
         RBLNQwen2_5_VisionTransformerPretrainedModel,
@@ -193,6 +210,8 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
+        RBLNSiglipVisionModel,
+        RBLNSiglipVisionModelConfig,
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
         RBLNT5ForConditionalGeneration,
optimum/rbln/transformers/models/__init__.py CHANGED
@@ -46,6 +46,14 @@ _import_structure = {
         "RBLNBertForMaskedLM",
         "RBLNBertForMaskedLMConfig",
     ],
+    "blip_2": [
+        "RBLNBlip2VisionModelConfig",
+        "RBLNBlip2VisionModel",
+        "RBLNBlip2ForConditionalGeneration",
+        "RBLNBlip2ForConditionalGenerationConfig",
+        "RBLNBlip2QFormerModel",
+        "RBLNBlip2QFormerModelConfig",
+    ],
     "clip": [
         "RBLNCLIPTextModel",
         "RBLNCLIPTextModelConfig",
@@ -80,11 +88,16 @@ _import_structure = {
         "RBLNIdefics3VisionTransformerConfig",
     ],
     "llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig"],
+    "opt": ["RBLNOPTForCausalLM", "RBLNOPTForCausalLMConfig"],
     "llava_next": ["RBLNLlavaNextForConditionalGeneration", "RBLNLlavaNextForConditionalGenerationConfig"],
     "midm": ["RBLNMidmLMHeadModel", "RBLNMidmLMHeadModelConfig"],
     "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
     "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
     "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
+    "siglip": [
+        "RBLNSiglipVisionModel",
+        "RBLNSiglipVisionModelConfig",
+    ],
     "time_series_transformers": [
         "RBLNTimeSeriesTransformerForPrediction",
         "RBLNTimeSeriesTransformerForPredictionConfig",
@@ -129,6 +142,14 @@ if TYPE_CHECKING:
         RBLNBertModel,
         RBLNBertModelConfig,
     )
+    from .blip_2 import (
+        RBLNBlip2ForConditionalGeneration,
+        RBLNBlip2ForConditionalGenerationConfig,
+        RBLNBlip2QFormerModel,
+        RBLNBlip2QFormerModelConfig,
+        RBLNBlip2VisionModel,
+        RBLNBlip2VisionModelConfig,
+    )
     from .clip import (
         RBLNCLIPTextModel,
         RBLNCLIPTextModelConfig,
@@ -160,6 +181,7 @@ if TYPE_CHECKING:
     from .llava_next import RBLNLlavaNextForConditionalGeneration, RBLNLlavaNextForConditionalGenerationConfig
     from .midm import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
     from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
+    from .opt import RBLNOPTForCausalLM, RBLNOPTForCausalLMConfig
     from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
     from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
     from .qwen2_5_vl import (
@@ -168,6 +190,7 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
     )
+    from .siglip import RBLNSiglipVisionModel, RBLNSiglipVisionModelConfig
     from .t5 import (
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
optimum/rbln/transformers/models/blip_2/__init__.py ADDED
@@ -0,0 +1,20 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_blip_2 import (
+    RBLNBlip2ForConditionalGenerationConfig,
+    RBLNBlip2QFormerModelConfig,
+    RBLNBlip2VisionModelConfig,
+)
+from .modeling_blip_2 import RBLNBlip2ForConditionalGeneration, RBLNBlip2QFormerModel, RBLNBlip2VisionModel
optimum/rbln/transformers/models/blip_2/configuration_blip_2.py ADDED
@@ -0,0 +1,93 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional
+
+from ....configuration_utils import RBLNModelConfig
+
+
+class RBLNBlip2VisionModelConfig(RBLNModelConfig):
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        **kwargs,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+
+class RBLNBlip2QFormerModelConfig(RBLNModelConfig):
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        num_query_tokens: Optional[int] = None,
+        image_text_hidden_size: Optional[int] = None,
+        **kwargs,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+        self.num_query_tokens = num_query_tokens
+        self.image_text_hidden_size = image_text_hidden_size
+
+
+class RBLNBlip2ForConditionalGenerationConfig(RBLNModelConfig):
+    submodules = ["vision_model", "qformer", "language_model"]
+
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        vision_model: Optional[RBLNModelConfig] = None,
+        qformer: Optional[RBLNModelConfig] = None,
+        language_model: Optional[RBLNModelConfig] = None,
+        **kwargs,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            vision_model (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+            language_model (Optional[RBLNModelConfig]): Configuration for the language model component.
+            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+        self.vision_model = self.init_submodule_config(RBLNBlip2VisionModelConfig, vision_model, batch_size=batch_size)
+        self.language_model = language_model
+        self.qformer = self.init_submodule_config(RBLNBlip2QFormerModelConfig, qformer, batch_size=batch_size)
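
For orientation, a hedged sketch of how this composite config is assembled, based only on what the diff shows: vision_model and qformer are normalized through init_submodule_config (so omitted submodules get defaults with the shared batch_size), while language_model is stored as passed in. The values below are illustrative:

    from optimum.rbln import (
        RBLNBlip2ForConditionalGenerationConfig,
        RBLNBlip2QFormerModelConfig,
        RBLNBlip2VisionModelConfig,
    )

    config = RBLNBlip2ForConditionalGenerationConfig(
        batch_size=1,
        vision_model=RBLNBlip2VisionModelConfig(batch_size=1),
        qformer=RBLNBlip2QFormerModelConfig(num_query_tokens=32),
        # language_model would typically be a decoder-only LM config, e.g. the
        # RBLNOPTForCausalLMConfig added in this same release (assumption).
    )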