optimum-rbln 0.7.5a0__py3-none-any.whl → 0.7.5a1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- optimum/rbln/__init__.py +20 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +9 -4
- optimum/rbln/modeling.py +7 -5
- optimum/rbln/ops/__init__.py +1 -0
- optimum/rbln/ops/attn.py +10 -0
- optimum/rbln/ops/flash_attn.py +8 -0
- optimum/rbln/ops/sliding_window_attn.py +111 -0
- optimum/rbln/transformers/__init__.py +22 -3
- optimum/rbln/transformers/models/__init__.py +23 -0
- optimum/rbln/transformers/models/blip_2/__init__.py +20 -0
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +93 -0
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +298 -0
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +12 -6
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +81 -77
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +160 -88
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +11 -7
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +4 -4
- optimum/rbln/transformers/models/opt/__init__.py +16 -0
- optimum/rbln/transformers/models/opt/configuration_opt.py +19 -0
- optimum/rbln/transformers/models/opt/modeling_opt.py +78 -0
- optimum/rbln/transformers/models/opt/opt_architecture.py +74 -0
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +16 -10
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +35 -52
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -0
- optimum/rbln/transformers/models/siglip/__init__.py +20 -0
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +66 -0
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +146 -0
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +1 -0
- optimum/rbln/transformers/utils/rbln_quantization.py +121 -72
- optimum/rbln/utils/submodule.py +13 -1
- {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5a1.dist-info}/METADATA +1 -1
- {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5a1.dist-info}/RECORD +35 -24
- {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5a1.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5a1.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__init__.py
CHANGED
@@ -54,6 +54,12 @@ _import_structure = {
         "RBLNBertForQuestionAnsweringConfig",
         "RBLNBertModel",
         "RBLNBertModelConfig",
+        "RBLNBlip2VisionModelConfig",
+        "RBLNBlip2VisionModel",
+        "RBLNBlip2QFormerModel",
+        "RBLNBlip2QFormerModelConfig",
+        "RBLNBlip2ForConditionalGeneration",
+        "RBLNBlip2ForConditionalGenerationConfig",
         "RBLNCLIPTextModel",
         "RBLNCLIPTextModelConfig",
         "RBLNCLIPTextModelWithProjection",
@@ -80,6 +86,8 @@ _import_structure = {
         "RBLNIdefics3VisionTransformerConfig",
         "RBLNLlamaForCausalLM",
         "RBLNLlamaForCausalLMConfig",
+        "RBLNOPTForCausalLM",
+        "RBLNOPTForCausalLMConfig",
         "RBLNLlavaNextForConditionalGeneration",
         "RBLNLlavaNextForConditionalGenerationConfig",
         "RBLNMidmLMHeadModel",
@@ -100,6 +108,8 @@ _import_structure = {
         "RBLNRobertaForMaskedLMConfig",
         "RBLNRobertaForSequenceClassification",
         "RBLNRobertaForSequenceClassificationConfig",
+        "RBLNSiglipVisionModel",
+        "RBLNSiglipVisionModelConfig",
         "RBLNT5EncoderModel",
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGeneration",
@@ -265,6 +275,12 @@ if TYPE_CHECKING:
        RBLNBertForQuestionAnsweringConfig,
        RBLNBertModel,
        RBLNBertModelConfig,
+        RBLNBlip2ForConditionalGeneration,
+        RBLNBlip2ForConditionalGenerationConfig,
+        RBLNBlip2QFormerModel,
+        RBLNBlip2QFormerModelConfig,
+        RBLNBlip2VisionModel,
+        RBLNBlip2VisionModelConfig,
        RBLNCLIPTextModel,
        RBLNCLIPTextModelConfig,
        RBLNCLIPTextModelWithProjection,
@@ -297,6 +313,8 @@ if TYPE_CHECKING:
        RBLNMidmLMHeadModelConfig,
        RBLNMistralForCausalLM,
        RBLNMistralForCausalLMConfig,
+        RBLNOPTForCausalLM,
+        RBLNOPTForCausalLMConfig,
        RBLNPhiForCausalLM,
        RBLNPhiForCausalLMConfig,
        RBLNQwen2_5_VisionTransformerPretrainedModel,
@@ -311,6 +329,8 @@ if TYPE_CHECKING:
        RBLNRobertaForMaskedLMConfig,
        RBLNRobertaForSequenceClassification,
        RBLNRobertaForSequenceClassificationConfig,
+        RBLNSiglipVisionModel,
+        RBLNSiglipVisionModelConfig,
        RBLNT5EncoderModel,
        RBLNT5EncoderModelConfig,
        RBLNT5ForConditionalGeneration,
optimum/rbln/__version__.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.7.5a0'
-__version_tuple__ = version_tuple = (0, 7, 5, 'a0')
+__version__ = version = '0.7.5a1'
+__version_tuple__ = version_tuple = (0, 7, 5, 'a1')
optimum/rbln/configuration_utils.py
CHANGED
@@ -17,7 +17,7 @@ import inspect
 import json
 from dataclasses import asdict, dataclass
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Dict, List, Optional, Protocol, Tuple, Type, Union, runtime_checkable
 
 import torch
 
@@ -34,6 +34,11 @@ DEFAULT_MOD_NAME = "default"
 TypeInputInfo = List[Tuple[str, Tuple[int], str]]
 
 
+@runtime_checkable
+class RBLNSerializableConfigProtocol(Protocol):
+    def _prepare_for_serialization(self) -> Dict[str, Any]: ...
+
+
 @dataclass
 class RBLNCompileConfig:
     """
@@ -234,7 +239,7 @@ class RBLNAutoConfig:
         return cls(**config_file)
 
 
-class RBLNModelConfig:
+class RBLNModelConfig(RBLNSerializableConfigProtocol):
     """Base configuration class for RBLN models that handles compilation settings, runtime options, and submodules.
 
     This class provides functionality for:
@@ -594,14 +599,14 @@ class RBLNModelConfig:
         )
         return rbln_model_cls
 
-    def _prepare_for_serialization(self):
+    def _prepare_for_serialization(self) -> Dict[str, Any]:
         """
         Prepare the attributes map for serialization by converting nested RBLNModelConfig
         objects to their serializable form.
         """
         serializable_map = {}
         for key, value in self._attributes_map.items():
-            if isinstance(value, RBLNModelConfig):
+            if isinstance(value, RBLNSerializableConfigProtocol):
                 # Convert nested RBLNModelConfig to its serializable form
                 serializable_map[key] = value._prepare_for_serialization()
             elif key == "_compile_cfgs":
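The key change in this file: _prepare_for_serialization now dispatches on a @runtime_checkable Protocol instead of a concrete class, so any object that implements the method serializes uniformly. A minimal standalone sketch of that mechanism (toy names, not the library's code):

from typing import Any, Dict, Protocol, runtime_checkable


@runtime_checkable
class SerializableLike(Protocol):
    def _prepare_for_serialization(self) -> Dict[str, Any]: ...


class ToyConfig:
    """Structurally matches the protocol without inheriting from it."""

    def __init__(self, name: str) -> None:
        self.name = name

    def _prepare_for_serialization(self) -> Dict[str, Any]:
        return {"name": self.name}


cfg = ToyConfig("demo")
# runtime_checkable lets isinstance() test for the method's presence:
assert isinstance(cfg, SerializableLike)
print(cfg._prepare_for_serialization())  # {'name': 'demo'}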
optimum/rbln/modeling.py
CHANGED
@@ -56,11 +56,7 @@ class RBLNModel(RBLNBaseModel):
     def update_kwargs(cls, kwargs):
         """
         Update user-given kwargs to get proper pytorch model.
-
-        For example, `torchscript`=True should be set because torch.jit
-        does not support `transformers` output instances as module output;
         """
-        kwargs.update({"torchscript": True})
         return kwargs
 
     @classmethod
@@ -133,7 +129,6 @@ class RBLNModel(RBLNBaseModel):
 
         if not isinstance(config, PretrainedConfig):  # diffusers config
             config = PretrainedConfig(**config)
-        config.save_pretrained(save_dir_path / subfolder)
 
         # Save preprocessor
         for preprocessor in preprocessors:
@@ -155,6 +150,10 @@
             preprocessors=preprocessors, model=model, model_config=config, rbln_config=rbln_config
         )
 
+        # torchscript should be True for jit to work
+        torchscript_backup = config.torchscript
+        config.torchscript = True
+
         compiled_model: Union[rebel.RBLNCompiledModel, Dict[str, rebel.RBLNCompiledModel]] = cls.get_compiled_model(
             model, rbln_config=rbln_config
         )
@@ -169,6 +168,9 @@
             cm.save(save_dir_path / subfolder / f"{compiled_model_name}.rbln")
         rbln_config.save(save_dir_path / subfolder)
 
+        config.torchscript = torchscript_backup
+        config.save_pretrained(save_dir_path / subfolder)
+
         # Save torch artifacts (e.g. embedding matrix if needed.)
         cls.save_torch_artifacts(model, save_dir_path=save_dir_path, subfolder=subfolder, rbln_config=rbln_config)
 
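These hunks replace the unconditional torchscript=True kwarg with a backup/flip/restore around compilation, so the config that save_pretrained persists keeps the caller's original value (and is now saved after compilation). The same idea written as a context manager, as a hypothetical sketch with toy names (the diff above uses explicit assignments):

from contextlib import contextmanager


@contextmanager
def temporary_attr(obj, name, value):
    backup = getattr(obj, name)
    setattr(obj, name, value)
    try:
        yield obj
    finally:
        setattr(obj, name, backup)  # restore even if compilation raises


class ToyConfig:
    torchscript = False


config = ToyConfig()
with temporary_attr(config, "torchscript", True):
    assert config.torchscript  # jit tracing / compilation would run here
assert config.torchscript is False  # the persisted config keeps the user's value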
optimum/rbln/ops/__init__.py
CHANGED
optimum/rbln/ops/attn.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
 
 import torch
 from torch import Tensor
@@ -125,6 +126,7 @@ def paged_causal_attn_decode(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for fused attention with KV cache updates.
 
@@ -147,6 +149,7 @@
     - scale: [] - Attention scale factor
     - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
     - block_size: [] - Number of tokens per block
+    - mask: [batch=1, max_seq_len] - attention mask when use position_ids
 
     Returns:
         Tensor: attn_output: [batch=1, n_heads, n_groups, 1, head_dim] - Attention output
@@ -165,6 +168,7 @@ def paged_causal_attn_decode_fake(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)
 
@@ -183,6 +187,8 @@ def paged_causal_attn_prefill(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for prefill phase attention with KV cache updates.
 
@@ -204,6 +210,8 @@
     - scale: [] - Attention scale factor
     - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
     - block_size: [] - Number of tokens per block
+    - is_bidirectional: [] - Whether the attention is bidirectional at current sequence position
+    - mask: [batch=1, max_seq_len] - attention mask when use position_ids
 
     Returns:
         Tensor: attn_output: [batch=1, n_heads, n_groups, seq_len, head_dim] - Attention output
@@ -222,6 +230,8 @@ def paged_causal_attn_prefill_fake(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)
 
optimum/rbln/ops/flash_attn.py
CHANGED
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
+
 
 import torch
 from torch import Tensor
@@ -113,6 +115,7 @@ def paged_flash_causal_attn_decode(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for fused causal flash attention with KV cache for decoding.
 
@@ -133,6 +136,7 @@ def paged_flash_causal_attn_decode_fake(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)
 
@@ -152,6 +156,8 @@ def paged_flash_causal_attn_prefill(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for fused causal flash attention with KV cache for prefill.
 
@@ -172,5 +178,7 @@ def paged_flash_causal_attn_prefill_fake(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)
optimum/rbln/ops/sliding_window_attn.py
ADDED
@@ -0,0 +1,111 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+from torch import Tensor
+
+
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_sliding_window_attn_prefill",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_sliding_window_attn_prefill(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    is_bidirectional: bool,
+) -> Tensor:
+    """Defines the computation pattern for prefill phase attention with KV cache updates.
+
+    IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
+    a single optimized NPU operation. It is NOT meant for CPU execution.
+
+    Key differences from decode pattern:
+    - Handles prefill phase with multiple input tokens
+    - Takes explicit batch index for continuous batching
+
+    Expected tensor shapes:
+    - q: [batch=1, n_heads, n_groups, seq_len, head_dim] - Query states for multiple tokens
+    - k: [batch=1, n_heads, 1, seq_len, head_dim] - Key states for current input
+    - v: [batch=1, n_heads, 1, seq_len, head_dim] - Value states for current input
+    - kcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Key cache
+    - vcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Value cache
+    - cache_seq_len: [] - the sequence length of the cached states that were seen by the model
+    - cache_offset: [] - The valid length in the combined sequence of the KV cache and the current projected key states.
+    - scale: [] - Attention scale factor
+    - is_bidirectional: [] - Whether the attention is bidirectional
+    Returns:
+        Tensor: attn_output: [batch=1, n_heads, n_groups, seq_len, head_dim] - Attention output
+    """
+    return torch.empty_like(q)
+
+
+@paged_sliding_window_attn_prefill.register_fake
+def paged_sliding_window_attn_prefill_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    is_bidirectional: bool,
+) -> Tensor:
+    return torch.empty_like(q)
+
+
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_sliding_window_attn_decode",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_sliding_window_attn_decode(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+) -> Tensor:
+    return torch.empty_like(q)
+
+
+@paged_sliding_window_attn_decode.register_fake
+def paged_sliding_window_attn_decode_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+) -> Tensor:
+    return torch.empty_like(q)
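For context on the mechanism this new file relies on: torch.library.custom_op (PyTorch 2.4+) registers an op whose Python body here is only a shape-level pattern for the RBLN compiler to match, and register_fake supplies the meta implementation that tracing and export use for shape propagation. A self-contained toy sketch of the same registration pattern (illustrative names, not from the package):

import torch
from torch import Tensor


@torch.library.custom_op("toy_ops::attn_pattern", mutates_args=())
def attn_pattern(q: Tensor) -> Tensor:
    # Pattern definition only; a backend would recognize and replace this op.
    return torch.empty_like(q)


@attn_pattern.register_fake
def attn_pattern_fake(q: Tensor) -> Tensor:
    # Fake (meta) impl: same output shape/dtype, no real computation.
    return torch.empty_like(q)


out = attn_pattern(torch.randn(1, 8, 4, 16, 64))
print(out.shape)  # torch.Size([1, 8, 4, 16, 64])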
optimum/rbln/transformers/__init__.py
CHANGED
@@ -50,6 +50,12 @@ _import_structure = {
         "RBLNBertForQuestionAnsweringConfig",
         "RBLNBertModel",
         "RBLNBertModelConfig",
+        "RBLNBlip2VisionModelConfig",
+        "RBLNBlip2VisionModel",
+        "RBLNBlip2QFormerModel",
+        "RBLNBlip2QFormerModelConfig",
+        "RBLNBlip2ForConditionalGeneration",
+        "RBLNBlip2ForConditionalGenerationConfig",
         "RBLNCLIPTextModel",
         "RBLNCLIPTextModelConfig",
         "RBLNCLIPTextModelWithProjection",
@@ -74,6 +80,8 @@ _import_structure = {
         "RBLNIdefics3VisionTransformerConfig",
         "RBLNLlamaForCausalLM",
         "RBLNLlamaForCausalLMConfig",
+        "RBLNOPTForCausalLM",
+        "RBLNOPTForCausalLMConfig",
         "RBLNLlavaNextForConditionalGeneration",
         "RBLNLlavaNextForConditionalGenerationConfig",
         "RBLNMidmLMHeadModel",
@@ -88,17 +96,18 @@ _import_structure = {
         "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
         "RBLNQwen2_5_VLForConditionalGeneration",
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
+        "RBLNSiglipVisionModel",
+        "RBLNSiglipVisionModelConfig",
         "RBLNT5EncoderModel",
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGeneration",
         "RBLNT5ForConditionalGenerationConfig",
+        "RBLNTimeSeriesTransformerForPrediction",
+        "RBLNTimeSeriesTransformerForPredictionConfig",
         "RBLNWav2Vec2ForCTC",
         "RBLNWav2Vec2ForCTCConfig",
         "RBLNWhisperForConditionalGeneration",
         "RBLNWhisperForConditionalGenerationConfig",
-        "RBLNTimeSeriesTransformerForPrediction",
-        "RBLNTimeSeriesTransformerForPredictionConfig",
-        "RBLNLlavaNextForConditionalGeneration",
         "RBLNXLMRobertaModel",
         "RBLNXLMRobertaModelConfig",
     ],
@@ -155,6 +164,12 @@ if TYPE_CHECKING:
         RBLNBertForQuestionAnsweringConfig,
         RBLNBertModel,
         RBLNBertModelConfig,
+        RBLNBlip2ForConditionalGeneration,
+        RBLNBlip2ForConditionalGenerationConfig,
+        RBLNBlip2QFormerModel,
+        RBLNBlip2QFormerModelConfig,
+        RBLNBlip2VisionModel,
+        RBLNBlip2VisionModelConfig,
         RBLNCLIPTextModel,
         RBLNCLIPTextModelConfig,
         RBLNCLIPTextModelWithProjection,
@@ -185,6 +200,8 @@ if TYPE_CHECKING:
         RBLNMidmLMHeadModelConfig,
         RBLNMistralForCausalLM,
         RBLNMistralForCausalLMConfig,
+        RBLNOPTForCausalLM,
+        RBLNOPTForCausalLMConfig,
         RBLNPhiForCausalLM,
         RBLNPhiForCausalLMConfig,
         RBLNQwen2_5_VisionTransformerPretrainedModel,
@@ -193,6 +210,8 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
+        RBLNSiglipVisionModel,
+        RBLNSiglipVisionModelConfig,
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
         RBLNT5ForConditionalGeneration,
optimum/rbln/transformers/models/__init__.py
CHANGED
@@ -46,6 +46,14 @@ _import_structure = {
         "RBLNBertForMaskedLM",
         "RBLNBertForMaskedLMConfig",
     ],
+    "blip_2": [
+        "RBLNBlip2VisionModelConfig",
+        "RBLNBlip2VisionModel",
+        "RBLNBlip2ForConditionalGeneration",
+        "RBLNBlip2ForConditionalGenerationConfig",
+        "RBLNBlip2QFormerModel",
+        "RBLNBlip2QFormerModelConfig",
+    ],
     "clip": [
         "RBLNCLIPTextModel",
         "RBLNCLIPTextModelConfig",
@@ -80,11 +88,16 @@ _import_structure = {
         "RBLNIdefics3VisionTransformerConfig",
     ],
     "llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig"],
+    "opt": ["RBLNOPTForCausalLM", "RBLNOPTForCausalLMConfig"],
     "llava_next": ["RBLNLlavaNextForConditionalGeneration", "RBLNLlavaNextForConditionalGenerationConfig"],
     "midm": ["RBLNMidmLMHeadModel", "RBLNMidmLMHeadModelConfig"],
     "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
     "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
     "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
+    "siglip": [
+        "RBLNSiglipVisionModel",
+        "RBLNSiglipVisionModelConfig",
+    ],
     "time_series_transformers": [
         "RBLNTimeSeriesTransformerForPrediction",
         "RBLNTimeSeriesTransformerForPredictionConfig",
@@ -129,6 +142,14 @@ if TYPE_CHECKING:
         RBLNBertModel,
         RBLNBertModelConfig,
     )
+    from .blip_2 import (
+        RBLNBlip2ForConditionalGeneration,
+        RBLNBlip2ForConditionalGenerationConfig,
+        RBLNBlip2QFormerModel,
+        RBLNBlip2QFormerModelConfig,
+        RBLNBlip2VisionModel,
+        RBLNBlip2VisionModelConfig,
+    )
     from .clip import (
         RBLNCLIPTextModel,
         RBLNCLIPTextModelConfig,
@@ -160,6 +181,7 @@ if TYPE_CHECKING:
     from .llava_next import RBLNLlavaNextForConditionalGeneration, RBLNLlavaNextForConditionalGenerationConfig
     from .midm import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
     from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
+    from .opt import RBLNOPTForCausalLM, RBLNOPTForCausalLMConfig
     from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
     from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
     from .qwen2_5_vl import (
@@ -168,6 +190,7 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
     )
+    from .siglip import RBLNSiglipVisionModel, RBLNSiglipVisionModelConfig
     from .t5 import (
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
optimum/rbln/transformers/models/blip_2/__init__.py
ADDED
@@ -0,0 +1,20 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_blip_2 import (
+    RBLNBlip2ForConditionalGenerationConfig,
+    RBLNBlip2QFormerModelConfig,
+    RBLNBlip2VisionModelConfig,
+)
+from .modeling_blip_2 import RBLNBlip2ForConditionalGeneration, RBLNBlip2QFormerModel, RBLNBlip2VisionModel
optimum/rbln/transformers/models/blip_2/configuration_blip_2.py
ADDED
@@ -0,0 +1,93 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional
+
+from ....configuration_utils import RBLNModelConfig
+
+
+class RBLNBlip2VisionModelConfig(RBLNModelConfig):
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        **kwargs,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+
+class RBLNBlip2QFormerModelConfig(RBLNModelConfig):
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        num_query_tokens: Optional[int] = None,
+        image_text_hidden_size: Optional[int] = None,
+        **kwargs,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+        self.num_query_tokens = num_query_tokens
+        self.image_text_hidden_size = image_text_hidden_size
+
+
+class RBLNBlip2ForConditionalGenerationConfig(RBLNModelConfig):
+    submodules = ["vision_model", "qformer", "language_model"]
+
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        vision_model: Optional[RBLNModelConfig] = None,
+        qformer: Optional[RBLNModelConfig] = None,
+        language_model: Optional[RBLNModelConfig] = None,
+        **kwargs,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            vision_model (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+            language_model (Optional[RBLNModelConfig]): Configuration for the language model component.
+            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+        self.vision_model = self.init_submodule_config(RBLNBlip2VisionModelConfig, vision_model, batch_size=batch_size)
+        self.language_model = language_model
+        self.qformer = self.init_submodule_config(RBLNBlip2QFormerModelConfig, qformer, batch_size=batch_size)