optimum-rbln 0.7.5a0__py3-none-any.whl → 0.7.5rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +30 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +9 -4
- optimum/rbln/modeling.py +7 -5
- optimum/rbln/ops/__init__.py +1 -0
- optimum/rbln/ops/attn.py +10 -0
- optimum/rbln/ops/flash_attn.py +8 -0
- optimum/rbln/ops/sliding_window_attn.py +111 -0
- optimum/rbln/transformers/__init__.py +32 -3
- optimum/rbln/transformers/models/__init__.py +37 -0
- optimum/rbln/transformers/models/auto/__init__.py +1 -0
- optimum/rbln/transformers/models/auto/modeling_auto.py +7 -0
- optimum/rbln/transformers/models/blip_2/__init__.py +20 -0
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +93 -0
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +298 -0
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +12 -6
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +189 -90
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +186 -95
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +5 -1
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +5 -1
- optimum/rbln/transformers/models/gemma3/__init__.py +16 -0
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +69 -0
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +446 -0
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +1057 -0
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +4 -1
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +11 -7
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +4 -4
- optimum/rbln/transformers/models/midm/midm_architecture.py +4 -1
- optimum/rbln/transformers/models/opt/__init__.py +16 -0
- optimum/rbln/transformers/models/opt/configuration_opt.py +19 -0
- optimum/rbln/transformers/models/opt/modeling_opt.py +80 -0
- optimum/rbln/transformers/models/opt/opt_architecture.py +77 -0
- optimum/rbln/transformers/models/phi/phi_architecture.py +4 -1
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +18 -11
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +35 -52
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -0
- optimum/rbln/transformers/models/siglip/__init__.py +20 -0
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +66 -0
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +146 -0
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +1 -0
- optimum/rbln/transformers/utils/rbln_quantization.py +121 -72
- optimum/rbln/utils/submodule.py +13 -1
- {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5rc0.dist-info}/METADATA +1 -1
- {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5rc0.dist-info}/RECORD +46 -31
- {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5rc0.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.7.5a0.dist-info → optimum_rbln-0.7.5rc0.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__init__.py
CHANGED
@@ -38,6 +38,7 @@ _import_structure = {
         "RBLNAutoModelForCTC",
         "RBLNAutoModelForDepthEstimation",
         "RBLNAutoModelForImageClassification",
+        "RBLNAutoModelForImageTextToText",
         "RBLNAutoModelForMaskedLM",
         "RBLNAutoModelForQuestionAnswering",
         "RBLNAutoModelForSeq2SeqLM",
@@ -54,6 +55,12 @@ _import_structure = {
         "RBLNBertForQuestionAnsweringConfig",
         "RBLNBertModel",
         "RBLNBertModelConfig",
+        "RBLNBlip2VisionModelConfig",
+        "RBLNBlip2VisionModel",
+        "RBLNBlip2QFormerModel",
+        "RBLNBlip2QFormerModelConfig",
+        "RBLNBlip2ForConditionalGeneration",
+        "RBLNBlip2ForConditionalGenerationConfig",
         "RBLNCLIPTextModel",
         "RBLNCLIPTextModelConfig",
         "RBLNCLIPTextModelWithProjection",
@@ -72,6 +79,10 @@ _import_structure = {
         "RBLNExaoneForCausalLMConfig",
         "RBLNGemmaForCausalLM",
         "RBLNGemmaForCausalLMConfig",
+        "RBLNGemma3ForCausalLM",
+        "RBLNGemma3ForCausalLMConfig",
+        "RBLNGemma3ForConditionalGeneration",
+        "RBLNGemma3ForConditionalGenerationConfig",
         "RBLNGPT2LMHeadModel",
         "RBLNGPT2LMHeadModelConfig",
         "RBLNIdefics3VisionTransformer",
@@ -80,6 +91,8 @@ _import_structure = {
         "RBLNIdefics3VisionTransformerConfig",
         "RBLNLlamaForCausalLM",
         "RBLNLlamaForCausalLMConfig",
+        "RBLNOPTForCausalLM",
+        "RBLNOPTForCausalLMConfig",
         "RBLNLlavaNextForConditionalGeneration",
         "RBLNLlavaNextForConditionalGenerationConfig",
         "RBLNMidmLMHeadModel",
@@ -100,6 +113,8 @@ _import_structure = {
         "RBLNRobertaForMaskedLMConfig",
         "RBLNRobertaForSequenceClassification",
         "RBLNRobertaForSequenceClassificationConfig",
+        "RBLNSiglipVisionModel",
+        "RBLNSiglipVisionModelConfig",
         "RBLNT5EncoderModel",
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGeneration",
@@ -249,6 +264,7 @@ if TYPE_CHECKING:
         RBLNAutoModelForCTC,
         RBLNAutoModelForDepthEstimation,
         RBLNAutoModelForImageClassification,
+        RBLNAutoModelForImageTextToText,
         RBLNAutoModelForMaskedLM,
         RBLNAutoModelForQuestionAnswering,
         RBLNAutoModelForSeq2SeqLM,
@@ -265,6 +281,12 @@ if TYPE_CHECKING:
         RBLNBertForQuestionAnsweringConfig,
         RBLNBertModel,
         RBLNBertModelConfig,
+        RBLNBlip2ForConditionalGeneration,
+        RBLNBlip2ForConditionalGenerationConfig,
+        RBLNBlip2QFormerModel,
+        RBLNBlip2QFormerModelConfig,
+        RBLNBlip2VisionModel,
+        RBLNBlip2VisionModelConfig,
         RBLNCLIPTextModel,
         RBLNCLIPTextModelConfig,
         RBLNCLIPTextModelWithProjection,
@@ -281,6 +303,10 @@ if TYPE_CHECKING:
         RBLNDPTForDepthEstimationConfig,
         RBLNExaoneForCausalLM,
         RBLNExaoneForCausalLMConfig,
+        RBLNGemma3ForCausalLM,
+        RBLNGemma3ForCausalLMConfig,
+        RBLNGemma3ForConditionalGeneration,
+        RBLNGemma3ForConditionalGenerationConfig,
         RBLNGemmaForCausalLM,
         RBLNGemmaForCausalLMConfig,
         RBLNGPT2LMHeadModel,
@@ -297,6 +323,8 @@ if TYPE_CHECKING:
         RBLNMidmLMHeadModelConfig,
         RBLNMistralForCausalLM,
         RBLNMistralForCausalLMConfig,
+        RBLNOPTForCausalLM,
+        RBLNOPTForCausalLMConfig,
         RBLNPhiForCausalLM,
         RBLNPhiForCausalLMConfig,
         RBLNQwen2_5_VisionTransformerPretrainedModel,
@@ -311,6 +339,8 @@ if TYPE_CHECKING:
         RBLNRobertaForMaskedLMConfig,
         RBLNRobertaForSequenceClassification,
         RBLNRobertaForSequenceClassificationConfig,
+        RBLNSiglipVisionModel,
+        RBLNSiglipVisionModelConfig,
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
         RBLNT5ForConditionalGeneration,
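Each new model in this diff is registered twice, once as a string in `_import_structure` and once as a real import under `if TYPE_CHECKING:`, because optimum-rbln, like the Hugging Face libraries it builds on, loads its submodules lazily. A minimal sketch of that pattern, with illustrative names (`modeling_foo` and `FooModel` are not from the package):

    import sys
    from typing import TYPE_CHECKING

    from transformers.utils import _LazyModule

    # Maps submodule name -> exported names; nothing is imported until first use.
    _import_structure = {"modeling_foo": ["FooModel"]}

    if TYPE_CHECKING:
        # Type checkers and IDEs see concrete imports; this branch is skipped at runtime.
        from .modeling_foo import FooModel
    else:
        # Replace this module with a proxy that imports submodules on attribute access.
        sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure)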
optimum/rbln/__version__.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.7.5a0'
-__version_tuple__ = version_tuple = (0, 7, 5, 'a0')
+__version__ = version = '0.7.5rc0'
+__version_tuple__ = version_tuple = (0, 7, 5, 'rc0')
optimum/rbln/configuration_utils.py
CHANGED
@@ -17,7 +17,7 @@ import inspect
 import json
 from dataclasses import asdict, dataclass
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Dict, List, Optional, Protocol, Tuple, Type, Union, runtime_checkable
 
 import torch
 
@@ -34,6 +34,11 @@ DEFAULT_MOD_NAME = "default"
 TypeInputInfo = List[Tuple[str, Tuple[int], str]]
 
 
+@runtime_checkable
+class RBLNSerializableConfigProtocol(Protocol):
+    def _prepare_for_serialization(self) -> Dict[str, Any]: ...
+
+
 @dataclass
 class RBLNCompileConfig:
     """
@@ -234,7 +239,7 @@ class RBLNAutoConfig:
         return cls(**config_file)
 
 
-class RBLNModelConfig:
+class RBLNModelConfig(RBLNSerializableConfigProtocol):
     """Base configuration class for RBLN models that handles compilation settings, runtime options, and submodules.
 
     This class provides functionality for:
@@ -594,14 +599,14 @@
         )
         return rbln_model_cls
 
-    def _prepare_for_serialization(self):
+    def _prepare_for_serialization(self) -> Dict[str, Any]:
         """
         Prepare the attributes map for serialization by converting nested RBLNModelConfig
         objects to their serializable form.
         """
         serializable_map = {}
         for key, value in self._attributes_map.items():
-            if isinstance(value, RBLNModelConfig):
+            if isinstance(value, RBLNSerializableConfigProtocol):
                 # Convert nested RBLNModelConfig to its serializable form
                 serializable_map[key] = value._prepare_for_serialization()
             elif key == "_compile_cfgs":
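The switch from a concrete `isinstance(value, RBLNModelConfig)` check to `isinstance(value, RBLNSerializableConfigProtocol)` makes the test structural: with `@runtime_checkable`, any object that defines `_prepare_for_serialization` passes, whatever its base class. A self-contained sketch of the mechanism (`DummyConfig` is illustrative; note that `runtime_checkable` only checks that the method exists, not its signature):

    from typing import Any, Dict, Protocol, runtime_checkable


    @runtime_checkable
    class SerializableProtocol(Protocol):
        def _prepare_for_serialization(self) -> Dict[str, Any]: ...


    class DummyConfig:  # no inheritance from the protocol required
        def _prepare_for_serialization(self) -> Dict[str, Any]:
            return {"kind": "dummy"}


    # Structural check: True because DummyConfig provides the method.
    assert isinstance(DummyConfig(), SerializableProtocol)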
optimum/rbln/modeling.py
CHANGED
@@ -56,11 +56,7 @@ class RBLNModel(RBLNBaseModel):
     def update_kwargs(cls, kwargs):
         """
         Update user-given kwargs to get proper pytorch model.
-
-        For example, `torchscript`=True should be set because torch.jit
-        does not support `transformers` output instances as module output;
         """
-        kwargs.update({"torchscript": True})
         return kwargs
 
     @classmethod
@@ -133,7 +129,6 @@
 
         if not isinstance(config, PretrainedConfig):  # diffusers config
             config = PretrainedConfig(**config)
-        config.save_pretrained(save_dir_path / subfolder)
 
         # Save preprocessor
         for preprocessor in preprocessors:
@@ -155,6 +150,10 @@
             preprocessors=preprocessors, model=model, model_config=config, rbln_config=rbln_config
         )
 
+        # torchscript should be True for jit to work
+        torchscript_backup = config.torchscript
+        config.torchscript = True
+
         compiled_model: Union[rebel.RBLNCompiledModel, Dict[str, rebel.RBLNCompiledModel]] = cls.get_compiled_model(
             model, rbln_config=rbln_config
         )
@@ -169,6 +168,9 @@
             cm.save(save_dir_path / subfolder / f"{compiled_model_name}.rbln")
         rbln_config.save(save_dir_path / subfolder)
 
+        config.torchscript = torchscript_backup
+        config.save_pretrained(save_dir_path / subfolder)
+
         # Save torch artifacts (e.g. embedding matrix if needed.)
         cls.save_torch_artifacts(model, save_dir_path=save_dir_path, subfolder=subfolder, rbln_config=rbln_config)
 
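The modeling.py changes stop forcing `torchscript=True` into the user's kwargs permanently; the flag is now enabled only around compilation (torch.jit tracing cannot return `transformers` output dataclasses) and restored before `config.save_pretrained` runs, so the config written to disk keeps the user's original setting. A reduced sketch of the save/restore idea (the diff applies it inline rather than through a helper; names here are illustrative):

    def compile_with_torchscript(config, compile_fn):
        # torch.jit tracing requires plain tensor outputs, so torchscript
        # must be True while the model is traced/compiled.
        backup = config.torchscript
        config.torchscript = True
        try:
            return compile_fn()
        finally:
            # Restore the user's value before the config is written to disk.
            config.torchscript = backup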
optimum/rbln/ops/__init__.py
CHANGED
optimum/rbln/ops/attn.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
 
 import torch
 from torch import Tensor
@@ -125,6 +126,7 @@ def paged_causal_attn_decode(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for fused attention with KV cache updates.
 
@@ -147,6 +149,7 @@
     - scale: [] - Attention scale factor
     - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
     - block_size: [] - Number of tokens per block
+    - mask: [batch=1, max_seq_len] - attention mask when use position_ids
 
     Returns:
         Tensor: attn_output: [batch=1, n_heads, n_groups, 1, head_dim] - Attention output
@@ -165,6 +168,7 @@ def paged_causal_attn_decode_fake(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)
 
@@ -183,6 +187,8 @@
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for prefill phase attention with KV cache updates.
 
@@ -204,6 +210,8 @@
     - scale: [] - Attention scale factor
     - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
     - block_size: [] - Number of tokens per block
+    - is_bidirectional: [] - Whether the attention is bidirectional at current sequence position
+    - mask: [batch=1, max_seq_len] - attention mask when use position_ids
 
     Returns:
         Tensor: attn_output: [batch=1, n_heads, n_groups, seq_len, head_dim] - Attention output
@@ -222,6 +230,8 @@ def paged_causal_attn_prefill_fake(
     scale: Tensor,
     block_table: Tensor,
     block_size: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)
 
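Both the decode and prefill ops grow a trailing `mask: Optional[Tensor] = None` parameter, so existing call sites that never pass a mask keep working unchanged. A minimal sketch of a custom op with an optional tensor argument, using the `torch.library.custom_op` API from recent PyTorch (the `demo_ops::masked_scale` op is illustrative, not one of the package's kernels):

    from typing import Optional

    import torch
    from torch import Tensor


    @torch.library.custom_op("demo_ops::masked_scale", mutates_args=())
    def masked_scale(x: Tensor, scale: float, mask: Optional[Tensor] = None) -> Tensor:
        # Reference body; callers that omit `mask` get plain scaling.
        out = x * scale
        if mask is not None:
            out = out * mask
        return out


    x = torch.randn(2, 3)
    assert torch.allclose(masked_scale(x, 2.0), x * 2.0)  # old-style call, no mask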
optimum/rbln/ops/flash_attn.py
CHANGED
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
+
 import torch
 from torch import Tensor
 
@@ -113,6 +115,7 @@ def paged_flash_causal_attn_decode(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for fused causal flash attention with KV cache for decoding.
 
@@ -133,6 +136,7 @@ def paged_flash_causal_attn_decode_fake(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)
 
@@ -152,6 +156,8 @@ def paged_flash_causal_attn_prefill(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     """Defines the computation pattern for fused causal flash attention with KV cache for prefill.
 
@@ -172,5 +178,7 @@ def paged_flash_causal_attn_prefill_fake(
     block_table: Tensor,
     block_size: int,
     partition: int,
+    is_bidirectional: bool,
+    mask: Optional[Tensor] = None,
 ) -> Tensor:
     return torch.empty_like(q)
optimum/rbln/ops/sliding_window_attn.py
ADDED
@@ -0,0 +1,111 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+from torch import Tensor
+
+
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_sliding_window_attn_prefill",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_sliding_window_attn_prefill(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    is_bidirectional: bool,
+) -> Tensor:
+    """Defines the computation pattern for prefill phase attention with KV cache updates.
+
+    IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
+    a single optimized NPU operation. It is NOT meant for CPU execution.
+
+    Key differences from decode pattern:
+    - Handles prefill phase with multiple input tokens
+    - Takes explicit batch index for continuous batching
+
+    Expected tensor shapes:
+    - q: [batch=1, n_heads, n_groups, seq_len, head_dim] - Query states for multiple tokens
+    - k: [batch=1, n_heads, 1, seq_len, head_dim] - Key states for current input
+    - v: [batch=1, n_heads, 1, seq_len, head_dim] - Value states for current input
+    - kcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Key cache
+    - vcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Value cache
+    - cache_seq_len: [] - the sequence length of the cached states that were seen by the model
+    - cache_offset: [] - The valid length in the combined sequence of the KV cache and the current projected key states.
+    - scale: [] - Attention scale factor
+    - is_bidirectional: [] - Whether the attention is bidirectional
+    Returns:
+        Tensor: attn_output: [batch=1, n_heads, n_groups, seq_len, head_dim] - Attention output
+    """
+    return torch.empty_like(q)
+
+
+@paged_sliding_window_attn_prefill.register_fake
+def paged_sliding_window_attn_prefill_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    is_bidirectional: bool,
+) -> Tensor:
+    return torch.empty_like(q)
+
+
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_sliding_window_attn_decode",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_sliding_window_attn_decode(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+) -> Tensor:
+    return torch.empty_like(q)
+
+
+@paged_sliding_window_attn_decode.register_fake
+def paged_sliding_window_attn_decode_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    cache_seq_len: Tensor,
+    cache_offset: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+) -> Tensor:
+    return torch.empty_like(q)
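Note that both the op body and its `register_fake` counterpart simply return `torch.empty_like(q)`: the op is a pattern marker that the RBLN compiler matches and lowers to one fused NPU operation, while the fake (meta) implementation is what lets tracing machinery such as `torch.export` propagate shapes without a real kernel. A self-contained sketch of the same structure with an illustrative op (`demo_ops::pattern_marker` is not part of the package):

    import torch
    from torch import Tensor


    @torch.library.custom_op("demo_ops::pattern_marker", mutates_args=())
    def pattern_marker(q: Tensor, block_size: int) -> Tensor:
        # Stub body: only marks a computation pattern for a downstream compiler.
        return torch.empty_like(q)


    @pattern_marker.register_fake
    def pattern_marker_fake(q: Tensor, block_size: int) -> Tensor:
        # Meta implementation: shape/dtype propagation during tracing.
        return torch.empty_like(q)


    class Marker(torch.nn.Module):
        def forward(self, q: Tensor) -> Tensor:
            return torch.ops.demo_ops.pattern_marker(q, 128)


    # The exported graph contains a single pattern_marker call that a
    # backend compiler can recognize and replace with a fused kernel.
    ep = torch.export.export(Marker(), (torch.randn(1, 8, 4, 16, 64),))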
optimum/rbln/transformers/__init__.py
CHANGED
@@ -34,6 +34,7 @@ _import_structure = {
         "RBLNAutoModelForCTC",
         "RBLNAutoModelForDepthEstimation",
         "RBLNAutoModelForImageClassification",
+        "RBLNAutoModelForImageTextToText",
         "RBLNAutoModelForMaskedLM",
         "RBLNAutoModelForQuestionAnswering",
         "RBLNAutoModelForSeq2SeqLM",
@@ -50,6 +51,12 @@ _import_structure = {
         "RBLNBertForQuestionAnsweringConfig",
         "RBLNBertModel",
         "RBLNBertModelConfig",
+        "RBLNBlip2VisionModelConfig",
+        "RBLNBlip2VisionModel",
+        "RBLNBlip2QFormerModel",
+        "RBLNBlip2QFormerModelConfig",
+        "RBLNBlip2ForConditionalGeneration",
+        "RBLNBlip2ForConditionalGenerationConfig",
         "RBLNCLIPTextModel",
         "RBLNCLIPTextModelConfig",
         "RBLNCLIPTextModelWithProjection",
@@ -66,6 +73,10 @@ _import_structure = {
         "RBLNExaoneForCausalLMConfig",
         "RBLNGemmaForCausalLM",
         "RBLNGemmaForCausalLMConfig",
+        "RBLNGemma3ForCausalLM",
+        "RBLNGemma3ForCausalLMConfig",
+        "RBLNGemma3ForConditionalGeneration",
+        "RBLNGemma3ForConditionalGenerationConfig",
         "RBLNGPT2LMHeadModel",
         "RBLNGPT2LMHeadModelConfig",
         "RBLNIdefics3VisionTransformer",
@@ -74,6 +85,8 @@ _import_structure = {
         "RBLNIdefics3VisionTransformerConfig",
         "RBLNLlamaForCausalLM",
         "RBLNLlamaForCausalLMConfig",
+        "RBLNOPTForCausalLM",
+        "RBLNOPTForCausalLMConfig",
         "RBLNLlavaNextForConditionalGeneration",
         "RBLNLlavaNextForConditionalGenerationConfig",
         "RBLNMidmLMHeadModel",
@@ -88,17 +101,18 @@ _import_structure = {
         "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
         "RBLNQwen2_5_VLForConditionalGeneration",
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
+        "RBLNSiglipVisionModel",
+        "RBLNSiglipVisionModelConfig",
         "RBLNT5EncoderModel",
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGeneration",
         "RBLNT5ForConditionalGenerationConfig",
+        "RBLNTimeSeriesTransformerForPrediction",
+        "RBLNTimeSeriesTransformerForPredictionConfig",
         "RBLNWav2Vec2ForCTC",
         "RBLNWav2Vec2ForCTCConfig",
         "RBLNWhisperForConditionalGeneration",
         "RBLNWhisperForConditionalGenerationConfig",
-        "RBLNTimeSeriesTransformerForPrediction",
-        "RBLNTimeSeriesTransformerForPredictionConfig",
-        "RBLNLlavaNextForConditionalGeneration",
         "RBLNXLMRobertaModel",
         "RBLNXLMRobertaModelConfig",
     ],
@@ -139,6 +153,7 @@ if TYPE_CHECKING:
         RBLNAutoModelForCTC,
         RBLNAutoModelForDepthEstimation,
         RBLNAutoModelForImageClassification,
+        RBLNAutoModelForImageTextToText,
         RBLNAutoModelForMaskedLM,
         RBLNAutoModelForQuestionAnswering,
         RBLNAutoModelForSeq2SeqLM,
@@ -155,6 +170,12 @@ if TYPE_CHECKING:
         RBLNBertForQuestionAnsweringConfig,
         RBLNBertModel,
         RBLNBertModelConfig,
+        RBLNBlip2ForConditionalGeneration,
+        RBLNBlip2ForConditionalGenerationConfig,
+        RBLNBlip2QFormerModel,
+        RBLNBlip2QFormerModelConfig,
+        RBLNBlip2VisionModel,
+        RBLNBlip2VisionModelConfig,
         RBLNCLIPTextModel,
         RBLNCLIPTextModelConfig,
         RBLNCLIPTextModelWithProjection,
@@ -169,6 +190,10 @@ if TYPE_CHECKING:
         RBLNDPTForDepthEstimationConfig,
         RBLNExaoneForCausalLM,
         RBLNExaoneForCausalLMConfig,
+        RBLNGemma3ForCausalLM,
+        RBLNGemma3ForCausalLMConfig,
+        RBLNGemma3ForConditionalGeneration,
+        RBLNGemma3ForConditionalGenerationConfig,
         RBLNGemmaForCausalLM,
         RBLNGemmaForCausalLMConfig,
         RBLNGPT2LMHeadModel,
@@ -185,6 +210,8 @@ if TYPE_CHECKING:
         RBLNMidmLMHeadModelConfig,
         RBLNMistralForCausalLM,
         RBLNMistralForCausalLMConfig,
+        RBLNOPTForCausalLM,
+        RBLNOPTForCausalLMConfig,
         RBLNPhiForCausalLM,
         RBLNPhiForCausalLMConfig,
         RBLNQwen2_5_VisionTransformerPretrainedModel,
@@ -193,6 +220,8 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
+        RBLNSiglipVisionModel,
+        RBLNSiglipVisionModelConfig,
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
         RBLNT5ForConditionalGeneration,
optimum/rbln/transformers/models/__init__.py
CHANGED
@@ -31,6 +31,7 @@ _import_structure = {
         "RBLNAutoModelForSequenceClassification",
         "RBLNAutoModelForSpeechSeq2Seq",
         "RBLNAutoModelForVision2Seq",
+        "RBLNAutoModelForImageTextToText",
     ],
     "bart": [
         "RBLNBartForConditionalGeneration",
@@ -46,6 +47,14 @@ _import_structure = {
         "RBLNBertForMaskedLM",
         "RBLNBertForMaskedLMConfig",
     ],
+    "blip_2": [
+        "RBLNBlip2VisionModelConfig",
+        "RBLNBlip2VisionModel",
+        "RBLNBlip2ForConditionalGeneration",
+        "RBLNBlip2ForConditionalGenerationConfig",
+        "RBLNBlip2QFormerModel",
+        "RBLNBlip2QFormerModelConfig",
+    ],
     "clip": [
         "RBLNCLIPTextModel",
         "RBLNCLIPTextModelConfig",
@@ -72,6 +81,12 @@ _import_structure = {
     ],
     "exaone": ["RBLNExaoneForCausalLM", "RBLNExaoneForCausalLMConfig"],
     "gemma": ["RBLNGemmaForCausalLM", "RBLNGemmaForCausalLMConfig"],
+    "gemma3": [
+        "RBLNGemma3ForCausalLM",
+        "RBLNGemma3ForCausalLMConfig",
+        "RBLNGemma3ForConditionalGeneration",
+        "RBLNGemma3ForConditionalGenerationConfig",
+    ],
     "gpt2": ["RBLNGPT2LMHeadModel", "RBLNGPT2LMHeadModelConfig"],
     "idefics3": [
         "RBLNIdefics3VisionTransformer",
@@ -80,11 +95,16 @@ _import_structure = {
         "RBLNIdefics3VisionTransformerConfig",
     ],
     "llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig"],
+    "opt": ["RBLNOPTForCausalLM", "RBLNOPTForCausalLMConfig"],
     "llava_next": ["RBLNLlavaNextForConditionalGeneration", "RBLNLlavaNextForConditionalGenerationConfig"],
     "midm": ["RBLNMidmLMHeadModel", "RBLNMidmLMHeadModelConfig"],
     "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
     "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
     "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
+    "siglip": [
+        "RBLNSiglipVisionModel",
+        "RBLNSiglipVisionModelConfig",
+    ],
     "time_series_transformers": [
         "RBLNTimeSeriesTransformerForPrediction",
         "RBLNTimeSeriesTransformerForPredictionConfig",
@@ -108,6 +128,7 @@ if TYPE_CHECKING:
         RBLNAutoModelForCTC,
         RBLNAutoModelForDepthEstimation,
         RBLNAutoModelForImageClassification,
+        RBLNAutoModelForImageTextToText,
         RBLNAutoModelForMaskedLM,
         RBLNAutoModelForQuestionAnswering,
         RBLNAutoModelForSeq2SeqLM,
@@ -129,6 +150,14 @@ if TYPE_CHECKING:
         RBLNBertModel,
         RBLNBertModelConfig,
     )
+    from .blip_2 import (
+        RBLNBlip2ForConditionalGeneration,
+        RBLNBlip2ForConditionalGenerationConfig,
+        RBLNBlip2QFormerModel,
+        RBLNBlip2QFormerModelConfig,
+        RBLNBlip2VisionModel,
+        RBLNBlip2VisionModelConfig,
+    )
     from .clip import (
         RBLNCLIPTextModel,
         RBLNCLIPTextModelConfig,
@@ -149,6 +178,12 @@ if TYPE_CHECKING:
     )
     from .exaone import RBLNExaoneForCausalLM, RBLNExaoneForCausalLMConfig
     from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig
+    from .gemma3 import (
+        RBLNGemma3ForCausalLM,
+        RBLNGemma3ForCausalLMConfig,
+        RBLNGemma3ForConditionalGeneration,
+        RBLNGemma3ForConditionalGenerationConfig,
+    )
     from .gpt2 import RBLNGPT2LMHeadModel, RBLNGPT2LMHeadModelConfig
     from .idefics3 import (
         RBLNIdefics3ForConditionalGeneration,
@@ -160,6 +195,7 @@ if TYPE_CHECKING:
     from .llava_next import RBLNLlavaNextForConditionalGeneration, RBLNLlavaNextForConditionalGenerationConfig
     from .midm import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
     from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
+    from .opt import RBLNOPTForCausalLM, RBLNOPTForCausalLMConfig
     from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
    from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
    from .qwen2_5_vl import (
@@ -168,6 +204,7 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
     )
+    from .siglip import RBLNSiglipVisionModel, RBLNSiglipVisionModelConfig
     from .t5 import (
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
optimum/rbln/transformers/models/auto/modeling_auto.py
CHANGED
@@ -23,6 +23,8 @@ from transformers.models.auto.modeling_auto import (
     MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES,
     MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
     MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
+    MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING,
+    MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES,
     MODEL_FOR_MASKED_LM_MAPPING,
     MODEL_FOR_MASKED_LM_MAPPING_NAMES,
     MODEL_FOR_QUESTION_ANSWERING_MAPPING,
@@ -90,6 +92,11 @@ class RBLNAutoModelForVision2Seq(_BaseAutoModelClass):
     _model_mapping_names = MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES
 
 
+class RBLNAutoModelForImageTextToText(_BaseAutoModelClass):
+    _model_mapping = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING
+    _model_mapping_names = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES
+
+
 class RBLNAutoModelForMaskedLM(_BaseAutoModelClass):
     _model_mapping = MODEL_FOR_MASKED_LM_MAPPING
     _model_mapping_names = MODEL_FOR_MASKED_LM_MAPPING_NAMES
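With the mapping wired up, image-text-to-text checkpoints become loadable through the new auto class like any other RBLN auto model. A usage sketch, assuming the class exposes the same `from_pretrained(..., export=True)` entry point as the existing RBLN auto classes; the checkpoint ID is a placeholder:

    from optimum.rbln import RBLNAutoModelForImageTextToText

    # export=True compiles the original PyTorch checkpoint for the RBLN NPU.
    model = RBLNAutoModelForImageTextToText.from_pretrained(
        "Salesforce/blip2-opt-2.7b",  # placeholder model ID
        export=True,
    )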
optimum/rbln/transformers/models/blip_2/__init__.py
ADDED
@@ -0,0 +1,20 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_blip_2 import (
+    RBLNBlip2ForConditionalGenerationConfig,
+    RBLNBlip2QFormerModelConfig,
+    RBLNBlip2VisionModelConfig,
+)
+from .modeling_blip_2 import RBLNBlip2ForConditionalGeneration, RBLNBlip2QFormerModel, RBLNBlip2VisionModel