optimum-rbln 0.8.1a2__py3-none-any.whl → 0.8.1a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +2 -2
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +4 -12
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +1 -11
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +0 -43
- {optimum_rbln-0.8.1a2.dist-info → optimum_rbln-0.8.1a3.dist-info}/METADATA +1 -1
- {optimum_rbln-0.8.1a2.dist-info → optimum_rbln-0.8.1a3.dist-info}/RECORD +9 -9
- {optimum_rbln-0.8.1a2.dist-info → optimum_rbln-0.8.1a3.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.1a2.dist-info → optimum_rbln-0.8.1a3.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__version__.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
18
18
|
version_tuple: VERSION_TUPLE
|
19
19
|
|
20
|
-
__version__ = version = '0.8.1a2'
|
21
|
-
__version_tuple__ = version_tuple = (0, 8, 1, 'a2')
|
20
|
+
__version__ = version = '0.8.1a3'
|
21
|
+
__version_tuple__ = version_tuple = (0, 8, 1, 'a3')
|
@@ -559,7 +559,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
|
|
559
559
|
(
|
560
560
|
inputs,
|
561
561
|
cache_position,
|
562
|
-
|
562
|
+
padded_attention_mask,
|
563
563
|
out_buffers,
|
564
564
|
position_ids,
|
565
565
|
position_embed,
|
@@ -571,7 +571,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
|
|
571
571
|
)
|
572
572
|
if not is_external_block_tables:
|
573
573
|
local_block_tables = torch.tensor([batch_idx], dtype=torch.int16)
|
574
|
-
self.dec_attn_mask[batch_idx : batch_idx + 1] =
|
574
|
+
self.dec_attn_mask[batch_idx : batch_idx + 1] = padded_attention_mask[:1]
|
575
575
|
|
576
576
|
if self.rbln_config.use_attention_mask and self.rbln_config.use_position_ids:
|
577
577
|
chunked_attention_mask = torch.zeros(1, self.rbln_config.max_seq_len, dtype=torch.float32)
|
@@ -587,18 +587,10 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
|
|
587
587
|
else None
|
588
588
|
)
|
589
589
|
|
590
|
-
# Not used in Gemma3 yet.
|
591
590
|
if self.rbln_config.use_attention_mask:
|
592
591
|
if self.rbln_config.use_position_ids:
|
593
|
-
chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] =
|
594
|
-
|
595
|
-
]
|
596
|
-
else:
|
597
|
-
# Update attention mask to ensure proper causal behavior
|
598
|
-
if step >= self.rbln_config.prefill_chunk_size:
|
599
|
-
chunked_attention_mask[:, :, :, step - self.rbln_config.prefill_chunk_size : step] = 1
|
600
|
-
chunked_attention_mask[:, :, :, step : step + self.rbln_config.prefill_chunk_size] = (
|
601
|
-
self.causal_mask
|
592
|
+
chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] = (
|
593
|
+
padded_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size]
|
602
594
|
)
|
603
595
|
|
604
596
|
# Define query position
|
@@ -28,7 +28,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
|
|
28
28
|
Qwen2_5_VisionPatchEmbed,
|
29
29
|
Qwen2_5_VisionRotaryEmbedding,
|
30
30
|
Qwen2_5_VisionTransformerPretrainedModel,
|
31
|
-
Qwen2_5_VLModel,
|
32
31
|
Qwen2_5_VLRotaryEmbedding,
|
33
32
|
)
|
34
33
|
|
@@ -391,14 +390,6 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
|
|
391
390
|
def can_generate(self):
|
392
391
|
return True
|
393
392
|
|
394
|
-
@classmethod
|
395
|
-
def get_pytorch_model(cls, *args, **kwargs):
|
396
|
-
model = super().get_pytorch_model(*args, **kwargs)
|
397
|
-
model.model.lm_head = model.lm_head
|
398
|
-
model.lm_head = None
|
399
|
-
del model.lm_head
|
400
|
-
return model
|
401
|
-
|
402
393
|
@classmethod
|
403
394
|
def update_kwargs(cls, kwargs):
|
404
395
|
kwargs.update(
|
@@ -540,8 +531,7 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
|
|
540
531
|
vision_tokens = input_id[0][vision_start_indices + 1]
|
541
532
|
image_nums = (vision_tokens == image_token_id).sum()
|
542
533
|
video_nums = (vision_tokens == video_token_id).sum()
|
543
|
-
position_ids, rope_deltas =
|
544
|
-
self,
|
534
|
+
position_ids, rope_deltas = self.get_rope_index(
|
545
535
|
input_id,
|
546
536
|
image_grid_thw[image_idx : image_idx + image_nums] if image_grid_thw is not None else None,
|
547
537
|
video_grid_thw[video_idx : video_idx + video_nums] if video_grid_thw is not None else None,
|
@@ -3,14 +3,8 @@ from typing import Tuple
|
|
3
3
|
|
4
4
|
import torch
|
5
5
|
import torch.nn as nn
|
6
|
-
from transformers import PreTrainedModel
|
7
6
|
|
8
7
|
from ..decoderonly.decoderonly_architecture import (
|
9
|
-
DecoderOnlyAttention,
|
10
|
-
DecoderOnlyFlashAttention,
|
11
|
-
DecoderOnlyForCausalLM,
|
12
|
-
DecoderOnlyLayer,
|
13
|
-
DecoderOnlyModel,
|
14
8
|
DecoderOnlyWrapper,
|
15
9
|
apply_rotary_pos_emb,
|
16
10
|
)
|
@@ -203,40 +197,3 @@ class Qwen2_5_VL_LanguageModelWrapper(DecoderOnlyWrapper):
|
|
203
197
|
past_key_values,
|
204
198
|
position_embeds,
|
205
199
|
)
|
206
|
-
|
207
|
-
def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
|
208
|
-
new_layers = []
|
209
|
-
|
210
|
-
for layer in causal_lm.model.language_model.layers:
|
211
|
-
if self.attn_impl == "eager":
|
212
|
-
new_self_attn = DecoderOnlyAttention(
|
213
|
-
layer.self_attn,
|
214
|
-
self.use_attention_mask,
|
215
|
-
self.use_position_ids,
|
216
|
-
kvcache_block_size=self.kvcache_block_size,
|
217
|
-
)
|
218
|
-
elif self.attn_impl == "flash_attn":
|
219
|
-
new_self_attn = DecoderOnlyFlashAttention(
|
220
|
-
layer.self_attn,
|
221
|
-
kvcache_partition_len=self.kvcache_partition_len,
|
222
|
-
kvcache_block_size=self.kvcache_block_size,
|
223
|
-
use_attention_mask=self.use_attention_mask,
|
224
|
-
use_position_ids=self.use_position_ids,
|
225
|
-
)
|
226
|
-
else:
|
227
|
-
raise NotImplementedError(f"Unknwon attn : {self.attn_impl}")
|
228
|
-
|
229
|
-
new_layer = DecoderOnlyLayer(layer, new_self_attn)
|
230
|
-
new_layers.append(new_layer)
|
231
|
-
|
232
|
-
new_model = DecoderOnlyModel(
|
233
|
-
causal_lm.model.language_model,
|
234
|
-
new_layers,
|
235
|
-
partition_len=self.kvcache_partition_len,
|
236
|
-
max_seq_len=max_seq_len,
|
237
|
-
kvcache_block_size=self.kvcache_block_size,
|
238
|
-
use_learned_pos_emb=self.use_learned_pos_emb,
|
239
|
-
sliding_window_layers=self.sliding_window_layers,
|
240
|
-
)
|
241
|
-
new_causal_lm = DecoderOnlyForCausalLM(causal_lm.model, new_model)
|
242
|
-
return new_causal_lm
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: optimum-rbln
|
3
|
-
Version: 0.8.1a2
|
3
|
+
Version: 0.8.1a3
|
4
4
|
Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
|
5
5
|
Project-URL: Homepage, https://rebellions.ai
|
6
6
|
Project-URL: Documentation, https://docs.rbln.ai
|
@@ -1,5 +1,5 @@
|
|
1
1
|
optimum/rbln/__init__.py,sha256=qJJTumXhoFnawXGpeGJbAm4J4A9FFwD1SQ2MqcKDXoM,14436
|
2
|
-
optimum/rbln/__version__.py,sha256=
|
2
|
+
optimum/rbln/__version__.py,sha256=bE7uZ_Vr3gK5nEF_YfozhLcF3_Q9HvGMYkpifHovJxI,519
|
3
3
|
optimum/rbln/configuration_utils.py,sha256=o5oer7fBdE-MHLGNXoP35FjmuQbMmjEIDv0QE_k3kpo,32336
|
4
4
|
optimum/rbln/modeling.py,sha256=ZlJ_tOCWiFjDIlwJ_B_HOCO0kBduWrBAbW9VSEVIAFg,12088
|
5
5
|
optimum/rbln/modeling_base.py,sha256=5fUb1FaxfjApzJIkT8-SrPhuygGo_1Uc0i7UedawOeE,23393
|
@@ -88,7 +88,7 @@ optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=0u1JTlO47qoH_-qxWG
|
|
88
88
|
optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=vQYZDDdoddwA7yKc5zzrq2Zs9sax-0p8rNF_aYfF4bk,1006
|
89
89
|
optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=cakn8RGo8gS3nmXdEqOfC2xUBOMGInROgLEbCOoLFR0,13398
|
90
90
|
optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=YAn8J_lIq4IS-HM_gbi5Qov8_osxhWtBr5z_28QRbGM,49667
|
91
|
-
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=
|
91
|
+
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=J8eJqg56vPcYnnSP3EYU6X1e5BvdRdPyRcWWlcHzK1c,53256
|
92
92
|
optimum/rbln/transformers/models/distilbert/__init__.py,sha256=zXL78SOEORTnUN_wrdoaDaYpntG8lcFHvPobM6jC0CI,841
|
93
93
|
optimum/rbln/transformers/models/distilbert/configuration_distilbert.py,sha256=qfxCk15hW47i1oO8dCo-xntSbKyW-WOu30h28rIw6eA,766
|
94
94
|
optimum/rbln/transformers/models/distilbert/modeling_distilbert.py,sha256=_Ubhbvrhi7jBC5uS9ITstIAE5VJVwAuDwvQ_Hrr6Ny4,797
|
@@ -106,7 +106,7 @@ optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=Ojvum34EhDHWfMB4
|
|
106
106
|
optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_J9VyGiSReuEIvL0Uno0eaI,790
|
107
107
|
optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=eupMGTHJGJNNrAZ3GE6M6GQBAQzBb7KFJvalyDmbM-A,3063
|
108
108
|
optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=sgFQQbvEr15tb2Sxk_tgcgQFcjhKGbNSW6fm2u7-Vck,8609
|
109
|
-
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=
|
109
|
+
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=9c6-Qz4EGGbSnKwoz2zH5r6W7sVfjb-m5Z-dnQkAOXU,38992
|
110
110
|
optimum/rbln/transformers/models/gpt2/__init__.py,sha256=socBMIBZSiLbrVN12rQ4nL9gFeT0axMgz6SWaCaD4Ac,704
|
111
111
|
optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=9sS6-EGapmow3rG9ViejK9qwrqy_X86VBxQ7u9x0Yqk,923
|
112
112
|
optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=pnGgixjgjW7HULbs5211cC2guw_4e4-MlS69vdCRMMg,3206
|
@@ -143,8 +143,8 @@ optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=OKd7SXQLLtzPVolr
|
|
143
143
|
optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
|
144
144
|
optimum/rbln/transformers/models/qwen2_5_vl/__init__.py,sha256=rAW3DKQUzGL6EMwa5r1iLu94yhpiZpk6zfoD7TtYXrc,865
|
145
145
|
optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py,sha256=U3ngIfkA58itqQZqTf-gbISMPoV7ipDttI7V2uwK_18,4155
|
146
|
-
optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=
|
147
|
-
optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=
|
146
|
+
optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=Q4U-avMkby-CunNXEERqvRZx9duC5i-6UmfF1376ciU,26336
|
147
|
+
optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=PAQz__9o_f5phlozhhXAB8JErBlS1jc4FYZkZkSYJuI,7312
|
148
148
|
optimum/rbln/transformers/models/resnet/__init__.py,sha256=0QqtEQF1IMYgEmmfXMGarCDS8kJB5tzODfwTEzDVZRg,837
|
149
149
|
optimum/rbln/transformers/models/resnet/configuration_resnet.py,sha256=KQd887jgNOl_Am3b407P2OvKtzkkeBS1cEhCfiN0tJg,769
|
150
150
|
optimum/rbln/transformers/models/resnet/modeling_resnet.py,sha256=E8vg3Rw_KsHt6vaOg0ungZD7sXe0T4OMP0X8NFG1EXI,816
|
@@ -191,7 +191,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
|
|
191
191
|
optimum/rbln/utils/runtime_utils.py,sha256=LoKNK3AQNV_BSScstIZWjICkJf265MnUgy360BOocVI,5454
|
192
192
|
optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
|
193
193
|
optimum/rbln/utils/submodule.py,sha256=w5mgPgncI740gVKMu3S-69DGNdUSI0bTZxegQGcZ98Y,5011
|
194
|
-
optimum_rbln-0.8.
|
195
|
-
optimum_rbln-0.8.1a2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
196
|
-
optimum_rbln-0.8.1a2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
197
|
-
optimum_rbln-0.8.1a2.dist-info/RECORD,,
|
194
|
+
optimum_rbln-0.8.1a3.dist-info/METADATA,sha256=e2Q0Hat0Lk5pWpTSk_kbikGUsOezSiz7nM-01GXFU8w,5299
|
195
|
+
optimum_rbln-0.8.1a3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
196
|
+
optimum_rbln-0.8.1a3.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
197
|
+
optimum_rbln-0.8.1a3.dist-info/RECORD,,
|
File without changes
|
File without changes
|