optimum-rbln 0.8.1a2__py3-none-any.whl → 0.8.1a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.8.1a2'
21
- __version_tuple__ = version_tuple = (0, 8, 1, 'a2')
20
+ __version__ = version = '0.8.1a3'
21
+ __version_tuple__ = version_tuple = (0, 8, 1, 'a3')
@@ -177,8 +177,8 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
177
177
  )
178
178
  elif block_tables is None and local_block_tables is None:
179
179
  return False
180
- else:
181
- return True
180
+
181
+ return True
182
182
 
183
183
  def forward(
184
184
  self,
@@ -559,7 +559,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
559
559
  (
560
560
  inputs,
561
561
  cache_position,
562
- chunked_attention_mask,
562
+ padded_attention_mask,
563
563
  out_buffers,
564
564
  position_ids,
565
565
  position_embed,
@@ -571,7 +571,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
571
571
  )
572
572
  if not is_external_block_tables:
573
573
  local_block_tables = torch.tensor([batch_idx], dtype=torch.int16)
574
- self.dec_attn_mask[batch_idx : batch_idx + 1] = chunked_attention_mask[:1]
574
+ self.dec_attn_mask[batch_idx : batch_idx + 1] = padded_attention_mask[:1]
575
575
 
576
576
  if self.rbln_config.use_attention_mask and self.rbln_config.use_position_ids:
577
577
  chunked_attention_mask = torch.zeros(1, self.rbln_config.max_seq_len, dtype=torch.float32)
@@ -587,18 +587,10 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
587
587
  else None
588
588
  )
589
589
 
590
- # Not used in Gemma3 yet.
591
590
  if self.rbln_config.use_attention_mask:
592
591
  if self.rbln_config.use_position_ids:
593
- chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] = self.dec_attn_mask[
594
- batch_idx, step : step + self.rbln_config.prefill_chunk_size
595
- ]
596
- else:
597
- # Update attention mask to ensure proper causal behavior
598
- if step >= self.rbln_config.prefill_chunk_size:
599
- chunked_attention_mask[:, :, :, step - self.rbln_config.prefill_chunk_size : step] = 1
600
- chunked_attention_mask[:, :, :, step : step + self.rbln_config.prefill_chunk_size] = (
601
- self.causal_mask
592
+ chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] = (
593
+ padded_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size]
602
594
  )
603
595
 
604
596
  # Define query position
@@ -28,7 +28,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
28
28
  Qwen2_5_VisionPatchEmbed,
29
29
  Qwen2_5_VisionRotaryEmbedding,
30
30
  Qwen2_5_VisionTransformerPretrainedModel,
31
- Qwen2_5_VLModel,
32
31
  Qwen2_5_VLRotaryEmbedding,
33
32
  )
34
33
 
@@ -391,14 +390,6 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
391
390
  def can_generate(self):
392
391
  return True
393
392
 
394
- @classmethod
395
- def get_pytorch_model(cls, *args, **kwargs):
396
- model = super().get_pytorch_model(*args, **kwargs)
397
- model.model.lm_head = model.lm_head
398
- model.lm_head = None
399
- del model.lm_head
400
- return model
401
-
402
393
  @classmethod
403
394
  def update_kwargs(cls, kwargs):
404
395
  kwargs.update(
@@ -540,8 +531,7 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
540
531
  vision_tokens = input_id[0][vision_start_indices + 1]
541
532
  image_nums = (vision_tokens == image_token_id).sum()
542
533
  video_nums = (vision_tokens == video_token_id).sum()
543
- position_ids, rope_deltas = Qwen2_5_VLModel.get_rope_index(
544
- self,
534
+ position_ids, rope_deltas = self.get_rope_index(
545
535
  input_id,
546
536
  image_grid_thw[image_idx : image_idx + image_nums] if image_grid_thw is not None else None,
547
537
  video_grid_thw[video_idx : video_idx + video_nums] if video_grid_thw is not None else None,
@@ -3,14 +3,8 @@ from typing import Tuple
3
3
 
4
4
  import torch
5
5
  import torch.nn as nn
6
- from transformers import PreTrainedModel
7
6
 
8
7
  from ..decoderonly.decoderonly_architecture import (
9
- DecoderOnlyAttention,
10
- DecoderOnlyFlashAttention,
11
- DecoderOnlyForCausalLM,
12
- DecoderOnlyLayer,
13
- DecoderOnlyModel,
14
8
  DecoderOnlyWrapper,
15
9
  apply_rotary_pos_emb,
16
10
  )
@@ -203,40 +197,3 @@ class Qwen2_5_VL_LanguageModelWrapper(DecoderOnlyWrapper):
203
197
  past_key_values,
204
198
  position_embeds,
205
199
  )
206
-
207
- def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
208
- new_layers = []
209
-
210
- for layer in causal_lm.model.language_model.layers:
211
- if self.attn_impl == "eager":
212
- new_self_attn = DecoderOnlyAttention(
213
- layer.self_attn,
214
- self.use_attention_mask,
215
- self.use_position_ids,
216
- kvcache_block_size=self.kvcache_block_size,
217
- )
218
- elif self.attn_impl == "flash_attn":
219
- new_self_attn = DecoderOnlyFlashAttention(
220
- layer.self_attn,
221
- kvcache_partition_len=self.kvcache_partition_len,
222
- kvcache_block_size=self.kvcache_block_size,
223
- use_attention_mask=self.use_attention_mask,
224
- use_position_ids=self.use_position_ids,
225
- )
226
- else:
227
- raise NotImplementedError(f"Unknwon attn : {self.attn_impl}")
228
-
229
- new_layer = DecoderOnlyLayer(layer, new_self_attn)
230
- new_layers.append(new_layer)
231
-
232
- new_model = DecoderOnlyModel(
233
- causal_lm.model.language_model,
234
- new_layers,
235
- partition_len=self.kvcache_partition_len,
236
- max_seq_len=max_seq_len,
237
- kvcache_block_size=self.kvcache_block_size,
238
- use_learned_pos_emb=self.use_learned_pos_emb,
239
- sliding_window_layers=self.sliding_window_layers,
240
- )
241
- new_causal_lm = DecoderOnlyForCausalLM(causal_lm.model, new_model)
242
- return new_causal_lm
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: optimum-rbln
3
- Version: 0.8.1a2
3
+ Version: 0.8.1a3
4
4
  Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
5
5
  Project-URL: Homepage, https://rebellions.ai
6
6
  Project-URL: Documentation, https://docs.rbln.ai
@@ -1,5 +1,5 @@
1
1
  optimum/rbln/__init__.py,sha256=qJJTumXhoFnawXGpeGJbAm4J4A9FFwD1SQ2MqcKDXoM,14436
2
- optimum/rbln/__version__.py,sha256=shHuZmlC1NNfixc5X-chXIonN-Rp-nB4KxJDhJNxa3k,519
2
+ optimum/rbln/__version__.py,sha256=bE7uZ_Vr3gK5nEF_YfozhLcF3_Q9HvGMYkpifHovJxI,519
3
3
  optimum/rbln/configuration_utils.py,sha256=o5oer7fBdE-MHLGNXoP35FjmuQbMmjEIDv0QE_k3kpo,32336
4
4
  optimum/rbln/modeling.py,sha256=ZlJ_tOCWiFjDIlwJ_B_HOCO0kBduWrBAbW9VSEVIAFg,12088
5
5
  optimum/rbln/modeling_base.py,sha256=5fUb1FaxfjApzJIkT8-SrPhuygGo_1Uc0i7UedawOeE,23393
@@ -88,7 +88,7 @@ optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=0u1JTlO47qoH_-qxWG
88
88
  optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=vQYZDDdoddwA7yKc5zzrq2Zs9sax-0p8rNF_aYfF4bk,1006
89
89
  optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=cakn8RGo8gS3nmXdEqOfC2xUBOMGInROgLEbCOoLFR0,13398
90
90
  optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=YAn8J_lIq4IS-HM_gbi5Qov8_osxhWtBr5z_28QRbGM,49667
91
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=cVk7UTXIc5HryADMSJdMN1ENW4ZZM_S5I7RuCk6srEI,53273
91
+ optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=J8eJqg56vPcYnnSP3EYU6X1e5BvdRdPyRcWWlcHzK1c,53256
92
92
  optimum/rbln/transformers/models/distilbert/__init__.py,sha256=zXL78SOEORTnUN_wrdoaDaYpntG8lcFHvPobM6jC0CI,841
93
93
  optimum/rbln/transformers/models/distilbert/configuration_distilbert.py,sha256=qfxCk15hW47i1oO8dCo-xntSbKyW-WOu30h28rIw6eA,766
94
94
  optimum/rbln/transformers/models/distilbert/modeling_distilbert.py,sha256=_Ubhbvrhi7jBC5uS9ITstIAE5VJVwAuDwvQ_Hrr6Ny4,797
@@ -106,7 +106,7 @@ optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=Ojvum34EhDHWfMB4
106
106
  optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_J9VyGiSReuEIvL0Uno0eaI,790
107
107
  optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=eupMGTHJGJNNrAZ3GE6M6GQBAQzBb7KFJvalyDmbM-A,3063
108
108
  optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=sgFQQbvEr15tb2Sxk_tgcgQFcjhKGbNSW6fm2u7-Vck,8609
109
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=BjICf2xmiUKGkraoGmkTRMIpgqjHrBCiJxc4XgeQw9I,39483
109
+ optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=9c6-Qz4EGGbSnKwoz2zH5r6W7sVfjb-m5Z-dnQkAOXU,38992
110
110
  optimum/rbln/transformers/models/gpt2/__init__.py,sha256=socBMIBZSiLbrVN12rQ4nL9gFeT0axMgz6SWaCaD4Ac,704
111
111
  optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=9sS6-EGapmow3rG9ViejK9qwrqy_X86VBxQ7u9x0Yqk,923
112
112
  optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=pnGgixjgjW7HULbs5211cC2guw_4e4-MlS69vdCRMMg,3206
@@ -143,8 +143,8 @@ optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=OKd7SXQLLtzPVolr
143
143
  optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
144
144
  optimum/rbln/transformers/models/qwen2_5_vl/__init__.py,sha256=rAW3DKQUzGL6EMwa5r1iLu94yhpiZpk6zfoD7TtYXrc,865
145
145
  optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py,sha256=U3ngIfkA58itqQZqTf-gbISMPoV7ipDttI7V2uwK_18,4155
146
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=x14mGDbCGVFNTw3yXoRJ70W6U5wAlIe_lxqk190P3z4,26636
147
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=G7zAMC2FSqz1dYisBUmBJF5hkyueaOrJSI2gD3nLvCQ,9068
146
+ optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=Q4U-avMkby-CunNXEERqvRZx9duC5i-6UmfF1376ciU,26336
147
+ optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=PAQz__9o_f5phlozhhXAB8JErBlS1jc4FYZkZkSYJuI,7312
148
148
  optimum/rbln/transformers/models/resnet/__init__.py,sha256=0QqtEQF1IMYgEmmfXMGarCDS8kJB5tzODfwTEzDVZRg,837
149
149
  optimum/rbln/transformers/models/resnet/configuration_resnet.py,sha256=KQd887jgNOl_Am3b407P2OvKtzkkeBS1cEhCfiN0tJg,769
150
150
  optimum/rbln/transformers/models/resnet/modeling_resnet.py,sha256=E8vg3Rw_KsHt6vaOg0ungZD7sXe0T4OMP0X8NFG1EXI,816
@@ -191,7 +191,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
191
191
  optimum/rbln/utils/runtime_utils.py,sha256=LoKNK3AQNV_BSScstIZWjICkJf265MnUgy360BOocVI,5454
192
192
  optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
193
193
  optimum/rbln/utils/submodule.py,sha256=w5mgPgncI740gVKMu3S-69DGNdUSI0bTZxegQGcZ98Y,5011
194
- optimum_rbln-0.8.1a2.dist-info/METADATA,sha256=qiUSuqA0Rledv8xJhhmMT4DgtLU-wPUuvDXnyGv78CY,5299
195
- optimum_rbln-0.8.1a2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
196
- optimum_rbln-0.8.1a2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
197
- optimum_rbln-0.8.1a2.dist-info/RECORD,,
194
+ optimum_rbln-0.8.1a3.dist-info/METADATA,sha256=e2Q0Hat0Lk5pWpTSk_kbikGUsOezSiz7nM-01GXFU8w,5299
195
+ optimum_rbln-0.8.1a3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
196
+ optimum_rbln-0.8.1a3.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
197
+ optimum_rbln-0.8.1a3.dist-info/RECORD,,