optimum-rbln 0.8.1a2__py3-none-any.whl → 0.8.1a4__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.8.1a2'
-__version_tuple__ = version_tuple = (0, 8, 1, 'a2')
+__version__ = version = '0.8.1a4'
+__version_tuple__ = version_tuple = (0, 8, 1, 'a4')
@@ -185,7 +185,10 @@ class RBLNUNet2DConditionModel(RBLNModel):
         rbln_config: RBLNUNet2DConditionModelConfig,
         image_size: Optional[Tuple[int, int]] = None,
     ) -> Tuple[int, int]:
-        scale_factor = pipe.movq_scale_factor if hasattr(pipe, "movq_scale_factor") else pipe.vae_scale_factor
+        if hasattr(pipe, "movq"):
+            scale_factor = 2 ** (len(pipe.movq.config.block_out_channels) - 1)
+        else:
+            scale_factor = pipe.vae_scale_factor
 
         if image_size is None:
             if "Img2Img" in pipe.__class__.__name__:
@@ -177,8 +177,8 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
            )
        elif block_tables is None and local_block_tables is None:
            return False
-       else:
-           return True
+
+       return True
 
    def forward(
        self,
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import importlib
 import inspect
 from collections import deque
 from dataclasses import dataclass
@@ -124,23 +123,6 @@ class RBLNGemma3ForConditionalGeneration(RBLNModel):
     def can_generate(self):
         return True
 
-    @classmethod
-    def get_pytorch_model(cls, *args, **kwargs):
-        model = super().get_pytorch_model(*args, **kwargs)
-
-        with no_init_weights():
-            model_cls_name = model.model.language_model.__class__.__name__
-            causal_model_cls_name = model_cls_name.replace("TextModel", "ForCausalLM")
-            causal_model_cls = getattr(importlib.import_module("transformers"), causal_model_cls_name)
-            new_language_model = causal_model_cls(model.model.language_model.config)
-
-        new_language_model.lm_head = model.lm_head
-        new_language_model.model = model.model.language_model
-        model.model.language_model = new_language_model
-        model.lm_head = None
-        del model.lm_head
-        return model
-
     def __post_init__(self, **kwargs):
         self.vision_tower = LoopVisionTower(self.rbln_submodules[0])
         self.language_model = self.rbln_submodules[1]
@@ -559,7 +541,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
        (
            inputs,
            cache_position,
-           chunked_attention_mask,
+           padded_attention_mask,
            out_buffers,
            position_ids,
            position_embed,
@@ -571,7 +553,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
        )
        if not is_external_block_tables:
            local_block_tables = torch.tensor([batch_idx], dtype=torch.int16)
-           self.dec_attn_mask[batch_idx : batch_idx + 1] = chunked_attention_mask[:1]
+           self.dec_attn_mask[batch_idx : batch_idx + 1] = padded_attention_mask[:1]
 
        if self.rbln_config.use_attention_mask and self.rbln_config.use_position_ids:
            chunked_attention_mask = torch.zeros(1, self.rbln_config.max_seq_len, dtype=torch.float32)
@@ -587,18 +569,10 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
            else None
        )
 
-       # Not used in Gemma3 yet.
        if self.rbln_config.use_attention_mask:
            if self.rbln_config.use_position_ids:
-               chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] = self.dec_attn_mask[
-                   batch_idx, step : step + self.rbln_config.prefill_chunk_size
-               ]
-           else:
-               # Update attention mask to ensure proper causal behavior
-               if step >= self.rbln_config.prefill_chunk_size:
-                   chunked_attention_mask[:, :, :, step - self.rbln_config.prefill_chunk_size : step] = 1
-               chunked_attention_mask[:, :, :, step : step + self.rbln_config.prefill_chunk_size] = (
-                   self.causal_mask
+               chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] = (
+                   padded_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size]
                )
 
        # Define query position
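
For context, the rewritten branch exposes the padded attention mask one prefill chunk at a time. A standalone sketch of that update, with toy sizes standing in for the real RBLN config values:

import torch

# Toy sizes in place of rbln_config.max_seq_len / prefill_chunk_size.
max_seq_len = 16
prefill_chunk_size = 4
valid_len = 10  # non-padded tokens in this sequence

padded_attention_mask = torch.zeros(1, max_seq_len)
padded_attention_mask[0, :valid_len] = 1

chunked_attention_mask = torch.zeros(1, max_seq_len)
for step in range(0, max_seq_len, prefill_chunk_size):
    # Copy only the slice covering the current prefill chunk.
    chunked_attention_mask[0, step : step + prefill_chunk_size] = padded_attention_mask[
        0, step : step + prefill_chunk_size
    ]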
@@ -28,7 +28,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
     Qwen2_5_VisionPatchEmbed,
     Qwen2_5_VisionRotaryEmbedding,
     Qwen2_5_VisionTransformerPretrainedModel,
-    Qwen2_5_VLModel,
     Qwen2_5_VLRotaryEmbedding,
 )
 
@@ -391,14 +390,6 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
    def can_generate(self):
        return True
 
-   @classmethod
-   def get_pytorch_model(cls, *args, **kwargs):
-       model = super().get_pytorch_model(*args, **kwargs)
-       model.model.lm_head = model.lm_head
-       model.lm_head = None
-       del model.lm_head
-       return model
-
    @classmethod
    def update_kwargs(cls, kwargs):
        kwargs.update(
@@ -540,8 +531,7 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
            vision_tokens = input_id[0][vision_start_indices + 1]
            image_nums = (vision_tokens == image_token_id).sum()
            video_nums = (vision_tokens == video_token_id).sum()
-           position_ids, rope_deltas = Qwen2_5_VLModel.get_rope_index(
-               self,
+           position_ids, rope_deltas = self.get_rope_index(
                input_id,
                image_grid_thw[image_idx : image_idx + image_nums] if image_grid_thw is not None else None,
                video_grid_thw[video_idx : video_idx + video_nums] if video_grid_thw is not None else None,
@@ -3,14 +3,8 @@ from typing import Tuple
 
 import torch
 import torch.nn as nn
-from transformers import PreTrainedModel
 
 from ..decoderonly.decoderonly_architecture import (
-    DecoderOnlyAttention,
-    DecoderOnlyFlashAttention,
-    DecoderOnlyForCausalLM,
-    DecoderOnlyLayer,
-    DecoderOnlyModel,
     DecoderOnlyWrapper,
     apply_rotary_pos_emb,
 )
@@ -203,40 +197,3 @@ class Qwen2_5_VL_LanguageModelWrapper(DecoderOnlyWrapper):
            past_key_values,
            position_embeds,
        )
-
-   def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
-       new_layers = []
-
-       for layer in causal_lm.model.language_model.layers:
-           if self.attn_impl == "eager":
-               new_self_attn = DecoderOnlyAttention(
-                   layer.self_attn,
-                   self.use_attention_mask,
-                   self.use_position_ids,
-                   kvcache_block_size=self.kvcache_block_size,
-               )
-           elif self.attn_impl == "flash_attn":
-               new_self_attn = DecoderOnlyFlashAttention(
-                   layer.self_attn,
-                   kvcache_partition_len=self.kvcache_partition_len,
-                   kvcache_block_size=self.kvcache_block_size,
-                   use_attention_mask=self.use_attention_mask,
-                   use_position_ids=self.use_position_ids,
-               )
-           else:
-               raise NotImplementedError(f"Unknwon attn : {self.attn_impl}")
-
-           new_layer = DecoderOnlyLayer(layer, new_self_attn)
-           new_layers.append(new_layer)
-
-       new_model = DecoderOnlyModel(
-           causal_lm.model.language_model,
-           new_layers,
-           partition_len=self.kvcache_partition_len,
-           max_seq_len=max_seq_len,
-           kvcache_block_size=self.kvcache_block_size,
-           use_learned_pos_emb=self.use_learned_pos_emb,
-           sliding_window_layers=self.sliding_window_layers,
-       )
-       new_causal_lm = DecoderOnlyForCausalLM(causal_lm.model, new_model)
-       return new_causal_lm
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.1a2
+Version: 0.8.1a4
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
@@ -23,7 +23,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: <3.13,>=3.9
 Requires-Dist: accelerate>=1.0.1
-Requires-Dist: diffusers<=0.31.0
+Requires-Dist: diffusers==0.34.0
 Requires-Dist: packaging>=24.1
 Requires-Dist: torch==2.6.0
 Requires-Dist: torchaudio<=2.6.0
@@ -1,5 +1,5 @@
 optimum/rbln/__init__.py,sha256=qJJTumXhoFnawXGpeGJbAm4J4A9FFwD1SQ2MqcKDXoM,14436
-optimum/rbln/__version__.py,sha256=shHuZmlC1NNfixc5X-chXIonN-Rp-nB4KxJDhJNxa3k,519
+optimum/rbln/__version__.py,sha256=hdBV0MOKkAsGp6FVqyauDmHCC6gC0y_cyykn1_s49sg,519
 optimum/rbln/configuration_utils.py,sha256=o5oer7fBdE-MHLGNXoP35FjmuQbMmjEIDv0QE_k3kpo,32336
 optimum/rbln/modeling.py,sha256=ZlJ_tOCWiFjDIlwJ_B_HOCO0kBduWrBAbW9VSEVIAFg,12088
 optimum/rbln/modeling_base.py,sha256=5fUb1FaxfjApzJIkT8-SrPhuygGo_1Uc0i7UedawOeE,23393
@@ -29,7 +29,7 @@ optimum/rbln/diffusers/models/transformers/__init__.py,sha256=V8rSR7WzHs-i8Cwb_M
 optimum/rbln/diffusers/models/transformers/prior_transformer.py,sha256=SWoeVK--BRMwuXVABNVtonmzJDusx0iz4Q3EAvJ9uN8,5395
 optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=yF7sS0QvawowpV9hR5GeT8DaE8CCp3mj1njHHd9cKTc,6630
 optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
-optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=AVt3vbERl27K7G6cFnNh45AG-gdp_NNN4svxeQ2cjFw,15767
+optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=v3WS9EGKROE_QClXrxC7rmRko1BspAvAbeIfh83LK88,15832
 optimum/rbln/diffusers/pipelines/__init__.py,sha256=5KLZ5LrpMzBya2e_3_PvEoPwG24U8JMexfw_ygZREKc,3140
 optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
 optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=3S9dogIHW8Bqg5kIlCudhCQG-4g3FcdOPEWhBOf7CJA,4059
@@ -88,7 +88,7 @@ optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=0u1JTlO47qoH_-qxWG
 optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=vQYZDDdoddwA7yKc5zzrq2Zs9sax-0p8rNF_aYfF4bk,1006
 optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=cakn8RGo8gS3nmXdEqOfC2xUBOMGInROgLEbCOoLFR0,13398
 optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=YAn8J_lIq4IS-HM_gbi5Qov8_osxhWtBr5z_28QRbGM,49667
-optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=cVk7UTXIc5HryADMSJdMN1ENW4ZZM_S5I7RuCk6srEI,53273
+optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=J8eJqg56vPcYnnSP3EYU6X1e5BvdRdPyRcWWlcHzK1c,53256
 optimum/rbln/transformers/models/distilbert/__init__.py,sha256=zXL78SOEORTnUN_wrdoaDaYpntG8lcFHvPobM6jC0CI,841
 optimum/rbln/transformers/models/distilbert/configuration_distilbert.py,sha256=qfxCk15hW47i1oO8dCo-xntSbKyW-WOu30h28rIw6eA,766
 optimum/rbln/transformers/models/distilbert/modeling_distilbert.py,sha256=_Ubhbvrhi7jBC5uS9ITstIAE5VJVwAuDwvQ_Hrr6Ny4,797
@@ -106,7 +106,7 @@ optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=Ojvum34EhDHWfMB4
 optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_J9VyGiSReuEIvL0Uno0eaI,790
 optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=eupMGTHJGJNNrAZ3GE6M6GQBAQzBb7KFJvalyDmbM-A,3063
 optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=sgFQQbvEr15tb2Sxk_tgcgQFcjhKGbNSW6fm2u7-Vck,8609
-optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=BjICf2xmiUKGkraoGmkTRMIpgqjHrBCiJxc4XgeQw9I,39483
+optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=IMrDtY2o-kzDQd3VG5qx_I8HejSxqGPDV2EqTjzrrfM,38220
 optimum/rbln/transformers/models/gpt2/__init__.py,sha256=socBMIBZSiLbrVN12rQ4nL9gFeT0axMgz6SWaCaD4Ac,704
 optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=9sS6-EGapmow3rG9ViejK9qwrqy_X86VBxQ7u9x0Yqk,923
 optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=pnGgixjgjW7HULbs5211cC2guw_4e4-MlS69vdCRMMg,3206
@@ -143,8 +143,8 @@ optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=OKd7SXQLLtzPVolr
 optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
 optimum/rbln/transformers/models/qwen2_5_vl/__init__.py,sha256=rAW3DKQUzGL6EMwa5r1iLu94yhpiZpk6zfoD7TtYXrc,865
 optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py,sha256=U3ngIfkA58itqQZqTf-gbISMPoV7ipDttI7V2uwK_18,4155
-optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=x14mGDbCGVFNTw3yXoRJ70W6U5wAlIe_lxqk190P3z4,26636
-optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=G7zAMC2FSqz1dYisBUmBJF5hkyueaOrJSI2gD3nLvCQ,9068
+optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=Q4U-avMkby-CunNXEERqvRZx9duC5i-6UmfF1376ciU,26336
+optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=PAQz__9o_f5phlozhhXAB8JErBlS1jc4FYZkZkSYJuI,7312
 optimum/rbln/transformers/models/resnet/__init__.py,sha256=0QqtEQF1IMYgEmmfXMGarCDS8kJB5tzODfwTEzDVZRg,837
 optimum/rbln/transformers/models/resnet/configuration_resnet.py,sha256=KQd887jgNOl_Am3b407P2OvKtzkkeBS1cEhCfiN0tJg,769
 optimum/rbln/transformers/models/resnet/modeling_resnet.py,sha256=E8vg3Rw_KsHt6vaOg0ungZD7sXe0T4OMP0X8NFG1EXI,816
@@ -191,7 +191,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
 optimum/rbln/utils/runtime_utils.py,sha256=LoKNK3AQNV_BSScstIZWjICkJf265MnUgy360BOocVI,5454
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=w5mgPgncI740gVKMu3S-69DGNdUSI0bTZxegQGcZ98Y,5011
-optimum_rbln-0.8.1a2.dist-info/METADATA,sha256=qiUSuqA0Rledv8xJhhmMT4DgtLU-wPUuvDXnyGv78CY,5299
-optimum_rbln-0.8.1a2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-optimum_rbln-0.8.1a2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-optimum_rbln-0.8.1a2.dist-info/RECORD,,
+optimum_rbln-0.8.1a4.dist-info/METADATA,sha256=jo7yVVPhX8QJJK0WE1x2ReG_VbuNiyhAkAPj9Um90A8,5299
+optimum_rbln-0.8.1a4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.8.1a4.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.8.1a4.dist-info/RECORD,,