optimum-rbln 0.8.1a2__py3-none-any.whl → 0.8.1a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +4 -1
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +2 -2
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +4 -30
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +1 -11
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +0 -43
- {optimum_rbln-0.8.1a2.dist-info → optimum_rbln-0.8.1a4.dist-info}/METADATA +2 -2
- {optimum_rbln-0.8.1a2.dist-info → optimum_rbln-0.8.1a4.dist-info}/RECORD +10 -10
- {optimum_rbln-0.8.1a2.dist-info → optimum_rbln-0.8.1a4.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.1a2.dist-info → optimum_rbln-0.8.1a4.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__version__.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
18
18
|
version_tuple: VERSION_TUPLE
|
19
19
|
|
20
|
-
__version__ = version = '0.8.1a2'
|
21
|
-
__version_tuple__ = version_tuple = (0, 8, 1, 'a2')
|
20
|
+
__version__ = version = '0.8.1a4'
|
21
|
+
__version_tuple__ = version_tuple = (0, 8, 1, 'a4')
|
@@ -185,7 +185,10 @@ class RBLNUNet2DConditionModel(RBLNModel):
|
|
185
185
|
rbln_config: RBLNUNet2DConditionModelConfig,
|
186
186
|
image_size: Optional[Tuple[int, int]] = None,
|
187
187
|
) -> Tuple[int, int]:
|
188
|
-
|
188
|
+
if hasattr(pipe, "movq"):
|
189
|
+
scale_factor = 2 ** (len(pipe.movq.config.block_out_channels) - 1)
|
190
|
+
else:
|
191
|
+
scale_factor = pipe.vae_scale_factor
|
189
192
|
|
190
193
|
if image_size is None:
|
191
194
|
if "Img2Img" in pipe.__class__.__name__:
|
@@ -11,7 +11,6 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
|
-
import importlib
|
15
14
|
import inspect
|
16
15
|
from collections import deque
|
17
16
|
from dataclasses import dataclass
|
@@ -124,23 +123,6 @@ class RBLNGemma3ForConditionalGeneration(RBLNModel):
|
|
124
123
|
def can_generate(self):
|
125
124
|
return True
|
126
125
|
|
127
|
-
@classmethod
|
128
|
-
def get_pytorch_model(cls, *args, **kwargs):
|
129
|
-
model = super().get_pytorch_model(*args, **kwargs)
|
130
|
-
|
131
|
-
with no_init_weights():
|
132
|
-
model_cls_name = model.model.language_model.__class__.__name__
|
133
|
-
causal_model_cls_name = model_cls_name.replace("TextModel", "ForCausalLM")
|
134
|
-
causal_model_cls = getattr(importlib.import_module("transformers"), causal_model_cls_name)
|
135
|
-
new_language_model = causal_model_cls(model.model.language_model.config)
|
136
|
-
|
137
|
-
new_language_model.lm_head = model.lm_head
|
138
|
-
new_language_model.model = model.model.language_model
|
139
|
-
model.model.language_model = new_language_model
|
140
|
-
model.lm_head = None
|
141
|
-
del model.lm_head
|
142
|
-
return model
|
143
|
-
|
144
126
|
def __post_init__(self, **kwargs):
|
145
127
|
self.vision_tower = LoopVisionTower(self.rbln_submodules[0])
|
146
128
|
self.language_model = self.rbln_submodules[1]
|
@@ -559,7 +541,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
|
|
559
541
|
(
|
560
542
|
inputs,
|
561
543
|
cache_position,
|
562
|
-
|
544
|
+
padded_attention_mask,
|
563
545
|
out_buffers,
|
564
546
|
position_ids,
|
565
547
|
position_embed,
|
@@ -571,7 +553,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
|
|
571
553
|
)
|
572
554
|
if not is_external_block_tables:
|
573
555
|
local_block_tables = torch.tensor([batch_idx], dtype=torch.int16)
|
574
|
-
self.dec_attn_mask[batch_idx : batch_idx + 1] =
|
556
|
+
self.dec_attn_mask[batch_idx : batch_idx + 1] = padded_attention_mask[:1]
|
575
557
|
|
576
558
|
if self.rbln_config.use_attention_mask and self.rbln_config.use_position_ids:
|
577
559
|
chunked_attention_mask = torch.zeros(1, self.rbln_config.max_seq_len, dtype=torch.float32)
|
@@ -587,18 +569,10 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
|
|
587
569
|
else None
|
588
570
|
)
|
589
571
|
|
590
|
-
# Not used in Gemma3 yet.
|
591
572
|
if self.rbln_config.use_attention_mask:
|
592
573
|
if self.rbln_config.use_position_ids:
|
593
|
-
chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] =
|
594
|
-
|
595
|
-
]
|
596
|
-
else:
|
597
|
-
# Update attention mask to ensure proper causal behavior
|
598
|
-
if step >= self.rbln_config.prefill_chunk_size:
|
599
|
-
chunked_attention_mask[:, :, :, step - self.rbln_config.prefill_chunk_size : step] = 1
|
600
|
-
chunked_attention_mask[:, :, :, step : step + self.rbln_config.prefill_chunk_size] = (
|
601
|
-
self.causal_mask
|
574
|
+
chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] = (
|
575
|
+
padded_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size]
|
602
576
|
)
|
603
577
|
|
604
578
|
# Define query position
|
@@ -28,7 +28,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
|
|
28
28
|
Qwen2_5_VisionPatchEmbed,
|
29
29
|
Qwen2_5_VisionRotaryEmbedding,
|
30
30
|
Qwen2_5_VisionTransformerPretrainedModel,
|
31
|
-
Qwen2_5_VLModel,
|
32
31
|
Qwen2_5_VLRotaryEmbedding,
|
33
32
|
)
|
34
33
|
|
@@ -391,14 +390,6 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
|
|
391
390
|
def can_generate(self):
|
392
391
|
return True
|
393
392
|
|
394
|
-
@classmethod
|
395
|
-
def get_pytorch_model(cls, *args, **kwargs):
|
396
|
-
model = super().get_pytorch_model(*args, **kwargs)
|
397
|
-
model.model.lm_head = model.lm_head
|
398
|
-
model.lm_head = None
|
399
|
-
del model.lm_head
|
400
|
-
return model
|
401
|
-
|
402
393
|
@classmethod
|
403
394
|
def update_kwargs(cls, kwargs):
|
404
395
|
kwargs.update(
|
@@ -540,8 +531,7 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
|
|
540
531
|
vision_tokens = input_id[0][vision_start_indices + 1]
|
541
532
|
image_nums = (vision_tokens == image_token_id).sum()
|
542
533
|
video_nums = (vision_tokens == video_token_id).sum()
|
543
|
-
position_ids, rope_deltas = Qwen2_5_VLModel.get_rope_index(
|
544
|
-
self,
|
534
|
+
position_ids, rope_deltas = self.get_rope_index(
|
545
535
|
input_id,
|
546
536
|
image_grid_thw[image_idx : image_idx + image_nums] if image_grid_thw is not None else None,
|
547
537
|
video_grid_thw[video_idx : video_idx + video_nums] if video_grid_thw is not None else None,
|
@@ -3,14 +3,8 @@ from typing import Tuple
|
|
3
3
|
|
4
4
|
import torch
|
5
5
|
import torch.nn as nn
|
6
|
-
from transformers import PreTrainedModel
|
7
6
|
|
8
7
|
from ..decoderonly.decoderonly_architecture import (
|
9
|
-
DecoderOnlyAttention,
|
10
|
-
DecoderOnlyFlashAttention,
|
11
|
-
DecoderOnlyForCausalLM,
|
12
|
-
DecoderOnlyLayer,
|
13
|
-
DecoderOnlyModel,
|
14
8
|
DecoderOnlyWrapper,
|
15
9
|
apply_rotary_pos_emb,
|
16
10
|
)
|
@@ -203,40 +197,3 @@ class Qwen2_5_VL_LanguageModelWrapper(DecoderOnlyWrapper):
|
|
203
197
|
past_key_values,
|
204
198
|
position_embeds,
|
205
199
|
)
|
206
|
-
|
207
|
-
def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
|
208
|
-
new_layers = []
|
209
|
-
|
210
|
-
for layer in causal_lm.model.language_model.layers:
|
211
|
-
if self.attn_impl == "eager":
|
212
|
-
new_self_attn = DecoderOnlyAttention(
|
213
|
-
layer.self_attn,
|
214
|
-
self.use_attention_mask,
|
215
|
-
self.use_position_ids,
|
216
|
-
kvcache_block_size=self.kvcache_block_size,
|
217
|
-
)
|
218
|
-
elif self.attn_impl == "flash_attn":
|
219
|
-
new_self_attn = DecoderOnlyFlashAttention(
|
220
|
-
layer.self_attn,
|
221
|
-
kvcache_partition_len=self.kvcache_partition_len,
|
222
|
-
kvcache_block_size=self.kvcache_block_size,
|
223
|
-
use_attention_mask=self.use_attention_mask,
|
224
|
-
use_position_ids=self.use_position_ids,
|
225
|
-
)
|
226
|
-
else:
|
227
|
-
raise NotImplementedError(f"Unknwon attn : {self.attn_impl}")
|
228
|
-
|
229
|
-
new_layer = DecoderOnlyLayer(layer, new_self_attn)
|
230
|
-
new_layers.append(new_layer)
|
231
|
-
|
232
|
-
new_model = DecoderOnlyModel(
|
233
|
-
causal_lm.model.language_model,
|
234
|
-
new_layers,
|
235
|
-
partition_len=self.kvcache_partition_len,
|
236
|
-
max_seq_len=max_seq_len,
|
237
|
-
kvcache_block_size=self.kvcache_block_size,
|
238
|
-
use_learned_pos_emb=self.use_learned_pos_emb,
|
239
|
-
sliding_window_layers=self.sliding_window_layers,
|
240
|
-
)
|
241
|
-
new_causal_lm = DecoderOnlyForCausalLM(causal_lm.model, new_model)
|
242
|
-
return new_causal_lm
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: optimum-rbln
|
3
|
-
Version: 0.8.1a2
|
3
|
+
Version: 0.8.1a4
|
4
4
|
Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
|
5
5
|
Project-URL: Homepage, https://rebellions.ai
|
6
6
|
Project-URL: Documentation, https://docs.rbln.ai
|
@@ -23,7 +23,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
23
23
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
24
24
|
Requires-Python: <3.13,>=3.9
|
25
25
|
Requires-Dist: accelerate>=1.0.1
|
26
|
-
Requires-Dist: diffusers
|
26
|
+
Requires-Dist: diffusers==0.34.0
|
27
27
|
Requires-Dist: packaging>=24.1
|
28
28
|
Requires-Dist: torch==2.6.0
|
29
29
|
Requires-Dist: torchaudio<=2.6.0
|
@@ -1,5 +1,5 @@
|
|
1
1
|
optimum/rbln/__init__.py,sha256=qJJTumXhoFnawXGpeGJbAm4J4A9FFwD1SQ2MqcKDXoM,14436
|
2
|
-
optimum/rbln/__version__.py,sha256=
|
2
|
+
optimum/rbln/__version__.py,sha256=hdBV0MOKkAsGp6FVqyauDmHCC6gC0y_cyykn1_s49sg,519
|
3
3
|
optimum/rbln/configuration_utils.py,sha256=o5oer7fBdE-MHLGNXoP35FjmuQbMmjEIDv0QE_k3kpo,32336
|
4
4
|
optimum/rbln/modeling.py,sha256=ZlJ_tOCWiFjDIlwJ_B_HOCO0kBduWrBAbW9VSEVIAFg,12088
|
5
5
|
optimum/rbln/modeling_base.py,sha256=5fUb1FaxfjApzJIkT8-SrPhuygGo_1Uc0i7UedawOeE,23393
|
@@ -29,7 +29,7 @@ optimum/rbln/diffusers/models/transformers/__init__.py,sha256=V8rSR7WzHs-i8Cwb_M
|
|
29
29
|
optimum/rbln/diffusers/models/transformers/prior_transformer.py,sha256=SWoeVK--BRMwuXVABNVtonmzJDusx0iz4Q3EAvJ9uN8,5395
|
30
30
|
optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=yF7sS0QvawowpV9hR5GeT8DaE8CCp3mj1njHHd9cKTc,6630
|
31
31
|
optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
|
32
|
-
optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=
|
32
|
+
optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=v3WS9EGKROE_QClXrxC7rmRko1BspAvAbeIfh83LK88,15832
|
33
33
|
optimum/rbln/diffusers/pipelines/__init__.py,sha256=5KLZ5LrpMzBya2e_3_PvEoPwG24U8JMexfw_ygZREKc,3140
|
34
34
|
optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
|
35
35
|
optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=3S9dogIHW8Bqg5kIlCudhCQG-4g3FcdOPEWhBOf7CJA,4059
|
@@ -88,7 +88,7 @@ optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=0u1JTlO47qoH_-qxWG
|
|
88
88
|
optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=vQYZDDdoddwA7yKc5zzrq2Zs9sax-0p8rNF_aYfF4bk,1006
|
89
89
|
optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=cakn8RGo8gS3nmXdEqOfC2xUBOMGInROgLEbCOoLFR0,13398
|
90
90
|
optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=YAn8J_lIq4IS-HM_gbi5Qov8_osxhWtBr5z_28QRbGM,49667
|
91
|
-
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=
|
91
|
+
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=J8eJqg56vPcYnnSP3EYU6X1e5BvdRdPyRcWWlcHzK1c,53256
|
92
92
|
optimum/rbln/transformers/models/distilbert/__init__.py,sha256=zXL78SOEORTnUN_wrdoaDaYpntG8lcFHvPobM6jC0CI,841
|
93
93
|
optimum/rbln/transformers/models/distilbert/configuration_distilbert.py,sha256=qfxCk15hW47i1oO8dCo-xntSbKyW-WOu30h28rIw6eA,766
|
94
94
|
optimum/rbln/transformers/models/distilbert/modeling_distilbert.py,sha256=_Ubhbvrhi7jBC5uS9ITstIAE5VJVwAuDwvQ_Hrr6Ny4,797
|
@@ -106,7 +106,7 @@ optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=Ojvum34EhDHWfMB4
|
|
106
106
|
optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_J9VyGiSReuEIvL0Uno0eaI,790
|
107
107
|
optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=eupMGTHJGJNNrAZ3GE6M6GQBAQzBb7KFJvalyDmbM-A,3063
|
108
108
|
optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=sgFQQbvEr15tb2Sxk_tgcgQFcjhKGbNSW6fm2u7-Vck,8609
|
109
|
-
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=
|
109
|
+
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=IMrDtY2o-kzDQd3VG5qx_I8HejSxqGPDV2EqTjzrrfM,38220
|
110
110
|
optimum/rbln/transformers/models/gpt2/__init__.py,sha256=socBMIBZSiLbrVN12rQ4nL9gFeT0axMgz6SWaCaD4Ac,704
|
111
111
|
optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=9sS6-EGapmow3rG9ViejK9qwrqy_X86VBxQ7u9x0Yqk,923
|
112
112
|
optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=pnGgixjgjW7HULbs5211cC2guw_4e4-MlS69vdCRMMg,3206
|
@@ -143,8 +143,8 @@ optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=OKd7SXQLLtzPVolr
|
|
143
143
|
optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
|
144
144
|
optimum/rbln/transformers/models/qwen2_5_vl/__init__.py,sha256=rAW3DKQUzGL6EMwa5r1iLu94yhpiZpk6zfoD7TtYXrc,865
|
145
145
|
optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py,sha256=U3ngIfkA58itqQZqTf-gbISMPoV7ipDttI7V2uwK_18,4155
|
146
|
-
optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=
|
147
|
-
optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=
|
146
|
+
optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=Q4U-avMkby-CunNXEERqvRZx9duC5i-6UmfF1376ciU,26336
|
147
|
+
optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=PAQz__9o_f5phlozhhXAB8JErBlS1jc4FYZkZkSYJuI,7312
|
148
148
|
optimum/rbln/transformers/models/resnet/__init__.py,sha256=0QqtEQF1IMYgEmmfXMGarCDS8kJB5tzODfwTEzDVZRg,837
|
149
149
|
optimum/rbln/transformers/models/resnet/configuration_resnet.py,sha256=KQd887jgNOl_Am3b407P2OvKtzkkeBS1cEhCfiN0tJg,769
|
150
150
|
optimum/rbln/transformers/models/resnet/modeling_resnet.py,sha256=E8vg3Rw_KsHt6vaOg0ungZD7sXe0T4OMP0X8NFG1EXI,816
|
@@ -191,7 +191,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
|
|
191
191
|
optimum/rbln/utils/runtime_utils.py,sha256=LoKNK3AQNV_BSScstIZWjICkJf265MnUgy360BOocVI,5454
|
192
192
|
optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
|
193
193
|
optimum/rbln/utils/submodule.py,sha256=w5mgPgncI740gVKMu3S-69DGNdUSI0bTZxegQGcZ98Y,5011
|
194
|
-
optimum_rbln-0.8.
|
195
|
-
optimum_rbln-0.8.
|
196
|
-
optimum_rbln-0.8.
|
197
|
-
optimum_rbln-0.8.
|
194
|
+
optimum_rbln-0.8.1a4.dist-info/METADATA,sha256=jo7yVVPhX8QJJK0WE1x2ReG_VbuNiyhAkAPj9Um90A8,5299
|
195
|
+
optimum_rbln-0.8.1a4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
196
|
+
optimum_rbln-0.8.1a4.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
197
|
+
optimum_rbln-0.8.1a4.dist-info/RECORD,,
|
File without changes
|
File without changes
|