optimum-rbln 0.7.2rc1__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. optimum/rbln/__init__.py +8 -0
  2. optimum/rbln/__version__.py +9 -4
  3. optimum/rbln/diffusers/__init__.py +8 -0
  4. optimum/rbln/diffusers/modeling_diffusers.py +103 -109
  5. optimum/rbln/diffusers/models/autoencoders/vq_model.py +11 -3
  6. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +15 -8
  7. optimum/rbln/diffusers/pipelines/__init__.py +8 -0
  8. optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +7 -1
  9. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +25 -0
  10. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +107 -1
  11. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +25 -0
  12. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +3 -0
  13. optimum/rbln/modeling.py +4 -1
  14. optimum/rbln/modeling_base.py +16 -3
  15. optimum/rbln/ops/__init__.py +6 -2
  16. optimum/rbln/ops/attn.py +94 -85
  17. optimum/rbln/ops/flash_attn.py +46 -25
  18. optimum/rbln/ops/kv_cache_update.py +4 -4
  19. optimum/rbln/transformers/modeling_generic.py +3 -3
  20. optimum/rbln/transformers/models/bart/bart_architecture.py +10 -6
  21. optimum/rbln/transformers/models/bart/modeling_bart.py +6 -2
  22. optimum/rbln/transformers/models/bert/modeling_bert.py +1 -1
  23. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +264 -133
  24. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +276 -29
  25. optimum/rbln/transformers/models/exaone/exaone_architecture.py +11 -4
  26. optimum/rbln/transformers/models/gemma/gemma_architecture.py +11 -4
  27. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +5 -3
  28. optimum/rbln/transformers/models/midm/midm_architecture.py +5 -3
  29. optimum/rbln/transformers/models/phi/phi_architecture.py +9 -7
  30. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +50 -13
  31. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +60 -36
  32. optimum/rbln/transformers/models/t5/modeling_t5.py +3 -1
  33. optimum/rbln/transformers/models/t5/t5_architecture.py +65 -3
  34. optimum/rbln/transformers/models/whisper/whisper_architecture.py +26 -36
  35. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +1 -14
  36. optimum/rbln/utils/import_utils.py +7 -0
  37. {optimum_rbln-0.7.2rc1.dist-info → optimum_rbln-0.7.3.dist-info}/METADATA +1 -1
  38. {optimum_rbln-0.7.2rc1.dist-info → optimum_rbln-0.7.3.dist-info}/RECORD +40 -38
  39. {optimum_rbln-0.7.2rc1.dist-info → optimum_rbln-0.7.3.dist-info}/WHEEL +0 -0
  40. {optimum_rbln-0.7.2rc1.dist-info → optimum_rbln-0.7.3.dist-info}/licenses/LICENSE +0 -0
@@ -25,7 +25,7 @@ from transformers.modeling_outputs import (
25
25
  )
26
26
  from transformers.utils import logging
27
27
 
28
- from ....ops import register_rbln_custom_cache_update
28
+ from ....ops import register_rbln_custom_add_softmax_attention, register_rbln_custom_cache_update
29
29
 
30
30
 
31
31
  logger = logging.get_logger(__name__)
@@ -34,6 +34,7 @@ logger = logging.get_logger(__name__)
34
34
  class WhisperWrapper:
35
35
  def __init__(self, model, rbln_token_timestamps):
36
36
  register_rbln_custom_cache_update()
37
+ register_rbln_custom_add_softmax_attention()
37
38
  self.encoder = WhisperEncoderWrapper(model)
38
39
  self.decoder = WhisperDecoderWrapper(model, output_attentions=rbln_token_timestamps)
39
40
 
@@ -77,9 +78,9 @@ class WhisperEncoderWrapper(torch.nn.Module):
77
78
  # 3. update cross_attention's past_key_value to the device-dram for optimization.
78
79
  bidx = torch.tensor(0, dtype=torch.int16)
79
80
  axis = torch.tensor(1, dtype=torch.int16)
80
- cross_key_values = torch.ops.rbln_custom_ops.rbln_cache_update(cross_key_values, cross_kv, bidx, axis)
81
+ enc_output = torch.ops.rbln_custom_ops.rbln_cache_update(cross_key_values, cross_kv, bidx, axis)
81
82
 
82
- return cross_key_values
83
+ return enc_output
83
84
 
84
85
 
85
86
  class WhisperDecoderWrapper(torch.nn.Module):
@@ -118,7 +119,7 @@ class WhisperDecoderWrapper(torch.nn.Module):
118
119
  cross_past_key_values = cross_past_key_values + ((cross_kv_cache[i], cross_kv_cache[i + 1]),)
119
120
 
120
121
  # Decode
121
- sequence_output, self_present_key_values, cross_attentions = self.decoder(
122
+ sequence_output, cross_attentions = self.decoder(
122
123
  input_ids=decoder_input_ids,
123
124
  attention_mask=decoder_attention_mask,
124
125
  cache_position=cache_position,
@@ -127,9 +128,7 @@ class WhisperDecoderWrapper(torch.nn.Module):
127
128
  )
128
129
 
129
130
  lm_logits = self.proj_out(sequence_output)
130
-
131
131
  outputs = (lm_logits,)
132
- outputs += self_present_key_values
133
132
 
134
133
  if self.output_attentions:
135
134
  # decoder's cross attention is used for token_timestamps
@@ -167,26 +166,23 @@ class WhisperDecoder(nn.Module):
167
166
  # prepare causal_attn_mask
168
167
  attention_mask = _prepare_4d_causal_attention_mask(attention_mask, input_shape, inputs_embeds, cache_position)
169
168
 
170
- self_present_key_values = ()
171
169
  cross_attentions = ()
172
170
  # iterate decoder_layer
173
171
  for self_past_key_value, cross_past_key_value, decoder_layer in zip(
174
172
  self_past_key_values, cross_past_key_values, self.layers
175
173
  ):
176
- layer_outputs = decoder_layer(
174
+ hidden_states, cross_attn_weights = decoder_layer(
177
175
  hidden_states,
178
176
  attention_mask=attention_mask,
179
177
  self_past_key_value=self_past_key_value,
180
178
  cross_past_key_value=cross_past_key_value,
181
179
  cache_position=cache_position,
182
180
  )
183
- hidden_states = layer_outputs[0]
184
- self_present_key_values += layer_outputs[1]
185
- cross_attentions += (layer_outputs[2],)
181
+ cross_attentions += (cross_attn_weights,)
186
182
 
187
183
  hidden_states = self.layer_norm(hidden_states)
188
184
 
189
- return hidden_states, self_present_key_values, cross_attentions
185
+ return hidden_states, cross_attentions
190
186
 
191
187
 
192
188
  class WhisperDecoderLayer(nn.Module):
@@ -213,7 +209,7 @@ class WhisperDecoderLayer(nn.Module):
213
209
  # Self Attention Block
214
210
  residual = hidden_states
215
211
  hidden_states = self.self_attn_layer_norm(hidden_states)
216
- hidden_states, _, self_present_key_value = self.self_attn(
212
+ hidden_states = self.self_attn(
217
213
  hidden_states=hidden_states,
218
214
  past_key_value=self_past_key_value,
219
215
  attention_mask=attention_mask,
@@ -224,7 +220,7 @@ class WhisperDecoderLayer(nn.Module):
224
220
  # Cross-Attention Block
225
221
  residual = hidden_states
226
222
  hidden_states = self.encoder_attn_layer_norm(hidden_states)
227
- hidden_states, cross_attn_weights, cross_present_key_value = self.encoder_attn(
223
+ hidden_states, cross_attn_weights = self.encoder_attn(
228
224
  hidden_states=hidden_states,
229
225
  past_key_value=cross_past_key_value,
230
226
  )
@@ -237,7 +233,7 @@ class WhisperDecoderLayer(nn.Module):
237
233
  hidden_states = self.fc2(hidden_states)
238
234
  hidden_states = residual + hidden_states
239
235
 
240
- return hidden_states, self_present_key_value, cross_attn_weights
236
+ return hidden_states, cross_attn_weights
241
237
 
242
238
 
243
239
  class WhisperAttention(nn.Module):
@@ -258,19 +254,8 @@ class WhisperAttention(nn.Module):
258
254
 
259
255
 
260
256
  class WhisperSelfAttention(WhisperAttention):
261
- def rbln_cache_update(
262
- self,
263
- past_key_value: torch.Tensor,
264
- key_states: torch.Tensor,
265
- value_states: torch.Tensor,
266
- cache_position: torch.Tensor,
267
- ):
268
- s_idx = torch.tensor(cache_position, dtype=torch.int16)
269
- axis = torch.tensor(2, dtype=torch.int16)
270
-
271
- key_states = torch.ops.rbln_custom_ops.rbln_cache_update(past_key_value[0], key_states, s_idx, axis)
272
- value_states = torch.ops.rbln_custom_ops.rbln_cache_update(past_key_value[1], value_states, s_idx, axis)
273
- return key_states, value_states
257
+ def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int) -> torch.Tensor:
258
+ return tensor.view(bsz, seq_len, 1, self.num_heads, self.head_dim).transpose(1, 3)
274
259
 
275
260
  def forward(
276
261
  self,
@@ -285,22 +270,27 @@ class WhisperSelfAttention(WhisperAttention):
285
270
 
286
271
  key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
287
272
  value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
288
- key_states, value_states = self.rbln_cache_update(past_key_value, key_states, value_states, cache_position)
289
273
 
290
- attn_weights = torch.matmul(query_states, key_states.transpose(2, 3))
291
- attn_weights = attn_weights + attention_mask
292
- attn_weights = nn.functional.softmax(attn_weights, dim=-1)
274
+ attn_output = torch.ops.rbln_custom_ops.add_softmax_attn_decode(
275
+ query_states,
276
+ key_states,
277
+ value_states,
278
+ attention_mask.unsqueeze(2),
279
+ past_key_value[0].view(bsz, self.num_heads, 1, -1, self.head_dim),
280
+ past_key_value[1].view(bsz, self.num_heads, 1, -1, self.head_dim),
281
+ cache_position.expand(bsz, 1),
282
+ torch.tensor(1.0, dtype=torch.float32), # scale
283
+ )
293
284
 
294
- attn_output = torch.matmul(attn_weights, value_states)
295
285
  attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
296
286
  attn_output = attn_output.transpose(1, 2)
297
287
  attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)
298
288
  attn_output = self.out_proj(attn_output)
299
289
 
300
- return attn_output, attn_weights, (key_states, value_states)
290
+ return attn_output
301
291
 
302
292
 
303
- class WhisperCrossAttention(WhisperSelfAttention):
293
+ class WhisperCrossAttention(WhisperAttention):
304
294
  def forward(
305
295
  self,
306
296
  hidden_states: torch.Tensor,
@@ -322,4 +312,4 @@ class WhisperCrossAttention(WhisperSelfAttention):
322
312
  attn_output = attn_output.reshape(batch_size, query_len, self.embed_dim)
323
313
  attn_output = self.out_proj(attn_output)
324
314
 
325
- return attn_output, attn_weights, (key_states, value_states)
315
+ return attn_output, attn_weights
@@ -15,7 +15,6 @@
15
15
  import inspect
16
16
  from typing import TYPE_CHECKING, Optional, Union
17
17
 
18
- import torch
19
18
  from transformers import PretrainedConfig
20
19
 
21
20
  from ....modeling import RBLNModel
@@ -58,7 +57,7 @@ class RBLNXLMRobertaModel(RBLNModel):
58
57
  if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
59
58
  raise ValueError("`rbln_enc_max_seq_len` should be less or equal than max_position_embeddings!")
60
59
 
61
- signature_params = inspect.signature(cls.hf_class.forward).parameters.keys()
60
+ signature_params = inspect.signature(cls.get_hf_class().forward).parameters.keys()
62
61
 
63
62
  if rbln_model_input_names is None:
64
63
  for tokenizer in preprocessors:
@@ -99,15 +98,3 @@ class RBLNXLMRobertaModel(RBLNModel):
99
98
  )
100
99
  rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
101
100
  return rbln_config
102
-
103
- def forward(
104
- self,
105
- input_ids: "torch.Tensor",
106
- attention_mask: "torch.Tensor",
107
- token_type_ids: "torch.Tensor" = None,
108
- **kwargs,
109
- ):
110
- if token_type_ids is None:
111
- token_type_ids = torch.zeros_like(input=input_ids, dtype=torch.int64)
112
- output = super().forward(input_ids, attention_mask, token_type_ids)
113
- return output
@@ -28,6 +28,13 @@ class VersionCompat:
28
28
 
29
29
 
30
30
  RBLN_VERSION_COMPATS = {
31
+ "0.7.2": [
32
+ VersionCompat(
33
+ package_name="rebel-compiler",
34
+ min_version="0.7.2",
35
+ max_version="0.7.3",
36
+ ),
37
+ ],
31
38
  "0.2.0": [
32
39
  VersionCompat(
33
40
  package_name="rebel-compiler",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: optimum-rbln
3
- Version: 0.7.2rc1
3
+ Version: 0.7.3
4
4
  Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
5
5
  Project-URL: Homepage, https://rebellions.ai
6
6
  Project-URL: Documentation, https://docs.rbln.ai
@@ -1,31 +1,33 @@
1
- optimum/rbln/__init__.py,sha256=eHi15YM3989AcX52jka9rUmgAtlp1PHqMNwBEdOfuu8,6554
2
- optimum/rbln/__version__.py,sha256=TkyQ16Pgq_vGa41fqX3hjKTP9w2uZMVmlSqV9zpFgkU,414
3
- optimum/rbln/modeling.py,sha256=REImAAKO82CqSNABR-9E1jJEsWch9amSOwOOQhFEYLY,8283
4
- optimum/rbln/modeling_base.py,sha256=fQ0bI1Bb6GJquRXftmSSN9K-TXLhFltZJ6C-2w43xMg,21193
1
+ optimum/rbln/__init__.py,sha256=ZDzXcl-oAcYJhKjJMpotjbTih9awo7HzUb6T3MUEP6Q,6894
2
+ optimum/rbln/__version__.py,sha256=HuvheJ09dqzSRYh_uHb_Af7PEi7XiVW5Tk0cjtnAa6I,511
3
+ optimum/rbln/modeling.py,sha256=nJsAs5zs--VVOYGFjYNpqfxYIemJIK4Lr0WEzlDLdP0,8390
4
+ optimum/rbln/modeling_base.py,sha256=dNCL-BhrWCpuOVkZaj8-MW567Tf4lLo3p3Z3ldjWJfU,21779
5
5
  optimum/rbln/modeling_config.py,sha256=7104bxmrvKW4Q6XTruQayiIGl8GHDFmPkJ3cknMIInE,11335
6
- optimum/rbln/diffusers/__init__.py,sha256=pOyoXv3-JRzTBSwPKbgLS9H6F2K9dJdReEmpGhcLQYU,3283
7
- optimum/rbln/diffusers/modeling_diffusers.py,sha256=2m5gXEF7vYWzu3z7BxdDqOJ6i1aakDfWJov66h76noI,20733
6
+ optimum/rbln/diffusers/__init__.py,sha256=Hq87CbtiCy85YmK2SB-OmUyfv77oe3j4bsTenTRnu6w,3623
7
+ optimum/rbln/diffusers/modeling_diffusers.py,sha256=IS6Mlgexofap7f9Lefk5cKFP7ejSG_oWN3v2PX9_IDQ,20118
8
8
  optimum/rbln/diffusers/models/__init__.py,sha256=mkCvJyH1KcwrsUvYSq_bVC79oOfyqtBSFDyPS1_48wA,1478
9
9
  optimum/rbln/diffusers/models/controlnet.py,sha256=EM_HlzCdaZdnnK0oGpY2fQeigPqHhlwh4NHCzlmoumI,10512
10
10
  optimum/rbln/diffusers/models/autoencoders/__init__.py,sha256=dg17ZTUsiqTcbIaEE4fqew9uRbao0diQ21PXvRKIqKg,679
11
11
  optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py,sha256=rCbC32bJnfXtsLdVvNVVHpRAkCYy6jeCSwIZ-JSReWk,9220
12
12
  optimum/rbln/diffusers/models/autoencoders/vae.py,sha256=gB9HR7Bf7wpIXLv-Js4Pc3oyWRlqEe4cms4sI2AJicY,4380
13
- optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=GunIau02_-lodYZBzd0ktJSNRT5axEFIZxSAfj2Mlyo,5974
13
+ optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=b36QqPbayjApKivceQVVyQxHyR1ZOZ1ffuGgdALEPTQ,6117
14
14
  optimum/rbln/diffusers/models/transformers/__init__.py,sha256=V8rSR7WzHs-i8Cwb_MNxhY2NFbwPgxu24vGtkwl-6tk,706
15
15
  optimum/rbln/diffusers/models/transformers/prior_transformer.py,sha256=VG9cQo-_eppDvQSW1q1euAGBt1socUHetN_fIN2u1iU,6169
16
16
  optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=n_krmMgiRxWrG--567PNpk58EG_X7x7H4gidIkRvwjo,7308
17
17
  optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
18
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=xHnBzFrm7aNaolxrsotbjo9GkbNiNdTleXQoeqGLlhg,15540
19
- optimum/rbln/diffusers/pipelines/__init__.py,sha256=DAsM4eNks3hEY-bsUKSxRKmgwUWDGDlw82gfplSOdO8,2800
18
+ optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=QIjVWQQf8KBn5rU7lvipdm3gNBxZl7l6HCAj7p5FjLU,15977
19
+ optimum/rbln/diffusers/pipelines/__init__.py,sha256=5KLZ5LrpMzBya2e_3_PvEoPwG24U8JMexfw_ygZREKc,3140
20
20
  optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
21
21
  optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=JWKtnZYBIfgmbAo0SLFIvHBQCv2BPSFNvpcdjG4GUOY,4113
22
22
  optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=dGdw5cwJLS4CLv6IHskk5ZCcPgS7UDuHKbfOZ8ojNUs,35187
23
23
  optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py,sha256=7xCiXrH4ToCTHohVGFXqO7_f9G8HShYaHgZxoMZARkQ,33664
24
24
  optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py,sha256=Gzt2wg4dgFg0TV3Bu0cs8Xru3wVrxWUxxgciwZ-QKLE,44755
25
25
  optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py,sha256=RfwxNX_zQWFtvvFQJ5bt3qtHbdYdQV_3XLHm9WYCKOs,46084
26
- optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py,sha256=YFqA76_XiMNxPwqotbHug2kd7jCbOXOu5NlxG2hbaVs,808
27
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py,sha256=9szfe1NvOr1mgDnSPZvBGq1b65RElUrqLVhuErY3Dmw,2962
28
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py,sha256=WxBbHAZSAKDSWhFerPvUlIhhWEsejW4NmhwmWX-_b54,856
26
+ optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py,sha256=I4YQq2HfA3xONbWsdJ870IEJPyLWeCDDG-UCJsu9YO8,1035
27
+ optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py,sha256=aNFGOjth8tDvPrjYLbRWrkHr6p-8AFgcQx1Qay1fw70,904
28
+ optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py,sha256=BVXOpdrezWVTCibpuAMu9KkD5oEQUY00cSqm6dFbTnk,7020
29
+ optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py,sha256=fEs-WgJqWs5zvuCkKb7MuZokH9Mi6q-0DOEKxzfWxzo,932
30
+ optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py,sha256=Ad2ZYCXaMiYpB0mz-8X1CGhILxrVbt7rRIXt6IPwYBM,932
29
31
  optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py,sha256=Mf7tzrXetwCgt7LuXfkX-CX1hltLgNZdwF9bHxAbDJM,874
30
32
  optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py,sha256=gz6CbP4T6w8XH3PIGRIJXTmKFsChJIkwcAEAsiR5Ydg,830
31
33
  optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py,sha256=DgRLzO9HxtgE1jICmHoHaqeVXM4Ih-5uo2JqNMAPMcc,876
@@ -39,38 +41,38 @@ optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256=9iIMZYvp
39
41
  optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=OvB5bxX6HUiqJeIc3uukuEmUXYEx1pTqGNOtdG2l1m8,902
40
42
  optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=3aB1Rw-OgKytQOHwOaShbEvq_XVHPOGvsGm8pstEmKU,930
41
43
  optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py,sha256=MzVP1wscaO1sUIiBIPJqG6zuGyez9VUbA42-JSIm-mk,930
42
- optimum/rbln/ops/__init__.py,sha256=MbAHevg59fXQOFgrnsFFZ97s8-YrgvYCYML_sgKEEfM,816
43
- optimum/rbln/ops/attn.py,sha256=QYvSMg4ps_PenHwpLVhuYRoOAFvHIo19nY0ZEdj4nTE,9700
44
- optimum/rbln/ops/flash_attn.py,sha256=Zn5nkouY3kk6MBivQpPjgGh4oepjpi8F3tnTrmrNfpg,2304
45
- optimum/rbln/ops/kv_cache_update.py,sha256=9W4WCO1Dtfy0u5i978JJRa7uLbqrfR2lHuoPynb07fw,3143
44
+ optimum/rbln/ops/__init__.py,sha256=TxOmsN0u3PmyK4Sb89qbiC4rePOlkvUT7Lm6wVoTnY0,941
45
+ optimum/rbln/ops/attn.py,sha256=3EqU63Oj4zI4rLbkRycorsscXeD-IpKzt9N1MhkMa5o,10374
46
+ optimum/rbln/ops/flash_attn.py,sha256=wfyiCxDGf034IngzwRU160R7_DlKYpd-uWT0BDEGFks,3408
47
+ optimum/rbln/ops/kv_cache_update.py,sha256=pxf8kAptPaQF5xE8qItvmlFOq_sgim6ZERD7AVaOtec,3221
46
48
  optimum/rbln/transformers/__init__.py,sha256=AGo3BqVIZrsOzYsQAnnQ25HCstTPBclrXbvvUxVMlqE,4255
47
49
  optimum/rbln/transformers/modeling_alias.py,sha256=yx7FnZQWAnrWzivaO5hI7T6i-fyLzt2tMIXG2oDNbPo,1657
48
- optimum/rbln/transformers/modeling_generic.py,sha256=SD7XjpjnCn-ejNAUWgkaaHV6Fv2Y6K-hbXEXXb9W_H4,18177
50
+ optimum/rbln/transformers/modeling_generic.py,sha256=aaZWsqVDCRvH03q-Wen7DMfLr7Gy-u-I0mTw0aYqWjk,18195
49
51
  optimum/rbln/transformers/modeling_rope_utils.py,sha256=3zwkhYUyTZhxCJUSmwCc88iiY1TppRWEY9ShwUqNB2k,14293
50
52
  optimum/rbln/transformers/models/__init__.py,sha256=zGnYODR-_T65tv6jFjtC8l01LC4vjfm41bM4doCXRvY,3835
51
53
  optimum/rbln/transformers/models/auto/__init__.py,sha256=GvGbb3ZpMv-h6euXeZ42jSizoOfrL2O1uvpAnfKxYEo,1034
52
54
  optimum/rbln/transformers/models/auto/auto_factory.py,sha256=IK9jFrJ3EEzYQa9_aKpcp2TO68M5YGkA-HcfBVpA2QU,7027
53
55
  optimum/rbln/transformers/models/auto/modeling_auto.py,sha256=Un9qoqdy3dO8JBza_bTJF_6_fRVNM9QisihSgTRFI-o,3933
54
56
  optimum/rbln/transformers/models/bart/__init__.py,sha256=32HPe0_GIO0hp9U464Iv6Jd7M-1nop9g8hA1UZMHhyw,674
55
- optimum/rbln/transformers/models/bart/bart_architecture.py,sha256=dTkgMpNkyh4vT_mZU5tQ5bvH_lRZfRjaJ1gIHvJkmgs,5479
56
- optimum/rbln/transformers/models/bart/modeling_bart.py,sha256=ADRbE-5N3xJ60AzzjJ4BZs_THmB71qs4XTr9iFqsEqE,5667
57
+ optimum/rbln/transformers/models/bart/bart_architecture.py,sha256=Oo-Cdne7igKEex8wwP-gztKJHgs5GLHQjK1oc3IZIDE,5801
58
+ optimum/rbln/transformers/models/bart/modeling_bart.py,sha256=6IpWXlBCd02v66KF77oEWfrv8-FnPBYjjjL_8KZL3Ow,5835
57
59
  optimum/rbln/transformers/models/bert/__init__.py,sha256=YVV7k_laU6yJBawZrgjIWjRmIF-Y4oQQHqyf8lsraQs,691
58
- optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=-nv-sgmHkyHQIoQvF8-lXOJiL4eaa1pq8MpdN4uRi9M,4668
60
+ optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=p3utRqf3dv9_RkHwaMCa1EfXttNJkqCJUIZo3CeZ9YY,4674
59
61
  optimum/rbln/transformers/models/clip/__init__.py,sha256=H9vuBwrmFO0-CqZhXUrKF-uQL6igCqMlqrT1X_ELaAI,754
60
62
  optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=NiSm7bHs4SReHDUr53BBWSX0Y8bkKOeUSpsBDrp8YDw,6628
61
63
  optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=pDogsdpJKKB5rqnVFrRjwfhUvOSV-jZ3oARMsqSvOOQ,665
62
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=eT1fbKDL92yGBXtUKA_JibD4kiRPdf3tAFJHP5nlfH4,36646
63
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=2OO8MEgFgcl1VPrQXxqkvmRJJEuFdexwu8XqbHDbR6Y,27609
64
+ optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=m93-qKN7NMw3i0XDmFmttmRIRK4np_fWtLFlBb2RFgU,41351
65
+ optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=uGdPGcFrWm2gAwFLjfBiALwFsl49VGCReVi4NUfOPxM,38898
64
66
  optimum/rbln/transformers/models/dpt/__init__.py,sha256=gP1tkR3XMNlHq1GT87ugIVvb2o_1eAUg1JaniXjy1Lw,651
65
67
  optimum/rbln/transformers/models/dpt/modeling_dpt.py,sha256=ZsS2SOiqcA4azULB-WFEMQZbgIoOyVUKqVKqrw_tWzA,3430
66
68
  optimum/rbln/transformers/models/exaone/__init__.py,sha256=zYH_5tVa8-juEdsOIky7I33WSC3Zuhoq1upI0OHYeVw,859
67
- optimum/rbln/transformers/models/exaone/exaone_architecture.py,sha256=thzWLVz3eUcst4IPiOavta5QeXZw7JQwwfdIzQ_x6Ns,3029
69
+ optimum/rbln/transformers/models/exaone/exaone_architecture.py,sha256=ZM5vvz8KBipOiMVi8vqfvejkDSknW69xh4GrvJix-g0,3350
68
70
  optimum/rbln/transformers/models/exaone/modeling_exaone.py,sha256=WjyH8PmsMljSea7kJn_Cq1FJ96OXwXAoU7hv2Q8zUnI,1747
69
71
  optimum/rbln/transformers/models/gemma/__init__.py,sha256=7qUrekuBwCI9a6_Fq6j--FzCirRtUDz3ApY17mQS5Y4,648
70
- optimum/rbln/transformers/models/gemma/gemma_architecture.py,sha256=TvPWphXWG1snpnKakxQWUQZuST2GGJhCrRdUN1FLSIk,1965
72
+ optimum/rbln/transformers/models/gemma/gemma_architecture.py,sha256=bmCx405FVcffhgrQ53qMMZDbSlPxWOjucMHbvq19Gnw,2286
71
73
  optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=-U3w3cEOv3ps1S8aL7uOq6Kq2siCPZz7Z8MXhDQgQqo,1530
72
74
  optimum/rbln/transformers/models/gpt2/__init__.py,sha256=UwwPPYVTB9ywDWy314L2bNL0i7wfkQFA71qjgXicEPg,646
73
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=sa4rU6mtLwD9q97EnBPwfPvhN7ZvG44xFtIs4inGL7o,2866
75
+ optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=1IxqHmB-GlH2Dv2Yk4z0rMxL9CpxMGHhSu_x8_4cxvs,3008
74
76
  optimum/rbln/transformers/models/gpt2/modeling_gpt2.py,sha256=qBDanUk_O-HtOIVCA4IE3FYyCsnL9xIDK00vft-0caw,1490
75
77
  optimum/rbln/transformers/models/llama/__init__.py,sha256=jo_j_eIrHYGNEhR5lb6g3r5sO0ewe3fm2TWO8mLrT58,648
76
78
  optimum/rbln/transformers/models/llama/llama_architecture.py,sha256=S7MCPfyjG5eUqgaS-QNBB0ApUD6wnb5fR0RHq7k7-pA,728
@@ -78,43 +80,43 @@ optimum/rbln/transformers/models/llama/modeling_llama.py,sha256=Z3iony7icoFhRQ11
78
80
  optimum/rbln/transformers/models/llava_next/__init__.py,sha256=VLieyWm-UgvuNxw9B38wrL1Jsa09NBDX_ebABmdpTbs,670
79
81
  optimum/rbln/transformers/models/llava_next/modeling_llava_next.py,sha256=w_plsUOzxnhkQBhQeUqW9aJqGCvCvLtsx0XNKYjOprU,26203
80
82
  optimum/rbln/transformers/models/midm/__init__.py,sha256=UJSaErsF-z6dZERIS143WTaygffZyzEGqoQ2ZPDiM-c,855
81
- optimum/rbln/transformers/models/midm/midm_architecture.py,sha256=mueRmMGX6UplZb0C0RFdUOa9lsNH8YJHV6rYrDLOdlQ,5302
83
+ optimum/rbln/transformers/models/midm/midm_architecture.py,sha256=357iviqQkzI0s_lU_teH1sVOChNRDUABe3GA0HuhZZY,5444
82
84
  optimum/rbln/transformers/models/midm/modeling_midm.py,sha256=GG25BozEZriAL-OPFGpzOjyDtSFB-NfeiLJTDAqxe20,1734
83
85
  optimum/rbln/transformers/models/mistral/__init__.py,sha256=jpGdNtRLoV7WmuYpRGVXR27BTC8RIi_nhmvYlxuhqRc,652
84
86
  optimum/rbln/transformers/models/mistral/mistral_architecture.py,sha256=_aU8TE_tdvfo0K7QpgTlz_d0qwk4O82dl9268lPL16E,733
85
87
  optimum/rbln/transformers/models/mistral/modeling_mistral.py,sha256=7nrddoBIHf8S12LZWBUpotnvG3gND11vMQda9yYXJ-s,1560
86
88
  optimum/rbln/transformers/models/phi/__init__.py,sha256=mZLt1M7BbYEvSon5UlkniMUPa15SfjZFdw0kMSAF3VA,644
87
89
  optimum/rbln/transformers/models/phi/modeling_phi.py,sha256=j-6Pqd5rR2JE8I1pnKFlCi4nW5Dv3wZjoPWxohissoo,1516
88
- optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=v1MVf9YGxSV1jJ2w81IVvoz3vxjangZJH5rOZX02pPw,3996
90
+ optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=TueyqmjPXWmOPOxBm4dIFyd0X3iV1jgw0U6c26iCAPk,4090
89
91
  optimum/rbln/transformers/models/qwen2/__init__.py,sha256=RAMWc21W_2I6DH9xBjeNxPECmAcTrbKhSIefq3Lass0,648
90
92
  optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=9-aFDvjMzPNUyGOz0qo33RE18bUFGYZ3Wt_68zb5uJY,1530
91
93
  optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
92
94
  optimum/rbln/transformers/models/seq2seq/__init__.py,sha256=EmEMV4rOYqKyruX85d0fR73-b8N6BSD6CPcbpYdBuVk,651
93
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py,sha256=HG_-8ufRWIls67imU1547V0bk9FUWC0haOBL7eyRV6k,16365
94
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py,sha256=_TL4-vpjM9lfRnQUXRFm3mtVdz_h5B23k01uc_XnW5I,18376
95
+ optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py,sha256=9Pf9Y86ABDfhwIenlZqYfgqjbyFmtKBiPnbCD7zxw4M,18017
96
+ optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py,sha256=tvzacIZam1sIr_1BvvZ_fDr8u5dXAiYiynFdX9tArtY,18877
95
97
  optimum/rbln/transformers/models/t5/__init__.py,sha256=1skR1RmnG62WTAP3-F5P1x-V_ReFhMyirH3u56vWwvc,675
96
- optimum/rbln/transformers/models/t5/modeling_t5.py,sha256=MFs-3yYviV1QqSpsTB2GarTEs9wGH5AYofksLQLMBXg,8043
97
- optimum/rbln/transformers/models/t5/t5_architecture.py,sha256=kkjErS42mW2jv5O_xL7BaKobvvqy7BGmYOowKyHakvI,7189
98
+ optimum/rbln/transformers/models/t5/modeling_t5.py,sha256=8PAhPlYT1dmpcWM7hUMmZV9lPd4d75CuMuFen1pzr3Q,8088
99
+ optimum/rbln/transformers/models/t5/t5_architecture.py,sha256=AArCQhZRETVM583wlIRzMFOSYq7t2nzxaAeyhZxyxKk,9508
98
100
  optimum/rbln/transformers/models/wav2vec2/__init__.py,sha256=YpgA0K-vyg9veh0eL_jxauosbRpb_kpGKHvvQLBspKM,649
99
101
  optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py,sha256=JYJmV52j6cBwim4RanVJryfKnV80V96ol0A-oR6o7cg,3856
100
102
  optimum/rbln/transformers/models/whisper/__init__.py,sha256=ktnNe5ri3ycCWZ_W_voFB9y9-vgGgxS1X9s8LBRZmWc,665
101
103
  optimum/rbln/transformers/models/whisper/generation_whisper.py,sha256=GIHTca3b1VtW81kp7BzKQ7f77c2t9OsEsbZetripgDo,4582
102
104
  optimum/rbln/transformers/models/whisper/modeling_whisper.py,sha256=0nBADNxE0A1ozBbRutTBvxpo_Y1qkOycT_zronkN-ZU,15840
103
- optimum/rbln/transformers/models/whisper/whisper_architecture.py,sha256=eP3UgkwCRaaFjc5Jc4ZEiWxr3-L7oJx9KzpJ7eFkwUs,13158
105
+ optimum/rbln/transformers/models/whisper/whisper_architecture.py,sha256=Yn6yFpmw6IQbWlnpIMAdEUsNF6huXgaKzGMUZbhSLdo,12572
104
106
  optimum/rbln/transformers/models/xlm_roberta/__init__.py,sha256=fC7iNcdxBZ_6eOF2snStmf8r2M3c8O_-XcXnQEaHQCE,653
105
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=lKSeL3RUwIyfuca2jZ6SFV4N59EJS4UD59JMUfh3BiA,4767
107
+ optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=8YNLz0bc5ze-QuU8rN-QhUfGzlSUs3iMJiWTxO3o6AM,4366
106
108
  optimum/rbln/transformers/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
109
  optimum/rbln/transformers/utils/rbln_quantization.py,sha256=gwBVHf97sQgPNmGa0wq87E8mPyrtXYhMnO4X4sKp3c8,7639
108
110
  optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
109
111
  optimum/rbln/utils/decorator_utils.py,sha256=xu-TrsNi33SRC2a7DBsyoo6-pEQxWKZPZSmM9QlDe2Y,3745
110
112
  optimum/rbln/utils/hub.py,sha256=bNmOJGEO9Jfux4Cg8Xli-898I4mxk20KuwQOhP0Zs1U,4198
111
- optimum/rbln/utils/import_utils.py,sha256=ec-tISKIjUPHIfjzj6p-w78NVejWVBohb59f7J-HJvQ,4059
113
+ optimum/rbln/utils/import_utils.py,sha256=n4HcvZPzFW2ytl45qJ4ZQYlrRSoOb0-nnqhyT2_JA8M,4224
112
114
  optimum/rbln/utils/logging.py,sha256=VKKBmlQSdg6iZCGmAXaWYiW67K84jyp1QJhLQSSjPPE,3453
113
115
  optimum/rbln/utils/model_utils.py,sha256=DfD_Z2qvZHqcddXqnzTM1AN8khanj3-DXK2lJvVxDvs,1278
114
116
  optimum/rbln/utils/runtime_utils.py,sha256=5-DYniyP59nx-mrrbi7AqA77L85b4Cm5oLpaxidSyss,3699
115
117
  optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
116
118
  optimum/rbln/utils/submodule.py,sha256=oZoGrItB8WqY4i-K9WJPlLlcLohc1YGB9OHB8_XZw3A,4071
117
- optimum_rbln-0.7.2rc1.dist-info/METADATA,sha256=s8trW-wJbj_RzACjxvDgwVIwy4DxeTkdY7MB3QLjmhc,5301
118
- optimum_rbln-0.7.2rc1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
119
- optimum_rbln-0.7.2rc1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
120
- optimum_rbln-0.7.2rc1.dist-info/RECORD,,
119
+ optimum_rbln-0.7.3.dist-info/METADATA,sha256=XUq3lQDHCViSK9l_txn0h0psqls1nDN4ZFFJGGpifZ0,5298
120
+ optimum_rbln-0.7.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
121
+ optimum_rbln-0.7.3.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
122
+ optimum_rbln-0.7.3.dist-info/RECORD,,