optimum-rbln 0.7.2rc2__py3-none-any.whl → 0.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +8 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/diffusers/__init__.py +8 -0
- optimum/rbln/diffusers/modeling_diffusers.py +103 -117
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +11 -3
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +15 -8
- optimum/rbln/diffusers/pipelines/__init__.py +8 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +7 -1
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +25 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +107 -1
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +25 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +3 -0
- optimum/rbln/modeling.py +4 -1
- optimum/rbln/modeling_base.py +16 -3
- optimum/rbln/ops/__init__.py +6 -2
- optimum/rbln/ops/attn.py +94 -85
- optimum/rbln/ops/flash_attn.py +46 -25
- optimum/rbln/ops/kv_cache_update.py +4 -4
- optimum/rbln/transformers/modeling_generic.py +3 -3
- optimum/rbln/transformers/models/bart/bart_architecture.py +10 -6
- optimum/rbln/transformers/models/bart/modeling_bart.py +6 -2
- optimum/rbln/transformers/models/bert/modeling_bert.py +1 -1
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +264 -133
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +276 -29
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +11 -4
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +11 -4
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +5 -3
- optimum/rbln/transformers/models/midm/midm_architecture.py +5 -3
- optimum/rbln/transformers/models/phi/phi_architecture.py +9 -7
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +50 -13
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +60 -36
- optimum/rbln/transformers/models/t5/modeling_t5.py +3 -1
- optimum/rbln/transformers/models/t5/t5_architecture.py +65 -3
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +26 -36
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +1 -14
- optimum/rbln/utils/import_utils.py +7 -0
- {optimum_rbln-0.7.2rc2.dist-info → optimum_rbln-0.7.3.dist-info}/METADATA +1 -1
- {optimum_rbln-0.7.2rc2.dist-info → optimum_rbln-0.7.3.dist-info}/RECORD +40 -38
- {optimum_rbln-0.7.2rc2.dist-info → optimum_rbln-0.7.3.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.7.2rc2.dist-info → optimum_rbln-0.7.3.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/whisper/whisper_architecture.py

@@ -25,7 +25,7 @@ from transformers.modeling_outputs import (
 )
 from transformers.utils import logging

-from ....ops import register_rbln_custom_cache_update
+from ....ops import register_rbln_custom_add_softmax_attention, register_rbln_custom_cache_update


 logger = logging.get_logger(__name__)
@@ -34,6 +34,7 @@ logger = logging.get_logger(__name__)
 class WhisperWrapper:
     def __init__(self, model, rbln_token_timestamps):
         register_rbln_custom_cache_update()
+        register_rbln_custom_add_softmax_attention()
         self.encoder = WhisperEncoderWrapper(model)
         self.decoder = WhisperDecoderWrapper(model, output_attentions=rbln_token_timestamps)

@@ -77,9 +78,9 @@ class WhisperEncoderWrapper(torch.nn.Module):
         # 3. update cross_attention's past_key_value to the device-dram for optimization.
         bidx = torch.tensor(0, dtype=torch.int16)
         axis = torch.tensor(1, dtype=torch.int16)
-
+        enc_output = torch.ops.rbln_custom_ops.rbln_cache_update(cross_key_values, cross_kv, bidx, axis)

-        return
+        return enc_output


 class WhisperDecoderWrapper(torch.nn.Module):
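The encoder hunk above changes what the compiled graph returns: the forward now assigns the result of the `rbln_cache_update` custom op to `enc_output` and returns it explicitly. As a rough guide, here is a minimal eager-mode sketch of what a cache-update op with this call shape computes; the function name and argument names mirror the call above, but the exact semantics of the RBLN kernel are an assumption:

import torch

def rbln_cache_update_ref(cache: torch.Tensor, state: torch.Tensor,
                          idx: torch.Tensor, axis: torch.Tensor) -> torch.Tensor:
    # Assumed semantics: write `state` into `cache` starting at index `idx`
    # along dimension `axis`, and return the updated buffer so the graph
    # has a real output tensor.
    i, ax = int(idx), int(axis)
    cache.narrow(ax, i, state.shape[ax]).copy_(state)  # in-place slice write
    return cache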
@@ -118,7 +119,7 @@ class WhisperDecoderWrapper(torch.nn.Module):
             cross_past_key_values = cross_past_key_values + ((cross_kv_cache[i], cross_kv_cache[i + 1]),)

         # Decode
-        sequence_output,
+        sequence_output, cross_attentions = self.decoder(
             input_ids=decoder_input_ids,
             attention_mask=decoder_attention_mask,
             cache_position=cache_position,
@@ -127,9 +128,7 @@ class WhisperDecoderWrapper(torch.nn.Module):
         )

         lm_logits = self.proj_out(sequence_output)
-
         outputs = (lm_logits,)
-        outputs += self_present_key_values

         if self.output_attentions:
             # deocder's cross attention is used for token_timestamps
@@ -167,26 +166,23 @@ class WhisperDecoder(nn.Module):
         # prepare casual_attn_mask
         attention_mask = _prepare_4d_causal_attention_mask(attention_mask, input_shape, inputs_embeds, cache_position)

-        self_present_key_values = ()
         cross_attentions = ()
         # iterate decoder_layer
         for self_past_key_value, cross_past_key_value, decoder_layer in zip(
             self_past_key_values, cross_past_key_values, self.layers
         ):
-
+            hidden_states, cross_attn_weights = decoder_layer(
                 hidden_states,
                 attention_mask=attention_mask,
                 self_past_key_value=self_past_key_value,
                 cross_past_key_value=cross_past_key_value,
                 cache_position=cache_position,
             )
-
-            self_present_key_values += layer_outputs[1]
-            cross_attentions += (layer_outputs[2],)
+            cross_attentions += (cross_attn_weights,)

         hidden_states = self.layer_norm(hidden_states)

-        return hidden_states,
+        return hidden_states, cross_attentions
@@ -213,7 +209,7 @@ class WhisperDecoderLayer(nn.Module):
         # Self Attention Block
         residual = hidden_states
         hidden_states = self.self_attn_layer_norm(hidden_states)
-        hidden_states
+        hidden_states = self.self_attn(
             hidden_states=hidden_states,
             past_key_value=self_past_key_value,
             attention_mask=attention_mask,
@@ -224,7 +220,7 @@ class WhisperDecoderLayer(nn.Module):
         # Cross-Attention Block
         residual = hidden_states
         hidden_states = self.encoder_attn_layer_norm(hidden_states)
-        hidden_states, cross_attn_weights
+        hidden_states, cross_attn_weights = self.encoder_attn(
             hidden_states=hidden_states,
             past_key_value=cross_past_key_value,
         )
@@ -237,7 +233,7 @@ class WhisperDecoderLayer(nn.Module):
         hidden_states = self.fc2(hidden_states)
         hidden_states = residual + hidden_states

-        return hidden_states,
+        return hidden_states, cross_attn_weights


 class WhisperAttention(nn.Module):
@@ -258,19 +254,8 @@ class WhisperAttention(nn.Module):


 class WhisperSelfAttention(WhisperAttention):
-    def rbln_cache_update(
-        self,
-        past_key_value: torch.Tensor,
-        key_states: torch.Tensor,
-        value_states: torch.Tensor,
-        cache_position: torch.Tensor,
-    ):
-        s_idx = torch.tensor(cache_position, dtype=torch.int16)
-        axis = torch.tensor(2, dtype=torch.int16)
-
-        key_states = torch.ops.rbln_custom_ops.rbln_cache_update(past_key_value[0], key_states, s_idx, axis)
-        value_states = torch.ops.rbln_custom_ops.rbln_cache_update(past_key_value[1], value_states, s_idx, axis)
-        return key_states, value_states
+    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int) -> torch.Tensor:
+        return tensor.view(bsz, seq_len, 1, self.num_heads, self.head_dim).transpose(1, 3)

     def forward(
         self,
@@ -285,22 +270,27 @@ class WhisperSelfAttention(WhisperAttention):

         key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
         value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
-        key_states, value_states = self.rbln_cache_update(past_key_value, key_states, value_states, cache_position)

-
-
-
+        attn_output = torch.ops.rbln_custom_ops.add_softmax_attn_decode(
+            query_states,
+            key_states,
+            value_states,
+            attention_mask.unsqueeze(2),
+            past_key_value[0].view(bsz, self.num_heads, 1, -1, self.head_dim),
+            past_key_value[1].view(bsz, self.num_heads, 1, -1, self.head_dim),
+            cache_position.expand(bsz, 1),
+            torch.tensor(1.0, dtype=torch.float32),  # scale
+        )

-        attn_output = torch.matmul(attn_weights, value_states)
         attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
         attn_output = attn_output.transpose(1, 2)
         attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)
         attn_output = self.out_proj(attn_output)

-        return attn_output
+        return attn_output


-class WhisperCrossAttention(WhisperSelfAttention):
+class WhisperCrossAttention(WhisperAttention):
     def forward(
         self,
         hidden_states: torch.Tensor,
@@ -322,4 +312,4 @@ class WhisperCrossAttention(WhisperSelfAttention):
         attn_output = attn_output.reshape(batch_size, query_len, self.embed_dim)
         attn_output = self.out_proj(attn_output)

-        return attn_output, attn_weights
+        return attn_output, attn_weights
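The decoder-side rewrite is the core of this file's diff: the separate `rbln_cache_update` call plus matmul/softmax path in self-attention is replaced by a single fused custom op, `rbln_custom_ops.add_softmax_attn_decode`, which takes the query/key/value states, the additive attention mask, both cache buffers, the current cache position, and a scale. Below is a plausible eager-mode reference for one decode step; the shapes are simplified and the in-place cache write is an assumption inferred from the arguments above, not the op's actual kernel. The scale of 1.0 is consistent with Whisper pre-scaling its query states by 1/sqrt(head_dim) inside the attention module.

import torch

def add_softmax_attn_decode_ref(
    q: torch.Tensor,        # (batch, heads, 1, head_dim) query for the new token
    k: torch.Tensor,        # (batch, heads, 1, head_dim) key for the new token
    v: torch.Tensor,        # (batch, heads, 1, head_dim) value for the new token
    mask: torch.Tensor,     # additive mask, broadcastable to (batch, heads, 1, max_seq)
    k_cache: torch.Tensor,  # (batch, heads, max_seq, head_dim), updated in place
    v_cache: torch.Tensor,  # (batch, heads, max_seq, head_dim), updated in place
    seq_idx: int,           # decode position of the new token
    scale: float,
) -> torch.Tensor:
    # 1. Fold the KV-cache update into the attention step.
    k_cache[:, :, seq_idx : seq_idx + 1, :] = k
    v_cache[:, :, seq_idx : seq_idx + 1, :] = v
    # 2. Masked softmax attention over the full cache; the mask hides
    #    positions that have not been written yet.
    scores = torch.matmul(q, k_cache.transpose(-1, -2)) * scale + mask
    attn = torch.softmax(scores, dim=-1)
    return torch.matmul(attn, v_cache)  # (batch, heads, 1, head_dim)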
optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py

@@ -15,7 +15,6 @@
 import inspect
 from typing import TYPE_CHECKING, Optional, Union

-import torch
 from transformers import PretrainedConfig

 from ....modeling import RBLNModel
@@ -58,7 +57,7 @@ class RBLNXLMRobertaModel(RBLNModel):
         if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
             raise ValueError("`rbln_enc_max_seq_len` should be less or equal than max_position_embeddings!")

-        signature_params = inspect.signature(cls.
+        signature_params = inspect.signature(cls.get_hf_class().forward).parameters.keys()

         if rbln_model_input_names is None:
             for tokenizer in preprocessors:
@@ -99,15 +98,3 @@ class RBLNXLMRobertaModel(RBLNModel):
         )
         rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
         return rbln_config
-
-    def forward(
-        self,
-        input_ids: "torch.Tensor",
-        attention_mask: "torch.Tensor",
-        token_type_ids: "torch.Tensor" = None,
-        **kwargs,
-    ):
-        if token_type_ids is None:
-            token_type_ids = torch.zeros_like(input=input_ids, dtype=torch.int64)
-        output = super().forward(input_ids, attention_mask, token_type_ids)
-        return output
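With the `forward` override gone, `RBLNXLMRobertaModel` no longer fills in a zero `token_type_ids` tensor itself (hence the dropped `import torch`); presumably the generic base-class forward now covers that case, though this is not visible in the hunk above. A hypothetical caller that depended on the old default can build the tensor explicitly:

import torch
from optimum.rbln import RBLNXLMRobertaModel

# "my-compiled-xlm-roberta" is a placeholder path, not a tested checkpoint.
model = RBLNXLMRobertaModel.from_pretrained("my-compiled-xlm-roberta")

input_ids = torch.tensor([[0, 1524, 18, 2]])   # example token ids
attention_mask = torch.ones_like(input_ids)
token_type_ids = torch.zeros_like(input_ids)   # what the removed override filled in
output = model(input_ids, attention_mask, token_type_ids)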
{optimum_rbln-0.7.2rc2.dist-info → optimum_rbln-0.7.3.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.7.2rc2
+Version: 0.7.3
 Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
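A quick way to confirm an installed build matches this release; this assumes the top-level package re-exports `__version__` from optimum/rbln/__version__.py (listed in the RECORD below):

import optimum.rbln

print(optimum.rbln.__version__)  # expected: 0.7.3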
{optimum_rbln-0.7.2rc2.dist-info → optimum_rbln-0.7.3.dist-info}/RECORD

@@ -1,31 +1,33 @@
-optimum/rbln/__init__.py,sha256=
-optimum/rbln/__version__.py,sha256=
-optimum/rbln/modeling.py,sha256=
-optimum/rbln/modeling_base.py,sha256=
+optimum/rbln/__init__.py,sha256=ZDzXcl-oAcYJhKjJMpotjbTih9awo7HzUb6T3MUEP6Q,6894
+optimum/rbln/__version__.py,sha256=HuvheJ09dqzSRYh_uHb_Af7PEi7XiVW5Tk0cjtnAa6I,511
+optimum/rbln/modeling.py,sha256=nJsAs5zs--VVOYGFjYNpqfxYIemJIK4Lr0WEzlDLdP0,8390
+optimum/rbln/modeling_base.py,sha256=dNCL-BhrWCpuOVkZaj8-MW567Tf4lLo3p3Z3ldjWJfU,21779
 optimum/rbln/modeling_config.py,sha256=7104bxmrvKW4Q6XTruQayiIGl8GHDFmPkJ3cknMIInE,11335
-optimum/rbln/diffusers/__init__.py,sha256=
-optimum/rbln/diffusers/modeling_diffusers.py,sha256=
+optimum/rbln/diffusers/__init__.py,sha256=Hq87CbtiCy85YmK2SB-OmUyfv77oe3j4bsTenTRnu6w,3623
+optimum/rbln/diffusers/modeling_diffusers.py,sha256=IS6Mlgexofap7f9Lefk5cKFP7ejSG_oWN3v2PX9_IDQ,20118
 optimum/rbln/diffusers/models/__init__.py,sha256=mkCvJyH1KcwrsUvYSq_bVC79oOfyqtBSFDyPS1_48wA,1478
 optimum/rbln/diffusers/models/controlnet.py,sha256=EM_HlzCdaZdnnK0oGpY2fQeigPqHhlwh4NHCzlmoumI,10512
 optimum/rbln/diffusers/models/autoencoders/__init__.py,sha256=dg17ZTUsiqTcbIaEE4fqew9uRbao0diQ21PXvRKIqKg,679
 optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py,sha256=rCbC32bJnfXtsLdVvNVVHpRAkCYy6jeCSwIZ-JSReWk,9220
 optimum/rbln/diffusers/models/autoencoders/vae.py,sha256=gB9HR7Bf7wpIXLv-Js4Pc3oyWRlqEe4cms4sI2AJicY,4380
-optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=
+optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=b36QqPbayjApKivceQVVyQxHyR1ZOZ1ffuGgdALEPTQ,6117
 optimum/rbln/diffusers/models/transformers/__init__.py,sha256=V8rSR7WzHs-i8Cwb_MNxhY2NFbwPgxu24vGtkwl-6tk,706
 optimum/rbln/diffusers/models/transformers/prior_transformer.py,sha256=VG9cQo-_eppDvQSW1q1euAGBt1socUHetN_fIN2u1iU,6169
 optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=n_krmMgiRxWrG--567PNpk58EG_X7x7H4gidIkRvwjo,7308
 optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
-optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=
-optimum/rbln/diffusers/pipelines/__init__.py,sha256=
+optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=QIjVWQQf8KBn5rU7lvipdm3gNBxZl7l6HCAj7p5FjLU,15977
+optimum/rbln/diffusers/pipelines/__init__.py,sha256=5KLZ5LrpMzBya2e_3_PvEoPwG24U8JMexfw_ygZREKc,3140
 optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
 optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=JWKtnZYBIfgmbAo0SLFIvHBQCv2BPSFNvpcdjG4GUOY,4113
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=dGdw5cwJLS4CLv6IHskk5ZCcPgS7UDuHKbfOZ8ojNUs,35187
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py,sha256=7xCiXrH4ToCTHohVGFXqO7_f9G8HShYaHgZxoMZARkQ,33664
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py,sha256=Gzt2wg4dgFg0TV3Bu0cs8Xru3wVrxWUxxgciwZ-QKLE,44755
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py,sha256=RfwxNX_zQWFtvvFQJ5bt3qtHbdYdQV_3XLHm9WYCKOs,46084
-optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py,sha256=
-optimum/rbln/diffusers/pipelines/kandinsky2_2/
-optimum/rbln/diffusers/pipelines/kandinsky2_2/
+optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py,sha256=I4YQq2HfA3xONbWsdJ870IEJPyLWeCDDG-UCJsu9YO8,1035
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py,sha256=aNFGOjth8tDvPrjYLbRWrkHr6p-8AFgcQx1Qay1fw70,904
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py,sha256=BVXOpdrezWVTCibpuAMu9KkD5oEQUY00cSqm6dFbTnk,7020
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py,sha256=fEs-WgJqWs5zvuCkKb7MuZokH9Mi6q-0DOEKxzfWxzo,932
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py,sha256=Ad2ZYCXaMiYpB0mz-8X1CGhILxrVbt7rRIXt6IPwYBM,932
 optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py,sha256=Mf7tzrXetwCgt7LuXfkX-CX1hltLgNZdwF9bHxAbDJM,874
 optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py,sha256=gz6CbP4T6w8XH3PIGRIJXTmKFsChJIkwcAEAsiR5Ydg,830
 optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py,sha256=DgRLzO9HxtgE1jICmHoHaqeVXM4Ih-5uo2JqNMAPMcc,876
@@ -39,38 +41,38 @@ optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256=9iIMZYvp
 optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=OvB5bxX6HUiqJeIc3uukuEmUXYEx1pTqGNOtdG2l1m8,902
 optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=3aB1Rw-OgKytQOHwOaShbEvq_XVHPOGvsGm8pstEmKU,930
 optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py,sha256=MzVP1wscaO1sUIiBIPJqG6zuGyez9VUbA42-JSIm-mk,930
-optimum/rbln/ops/__init__.py,sha256=
-optimum/rbln/ops/attn.py,sha256=
-optimum/rbln/ops/flash_attn.py,sha256=
-optimum/rbln/ops/kv_cache_update.py,sha256=
+optimum/rbln/ops/__init__.py,sha256=TxOmsN0u3PmyK4Sb89qbiC4rePOlkvUT7Lm6wVoTnY0,941
+optimum/rbln/ops/attn.py,sha256=3EqU63Oj4zI4rLbkRycorsscXeD-IpKzt9N1MhkMa5o,10374
+optimum/rbln/ops/flash_attn.py,sha256=wfyiCxDGf034IngzwRU160R7_DlKYpd-uWT0BDEGFks,3408
+optimum/rbln/ops/kv_cache_update.py,sha256=pxf8kAptPaQF5xE8qItvmlFOq_sgim6ZERD7AVaOtec,3221
 optimum/rbln/transformers/__init__.py,sha256=AGo3BqVIZrsOzYsQAnnQ25HCstTPBclrXbvvUxVMlqE,4255
 optimum/rbln/transformers/modeling_alias.py,sha256=yx7FnZQWAnrWzivaO5hI7T6i-fyLzt2tMIXG2oDNbPo,1657
-optimum/rbln/transformers/modeling_generic.py,sha256=
+optimum/rbln/transformers/modeling_generic.py,sha256=aaZWsqVDCRvH03q-Wen7DMfLr7Gy-u-I0mTw0aYqWjk,18195
 optimum/rbln/transformers/modeling_rope_utils.py,sha256=3zwkhYUyTZhxCJUSmwCc88iiY1TppRWEY9ShwUqNB2k,14293
 optimum/rbln/transformers/models/__init__.py,sha256=zGnYODR-_T65tv6jFjtC8l01LC4vjfm41bM4doCXRvY,3835
 optimum/rbln/transformers/models/auto/__init__.py,sha256=GvGbb3ZpMv-h6euXeZ42jSizoOfrL2O1uvpAnfKxYEo,1034
 optimum/rbln/transformers/models/auto/auto_factory.py,sha256=IK9jFrJ3EEzYQa9_aKpcp2TO68M5YGkA-HcfBVpA2QU,7027
 optimum/rbln/transformers/models/auto/modeling_auto.py,sha256=Un9qoqdy3dO8JBza_bTJF_6_fRVNM9QisihSgTRFI-o,3933
 optimum/rbln/transformers/models/bart/__init__.py,sha256=32HPe0_GIO0hp9U464Iv6Jd7M-1nop9g8hA1UZMHhyw,674
-optimum/rbln/transformers/models/bart/bart_architecture.py,sha256=
-optimum/rbln/transformers/models/bart/modeling_bart.py,sha256=
+optimum/rbln/transformers/models/bart/bart_architecture.py,sha256=Oo-Cdne7igKEex8wwP-gztKJHgs5GLHQjK1oc3IZIDE,5801
+optimum/rbln/transformers/models/bart/modeling_bart.py,sha256=6IpWXlBCd02v66KF77oEWfrv8-FnPBYjjjL_8KZL3Ow,5835
 optimum/rbln/transformers/models/bert/__init__.py,sha256=YVV7k_laU6yJBawZrgjIWjRmIF-Y4oQQHqyf8lsraQs,691
-optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=
+optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=p3utRqf3dv9_RkHwaMCa1EfXttNJkqCJUIZo3CeZ9YY,4674
 optimum/rbln/transformers/models/clip/__init__.py,sha256=H9vuBwrmFO0-CqZhXUrKF-uQL6igCqMlqrT1X_ELaAI,754
 optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=NiSm7bHs4SReHDUr53BBWSX0Y8bkKOeUSpsBDrp8YDw,6628
 optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=pDogsdpJKKB5rqnVFrRjwfhUvOSV-jZ3oARMsqSvOOQ,665
-optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=
-optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=
+optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=m93-qKN7NMw3i0XDmFmttmRIRK4np_fWtLFlBb2RFgU,41351
+optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=uGdPGcFrWm2gAwFLjfBiALwFsl49VGCReVi4NUfOPxM,38898
 optimum/rbln/transformers/models/dpt/__init__.py,sha256=gP1tkR3XMNlHq1GT87ugIVvb2o_1eAUg1JaniXjy1Lw,651
 optimum/rbln/transformers/models/dpt/modeling_dpt.py,sha256=ZsS2SOiqcA4azULB-WFEMQZbgIoOyVUKqVKqrw_tWzA,3430
 optimum/rbln/transformers/models/exaone/__init__.py,sha256=zYH_5tVa8-juEdsOIky7I33WSC3Zuhoq1upI0OHYeVw,859
-optimum/rbln/transformers/models/exaone/exaone_architecture.py,sha256=
+optimum/rbln/transformers/models/exaone/exaone_architecture.py,sha256=ZM5vvz8KBipOiMVi8vqfvejkDSknW69xh4GrvJix-g0,3350
 optimum/rbln/transformers/models/exaone/modeling_exaone.py,sha256=WjyH8PmsMljSea7kJn_Cq1FJ96OXwXAoU7hv2Q8zUnI,1747
 optimum/rbln/transformers/models/gemma/__init__.py,sha256=7qUrekuBwCI9a6_Fq6j--FzCirRtUDz3ApY17mQS5Y4,648
-optimum/rbln/transformers/models/gemma/gemma_architecture.py,sha256=
+optimum/rbln/transformers/models/gemma/gemma_architecture.py,sha256=bmCx405FVcffhgrQ53qMMZDbSlPxWOjucMHbvq19Gnw,2286
 optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=-U3w3cEOv3ps1S8aL7uOq6Kq2siCPZz7Z8MXhDQgQqo,1530
 optimum/rbln/transformers/models/gpt2/__init__.py,sha256=UwwPPYVTB9ywDWy314L2bNL0i7wfkQFA71qjgXicEPg,646
-optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=
+optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=1IxqHmB-GlH2Dv2Yk4z0rMxL9CpxMGHhSu_x8_4cxvs,3008
 optimum/rbln/transformers/models/gpt2/modeling_gpt2.py,sha256=qBDanUk_O-HtOIVCA4IE3FYyCsnL9xIDK00vft-0caw,1490
 optimum/rbln/transformers/models/llama/__init__.py,sha256=jo_j_eIrHYGNEhR5lb6g3r5sO0ewe3fm2TWO8mLrT58,648
 optimum/rbln/transformers/models/llama/llama_architecture.py,sha256=S7MCPfyjG5eUqgaS-QNBB0ApUD6wnb5fR0RHq7k7-pA,728
@@ -78,43 +80,43 @@ optimum/rbln/transformers/models/llama/modeling_llama.py,sha256=Z3iony7icoFhRQ11
 optimum/rbln/transformers/models/llava_next/__init__.py,sha256=VLieyWm-UgvuNxw9B38wrL1Jsa09NBDX_ebABmdpTbs,670
 optimum/rbln/transformers/models/llava_next/modeling_llava_next.py,sha256=w_plsUOzxnhkQBhQeUqW9aJqGCvCvLtsx0XNKYjOprU,26203
 optimum/rbln/transformers/models/midm/__init__.py,sha256=UJSaErsF-z6dZERIS143WTaygffZyzEGqoQ2ZPDiM-c,855
-optimum/rbln/transformers/models/midm/midm_architecture.py,sha256=
+optimum/rbln/transformers/models/midm/midm_architecture.py,sha256=357iviqQkzI0s_lU_teH1sVOChNRDUABe3GA0HuhZZY,5444
 optimum/rbln/transformers/models/midm/modeling_midm.py,sha256=GG25BozEZriAL-OPFGpzOjyDtSFB-NfeiLJTDAqxe20,1734
 optimum/rbln/transformers/models/mistral/__init__.py,sha256=jpGdNtRLoV7WmuYpRGVXR27BTC8RIi_nhmvYlxuhqRc,652
 optimum/rbln/transformers/models/mistral/mistral_architecture.py,sha256=_aU8TE_tdvfo0K7QpgTlz_d0qwk4O82dl9268lPL16E,733
 optimum/rbln/transformers/models/mistral/modeling_mistral.py,sha256=7nrddoBIHf8S12LZWBUpotnvG3gND11vMQda9yYXJ-s,1560
 optimum/rbln/transformers/models/phi/__init__.py,sha256=mZLt1M7BbYEvSon5UlkniMUPa15SfjZFdw0kMSAF3VA,644
 optimum/rbln/transformers/models/phi/modeling_phi.py,sha256=j-6Pqd5rR2JE8I1pnKFlCi4nW5Dv3wZjoPWxohissoo,1516
-optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=
+optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=TueyqmjPXWmOPOxBm4dIFyd0X3iV1jgw0U6c26iCAPk,4090
 optimum/rbln/transformers/models/qwen2/__init__.py,sha256=RAMWc21W_2I6DH9xBjeNxPECmAcTrbKhSIefq3Lass0,648
 optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=9-aFDvjMzPNUyGOz0qo33RE18bUFGYZ3Wt_68zb5uJY,1530
 optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
 optimum/rbln/transformers/models/seq2seq/__init__.py,sha256=EmEMV4rOYqKyruX85d0fR73-b8N6BSD6CPcbpYdBuVk,651
-optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py,sha256=
-optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py,sha256=
+optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py,sha256=9Pf9Y86ABDfhwIenlZqYfgqjbyFmtKBiPnbCD7zxw4M,18017
+optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py,sha256=tvzacIZam1sIr_1BvvZ_fDr8u5dXAiYiynFdX9tArtY,18877
 optimum/rbln/transformers/models/t5/__init__.py,sha256=1skR1RmnG62WTAP3-F5P1x-V_ReFhMyirH3u56vWwvc,675
-optimum/rbln/transformers/models/t5/modeling_t5.py,sha256=
-optimum/rbln/transformers/models/t5/t5_architecture.py,sha256=
+optimum/rbln/transformers/models/t5/modeling_t5.py,sha256=8PAhPlYT1dmpcWM7hUMmZV9lPd4d75CuMuFen1pzr3Q,8088
+optimum/rbln/transformers/models/t5/t5_architecture.py,sha256=AArCQhZRETVM583wlIRzMFOSYq7t2nzxaAeyhZxyxKk,9508
 optimum/rbln/transformers/models/wav2vec2/__init__.py,sha256=YpgA0K-vyg9veh0eL_jxauosbRpb_kpGKHvvQLBspKM,649
 optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py,sha256=JYJmV52j6cBwim4RanVJryfKnV80V96ol0A-oR6o7cg,3856
 optimum/rbln/transformers/models/whisper/__init__.py,sha256=ktnNe5ri3ycCWZ_W_voFB9y9-vgGgxS1X9s8LBRZmWc,665
 optimum/rbln/transformers/models/whisper/generation_whisper.py,sha256=GIHTca3b1VtW81kp7BzKQ7f77c2t9OsEsbZetripgDo,4582
 optimum/rbln/transformers/models/whisper/modeling_whisper.py,sha256=0nBADNxE0A1ozBbRutTBvxpo_Y1qkOycT_zronkN-ZU,15840
-optimum/rbln/transformers/models/whisper/whisper_architecture.py,sha256=
+optimum/rbln/transformers/models/whisper/whisper_architecture.py,sha256=Yn6yFpmw6IQbWlnpIMAdEUsNF6huXgaKzGMUZbhSLdo,12572
 optimum/rbln/transformers/models/xlm_roberta/__init__.py,sha256=fC7iNcdxBZ_6eOF2snStmf8r2M3c8O_-XcXnQEaHQCE,653
-optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=
+optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=8YNLz0bc5ze-QuU8rN-QhUfGzlSUs3iMJiWTxO3o6AM,4366
 optimum/rbln/transformers/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 optimum/rbln/transformers/utils/rbln_quantization.py,sha256=gwBVHf97sQgPNmGa0wq87E8mPyrtXYhMnO4X4sKp3c8,7639
 optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
 optimum/rbln/utils/decorator_utils.py,sha256=xu-TrsNi33SRC2a7DBsyoo6-pEQxWKZPZSmM9QlDe2Y,3745
 optimum/rbln/utils/hub.py,sha256=bNmOJGEO9Jfux4Cg8Xli-898I4mxk20KuwQOhP0Zs1U,4198
-optimum/rbln/utils/import_utils.py,sha256=
+optimum/rbln/utils/import_utils.py,sha256=n4HcvZPzFW2ytl45qJ4ZQYlrRSoOb0-nnqhyT2_JA8M,4224
 optimum/rbln/utils/logging.py,sha256=VKKBmlQSdg6iZCGmAXaWYiW67K84jyp1QJhLQSSjPPE,3453
 optimum/rbln/utils/model_utils.py,sha256=DfD_Z2qvZHqcddXqnzTM1AN8khanj3-DXK2lJvVxDvs,1278
 optimum/rbln/utils/runtime_utils.py,sha256=5-DYniyP59nx-mrrbi7AqA77L85b4Cm5oLpaxidSyss,3699
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=oZoGrItB8WqY4i-K9WJPlLlcLohc1YGB9OHB8_XZw3A,4071
-optimum_rbln-0.7.2rc2.dist-info/METADATA,sha256=
-optimum_rbln-0.7.2rc2.dist-info/WHEEL,sha256=
-optimum_rbln-0.7.2rc2.dist-info/licenses/LICENSE,sha256=
-optimum_rbln-0.7.2rc2.dist-info/RECORD,,
+optimum_rbln-0.7.3.dist-info/METADATA,sha256=XUq3lQDHCViSK9l_txn0h0psqls1nDN4ZFFJGGpifZ0,5298
+optimum_rbln-0.7.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.7.3.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.7.3.dist-info/RECORD,,
{optimum_rbln-0.7.2rc2.dist-info → optimum_rbln-0.7.3.dist-info}/WHEEL
File without changes

{optimum_rbln-0.7.2rc2.dist-info → optimum_rbln-0.7.3.dist-info}/licenses/LICENSE
File without changes