optimum-rbln 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. optimum/rbln/__init__.py +17 -0
  2. optimum/rbln/__version__.py +1 -1
  3. optimum/rbln/diffusers/__init__.py +0 -1
  4. optimum/rbln/diffusers/models/autoencoder_kl.py +3 -3
  5. optimum/rbln/diffusers/models/controlnet.py +7 -3
  6. optimum/rbln/diffusers/models/unet_2d_condition.py +5 -5
  7. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +23 -146
  8. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +107 -59
  9. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +106 -54
  10. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +130 -71
  11. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +131 -72
  12. optimum/rbln/modeling_alias.py +19 -1
  13. optimum/rbln/modeling_base.py +162 -18
  14. optimum/rbln/transformers/__init__.py +8 -0
  15. optimum/rbln/transformers/cache_utils.py +111 -0
  16. optimum/rbln/transformers/generation/utils.py +0 -2
  17. optimum/rbln/transformers/models/__init__.py +3 -0
  18. optimum/rbln/transformers/models/bart/bart_architecture.py +0 -5
  19. optimum/rbln/transformers/models/clip/modeling_clip.py +1 -1
  20. optimum/rbln/transformers/models/decoderonly/__init__.py +36 -0
  21. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +516 -0
  22. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +464 -0
  23. optimum/rbln/transformers/models/gemma/__init__.py +24 -0
  24. optimum/rbln/transformers/models/gemma/gemma_architecture.py +123 -0
  25. optimum/rbln/transformers/models/gemma/modeling_gemma.py +67 -0
  26. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +201 -166
  27. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +10 -257
  28. optimum/rbln/transformers/models/llama/llama_architecture.py +3 -610
  29. optimum/rbln/transformers/models/llama/modeling_llama.py +12 -440
  30. optimum/rbln/transformers/models/midm/hf_hub_cached/midm_bitext_tokenization.py +2 -1
  31. optimum/rbln/transformers/models/midm/hf_hub_cached/modeling_midm.py +0 -4
  32. optimum/rbln/transformers/models/midm/midm_architecture.py +160 -357
  33. optimum/rbln/transformers/models/midm/modeling_midm.py +10 -325
  34. optimum/rbln/transformers/models/mistral/__init__.py +24 -0
  35. optimum/rbln/transformers/models/mistral/mistral_architecture.py +29 -0
  36. optimum/rbln/transformers/models/mistral/modeling_mistral.py +68 -0
  37. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
  38. optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -6
  39. optimum/rbln/transformers/models/xlm_roberta/__init__.py +24 -0
  40. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +131 -0
  41. optimum/rbln/transformers/utils/__init__.py +0 -0
  42. optimum/rbln/transformers/utils/rbln_quantization.py +109 -0
  43. optimum/rbln/utils/import_utils.py +1 -4
  44. optimum/rbln/utils/runtime_utils.py +2 -1
  45. {optimum_rbln-0.1.7.dist-info → optimum_rbln-0.1.9.dist-info}/METADATA +11 -5
  46. {optimum_rbln-0.1.7.dist-info → optimum_rbln-0.1.9.dist-info}/RECORD +48 -35
  47. optimum/rbln/transformers/models/llama/llama_architecture_cb.py +0 -764
  48. {optimum_rbln-0.1.7.dist-info → optimum_rbln-0.1.9.dist-info}/WHEEL +0 -0
  49. {optimum_rbln-0.1.7.dist-info → optimum_rbln-0.1.9.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,111 @@
1
+ from typing import Optional, Tuple
2
+
3
+ import torch
4
+ from transformers.cache_utils import DynamicCache
5
+
6
+
7
class RebelDynamicCache(DynamicCache):
    """
    Per-layer key/value cache that is written at an explicit position for each batch row.

    `key_cache` and `value_cache` hold one tensor per layer. Unlike what the name of the parent
    class suggests, `update` neither appends to nor modifies the stored tensors: it returns copies
    of a single batch row with the new states written at that row's current step. Use `assign` to
    store tensors back into the cache.

    NOTE(review): the per-layer tensors were originally documented as
    `[batch_size, num_heads, seq_len, head_dim]`. `update` indexes the first axis with `batch_idx`
    and writes along `dim=-2` - confirm the exact layout against the callers.
    """

    def __init__(self, current_steps) -> None:
        """
        Args:
            current_steps: Indexable collection of write positions; `update` reads
                `current_steps[batch_idx]` (or `current_steps[0]` when `read_first_step` is set).
        """
        super().__init__()
        self.current_steps = current_steps

    def assign(
        self,
        key_states: torch.Tensor,
        value_states: torch.Tensor,
        layer_idx: int,
    ) -> None:
        """
        Replace the stored key/value tensors of layer `layer_idx`.

        Dimension 2 of both inputs is squeezed before storing; `squeeze` leaves the tensor
        unchanged when that dimension does not have size 1.
        """
        self.key_cache[layer_idx] = key_states.squeeze(2)
        self.value_cache[layer_idx] = value_states.squeeze(2)

    def update(
        self,
        key_states: torch.Tensor,
        value_states: torch.Tensor,
        layer_idx: int,
        batch_idx: int,
        read_first_step: Optional[bool] = False,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Return the cached keys/values of one batch row with the new states written in.

        The write starts at `self.current_steps[batch_idx]` (or `self.current_steps[0]` when
        `read_first_step` is true) and covers `key_states.shape[-2]` / `value_states.shape[-2]`
        positions along `dim=-2`. The stored cache itself is not modified.

        Args:
            key_states: New key states to write.
            value_states: New value states to write.
            layer_idx: Index of the layer whose cache entry is read.
            batch_idx: Index of the batch row whose cache entry is read.
            read_first_step: When true, use the first entry of `current_steps` for every row.

        Returns:
            Tuple `(keys, values)`; each is the selected cache row with a leading dimension and a
            dimension at index 2 inserted, then overwritten by the new states.
        """
        current_step = self.current_steps[0 if read_first_step else batch_idx]
        kend = current_step + key_states.shape[-2]
        vend = current_step + value_states.shape[-2]
        update_key_states = (
            self.key_cache[layer_idx][batch_idx]
            .unsqueeze(0)
            .unsqueeze(2)
            .slice_scatter(key_states, dim=-2, start=current_step, end=kend)
        )
        update_value_states = (
            self.value_cache[layer_idx][batch_idx]
            .unsqueeze(0)
            .unsqueeze(2)
            .slice_scatter(value_states, dim=-2, start=current_step, end=vend)
        )

        return update_key_states, update_value_states

    @classmethod
    def from_input_format(cls, position_ids, num_hidden_layer, *past_key_values) -> "RebelDynamicCache":
        """
        Build a cache from the flat rbln argument layout.

        Args:
            position_ids: 2-D tensor; the first element of each row becomes that row's current step.
            num_hidden_layer: Number of layers to read from `past_key_values`.
            *past_key_values: Flat sequence `key_0, value_0, key_1, value_1, ...`; entry
                `2 * i` is the key tensor and `2 * i + 1` the value tensor of layer `i`.

        Returns:
            A new instance of `cls` holding the given tensors (stored as passed, without copying).
        """
        batch, _ = position_ids.shape
        # Indexed element by element (rather than iterating the tensor) to keep the exact
        # tensor operations of the original implementation.
        current_steps = [position_ids[b][0] for b in range(batch)]

        cache = cls(current_steps)

        for layer_idx in range(num_hidden_layer):
            cache.key_cache.append(past_key_values[layer_idx * 2])
            cache.value_cache.append(past_key_values[layer_idx * 2 + 1])

        return cache
75
+
76
+
77
class RebelDynamicCache_4D(RebelDynamicCache):
    """
    Variant of `RebelDynamicCache` that stores tensors as given and, in `update`, only inserts a
    leading dimension before writing (no squeeze/unsqueeze on dimension 2).
    """

    def assign(
        self,
        keys: torch.Tensor,
        values: torch.Tensor,
        layer_idx: int,
    ) -> None:
        """Store `keys` and `values` unchanged as the cache entries of layer `layer_idx`."""
        self.key_cache[layer_idx], self.value_cache[layer_idx] = keys, values

    def update(
        self,
        keys: torch.Tensor,
        values: torch.Tensor,
        layer_idx: int,
        batch_idx: int,
        read_first_step: Optional[bool] = False,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Return the cached keys/values of batch row `batch_idx` in layer `layer_idx` with the new
        `keys` and `values` written along `dim=-2`, starting at the row's entry in
        `self.current_steps` (the first entry when `read_first_step` is true).
        """
        step_slot = 0 if read_first_step else batch_idx
        start = self.current_steps[step_slot]
        key_stop = start + keys.shape[-2]
        value_stop = start + values.shape[-2]

        # Select the row, restore a leading dimension, then write the new keys into the window.
        key_row = self.key_cache[layer_idx][batch_idx].unsqueeze(0)
        update_keys = torch.slice_scatter(key_row, keys, dim=-2, start=start, end=key_stop)

        # Same procedure for the values.
        value_row = self.value_cache[layer_idx][batch_idx].unsqueeze(0)
        update_values = torch.slice_scatter(value_row, values, dim=-2, start=start, end=value_stop)

        return update_keys, update_values
@@ -32,7 +32,6 @@ class RBLNGenerationMixin:
32
32
  generation_config: Optional[GenerationConfig] = None, # thkim change for 4.41.0
33
33
  **model_kwargs,
34
34
  ) -> Union[SampleDecoderOnlyOutput, torch.LongTensor]:
35
-
36
35
  ###################### thkim change for 4.41.0 ############################
37
36
  if generation_config is not None:
38
37
  pad_token_id = generation_config.pad_token_id
@@ -216,7 +215,6 @@ class RBLNGenerationMixin:
216
215
  do_sample: Optional[bool] = True,
217
216
  **model_kwargs,
218
217
  ) -> Union[SampleDecoderOnlyOutput, torch.LongTensor]:
219
-
220
218
  ###################### thkim change for 4.41.0 ############################
221
219
  if generation_config is not None:
222
220
  pad_token_id = generation_config.pad_token_id
@@ -23,8 +23,11 @@
23
23
 
24
24
  from .clip import RBLNCLIPTextModel, RBLNCLIPTextModelWithProjection
25
25
  from .dpt import RBLNDPTForDepthEstimation
26
+ from .gemma import RBLNGemmaForCausalLM
26
27
  from .gpt2 import RBLNGPT2LMHeadModel
27
28
  from .llama import RBLNLlamaForCausalLM
28
29
  from .midm import RBLNMidmLMHeadModel
30
+ from .mistral import RBLNMistralForCausalLM
29
31
  from .wav2vec2 import RBLNWav2Vec2ForCTC
30
32
  from .whisper import RBLNWhisperForConditionalGeneration
33
+ from .xlm_roberta import RBLNXLMRobertaModel
@@ -56,7 +56,6 @@ class _BartAttention(BartAttention):
56
56
  cache_position: torch.Tensor,
57
57
  key_value_states: Optional[torch.Tensor] = None,
58
58
  ) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
59
-
60
59
  bsz, tgt_len, _ = hidden_states.size()
61
60
  is_cross_attention = key_value_states is not None
62
61
 
@@ -111,7 +110,6 @@ class _BartSdpaAttention(BartSdpaAttention):
111
110
  cache_position: torch.Tensor,
112
111
  key_value_states: Optional[torch.Tensor] = None,
113
112
  ) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
114
-
115
113
  bsz, tgt_len, _ = hidden_states.size()
116
114
  is_cross_attention = key_value_states is not None
117
115
 
@@ -166,7 +164,6 @@ class _BartDecoderLayer(BartDecoderLayer):
166
164
  cache_position: torch.Tensor,
167
165
  attn_impl: str = "eager",
168
166
  ) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
169
-
170
167
  # Self Attention Block
171
168
  residual = hidden_states
172
169
  self_attn_past_key_value = past_key_value[:2]
@@ -218,7 +215,6 @@ class _BartDecoder(BartDecoder):
218
215
  cache_position: torch.Tensor,
219
216
  attn_impl: str = "eager",
220
217
  ):
221
-
222
218
  # embedding
223
219
  positions_idx = cache_position + self.embed_positions.offset
224
220
  positions = self.embed_positions.weight[positions_idx]
@@ -284,7 +280,6 @@ class BartDecoderWrapper(torch.nn.Module):
284
280
  self_kv_cache: torch.Tensor,
285
281
  cross_kv_cache: torch.Tensor,
286
282
  ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor]]:
287
-
288
283
  # prepare past_key_values
289
284
  kv_cache = ()
290
285
  for i in range(0, self.num_layers * 2, 2):
@@ -70,7 +70,7 @@ class RBLNCLIPTextModel(RBLNModel):
70
70
  return rt
71
71
 
72
72
  @classmethod
73
- def wrap_model_if_needed(cls, model: torch.nn.Module) -> torch.nn.Module:
73
+ def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNConfig) -> torch.nn.Module:
74
74
  return _TextEncoder(model).eval()
75
75
 
76
76
  @classmethod
@@ -0,0 +1,36 @@
1
+ # Copyright 2024 Rebellions Inc.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # Portions of this software are licensed under the Apache License,
16
+ # Version 2.0. See the NOTICE file distributed with this work for
17
+ # additional information regarding copyright ownership.
18
+
19
+ # All other portions of this software, including proprietary code,
20
+ # are the intellectual property of Rebellions Inc. and may not be
21
+ # copied, modified, or distributed without prior written permission
22
+ # from Rebellions Inc.
23
+
24
+ from .decoderonly_architecture import (
25
+ DecoderOnlyAttention,
26
+ DecoderOnlyDecoderLayer,
27
+ DecoderOnlyModel,
28
+ DecoderOnlyWrapper,
29
+ DynamicNTKScalingRotaryEmbedding,
30
+ LinearScalingRotaryEmbedding,
31
+ RotaryEmbedding,
32
+ apply_rotary_pos_emb,
33
+ rotate_half,
34
+ slice_and_unsqueeze_cos_sin,
35
+ )
36
+ from .modeling_decoderonly import RBLNDecoderOnlyModelForCausalLM