optimum-rbln 0.1.9__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (73)
  1. optimum/rbln/__init__.py +47 -9
  2. optimum/rbln/__version__.py +1 -1
  3. optimum/rbln/diffusers/models/autoencoder_kl.py +36 -31
  4. optimum/rbln/diffusers/models/controlnet.py +53 -43
  5. optimum/rbln/diffusers/models/unet_2d_condition.py +40 -31
  6. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +4 -0
  7. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +28 -23
  8. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +28 -23
  9. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +28 -37
  10. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +30 -39
  11. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +24 -14
  12. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +24 -15
  13. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +26 -17
  14. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +26 -17
  15. optimum/rbln/modeling_alias.py +6 -11
  16. optimum/rbln/modeling_base.py +467 -261
  17. optimum/rbln/modeling_config.py +199 -73
  18. optimum/rbln/transformers/__init__.py +43 -1
  19. optimum/rbln/transformers/models/__init__.py +23 -1
  20. optimum/rbln/transformers/models/auto/__init__.py +14 -0
  21. optimum/rbln/transformers/models/auto/auto_factory.py +84 -0
  22. optimum/rbln/transformers/models/auto/modeling_auto.py +95 -0
  23. optimum/rbln/transformers/models/bart/__init__.py +1 -0
  24. optimum/rbln/transformers/models/bart/bart_architecture.py +203 -58
  25. optimum/rbln/transformers/models/bart/modeling_bart.py +125 -0
  26. optimum/rbln/transformers/models/bert/__init__.py +24 -0
  27. optimum/rbln/transformers/models/bert/modeling_bert.py +101 -0
  28. optimum/rbln/transformers/models/clip/__init__.py +1 -1
  29. optimum/rbln/transformers/models/clip/modeling_clip.py +127 -26
  30. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +28 -4
  31. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +409 -150
  32. optimum/rbln/transformers/models/dpt/modeling_dpt.py +21 -8
  33. optimum/rbln/transformers/models/exaone/__init__.py +32 -0
  34. optimum/rbln/transformers/models/exaone/exaone_architecture.py +72 -0
  35. optimum/rbln/transformers/models/exaone/hf_hub_cached/configuration_exaone.py +181 -0
  36. optimum/rbln/transformers/models/exaone/hf_hub_cached/modeling_exaone.py +1725 -0
  37. optimum/rbln/transformers/models/exaone/modeling_exaone.py +78 -0
  38. optimum/rbln/transformers/models/gemma/modeling_gemma.py +1 -1
  39. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +4 -1
  40. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +1 -1
  41. optimum/rbln/transformers/models/llama/modeling_llama.py +1 -1
  42. optimum/rbln/transformers/models/llava_next/__init__.py +24 -0
  43. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +662 -0
  44. optimum/rbln/transformers/models/midm/midm_architecture.py +5 -1
  45. optimum/rbln/transformers/models/midm/modeling_midm.py +6 -1
  46. optimum/rbln/transformers/models/mistral/modeling_mistral.py +1 -1
  47. optimum/rbln/transformers/models/phi/__init__.py +24 -0
  48. optimum/rbln/transformers/models/phi/modeling_phi.py +69 -0
  49. optimum/rbln/transformers/models/phi/phi_architecture.py +406 -0
  50. optimum/rbln/transformers/models/qwen2/__init__.py +24 -0
  51. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -0
  52. optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +29 -0
  53. optimum/rbln/transformers/models/seq2seq/__init__.py +24 -0
  54. optimum/rbln/{modeling_seq2seq.py → transformers/models/seq2seq/modeling_seq2seq.py} +198 -168
  55. optimum/rbln/transformers/models/t5/__init__.py +1 -0
  56. optimum/rbln/transformers/models/t5/modeling_t5.py +55 -0
  57. optimum/rbln/transformers/models/t5/t5_architecture.py +122 -47
  58. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +17 -12
  59. optimum/rbln/transformers/models/whisper/generation_whisper.py +68 -0
  60. optimum/rbln/transformers/models/whisper/modeling_whisper.py +172 -111
  61. optimum/rbln/transformers/models/whisper/whisper_architecture.py +44 -17
  62. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +18 -16
  63. optimum/rbln/transformers/utils/rbln_quantization.py +48 -60
  64. optimum/rbln/utils/import_utils.py +50 -1
  65. optimum/rbln/utils/logging.py +82 -0
  66. optimum/rbln/utils/runtime_utils.py +33 -0
  67. optimum/rbln/utils/timer_utils.py +43 -0
  68. {optimum_rbln-0.1.9.dist-info → optimum_rbln-0.1.12.dist-info}/METADATA +9 -7
  69. optimum_rbln-0.1.12.dist-info/RECORD +103 -0
  70. {optimum_rbln-0.1.9.dist-info → optimum_rbln-0.1.12.dist-info}/WHEEL +1 -1
  71. optimum_rbln-0.1.12.dist-info/entry_points.txt +4 -0
  72. optimum_rbln-0.1.9.dist-info/RECORD +0 -78
  73. {optimum_rbln-0.1.9.dist-info → optimum_rbln-0.1.12.dist-info}/licenses/LICENSE +0 -0
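The most significant changes in this range are the new `transformers/models/auto` factory (`auto_factory.py`, `modeling_auto.py`), new model families (BERT, EXAONE, LLaVA-Next, Phi, Qwen2), the move of `modeling_seq2seq.py` under `transformers/models/seq2seq`, and the `RBLNConfig`/`RBLNCompileConfig` rework in `modeling_config.py`. For orientation, a hypothetical sketch of how the new auto factory would be used; the `RBLNAutoModelForCausalLM` name and the `export`/`rbln_*` keyword convention are assumptions inferred from the file list, not verified against the 0.1.12 API:

    # Hypothetical sketch only -- class name and kwargs inferred from the
    # file list above; check modeling_auto.py in 0.1.12 for the real API.
    from optimum.rbln import RBLNAutoModelForCausalLM

    model = RBLNAutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",  # any supported decoder-only checkpoint
        export=True,                 # compile for the RBLN NPU
        rbln_batch_size=1,
    )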
optimum/rbln/transformers/models/clip/modeling_clip.py
@@ -22,14 +22,23 @@
 # from Rebellions Inc.
 
 import logging
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
 
 import torch
-from transformers import AutoConfig, AutoModel, CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection
+from transformers import (
+    AutoConfig,
+    AutoModel,
+    CLIPTextConfig,
+    CLIPTextModel,
+    CLIPTextModelWithProjection,
+    CLIPVisionConfig,
+    CLIPVisionModel,
+)
+from transformers.modeling_outputs import BaseModelOutputWithPooling
 from transformers.models.clip.modeling_clip import CLIPTextModelOutput
 
 from ....modeling_base import RBLNModel
-from ....modeling_config import RBLNConfig, RBLNRuntimeConfig
+from ....modeling_config import RBLNCompileConfig, RBLNConfig
 
 
 logger = logging.getLogger(__name__)
@@ -41,23 +50,17 @@ if TYPE_CHECKING:
 class _TextEncoder(torch.nn.Module):
     def __init__(self, enc: "CLIPTextModel"):
         super().__init__()
-        enc.config.return_dict = False
-        enc.config.output_hidden_states = True
         self.enc = enc
 
     def forward(self, inp):
-        enc_out = self.enc(inp)
+        enc_out = self.enc(inp, output_hidden_states=True, return_dict=False)
         return enc_out
 
 
 class RBLNCLIPTextModel(RBLNModel):
-    auto_model_class = AutoModel  # feature extraction
     original_model_class = CLIPTextModel
     original_config_class = CLIPTextConfig
 
-    def __post_init__(self, **kwargs):
-        self.dtype = torch.float32
-
     @classmethod
     def from_pretrained(cls, *args, **kwargs):
         configtmp = AutoConfig.from_pretrained
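Moving `output_hidden_states=True, return_dict=False` from config mutation in `__init__` into the `forward` call keeps the flags local to the traced call instead of permanently mutating the shared `CLIPTextModel` config. A minimal sketch of what the wrapper now captures, assuming a stock Hugging Face checkpoint:

    # With return_dict=False the encoder yields a plain tuple
    # (last_hidden_state, pooled_output, *hidden_states), which is
    # what compilation of _TextEncoder traces.
    import torch
    from transformers import CLIPTextModel

    enc = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32").eval()
    ids = torch.zeros(1, enc.config.max_position_embeddings, dtype=torch.int64)
    out = enc(ids, output_hidden_states=True, return_dict=False)
    assert isinstance(out, tuple)  # tuple outputs are trace-friendly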
@@ -78,28 +81,32 @@ class RBLNCLIPTextModel(RBLNModel):
         cls,
         preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
         model_config: "CLIPTextConfig",
+        rbln_kwargs: Dict[str, Any] = {},
         rbln_batch_size: Optional[int] = None,
-        rbln_img_width: Optional[int] = None,
-        rbln_img_height: Optional[int] = None,
     ) -> RBLNConfig:
-        model_config.return_dict = False
+        rbln_batch_size = rbln_kwargs.get("batch_size", None)
         if rbln_batch_size is None:
             rbln_batch_size = 1
 
-        rbln_runtime_config = RBLNRuntimeConfig(
-            input_info=[
-                (
-                    "input_ids",
-                    [
-                        rbln_batch_size,
-                        model_config.max_position_embeddings,
-                    ],
-                    "int64",
-                ),
-            ],
-        )
+        model_config.return_dict = False
 
-        rbln_config = RBLNConfig.from_rbln_runtime_configs([rbln_runtime_config])
+        input_info = [
+            (
+                "input_ids",
+                [
+                    rbln_batch_size,
+                    model_config.max_position_embeddings,
+                ],
+                "int64",
+            ),
+        ]
+
+        rbln_compile_config = RBLNCompileConfig(input_info=input_info)
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
         return rbln_config
 
     def forward(self, input_ids: "torch.Tensor", **kwargs):
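The `_get_rbln_config` rewrite replaces per-model keyword parameters (`rbln_img_width`, `rbln_img_height`) with a single `rbln_kwargs` dict that is recorded on the `RBLNConfig` next to the compile config. A minimal usage sketch, assuming optimum-rbln's usual convention that `rbln_`-prefixed `from_pretrained` kwargs are gathered into `rbln_kwargs`:

    # Sketch under the assumptions stated above; export=True compiles from
    # the Hugging Face checkpoint rather than loading a prebuilt artifact.
    from optimum.rbln import RBLNCLIPTextModel

    text_encoder = RBLNCLIPTextModel.from_pretrained(
        "openai/clip-vit-base-patch32",
        export=True,
        rbln_batch_size=1,  # arrives here as rbln_kwargs={"batch_size": 1}
    )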
@@ -113,3 +120,97 @@
 
 class RBLNCLIPTextModelWithProjection(RBLNCLIPTextModel):
     original_model_class = CLIPTextModelWithProjection
+
+
+class _VisionEncoder(torch.nn.Module):
+    def __init__(self, enc: CLIPVisionModel):
+        super().__init__()
+        self.enc = enc
+
+    def forward(self, inp):
+        enc_out = self.enc(inp, output_hidden_states=True, return_dict=False)
+        return enc_out
+
+
+class RBLNCLIPVisionModel(RBLNModel):
+    original_model_class = CLIPVisionModel
+    original_config_class = CLIPVisionConfig
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        configtmp = AutoConfig.from_pretrained
+        modeltmp = AutoModel.from_pretrained
+        AutoConfig.from_pretrained = cls.original_config_class.from_pretrained
+        AutoModel.from_pretrained = cls.original_model_class.from_pretrained
+        rt = super().from_pretrained(*args, **kwargs)
+        AutoConfig.from_pretrained = configtmp
+        AutoModel.from_pretrained = modeltmp
+        return rt
+
+    @classmethod
+    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNConfig) -> torch.nn.Module:
+        return _VisionEncoder(model).eval()
+
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
+        model_config: "CLIPTextConfig",
+        rbln_kwargs: Dict[str, Any] = {},
+    ) -> RBLNConfig:
+        rbln_batch_size = rbln_kwargs.get("batch_size", 1)
+        rbln_image_size = rbln_kwargs.get("image_size", None)
+
+        if rbln_image_size is None:
+            rbln_image_size = getattr(model_config, "image_size", None)
+
+        if isinstance(rbln_image_size, int):
+            rbln_image_size = (rbln_image_size, rbln_image_size)
+
+        if rbln_image_size is None:
+            raise ValueError("`rbln_image_size` should be specified!")
+
+        rbln_compile_config = RBLNCompileConfig(
+            input_info=[
+                (
+                    "pixel_values",
+                    [
+                        rbln_batch_size,
+                        3,
+                        rbln_image_size[0],
+                        rbln_image_size[1],
+                    ],
+                    "float32",
+                )
+            ]
+        )
+
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
+
+        rbln_config.model_cfg.update(
+            {
+                "batch_size": rbln_batch_size,
+                "image_size": rbln_image_size,
+            }
+        )
+
+        return rbln_config
+
+    def forward(
+        self,
+        pixel_values: Optional[torch.FloatTensor] = None,
+        **kwargs,
+    ) -> Union[Tuple, BaseModelOutputWithPooling]:
+        if len(kwargs) > 0 and any(kwargs.values()):
+            logger.warning(f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__}.")
+
+        output = super().forward(pixel_values)
+        return BaseModelOutputWithPooling(
+            last_hidden_state=output[0],
+            pooler_output=output[1],
+            hidden_states=output[2:],
+        )
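For the new `RBLNCLIPVisionModel`, `image_size` resolves in three steps: an explicit `rbln_kwargs["image_size"]`, then `model_config.image_size`, and a bare int is promoted to a square `(size, size)` tuple before it is baked into the static `pixel_values` shape. A hedged usage sketch, under the same `export`/`rbln_*` keyword assumptions as above:

    from optimum.rbln import RBLNCLIPVisionModel

    vision_encoder = RBLNCLIPVisionModel.from_pretrained(
        "openai/clip-vit-base-patch32",
        export=True,
        rbln_image_size=224,  # stored as (224, 224) in rbln_config.model_cfg
    )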
optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py
@@ -93,17 +93,29 @@ class DecoderOnlyWrapper(torch.nn.Module):
 
     def forward(
         self,
-        input_ids,
+        input_ids_or_inputs_embeds,
         attention_mask,
         cache_position,
         batch_position,
+        query_idx,
         *past_key_values,
     ):
-        if input_ids.shape[1] == 1:
+        if input_ids_or_inputs_embeds.shape[1] == 1:
             rbln_batch_position = None
         else:
             rbln_batch_position = batch_position
 
+        if input_ids_or_inputs_embeds.ndim == 2:
+            # input_ids
+            input_ids = input_ids_or_inputs_embeds
+            inputs_embeds = None
+        elif input_ids_or_inputs_embeds.ndim == 3:
+            # inputs_embeds
+            input_ids = None
+            inputs_embeds = input_ids_or_inputs_embeds
+        else:
+            raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")
+
         # Formatting list of past_kv to DynamicCache class.
         past_key_values = RebelDynamicCache.from_input_format(
             cache_position,
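Because the compiled graph exposes a single entry tensor, the wrapper distinguishes token ids from precomputed embeddings purely by rank. A self-contained re-creation of that dispatch (the helper name is illustrative):

    import torch

    def split_inputs(x: torch.Tensor):
        if x.ndim == 2:  # (batch, seq) -> token ids
            return x, None
        if x.ndim == 3:  # (batch, seq, hidden) -> embeddings
            return None, x
        raise NotImplementedError(f"Unknown ndim of input : {x.ndim}")

    ids, emb = split_inputs(torch.zeros(1, 8, dtype=torch.int64))
    assert emb is None  # 2-D input routed to input_ids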
@@ -115,6 +127,7 @@ class DecoderOnlyWrapper(torch.nn.Module):
         outputs = forward_dict["wrapper"](
             self.model,
             input_ids=input_ids,
+            inputs_embeds=inputs_embeds,
             attention_mask=attention_mask,
             position_ids=cache_position,
             past_key_values=past_key_values,
@@ -124,11 +137,14 @@ class DecoderOnlyWrapper(torch.nn.Module):
         )
 
         hidden_states = outputs[0]
+        if batch_position >= 0:
+            hidden_states = hidden_states[:, query_idx].unsqueeze(1)
+
         logits = self.lm_head(hidden_states)
 
         output = (logits,) + outputs[1:]
 
-        return output, batch_position
+        return output, batch_position + query_idx
 
 
 class DecoderOnlyAttention:
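The new `query_idx` slice means that when `batch_position >= 0` (the prefill path, judging by the surrounding code) only the hidden state at the last real query position reaches `lm_head`, so the projection runs over one row instead of the whole prompt. A shape-level illustration with made-up sizes:

    import torch

    hidden_states = torch.randn(1, 512, 4096)          # (batch, seq, hidden) after prefill
    query_idx = torch.tensor(13)                       # last non-padded prompt position
    picked = hidden_states[:, query_idx].unsqueeze(1)  # (1, 1, 4096)
    lm_head = torch.nn.Linear(4096, 32000, bias=False)
    logits = lm_head(picked)                           # (1, 1, 32000), not (1, 512, 32000)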
@@ -323,8 +339,16 @@ class DecoderOnlyModel:
         forward_dict: Optional[Dict[str, classmethod]] = None,
         rotary_pos_emb=None,
     ) -> BaseModelOutputWithPast:
+        # retrieve input_ids and inputs_embeds
+        if (input_ids is None) ^ (inputs_embeds is not None):
+            raise ValueError(
+                "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
+            )
+
         # embed positions
-        inputs_embeds = self.embed_tokens(input_ids)
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_tokens(input_ids)
+
         hidden_states = inputs_embeds
         attention_mask = (1 - attention_mask) * torch.finfo(torch.float16).min
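The `^` (XOR) guard is the compact way to require exactly one of `input_ids`/`inputs_embeds`: with `A = input_ids is None` and `B = inputs_embeds is not None`, `A ^ B` is true precisely in the two invalid cases (both given, or neither). A quick check of all four cases:

    def invalid(input_ids, inputs_embeds):
        return (input_ids is None) ^ (inputs_embeds is not None)

    assert not invalid(input_ids=[1], inputs_embeds=None)       # ids only: valid
    assert not invalid(input_ids=None, inputs_embeds=[[0.1]])   # embeds only: valid
    assert invalid(input_ids=[1], inputs_embeds=[[0.1]])        # both: raises above
    assert invalid(input_ids=None, inputs_embeds=None)          # neither: raises above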