optimum-rbln 0.1.9__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +47 -9
- optimum/rbln/__version__.py +1 -1
- optimum/rbln/diffusers/models/autoencoder_kl.py +36 -31
- optimum/rbln/diffusers/models/controlnet.py +53 -43
- optimum/rbln/diffusers/models/unet_2d_condition.py +40 -31
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +4 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +28 -23
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +28 -23
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +28 -37
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +30 -39
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +24 -14
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +24 -15
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +26 -17
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +26 -17
- optimum/rbln/modeling_alias.py +6 -11
- optimum/rbln/modeling_base.py +467 -261
- optimum/rbln/modeling_config.py +199 -73
- optimum/rbln/transformers/__init__.py +43 -1
- optimum/rbln/transformers/models/__init__.py +23 -1
- optimum/rbln/transformers/models/auto/__init__.py +14 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +84 -0
- optimum/rbln/transformers/models/auto/modeling_auto.py +95 -0
- optimum/rbln/transformers/models/bart/__init__.py +1 -0
- optimum/rbln/transformers/models/bart/bart_architecture.py +203 -58
- optimum/rbln/transformers/models/bart/modeling_bart.py +125 -0
- optimum/rbln/transformers/models/bert/__init__.py +24 -0
- optimum/rbln/transformers/models/bert/modeling_bert.py +101 -0
- optimum/rbln/transformers/models/clip/__init__.py +1 -1
- optimum/rbln/transformers/models/clip/modeling_clip.py +127 -26
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +28 -4
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +409 -150
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +21 -8
- optimum/rbln/transformers/models/exaone/__init__.py +32 -0
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +72 -0
- optimum/rbln/transformers/models/exaone/hf_hub_cached/configuration_exaone.py +181 -0
- optimum/rbln/transformers/models/exaone/hf_hub_cached/modeling_exaone.py +1725 -0
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +78 -0
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +1 -1
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +4 -1
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +1 -1
- optimum/rbln/transformers/models/llama/modeling_llama.py +1 -1
- optimum/rbln/transformers/models/llava_next/__init__.py +24 -0
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +662 -0
- optimum/rbln/transformers/models/midm/midm_architecture.py +5 -1
- optimum/rbln/transformers/models/midm/modeling_midm.py +6 -1
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +1 -1
- optimum/rbln/transformers/models/phi/__init__.py +24 -0
- optimum/rbln/transformers/models/phi/modeling_phi.py +69 -0
- optimum/rbln/transformers/models/phi/phi_architecture.py +406 -0
- optimum/rbln/transformers/models/qwen2/__init__.py +24 -0
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -0
- optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +29 -0
- optimum/rbln/transformers/models/seq2seq/__init__.py +24 -0
- optimum/rbln/{modeling_seq2seq.py → transformers/models/seq2seq/modeling_seq2seq.py} +198 -168
- optimum/rbln/transformers/models/t5/__init__.py +1 -0
- optimum/rbln/transformers/models/t5/modeling_t5.py +55 -0
- optimum/rbln/transformers/models/t5/t5_architecture.py +122 -47
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +17 -12
- optimum/rbln/transformers/models/whisper/generation_whisper.py +68 -0
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +172 -111
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +44 -17
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +18 -16
- optimum/rbln/transformers/utils/rbln_quantization.py +48 -60
- optimum/rbln/utils/import_utils.py +50 -1
- optimum/rbln/utils/logging.py +82 -0
- optimum/rbln/utils/runtime_utils.py +33 -0
- optimum/rbln/utils/timer_utils.py +43 -0
- {optimum_rbln-0.1.9.dist-info → optimum_rbln-0.1.12.dist-info}/METADATA +9 -7
- optimum_rbln-0.1.12.dist-info/RECORD +103 -0
- {optimum_rbln-0.1.9.dist-info → optimum_rbln-0.1.12.dist-info}/WHEEL +1 -1
- optimum_rbln-0.1.12.dist-info/entry_points.txt +4 -0
- optimum_rbln-0.1.9.dist-info/RECORD +0 -78
- {optimum_rbln-0.1.9.dist-info → optimum_rbln-0.1.12.dist-info}/licenses/LICENSE +0 -0
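The list above only gives per-file line counts. As a rough sketch (not part of the package, and the wheel filenames and prior download step are assumptions), a similar file-level comparison can be reproduced locally with the standard-library `zipfile` module:

```python
# Hypothetical helper: compare the member lists and sizes of two downloaded wheels.
import zipfile

def wheel_members(path: str) -> dict:
    """Map each file inside a wheel to its uncompressed size in bytes."""
    with zipfile.ZipFile(path) as whl:
        return {info.filename: info.file_size for info in whl.infolist()}

old = wheel_members("optimum_rbln-0.1.9-py3-none-any.whl")
new = wheel_members("optimum_rbln-0.1.12-py3-none-any.whl")

for name in sorted(set(old) | set(new)):
    if name not in old:
        print(f"added    {name}")
    elif name not in new:
        print(f"removed  {name}")
    elif old[name] != new[name]:
        print(f"changed  {name} ({old[name]} -> {new[name]} bytes)")
```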
```diff
--- a/optimum/rbln/transformers/models/clip/modeling_clip.py
+++ b/optimum/rbln/transformers/models/clip/modeling_clip.py
@@ -22,14 +22,23 @@
 # from Rebellions Inc.
 
 import logging
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
 
 import torch
-from transformers import
+from transformers import (
+    AutoConfig,
+    AutoModel,
+    CLIPTextConfig,
+    CLIPTextModel,
+    CLIPTextModelWithProjection,
+    CLIPVisionConfig,
+    CLIPVisionModel,
+)
+from transformers.modeling_outputs import BaseModelOutputWithPooling
 from transformers.models.clip.modeling_clip import CLIPTextModelOutput
 
 from ....modeling_base import RBLNModel
-from ....modeling_config import
+from ....modeling_config import RBLNCompileConfig, RBLNConfig
 
 
 logger = logging.getLogger(__name__)
```
```diff
@@ -41,23 +50,17 @@ if TYPE_CHECKING:
 class _TextEncoder(torch.nn.Module):
     def __init__(self, enc: "CLIPTextModel"):
         super().__init__()
-        enc.config.return_dict = False
-        enc.config.output_hidden_states = True
         self.enc = enc
 
     def forward(self, inp):
-        enc_out = self.enc(inp)
+        enc_out = self.enc(inp, output_hidden_states=True, return_dict=False)
         return enc_out
 
 
 class RBLNCLIPTextModel(RBLNModel):
-    auto_model_class = AutoModel  # feature extraction
     original_model_class = CLIPTextModel
     original_config_class = CLIPTextConfig
 
-    def __post_init__(self, **kwargs):
-        self.dtype = torch.float32
-
     @classmethod
     def from_pretrained(cls, *args, **kwargs):
         configtmp = AutoConfig.from_pretrained
```
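The `_TextEncoder` change above stops mutating the shared `enc.config` and instead requests hidden states per call. A minimal sketch of the new call-time behavior, using a tiny randomly initialized `CLIPTextModel` so it runs without downloading weights (the small config values are illustrative, not from the diff):

```python
import torch
from transformers import CLIPTextConfig, CLIPTextModel

# Tiny random-weight text encoder, just to exercise the call signature.
config = CLIPTextConfig(
    vocab_size=1000, hidden_size=64, intermediate_size=128,
    num_hidden_layers=2, num_attention_heads=4, max_position_embeddings=16,
)
model = CLIPTextModel(config).eval()
input_ids = torch.randint(0, config.vocab_size, (1, config.max_position_embeddings))

# Request hidden states per call and leave the config untouched, as the new wrapper does.
out = model(input_ids, output_hidden_states=True, return_dict=False)
print(len(out), out[0].shape)  # plain tuple: (last_hidden_state, pooled_output, hidden_states)
```

With `return_dict=False` the wrapped module returns a plain tuple rather than a `ModelOutput`, which is the form the compile-time wrapper works with.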
```diff
@@ -78,28 +81,32 @@ class RBLNCLIPTextModel(RBLNModel):
         cls,
         preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
         model_config: "CLIPTextConfig",
+        rbln_kwargs: Dict[str, Any] = {},
         rbln_batch_size: Optional[int] = None,
-        rbln_img_width: Optional[int] = None,
-        rbln_img_height: Optional[int] = None,
     ) -> RBLNConfig:
-
+        rbln_batch_size = rbln_kwargs.get("batch_size", None)
         if rbln_batch_size is None:
             rbln_batch_size = 1
 
-
-        input_info=[
-            (
-                "input_ids",
-                [
-                    rbln_batch_size,
-                    model_config.max_position_embeddings,
-                ],
-                "int64",
-            ),
-        ],
-        )
+        model_config.return_dict = False
 
-
+        input_info = [
+            (
+                "input_ids",
+                [
+                    rbln_batch_size,
+                    model_config.max_position_embeddings,
+                ],
+                "int64",
+            ),
+        ]
+
+        rbln_compile_config = RBLNCompileConfig(input_info=input_info)
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
         return rbln_config
 
     def forward(self, input_ids: "torch.Tensor", **kwargs):
```
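`_get_rbln_config` now reads its options from a single `rbln_kwargs` dict (`batch_size` above) instead of dedicated `rbln_img_width`/`rbln_img_height` arguments. A sketch of the calling convention this implies; the helper name and the prefix-stripping rule are assumptions, not the library's actual plumbing:

```python
# Hypothetical helper: gather user-facing `rbln_*` keyword arguments into the
# single `rbln_kwargs` dict that _get_rbln_config consumes.
def split_rbln_kwargs(kwargs: dict) -> tuple[dict, dict]:
    """Separate `rbln_*` options (with the prefix stripped) from other kwargs."""
    rbln_kwargs = {k[len("rbln_"):]: v for k, v in kwargs.items() if k.startswith("rbln_")}
    others = {k: v for k, v in kwargs.items() if not k.startswith("rbln_")}
    return rbln_kwargs, others

rbln_kwargs, rest = split_rbln_kwargs({"rbln_batch_size": 2, "torch_dtype": "float32"})
assert rbln_kwargs == {"batch_size": 2}
assert rest == {"torch_dtype": "float32"}
```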
```diff
@@ -113,3 +120,97 @@ class RBLNCLIPTextModel(RBLNModel):
 
 class RBLNCLIPTextModelWithProjection(RBLNCLIPTextModel):
     original_model_class = CLIPTextModelWithProjection
+
+
+class _VisionEncoder(torch.nn.Module):
+    def __init__(self, enc: CLIPVisionModel):
+        super().__init__()
+        self.enc = enc
+
+    def forward(self, inp):
+        enc_out = self.enc(inp, output_hidden_states=True, return_dict=False)
+        return enc_out
+
+
+class RBLNCLIPVisionModel(RBLNModel):
+    original_model_class = CLIPVisionModel
+    original_config_class = CLIPVisionConfig
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        configtmp = AutoConfig.from_pretrained
+        modeltmp = AutoModel.from_pretrained
+        AutoConfig.from_pretrained = cls.original_config_class.from_pretrained
+        AutoModel.from_pretrained = cls.original_model_class.from_pretrained
+        rt = super().from_pretrained(*args, **kwargs)
+        AutoConfig.from_pretrained = configtmp
+        AutoModel.from_pretrained = modeltmp
+        return rt
+
+    @classmethod
+    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNConfig) -> torch.nn.Module:
+        return _VisionEncoder(model).eval()
+
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
+        model_config: "CLIPTextConfig",
+        rbln_kwargs: Dict[str, Any] = {},
+    ) -> RBLNConfig:
+        rbln_batch_size = rbln_kwargs.get("batch_size", 1)
+        rbln_image_size = rbln_kwargs.get("image_size", None)
+
+        if rbln_image_size is None:
+            rbln_image_size = getattr(model_config, "image_size", None)
+
+        if isinstance(rbln_image_size, int):
+            rbln_image_size = (rbln_image_size, rbln_image_size)
+
+        if rbln_image_size is None:
+            raise ValueError("`rbln_image_size` should be specified!")
+
+        rbln_compile_config = RBLNCompileConfig(
+            input_info=[
+                (
+                    "pixel_values",
+                    [
+                        rbln_batch_size,
+                        3,
+                        rbln_image_size[0],
+                        rbln_image_size[1],
+                    ],
+                    "float32",
+                )
+            ]
+        )
+
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
+
+        rbln_config.model_cfg.update(
+            {
+                "batch_size": rbln_batch_size,
+                "image_size": rbln_image_size,
+            }
+        )
+
+        return rbln_config
+
+    def forward(
+        self,
+        pixel_values: Optional[torch.FloatTensor] = None,
+        **kwargs,
+    ) -> Union[Tuple, BaseModelOutputWithPooling]:
+        if len(kwargs) > 0 and any(kwargs.values()):
+            logger.warning(f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__}.")
+
+        output = super().forward(pixel_values)
+        return BaseModelOutputWithPooling(
+            last_hidden_state=output[0],
+            pooler_output=output[1],
+            hidden_states=output[2:],
+        )
```
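The new `RBLNCLIPVisionModel.from_pretrained` temporarily points `AutoConfig.from_pretrained` and `AutoModel.from_pretrained` at the CLIP vision classes and restores them afterwards. The same pattern written as a context manager, shown only as a sketch (this helper does not exist in the package); the `finally` block guarantees the originals are restored even if loading fails:

```python
from contextlib import contextmanager
from transformers import AutoConfig, AutoModel

@contextmanager
def redirect_auto_classes(config_cls, model_cls):
    """Temporarily route AutoConfig/AutoModel loading to specific classes."""
    orig_config = AutoConfig.from_pretrained
    orig_model = AutoModel.from_pretrained
    AutoConfig.from_pretrained = config_cls.from_pretrained
    AutoModel.from_pretrained = model_cls.from_pretrained
    try:
        yield
    finally:
        AutoConfig.from_pretrained = orig_config
        AutoModel.from_pretrained = orig_model

# Usage sketch:
# with redirect_auto_classes(CLIPVisionConfig, CLIPVisionModel):
#     model = SomeRBLNModelSubclass.from_pretrained(...)
```

The diff itself restores the attributes in straight-line code after `super().from_pretrained`, which works on the happy path but skips the restore if loading raises.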
```diff
--- a/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py
+++ b/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py
@@ -93,17 +93,29 @@ class DecoderOnlyWrapper(torch.nn.Module):
 
     def forward(
         self,
-
+        input_ids_or_inputs_embeds,
         attention_mask,
         cache_position,
         batch_position,
+        query_idx,
         *past_key_values,
     ):
-        if
+        if input_ids_or_inputs_embeds.shape[1] == 1:
             rbln_batch_position = None
         else:
             rbln_batch_position = batch_position
 
+        if input_ids_or_inputs_embeds.ndim == 2:
+            # input_ids
+            input_ids = input_ids_or_inputs_embeds
+            inputs_embeds = None
+        elif input_ids_or_inputs_embeds.ndim == 3:
+            # inputs_embeds
+            input_ids = None
+            inputs_embeds = input_ids_or_inputs_embeds
+        else:
+            raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")
+
         # Formatting list of past_kv to DynamicCache class.
         past_key_values = RebelDynamicCache.from_input_format(
             cache_position,
```
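The wrapper now takes a single `input_ids_or_inputs_embeds` tensor and dispatches on its rank: a 2-D tensor is treated as token ids, a 3-D tensor as precomputed embeddings. A standalone illustration of that dispatch (the function name and sizes are illustrative, not from the diff):

```python
import torch

def split_input(input_ids_or_inputs_embeds: torch.Tensor):
    """Return (input_ids, inputs_embeds) based on tensor rank."""
    if input_ids_or_inputs_embeds.ndim == 2:        # (batch, seq_len) -> token ids
        return input_ids_or_inputs_embeds, None
    elif input_ids_or_inputs_embeds.ndim == 3:      # (batch, seq_len, hidden) -> embeddings
        return None, input_ids_or_inputs_embeds
    raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")

ids, emb = split_input(torch.randint(0, 100, (1, 8)))   # -> (ids, None)
ids2, emb2 = split_input(torch.randn(1, 8, 64))          # -> (None, embeddings)
```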
```diff
@@ -115,6 +127,7 @@ class DecoderOnlyWrapper(torch.nn.Module):
         outputs = forward_dict["wrapper"](
             self.model,
             input_ids=input_ids,
+            inputs_embeds=inputs_embeds,
             attention_mask=attention_mask,
             position_ids=cache_position,
             past_key_values=past_key_values,
```
```diff
@@ -124,11 +137,14 @@ class DecoderOnlyWrapper(torch.nn.Module):
         )
 
         hidden_states = outputs[0]
+        if batch_position >= 0:
+            hidden_states = hidden_states[:, query_idx].unsqueeze(1)
+
         logits = self.lm_head(hidden_states)
 
         output = (logits,) + outputs[1:]
 
-        return output, batch_position
+        return output, batch_position + query_idx
 
 
 class DecoderOnlyAttention:
```
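When `batch_position >= 0` (the prefill path, as far as this diff shows), only the hidden state at `query_idx` is kept before the `lm_head`, so logits are computed for one position instead of the whole padded sequence. A shape-only sketch with illustrative sizes:

```python
import torch

batch, seq_len, hidden, vocab = 1, 128, 64, 1000
hidden_states = torch.randn(batch, seq_len, hidden)
lm_head = torch.nn.Linear(hidden, vocab, bias=False)

query_idx = 41  # index of the last valid (non-padded) token, illustrative value
prefill_logits = lm_head(hidden_states[:, query_idx].unsqueeze(1))
full_logits = lm_head(hidden_states)  # what running the head on every position would cost

print(prefill_logits.shape)  # torch.Size([1, 1, 1000])
print(full_logits.shape)     # torch.Size([1, 128, 1000])
```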
```diff
@@ -323,8 +339,16 @@ class DecoderOnlyModel:
         forward_dict: Optional[Dict[str, classmethod]] = None,
         rotary_pos_emb=None,
     ) -> BaseModelOutputWithPast:
+        # retrieve input_ids and inputs_embeds
+        if (input_ids is None) ^ (inputs_embeds is not None):
+            raise ValueError(
+                "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
+            )
+
         # embed positions
-        inputs_embeds
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_tokens(input_ids)
+
         hidden_states = inputs_embeds
         attention_mask = (1 - attention_mask) * torch.finfo(torch.float16).min
 
```
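`DecoderOnlyModel.forward` now validates that exactly one of `input_ids` and `inputs_embeds` is provided and looks up embeddings only when needed. A self-contained sketch of that fallback, with an illustrative embedding table standing in for the model's `embed_tokens`:

```python
import torch

embed_tokens = torch.nn.Embedding(num_embeddings=1000, embedding_dim=64)

def resolve_embeddings(input_ids=None, inputs_embeds=None):
    # XOR check from the diff: exactly one of the two inputs must be given.
    if (input_ids is None) ^ (inputs_embeds is not None):
        raise ValueError(
            "You cannot specify both input_ids and inputs_embeds at the same time, "
            "and must specify either one"
        )
    if inputs_embeds is None:
        inputs_embeds = embed_tokens(input_ids)
    return inputs_embeds

hidden = resolve_embeddings(input_ids=torch.randint(0, 1000, (1, 8)))
print(hidden.shape)  # torch.Size([1, 8, 64])
```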
|