optimum-rbln 0.7.3a4__py3-none-any.whl → 0.7.3a6__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +8 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/diffusers/__init__.py +8 -0
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +11 -3
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +15 -8
- optimum/rbln/diffusers/pipelines/__init__.py +8 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +7 -1
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +25 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +106 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +25 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +3 -0
- optimum/rbln/modeling_base.py +0 -11
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +59 -62
- optimum/rbln/transformers/models/phi/phi_architecture.py +3 -3
- {optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a6.dist-info}/METADATA +1 -1
- {optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a6.dist-info}/RECORD +18 -16
- {optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a6.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a6.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__init__.py
CHANGED
@@ -78,9 +78,13 @@ _import_structure = {
|
|
78
78
|
"RBLNAutoencoderKL",
|
79
79
|
"RBLNControlNetModel",
|
80
80
|
"RBLNPriorTransformer",
|
81
|
+
"RBLNKandinskyV22CombinedPipeline",
|
82
|
+
"RBLNKandinskyV22Img2ImgCombinedPipeline",
|
81
83
|
"RBLNKandinskyV22InpaintCombinedPipeline",
|
82
84
|
"RBLNKandinskyV22InpaintPipeline",
|
85
|
+
"RBLNKandinskyV22Img2ImgPipeline",
|
83
86
|
"RBLNKandinskyV22PriorPipeline",
|
87
|
+
"RBLNKandinskyV22Pipeline",
|
84
88
|
"RBLNStableDiffusionPipeline",
|
85
89
|
"RBLNStableDiffusionXLPipeline",
|
86
90
|
"RBLNUNet2DConditionModel",
|
@@ -107,8 +111,12 @@ if TYPE_CHECKING:
|
|
107
111
|
RBLNAutoencoderKL,
|
108
112
|
RBLNControlNetModel,
|
109
113
|
RBLNDiffusionMixin,
|
114
|
+
RBLNKandinskyV22CombinedPipeline,
|
115
|
+
RBLNKandinskyV22Img2ImgCombinedPipeline,
|
116
|
+
RBLNKandinskyV22Img2ImgPipeline,
|
110
117
|
RBLNKandinskyV22InpaintCombinedPipeline,
|
111
118
|
RBLNKandinskyV22InpaintPipeline,
|
119
|
+
RBLNKandinskyV22Pipeline,
|
112
120
|
RBLNKandinskyV22PriorPipeline,
|
113
121
|
RBLNMultiControlNetModel,
|
114
122
|
RBLNPriorTransformer,
|
optimum/rbln/__version__.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
18
18
|
version_tuple: VERSION_TUPLE
|
19
19
|
|
20
|
-
__version__ = version = '0.7.3a4'
|
21
|
-
__version_tuple__ = version_tuple = (0, 7, 3, 'a4')
|
20
|
+
__version__ = version = '0.7.3a6'
|
21
|
+
__version_tuple__ = version_tuple = (0, 7, 3, 'a6')
|
@@ -24,9 +24,13 @@ ALL_IMPORTABLE_CLASSES.update(LOADABLE_CLASSES["optimum.rbln"])
|
|
24
24
|
|
25
25
|
_import_structure = {
|
26
26
|
"pipelines": [
|
27
|
+
"RBLNKandinskyV22CombinedPipeline",
|
28
|
+
"RBLNKandinskyV22Img2ImgCombinedPipeline",
|
27
29
|
"RBLNKandinskyV22InpaintCombinedPipeline",
|
28
30
|
"RBLNKandinskyV22InpaintPipeline",
|
31
|
+
"RBLNKandinskyV22Img2ImgPipeline",
|
29
32
|
"RBLNKandinskyV22PriorPipeline",
|
33
|
+
"RBLNKandinskyV22Pipeline",
|
30
34
|
"RBLNStableDiffusionPipeline",
|
31
35
|
"RBLNStableDiffusionXLPipeline",
|
32
36
|
"RBLNStableDiffusionImg2ImgPipeline",
|
@@ -66,8 +70,12 @@ if TYPE_CHECKING:
|
|
66
70
|
RBLNVQModel,
|
67
71
|
)
|
68
72
|
from .pipelines import (
|
73
|
+
RBLNKandinskyV22CombinedPipeline,
|
74
|
+
RBLNKandinskyV22Img2ImgCombinedPipeline,
|
75
|
+
RBLNKandinskyV22Img2ImgPipeline,
|
69
76
|
RBLNKandinskyV22InpaintCombinedPipeline,
|
70
77
|
RBLNKandinskyV22InpaintPipeline,
|
78
|
+
RBLNKandinskyV22Pipeline,
|
71
79
|
RBLNKandinskyV22PriorPipeline,
|
72
80
|
RBLNMultiControlNetModel,
|
73
81
|
RBLNStableDiffusion3Img2ImgPipeline,
|
@@ -90,9 +90,17 @@ class RBLNVQModel(RBLNModel):
|
|
90
90
|
model_config: "PretrainedConfig",
|
91
91
|
rbln_kwargs: Dict[str, Any] = {},
|
92
92
|
) -> RBLNConfig:
|
93
|
-
batch_size = rbln_kwargs.get("batch_size")
|
94
|
-
|
95
|
-
|
93
|
+
batch_size = rbln_kwargs.get("batch_size")
|
94
|
+
if batch_size is None:
|
95
|
+
batch_size = 1
|
96
|
+
|
97
|
+
height = rbln_kwargs.get("img_height")
|
98
|
+
if height is None:
|
99
|
+
height = 512
|
100
|
+
|
101
|
+
width = rbln_kwargs.get("img_width")
|
102
|
+
if width is None:
|
103
|
+
width = 512
|
96
104
|
|
97
105
|
if hasattr(model_config, "block_out_channels"):
|
98
106
|
scale_factor = 2 ** (len(model_config.block_out_channels) - 1)
|
@@ -176,15 +176,22 @@ class RBLNUNet2DConditionModel(RBLNModel):
|
|
176
176
|
raise ValueError("Both image height and image width must be given or not given")
|
177
177
|
elif image_size[0] is None and image_size[1] is None:
|
178
178
|
if rbln_config["img2img_pipeline"]:
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
179
|
+
if hasattr(pipe, "vae"):
|
180
|
+
# In case of img2img, sample size of unet is determined by vae encoder.
|
181
|
+
vae_sample_size = pipe.vae.config.sample_size
|
182
|
+
if isinstance(vae_sample_size, int):
|
183
|
+
sample_size = vae_sample_size // scale_factor
|
184
|
+
else:
|
185
|
+
sample_size = (
|
186
|
+
vae_sample_size[0] // scale_factor,
|
187
|
+
vae_sample_size[1] // scale_factor,
|
188
|
+
)
|
189
|
+
elif hasattr(pipe, "movq"):
|
190
|
+
logger.warning(
|
191
|
+
"RBLN config 'img_height' and 'img_width' should have been provided for this pipeline. "
|
192
|
+
"Both variable will be set 512 by default."
|
187
193
|
)
|
194
|
+
sample_size = (512 // scale_factor, 512 // scale_factor)
|
188
195
|
else:
|
189
196
|
sample_size = pipe.unet.config.sample_size
|
190
197
|
else:
|
@@ -26,9 +26,13 @@ _import_structure = {
|
|
26
26
|
"RBLNStableDiffusionXLControlNetPipeline",
|
27
27
|
],
|
28
28
|
"kandinsky2_2": [
|
29
|
+
"RBLNKandinskyV22CombinedPipeline",
|
30
|
+
"RBLNKandinskyV22Img2ImgCombinedPipeline",
|
29
31
|
"RBLNKandinskyV22InpaintCombinedPipeline",
|
30
32
|
"RBLNKandinskyV22InpaintPipeline",
|
33
|
+
"RBLNKandinskyV22Img2ImgPipeline",
|
31
34
|
"RBLNKandinskyV22PriorPipeline",
|
35
|
+
"RBLNKandinskyV22Pipeline",
|
32
36
|
],
|
33
37
|
"stable_diffusion": [
|
34
38
|
"RBLNStableDiffusionImg2ImgPipeline",
|
@@ -55,8 +59,12 @@ if TYPE_CHECKING:
|
|
55
59
|
RBLNStableDiffusionXLControlNetPipeline,
|
56
60
|
)
|
57
61
|
from .kandinsky2_2 import (
|
62
|
+
RBLNKandinskyV22CombinedPipeline,
|
63
|
+
RBLNKandinskyV22Img2ImgCombinedPipeline,
|
64
|
+
RBLNKandinskyV22Img2ImgPipeline,
|
58
65
|
RBLNKandinskyV22InpaintCombinedPipeline,
|
59
66
|
RBLNKandinskyV22InpaintPipeline,
|
67
|
+
RBLNKandinskyV22Pipeline,
|
60
68
|
RBLNKandinskyV22PriorPipeline,
|
61
69
|
)
|
62
70
|
from .stable_diffusion import (
|
@@ -12,6 +12,12 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
-
from .
|
15
|
+
from .pipeline_kandinsky2_2 import RBLNKandinskyV22Pipeline
|
16
|
+
from .pipeline_kandinsky2_2_combined import (
|
17
|
+
RBLNKandinskyV22CombinedPipeline,
|
18
|
+
RBLNKandinskyV22Img2ImgCombinedPipeline,
|
19
|
+
RBLNKandinskyV22InpaintCombinedPipeline,
|
20
|
+
)
|
21
|
+
from .pipeline_kandinsky2_2_img2img import RBLNKandinskyV22Img2ImgPipeline
|
16
22
|
from .pipeline_kandinsky2_2_inpaint import RBLNKandinskyV22InpaintPipeline
|
17
23
|
from .pipeline_kandinsky2_2_prior import RBLNKandinskyV22PriorPipeline
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Copyright 2024 Rebellions Inc.
|
2
|
+
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at:
|
6
|
+
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from diffusers import KandinskyV22Pipeline
|
16
|
+
|
17
|
+
from ...modeling_diffusers import RBLNDiffusionMixin
|
18
|
+
|
19
|
+
|
20
|
+
class RBLNKandinskyV22Pipeline(RBLNDiffusionMixin, KandinskyV22Pipeline):
|
21
|
+
original_class = KandinskyV22Pipeline
|
22
|
+
_submodules = ["unet", "movq"]
|
23
|
+
|
24
|
+
def get_compiled_image_size(self):
|
25
|
+
return self.movq.image_size
|
@@ -14,6 +14,8 @@
|
|
14
14
|
|
15
15
|
from diffusers import (
|
16
16
|
DDPMScheduler,
|
17
|
+
KandinskyV22CombinedPipeline,
|
18
|
+
KandinskyV22Img2ImgCombinedPipeline,
|
17
19
|
KandinskyV22InpaintCombinedPipeline,
|
18
20
|
PriorTransformer,
|
19
21
|
UnCLIPScheduler,
|
@@ -28,10 +30,114 @@ from transformers import (
|
|
28
30
|
)
|
29
31
|
|
30
32
|
from ...modeling_diffusers import RBLNDiffusionMixin
|
33
|
+
from .pipeline_kandinsky2_2 import RBLNKandinskyV22Pipeline
|
34
|
+
from .pipeline_kandinsky2_2_img2img import RBLNKandinskyV22Img2ImgPipeline
|
31
35
|
from .pipeline_kandinsky2_2_inpaint import RBLNKandinskyV22InpaintPipeline
|
32
36
|
from .pipeline_kandinsky2_2_prior import RBLNKandinskyV22PriorPipeline
|
33
37
|
|
34
38
|
|
39
|
+
class RBLNKandinskyV22CombinedPipeline(RBLNDiffusionMixin, KandinskyV22CombinedPipeline):
|
40
|
+
original_class = KandinskyV22CombinedPipeline
|
41
|
+
_connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22Pipeline}
|
42
|
+
_submodules = ["prior_pipe", "decoder_pipe"]
|
43
|
+
_prefix = {"prior_pipe": "prior_"}
|
44
|
+
|
45
|
+
def __init__(
|
46
|
+
self,
|
47
|
+
unet: "UNet2DConditionModel",
|
48
|
+
scheduler: "DDPMScheduler",
|
49
|
+
movq: "VQModel",
|
50
|
+
prior_prior: "PriorTransformer",
|
51
|
+
prior_image_encoder: "CLIPVisionModelWithProjection",
|
52
|
+
prior_text_encoder: "CLIPTextModelWithProjection",
|
53
|
+
prior_tokenizer: "CLIPTokenizer",
|
54
|
+
prior_scheduler: "UnCLIPScheduler",
|
55
|
+
prior_image_processor: "CLIPImageProcessor",
|
56
|
+
):
|
57
|
+
RBLNDiffusionMixin.__init__(self)
|
58
|
+
super(KandinskyV22CombinedPipeline, self).__init__()
|
59
|
+
|
60
|
+
self.register_modules(
|
61
|
+
unet=unet,
|
62
|
+
scheduler=scheduler,
|
63
|
+
movq=movq,
|
64
|
+
prior_prior=prior_prior,
|
65
|
+
prior_image_encoder=prior_image_encoder,
|
66
|
+
prior_text_encoder=prior_text_encoder,
|
67
|
+
prior_tokenizer=prior_tokenizer,
|
68
|
+
prior_scheduler=prior_scheduler,
|
69
|
+
prior_image_processor=prior_image_processor,
|
70
|
+
)
|
71
|
+
|
72
|
+
self.prior_pipe = RBLNKandinskyV22PriorPipeline(
|
73
|
+
prior=prior_prior,
|
74
|
+
image_encoder=prior_image_encoder,
|
75
|
+
text_encoder=prior_text_encoder,
|
76
|
+
tokenizer=prior_tokenizer,
|
77
|
+
scheduler=prior_scheduler,
|
78
|
+
image_processor=prior_image_processor,
|
79
|
+
)
|
80
|
+
self.decoder_pipe = RBLNKandinskyV22Pipeline(
|
81
|
+
unet=unet,
|
82
|
+
scheduler=scheduler,
|
83
|
+
movq=movq,
|
84
|
+
)
|
85
|
+
|
86
|
+
def get_compiled_image_size(self):
|
87
|
+
return self.movq.image_size
|
88
|
+
|
89
|
+
|
90
|
+
class RBLNKandinskyV22Img2ImgCombinedPipeline(RBLNDiffusionMixin, KandinskyV22Img2ImgCombinedPipeline):
|
91
|
+
original_class = KandinskyV22Img2ImgCombinedPipeline
|
92
|
+
_connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22Img2ImgPipeline}
|
93
|
+
_submodules = ["prior_pipe", "decoder_pipe"]
|
94
|
+
_prefix = {"prior_pipe": "prior_"}
|
95
|
+
|
96
|
+
def __init__(
|
97
|
+
self,
|
98
|
+
unet: "UNet2DConditionModel",
|
99
|
+
scheduler: "DDPMScheduler",
|
100
|
+
movq: "VQModel",
|
101
|
+
prior_prior: "PriorTransformer",
|
102
|
+
prior_image_encoder: "CLIPVisionModelWithProjection",
|
103
|
+
prior_text_encoder: "CLIPTextModelWithProjection",
|
104
|
+
prior_tokenizer: "CLIPTokenizer",
|
105
|
+
prior_scheduler: "UnCLIPScheduler",
|
106
|
+
prior_image_processor: "CLIPImageProcessor",
|
107
|
+
):
|
108
|
+
RBLNDiffusionMixin.__init__(self)
|
109
|
+
super(KandinskyV22Img2ImgCombinedPipeline, self).__init__()
|
110
|
+
|
111
|
+
self.register_modules(
|
112
|
+
unet=unet,
|
113
|
+
scheduler=scheduler,
|
114
|
+
movq=movq,
|
115
|
+
prior_prior=prior_prior,
|
116
|
+
prior_image_encoder=prior_image_encoder,
|
117
|
+
prior_text_encoder=prior_text_encoder,
|
118
|
+
prior_tokenizer=prior_tokenizer,
|
119
|
+
prior_scheduler=prior_scheduler,
|
120
|
+
prior_image_processor=prior_image_processor,
|
121
|
+
)
|
122
|
+
|
123
|
+
self.prior_pipe = RBLNKandinskyV22PriorPipeline(
|
124
|
+
prior=prior_prior,
|
125
|
+
image_encoder=prior_image_encoder,
|
126
|
+
text_encoder=prior_text_encoder,
|
127
|
+
tokenizer=prior_tokenizer,
|
128
|
+
scheduler=prior_scheduler,
|
129
|
+
image_processor=prior_image_processor,
|
130
|
+
)
|
131
|
+
self.decoder_pipe = RBLNKandinskyV22Img2ImgPipeline(
|
132
|
+
unet=unet,
|
133
|
+
scheduler=scheduler,
|
134
|
+
movq=movq,
|
135
|
+
)
|
136
|
+
|
137
|
+
def get_compiled_image_size(self):
|
138
|
+
return self.movq.image_size
|
139
|
+
|
140
|
+
|
35
141
|
class RBLNKandinskyV22InpaintCombinedPipeline(RBLNDiffusionMixin, KandinskyV22InpaintCombinedPipeline):
|
36
142
|
original_class = KandinskyV22InpaintCombinedPipeline
|
37
143
|
_connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22InpaintPipeline}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Copyright 2024 Rebellions Inc.
|
2
|
+
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at:
|
6
|
+
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from diffusers import KandinskyV22Img2ImgPipeline
|
16
|
+
|
17
|
+
from ...modeling_diffusers import RBLNDiffusionMixin
|
18
|
+
|
19
|
+
|
20
|
+
class RBLNKandinskyV22Img2ImgPipeline(RBLNDiffusionMixin, KandinskyV22Img2ImgPipeline):
|
21
|
+
original_class = KandinskyV22Img2ImgPipeline
|
22
|
+
_submodules = ["unet", "movq"]
|
23
|
+
|
24
|
+
def get_compiled_image_size(self):
|
25
|
+
return self.movq.image_size
|
@@ -20,3 +20,6 @@ from ...modeling_diffusers import RBLNDiffusionMixin
|
|
20
20
|
class RBLNKandinskyV22InpaintPipeline(RBLNDiffusionMixin, KandinskyV22InpaintPipeline):
|
21
21
|
original_class = KandinskyV22InpaintPipeline
|
22
22
|
_submodules = ["unet", "movq"]
|
23
|
+
|
24
|
+
def get_compiled_image_size(self):
|
25
|
+
return self.movq.image_size
|
optimum/rbln/modeling_base.py
CHANGED
@@ -282,15 +282,6 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
|
|
282
282
|
**kwargs,
|
283
283
|
)
|
284
284
|
|
285
|
-
@classmethod
|
286
|
-
def _check_compiled_models(
|
287
|
-
cls, compiled_models: Dict[str, rebel.RBLNCompiledModel], rbln_config: RBLNConfig, config: "PretrainedConfig"
|
288
|
-
):
|
289
|
-
# check compiled model can create runtimes.
|
290
|
-
# this logic currently only works in LLM
|
291
|
-
# fail when LLM model using Paged Attention can't guarantee max sequence length
|
292
|
-
pass
|
293
|
-
|
294
285
|
@classmethod
|
295
286
|
def _from_compiled_models(
|
296
287
|
cls,
|
@@ -305,8 +296,6 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
|
|
305
296
|
if isinstance(model_save_dir, str):
|
306
297
|
model_save_dir = Path(model_save_dir)
|
307
298
|
|
308
|
-
cls._check_compiled_models(compiled_models=rbln_compiled_models, rbln_config=rbln_config, config=config)
|
309
|
-
|
310
299
|
# FIXME:: Should we convert it?
|
311
300
|
compiled_model_names = [cfg.compiled_model_name for cfg in rbln_config.compile_cfgs]
|
312
301
|
rbln_compiled_models = [rbln_compiled_models[cm_name] for cm_name in compiled_model_names]
|
@@ -98,9 +98,9 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
|
|
98
98
|
"""
|
99
99
|
|
100
100
|
NO_BLOCKS_ERROR = (
|
101
|
-
"No memory blocks are available for allocation."
|
102
|
-
"The generate() API cannot complete this inference task because Paged Attention is not fully supported by optimum-rbln."
|
103
|
-
"This is supported by vllm-rbln (see: https://docs.rbln.ai/software/model_serving/vllm_support/vllm-rbln.html)."
|
101
|
+
"No memory blocks are available for allocation. "
|
102
|
+
"The generate() API cannot complete this inference task because Paged Attention is not fully supported by optimum-rbln. "
|
103
|
+
"This is supported by vllm-rbln (see: https://docs.rbln.ai/software/model_serving/vllm_support/vllm-rbln.html). "
|
104
104
|
"Using vllm-rbln should fix this issue and enhance inference performance."
|
105
105
|
)
|
106
106
|
|
@@ -575,59 +575,58 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
|
|
575
575
|
nbits_per_param: int,
|
576
576
|
n_model_params: int,
|
577
577
|
) -> int:
|
578
|
-
num_attention_heads = getattr(config, "n_head", None) or getattr(config, "num_attention_heads")
|
579
|
-
num_layers = getattr(config, "n_layer", None) or getattr(config, "num_hidden_layers")
|
580
|
-
head_dim = getattr(config, "head_dim", None) or config.hidden_size // num_attention_heads
|
581
|
-
vocab_size = config.vocab_size
|
582
|
-
hidden_size = getattr(config, "n_embd", None) or getattr(config, "hidden_size")
|
583
|
-
num_key_value_heads = getattr(config, "num_key_value_heads", None) or num_attention_heads
|
584
|
-
|
585
|
-
TARGET_DRAM_LIMIT = int(tensor_parallel_size * 15.7 * 2**30) # 16GB # TODO(jongho): 더 정확한 값
|
586
|
-
|
587
578
|
def align(x: int, nbytes: int) -> int:
|
588
579
|
return int(math.ceil(x / nbytes) * nbytes)
|
589
580
|
|
590
581
|
def align_2MB(x: int) -> int:
|
591
582
|
return align(x, 2 * 1024 * 1024)
|
592
583
|
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
584
|
+
num_attention_heads = getattr(config, "n_head", None) or getattr(config, "num_attention_heads")
|
585
|
+
num_layers = getattr(config, "n_layer", None) or getattr(config, "num_hidden_layers")
|
586
|
+
head_dim = getattr(config, "head_dim", None) or config.hidden_size // num_attention_heads
|
587
|
+
vocab_size = config.vocab_size
|
588
|
+
hidden_size = getattr(config, "n_embd", None) or getattr(config, "hidden_size")
|
589
|
+
num_key_value_heads = getattr(config, "num_key_value_heads", None) or num_attention_heads
|
599
590
|
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
* tensor_parallel_size
|
605
|
-
)
|
591
|
+
# TODO(jongho): Update if target npu is REBEL.
|
592
|
+
ATOM_DRAM_NBYTES = 16 * 2**30
|
593
|
+
ATOM_SYS_DRAM_NBYTES = 288 * 2**20
|
594
|
+
available_dram = tensor_parallel_size * (ATOM_DRAM_NBYTES - ATOM_SYS_DRAM_NBYTES)
|
606
595
|
|
607
|
-
|
596
|
+
# Get estimated kernel size (approximated)
|
597
|
+
lm_heads_params = align(vocab_size, 64) * hidden_size
|
598
|
+
lm_heads_nbytes = (
|
599
|
+
align_2MB(lm_heads_params * nbits_per_param // 8 / tensor_parallel_size) * tensor_parallel_size
|
600
|
+
)
|
601
|
+
params = n_model_params - lm_heads_params
|
602
|
+
layer_nbytes = (
|
603
|
+
align_2MB(params * nbits_per_param // 8 / num_layers / tensor_parallel_size)
|
604
|
+
* num_layers
|
605
|
+
* tensor_parallel_size
|
606
|
+
)
|
607
|
+
kernel_size = layer_nbytes + lm_heads_nbytes
|
608
608
|
|
609
|
-
available_dram
|
609
|
+
available_dram -= kernel_size
|
610
610
|
|
611
|
-
|
612
|
-
|
611
|
+
# TODO: Accurate buffer estimation
|
612
|
+
buffer = 2**30 # 1GB Buffer
|
613
|
+
if tensor_parallel_size <= 4:
|
613
614
|
buffer /= 4
|
614
615
|
|
615
616
|
available_dram -= buffer
|
616
617
|
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
)
|
625
|
-
* num_layers
|
626
|
-
* 2 # (k, v)
|
627
|
-
* tensor_parallel_size
|
618
|
+
# Estimate nbytes per a single kvcache block
|
619
|
+
nbytes_per_block = (
|
620
|
+
align_2MB(
|
621
|
+
kvcache_block_size
|
622
|
+
* head_dim
|
623
|
+
* math.ceil(num_key_value_heads / tensor_parallel_size) # Shard
|
624
|
+
* 2 # (fp16)
|
628
625
|
)
|
629
|
-
|
630
|
-
|
626
|
+
* num_layers
|
627
|
+
* 2 # (k, v)
|
628
|
+
* tensor_parallel_size
|
629
|
+
)
|
631
630
|
n_blocks = available_dram // nbytes_per_block
|
632
631
|
|
633
632
|
return n_blocks, nbytes_per_block
|
@@ -685,27 +684,28 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
|
|
685
684
|
else:
|
686
685
|
rbln_kvcache_block_size = rbln_kvcache_partition_len
|
687
686
|
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
687
|
+
rbln_kvcache_num_blocks = (rbln_max_seq_len // rbln_kvcache_block_size) * rbln_batch_size
|
688
|
+
if rbln_attn_impl == "flash_attn":
|
689
|
+
max_num_blocks, _ = cls.get_maximum_num_blocks(
|
690
|
+
config=model_config,
|
691
|
+
tensor_parallel_size=rbln_kwargs.get("tensor_parallel_size", 1),
|
692
|
+
kvcache_block_size=rbln_kvcache_block_size,
|
693
|
+
nbits_per_param=16 if rbln_quantization is None else 4, # TODO(jongho): FIX Ad-hoc
|
694
|
+
n_model_params=rbln_kwargs["n_model_params"],
|
695
|
+
)
|
696
|
+
rbln_kvcache_num_blocks = min(rbln_kvcache_num_blocks, max_num_blocks)
|
697
697
|
|
698
|
-
|
699
|
-
|
700
|
-
|
698
|
+
required_blocks = rbln_max_seq_len // rbln_kvcache_block_size + 1
|
699
|
+
if rbln_kvcache_num_blocks < required_blocks:
|
700
|
+
rbln_kvcache_num_blocks = required_blocks
|
701
701
|
|
702
|
-
|
702
|
+
logger.info(f"[KVCache] Compiling with num_blocks: {rbln_kvcache_num_blocks}")
|
703
703
|
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
704
|
+
if rbln_kvcache_num_blocks < rbln_batch_size:
|
705
|
+
raise RuntimeError(
|
706
|
+
f"Batch size ({rbln_batch_size}) exceeds available KV cache blocks ({rbln_kvcache_num_blocks}). "
|
707
|
+
"Ensure the number of blocks is at least equal to the batch size."
|
708
|
+
)
|
709
709
|
|
710
710
|
num_attention_heads = getattr(model_config, "n_head", None) or getattr(model_config, "num_attention_heads")
|
711
711
|
num_key_value_heads = getattr(model_config, "num_key_value_heads", None) or num_attention_heads
|
@@ -805,9 +805,6 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
|
|
805
805
|
"kvcache_block_size": rbln_kvcache_block_size,
|
806
806
|
"attn_impl": rbln_attn_impl,
|
807
807
|
"kvcache_num_blocks": rbln_kvcache_num_blocks,
|
808
|
-
"model_num_blocks": model_num_blocks,
|
809
|
-
"max_num_blocks": max_num_blocks,
|
810
|
-
"nbytes_per_block": nbytes_per_block,
|
811
808
|
}
|
812
809
|
)
|
813
810
|
|
@@ -92,7 +92,7 @@ class PhiLayer(DecoderOnlyLayer):
|
|
92
92
|
|
93
93
|
hidden_states = self.get_pre_attention_layernorm()(hidden_states)
|
94
94
|
|
95
|
-
|
95
|
+
attn_output = self.self_attn(
|
96
96
|
hidden_states=hidden_states,
|
97
97
|
attention_mask=attention_mask,
|
98
98
|
seq_positions=seq_positions,
|
@@ -104,9 +104,9 @@ class PhiLayer(DecoderOnlyLayer):
|
|
104
104
|
|
105
105
|
feed_forward_hidden_states = self._original_mod.mlp(hidden_states)
|
106
106
|
|
107
|
-
hidden_states =
|
107
|
+
hidden_states = attn_output + feed_forward_hidden_states + residual
|
108
108
|
|
109
|
-
return hidden_states
|
109
|
+
return hidden_states
|
110
110
|
|
111
111
|
|
112
112
|
class PhiModel(DecoderOnlyModel):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: optimum-rbln
|
3
|
-
Version: 0.7.3a4
|
3
|
+
Version: 0.7.3a6
|
4
4
|
Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
|
5
5
|
Project-URL: Homepage, https://rebellions.ai
|
6
6
|
Project-URL: Documentation, https://docs.rbln.ai
|
@@ -1,31 +1,33 @@
|
|
1
|
-
optimum/rbln/__init__.py,sha256=
|
2
|
-
optimum/rbln/__version__.py,sha256=
|
1
|
+
optimum/rbln/__init__.py,sha256=ZDzXcl-oAcYJhKjJMpotjbTih9awo7HzUb6T3MUEP6Q,6894
|
2
|
+
optimum/rbln/__version__.py,sha256=9voT1MrnPHKvqTeiZK8bNEZcPseZOq7N_U5etptnmTE,519
|
3
3
|
optimum/rbln/modeling.py,sha256=nJsAs5zs--VVOYGFjYNpqfxYIemJIK4Lr0WEzlDLdP0,8390
|
4
|
-
optimum/rbln/modeling_base.py,sha256=
|
4
|
+
optimum/rbln/modeling_base.py,sha256=dNCL-BhrWCpuOVkZaj8-MW567Tf4lLo3p3Z3ldjWJfU,21779
|
5
5
|
optimum/rbln/modeling_config.py,sha256=7104bxmrvKW4Q6XTruQayiIGl8GHDFmPkJ3cknMIInE,11335
|
6
|
-
optimum/rbln/diffusers/__init__.py,sha256=
|
6
|
+
optimum/rbln/diffusers/__init__.py,sha256=Hq87CbtiCy85YmK2SB-OmUyfv77oe3j4bsTenTRnu6w,3623
|
7
7
|
optimum/rbln/diffusers/modeling_diffusers.py,sha256=zqVNgH9oeOx2iNE7VsW_FinVf4s6G5Idyh4TKz7XJJg,21116
|
8
8
|
optimum/rbln/diffusers/models/__init__.py,sha256=mkCvJyH1KcwrsUvYSq_bVC79oOfyqtBSFDyPS1_48wA,1478
|
9
9
|
optimum/rbln/diffusers/models/controlnet.py,sha256=EM_HlzCdaZdnnK0oGpY2fQeigPqHhlwh4NHCzlmoumI,10512
|
10
10
|
optimum/rbln/diffusers/models/autoencoders/__init__.py,sha256=dg17ZTUsiqTcbIaEE4fqew9uRbao0diQ21PXvRKIqKg,679
|
11
11
|
optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py,sha256=rCbC32bJnfXtsLdVvNVVHpRAkCYy6jeCSwIZ-JSReWk,9220
|
12
12
|
optimum/rbln/diffusers/models/autoencoders/vae.py,sha256=gB9HR7Bf7wpIXLv-Js4Pc3oyWRlqEe4cms4sI2AJicY,4380
|
13
|
-
optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=
|
13
|
+
optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=b36QqPbayjApKivceQVVyQxHyR1ZOZ1ffuGgdALEPTQ,6117
|
14
14
|
optimum/rbln/diffusers/models/transformers/__init__.py,sha256=V8rSR7WzHs-i8Cwb_MNxhY2NFbwPgxu24vGtkwl-6tk,706
|
15
15
|
optimum/rbln/diffusers/models/transformers/prior_transformer.py,sha256=VG9cQo-_eppDvQSW1q1euAGBt1socUHetN_fIN2u1iU,6169
|
16
16
|
optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=n_krmMgiRxWrG--567PNpk58EG_X7x7H4gidIkRvwjo,7308
|
17
17
|
optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
|
18
|
-
optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=
|
19
|
-
optimum/rbln/diffusers/pipelines/__init__.py,sha256=
|
18
|
+
optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=QIjVWQQf8KBn5rU7lvipdm3gNBxZl7l6HCAj7p5FjLU,15977
|
19
|
+
optimum/rbln/diffusers/pipelines/__init__.py,sha256=5KLZ5LrpMzBya2e_3_PvEoPwG24U8JMexfw_ygZREKc,3140
|
20
20
|
optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
|
21
21
|
optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=JWKtnZYBIfgmbAo0SLFIvHBQCv2BPSFNvpcdjG4GUOY,4113
|
22
22
|
optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=dGdw5cwJLS4CLv6IHskk5ZCcPgS7UDuHKbfOZ8ojNUs,35187
|
23
23
|
optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py,sha256=7xCiXrH4ToCTHohVGFXqO7_f9G8HShYaHgZxoMZARkQ,33664
|
24
24
|
optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py,sha256=Gzt2wg4dgFg0TV3Bu0cs8Xru3wVrxWUxxgciwZ-QKLE,44755
|
25
25
|
optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py,sha256=RfwxNX_zQWFtvvFQJ5bt3qtHbdYdQV_3XLHm9WYCKOs,46084
|
26
|
-
optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py,sha256=
|
27
|
-
optimum/rbln/diffusers/pipelines/kandinsky2_2/
|
28
|
-
optimum/rbln/diffusers/pipelines/kandinsky2_2/
|
26
|
+
optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py,sha256=I4YQq2HfA3xONbWsdJ870IEJPyLWeCDDG-UCJsu9YO8,1035
|
27
|
+
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py,sha256=aNFGOjth8tDvPrjYLbRWrkHr6p-8AFgcQx1Qay1fw70,904
|
28
|
+
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py,sha256=unqFDviA7dnx0yuo8L8tXVj2mjFYCPm7C9dcpdWBICc,6882
|
29
|
+
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py,sha256=fEs-WgJqWs5zvuCkKb7MuZokH9Mi6q-0DOEKxzfWxzo,932
|
30
|
+
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py,sha256=Ad2ZYCXaMiYpB0mz-8X1CGhILxrVbt7rRIXt6IPwYBM,932
|
29
31
|
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py,sha256=Mf7tzrXetwCgt7LuXfkX-CX1hltLgNZdwF9bHxAbDJM,874
|
30
32
|
optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py,sha256=gz6CbP4T6w8XH3PIGRIJXTmKFsChJIkwcAEAsiR5Ydg,830
|
31
33
|
optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py,sha256=DgRLzO9HxtgE1jICmHoHaqeVXM4Ih-5uo2JqNMAPMcc,876
|
@@ -60,7 +62,7 @@ optimum/rbln/transformers/models/clip/__init__.py,sha256=H9vuBwrmFO0-CqZhXUrKF-u
|
|
60
62
|
optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=NiSm7bHs4SReHDUr53BBWSX0Y8bkKOeUSpsBDrp8YDw,6628
|
61
63
|
optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=pDogsdpJKKB5rqnVFrRjwfhUvOSV-jZ3oARMsqSvOOQ,665
|
62
64
|
optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=7OIKteJLKNxOLOg0w3lLOM7TxZovQn4jkglI9wRkrtQ,40609
|
63
|
-
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=
|
65
|
+
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=uGdPGcFrWm2gAwFLjfBiALwFsl49VGCReVi4NUfOPxM,38898
|
64
66
|
optimum/rbln/transformers/models/dpt/__init__.py,sha256=gP1tkR3XMNlHq1GT87ugIVvb2o_1eAUg1JaniXjy1Lw,651
|
65
67
|
optimum/rbln/transformers/models/dpt/modeling_dpt.py,sha256=ZsS2SOiqcA4azULB-WFEMQZbgIoOyVUKqVKqrw_tWzA,3430
|
66
68
|
optimum/rbln/transformers/models/exaone/__init__.py,sha256=zYH_5tVa8-juEdsOIky7I33WSC3Zuhoq1upI0OHYeVw,859
|
@@ -85,7 +87,7 @@ optimum/rbln/transformers/models/mistral/mistral_architecture.py,sha256=_aU8TE_t
|
|
85
87
|
optimum/rbln/transformers/models/mistral/modeling_mistral.py,sha256=7nrddoBIHf8S12LZWBUpotnvG3gND11vMQda9yYXJ-s,1560
|
86
88
|
optimum/rbln/transformers/models/phi/__init__.py,sha256=mZLt1M7BbYEvSon5UlkniMUPa15SfjZFdw0kMSAF3VA,644
|
87
89
|
optimum/rbln/transformers/models/phi/modeling_phi.py,sha256=j-6Pqd5rR2JE8I1pnKFlCi4nW5Dv3wZjoPWxohissoo,1516
|
88
|
-
optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=
|
90
|
+
optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=TueyqmjPXWmOPOxBm4dIFyd0X3iV1jgw0U6c26iCAPk,4090
|
89
91
|
optimum/rbln/transformers/models/qwen2/__init__.py,sha256=RAMWc21W_2I6DH9xBjeNxPECmAcTrbKhSIefq3Lass0,648
|
90
92
|
optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=9-aFDvjMzPNUyGOz0qo33RE18bUFGYZ3Wt_68zb5uJY,1530
|
91
93
|
optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
|
@@ -114,7 +116,7 @@ optimum/rbln/utils/model_utils.py,sha256=DfD_Z2qvZHqcddXqnzTM1AN8khanj3-DXK2lJvV
|
|
114
116
|
optimum/rbln/utils/runtime_utils.py,sha256=5-DYniyP59nx-mrrbi7AqA77L85b4Cm5oLpaxidSyss,3699
|
115
117
|
optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
|
116
118
|
optimum/rbln/utils/submodule.py,sha256=oZoGrItB8WqY4i-K9WJPlLlcLohc1YGB9OHB8_XZw3A,4071
|
117
|
-
optimum_rbln-0.7.
|
118
|
-
optimum_rbln-0.7.
|
119
|
-
optimum_rbln-0.7.
|
120
|
-
optimum_rbln-0.7.
|
119
|
+
optimum_rbln-0.7.3a6.dist-info/METADATA,sha256=TGw8TCIfBQ9RWlzxf5JI16Zoy-xoEodnBO8m6SKXBsk,5300
|
120
|
+
optimum_rbln-0.7.3a6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
121
|
+
optimum_rbln-0.7.3a6.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
122
|
+
optimum_rbln-0.7.3a6.dist-info/RECORD,,
|
File without changes
|
File without changes
|