optimum-rbln 0.7.3a5__py3-none-any.whl → 0.7.3.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +8 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/diffusers/__init__.py +8 -0
- optimum/rbln/diffusers/modeling_diffusers.py +99 -111
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +11 -3
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +15 -8
- optimum/rbln/diffusers/pipelines/__init__.py +8 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +7 -1
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +25 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +107 -1
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +25 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +3 -0
- optimum/rbln/modeling_base.py +0 -11
- optimum/rbln/transformers/models/bart/modeling_bart.py +2 -0
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +12 -0
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +40 -41
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +19 -13
- optimum/rbln/transformers/models/t5/modeling_t5.py +3 -210
- optimum/rbln/utils/import_utils.py +7 -0
- {optimum_rbln-0.7.3a5.dist-info → optimum_rbln-0.7.3.post1.dist-info}/METADATA +1 -1
- {optimum_rbln-0.7.3a5.dist-info → optimum_rbln-0.7.3.post1.dist-info}/RECORD +23 -21
- {optimum_rbln-0.7.3a5.dist-info → optimum_rbln-0.7.3.post1.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.7.3a5.dist-info → optimum_rbln-0.7.3.post1.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__init__.py
CHANGED
@@ -78,9 +78,13 @@ _import_structure = {
         "RBLNAutoencoderKL",
         "RBLNControlNetModel",
         "RBLNPriorTransformer",
+        "RBLNKandinskyV22CombinedPipeline",
+        "RBLNKandinskyV22Img2ImgCombinedPipeline",
         "RBLNKandinskyV22InpaintCombinedPipeline",
         "RBLNKandinskyV22InpaintPipeline",
+        "RBLNKandinskyV22Img2ImgPipeline",
         "RBLNKandinskyV22PriorPipeline",
+        "RBLNKandinskyV22Pipeline",
         "RBLNStableDiffusionPipeline",
         "RBLNStableDiffusionXLPipeline",
         "RBLNUNet2DConditionModel",
@@ -107,8 +111,12 @@ if TYPE_CHECKING:
         RBLNAutoencoderKL,
         RBLNControlNetModel,
         RBLNDiffusionMixin,
+        RBLNKandinskyV22CombinedPipeline,
+        RBLNKandinskyV22Img2ImgCombinedPipeline,
+        RBLNKandinskyV22Img2ImgPipeline,
         RBLNKandinskyV22InpaintCombinedPipeline,
         RBLNKandinskyV22InpaintPipeline,
+        RBLNKandinskyV22Pipeline,
         RBLNKandinskyV22PriorPipeline,
         RBLNMultiControlNetModel,
         RBLNPriorTransformer,
optimum/rbln/__version__.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.7.3a5'
-__version_tuple__ = version_tuple = (0, 7, 3
+__version__ = version = '0.7.3.post1'
+__version_tuple__ = version_tuple = (0, 7, 3)
optimum/rbln/diffusers/__init__.py
CHANGED
@@ -24,9 +24,13 @@ ALL_IMPORTABLE_CLASSES.update(LOADABLE_CLASSES["optimum.rbln"])
 
 _import_structure = {
     "pipelines": [
+        "RBLNKandinskyV22CombinedPipeline",
+        "RBLNKandinskyV22Img2ImgCombinedPipeline",
         "RBLNKandinskyV22InpaintCombinedPipeline",
         "RBLNKandinskyV22InpaintPipeline",
+        "RBLNKandinskyV22Img2ImgPipeline",
         "RBLNKandinskyV22PriorPipeline",
+        "RBLNKandinskyV22Pipeline",
         "RBLNStableDiffusionPipeline",
         "RBLNStableDiffusionXLPipeline",
         "RBLNStableDiffusionImg2ImgPipeline",
@@ -66,8 +70,12 @@ if TYPE_CHECKING:
         RBLNVQModel,
     )
     from .pipelines import (
+        RBLNKandinskyV22CombinedPipeline,
+        RBLNKandinskyV22Img2ImgCombinedPipeline,
+        RBLNKandinskyV22Img2ImgPipeline,
         RBLNKandinskyV22InpaintCombinedPipeline,
         RBLNKandinskyV22InpaintPipeline,
+        RBLNKandinskyV22Pipeline,
         RBLNKandinskyV22PriorPipeline,
     )
     from .stable_diffusion import (
optimum/rbln/diffusers/modeling_diffusers.py
CHANGED
@@ -23,7 +23,6 @@ from ..modeling import RBLNModel
 from ..modeling_config import RUNTIME_KEYWORDS, ContextRblnConfig, use_rbln_config
 from ..utils.decorator_utils import remove_compile_time_kwargs
 from ..utils.logging import get_logger
-from . import pipelines
 
 
 logger = get_logger(__name__)
@@ -67,6 +66,7 @@ class RBLNDiffusionMixin:
         as keys in rbln_config
     """
 
+    _connected_classes = {}
     _submodules = []
     _prefix = {}
 
@@ -103,37 +103,6 @@ class RBLNDiffusionMixin:
                 }
             )
             submodule_config = submodule_cls.update_rbln_config_using_pipe(model, submodule_config)
-        elif hasattr(pipelines, submodule_class_name):
-            submodule_config = rbln_config.get(submodule_name, {})
-            submodule_config = copy.deepcopy(submodule_config)
-
-            submodule_cls: RBLNModel = getattr(importlib.import_module("optimum.rbln"), f"{submodule_class_name}")
-            prefix = cls._prefix.get(submodule_name, "")
-            connected_submodules = cls._connected_classes.get(submodule_name)._submodules
-            pipe_global_config = {k: v for k, v in submodule_config.items() if k not in connected_submodules}
-            submodule_config = {k: v for k, v in submodule_config.items() if k in connected_submodules}
-            for key in submodule_config.keys():
-                submodule_config[key].update(pipe_global_config)
-
-            for connected_submodule_name in connected_submodules:
-                connected_submodule_config = rbln_config.pop(prefix + connected_submodule_name, {})
-                if connected_submodule_name in submodule_config:
-                    submodule_config[connected_submodule_name].update(connected_submodule_config)
-                else:
-                    submodule_config[connected_submodule_name] = connected_submodule_config
-
-            pipe_global_config = {
-                k: v for k, v in rbln_config.items() if k != submodule_class_name and not isinstance(v, dict)
-            }
-
-            for connected_submodule_name in connected_submodules:
-                for k, v in pipe_global_config.items():
-                    if "guidance_scale" in k:
-                        if prefix + "guidance_scale" == k:
-                            submodule_config[connected_submodule_name]["guidance_scale"] = v
-                    else:
-                        submodule_config[connected_submodule_name][k] = v
-            rbln_config[submodule_name] = submodule_config
         else:
             raise ValueError(f"submodule {submodule_name} isn't supported")
         return submodule_config
@@ -199,25 +168,8 @@ class RBLNDiffusionMixin:
         else:
             # raise error if any of submodules are torch module.
             model_index_config = cls.load_config(pretrained_model_name_or_path=model_id)
-
-
-                for submodule in cls._submodules:
-                    submodule_config = rbln_config.pop(submodule, {})
-                    prefix = cls._prefix.get(submodule, "")
-                    connected_submodules = cls._connected_classes.get(submodule)._submodules
-                    for connected_submodule_name in connected_submodules:
-                        connected_submodule_config = submodule_config.pop(connected_submodule_name, {})
-                        if connected_submodule_config:
-                            rbln_config[prefix + connected_submodule_name] = connected_submodule_config
-                        submodules.append(prefix + connected_submodule_name)
-                pipe_global_config = {k: v for k, v in rbln_config.items() if k not in submodules}
-                for submodule in submodules:
-                    if submodule in rbln_config:
-                        rbln_config[submodule].update(pipe_global_config)
-            else:
-                submodules = cls._submodules
-
-            for submodule_name in submodules:
+            rbln_config = cls._flatten_rbln_config(rbln_config)
+            for submodule_name in cls._submodules:
                 if isinstance(kwargs.get(submodule_name), torch.nn.Module):
                     raise AssertionError(
                         f"{submodule_name} is not compiled torch module. If you want to compile, set `export=True`."
@@ -266,9 +218,89 @@ class RBLNDiffusionMixin:
                 lora_scales=lora_scales,
             )
 
-
+        if cls._load_connected_pipes:
+            compiled_submodules = cls._compile_pipelines(model, passed_submodules, model_save_dir, rbln_config)
+        else:
+            compiled_submodules = cls._compile_submodules(model, passed_submodules, model_save_dir, rbln_config)
         return cls._construct_pipe(model, compiled_submodules, model_save_dir, rbln_config)
 
+    @classmethod
+    def _prepare_rbln_config(
+        cls,
+        rbln_config,
+    ) -> Dict[str, Any]:
+        prepared_config = {}
+        for connected_pipe_name, connected_pipe_cls in cls._connected_classes.items():
+            connected_pipe_config = rbln_config.pop(connected_pipe_name, {})
+            prefix = cls._prefix.get(connected_pipe_name, "")
+            guidance_scale = rbln_config.pop(f"{prefix}guidance_scale", None)
+            if "guidance_scale" not in connected_pipe_config and guidance_scale is not None:
+                connected_pipe_config["guidance_scale"] = guidance_scale
+            for submodule_name in connected_pipe_cls._submodules:
+                submodule_config = rbln_config.pop(prefix + submodule_name, {})
+                if submodule_name not in connected_pipe_config:
+                    connected_pipe_config[submodule_name] = {}
+                connected_pipe_config[submodule_name].update(
+                    {k: v for k, v in submodule_config.items() if k not in connected_pipe_config[submodule_name]}
+                )
+            prepared_config[connected_pipe_name] = connected_pipe_config
+        prepared_config.update(rbln_config)
+        return prepared_config
+
+    @classmethod
+    def _flatten_rbln_config(
+        cls,
+        rbln_config,
+    ) -> Dict[str, Any]:
+        prepared_config = cls._prepare_rbln_config(rbln_config)
+        flattened_config = {}
+        pipe_global_config = {k: v for k, v in prepared_config.items() if k not in cls._connected_classes.keys()}
+        for connected_pipe_name, connected_pipe_cls in cls._connected_classes.items():
+            connected_pipe_config = prepared_config.pop(connected_pipe_name)
+            prefix = cls._prefix.get(connected_pipe_name, "")
+            connected_pipe_global_config = {
+                k: v for k, v in connected_pipe_config.items() if k not in connected_pipe_cls._submodules
+            }
+            for submodule_name in connected_pipe_cls._submodules:
+                flattened_config[prefix + submodule_name] = connected_pipe_config[submodule_name]
+                flattened_config[prefix + submodule_name].update(
+                    {
+                        k: v
+                        for k, v in connected_pipe_global_config.items()
+                        if k not in flattened_config[prefix + submodule_name]
+                    }
+                )
+        flattened_config.update(pipe_global_config)
+        return flattened_config
+
+    @classmethod
+    def _compile_pipelines(
+        cls,
+        model: torch.nn.Module,
+        passed_submodules: Dict[str, RBLNModel],
+        model_save_dir: Optional[PathLike],
+        rbln_config: Dict[str, Any],
+    ) -> Dict[str, RBLNModel]:
+        compiled_submodules = {}
+
+        rbln_config = cls._prepare_rbln_config(rbln_config)
+        pipe_global_config = {k: v for k, v in rbln_config.items() if k not in cls._connected_classes.keys()}
+        for connected_pipe_name, connected_pipe_cls in cls._connected_classes.items():
+            connected_pipe_submodules = {}
+            prefix = cls._prefix.get(connected_pipe_name, "")
+            for submodule_name in connected_pipe_cls._submodules:
+                connected_pipe_submodules[submodule_name] = passed_submodules.get(prefix + submodule_name, None)
+            connected_pipe = getattr(model, connected_pipe_name)
+            connected_pipe_config = {}
+            connected_pipe_config.update(pipe_global_config)
+            connected_pipe_config.update(rbln_config[connected_pipe_name])
+            connected_pipe_compiled_submodules = connected_pipe_cls._compile_submodules(
+                connected_pipe, connected_pipe_submodules, model_save_dir, connected_pipe_config, prefix
+            )
+            for submodule_name, compiled_submodule in connected_pipe_compiled_submodules.items():
+                compiled_submodules[prefix + submodule_name] = compiled_submodule
+        return compiled_submodules
+
     @classmethod
     def _compile_submodules(
         cls,
@@ -307,41 +339,6 @@ class RBLNDiffusionMixin:
                     model_save_dir=model_save_dir,
                     rbln_config=submodule_rbln_config,
                 )
-            elif hasattr(pipelines, submodule.__class__.__name__):
-                connected_pipe = submodule
-                connected_pipe_model_save_dir = model_save_dir
-                connected_pipe_rbln_config = submodule_rbln_config
-                connected_pipe_cls: RBLNDiffusionMixin = getattr(
-                    importlib.import_module("optimum.rbln"), connected_pipe.__class__.__name__
-                )
-                submodule_dict = {}
-                for name in connected_pipe.config.keys():
-                    if hasattr(connected_pipe, name):
-                        submodule_dict[name] = getattr(connected_pipe, name)
-                connected_pipe = connected_pipe_cls(**submodule_dict)
-                connected_pipe_submodules = {}
-                prefix = cls._prefix.get(submodule_name, "")
-                for name in connected_pipe_cls._submodules:
-                    if prefix + name in passed_submodules:
-                        connected_pipe_submodules[name] = passed_submodules.get(prefix + name)
-
-                connected_pipe_compiled_submodules = connected_pipe_cls._compile_submodules(
-                    model=connected_pipe,
-                    passed_submodules=connected_pipe_submodules,
-                    model_save_dir=model_save_dir,
-                    rbln_config=connected_pipe_rbln_config,
-                    prefix=prefix,
-                )
-                connected_pipe = connected_pipe_cls._construct_pipe(
-                    connected_pipe,
-                    connected_pipe_compiled_submodules,
-                    connected_pipe_model_save_dir,
-                    connected_pipe_rbln_config,
-                )
-
-                for name in connected_pipe_cls._submodules:
-                    compiled_submodules[prefix + name] = getattr(connected_pipe, name)
-                submodule = connected_pipe
             else:
                 raise ValueError(f"Unknown class of submodule({submodule_name}) : {submodule.__class__.__name__} ")
 
@@ -374,23 +371,16 @@ class RBLNDiffusionMixin:
     @classmethod
     def _construct_pipe(cls, model, submodules, model_save_dir, rbln_config):
         # Construct finalize pipe setup with compiled submodules and configurations
-        submodule_names = []
-        for submodule_name in cls._submodules:
-            submodule = getattr(model, submodule_name)
-            if hasattr(pipelines, submodule.__class__.__name__):
-                prefix = cls._prefix.get(submodule_name, "")
-                connected_pipe_submodules = submodules[submodule_name].__class__._submodules
-                connected_pipe_submodules = [prefix + name for name in connected_pipe_submodules]
-                submodule_names += connected_pipe_submodules
-                setattr(model, submodule_name, submodules[submodule_name])
-            else:
-                submodule_names.append(submodule_name)
-
         if model_save_dir is not None:
             # To skip saving original pytorch modules
-            for submodule_name in submodule_names:
+            for submodule_name in cls._submodules:
                 delattr(model, submodule_name)
 
+            if cls._load_connected_pipes:
+                for connected_pipe_name, connected_pipe_cls in cls._connected_classes.items():
+                    for submodule_name in connected_pipe_cls._submodules:
+                        delattr(getattr(model, connected_pipe_name), submodule_name)
+
             # Direct calling of `save_pretrained` causes config.unet = (None, None).
             # So config must be saved again, later.
             model.save_pretrained(model_save_dir)
@@ -398,10 +388,15 @@ class RBLNDiffusionMixin:
         # Causing warning messeages.
 
         update_dict = {}
-        for submodule_name in submodule_names:
+        for submodule_name in cls._submodules:
             # replace submodule
             setattr(model, submodule_name, submodules[submodule_name])
             update_dict[submodule_name] = ("optimum.rbln", submodules[submodule_name].__class__.__name__)
+        if cls._load_connected_pipes:
+            for connected_pipe_name, connected_pipe_cls in cls._connected_classes.items():
+                prefix = cls._prefix.get(connected_pipe_name, "")
+                for submodule_name in connected_pipe_cls._submodules:
+                    setattr(getattr(model, connected_pipe_name), submodule_name, submodules[prefix + submodule_name])
 
         # Update config to be able to load from model directory.
         #
@@ -420,16 +415,9 @@ class RBLNDiffusionMixin:
         if rbln_config.get("optimize_host_memory") is False:
             # Keep compiled_model objs to further analysis. -> TODO: remove soon...
             model.compiled_models = []
-
-
-
-                for submodule_name in connected_pipe.__class__._submodules:
-                    submodule = getattr(connected_pipe, submodule_name)
-                    model.compiled_models.extend(submodule.compiled_models)
-            else:
-                for name in cls._submodules:
-                    submodule = getattr(model, name)
-                    model.compiled_models.extend(submodule.compiled_models)
+            for name in cls._submodules:
+                submodule = getattr(model, name)
+                model.compiled_models.extend(submodule.compiled_models)
 
         return model
 
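The config-routing change in modeling_diffusers.py replaces the old pipelines-module introspection with the two explicit classmethods added above. A rough illustration of the intended mapping (my own sketch, not from the diff; it assumes the prior pipeline registers image_encoder/text_encoder/prior as its submodules, as the combined classes added later in this release suggest):

# user-facing config passed to a combined Kandinsky pipeline
rbln_config = {
    "img_height": 512,
    "img_width": 512,
    "prior_guidance_scale": 4.0,                 # routed into the prior_pipe section by _prepare_rbln_config
    "decoder_pipe": {"unet": {"batch_size": 2}},
}
# _flatten_rbln_config(rbln_config) should then yield, roughly:
# {
#     "prior_image_encoder": {"guidance_scale": 4.0},
#     "prior_text_encoder": {"guidance_scale": 4.0},
#     "prior_prior": {"guidance_scale": 4.0},
#     "unet": {"batch_size": 2},
#     "movq": {},
#     "img_height": 512,
#     "img_width": 512,
# }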
optimum/rbln/diffusers/models/autoencoders/vq_model.py
CHANGED
@@ -90,9 +90,17 @@ class RBLNVQModel(RBLNModel):
         model_config: "PretrainedConfig",
         rbln_kwargs: Dict[str, Any] = {},
     ) -> RBLNConfig:
-        batch_size = rbln_kwargs.get("batch_size")
-
-
+        batch_size = rbln_kwargs.get("batch_size")
+        if batch_size is None:
+            batch_size = 1
+
+        height = rbln_kwargs.get("img_height")
+        if height is None:
+            height = 512
+
+        width = rbln_kwargs.get("img_width")
+        if width is None:
+            width = 512
 
         if hasattr(model_config, "block_out_channels"):
             scale_factor = 2 ** (len(model_config.block_out_channels) - 1)
optimum/rbln/diffusers/models/unets/unet_2d_condition.py
CHANGED
@@ -176,15 +176,22 @@ class RBLNUNet2DConditionModel(RBLNModel):
             raise ValueError("Both image height and image width must be given or not given")
         elif image_size[0] is None and image_size[1] is None:
             if rbln_config["img2img_pipeline"]:
-
-
-
-
-
-
-
-
+                if hasattr(pipe, "vae"):
+                    # In case of img2img, sample size of unet is determined by vae encoder.
+                    vae_sample_size = pipe.vae.config.sample_size
+                    if isinstance(vae_sample_size, int):
+                        sample_size = vae_sample_size // scale_factor
+                    else:
+                        sample_size = (
+                            vae_sample_size[0] // scale_factor,
+                            vae_sample_size[1] // scale_factor,
+                        )
+                elif hasattr(pipe, "movq"):
+                    logger.warning(
+                        "RBLN config 'img_height' and 'img_width' should have been provided for this pipeline. "
+                        "Both variable will be set 512 by default."
                     )
+                    sample_size = (512 // scale_factor, 512 // scale_factor)
             else:
                 sample_size = pipe.unet.config.sample_size
         else:
optimum/rbln/diffusers/pipelines/__init__.py
CHANGED
@@ -26,9 +26,13 @@ _import_structure = {
         "RBLNStableDiffusionXLControlNetPipeline",
     ],
     "kandinsky2_2": [
+        "RBLNKandinskyV22CombinedPipeline",
+        "RBLNKandinskyV22Img2ImgCombinedPipeline",
         "RBLNKandinskyV22InpaintCombinedPipeline",
         "RBLNKandinskyV22InpaintPipeline",
+        "RBLNKandinskyV22Img2ImgPipeline",
         "RBLNKandinskyV22PriorPipeline",
+        "RBLNKandinskyV22Pipeline",
     ],
     "stable_diffusion": [
         "RBLNStableDiffusionImg2ImgPipeline",
@@ -55,8 +59,12 @@ if TYPE_CHECKING:
         RBLNStableDiffusionXLControlNetPipeline,
     )
     from .kandinsky2_2 import (
+        RBLNKandinskyV22CombinedPipeline,
+        RBLNKandinskyV22Img2ImgCombinedPipeline,
+        RBLNKandinskyV22Img2ImgPipeline,
         RBLNKandinskyV22InpaintCombinedPipeline,
         RBLNKandinskyV22InpaintPipeline,
+        RBLNKandinskyV22Pipeline,
         RBLNKandinskyV22PriorPipeline,
     )
     from .stable_diffusion import (
optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py
CHANGED
@@ -12,6 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .
+from .pipeline_kandinsky2_2 import RBLNKandinskyV22Pipeline
+from .pipeline_kandinsky2_2_combined import (
+    RBLNKandinskyV22CombinedPipeline,
+    RBLNKandinskyV22Img2ImgCombinedPipeline,
+    RBLNKandinskyV22InpaintCombinedPipeline,
+)
+from .pipeline_kandinsky2_2_img2img import RBLNKandinskyV22Img2ImgPipeline
 from .pipeline_kandinsky2_2_inpaint import RBLNKandinskyV22InpaintPipeline
 from .pipeline_kandinsky2_2_prior import RBLNKandinskyV22PriorPipeline
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py
ADDED
@@ -0,0 +1,25 @@
+# Copyright 2024 Rebellions Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from diffusers import KandinskyV22Pipeline
+
+from ...modeling_diffusers import RBLNDiffusionMixin
+
+
+class RBLNKandinskyV22Pipeline(RBLNDiffusionMixin, KandinskyV22Pipeline):
+    original_class = KandinskyV22Pipeline
+    _submodules = ["unet", "movq"]
+
+    def get_compiled_image_size(self):
+        return self.movq.image_size
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py
CHANGED
@@ -14,6 +14,8 @@
 
 from diffusers import (
     DDPMScheduler,
+    KandinskyV22CombinedPipeline,
+    KandinskyV22Img2ImgCombinedPipeline,
     KandinskyV22InpaintCombinedPipeline,
     PriorTransformer,
     UnCLIPScheduler,
@@ -28,14 +30,118 @@ from transformers import (
 )
 
 from ...modeling_diffusers import RBLNDiffusionMixin
+from .pipeline_kandinsky2_2 import RBLNKandinskyV22Pipeline
+from .pipeline_kandinsky2_2_img2img import RBLNKandinskyV22Img2ImgPipeline
 from .pipeline_kandinsky2_2_inpaint import RBLNKandinskyV22InpaintPipeline
 from .pipeline_kandinsky2_2_prior import RBLNKandinskyV22PriorPipeline
 
 
+class RBLNKandinskyV22CombinedPipeline(RBLNDiffusionMixin, KandinskyV22CombinedPipeline):
+    original_class = KandinskyV22CombinedPipeline
+    _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22Pipeline}
+    _submodules = ["prior_image_encoder", "prior_text_encoder", "prior_prior", "unet", "movq"]
+    _prefix = {"prior_pipe": "prior_"}
+
+    def __init__(
+        self,
+        unet: "UNet2DConditionModel",
+        scheduler: "DDPMScheduler",
+        movq: "VQModel",
+        prior_prior: "PriorTransformer",
+        prior_image_encoder: "CLIPVisionModelWithProjection",
+        prior_text_encoder: "CLIPTextModelWithProjection",
+        prior_tokenizer: "CLIPTokenizer",
+        prior_scheduler: "UnCLIPScheduler",
+        prior_image_processor: "CLIPImageProcessor",
+    ):
+        RBLNDiffusionMixin.__init__(self)
+        super(KandinskyV22CombinedPipeline, self).__init__()
+
+        self.register_modules(
+            unet=unet,
+            scheduler=scheduler,
+            movq=movq,
+            prior_prior=prior_prior,
+            prior_image_encoder=prior_image_encoder,
+            prior_text_encoder=prior_text_encoder,
+            prior_tokenizer=prior_tokenizer,
+            prior_scheduler=prior_scheduler,
+            prior_image_processor=prior_image_processor,
+        )
+
+        self.prior_pipe = RBLNKandinskyV22PriorPipeline(
+            prior=prior_prior,
+            image_encoder=prior_image_encoder,
+            text_encoder=prior_text_encoder,
+            tokenizer=prior_tokenizer,
+            scheduler=prior_scheduler,
+            image_processor=prior_image_processor,
+        )
+        self.decoder_pipe = RBLNKandinskyV22Pipeline(
+            unet=unet,
+            scheduler=scheduler,
+            movq=movq,
+        )
+
+    def get_compiled_image_size(self):
+        return self.movq.image_size
+
+
+class RBLNKandinskyV22Img2ImgCombinedPipeline(RBLNDiffusionMixin, KandinskyV22Img2ImgCombinedPipeline):
+    original_class = KandinskyV22Img2ImgCombinedPipeline
+    _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22Img2ImgPipeline}
+    _submodules = ["prior_image_encoder", "prior_text_encoder", "prior_prior", "unet", "movq"]
+    _prefix = {"prior_pipe": "prior_"}
+
+    def __init__(
+        self,
+        unet: "UNet2DConditionModel",
+        scheduler: "DDPMScheduler",
+        movq: "VQModel",
+        prior_prior: "PriorTransformer",
+        prior_image_encoder: "CLIPVisionModelWithProjection",
+        prior_text_encoder: "CLIPTextModelWithProjection",
+        prior_tokenizer: "CLIPTokenizer",
+        prior_scheduler: "UnCLIPScheduler",
+        prior_image_processor: "CLIPImageProcessor",
+    ):
+        RBLNDiffusionMixin.__init__(self)
+        super(KandinskyV22Img2ImgCombinedPipeline, self).__init__()
+
+        self.register_modules(
+            unet=unet,
+            scheduler=scheduler,
+            movq=movq,
+            prior_prior=prior_prior,
+            prior_image_encoder=prior_image_encoder,
+            prior_text_encoder=prior_text_encoder,
+            prior_tokenizer=prior_tokenizer,
+            prior_scheduler=prior_scheduler,
+            prior_image_processor=prior_image_processor,
+        )
+
+        self.prior_pipe = RBLNKandinskyV22PriorPipeline(
+            prior=prior_prior,
+            image_encoder=prior_image_encoder,
+            text_encoder=prior_text_encoder,
+            tokenizer=prior_tokenizer,
+            scheduler=prior_scheduler,
+            image_processor=prior_image_processor,
+        )
+        self.decoder_pipe = RBLNKandinskyV22Img2ImgPipeline(
+            unet=unet,
+            scheduler=scheduler,
+            movq=movq,
+        )
+
+    def get_compiled_image_size(self):
+        return self.movq.image_size
+
+
 class RBLNKandinskyV22InpaintCombinedPipeline(RBLNDiffusionMixin, KandinskyV22InpaintCombinedPipeline):
     original_class = KandinskyV22InpaintCombinedPipeline
     _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22InpaintPipeline}
-    _submodules = ["
+    _submodules = ["prior_image_encoder", "prior_text_encoder", "prior_prior", "unet", "movq"]
     _prefix = {"prior_pipe": "prior_"}
 
     def __init__(
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py
ADDED
@@ -0,0 +1,25 @@
+# Copyright 2024 Rebellions Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from diffusers import KandinskyV22Img2ImgPipeline
+
+from ...modeling_diffusers import RBLNDiffusionMixin
+
+
+class RBLNKandinskyV22Img2ImgPipeline(RBLNDiffusionMixin, KandinskyV22Img2ImgPipeline):
+    original_class = KandinskyV22Img2ImgPipeline
+    _submodules = ["unet", "movq"]
+
+    def get_compiled_image_size(self):
+        return self.movq.image_size
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py
CHANGED
@@ -20,3 +20,6 @@ from ...modeling_diffusers import RBLNDiffusionMixin
 class RBLNKandinskyV22InpaintPipeline(RBLNDiffusionMixin, KandinskyV22InpaintPipeline):
     original_class = KandinskyV22InpaintPipeline
     _submodules = ["unet", "movq"]
+
+    def get_compiled_image_size(self):
+        return self.movq.image_size
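Taken together, the Kandinsky 2.2 additions above mean the combined pipelines can now be compiled and run in one step. A minimal usage sketch (my own, not part of the release; the checkpoint id and option values are assumptions) following the usual optimum-rbln from_pretrained(..., export=True, rbln_config=...) flow:

from optimum.rbln import RBLNKandinskyV22CombinedPipeline

pipe = RBLNKandinskyV22CombinedPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder",  # assumed checkpoint id
    export=True,                                  # compile the prior/decoder submodules for RBLN NPUs
    rbln_config={
        "img_height": 512,
        "img_width": 512,
        "prior_guidance_scale": 4.0,  # prior_-prefixed keys go to the connected prior_pipe
    },
)
image = pipe(prompt="a watercolor fox", num_inference_steps=50).images[0]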
optimum/rbln/modeling_base.py
CHANGED
@@ -282,15 +282,6 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
             **kwargs,
         )
 
-    @classmethod
-    def _check_compiled_models(
-        cls, compiled_models: Dict[str, rebel.RBLNCompiledModel], rbln_config: RBLNConfig, config: "PretrainedConfig"
-    ):
-        # check compiled model can create runtimes.
-        # this logic currently only works in LLM
-        # fail when LLM model using Paged Attention can't guarantee max sequence length
-        pass
-
     @classmethod
     def _from_compiled_models(
         cls,
@@ -305,8 +296,6 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         if isinstance(model_save_dir, str):
             model_save_dir = Path(model_save_dir)
 
-        cls._check_compiled_models(compiled_models=rbln_compiled_models, rbln_config=rbln_config, config=config)
-
         # FIXME:: Should we convert it?
         compiled_model_names = [cfg.compiled_model_name for cfg in rbln_config.compile_cfgs]
         rbln_compiled_models = [rbln_compiled_models[cm_name] for cm_name in compiled_model_names]
optimum/rbln/transformers/models/bart/modeling_bart.py
CHANGED
@@ -108,6 +108,8 @@ class RBLNBartModel(RBLNModel):
 
 
 class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM):
+    support_paged_causal_attn = True
+
     @classmethod
     def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
         enc_max_seq_len = (
optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py
CHANGED
@@ -98,6 +98,18 @@ def validate_attention_method(
             "this requirement, or consider switching `rbln_attn_impl` to 'eager' for shorter lengths."
         )
 
+    if rbln_kvcache_block_size is not None:
+        if rbln_attn_impl == "flash_attn" and rbln_kvcache_partition_len != rbln_kvcache_block_size:
+            raise ValueError(
+                f" When using 'flash attention', the `rbln_kvcache_block_size` ({rbln_kvcache_block_size}) "
+                f"must always be set equal to the `rbln_kvcache_partition_len` {rbln_kvcache_partition_len}."
+            )
+        elif rbln_attn_impl == "eager" and rbln_kvcache_block_size != rbln_max_seq_len:
+            raise ValueError(
+                f" When using 'eager attention', the `rbln_kvcache_block_size` ({rbln_kvcache_block_size}) "
+                f"must always be set equal to the `rbln_max_seq_len` {rbln_max_seq_len}."
+            )
+
     return rbln_attn_impl, rbln_kvcache_partition_len, rbln_kvcache_block_size
 
 
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py
CHANGED
@@ -98,9 +98,9 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
     """
 
     NO_BLOCKS_ERROR = (
-        "No memory blocks are available for allocation."
-        "The generate() API cannot complete this inference task because Paged Attention is not fully supported by optimum-rbln."
-        "This is supported by vllm-rbln (see: https://docs.rbln.ai/software/model_serving/vllm_support/vllm-rbln.html)."
+        "No memory blocks are available for allocation. "
+        "The generate() API cannot complete this inference task because Paged Attention is not fully supported by optimum-rbln. "
+        "This is supported by vllm-rbln (see: https://docs.rbln.ai/software/model_serving/vllm_support/vllm-rbln.html). "
         "Using vllm-rbln should fix this issue and enhance inference performance."
     )
 
@@ -575,59 +575,58 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         nbits_per_param: int,
         n_model_params: int,
     ) -> int:
-        num_attention_heads = getattr(config, "n_head", None) or getattr(config, "num_attention_heads")
-        num_layers = getattr(config, "n_layer", None) or getattr(config, "num_hidden_layers")
-        head_dim = getattr(config, "head_dim", None) or config.hidden_size // num_attention_heads
-        vocab_size = config.vocab_size
-        hidden_size = getattr(config, "n_embd", None) or getattr(config, "hidden_size")
-        num_key_value_heads = getattr(config, "num_key_value_heads", None) or num_attention_heads
-
-        TARGET_DRAM_LIMIT = int(tensor_parallel_size * 15.7 * 2**30)  # 16GB  # TODO(jongho): need a more accurate value
-
         def align(x: int, nbytes: int) -> int:
             return int(math.ceil(x / nbytes) * nbytes)
 
         def align_2MB(x: int) -> int:
             return align(x, 2 * 1024 * 1024)
 
-
-
-
-
-
-
+        num_attention_heads = getattr(config, "n_head", None) or getattr(config, "num_attention_heads")
+        num_layers = getattr(config, "n_layer", None) or getattr(config, "num_hidden_layers")
+        head_dim = getattr(config, "head_dim", None) or config.hidden_size // num_attention_heads
+        vocab_size = config.vocab_size
+        hidden_size = getattr(config, "n_embd", None) or getattr(config, "hidden_size")
+        num_key_value_heads = getattr(config, "num_key_value_heads", None) or num_attention_heads
 
-
-
-
-
-            * tensor_parallel_size
-        )
+        # TODO(jongho): Update if target npu is REBEL.
+        ATOM_DRAM_NBYTES = 16 * 2**30
+        ATOM_SYS_DRAM_NBYTES = 288 * 2**20
+        available_dram = tensor_parallel_size * (ATOM_DRAM_NBYTES - ATOM_SYS_DRAM_NBYTES)
 
-
+        # Get estimated kernel size (approximated)
+        lm_heads_params = align(vocab_size, 64) * hidden_size
+        lm_heads_nbytes = (
+            align_2MB(lm_heads_params * nbits_per_param // 8 / tensor_parallel_size) * tensor_parallel_size
+        )
+        params = n_model_params - lm_heads_params
+        layer_nbytes = (
+            align_2MB(params * nbits_per_param // 8 / num_layers / tensor_parallel_size)
+            * num_layers
+            * tensor_parallel_size
+        )
+        kernel_size = layer_nbytes + lm_heads_nbytes
 
-        available_dram
+        available_dram -= kernel_size
 
-
-
+        # TODO: Accurate buffer estimation
+        buffer = 2**30  # 1GB Buffer
+        if tensor_parallel_size <= 4:
             buffer /= 4
 
         available_dram -= buffer
 
-
-
-
-
-
-
-
-        )
-            * num_layers
-            * 2  # (k, v)
-            * tensor_parallel_size
+        # Estimate nbytes per a single kvcache block
+        nbytes_per_block = (
+            align_2MB(
+                kvcache_block_size
+                * head_dim
+                * math.ceil(num_key_value_heads / tensor_parallel_size)  # Shard
+                * 2  # (fp16)
             )
-
-
+            * num_layers
+            * 2  # (k, v)
+            * tensor_parallel_size
+        )
         n_blocks = available_dram // nbytes_per_block
 
         return n_blocks, nbytes_per_block
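For intuition, the rewritten estimate above works out to a simple budget: per-device DRAM minus the system reservation, minus the compiled weights, minus a fixed buffer, divided by the 2 MiB-aligned size of one kv-cache block. A standalone sketch with illustrative (not measured) numbers for a Llama-7B-like config on a single ATOM device:

import math

def align(x, nbytes):
    return int(math.ceil(x / nbytes) * nbytes)

def align_2mb(x):
    return align(x, 2 * 1024 * 1024)

tp = 1                                   # tensor parallel size
kvcache_block_size = 1024                # tokens per block (assumed setting)
num_layers, num_kv_heads, head_dim = 32, 32, 128

available = tp * (16 * 2**30 - 288 * 2**20)   # ATOM_DRAM_NBYTES - ATOM_SYS_DRAM_NBYTES
available -= int(3.6 * 2**30)                 # pretend the compiled kernels take ~3.6 GiB
available -= 2**30 // 4                       # 1 GiB buffer, quartered when tp <= 4

nbytes_per_block = (
    align_2mb(kvcache_block_size * head_dim * math.ceil(num_kv_heads / tp) * 2)  # fp16 bytes
    * num_layers
    * 2        # (k, v)
    * tp
)
print(available // nbytes_per_block)  # number of allocatable kv-cache blocks (~23 here)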
optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py
CHANGED
@@ -50,6 +50,7 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
         runtime: rebel.Runtime,
         batch_size: int,
         dec_max_seq_len: int,
+        support_paged_causal_attn: Optional[bool] = None,
         use_attention_mask: Optional[bool] = None,
         **kwargs: Any,
     ) -> None:
@@ -57,7 +58,10 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
         self.batch_size = batch_size
         self.dec_max_seq_len = dec_max_seq_len
         self.use_attention_mask = use_attention_mask
-
+        if support_paged_causal_attn:
+            self.default_block_tables = torch.arange(0, self.batch_size, dtype=torch.int16).view(self.batch_size, 1)
+        else:
+            self.default_block_tables = None
 
     def forward(
         self,
@@ -94,7 +98,7 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
             decoder_attention_mask if self.use_attention_mask else None,
             attention_mask,
             cache_position,
-            block_tables,
+            block_tables=block_tables,
         )
 
         return Seq2SeqLMOutput(logits=lm_logits)
@@ -115,6 +119,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
 
     main_input_name = "input_ids"
     auto_model_class = AutoModelForSeq2SeqLM
+    support_paged_causal_attn = None
 
     def __post_init__(self, **kwargs):
         batch_size = self.rbln_config.model_cfg["batch_size"]
@@ -130,6 +135,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
             main_input_name="input_ids",
             batch_size=batch_size,
             dec_max_seq_len=dec_max_seq_len,
+            support_paged_causal_attn=self.support_paged_causal_attn,
             use_attention_mask=self.use_attention_mask,
         )
 
@@ -186,13 +192,16 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
         rbln_dec_max_seq_len = rbln_kwargs.get("dec_max_seq_len", None)
         rbln_batch_size = rbln_kwargs.get("batch_size", None)
         rbln_batch_size = 1 if rbln_batch_size is None else rbln_batch_size
-        rbln_use_attention_mask = rbln_kwargs.get("use_attention_mask", None)
 
-        if 
-            rbln_use_attention_mask = 
-
-
-
+        if cls.support_paged_causal_attn:
+            rbln_use_attention_mask = rbln_kwargs.get("use_attention_mask", None)
+            if rbln_use_attention_mask is None:
+                rbln_use_attention_mask = False
+                rbln_npu = rbln_kwargs.get("npu", None) or rebel.get_npu_name()
+                if rbln_npu == "RBLN-CA02":
+                    rbln_use_attention_mask = True
+        else:
+            rbln_use_attention_mask = True
 
         n_layer = getattr(model_config, "decoder_layers", None) or getattr(model_config, "num_layers")
         n_head = getattr(model_config, "decoder_attention_heads", None) or getattr(model_config, "num_heads")
@@ -265,11 +274,6 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
                 [rbln_batch_size, 1],
                 "int32",
             ),
-            (
-                "block_tables",
-                [rbln_batch_size, 1],
-                "int16",
-            ),
         ]
         dec_input_info.extend(
             [
@@ -302,6 +306,8 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
             ]
         )
 
+        if cls.support_paged_causal_attn:
+            dec_input_info.insert(3, ("block_tables", [rbln_batch_size, 1], "int16"))
         if rbln_use_attention_mask:
             dec_input_info.insert(1, ("attention_mask", [rbln_batch_size, rbln_dec_max_seq_len], "float32"))
 
optimum/rbln/transformers/models/t5/modeling_t5.py
CHANGED
@@ -13,9 +13,8 @@
 # limitations under the License.
 
 import inspect
-from typing import TYPE_CHECKING, Any, Callable, Dict,
+from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union
 
-import rebel
 import torch
 from transformers import (
     AutoModelForTextEncoding,
@@ -23,7 +22,7 @@ from transformers import (
     T5EncoderModel,
     T5ForConditionalGeneration,
 )
-from transformers.modeling_outputs import BaseModelOutput
+from transformers.modeling_outputs import BaseModelOutput
 
 from ....diffusers.modeling_diffusers import RBLNDiffusionMixin
 from ....modeling import RBLNModel
@@ -58,63 +57,6 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
         )
 
 
-class RBLNRuntimeEncoder(RBLNPytorchRuntime):
-    mandatory_members = ["main_input_name"]
-
-    def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
-        _ = super().forward(*args, **kwargs)
-        return BaseModelOutput(last_hidden_state=torch.tensor([1.0]))
-
-
-class RBLNRuntimeDecoder(RBLNPytorchRuntime):
-    mandatory_members = ["main_input_name"]
-
-    def __init__(
-        self,
-        runtime: rebel.Runtime,
-        batch_size: int,
-        dec_max_seq_len: int,
-        **kwargs: Any,
-    ) -> None:
-        super().__init__(runtime, **kwargs)
-        self.batch_size = batch_size
-        self.dec_max_seq_len = dec_max_seq_len
-
-    def forward(
-        self,
-        decoder_input_ids: Optional[torch.LongTensor] = None,
-        attention_mask: Optional[torch.FloatTensor] = None,
-        decoder_attention_mask: Optional[torch.BoolTensor] = None,
-        cache_position: Optional[torch.Tensor] = None,
-        **kwargs,
-    ) -> Tuple[torch.FloatTensor]:
-        batch_size = decoder_input_ids.shape[0]
-        if batch_size != self.batch_size:
-            raise RuntimeError(
-                f"Batch size mismatch: got {batch_size}, expected {self.batch_size} (compiled batch size)."
-            )
-
-        if batch_size != cache_position.shape[0]:
-            raise RuntimeError(f"Cache position size mismatch: got {cache_position.shape[0]}, expected {batch_size}.")
-
-        for b_idx in range(self.batch_size):
-            decoding_step = cache_position[b_idx].item()
-            if not (0 <= decoding_step < self.dec_max_seq_len):
-                raise ValueError(
-                    f"Decoding step {decoding_step} out of bounds for attention mask with shape {self.dec_attn_mask.shape}."
-                )
-            decoder_attention_mask[b_idx, : decoding_step + 1] = 1
-
-        lm_logits = super().forward(
-            decoder_input_ids,
-            decoder_attention_mask,
-            attention_mask,
-            cache_position,
-        )
-
-        return Seq2SeqLMOutput(logits=lm_logits)
-
-
 class T5EncoderWrapper(torch.nn.Module):
     def __init__(self, model: "T5EncoderModel") -> None:
         super().__init__()
@@ -247,20 +189,7 @@ class RBLNT5EncoderModel(RBLNModel):
 
 
 class RBLNT5ForConditionalGeneration(RBLNModelForSeq2SeqLM):
-
-        batch_size = self.rbln_config.model_cfg["batch_size"]
-        dec_max_seq_len = self.rbln_config.model_cfg["dec_max_seq_len"]
-
-        self.encoder = RBLNRuntimeEncoder(
-            runtime=self.model[0],
-            main_input_name="input_ids",
-        )
-        self.decoder = RBLNRuntimeDecoder(
-            runtime=self.model[1],
-            main_input_name="input_ids",
-            batch_size=batch_size,
-            dec_max_seq_len=dec_max_seq_len,
-        )
+    support_causal_paged_attn = False
 
     @classmethod
     def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
@@ -279,139 +208,3 @@ class RBLNT5ForConditionalGeneration(RBLNModelForSeq2SeqLM):
             return redirect(val)
 
         return val
-
-    @classmethod
-    def _get_rbln_config(
-        cls,
-        preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
-        model_config: "PretrainedConfig",
-        rbln_kwargs: Dict[str, Any] = {},
-    ) -> RBLNConfig:
-        rbln_enc_max_seq_len = rbln_kwargs.get("enc_max_seq_len", None)
-        rbln_dec_max_seq_len = rbln_kwargs.get("dec_max_seq_len", None)
-        rbln_batch_size = rbln_kwargs.get("batch_size", None)
-        rbln_batch_size = 1 if rbln_batch_size is None else rbln_batch_size
-
-        n_layer = getattr(model_config, "decoder_layers", None) or getattr(model_config, "num_layers")
-        n_head = getattr(model_config, "decoder_attention_heads", None) or getattr(model_config, "num_heads")
-        d_kv = (
-            model_config.d_kv
-            if hasattr(model_config, "d_kv")
-            else model_config.d_model // model_config.encoder_attention_heads
-        )
-
-        max_position_embeddings = getattr(model_config, "n_positions", None) or getattr(
-            model_config, "max_position_embeddings", None
-        )
-
-        rbln_pad_token_id = getattr(model_config, "pad_token_id", None)
-        if rbln_pad_token_id is None:
-            rbln_pad_token_id = getattr(model_config, "bos_token_id", None)
-            if rbln_pad_token_id is None:
-                rbln_pad_token_id = getattr(model_config, "eos_token_id", None)
-                if rbln_pad_token_id is None:
-                    rbln_pad_token_id = -1
-
-        if rbln_enc_max_seq_len is None:
-            rbln_enc_max_seq_len = max_position_embeddings
-            if rbln_enc_max_seq_len is None:
-                for tokenizer in preprocessors:
-                    if hasattr(tokenizer, "model_max_length"):
-                        rbln_enc_max_seq_len = tokenizer.model_max_length
-                        break
-                if rbln_enc_max_seq_len is None:
-                    raise ValueError("`rbln_enc_max_seq_len` should be specified!")
-        if max_position_embeddings is not None and rbln_enc_max_seq_len > max_position_embeddings:
-            raise ValueError("`rbln_enc_max_seq_len` should be less or equal than max_position_embeddings!")
-
-        if rbln_dec_max_seq_len is None:
-            rbln_dec_max_seq_len = max_position_embeddings
-            if rbln_dec_max_seq_len is None:
-                for tokenizer in preprocessors:
-                    if hasattr(tokenizer, "model_max_length"):
-                        rbln_dec_max_seq_len = tokenizer.model_max_length
-                        break
-                if rbln_dec_max_seq_len is None:
-                    raise ValueError("`rbln_dec_max_seq_len` should be specified!")
-
-        if max_position_embeddings is not None and rbln_dec_max_seq_len > max_position_embeddings:
-            raise ValueError("`rbln_dec_max_seq_len` should be less or equal than max_position_embeddings!")
-
-        # model input info
-        enc_input_info = [
-            ("input_ids", [1, rbln_enc_max_seq_len], "int64"),
-            ("attention_mask", [1, rbln_enc_max_seq_len], "float32"),
-            (
-                "cross_key_value_states",
-                [
-                    n_layer * 2,
-                    rbln_batch_size,
-                    n_head,
-                    rbln_enc_max_seq_len,
-                    d_kv,
-                ],
-                "float32",
-            ),
-            ("block_tables", [1], "int16"),
-        ]
-
-        dec_input_info = [
-            ("input_ids", [rbln_batch_size, 1], "int64"),
-            ("attention_mask", [rbln_batch_size, rbln_dec_max_seq_len], "float32"),
-            ("encoder_attention_mask", [rbln_batch_size, rbln_enc_max_seq_len], "float32"),
-            (
-                "cache_position",
-                [rbln_batch_size, 1],
-                "int32",
-            ),
-        ]
-        dec_input_info.extend(
-            [
-                (
-                    "cross_key_value_states",
-                    [
-                        n_layer * 2,
-                        rbln_batch_size,
-                        n_head,
-                        rbln_enc_max_seq_len,
-                        d_kv,
-                    ],
-                    "float32",
-                )
-            ]
-        )
-        dec_input_info.extend(
-            [
-                (
-                    f"self_key_value_states_{i}",
-                    [
-                        rbln_batch_size,
-                        n_head,
-                        rbln_dec_max_seq_len,
-                        d_kv,
-                    ],
-                    "float32",
-                )
-                for i in range(n_layer * 2)
-            ]
-        )
-
-        enc_compile_config = RBLNCompileConfig(compiled_model_name="encoder", input_info=enc_input_info)
-        dec_compile_config = RBLNCompileConfig(compiled_model_name="decoder", input_info=dec_input_info)
-
-        rbln_config = RBLNConfig(
-            rbln_cls=cls.__name__,
-            compile_cfgs=[enc_compile_config, dec_compile_config],
-            rbln_kwargs=rbln_kwargs,
-        )
-
-        rbln_config.model_cfg.update(
-            {
-                "enc_max_seq_len": rbln_enc_max_seq_len,
-                "dec_max_seq_len": rbln_dec_max_seq_len,
-                "batch_size": rbln_batch_size,
-                "pad_token_id": rbln_pad_token_id,
-            }
-        )
-
-        return rbln_config
{optimum_rbln-0.7.3a5.dist-info → optimum_rbln-0.7.3.post1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.7.3a5
+Version: 0.7.3.post1
 Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
{optimum_rbln-0.7.3a5.dist-info → optimum_rbln-0.7.3.post1.dist-info}/RECORD
CHANGED
@@ -1,31 +1,33 @@
-optimum/rbln/__init__.py,sha256=
-optimum/rbln/__version__.py,sha256=
+optimum/rbln/__init__.py,sha256=ZDzXcl-oAcYJhKjJMpotjbTih9awo7HzUb6T3MUEP6Q,6894
+optimum/rbln/__version__.py,sha256=aegWGVZeZJ9bIegWWNAgPL2y9SAs5kPTsXCQi0EZ9go,517
 optimum/rbln/modeling.py,sha256=nJsAs5zs--VVOYGFjYNpqfxYIemJIK4Lr0WEzlDLdP0,8390
-optimum/rbln/modeling_base.py,sha256=
+optimum/rbln/modeling_base.py,sha256=dNCL-BhrWCpuOVkZaj8-MW567Tf4lLo3p3Z3ldjWJfU,21779
 optimum/rbln/modeling_config.py,sha256=7104bxmrvKW4Q6XTruQayiIGl8GHDFmPkJ3cknMIInE,11335
-optimum/rbln/diffusers/__init__.py,sha256=
-optimum/rbln/diffusers/modeling_diffusers.py,sha256=
+optimum/rbln/diffusers/__init__.py,sha256=Hq87CbtiCy85YmK2SB-OmUyfv77oe3j4bsTenTRnu6w,3623
+optimum/rbln/diffusers/modeling_diffusers.py,sha256=IS6Mlgexofap7f9Lefk5cKFP7ejSG_oWN3v2PX9_IDQ,20118
 optimum/rbln/diffusers/models/__init__.py,sha256=mkCvJyH1KcwrsUvYSq_bVC79oOfyqtBSFDyPS1_48wA,1478
 optimum/rbln/diffusers/models/controlnet.py,sha256=EM_HlzCdaZdnnK0oGpY2fQeigPqHhlwh4NHCzlmoumI,10512
 optimum/rbln/diffusers/models/autoencoders/__init__.py,sha256=dg17ZTUsiqTcbIaEE4fqew9uRbao0diQ21PXvRKIqKg,679
 optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py,sha256=rCbC32bJnfXtsLdVvNVVHpRAkCYy6jeCSwIZ-JSReWk,9220
 optimum/rbln/diffusers/models/autoencoders/vae.py,sha256=gB9HR7Bf7wpIXLv-Js4Pc3oyWRlqEe4cms4sI2AJicY,4380
-optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=
+optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=b36QqPbayjApKivceQVVyQxHyR1ZOZ1ffuGgdALEPTQ,6117
 optimum/rbln/diffusers/models/transformers/__init__.py,sha256=V8rSR7WzHs-i8Cwb_MNxhY2NFbwPgxu24vGtkwl-6tk,706
 optimum/rbln/diffusers/models/transformers/prior_transformer.py,sha256=VG9cQo-_eppDvQSW1q1euAGBt1socUHetN_fIN2u1iU,6169
 optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=n_krmMgiRxWrG--567PNpk58EG_X7x7H4gidIkRvwjo,7308
 optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
-optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=
-optimum/rbln/diffusers/pipelines/__init__.py,sha256=
+optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=QIjVWQQf8KBn5rU7lvipdm3gNBxZl7l6HCAj7p5FjLU,15977
+optimum/rbln/diffusers/pipelines/__init__.py,sha256=5KLZ5LrpMzBya2e_3_PvEoPwG24U8JMexfw_ygZREKc,3140
 optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
 optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=JWKtnZYBIfgmbAo0SLFIvHBQCv2BPSFNvpcdjG4GUOY,4113
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=dGdw5cwJLS4CLv6IHskk5ZCcPgS7UDuHKbfOZ8ojNUs,35187
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py,sha256=7xCiXrH4ToCTHohVGFXqO7_f9G8HShYaHgZxoMZARkQ,33664
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py,sha256=Gzt2wg4dgFg0TV3Bu0cs8Xru3wVrxWUxxgciwZ-QKLE,44755
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py,sha256=RfwxNX_zQWFtvvFQJ5bt3qtHbdYdQV_3XLHm9WYCKOs,46084
-optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py,sha256=
-optimum/rbln/diffusers/pipelines/kandinsky2_2/
-optimum/rbln/diffusers/pipelines/kandinsky2_2/
+optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py,sha256=I4YQq2HfA3xONbWsdJ870IEJPyLWeCDDG-UCJsu9YO8,1035
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py,sha256=aNFGOjth8tDvPrjYLbRWrkHr6p-8AFgcQx1Qay1fw70,904
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py,sha256=BVXOpdrezWVTCibpuAMu9KkD5oEQUY00cSqm6dFbTnk,7020
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py,sha256=fEs-WgJqWs5zvuCkKb7MuZokH9Mi6q-0DOEKxzfWxzo,932
+optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py,sha256=Ad2ZYCXaMiYpB0mz-8X1CGhILxrVbt7rRIXt6IPwYBM,932
 optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py,sha256=Mf7tzrXetwCgt7LuXfkX-CX1hltLgNZdwF9bHxAbDJM,874
 optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py,sha256=gz6CbP4T6w8XH3PIGRIJXTmKFsChJIkwcAEAsiR5Ydg,830
 optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py,sha256=DgRLzO9HxtgE1jICmHoHaqeVXM4Ih-5uo2JqNMAPMcc,876
@@ -53,14 +55,14 @@ optimum/rbln/transformers/models/auto/auto_factory.py,sha256=IK9jFrJ3EEzYQa9_aKp
 optimum/rbln/transformers/models/auto/modeling_auto.py,sha256=Un9qoqdy3dO8JBza_bTJF_6_fRVNM9QisihSgTRFI-o,3933
 optimum/rbln/transformers/models/bart/__init__.py,sha256=32HPe0_GIO0hp9U464Iv6Jd7M-1nop9g8hA1UZMHhyw,674
 optimum/rbln/transformers/models/bart/bart_architecture.py,sha256=Oo-Cdne7igKEex8wwP-gztKJHgs5GLHQjK1oc3IZIDE,5801
-optimum/rbln/transformers/models/bart/modeling_bart.py,sha256=
+optimum/rbln/transformers/models/bart/modeling_bart.py,sha256=6IpWXlBCd02v66KF77oEWfrv8-FnPBYjjjL_8KZL3Ow,5835
 optimum/rbln/transformers/models/bert/__init__.py,sha256=YVV7k_laU6yJBawZrgjIWjRmIF-Y4oQQHqyf8lsraQs,691
 optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=p3utRqf3dv9_RkHwaMCa1EfXttNJkqCJUIZo3CeZ9YY,4674
 optimum/rbln/transformers/models/clip/__init__.py,sha256=H9vuBwrmFO0-CqZhXUrKF-uQL6igCqMlqrT1X_ELaAI,754
 optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=NiSm7bHs4SReHDUr53BBWSX0Y8bkKOeUSpsBDrp8YDw,6628
 optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=pDogsdpJKKB5rqnVFrRjwfhUvOSV-jZ3oARMsqSvOOQ,665
-optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=
-optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=
+optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=m93-qKN7NMw3i0XDmFmttmRIRK4np_fWtLFlBb2RFgU,41351
+optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=uGdPGcFrWm2gAwFLjfBiALwFsl49VGCReVi4NUfOPxM,38898
 optimum/rbln/transformers/models/dpt/__init__.py,sha256=gP1tkR3XMNlHq1GT87ugIVvb2o_1eAUg1JaniXjy1Lw,651
 optimum/rbln/transformers/models/dpt/modeling_dpt.py,sha256=ZsS2SOiqcA4azULB-WFEMQZbgIoOyVUKqVKqrw_tWzA,3430
 optimum/rbln/transformers/models/exaone/__init__.py,sha256=zYH_5tVa8-juEdsOIky7I33WSC3Zuhoq1upI0OHYeVw,859
@@ -90,10 +92,10 @@ optimum/rbln/transformers/models/qwen2/__init__.py,sha256=RAMWc21W_2I6DH9xBjeNxP
 optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=9-aFDvjMzPNUyGOz0qo33RE18bUFGYZ3Wt_68zb5uJY,1530
 optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
 optimum/rbln/transformers/models/seq2seq/__init__.py,sha256=EmEMV4rOYqKyruX85d0fR73-b8N6BSD6CPcbpYdBuVk,651
-optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py,sha256=
+optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py,sha256=9Pf9Y86ABDfhwIenlZqYfgqjbyFmtKBiPnbCD7zxw4M,18017
 optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py,sha256=tvzacIZam1sIr_1BvvZ_fDr8u5dXAiYiynFdX9tArtY,18877
 optimum/rbln/transformers/models/t5/__init__.py,sha256=1skR1RmnG62WTAP3-F5P1x-V_ReFhMyirH3u56vWwvc,675
-optimum/rbln/transformers/models/t5/modeling_t5.py,sha256=
+optimum/rbln/transformers/models/t5/modeling_t5.py,sha256=8PAhPlYT1dmpcWM7hUMmZV9lPd4d75CuMuFen1pzr3Q,8088
 optimum/rbln/transformers/models/t5/t5_architecture.py,sha256=AArCQhZRETVM583wlIRzMFOSYq7t2nzxaAeyhZxyxKk,9508
 optimum/rbln/transformers/models/wav2vec2/__init__.py,sha256=YpgA0K-vyg9veh0eL_jxauosbRpb_kpGKHvvQLBspKM,649
 optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py,sha256=JYJmV52j6cBwim4RanVJryfKnV80V96ol0A-oR6o7cg,3856
@@ -108,13 +110,13 @@ optimum/rbln/transformers/utils/rbln_quantization.py,sha256=gwBVHf97sQgPNmGa0wq8
 optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
 optimum/rbln/utils/decorator_utils.py,sha256=xu-TrsNi33SRC2a7DBsyoo6-pEQxWKZPZSmM9QlDe2Y,3745
 optimum/rbln/utils/hub.py,sha256=bNmOJGEO9Jfux4Cg8Xli-898I4mxk20KuwQOhP0Zs1U,4198
-optimum/rbln/utils/import_utils.py,sha256=
+optimum/rbln/utils/import_utils.py,sha256=uMldLJmDVMj5uHvxBfb96uV29bfGEDvlksLY26GOHAs,4389
 optimum/rbln/utils/logging.py,sha256=VKKBmlQSdg6iZCGmAXaWYiW67K84jyp1QJhLQSSjPPE,3453
 optimum/rbln/utils/model_utils.py,sha256=DfD_Z2qvZHqcddXqnzTM1AN8khanj3-DXK2lJvVxDvs,1278
 optimum/rbln/utils/runtime_utils.py,sha256=5-DYniyP59nx-mrrbi7AqA77L85b4Cm5oLpaxidSyss,3699
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=oZoGrItB8WqY4i-K9WJPlLlcLohc1YGB9OHB8_XZw3A,4071
-optimum_rbln-0.7.
-optimum_rbln-0.7.
-optimum_rbln-0.7.
-optimum_rbln-0.7.
+optimum_rbln-0.7.3.post1.dist-info/METADATA,sha256=dKER74SsqGQwVQgTXVM854y97xzhfRl5LKaGedd4IIw,5304
+optimum_rbln-0.7.3.post1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.7.3.post1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.7.3.post1.dist-info/RECORD,,
{optimum_rbln-0.7.3a5.dist-info → optimum_rbln-0.7.3.post1.dist-info}/WHEEL
File without changes
{optimum_rbln-0.7.3a5.dist-info → optimum_rbln-0.7.3.post1.dist-info}/licenses/LICENSE
File without changes