optimum-rbln 0.7.5rc0__py3-none-any.whl → 0.7.5rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +50 -6
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +53 -5
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +25 -3
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +25 -3
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +25 -3
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +1 -2
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +1 -2
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +1 -2
- optimum/rbln/modeling.py +53 -9
- optimum/rbln/modeling_base.py +22 -3
- optimum/rbln/transformers/modeling_generic.py +0 -19
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +3 -24
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +124 -33
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +5 -0
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -3
- {optimum_rbln-0.7.5rc0.dist-info → optimum_rbln-0.7.5rc2.dist-info}/METADATA +1 -1
- {optimum_rbln-0.7.5rc0.dist-info → optimum_rbln-0.7.5rc2.dist-info}/RECORD +20 -20
- {optimum_rbln-0.7.5rc0.dist-info → optimum_rbln-0.7.5rc2.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.7.5rc0.dist-info → optimum_rbln-0.7.5rc2.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__version__.py
CHANGED
```diff
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.7.5rc0'
-__version_tuple__ = version_tuple = (0, 7, 5, 'rc0')
+__version__ = version = '0.7.5rc2'
+__version_tuple__ = version_tuple = (0, 7, 5, 'rc2')
```
optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py
CHANGED
```diff
@@ -33,6 +33,8 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         img_height: Optional[int] = None,
         img_width: Optional[int] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
@@ -51,6 +53,8 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
            batch_size (Optional[int]): Batch size for inference, applied to all submodules.
            img_height (Optional[int]): Height of the generated images.
            img_width (Optional[int]): Width of the generated images.
+           height (Optional[int]): Height of the generated images.
+           width (Optional[int]): Width of the generated images.
            sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
            image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                Cannot be used together with img_height/img_width.
@@ -65,11 +69,29 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
            accommodate classifier-free guidance.
         """
         super().__init__(**kwargs)
-        if image_size is not None and (img_height is not None or img_width is not None):
-            raise ValueError("image_size and img_height/img_width cannot both be provided")
 
-        if img_height is not None and img_width is not None:
+        # Initial check for image_size conflict remains as is
+        if image_size is not None and (
+            img_height is not None or img_width is not None or height is not None or width is not None
+        ):
+            raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+        # Prioritize height/width (HF-aligned)
+        if height is not None and width is not None:
+            if img_height is not None or img_width is not None:
+                # Raise error if both sets of arguments are provided
+                raise ValueError(
+                    "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                    "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                )
+            image_size = (height, width)
+        elif (height is not None and width is None) or (height is None and width is not None):
+            raise ValueError("Both height and width must be provided together if used")
+        # Fallback to img_height/img_width for backward compatibility
+        elif img_height is not None and img_width is not None:
             image_size = (img_height, img_width)
+        elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+            raise ValueError("Both img_height and img_width must be provided together if used")
 
         self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
         self.unet = self.init_submodule_config(
@@ -139,6 +161,8 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         img_height: Optional[int] = None,
         img_width: Optional[int] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
@@ -159,6 +183,8 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
            batch_size (Optional[int]): Batch size for inference, applied to all submodules.
            img_height (Optional[int]): Height of the generated images.
            img_width (Optional[int]): Width of the generated images.
+           height (Optional[int]): Height of the generated images.
+           width (Optional[int]): Width of the generated images.
            sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
            image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                Cannot be used together with img_height/img_width.
@@ -173,11 +199,29 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
            accommodate classifier-free guidance.
         """
         super().__init__(**kwargs)
-        if image_size is not None and (img_height is not None or img_width is not None):
-            raise ValueError("image_size and img_height/img_width cannot both be provided")
 
-        if img_height is not None and img_width is not None:
+        # Initial check for image_size conflict remains as is
+        if image_size is not None and (
+            img_height is not None or img_width is not None or height is not None or width is not None
+        ):
+            raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+        # Prioritize height/width (HF-aligned)
+        if height is not None and width is not None:
+            if img_height is not None or img_width is not None:
+                # Raise error if both sets of arguments are provided
+                raise ValueError(
+                    "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                    "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                )
+            image_size = (height, width)
+        elif (height is not None and width is None) or (height is None and width is not None):
+            raise ValueError("Both height and width must be provided together if used")
+        # Fallback to img_height/img_width for backward compatibility
+        elif img_height is not None and img_width is not None:
             image_size = (img_height, img_width)
+        elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+            raise ValueError("Both img_height and img_width must be provided together if used")
 
         self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
         self.text_encoder_2 = self.init_submodule_config(
```
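The same `height`/`width` resolution block is added, essentially verbatim, to the Kandinsky 2.2, Stable Diffusion, Stable Diffusion 3, and Stable Diffusion XL pipeline configs below (the Kandinsky combined config additionally stops forwarding `img_height`/`img_width` to its decoder pipe, since dimensions are now resolved up front). The following standalone sketch restates the resolution rules from the hunk above for illustration; the `resolve_image_size` helper is hypothetical and not part of the optimum-rbln API:

```python
from typing import Optional, Tuple


def resolve_image_size(
    image_size: Optional[Tuple[int, int]] = None,
    img_height: Optional[int] = None,
    img_width: Optional[int] = None,
    height: Optional[int] = None,
    width: Optional[int] = None,
) -> Optional[Tuple[int, int]]:
    # image_size is mutually exclusive with every scalar dimension argument
    if image_size is not None and (
        img_height is not None or img_width is not None or height is not None or width is not None
    ):
        raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
    # height/width (the HF-aligned names) win over the legacy img_* names
    if height is not None and width is not None:
        if img_height is not None or img_width is not None:
            raise ValueError("Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously.")
        return (height, width)
    if (height is None) != (width is None):
        raise ValueError("Both height and width must be provided together if used")
    # legacy pair kept for backward compatibility
    if img_height is not None and img_width is not None:
        return (img_height, img_width)
    if (img_height is None) != (img_width is None):
        raise ValueError("Both img_height and img_width must be provided together if used")
    return image_size


assert resolve_image_size(height=512, width=512) == (512, 512)
assert resolve_image_size(img_height=768, img_width=512) == (768, 512)
```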
optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py
CHANGED
```diff
@@ -35,6 +35,8 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
         image_size: Optional[Tuple[int, int]] = None,
         img_height: Optional[int] = None,
         img_width: Optional[int] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         **kwargs,
     ):
         """
@@ -50,6 +52,8 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
                Cannot be used together with img_height/img_width.
            img_height (Optional[int]): Height of the generated images.
            img_width (Optional[int]): Width of the generated images.
+           height (Optional[int]): Height of the generated images.
+           width (Optional[int]): Width of the generated images.
            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
@@ -60,11 +64,29 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
            accommodate classifier-free guidance.
         """
         super().__init__(**kwargs)
-        if image_size is not None and (img_height is not None or img_width is not None):
-            raise ValueError("image_size and img_height/img_width cannot both be provided")
 
-        if img_height is not None and img_width is not None:
+        # Initial check for image_size conflict remains as is
+        if image_size is not None and (
+            img_height is not None or img_width is not None or height is not None or width is not None
+        ):
+            raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+        # Prioritize height/width (HF-aligned)
+        if height is not None and width is not None:
+            if img_height is not None or img_width is not None:
+                # Raise error if both sets of arguments are provided
+                raise ValueError(
+                    "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                    "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                )
+            image_size = (height, width)
+        elif (height is not None and width is None) or (height is None and width is not None):
+            raise ValueError("Both height and width must be provided together if used")
+        # Fallback to img_height/img_width for backward compatibility
+        elif img_height is not None and img_width is not None:
             image_size = (img_height, img_width)
+        elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+            raise ValueError("Both img_height and img_width must be provided together if used")
 
         self.unet = self.init_submodule_config(RBLNUNet2DConditionModelConfig, unet, sample_size=sample_size)
         self.movq = self.init_submodule_config(
@@ -186,6 +208,8 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         img_height: Optional[int] = None,
         img_width: Optional[int] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         guidance_scale: Optional[float] = None,
         prior_prior: Optional[RBLNPriorTransformerConfig] = None,
         prior_image_encoder: Optional[RBLNCLIPVisionModelWithProjectionConfig] = None,
@@ -212,6 +236,8 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
            batch_size (Optional[int]): Batch size for inference, applied to all submodules.
            img_height (Optional[int]): Height of the generated images.
            img_width (Optional[int]): Width of the generated images.
+           height (Optional[int]): Height of the generated images.
+           width (Optional[int]): Width of the generated images.
            guidance_scale (Optional[float]): Scale for classifier-free guidance.
            prior_prior (Optional[RBLNPriorTransformerConfig]): Direct configuration for the prior transformer.
                Used if prior_pipe is not provided.
@@ -226,6 +252,30 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
         """
         super().__init__(**kwargs)
+
+        # Initial check for image_size conflict remains as is
+        if image_size is not None and (
+            img_height is not None or img_width is not None or height is not None or width is not None
+        ):
+            raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+        # Prioritize height/width (HF-aligned)
+        if height is not None and width is not None:
+            if img_height is not None or img_width is not None:
+                # Raise error if both sets of arguments are provided
+                raise ValueError(
+                    "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                    "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                )
+            image_size = (height, width)
+        elif (height is not None and width is None) or (height is None and width is not None):
+            raise ValueError("Both height and width must be provided together if used")
+        # Fallback to img_height/img_width for backward compatibility
+        elif img_height is not None and img_width is not None:
+            image_size = (img_height, img_width)
+        elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+            raise ValueError("Both img_height and img_width must be provided together if used")
+
         self.prior_pipe = self.init_submodule_config(
             RBLNKandinskyV22PriorPipelineConfig,
             prior_pipe,
@@ -243,8 +293,6 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
             batch_size=batch_size,
             sample_size=sample_size,
             image_size=image_size,
-            img_height=img_height,
-            img_width=img_width,
             guidance_scale=guidance_scale,
         )
 
```
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py
CHANGED
```diff
@@ -32,6 +32,8 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         img_height: Optional[int] = None,
         img_width: Optional[int] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
@@ -48,6 +50,8 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
            batch_size (Optional[int]): Batch size for inference, applied to all submodules.
            img_height (Optional[int]): Height of the generated images.
            img_width (Optional[int]): Width of the generated images.
+           height (Optional[int]): Height of the generated images.
+           width (Optional[int]): Width of the generated images.
            sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
            image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                Cannot be used together with img_height/img_width.
@@ -62,11 +66,29 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
            accommodate classifier-free guidance.
         """
         super().__init__(**kwargs)
-        if image_size is not None and (img_height is not None or img_width is not None):
-            raise ValueError("image_size and img_height/img_width cannot both be provided")
 
-        if img_height is not None and img_width is not None:
+        # Initial check for image_size conflict remains as is
+        if image_size is not None and (
+            img_height is not None or img_width is not None or height is not None or width is not None
+        ):
+            raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+        # Prioritize height/width (HF-aligned)
+        if height is not None and width is not None:
+            if img_height is not None or img_width is not None:
+                # Raise error if both sets of arguments are provided
+                raise ValueError(
+                    "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                    "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                )
+            image_size = (height, width)
+        elif (height is not None and width is None) or (height is None and width is not None):
+            raise ValueError("Both height and width must be provided together if used")
+        # Fallback to img_height/img_width for backward compatibility
+        elif img_height is not None and img_width is not None:
             image_size = (img_height, img_width)
+        elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+            raise ValueError("Both img_height and img_width must be provided together if used")
 
         self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
         self.unet = self.init_submodule_config(
```
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py
CHANGED
```diff
@@ -37,6 +37,8 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         img_height: Optional[int] = None,
         img_width: Optional[int] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         guidance_scale: Optional[float] = None,
         **kwargs,
     ):
@@ -59,6 +61,8 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
            batch_size (Optional[int]): Batch size for inference, applied to all submodules.
            img_height (Optional[int]): Height of the generated images.
            img_width (Optional[int]): Width of the generated images.
+           height (Optional[int]): Height of the generated images.
+           width (Optional[int]): Width of the generated images.
            guidance_scale (Optional[float]): Scale for classifier-free guidance.
            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
@@ -70,11 +74,29 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
            accommodate classifier-free guidance.
         """
         super().__init__(**kwargs)
-        if image_size is not None and (img_height is not None or img_width is not None):
-            raise ValueError("image_size and img_height/img_width cannot both be provided")
 
-        if img_height is not None and img_width is not None:
+        # Initial check for image_size conflict remains as is
+        if image_size is not None and (
+            img_height is not None or img_width is not None or height is not None or width is not None
+        ):
+            raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+        # Prioritize height/width (HF-aligned)
+        if height is not None and width is not None:
+            if img_height is not None or img_width is not None:
+                # Raise error if both sets of arguments are provided
+                raise ValueError(
+                    "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                    "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                )
+            image_size = (height, width)
+        elif (height is not None and width is None) or (height is None and width is not None):
+            raise ValueError("Both height and width must be provided together if used")
+        # Fallback to img_height/img_width for backward compatibility
+        elif img_height is not None and img_width is not None:
             image_size = (img_height, img_width)
+        elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+            raise ValueError("Both img_height and img_width must be provided together if used")
 
         max_seq_len = max_seq_len or 256
 
```
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py
CHANGED
```diff
@@ -33,6 +33,8 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         img_height: Optional[int] = None,
         img_width: Optional[int] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
@@ -51,6 +53,8 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
            batch_size (Optional[int]): Batch size for inference, applied to all submodules.
            img_height (Optional[int]): Height of the generated images.
            img_width (Optional[int]): Width of the generated images.
+           height (Optional[int]): Height of the generated images.
+           width (Optional[int]): Width of the generated images.
            sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
            image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                Cannot be used together with img_height/img_width.
@@ -65,11 +69,29 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
            accommodate classifier-free guidance.
         """
         super().__init__(**kwargs)
-        if image_size is not None and (img_height is not None or img_width is not None):
-            raise ValueError("image_size and img_height/img_width cannot both be provided")
 
-        if img_height is not None and img_width is not None:
+        # Initial check for image_size conflict remains as is
+        if image_size is not None and (
+            img_height is not None or img_width is not None or height is not None or width is not None
+        ):
+            raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+        # Prioritize height/width (HF-aligned)
+        if height is not None and width is not None:
+            if img_height is not None or img_width is not None:
+                # Raise error if both sets of arguments are provided
+                raise ValueError(
+                    "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                    "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                )
+            image_size = (height, width)
+        elif (height is not None and width is None) or (height is None and width is not None):
+            raise ValueError("Both height and width must be provided together if used")
+        # Fallback to img_height/img_width for backward compatibility
+        elif img_height is not None and img_width is not None:
             image_size = (img_height, img_width)
+        elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+            raise ValueError("Both img_height and img_width must be provided together if used")
 
         self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
         self.text_encoder_2 = self.init_submodule_config(
```
optimum/rbln/diffusers/models/transformers/prior_transformer.py
CHANGED
```diff
@@ -58,8 +58,7 @@ class _PriorTransformer(torch.nn.Module):
 class RBLNPriorTransformer(RBLNModel):
     hf_library_name = "diffusers"
     auto_model_class = PriorTransformer
-
-    output_key = "predicted_image_embedding"
+    _output_class = PriorTransformerOutput
 
     def __post_init__(self, **kwargs):
         super().__post_init__(**kwargs)
```
optimum/rbln/diffusers/models/transformers/transformer_sd3.py
CHANGED
```diff
@@ -61,8 +61,7 @@ class SD3Transformer2DModelWrapper(torch.nn.Module):
 class RBLNSD3Transformer2DModel(RBLNModel):
     hf_library_name = "diffusers"
     auto_model_class = SD3Transformer2DModel
-
-    output_key = "sample"
+    _output_class = Transformer2DModelOutput
 
     def __post_init__(self, **kwargs):
         super().__post_init__(**kwargs)
```
|
@@ -143,8 +143,7 @@ class RBLNUNet2DConditionModel(RBLNModel):
|
|
143
143
|
hf_library_name = "diffusers"
|
144
144
|
auto_model_class = UNet2DConditionModel
|
145
145
|
_rbln_config_class = RBLNUNet2DConditionModelConfig
|
146
|
-
|
147
|
-
output_key = "sample"
|
146
|
+
_output_class = UNet2DConditionOutput
|
148
147
|
|
149
148
|
def __post_init__(self, **kwargs):
|
150
149
|
super().__post_init__(**kwargs)
|
optimum/rbln/modeling.py
CHANGED
````diff
@@ -14,7 +14,7 @@
 
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Union, get_args, get_origin, get_type_hints
 
 import rebel
 import torch
@@ -49,8 +49,7 @@ class RBLNModel(RBLNBaseModel):
     ```
     """
 
-
-    output_key = "last_hidden_state"
+    _output_class = None
 
     @classmethod
     def update_kwargs(cls, kwargs):
@@ -245,16 +244,61 @@ class RBLNModel(RBLNBaseModel):
         # Format output according to task requirements
         return self._prepare_output(output, return_dict)
 
+    @classmethod
+    def get_hf_output_class(cls):
+        """
+        Dynamically gets the output class from the corresponding HuggingFace model class.
+
+        Returns:
+            type: The appropriate output class from transformers or diffusers
+        """
+        if cls._output_class:
+            return cls._output_class
+
+        hf_class = cls.get_hf_class()
+        if hf_class is None:
+            raise ValueError(f"No HuggingFace model class found for {cls.__name__}")
+
+        hints = get_type_hints(hf_class.forward) if hasattr(hf_class, "forward") else {}
+        ret = hints.get("return")
+
+        if ret is not None:
+            candidates = get_args(ret) if get_origin(ret) is Union else (ret,)
+
+            for t in candidates:
+                if t is type(None):  # Skip NoneType in Union
+                    continue
+                mod = getattr(t, "__module__", "")
+                if "transformers" in mod or "diffusers" in mod:
+                    cls._output_class = t
+                    return t
+
+        # Fallback to BaseModelOutput
+        cls._output_class = BaseModelOutput
+        return BaseModelOutput
+
     def _prepare_output(self, output, return_dict):
         """
         Prepare model output based on return_dict flag.
         This method can be overridden by subclasses to provide task-specific output handling.
         """
+        tuple_output = (output,) if not isinstance(output, (tuple, list)) else tuple(output)
         if not return_dict:
-            return
+            return tuple_output
         else:
-
-
-
-
-
+            output_class = self.get_hf_output_class()
+            if hasattr(output_class, "loss"):
+                tuple_output = (None,) + tuple_output
+
+            # Truncate if we have too many outputs, otherwise use as is
+            if hasattr(output_class, "__annotations__"):
+                num_fields = len(output_class.__annotations__)
+                if len(tuple_output) > num_fields:
+                    tuple_output = tuple_output[:num_fields]
+                    logger.warning(
+                        f"Truncating output to {num_fields} fields for {output_class.__name__}. "
+                        f"Expected {num_fields} fields, but got {len(tuple_output)} fields."
+                        "This is unexpected. Please report this issue to the developers."
+                    )
+
+            return output_class(*tuple_output)
````
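When `_output_class` is not pinned on the subclass, `get_hf_output_class` now derives the output dataclass from the HF `forward` return annotation. A minimal sketch of that lookup, run against transformers' `BertModel` as an illustrative stand-in (this assumes its `forward` carries a `Union[Tuple[...], ...Output]` return annotation, as recent transformers releases do):

```python
from typing import Union, get_args, get_origin, get_type_hints

from transformers import BertModel

hints = get_type_hints(BertModel.forward)
ret = hints.get("return")
# Unwrap Union[Tuple[...], SomeModelOutput] the same way get_hf_output_class does
candidates = get_args(ret) if get_origin(ret) is Union else (ret,)
output_cls = next(
    t
    for t in candidates
    if t is not type(None) and "transformers" in getattr(t, "__module__", "")
)
print(output_cls.__name__)  # BaseModelOutputWithPoolingAndCrossAttentions
```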
optimum/rbln/modeling_base.py
CHANGED
```diff
@@ -178,9 +178,27 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         return str(model_path)
 
     @classmethod
-    def _load_compiled_models(cls, model_path: str):
+    def _load_compiled_models(cls, model_path: str, expected_compiled_model_names: List[str]):
         compiled_models = Path(model_path).glob("*.rbln")
-
+        expected_compiled_models = [
+            Path(model_path) / f"{compiled_model_name}.rbln" for compiled_model_name in expected_compiled_model_names
+        ]
+        unexpected_compiled_models = [cm for cm in compiled_models if cm not in expected_compiled_models]
+        if unexpected_compiled_models:
+            # TODO(jongho): fix after May release. raise error if unexpected compiled models are found
+            logger.warning(
+                f"Unexpected compiled models found: {[cm.name for cm in unexpected_compiled_models]}. "
+                f"Please check the model path: {model_path}"
+            )
+
+        rbln_compiled_models = {}
+        for compiled_model in expected_compiled_models:
+            if not compiled_model.exists():
+                raise FileNotFoundError(
+                    f"Expected RBLN compiled model '{compiled_model.name}' not found at '{model_path}'. "
+                    "Please ensure all models specified in `rbln_config` are present."
+                )
+            rbln_compiled_models[compiled_model.stem] = rebel.RBLNCompiledModel(compiled_model)
         return rbln_compiled_models
 
     @classmethod
@@ -271,7 +289,8 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         )
         config = PretrainedConfig(**config)
 
-        rbln_compiled_models = cls._load_compiled_models(model_path_subfolder)
+        compiled_model_names = [cfg.compiled_model_name for cfg in rbln_config.compile_cfgs]
+        rbln_compiled_models = cls._load_compiled_models(model_path_subfolder, compiled_model_names)
 
         if subfolder != "":
             model_save_dir = Path(model_path_subfolder).absolute().parent
```
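A runnable sketch of the new expected-name validation, with hypothetical compiled-model names `prefill` and `decoder` standing in for `rbln_config.compile_cfgs`, and the `rebel.RBLNCompiledModel` load step omitted:

```python
from pathlib import Path

model_path = Path("my_rbln_model")  # hypothetical directory of *.rbln artifacts
expected_names = ["prefill", "decoder"]  # would come from rbln_config.compile_cfgs

expected = [model_path / f"{name}.rbln" for name in expected_names]
found = list(model_path.glob("*.rbln"))

# Extra artifacts only warn for now (see the TODO above); missing ones are fatal.
unexpected = [cm for cm in found if cm not in expected]
if unexpected:
    print(f"Unexpected compiled models found: {[cm.name for cm in unexpected]}")

for cm in expected:
    if not cm.exists():
        raise FileNotFoundError(f"Expected RBLN compiled model '{cm.name}' not found at '{model_path}'.")
```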
optimum/rbln/transformers/modeling_generic.py
CHANGED
```diff
@@ -36,11 +36,7 @@ from transformers import (
 )
 from transformers.modeling_outputs import (
     BaseModelOutput,
-    DepthEstimatorOutput,
-    ImageClassifierOutput,
-    MaskedLMOutput,
     QuestionAnsweringModelOutput,
-    SequenceClassifierOutput,
 )
 
 from ..configuration_utils import RBLNCompileConfig
@@ -63,8 +59,6 @@ class _RBLNTransformerEncoder(RBLNModel):
     auto_model_class = AutoModel
     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
     rbln_dtype = "int64"
-    output_class = BaseModelOutput
-    output_key = "last_hidden_state"
 
     @classmethod
     def _update_rbln_config(
@@ -149,7 +143,6 @@ class _RBLNImageModel(RBLNModel):
     auto_model_class = AutoModel
     main_input_name = "pixel_values"
     output_class = BaseModelOutput
-    output_key = "last_hidden_state"
 
     @classmethod
     def _update_rbln_config(
@@ -223,15 +216,11 @@ class RBLNModelForQuestionAnswering(_RBLNTransformerEncoder):
 class RBLNModelForSequenceClassification(_RBLNTransformerEncoder):
     auto_model_class = AutoModelForSequenceClassification
     rbln_model_input_names = ["input_ids", "attention_mask"]
-    output_class = SequenceClassifierOutput
-    output_key = "logits"
 
 
 class RBLNModelForMaskedLM(_RBLNTransformerEncoder):
     auto_model_class = AutoModelForMaskedLM
     rbln_model_input_names = ["input_ids", "attention_mask"]
-    output_class = MaskedLMOutput
-    output_key = "logits"
 
 
 class RBLNModelForTextEncoding(_RBLNTransformerEncoder):
@@ -243,20 +232,14 @@ class RBLNTransformerEncoderForFeatureExtraction(_RBLNTransformerEncoder):
     # TODO: RBLNModel is also for feature extraction.
     auto_model_class = AutoModel
     rbln_model_input_names = ["input_ids", "attention_mask"]
-    output_class = BaseModelOutput
-    output_key = "last_hidden_state"
 
 
 class RBLNModelForImageClassification(_RBLNImageModel):
     auto_model_class = AutoModelForImageClassification
-    output_class = ImageClassifierOutput
-    output_key = "logits"
 
 
 class RBLNModelForDepthEstimation(_RBLNImageModel):
     auto_model_class = AutoModelForDepthEstimation
-    output_class = DepthEstimatorOutput
-    output_key = "predicted_depth"
 
 
 class RBLNModelForAudioClassification(RBLNModel):
@@ -273,8 +256,6 @@ class RBLNModelForAudioClassification(RBLNModel):
     """
 
     auto_model_class = AutoModelForAudioClassification
-    output_class = SequenceClassifierOutput
-    output_key = "logits"
 
     @classmethod
     def _update_rbln_config(
```
optimum/rbln/transformers/models/blip_2/configuration_blip_2.py
CHANGED
```diff
@@ -18,29 +18,12 @@ from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNBlip2VisionModelConfig(RBLNModelConfig):
-    def __init__(
-        self,
-        batch_size: Optional[int] = None,
-        **kwargs,
-    ):
-        """
-        Args:
-            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
-            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
-
-        Raises:
-            ValueError: If batch_size is not a positive integer.
-        """
-        super().__init__(**kwargs)
-        self.batch_size = batch_size or 1
-        if not isinstance(self.batch_size, int) or self.batch_size < 0:
-            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+    pass
 
 
 class RBLNBlip2QFormerModelConfig(RBLNModelConfig):
     def __init__(
         self,
-        batch_size: Optional[int] = None,
         num_query_tokens: Optional[int] = None,
         image_text_hidden_size: Optional[int] = None,
         **kwargs,
@@ -54,10 +37,6 @@ class RBLNBlip2QFormerModelConfig(RBLNModelConfig):
            ValueError: If batch_size is not a positive integer.
         """
         super().__init__(**kwargs)
-        self.batch_size = batch_size or 1
-        if not isinstance(self.batch_size, int) or self.batch_size < 0:
-            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
-
         self.num_query_tokens = num_query_tokens
         self.image_text_hidden_size = image_text_hidden_size
 
@@ -88,6 +67,6 @@ class RBLNBlip2ForConditionalGenerationConfig(RBLNModelConfig):
         if not isinstance(self.batch_size, int) or self.batch_size < 0:
             raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
 
-        self.vision_model = self.init_submodule_config(RBLNBlip2VisionModelConfig, vision_model
+        self.vision_model = self.init_submodule_config(RBLNBlip2VisionModelConfig, vision_model)
         self.language_model = language_model
-        self.qformer = self.init_submodule_config(RBLNBlip2QFormerModelConfig, qformer
+        self.qformer = self.init_submodule_config(RBLNBlip2QFormerModelConfig, qformer)
```
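With `batch_size` dropped from the vision and Q-Former submodule configs, batch size is owned solely by the top-level conditional-generation config, where it is still validated. A minimal sketch (the top-level import path is an assumption; optimum-rbln typically re-exports config classes from the package root):

```python
from optimum.rbln import RBLNBlip2ForConditionalGenerationConfig

# batch_size is validated once here instead of on each submodule config
config = RBLNBlip2ForConditionalGenerationConfig(batch_size=2)
```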
optimum/rbln/transformers/models/blip_2/modeling_blip_2.py
CHANGED
```diff
@@ -42,6 +42,28 @@ if TYPE_CHECKING:
     )
 
 
+class LoopProjector:
+    def __init__(self, language_projection) -> None:
+        self.language_projection = language_projection
+
+    def forward(self, *args, **kwargs):
+        query_output = args[0]
+
+        batch_size = query_output.shape[0]
+        outputs = []
+        for i in range(batch_size):
+            outputs.append(self.language_projection(query_output[i : i + 1]))
+
+        outputs = torch.cat(outputs, dim=0)
+        return outputs
+
+    def __call__(self, *args: Any, **kwds: Any) -> Any:
+        return self.forward(*args, **kwds)
+
+    def __repr__(self) -> str:
+        return repr(self.language_projection)
+
+
 class RBLNBlip2VisionModel(RBLNModel):
     def get_input_embeddings(self):
         return self.embeddings
@@ -71,7 +93,8 @@ class RBLNBlip2VisionModel(RBLNModel):
             (
                 "pixel_values",
                 [
-
+                    # support for vllm CB (prefill)
+                    1,
                     model_config.num_channels,
                     model_config.image_size,
                     model_config.image_size,
@@ -86,27 +109,30 @@ class RBLNBlip2VisionModel(RBLNModel):
 
     def forward(
         self,
-        pixel_values
+        pixel_values,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
     ) -> Union[Tuple, BaseModelOutputWithPooling]:
-
-
+        batch_size = pixel_values.shape[0]
+        outputs = []
+        for i in range(batch_size):
+            outputs.append(self.model[0](pixel_values[i : i + 1]))
+
+        last_hidden_state = [output[0] for output in outputs]
+        pooler_output = [output[1] for output in outputs]
+
+        last_hidden_state = torch.cat(last_hidden_state, dim=0)
+        pooler_output = torch.cat(pooler_output, dim=0)
 
-    def _prepare_output(self, output, return_dict):
-        """
-        Prepare model output based on return_dict flag.
-        This method can be overridden by subclasses to provide task-specific output handling.
-        """
         if not return_dict:
-            return (
-
-
-
-
-
+            return (last_hidden_state, pooler_output)
+
+        return BaseModelOutputWithPooling(
+            last_hidden_state=last_hidden_state,
+            pooler_output=pooler_output,
+        )
 
 
 class RBLNBlip2QFormerModel(RBLNModel):
@@ -158,7 +184,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
             (
                 "query_embeds",
                 [
-
+                    1,
                     rbln_config.num_query_tokens,
                     model_config.hidden_size,
                 ],
@@ -167,7 +193,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
             (
                 "encoder_hidden_states",
                 [
-
+                    1,
                     # image_text_hidden_size + cls token
                     rbln_config.image_text_hidden_size + 1,
                     model_config.encoder_hidden_size,
@@ -177,7 +203,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
             (
                 "encoder_attention_mask",
                 # image_text_hidden_size + cls token
-                [
+                [1, rbln_config.image_text_hidden_size + 1],
                 "int64",
             ),
         ]
@@ -200,21 +226,28 @@ class RBLNBlip2QFormerModel(RBLNModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
     ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
-
-
+        batch_size = query_embeds.shape[0]
+        outputs = []
+        for i in range(batch_size):
+            outputs.append(
+                self.model[0](
+                    query_embeds[i : i + 1], encoder_hidden_states[i : i + 1], encoder_attention_mask[i : i + 1]
+                )
+            )
+
+        sequence_output = [output[0] for output in outputs]
+        pooled_output = [output[1] for output in outputs]
+
+        sequence_output = torch.cat(sequence_output, dim=0)
+        pooled_output = torch.cat(pooled_output, dim=0)
 
-    def _prepare_output(self, output, return_dict):
-        """
-        Prepare model output based on return_dict flag.
-        This method can be overridden by subclasses to provide task-specific output handling.
-        """
         if not return_dict:
-            return (
-
-
-
-
-
+            return (sequence_output, pooled_output)
+
+        return BaseModelOutputWithPoolingAndCrossAttentions(
+            last_hidden_state=sequence_output,
+            pooler_output=pooled_output,
+        )
 
 
 class RBLNBlip2ForConditionalGeneration(RBLNModel):
@@ -254,7 +287,7 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
         self.vision_model = self.rbln_submodules[0]
         self.language_model = self.rbln_submodules[2]
         self.qformer = self.rbln_submodules[1]
-        self.language_projection = self.model[0]
+        self.language_projection = LoopProjector(self.model[0])
 
         artifacts = torch.load(self.model_save_dir / self.subfolder / "query_tokens.pth", weights_only=False)
         self.query_tokens = artifacts["query_tokens"]
@@ -284,7 +317,7 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
             (
                 "query_output",
                 [
-
+                    1,
                     model_config.num_query_tokens,
                     model_config.qformer_config.hidden_size,
                 ],
@@ -296,3 +329,61 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
         rbln_config.set_compile_cfgs([rbln_compile_config])
 
         return rbln_config
+
+    def _preprocess_prefill(
+        self,
+        pixel_values: torch.FloatTensor,
+        input_ids: torch.FloatTensor,
+        attention_mask: Optional[torch.LongTensor] = None,
+        return_dict: Optional[bool] = None,
+        interpolate_pos_encoding: bool = False,
+        **kwargs,
+    ):
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+        vision_outputs = self.vision_model(
+            pixel_values=pixel_values,
+            return_dict=return_dict,
+            interpolate_pos_encoding=interpolate_pos_encoding,
+        )
+        image_embeds = vision_outputs[0]
+
+        image_attention_mask = torch.ones(image_embeds.size()[:-1], dtype=torch.long, device=image_embeds.device)
+
+        query_tokens = self.query_tokens.expand(image_embeds.shape[0], -1, -1)
+
+        query_outputs = self.qformer(
+            query_embeds=query_tokens,
+            encoder_hidden_states=image_embeds,
+            encoder_attention_mask=image_attention_mask,
+            return_dict=return_dict,
+        )
+        query_output = query_outputs[0]
+
+        if query_output.dtype != image_embeds.dtype:
+            query_output = query_output.to(image_embeds.dtype)
+
+        language_model_inputs = self.language_projection(query_output)
+        language_model_attention_mask = torch.ones(
+            language_model_inputs.size()[:-1], dtype=torch.long, device=language_model_inputs.device
+        )
+        inputs_embeds = self.language_model.get_input_embeddings()(input_ids)
+        if attention_mask is None:
+            attention_mask = torch.ones_like(input_ids)
+
+        if getattr(self.config, "image_token_index", None) is not None:
+            special_image_mask = (input_ids == self.config.image_token_index).unsqueeze(-1).expand_as(inputs_embeds)
+            language_model_inputs = language_model_inputs.to(inputs_embeds.device, inputs_embeds.dtype)
+            inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, language_model_inputs)
+        else:
+            logger.warning_once(
+                "Expanding inputs for image tokens in BLIP-2 should be done in processing. "
+                "Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your BLIP-2 model. "
+                "Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
+            )
+            inputs_embeds = torch.cat([language_model_inputs, inputs_embeds.to(language_model_inputs.device)], dim=1)
+            attention_mask = torch.cat(
+                [language_model_attention_mask, attention_mask.to(language_model_attention_mask.device)], dim=1
+            )
+
+        return inputs_embeds
```
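`LoopProjector` (and the matching per-sample loops in the vision and Q-Former `forward` methods) exists because these graphs are now compiled with a fixed batch of 1 to support vLLM continuous-batching prefill; batched inputs are run sample-by-sample and concatenated. A minimal illustration of the pattern, with a plain `torch.nn.Linear` standing in for the compiled `self.model[0]` (dimensions are made up):

```python
import torch

projection = torch.nn.Linear(768, 2048)  # stand-in for the compiled projection graph


def loop_forward(query_output: torch.Tensor) -> torch.Tensor:
    # Run the fixed-batch-1 graph once per sample, then re-batch the results.
    outputs = [projection(query_output[i : i + 1]) for i in range(query_output.shape[0])]
    return torch.cat(outputs, dim=0)


out = loop_forward(torch.randn(4, 32, 768))
print(out.shape)  # torch.Size([4, 32, 2048])
```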
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py
CHANGED
```diff
@@ -858,6 +858,11 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
             max_seq_len=rbln_config.max_seq_len,
         )
 
+        if rbln_config.attn_impl == "eager":
+            raise ValueError(
+                "Eager attention is not supported for Gemma3. Please use flash attention by setting `rbln_attn_impl='flash_attn'`. Stay tuned for future updates!"
+            )
+
         validate_attention_method(
             attn_impl=rbln_config.attn_impl,
             kvcache_partition_len=rbln_config.kvcache_partition_len,
```
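A sketch of compiling Gemma3 under the new constraint; the `rbln_attn_impl` keyword is taken from the error message above, while the model ID and the `from_pretrained(export=True, ...)` pattern follow typical optimum-rbln usage and are assumptions here:

```python
from optimum.rbln import RBLNGemma3ForCausalLM

model = RBLNGemma3ForCausalLM.from_pretrained(
    "google/gemma-3-4b-it",          # hypothetical model ID
    export=True,
    rbln_attn_impl="flash_attn",     # "eager" now raises ValueError for Gemma3
)
```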
optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py
CHANGED
```diff
@@ -15,7 +15,6 @@
 
 import torch
 from transformers import AutoModelForMaskedLM, Wav2Vec2ForCTC
-from transformers.modeling_outputs import CausalLMOutput
 
 from ...modeling_generic import RBLNModelForMaskedLM
 from .configuration_wav2vec import RBLNWav2Vec2ForCTCConfig
@@ -46,8 +45,6 @@ class RBLNWav2Vec2ForCTC(RBLNModelForMaskedLM):
     main_input_name = "input_values"
     auto_model_class = AutoModelForMaskedLM
     rbln_dtype = "float32"
-    output_class = CausalLMOutput
-    output_key = "logits"
 
     @classmethod
     def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNWav2Vec2ForCTCConfig) -> torch.nn.Module:
```
{optimum_rbln-0.7.5rc0.dist-info → optimum_rbln-0.7.5rc2.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.7.5rc0
+Version: 0.7.5rc2
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
```
{optimum_rbln-0.7.5rc0.dist-info → optimum_rbln-0.7.5rc2.dist-info}/RECORD
CHANGED
```diff
@@ -1,8 +1,8 @@
 optimum/rbln/__init__.py,sha256=oAnsJSMrPYwBGEttUt3CMXTIESVNe15ftTWRTShwhZI,14386
-optimum/rbln/__version__.py,sha256=
+optimum/rbln/__version__.py,sha256=fpFaQLT4vGQYujVJTSb1WZo3X-GKEGeYInrc_bpJrpQ,521
 optimum/rbln/configuration_utils.py,sha256=gvAjRFEGw5rnSoH0IoyuLrE4fkxtk3DN1pikqrN_Rpk,31277
-optimum/rbln/modeling.py,sha256=
-optimum/rbln/modeling_base.py,sha256=
+optimum/rbln/modeling.py,sha256=BpydF-bLBF60NnRMbtZwn5odOUjU4Awu9azqGeSufTI,11462
+optimum/rbln/modeling_base.py,sha256=HQgscr5jpUEtuXU1ACJHSLIntX-kq6Ef0SQ_W2-rp5A,25341
 optimum/rbln/diffusers/__init__.py,sha256=XL6oKPHbPCV6IVCw3fu0-M9mD2KO_x6unx5kJdAtpVY,6180
 optimum/rbln/diffusers/modeling_diffusers.py,sha256=bPyP5RMbOFLb2DfEAuLVp7hTuQWJvWid7El72wGmFrY,19535
 optimum/rbln/diffusers/configurations/__init__.py,sha256=Sk_sQVTuTl01RVgYViWknQSLmulxKaISS0w-oPdNoBQ,1164
@@ -14,11 +14,11 @@ optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py,sh
 optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py,sha256=c-1xAFgA1st8djLXkLeXtctcFp1MqZZYZp3Phqn1Wxo,3366
 optimum/rbln/diffusers/configurations/models/configuration_vq_model.py,sha256=su4Ceok4Wx4m1hKp5YEM3zigrlTmj3NIs9X7aAKOeWg,2980
 optimum/rbln/diffusers/configurations/pipelines/__init__.py,sha256=HJlu5lRZJWy4nYjBw3-ed93Pfb5QQmUbCJZKDW1bGH4,1160
-optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py,sha256=
-optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py,sha256=
-optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py,sha256=
-optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py,sha256=
-optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py,sha256=
+optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py,sha256=L5WeSCpYUdcTG4wBrMZIQIEUZV2Jxegdr53n8oSf6II,13748
+optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py,sha256=JkdeFVU4RNiCJaSQUOJ3gWfKAyRhDRxMqEd68NJIij0,15675
+optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py,sha256=C4PKcN6SZYmzTwyaIxXKXtltzUOWs7p1cwzudUwodY0,6344
+optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py,sha256=lfyjjHKYHZ470tDAeNKRL2tJf_TpECzSGEGm5iqoZBo,7722
+optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py,sha256=SHA1IV-oqdRpFM_ZLJ8GTmrrSdPXAUvLVtx5I3VQN7U,6880
 optimum/rbln/diffusers/models/__init__.py,sha256=mkCvJyH1KcwrsUvYSq_bVC79oOfyqtBSFDyPS1_48wA,1478
 optimum/rbln/diffusers/models/controlnet.py,sha256=kzDbUckGlioor8t0kKBvwi-hzNaG15XluEzTa7xZs1Q,10292
 optimum/rbln/diffusers/models/autoencoders/__init__.py,sha256=dg17ZTUsiqTcbIaEE4fqew9uRbao0diQ21PXvRKIqKg,679
@@ -26,10 +26,10 @@ optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py,sha256=qjReFNmuQEjn
 optimum/rbln/diffusers/models/autoencoders/vae.py,sha256=_fyFco2697uT1zo_P_fGML-_zqZw2sUQp3tRRjA5pg4,4172
 optimum/rbln/diffusers/models/autoencoders/vq_model.py,sha256=DC8Nee8_BabGhagJgpCUDhA-oaTpZMg-lCVzXJ6dNEw,6134
 optimum/rbln/diffusers/models/transformers/__init__.py,sha256=V8rSR7WzHs-i8Cwb_MNxhY2NFbwPgxu24vGtkwl-6tk,706
-optimum/rbln/diffusers/models/transformers/prior_transformer.py,sha256=
-optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=
+optimum/rbln/diffusers/models/transformers/prior_transformer.py,sha256=XaIICLeMdGyqm9B3f2A3vqh1haJpqfT3GJ3ZM0DKcaY,4945
+optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=H1dsDOnAK4Dp0ixCVIt_4_4KJ5ZcTygfG7sFFdpOvrI,6554
 optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
-optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=
+optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=3dzqJQPiklkgoyxRHysOE7q9hrhaT4K0_SNiCflFvLg,15530
 optimum/rbln/diffusers/pipelines/__init__.py,sha256=5KLZ5LrpMzBya2e_3_PvEoPwG24U8JMexfw_ygZREKc,3140
 optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
 optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=Cv__E0Boc6TSOIv8TdXVE821zIiPG4MVI_lnaGSqquk,4102
@@ -65,7 +65,7 @@ optimum/rbln/transformers/__init__.py,sha256=LW6abfb0W0jHziE8dIEDBeyb4Cj-aq8dUld
 optimum/rbln/transformers/configuration_alias.py,sha256=qFVfg6ohsR7a6b-CBgxjBUPDrk9MyiJwtO8AQah_RTU,1505
 optimum/rbln/transformers/configuration_generic.py,sha256=XIiZ1-5p1CMHhG7Sr2qR4SLYKcYw9aph7eGlga3Opx0,5056
 optimum/rbln/transformers/modeling_alias.py,sha256=yx7FnZQWAnrWzivaO5hI7T6i-fyLzt2tMIXG2oDNbPo,1657
-optimum/rbln/transformers/modeling_generic.py,sha256=
+optimum/rbln/transformers/modeling_generic.py,sha256=L5ndJJzKhXa4de1YAA8uxNzMKWOHsAHPoJrANxWYWjE,12265
 optimum/rbln/transformers/modeling_rope_utils.py,sha256=3zwkhYUyTZhxCJUSmwCc88iiY1TppRWEY9ShwUqNB2k,14293
 optimum/rbln/transformers/models/__init__.py,sha256=qNh_d7bBKxhxBbUImXJ66n0Vo0NW1m7tMIU5M2ZxGmw,8510
 optimum/rbln/transformers/models/auto/__init__.py,sha256=34Xghf1ogG4u-jhBMlj134nHdgnR3JEHSeZTPuy3MpY,1071
@@ -79,8 +79,8 @@ optimum/rbln/transformers/models/bert/__init__.py,sha256=86FuGRBLw315_Roa9D5OUx6
 optimum/rbln/transformers/models/bert/configuration_bert.py,sha256=NIlBRn-zrnNirkEfJ4Uv2TZRIBL761PLJ9-cZaPyzpg,1017
 optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=XxsRhBhexZ2w3mRCvKl73pIyGdqcFR1RrOKG7h4EAyk,1223
 optimum/rbln/transformers/models/blip_2/__init__.py,sha256=L01gPXcUCa8Vg-bcng20vZvBIN_jlqCzwUSFuq0QOag,855
-optimum/rbln/transformers/models/blip_2/configuration_blip_2.py,sha256=
-optimum/rbln/transformers/models/blip_2/modeling_blip_2.py,sha256=
+optimum/rbln/transformers/models/blip_2/configuration_blip_2.py,sha256=bAr3tlW2upxdBiihR7wUJGRxpdtelxt9BAkL9UXLJGE,2746
+optimum/rbln/transformers/models/blip_2/modeling_blip_2.py,sha256=8pjFNXYM2phJQBoTWw08hK_wi7APjjhFTJfBZ3cx_Xo,14301
 optimum/rbln/transformers/models/clip/__init__.py,sha256=TLeXDqcFK6M6v9x7Xr64kBbqGu3hFHM7p754dQ8UVQc,938
 optimum/rbln/transformers/models/clip/configuration_clip.py,sha256=wgfZeVvcVdSzrN9tcnt7DKJQ0NLR0REvW7MyUXyv2Bg,2976
 optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=UslcDN6otyQ_psou7F_YcdK5vCImEtgIdcbwmexSfOM,7256
@@ -102,7 +102,7 @@ optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=-U3w3cEOv3ps1S8a
 optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_J9VyGiSReuEIvL0Uno0eaI,790
 optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=nndcYVwDYkOige_qO4td-YwLNtUz6aLiSQDIfPdGG9A,2840
 optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=Uer27wG06hgV1WNf92x1ZeUpl4Q0zskfCqzlLhtgtNU,17348
-optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=
+optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=mx3upghkboeyRGYxwPfA1fzRNPWj5MzX8bIy0kszHWY,45235
 optimum/rbln/transformers/models/gpt2/__init__.py,sha256=socBMIBZSiLbrVN12rQ4nL9gFeT0axMgz6SWaCaD4Ac,704
 optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=vKvJD8P9Li4W9wdVoQcqMEr1MwEXojPBnF2NE85VXAo,772
 optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=kf5jk7Djv9XXX3Q83oTosiMpt9g44TF_gCT-vMiWDJk,3097
@@ -158,7 +158,7 @@ optimum/rbln/transformers/models/time_series_transformers/modeling_time_series_t
 optimum/rbln/transformers/models/time_series_transformers/time_series_transformers_architecture.py,sha256=XJDjQGbWXUq4ZimNojlcbm3mTDpxUMCl6tkFSzfYFl4,13769
 optimum/rbln/transformers/models/wav2vec2/__init__.py,sha256=dzXqyf_uiI45hPJGbnF1v780Izi2TigsbAo3hxFmhy0,709
 optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec.py,sha256=hXsN_pc_gb_xcQdXXnvpp-o0dk5lNepXnt9O5HB-3g4,771
-optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py,sha256=
+optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py,sha256=Lmm39NhvJIQtCkBa5BynkJYNqWOF7GaWsV5qYX-4L94,1943
 optimum/rbln/transformers/models/whisper/__init__.py,sha256=ErquiUlYycSYPsDcq9IwwmbZXoYLn1MVZ8VikWY5gQo,792
 optimum/rbln/transformers/models/whisper/configuration_whisper.py,sha256=-Su7pbkg3gkYTf-ECRJyxkpD3JtUJX4y5Mfml8tJJBI,2612
 optimum/rbln/transformers/models/whisper/generation_whisper.py,sha256=GIHTca3b1VtW81kp7BzKQ7f77c2t9OsEsbZetripgDo,4582
@@ -178,7 +178,7 @@ optimum/rbln/utils/model_utils.py,sha256=V2kFpUe2aqVzLwbpztD8JOVFQqRHncvIWwJbgnU
 optimum/rbln/utils/runtime_utils.py,sha256=LoKNK3AQNV_BSScstIZWjICkJf265MnUgy360BOocVI,5454
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=ZfI7e3YzbjbbBW4Yjfucj8NygEsukfIkaJi3PtwHrhc,5105
-optimum_rbln-0.7.
-optimum_rbln-0.7.
-optimum_rbln-0.7.
-optimum_rbln-0.7.
+optimum_rbln-0.7.5rc2.dist-info/METADATA,sha256=WltiXJxC_INRxeoxhT4xyvLOO8FM6cqJOqbpnTQ6xhg,5300
+optimum_rbln-0.7.5rc2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.7.5rc2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.7.5rc2.dist-info/RECORD,,
```

{optimum_rbln-0.7.5rc0.dist-info → optimum_rbln-0.7.5rc2.dist-info}/WHEEL
File without changes

{optimum_rbln-0.7.5rc0.dist-info → optimum_rbln-0.7.5rc2.dist-info}/licenses/LICENSE
File without changes