optimum-rbln 0.7.5rc1-py3-none-any.whl → 0.7.5rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
optimum/rbln/__version__.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.7.5rc1'
- __version_tuple__ = version_tuple = (0, 7, 5, 'rc1')
+ __version__ = version = '0.7.5rc2'
+ __version_tuple__ = version_tuple = (0, 7, 5, 'rc2')
optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py CHANGED
@@ -33,6 +33,8 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          sample_size: Optional[Tuple[int, int]] = None,
          image_size: Optional[Tuple[int, int]] = None,
          guidance_scale: Optional[float] = None,
@@ -51,6 +53,8 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
              image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                  Cannot be used together with img_height/img_width.
@@ -65,11 +69,29 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
          self.unet = self.init_submodule_config(
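
The resolution rules introduced by these hunks (and repeated verbatim in each pipeline config below) come down to three points: image_size is mutually exclusive with every scalar dimension argument, the HF-aligned height/width pair takes priority over the legacy img_height/img_width pair, and either pair must be passed as a whole. A minimal standalone sketch of that precedence, assuming nothing beyond the standard library (resolve_image_size is an illustrative name, not part of optimum-rbln):

from typing import Optional, Tuple

def resolve_image_size(
    image_size: Optional[Tuple[int, int]] = None,
    img_height: Optional[int] = None,
    img_width: Optional[int] = None,
    height: Optional[int] = None,
    width: Optional[int] = None,
) -> Optional[Tuple[int, int]]:
    # image_size conflicts with every scalar dimension argument.
    if image_size is not None and any(v is not None for v in (img_height, img_width, height, width)):
        raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
    # The HF-aligned height/width pair wins, and may not be mixed with the legacy pair.
    if height is not None and width is not None:
        if img_height is not None or img_width is not None:
            raise ValueError("Cannot provide both 'height'/'width' and 'img_height'/'img_width'")
        return (height, width)
    if (height is None) != (width is None):
        raise ValueError("Both height and width must be provided together if used")
    # Legacy fallback, kept for backward compatibility.
    if img_height is not None and img_width is not None:
        return (img_height, img_width)
    if (img_height is None) != (img_width is None):
        raise ValueError("Both img_height and img_width must be provided together if used")
    return image_size

assert resolve_image_size(height=768, width=512) == (768, 512)
assert resolve_image_size(img_height=768, img_width=512) == (768, 512)  # legacy path still accepted
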
@@ -139,6 +161,8 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          sample_size: Optional[Tuple[int, int]] = None,
          image_size: Optional[Tuple[int, int]] = None,
          guidance_scale: Optional[float] = None,
@@ -159,6 +183,8 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
              image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                  Cannot be used together with img_height/img_width.
@@ -173,11 +199,29 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
          self.text_encoder_2 = self.init_submodule_config(
optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py CHANGED
@@ -35,6 +35,8 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
          image_size: Optional[Tuple[int, int]] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          **kwargs,
      ):
          """
@@ -50,6 +52,8 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
                  Cannot be used together with img_height/img_width.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              **kwargs: Additional arguments passed to the parent RBLNModelConfig.

          Raises:
@@ -60,11 +64,29 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          self.unet = self.init_submodule_config(RBLNUNet2DConditionModelConfig, unet, sample_size=sample_size)
          self.movq = self.init_submodule_config(
@@ -186,6 +208,8 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          guidance_scale: Optional[float] = None,
          prior_prior: Optional[RBLNPriorTransformerConfig] = None,
          prior_image_encoder: Optional[RBLNCLIPVisionModelWithProjectionConfig] = None,
@@ -212,6 +236,8 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              guidance_scale (Optional[float]): Scale for classifier-free guidance.
              prior_prior (Optional[RBLNPriorTransformerConfig]): Direct configuration for the prior transformer.
                  Used if prior_pipe is not provided.
@@ -226,6 +252,30 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
              **kwargs: Additional arguments passed to the parent RBLNModelConfig.
          """
          super().__init__(**kwargs)
+
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
+             image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")
+
          self.prior_pipe = self.init_submodule_config(
              RBLNKandinskyV22PriorPipelineConfig,
              prior_pipe,
@@ -243,8 +293,6 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
              batch_size=batch_size,
              sample_size=sample_size,
              image_size=image_size,
-             img_height=img_height,
-             img_width=img_width,
              guidance_scale=guidance_scale,
          )

optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py CHANGED
@@ -32,6 +32,8 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          sample_size: Optional[Tuple[int, int]] = None,
          image_size: Optional[Tuple[int, int]] = None,
          guidance_scale: Optional[float] = None,
@@ -48,6 +50,8 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
              image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                  Cannot be used together with img_height/img_width.
@@ -62,11 +66,29 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
          self.unet = self.init_submodule_config(
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py CHANGED
@@ -37,6 +37,8 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          guidance_scale: Optional[float] = None,
          **kwargs,
      ):
@@ -59,6 +61,8 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              guidance_scale (Optional[float]): Scale for classifier-free guidance.
              **kwargs: Additional arguments passed to the parent RBLNModelConfig.

@@ -70,11 +74,29 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          max_seq_len = max_seq_len or 256

optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py CHANGED
@@ -33,6 +33,8 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          sample_size: Optional[Tuple[int, int]] = None,
          image_size: Optional[Tuple[int, int]] = None,
          guidance_scale: Optional[float] = None,
@@ -51,6 +53,8 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
              image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                  Cannot be used together with img_height/img_width.
@@ -65,11 +69,29 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
          self.text_encoder_2 = self.init_submodule_config(
optimum/rbln/modeling.py CHANGED
@@ -282,7 +282,7 @@ class RBLNModel(RBLNBaseModel):
          Prepare model output based on return_dict flag.
          This method can be overridden by subclasses to provide task-specific output handling.
          """
-         tuple_output = (output,) if not isinstance(output, (tuple, list)) else output
+         tuple_output = (output,) if not isinstance(output, (tuple, list)) else tuple(output)
          if not return_dict:
              return tuple_output
          else:
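
The one-character change above closes a small contract gap: when a runtime returned a list, the old expression passed the list through unchanged, so non-return_dict callers that unpack or compare against tuples could see a list instead. A quick illustration of the before/after behavior:

output = [1, 2]
old = (output,) if not isinstance(output, (tuple, list)) else output
new = (output,) if not isinstance(output, (tuple, list)) else tuple(output)
assert old == [1, 2]  # a list used to leak through
assert new == (1, 2)  # now always a tuple
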
optimum/rbln/transformers/models/blip_2/configuration_blip_2.py CHANGED
@@ -18,29 +18,12 @@ from ....configuration_utils import RBLNModelConfig


  class RBLNBlip2VisionModelConfig(RBLNModelConfig):
-     def __init__(
-         self,
-         batch_size: Optional[int] = None,
-         **kwargs,
-     ):
-         """
-         Args:
-             batch_size (Optional[int]): The batch size for inference. Defaults to 1.
-             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
-
-         Raises:
-             ValueError: If batch_size is not a positive integer.
-         """
-         super().__init__(**kwargs)
-         self.batch_size = batch_size or 1
-         if not isinstance(self.batch_size, int) or self.batch_size < 0:
-             raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+     pass


  class RBLNBlip2QFormerModelConfig(RBLNModelConfig):
      def __init__(
          self,
-         batch_size: Optional[int] = None,
          num_query_tokens: Optional[int] = None,
          image_text_hidden_size: Optional[int] = None,
          **kwargs,
@@ -54,10 +37,6 @@ class RBLNBlip2QFormerModelConfig(RBLNModelConfig):
              ValueError: If batch_size is not a positive integer.
          """
          super().__init__(**kwargs)
-         self.batch_size = batch_size or 1
-         if not isinstance(self.batch_size, int) or self.batch_size < 0:
-             raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
-
          self.num_query_tokens = num_query_tokens
          self.image_text_hidden_size = image_text_hidden_size

@@ -88,6 +67,6 @@ class RBLNBlip2ForConditionalGenerationConfig(RBLNModelConfig):
          if not isinstance(self.batch_size, int) or self.batch_size < 0:
              raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")

-         self.vision_model = self.init_submodule_config(RBLNBlip2VisionModelConfig, vision_model, batch_size=batch_size)
+         self.vision_model = self.init_submodule_config(RBLNBlip2VisionModelConfig, vision_model)
          self.language_model = language_model
-         self.qformer = self.init_submodule_config(RBLNBlip2QFormerModelConfig, qformer, batch_size=batch_size)
+         self.qformer = self.init_submodule_config(RBLNBlip2QFormerModelConfig, qformer)
optimum/rbln/transformers/models/blip_2/modeling_blip_2.py CHANGED
@@ -42,6 +42,28 @@ if TYPE_CHECKING:
      )


+ class LoopProjector:
+     def __init__(self, language_projection) -> None:
+         self.language_projection = language_projection
+
+     def forward(self, *args, **kwargs):
+         query_output = args[0]
+
+         batch_size = query_output.shape[0]
+         outputs = []
+         for i in range(batch_size):
+             outputs.append(self.language_projection(query_output[i : i + 1]))
+
+         outputs = torch.cat(outputs, dim=0)
+         return outputs
+
+     def __call__(self, *args: Any, **kwds: Any) -> Any:
+         return self.forward(*args, **kwds)
+
+     def __repr__(self) -> str:
+         return repr(self.language_projection)
+
+
  class RBLNBlip2VisionModel(RBLNModel):
      def get_input_embeddings(self):
          return self.embeddings
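
LoopProjector exists because the projection is now compiled with a fixed batch size of 1 (see the input-shape hunks below): it replays the compiled module once per sample and concatenates the results, so callers can still pass arbitrary batch sizes, as vLLM continuous batching does. A minimal sketch of the pattern, assuming the LoopProjector class from the hunk above is in scope and using a plain torch.nn.Linear as a stand-in for the compiled self.model[0]:

import torch

projection = torch.nn.Linear(8, 4)       # stand-in for the compiled projection module
loop_projection = LoopProjector(projection)

query_output = torch.randn(3, 32, 8)     # batch of 3, even though the module expects batch 1
out = loop_projection(query_output)      # runs the module 3 times, one sample at a time
assert out.shape == (3, 32, 4)
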
@@ -71,7 +93,8 @@ class RBLNBlip2VisionModel(RBLNModel):
              (
                  "pixel_values",
                  [
-                     rbln_config.batch_size,
+                     # support for vllm CB (prefill)
+                     1,
                      model_config.num_channels,
                      model_config.image_size,
                      model_config.image_size,
@@ -86,27 +109,30 @@ class RBLNBlip2VisionModel(RBLNModel):

      def forward(
          self,
-         pixel_values: Optional[torch.FloatTensor] = None,
+         pixel_values,
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
          interpolate_pos_encoding: bool = False,
      ) -> Union[Tuple, BaseModelOutputWithPooling]:
-         output = super().forward(pixel_values, return_dict=return_dict)
-         return output
+         batch_size = pixel_values.shape[0]
+         outputs = []
+         for i in range(batch_size):
+             outputs.append(self.model[0](pixel_values[i : i + 1]))
+
+         last_hidden_state = [output[0] for output in outputs]
+         pooler_output = [output[1] for output in outputs]
+
+         last_hidden_state = torch.cat(last_hidden_state, dim=0)
+         pooler_output = torch.cat(pooler_output, dim=0)

-     def _prepare_output(self, output, return_dict):
-         """
-         Prepare model output based on return_dict flag.
-         This method can be overridden by subclasses to provide task-specific output handling.
-         """
          if not return_dict:
-             return (output,) if not isinstance(output, (tuple, list)) else output
-         else:
-             return BaseModelOutputWithPooling(
-                 last_hidden_state=output[0],
-                 pooler_output=output[1],
-             )
+             return (last_hidden_state, pooler_output)
+
+         return BaseModelOutputWithPooling(
+             last_hidden_state=last_hidden_state,
+             pooler_output=pooler_output,
+         )


  class RBLNBlip2QFormerModel(RBLNModel):
@@ -158,7 +184,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
              (
                  "query_embeds",
                  [
-                     rbln_config.batch_size,
+                     1,
                      rbln_config.num_query_tokens,
                      model_config.hidden_size,
                  ],
@@ -167,7 +193,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
              (
                  "encoder_hidden_states",
                  [
-                     rbln_config.batch_size,
+                     1,
                      # image_text_hidden_size + cls token
                      rbln_config.image_text_hidden_size + 1,
                      model_config.encoder_hidden_size,
@@ -177,7 +203,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
              (
                  "encoder_attention_mask",
                  # image_text_hidden_size + cls token
-                 [rbln_config.batch_size, rbln_config.image_text_hidden_size + 1],
+                 [1, rbln_config.image_text_hidden_size + 1],
                  "int64",
              ),
          ]
@@ -200,21 +226,28 @@ class RBLNBlip2QFormerModel(RBLNModel):
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
      ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
-         output = super().forward(query_embeds, encoder_hidden_states, encoder_attention_mask, return_dict=return_dict)
-         return output
+         batch_size = query_embeds.shape[0]
+         outputs = []
+         for i in range(batch_size):
+             outputs.append(
+                 self.model[0](
+                     query_embeds[i : i + 1], encoder_hidden_states[i : i + 1], encoder_attention_mask[i : i + 1]
+                 )
+             )
+
+         sequence_output = [output[0] for output in outputs]
+         pooled_output = [output[1] for output in outputs]
+
+         sequence_output = torch.cat(sequence_output, dim=0)
+         pooled_output = torch.cat(pooled_output, dim=0)

-     def _prepare_output(self, output, return_dict):
-         """
-         Prepare model output based on return_dict flag.
-         This method can be overridden by subclasses to provide task-specific output handling.
-         """
          if not return_dict:
-             return (output,) if not isinstance(output, (tuple, list)) else output
-         else:
-             return BaseModelOutputWithPoolingAndCrossAttentions(
-                 last_hidden_state=output[0],
-                 pooler_output=output[1],
-             )
+             return (sequence_output, pooled_output)
+
+         return BaseModelOutputWithPoolingAndCrossAttentions(
+             last_hidden_state=sequence_output,
+             pooler_output=pooled_output,
+         )


  class RBLNBlip2ForConditionalGeneration(RBLNModel):
@@ -254,7 +287,7 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
          self.vision_model = self.rbln_submodules[0]
          self.language_model = self.rbln_submodules[2]
          self.qformer = self.rbln_submodules[1]
-         self.language_projection = self.model[0]
+         self.language_projection = LoopProjector(self.model[0])

          artifacts = torch.load(self.model_save_dir / self.subfolder / "query_tokens.pth", weights_only=False)
          self.query_tokens = artifacts["query_tokens"]
@@ -284,7 +317,7 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
              (
                  "query_output",
                  [
-                     rbln_config.batch_size,
+                     1,
                      model_config.num_query_tokens,
                      model_config.qformer_config.hidden_size,
                  ],
@@ -296,3 +329,61 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
          rbln_config.set_compile_cfgs([rbln_compile_config])

          return rbln_config
+
+     def _preprocess_prefill(
+         self,
+         pixel_values: torch.FloatTensor,
+         input_ids: torch.FloatTensor,
+         attention_mask: Optional[torch.LongTensor] = None,
+         return_dict: Optional[bool] = None,
+         interpolate_pos_encoding: bool = False,
+         **kwargs,
+     ):
+         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+         vision_outputs = self.vision_model(
+             pixel_values=pixel_values,
+             return_dict=return_dict,
+             interpolate_pos_encoding=interpolate_pos_encoding,
+         )
+         image_embeds = vision_outputs[0]
+
+         image_attention_mask = torch.ones(image_embeds.size()[:-1], dtype=torch.long, device=image_embeds.device)
+
+         query_tokens = self.query_tokens.expand(image_embeds.shape[0], -1, -1)
+
+         query_outputs = self.qformer(
+             query_embeds=query_tokens,
+             encoder_hidden_states=image_embeds,
+             encoder_attention_mask=image_attention_mask,
+             return_dict=return_dict,
+         )
+         query_output = query_outputs[0]
+
+         if query_output.dtype != image_embeds.dtype:
+             query_output = query_output.to(image_embeds.dtype)
+
+         language_model_inputs = self.language_projection(query_output)
+         language_model_attention_mask = torch.ones(
+             language_model_inputs.size()[:-1], dtype=torch.long, device=language_model_inputs.device
+         )
+         inputs_embeds = self.language_model.get_input_embeddings()(input_ids)
+         if attention_mask is None:
+             attention_mask = torch.ones_like(input_ids)
+
+         if getattr(self.config, "image_token_index", None) is not None:
+             special_image_mask = (input_ids == self.config.image_token_index).unsqueeze(-1).expand_as(inputs_embeds)
+             language_model_inputs = language_model_inputs.to(inputs_embeds.device, inputs_embeds.dtype)
+             inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, language_model_inputs)
+         else:
+             logger.warning_once(
+                 "Expanding inputs for image tokens in BLIP-2 should be done in processing. "
+                 "Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your BLIP-2 model. "
+                 "Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
+             )
+             inputs_embeds = torch.cat([language_model_inputs, inputs_embeds.to(language_model_inputs.device)], dim=1)
+             attention_mask = torch.cat(
+                 [language_model_attention_mask, attention_mask.to(language_model_attention_mask.device)], dim=1
+             )
+
+         return inputs_embeds
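
The image_token_index branch of _preprocess_prefill follows the upstream transformers BLIP-2 path: positions holding the image token are overwritten, in order, with the projected query embeddings via masked_scatter. A toy illustration with made-up token ids and 2-dim embeddings:

import torch

image_token_index = 50000                           # illustrative id
input_ids = torch.tensor([[50000, 50000, 11, 22]])  # two image slots, two text tokens
inputs_embeds = torch.zeros(1, 4, 2)                # text embeddings (zeros for visibility)
language_model_inputs = torch.ones(1, 2, 2)         # one projected embedding per image slot

mask = (input_ids == image_token_index).unsqueeze(-1).expand_as(inputs_embeds)
merged = inputs_embeds.masked_scatter(mask, language_model_inputs)
assert merged[0, 0].tolist() == [1.0, 1.0]  # image slots filled in order
assert merged[0, 3].tolist() == [0.0, 0.0]  # text positions untouched
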
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py CHANGED
@@ -858,6 +858,11 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
              max_seq_len=rbln_config.max_seq_len,
          )

+         if rbln_config.attn_impl == "eager":
+             raise ValueError(
+                 "Eager attention is not supported for Gemma3. Please use flash attention by setting `rbln_attn_impl='flash_attn'`. Stay tuned for future updates!"
+             )
+
          validate_attention_method(
              attn_impl=rbln_config.attn_impl,
              kvcache_partition_len=rbln_config.kvcache_partition_len,
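
With this guard in place, compiling Gemma3 with eager attention fails fast. A hedged usage sketch: the rbln_attn_impl knob is named in the error message above, while the import path, from_pretrained call shape, and model id are assumptions based on optimum-rbln conventions rather than anything shown in this diff:

from optimum.rbln import RBLNGemma3ForCausalLM  # import path assumed

model = RBLNGemma3ForCausalLM.from_pretrained(
    "google/gemma-3-4b-it",        # illustrative model id
    export=True,                   # compile for RBLN on load (assumed convention)
    rbln_attn_impl="flash_attn",   # "eager" now raises a ValueError
)
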
optimum_rbln-0.7.5rc2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: optimum-rbln
- Version: 0.7.5rc1
+ Version: 0.7.5rc2
  Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
  Project-URL: Homepage, https://rebellions.ai
  Project-URL: Documentation, https://docs.rbln.ai
optimum_rbln-0.7.5rc2.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
  optimum/rbln/__init__.py,sha256=oAnsJSMrPYwBGEttUt3CMXTIESVNe15ftTWRTShwhZI,14386
- optimum/rbln/__version__.py,sha256=6UGbTME6xZZ1ojJoRpul_clf4TsgGIZHt3214_8maxM,521
+ optimum/rbln/__version__.py,sha256=fpFaQLT4vGQYujVJTSb1WZo3X-GKEGeYInrc_bpJrpQ,521
  optimum/rbln/configuration_utils.py,sha256=gvAjRFEGw5rnSoH0IoyuLrE4fkxtk3DN1pikqrN_Rpk,31277
- optimum/rbln/modeling.py,sha256=CWYpOLQOu1RUQrHvoX3FoidiP2XltDzC9gWIzznUTFo,11455
+ optimum/rbln/modeling.py,sha256=BpydF-bLBF60NnRMbtZwn5odOUjU4Awu9azqGeSufTI,11462
  optimum/rbln/modeling_base.py,sha256=HQgscr5jpUEtuXU1ACJHSLIntX-kq6Ef0SQ_W2-rp5A,25341
  optimum/rbln/diffusers/__init__.py,sha256=XL6oKPHbPCV6IVCw3fu0-M9mD2KO_x6unx5kJdAtpVY,6180
  optimum/rbln/diffusers/modeling_diffusers.py,sha256=bPyP5RMbOFLb2DfEAuLVp7hTuQWJvWid7El72wGmFrY,19535
@@ -14,11 +14,11 @@ optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py,sh
  optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py,sha256=c-1xAFgA1st8djLXkLeXtctcFp1MqZZYZp3Phqn1Wxo,3366
  optimum/rbln/diffusers/configurations/models/configuration_vq_model.py,sha256=su4Ceok4Wx4m1hKp5YEM3zigrlTmj3NIs9X7aAKOeWg,2980
  optimum/rbln/diffusers/configurations/pipelines/__init__.py,sha256=HJlu5lRZJWy4nYjBw3-ed93Pfb5QQmUbCJZKDW1bGH4,1160
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py,sha256=G9e2fxVmQ2JD-0iWrRe4jjCTtj9ZysI8oM_dWohtMO8,11044
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py,sha256=u26JzBCgGnc581up4y3XXyFX5lqAsKGF0IyDRGdYPp8,12746
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py,sha256=cE5BHI2sy3PCz0kfhQic611feGwavb7wtpx04MPR6us,4992
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py,sha256=54NTvVur7RADGgjGwO33s76dgKQ4zVNvmFl68rQFapw,6370
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py,sha256=H0hqsqpAfqb9gBIK5KsfUf9gX9cTnggK9Nt2aqfzeIM,5528
+ optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py,sha256=L5WeSCpYUdcTG4wBrMZIQIEUZV2Jxegdr53n8oSf6II,13748
+ optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py,sha256=JkdeFVU4RNiCJaSQUOJ3gWfKAyRhDRxMqEd68NJIij0,15675
+ optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py,sha256=C4PKcN6SZYmzTwyaIxXKXtltzUOWs7p1cwzudUwodY0,6344
+ optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py,sha256=lfyjjHKYHZ470tDAeNKRL2tJf_TpECzSGEGm5iqoZBo,7722
+ optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py,sha256=SHA1IV-oqdRpFM_ZLJ8GTmrrSdPXAUvLVtx5I3VQN7U,6880
  optimum/rbln/diffusers/models/__init__.py,sha256=mkCvJyH1KcwrsUvYSq_bVC79oOfyqtBSFDyPS1_48wA,1478
  optimum/rbln/diffusers/models/controlnet.py,sha256=kzDbUckGlioor8t0kKBvwi-hzNaG15XluEzTa7xZs1Q,10292
  optimum/rbln/diffusers/models/autoencoders/__init__.py,sha256=dg17ZTUsiqTcbIaEE4fqew9uRbao0diQ21PXvRKIqKg,679
@@ -79,8 +79,8 @@ optimum/rbln/transformers/models/bert/__init__.py,sha256=86FuGRBLw315_Roa9D5OUx6
  optimum/rbln/transformers/models/bert/configuration_bert.py,sha256=NIlBRn-zrnNirkEfJ4Uv2TZRIBL761PLJ9-cZaPyzpg,1017
  optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=XxsRhBhexZ2w3mRCvKl73pIyGdqcFR1RrOKG7h4EAyk,1223
  optimum/rbln/transformers/models/blip_2/__init__.py,sha256=L01gPXcUCa8Vg-bcng20vZvBIN_jlqCzwUSFuq0QOag,855
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py,sha256=Dh_gbeF46Tg3DKK4lq9DRblweI3B7XZHE2PlxO8qStU,3662
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py,sha256=4-EWHRau363-YoZQcTfLXqm97IsAs3-Uya2L1IVGfxE,10830
+ optimum/rbln/transformers/models/blip_2/configuration_blip_2.py,sha256=bAr3tlW2upxdBiihR7wUJGRxpdtelxt9BAkL9UXLJGE,2746
+ optimum/rbln/transformers/models/blip_2/modeling_blip_2.py,sha256=8pjFNXYM2phJQBoTWw08hK_wi7APjjhFTJfBZ3cx_Xo,14301
  optimum/rbln/transformers/models/clip/__init__.py,sha256=TLeXDqcFK6M6v9x7Xr64kBbqGu3hFHM7p754dQ8UVQc,938
  optimum/rbln/transformers/models/clip/configuration_clip.py,sha256=wgfZeVvcVdSzrN9tcnt7DKJQ0NLR0REvW7MyUXyv2Bg,2976
  optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=UslcDN6otyQ_psou7F_YcdK5vCImEtgIdcbwmexSfOM,7256
@@ -102,7 +102,7 @@ optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=-U3w3cEOv3ps1S8a
  optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_J9VyGiSReuEIvL0Uno0eaI,790
  optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=nndcYVwDYkOige_qO4td-YwLNtUz6aLiSQDIfPdGG9A,2840
  optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=Uer27wG06hgV1WNf92x1ZeUpl4Q0zskfCqzlLhtgtNU,17348
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=OUGsML-H6FOKldld7KRW9l0nRoT4DojWBDl8ZHpfXVA,44982
+ optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=mx3upghkboeyRGYxwPfA1fzRNPWj5MzX8bIy0kszHWY,45235
  optimum/rbln/transformers/models/gpt2/__init__.py,sha256=socBMIBZSiLbrVN12rQ4nL9gFeT0axMgz6SWaCaD4Ac,704
  optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=vKvJD8P9Li4W9wdVoQcqMEr1MwEXojPBnF2NE85VXAo,772
  optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=kf5jk7Djv9XXX3Q83oTosiMpt9g44TF_gCT-vMiWDJk,3097
@@ -178,7 +178,7 @@ optimum/rbln/utils/model_utils.py,sha256=V2kFpUe2aqVzLwbpztD8JOVFQqRHncvIWwJbgnU
  optimum/rbln/utils/runtime_utils.py,sha256=LoKNK3AQNV_BSScstIZWjICkJf265MnUgy360BOocVI,5454
  optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
  optimum/rbln/utils/submodule.py,sha256=ZfI7e3YzbjbbBW4Yjfucj8NygEsukfIkaJi3PtwHrhc,5105
- optimum_rbln-0.7.5rc1.dist-info/METADATA,sha256=RUPCGW8cEzu6extEsTB9xYDgOb8hAqgEKG0tG3K5feA,5300
- optimum_rbln-0.7.5rc1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- optimum_rbln-0.7.5rc1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
- optimum_rbln-0.7.5rc1.dist-info/RECORD,,
+ optimum_rbln-0.7.5rc2.dist-info/METADATA,sha256=WltiXJxC_INRxeoxhT4xyvLOO8FM6cqJOqbpnTQ6xhg,5300
+ optimum_rbln-0.7.5rc2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ optimum_rbln-0.7.5rc2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+ optimum_rbln-0.7.5rc2.dist-info/RECORD,,