optimum-rbln 0.9.3__py3-none-any.whl → 0.9.4a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. optimum/rbln/__version__.py +2 -2
  2. optimum/rbln/configuration_utils.py +12 -4
  3. optimum/rbln/diffusers/modeling_diffusers.py +1 -1
  4. optimum/rbln/diffusers/models/controlnet.py +1 -1
  5. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +1 -1
  6. optimum/rbln/diffusers/pipelines/auto_pipeline.py +2 -2
  7. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +13 -4
  8. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +13 -4
  9. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +13 -4
  10. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -4
  11. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +1 -1
  12. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
  13. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -2
  14. optimum/rbln/modeling_base.py +12 -7
  15. optimum/rbln/transformers/modeling_attention_utils.py +4 -4
  16. optimum/rbln/transformers/modeling_outputs.py +1 -0
  17. optimum/rbln/transformers/models/auto/auto_factory.py +1 -0
  18. optimum/rbln/transformers/models/colpali/colpali_architecture.py +2 -2
  19. optimum/rbln/transformers/models/colpali/modeling_colpali.py +1 -1
  20. optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +0 -2
  21. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +4 -0
  22. optimum/rbln/transformers/models/decoderonly/configuration_lora.py +1 -1
  23. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +92 -43
  24. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +201 -62
  25. optimum/rbln/transformers/models/decoderonly/lora_architecture.py +1 -1
  26. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +106 -36
  27. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +7 -1
  28. optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +42 -70
  29. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +43 -26
  30. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +1 -1
  31. optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +0 -1
  32. optimum/rbln/transformers/models/llava/modeling_llava.py +1 -1
  33. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +1 -1
  34. optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -22
  35. optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +2 -2
  36. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -28
  37. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +6 -6
  38. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +4 -4
  39. optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +1 -1
  40. optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +0 -20
  41. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +2 -2
  42. optimum/rbln/transformers/models/swin/modeling_swin.py +3 -3
  43. optimum/rbln/transformers/models/t5/t5_architecture.py +1 -1
  44. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +9 -8
  45. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +12 -2
  46. optimum/rbln/utils/import_utils.py +7 -1
  47. optimum/rbln/utils/submodule.py +3 -1
  48. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.4a2.dist-info}/METADATA +1 -1
  49. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.4a2.dist-info}/RECORD +52 -52
  50. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.4a2.dist-info}/WHEEL +0 -0
  51. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.4a2.dist-info}/entry_points.txt +0 -0
  52. {optimum_rbln-0.9.3.dist-info → optimum_rbln-0.9.4a2.dist-info}/licenses/LICENSE +0 -0
--- a/optimum/rbln/__version__.py
+++ b/optimum/rbln/__version__.py
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.9.3'
-__version_tuple__ = version_tuple = (0, 9, 3)
+__version__ = version = '0.9.4a2'
+__version_tuple__ = version_tuple = (0, 9, 4, 'a2')
 
 __commit_id__ = commit_id = None
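
The new version tuple carries the pre-release segment as a trailing string ('a2'). One caveat for downstream version checks: lexicographic tuple comparison orders (0, 9, 4, 'a2') *after* (0, 9, 4), even though 0.9.4a2 precedes the 0.9.4 final release, and comparing the string element against an int raises TypeError. A minimal sketch of a PEP 440-aware check instead (the packaging import is illustrative, not a dependency this wheel adds):

    # Naive tuple comparison mis-orders pre-releases:
    assert (0, 9, 4, "a2") > (0, 9, 4)  # lexicographically true, semantically wrong

    # PEP 440-aware comparison puts the alpha before the final release:
    from packaging.version import Version
    assert Version("0.9.4a2") < Version("0.9.4")
    assert Version("0.9.4a2") > Version("0.9.3")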
--- a/optimum/rbln/configuration_utils.py
+++ b/optimum/rbln/configuration_utils.py
@@ -117,9 +117,14 @@ class RBLNCompileConfig:
         return self
 
     def get_dummy_inputs(
-        self, fill=0, static_tensors: Dict[str, torch.Tensor] = {}, meta_tensor_names: List[str] = []
+        self,
+        fill=0,
+        static_tensors: Optional[Dict[str, torch.Tensor]] = None,
+        meta_tensor_names: Optional[List[str]] = None,
     ):
         dummy = []
+        static_tensors = static_tensors if static_tensors is not None else {}
+        meta_tensor_names = meta_tensor_names if meta_tensor_names is not None else []
         for name, shape, dtype in self.input_info:
             if name in static_tensors:
                 tensor = static_tensors[name]
@@ -255,7 +260,7 @@ class RBLNAutoConfig:
     def load(
         path: str,
         passed_rbln_config: Optional["RBLNModelConfig"] = None,
-        kwargs: Optional[Dict[str, Any]] = {},
+        kwargs: Optional[Dict[str, Any]] = None,
         return_unused_kwargs: bool = False,
     ) -> Union["RBLNModelConfig", Tuple["RBLNModelConfig", Dict[str, Any]]]:
         """
@@ -269,6 +274,8 @@
         Returns:
             RBLNModelConfig: The loaded RBLNModelConfig.
         """
+        if kwargs is None:
+            kwargs = {}
         cls, config_file = load_config(path)
 
         rbln_keys = [key for key in kwargs.keys() if key.startswith("rbln_")]
@@ -655,7 +662,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         timeout: Optional[int] = None,
         optimum_rbln_version: Optional[str] = None,
         _torch_dtype: Optional[str] = None,
-        _compile_cfgs: List[RBLNCompileConfig] = [],
+        _compile_cfgs: Optional[List[RBLNCompileConfig]] = None,
         *,
         optimize_host_memory: Optional[bool] = None,
         **kwargs: Any,
@@ -708,7 +715,8 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         if self.optimum_rbln_version is None:
             self.optimum_rbln_version = __version__
 
-        self._compile_cfgs: List[RBLNCompileConfig] = _compile_cfgs
+        compile_cfgs = _compile_cfgs if _compile_cfgs is not None else []
+        self._compile_cfgs: List[RBLNCompileConfig] = compile_cfgs
 
         if not isinstance(self._compile_cfgs, list):
             raise ValueError("`compile_cfgs` must be a list of `RBLNCompileConfig`.")
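
The changes above (and the `callback_on_step_end_tensor_inputs: List[str] = ["latents"]` defaults in the pipelines below) all replace mutable default arguments with `None` plus an in-body guard. Python evaluates a default value once, when the `def` executes, so a `{}` or `[]` default is a single shared object that leaks state across calls. A minimal, self-contained illustration (names are hypothetical, not from the package):

    def broken(item, bucket=[]):      # one list, created when `def` executes
        bucket.append(item)
        return bucket

    def fixed(item, bucket=None):     # a fresh list per call unless one is passed
        bucket = bucket if bucket is not None else []
        bucket.append(item)
        return bucket

    print(broken(1), broken(2))  # [1, 2] [1, 2] -- state leaked between calls
    print(fixed(1), fixed(2))    # [1] [2]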
--- a/optimum/rbln/diffusers/modeling_diffusers.py
+++ b/optimum/rbln/diffusers/modeling_diffusers.py
@@ -136,7 +136,7 @@ class RBLNDiffusionMixin:
         *,
         export: bool = None,
         model_save_dir: Optional[PathLike] = None,
-        rbln_config: Dict[str, Any] = {},
+        rbln_config: Optional[Dict[str, Any]] = None,
         lora_ids: Optional[Union[str, List[str]]] = None,
         lora_weights_names: Optional[Union[str, List[str]]] = None,
         lora_scales: Optional[Union[float, List[float]]] = None,
--- a/optimum/rbln/diffusers/models/controlnet.py
+++ b/optimum/rbln/diffusers/models/controlnet.py
@@ -215,7 +215,7 @@ class RBLNControlNetModel(RBLNModel):
         encoder_hidden_states: torch.Tensor,
         controlnet_cond: torch.FloatTensor,
         conditioning_scale: torch.Tensor = 1.0,
-        added_cond_kwargs: Dict[str, torch.Tensor] = {},
+        added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
         return_dict: bool = True,
         **kwargs,
     ):
--- a/optimum/rbln/diffusers/models/unets/unet_2d_condition.py
+++ b/optimum/rbln/diffusers/models/unets/unet_2d_condition.py
@@ -341,7 +341,7 @@ class RBLNUNet2DConditionModel(RBLNModel):
         timestep_cond: Optional[torch.Tensor] = None,
         attention_mask: Optional[torch.Tensor] = None,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
-        added_cond_kwargs: Dict[str, torch.Tensor] = {},
+        added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
         down_block_additional_residuals: Optional[Tuple[torch.Tensor]] = None,
         mid_block_additional_residual: Optional[torch.Tensor] = None,
         down_intrablock_additional_residuals: Optional[Tuple[torch.Tensor]] = None,
--- a/optimum/rbln/diffusers/pipelines/auto_pipeline.py
+++ b/optimum/rbln/diffusers/pipelines/auto_pipeline.py
@@ -15,7 +15,7 @@
 
 import importlib
 from pathlib import Path
-from typing import Any, Dict, Type, Union
+from typing import Any, Dict, Optional, Type, Union
 
 from diffusers.models.controlnets import ControlNetUnionModel
 from diffusers.pipelines.auto_pipeline import (
@@ -174,7 +174,7 @@ class RBLNAutoPipelineBase:
         model_id: Union[str, Path],
         *,
         export: bool = None,
-        rbln_config: Union[Dict[str, Any], RBLNModelConfig] = {},
+        rbln_config: Optional[Union[Dict[str, Any], RBLNModelConfig]] = None,
         **kwargs: Any,
     ):
         """
--- a/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py
+++ b/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -151,7 +151,9 @@ class RBLNStableDiffusionControlNetPipeline(RBLNDiffusionMixin, StableDiffusionC
             for image_ in image:
                 self.check_image(image_, prompt, prompt_embeds)
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         # Check `controlnet_conditioning_scale`
         if (
@@ -180,7 +182,9 @@ class RBLNStableDiffusionControlNetPipeline(RBLNDiffusionMixin, StableDiffusionC
                     " the same length as the number of controlnets"
                 )
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         if not isinstance(control_guidance_start, (tuple, list)):
             control_guidance_start = [control_guidance_start]
@@ -254,7 +258,7 @@ class RBLNStableDiffusionControlNetPipeline(RBLNDiffusionMixin, StableDiffusionC
         control_guidance_end: Union[float, List[float]] = 1.0,
         clip_skip: Optional[int] = None,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
-        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+        callback_on_step_end_tensor_inputs: Optional[List[str]] = None,
         **kwargs,
     ):
         r"""
@@ -393,6 +397,9 @@ class RBLNStableDiffusionControlNetPipeline(RBLNDiffusionMixin, StableDiffusionC
         )
 
         # 1. Check inputs. Raise error if not correct
+        if callback_on_step_end_tensor_inputs is None:
+            callback_on_step_end_tensor_inputs = ["latents"]
+
         self.check_inputs(
             prompt,
             image,
@@ -503,7 +510,9 @@ class RBLNStableDiffusionControlNetPipeline(RBLNDiffusionMixin, StableDiffusionC
             image = images
             height, width = image[0].shape[-2:]
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         # 5. Prepare timesteps
         timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
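
The `assert False` → `raise TypeError` replacements here, and in the three ControlNet pipelines that follow, are more than style: `assert` statements are stripped when Python runs with optimizations enabled (`python -O`), so the unreachable branch would silently fall through instead of failing. A minimal sketch of the difference (hypothetical function, not from the package):

    def kind_of(controlnet):
        if isinstance(controlnet, (list, tuple)):
            return "multi"
        assert False  # compiled out under `python -O`; returns None silently

    def kind_of_safe(controlnet):
        if isinstance(controlnet, (list, tuple)):
            return "multi"
        # An explicit exception survives every interpreter mode:
        raise TypeError(f"Unsupported controlnet type: {type(controlnet).__name__}")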
--- a/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
+++ b/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -152,7 +152,9 @@ class RBLNStableDiffusionControlNetImg2ImgPipeline(RBLNDiffusionMixin, StableDif
             for image_ in image:
                 self.check_image(image_, prompt, prompt_embeds)
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         # Check `controlnet_conditioning_scale`
         if (
@@ -178,7 +180,9 @@ class RBLNStableDiffusionControlNetImg2ImgPipeline(RBLNDiffusionMixin, StableDif
                     " the same length as the number of controlnets"
                 )
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         if len(control_guidance_start) != len(control_guidance_end):
             raise ValueError(
@@ -247,7 +251,7 @@ class RBLNStableDiffusionControlNetImg2ImgPipeline(RBLNDiffusionMixin, StableDif
         control_guidance_end: Union[float, List[float]] = 1.0,
         clip_skip: Optional[int] = None,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
-        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+        callback_on_step_end_tensor_inputs: Optional[List[str]] = None,
         **kwargs,
     ):
         r"""
@@ -384,6 +388,9 @@ class RBLNStableDiffusionControlNetImg2ImgPipeline(RBLNDiffusionMixin, StableDif
         )
 
         # 1. Check inputs. Raise error if not correct
+        if callback_on_step_end_tensor_inputs is None:
+            callback_on_step_end_tensor_inputs = ["latents"]
+
         self.check_inputs(
             prompt,
             control_image,
@@ -490,7 +497,9 @@ class RBLNStableDiffusionControlNetImg2ImgPipeline(RBLNDiffusionMixin, StableDif
 
             control_image = control_images
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         # 5. Prepare timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
--- a/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
+++ b/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
@@ -178,7 +178,9 @@ class RBLNStableDiffusionXLControlNetPipeline(RBLNDiffusionMixin, StableDiffusio
             for image_ in image:
                 self.check_image(image_, prompt, prompt_embeds)
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         # Check `controlnet_conditioning_scale`
         if (
@@ -204,7 +206,9 @@ class RBLNStableDiffusionXLControlNetPipeline(RBLNDiffusionMixin, StableDiffusio
                     " the same length as the number of controlnets"
                 )
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         if not isinstance(control_guidance_start, (tuple, list)):
             control_guidance_start = [control_guidance_start]
@@ -288,7 +292,7 @@ class RBLNStableDiffusionXLControlNetPipeline(RBLNDiffusionMixin, StableDiffusio
         negative_target_size: Optional[Tuple[int, int]] = None,
         clip_skip: Optional[int] = None,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
-        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+        callback_on_step_end_tensor_inputs: Optional[List[str]] = None,
         **kwargs,
     ):
         r"""
@@ -466,6 +470,9 @@ class RBLNStableDiffusionXLControlNetPipeline(RBLNDiffusionMixin, StableDiffusio
         )
 
         # 1. Check inputs. Raise error if not correct
+        if callback_on_step_end_tensor_inputs is None:
+            callback_on_step_end_tensor_inputs = ["latents"]
+
         self.check_inputs(
             prompt,
             prompt_2,
@@ -581,7 +588,9 @@ class RBLNStableDiffusionXLControlNetPipeline(RBLNDiffusionMixin, StableDiffusio
             image = images
             height, width = image[0].shape[-2:]
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         # 5. Prepare timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
--- a/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
+++ b/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
@@ -190,7 +190,9 @@ class RBLNStableDiffusionXLControlNetImg2ImgPipeline(RBLNDiffusionMixin, StableD
             for image_ in image:
                 self.check_image(image_, prompt, prompt_embeds)
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         # Check `controlnet_conditioning_scale`
         if (
@@ -216,7 +218,9 @@ class RBLNStableDiffusionXLControlNetImg2ImgPipeline(RBLNDiffusionMixin, StableD
                     " the same length as the number of controlnets"
                 )
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         if not isinstance(control_guidance_start, (tuple, list)):
             control_guidance_start = [control_guidance_start]
@@ -303,7 +307,7 @@ class RBLNStableDiffusionXLControlNetImg2ImgPipeline(RBLNDiffusionMixin, StableD
         negative_aesthetic_score: float = 2.5,
         clip_skip: Optional[int] = None,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
-        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+        callback_on_step_end_tensor_inputs: Optional[List[str]] = None,
         **kwargs,
     ):
         r"""
@@ -500,6 +504,9 @@ class RBLNStableDiffusionXLControlNetImg2ImgPipeline(RBLNDiffusionMixin, StableD
         )
 
         # 1. Check inputs. Raise error if not correct
+        if callback_on_step_end_tensor_inputs is None:
+            callback_on_step_end_tensor_inputs = ["latents"]
+
         self.check_inputs(
             prompt,
             prompt_2,
@@ -618,7 +625,9 @@ class RBLNStableDiffusionXLControlNetImg2ImgPipeline(RBLNDiffusionMixin, StableD
             control_image = control_images
             height, width = control_image[0].shape[-2:]
         else:
-            assert False
+            raise TypeError(
+                "Unsupported controlnet type. Expected `RBLNControlNetModel` or `RBLNMultiControlNetModel`."
+            )
 
         # 5. Prepare timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
--- a/optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py
+++ b/optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py
@@ -81,7 +81,7 @@ def is_compiled_dir(dir: str) -> bool:
     if not os.path.exists(dir):
         return False
 
-    for root, dirs, files in os.walk(dir):
+    for _, _, files in os.walk(dir):
         for file in files:
             if file.endswith(".rbln"):
                 return True
--- a/optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py
+++ b/optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py
@@ -86,7 +86,7 @@ class RBLNCosmosTextToWorldPipeline(RBLNDiffusionMixin, CosmosTextToWorldPipelin
         *,
         export: bool = False,
         safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
-        rbln_config: Dict[str, Any] = {},
+        rbln_config: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ):
         """
--- a/optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py
+++ b/optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py
@@ -86,7 +86,7 @@ class RBLNCosmosVideoToWorldPipeline(RBLNDiffusionMixin, CosmosVideoToWorldPipel
         *,
         export: bool = False,
         safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
-        rbln_config: Dict[str, Any] = {},
+        rbln_config: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ):
         """
@@ -118,7 +118,6 @@ class RBLNCosmosVideoToWorldPipeline(RBLNDiffusionMixin, CosmosVideoToWorldPipel
             RBLN compilation process. These may include parameters specific to individual submodules
             or the particular diffusion pipeline being used.
         """
-
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
         if safety_checker is None and export:
             safety_checker = RBLNCosmosSafetyChecker(rbln_config=rbln_config.safety_checker)
--- a/optimum/rbln/modeling_base.py
+++ b/optimum/rbln/modeling_base.py
@@ -15,7 +15,6 @@
 import importlib
 import os
 import shutil
-from abc import ABC
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
@@ -39,7 +38,7 @@ if TYPE_CHECKING:
 logger = get_logger(__name__)
 
 
-class PreTrainedModel(ABC):  # noqa: F811
+class PreTrainedModel:  # noqa: F811
     pass
 
 
@@ -63,7 +62,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
         subfolder: str = "",
         rbln_compiled_models: Optional[rebel.RBLNCompiledModel] = None,
-        rbln_submodules: List["RBLNBaseModel"] = [],
+        rbln_submodules: Optional[List["RBLNBaseModel"]] = None,
         **kwargs,
     ):
         self.model = models
@@ -106,6 +105,8 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         self.model_save_dir = model_save_dir
         self.subfolder = subfolder
 
+        if rbln_submodules is None:
+            rbln_submodules = []
         self.rbln_submodules = rbln_submodules
         self.__post_init__(**kwargs)
 
@@ -181,7 +182,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         # passed from compile function
         rbln_config: Optional[RBLNModelConfig] = None,
         rbln_compiled_models: Optional[Dict[str, rebel.RBLNCompiledModel]] = None,
-        rbln_submodules: List["RBLNBaseModel"] = [],
+        rbln_submodules: Optional[List["RBLNBaseModel"]] = None,
         **kwargs,
     ) -> "RBLNBaseModel":
         if rbln_compiled_models is None:
@@ -217,8 +218,9 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         )
 
         if len(cls._rbln_submodules) > 0:
-            rbln_submodules = cls._load_submodules(model_save_dir=model_id, rbln_config=rbln_config, **kwargs)
-        else:
+            if rbln_submodules is None:
+                rbln_submodules = cls._load_submodules(model_save_dir=model_id, rbln_config=rbln_config, **kwargs)
+        elif rbln_submodules is None:
             rbln_submodules = []
 
         rbln_config.freeze()
@@ -279,9 +281,12 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         config: "PretrainedConfig",
         model_save_dir: Union[Path, str],
         subfolder: Union[Path, str],
-        rbln_submodules: List["RBLNBaseModel"] = [],
+        rbln_submodules: Optional[List["RBLNBaseModel"]] = None,
         **kwargs,
     ):
+        if rbln_submodules is None:
+            rbln_submodules = []
+
         if isinstance(model_save_dir, str):
             model_save_dir = Path(model_save_dir)
 
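The restructured branch in `@@ -217,8 +218,9 @@` also changes precedence: submodules passed in by the caller are now kept instead of being unconditionally reloaded from disk. Reduced to its control flow (a sketch; `has_submodule_classes` and `load_from_disk` stand in for `len(cls._rbln_submodules) > 0` and `cls._load_submodules(...)`):

    if has_submodule_classes:
        if rbln_submodules is None:       # nothing passed in: load from disk
            rbln_submodules = load_from_disk()
    elif rbln_submodules is None:         # model defines no submodules at all
        rbln_submodules = []
    # in every other case, the caller-supplied list wins
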
--- a/optimum/rbln/transformers/modeling_attention_utils.py
+++ b/optimum/rbln/transformers/modeling_attention_utils.py
@@ -195,7 +195,7 @@ class RBLNDecoderOnlyFlashAttentionMixin:
     ) -> int:
         tensor_parallel_size = rbln_config.tensor_parallel_size or 1
         vocab_size = model_config.vocab_size
-        hidden_size = getattr(model_config, "n_embd", None) or getattr(model_config, "hidden_size")
+        hidden_size = getattr(model_config, "n_embd", None) or model_config.hidden_size
         lm_head_params = align(vocab_size, 64) * hidden_size
 
         nbytes_per_param = 2  # Assuming lm_head is always not quantized
@@ -214,7 +214,7 @@ class RBLNDecoderOnlyFlashAttentionMixin:
     ) -> int:
         # This is an *APPROXIMATE* calculation based on the number of parameters
         tensor_parallel_size = rbln_config.tensor_parallel_size or 1
-        num_hidden_layers = getattr(model_config, "n_layer", None) or getattr(model_config, "num_hidden_layers")
+        num_hidden_layers = getattr(model_config, "n_layer", None) or model_config.num_hidden_layers
 
         n_model_params = sum(p.numel() for p in model.parameters())
         embed_token_params = sum(p.numel() for p in model.get_input_embeddings().parameters())
@@ -307,9 +307,9 @@ class RBLNDecoderOnlyFlashAttentionMixin:
 
             return dram_per_block
 
-        num_attention_heads = getattr(model_config, "n_head", None) or getattr(model_config, "num_attention_heads")
+        num_attention_heads = getattr(model_config, "n_head", None) or model_config.num_attention_heads
         head_dim = getattr(model_config, "head_dim", None) or model_config.hidden_size // num_attention_heads
-        num_hidden_layers = getattr(model_config, "n_layer", None) or getattr(model_config, "num_hidden_layers")
+        num_hidden_layers = getattr(model_config, "n_layer", None) or model_config.num_hidden_layers
         num_key_value_heads = getattr(model_config, "num_key_value_heads", None) or num_attention_heads
         tensor_parallel_size = rbln_config.tensor_parallel_size or 1
 
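These hunks drop the two-argument `getattr(obj, "name")` form where the attribute name is a constant: without a default, `getattr` behaves exactly like plain attribute access (including raising `AttributeError` when the attribute is missing), so `model_config.hidden_size` expresses the same fallback chain more directly. The three-argument form remains the right tool for genuinely optional attributes:

    class Cfg:
        hidden_size = 768

    cfg = Cfg()
    assert getattr(cfg, "hidden_size") == cfg.hidden_size  # identical behavior
    assert getattr(cfg, "n_embd", None) is None            # default needs the 3-arg form
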
--- a/optimum/rbln/transformers/modeling_outputs.py
+++ b/optimum/rbln/transformers/modeling_outputs.py
@@ -24,6 +24,7 @@ class RBLNDecoderOnlyOutput(ModelOutput):
     logits: torch.FloatTensor = None
     generate_idx: torch.Tensor = None
     padded_cache_lengths: int = None
+    hidden_states: Tuple[torch.FloatTensor] = None
 
 
 @dataclass
--- a/optimum/rbln/transformers/models/auto/auto_factory.py
+++ b/optimum/rbln/transformers/models/auto/auto_factory.py
@@ -150,6 +150,7 @@ class _BaseAutoModelClass:
             f"from the checkpoint, leading to potential unintended behavior. If this is not intentional, consider calling the "
             f"`from_pretrained()` method directly from the `RBLN{config.architectures[0]}` class instead.",
             UserWarning,
+            stacklevel=2,
         )
 
         return model_class
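
Adding `stacklevel=2` makes `warnings.warn` attribute the warning to the caller of the emitting function rather than to the line inside the library, which is where a user can actually act on it. A minimal sketch:

    import warnings

    def library_function():
        # With the default stacklevel=1 the warning points here;
        # stacklevel=2 points one frame up, at the caller.
        warnings.warn("prefer the architecture-specific class", UserWarning, stacklevel=2)

    library_function()  # the reported location is now this call site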
--- a/optimum/rbln/transformers/models/colpali/colpali_architecture.py
+++ b/optimum/rbln/transformers/models/colpali/colpali_architecture.py
@@ -156,8 +156,8 @@ class ColPaliAttention(nn.Module):
     def __init__(self, self_attn):
         super().__init__()
         self._original_mod = self_attn
-        self.num_heads = getattr(self._original_mod, "num_heads", None) or getattr(
-            self._original_mod.config, "num_attention_heads"
+        self.num_heads = (
+            getattr(self._original_mod, "num_heads", None) or self._original_mod.config.num_attention_heads
         )
         self.head_dim = self._original_mod.head_dim
         self.scaling = self.head_dim**-0.5
--- a/optimum/rbln/transformers/models/colpali/modeling_colpali.py
+++ b/optimum/rbln/transformers/models/colpali/modeling_colpali.py
@@ -332,7 +332,7 @@ class RBLNColPaliForRetrieval(RBLNModel):
         ]
         outputs.append(torch.empty(size=language_model_out_size, dtype=torch.float32, device="cpu"))
         if self.rbln_config.output_hidden_states:
-            for i in range(self.config.vlm_config.text_config.num_hidden_layers + 1):
+            for _ in range(self.config.vlm_config.text_config.num_hidden_layers + 1):
                 outputs.append(torch.empty(size=language_model_hidden_states_size, dtype=torch.float32, device="cpu"))
 
         # Embedding_proj_layer is fused on the bottom of the language model.
--- a/optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py
+++ b/optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py
@@ -58,7 +58,6 @@ class RBLNColQwen2ForRetrievalConfig(RBLNDecoderOnlyModelConfig):
         visual: Optional[RBLNModelConfig] = None,
         batch_size: Optional[int] = None,
         use_inputs_embeds: bool = True,
-        output_hidden_states: Optional[bool] = False,
         **kwargs,
     ):
         super().__init__(use_inputs_embeds=use_inputs_embeds, **kwargs)
@@ -71,4 +70,3 @@ class RBLNColQwen2ForRetrievalConfig(RBLNDecoderOnlyModelConfig):
             raise ValueError("batch_size is not supported for RBLNColQwen2ForRetrievalConfig")
 
         self.visual = visual
-        self.output_hidden_states = output_hidden_states
--- a/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py
+++ b/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py
@@ -58,6 +58,7 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
         sliding_window_layers: Optional[List[int]] = None,
         phases: Optional[List[PhaseType]] = None,
         logits_to_keep: Optional[int] = None,
+        output_hidden_states: Optional[bool] = None,
         **kwargs,
     ):
         """
@@ -112,6 +113,7 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
                 ["prefill", "decode"] if DecoderOnlyModelForCausalLM is used.
             logits_to_keep (Optional[int]): The number of logits to keep for the decoder. If set to 0, the decoder will keep all logits.
                 Defaults to 0 if DecoderOnlyModel is used, 1 if DecoderOnlyModelForCausalLM is used.
+            output_hidden_states (Optional[bool]): Whether to output the hidden states of the decoder. Defaults to False.
             kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
         Raises:
@@ -232,6 +234,8 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
         if self.logits_to_keep is not None and self.logits_to_keep > 1:
             raise NotImplementedError("`logits_to_keep` > 1 is currently not supported for RBLN models.")
 
+        self.output_hidden_states = output_hidden_states or False
+
         self.decoder_batch_sizes = None
         if "decode" in self.phases:
             self.decoder_batch_sizes = decoder_batch_sizes
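
With this, `output_hidden_states` moves from the ColQwen2-specific config (removed above) into the shared decoder-only config. The normalization uses `output_hidden_states or False`, which collapses every falsy value to `False`; for an `Optional[bool]`, whose only falsy inputs are `None` and `False`, that is equivalent to an explicit `None` check:

    for x in (None, False, True):
        assert (x or False) == (x if x is not None else False)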
--- a/optimum/rbln/transformers/models/decoderonly/configuration_lora.py
+++ b/optimum/rbln/transformers/models/decoderonly/configuration_lora.py
@@ -183,7 +183,7 @@ class RBLNLoRAAdapterConfig(RBLNSerializableConfigProtocol):
                 f"Failed to download LoRA adapter '{path.as_posix()}' from HuggingFace Hub. "
                 f"Please check if the model ID is correct or provide a valid local path. "
                 f"Error: {e}"
-            )
+            ) from e
 
     def _load_adapter_config(self) -> Dict[str, Any]:
         """