optimum-rbln 0.8.2a2__py3-none-any.whl → 0.8.2a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


optimum/rbln/__version__.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '0.8.2a2'
- __version_tuple__ = version_tuple = (0, 8, 2, 'a2')
+ __version__ = version = '0.8.2a3'
+ __version_tuple__ = version_tuple = (0, 8, 2, 'a3')
optimum/rbln/diffusers/modeling_diffusers.py CHANGED
@@ -70,8 +70,6 @@ class RBLNDiffusionMixin:
  _submodules = []
  _optional_submodules = []
  _prefix = {}
- _rbln_config_class = None
- _hf_class = None
 
  @staticmethod
  def _maybe_apply_and_fuse_lora(
@@ -114,14 +112,14 @@ class RBLNDiffusionMixin:
  @classmethod
  def get_rbln_config_class(cls) -> Type[RBLNModelConfig]:
  # Lazily loads and caches the corresponding RBLN model config class.
- if cls._rbln_config_class is None:
+ if "_rbln_config_class" not in cls.__dict__ or cls._rbln_config_class is None:
  rbln_config_class_name = cls.__name__ + "Config"
  cls._rbln_config_class = get_rbln_config_class(rbln_config_class_name)
  return cls._rbln_config_class
 
  @classmethod
  def get_hf_class(cls):
- if cls._hf_class is None:
+ if "_hf_class" not in cls.__dict__ or cls._hf_class is None:
  hf_cls_name = cls.__name__[4:]
  library = importlib.import_module("diffusers")
  cls._hf_class = getattr(library, hf_cls_name, None)
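Note on the caching change above: assigning `cls._rbln_config_class` inside a classmethod writes to whichever class the call was made on, but a plain `is None` test also sees values inherited from a parent class, so a subclass could silently reuse its parent's cached class. Checking `cls.__dict__` restricts the cache to the exact class. A minimal, self-contained sketch (hypothetical classes, not the optimum-rbln API):

    class Base:
        @classmethod
        def get_cached(cls):
            # Only trust a cache entry stored on this exact class, not one inherited from a parent.
            if "_cached" not in cls.__dict__ or cls._cached is None:
                cls._cached = cls.__name__ + "Config"  # stand-in for the real lookup
            return cls._cached

    class Child(Base):
        pass

    print(Base.get_cached())   # "BaseConfig", stored on Base
    print(Child.get_cached())  # "ChildConfig", stored on Child instead of reusing Base's entry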
optimum/rbln/modeling.py CHANGED
@@ -35,8 +35,6 @@ logger = get_logger(__name__)
 
 
  class RBLNModel(RBLNBaseModel):
- _output_class = None
-
  @classmethod
  def update_kwargs(cls, kwargs):
  # Update user-given kwargs to get proper pytorch model.
@@ -289,7 +287,7 @@ class RBLNModel(RBLNBaseModel):
  @classmethod
  def get_hf_output_class(cls):
  # Dynamically gets the output class from the corresponding HuggingFace model class.
- if cls._output_class:
+ if "_output_class" in cls.__dict__ and cls._output_class is not None:
  return cls._output_class
 
  hf_class = cls.get_hf_class()
optimum/rbln/modeling_base.py CHANGED
@@ -23,9 +23,10 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
  import rebel
  import torch
  from transformers import AutoConfig, AutoModel, GenerationConfig, PretrainedConfig
+ from transformers.utils.hub import PushToHubMixin
 
  from .configuration_utils import RBLNAutoConfig, RBLNCompileConfig, RBLNModelConfig, get_rbln_config_class
- from .utils.hub import PushToHubMixin, pull_compiled_model_from_hub, validate_files
+ from .utils.hub import pull_compiled_model_from_hub, validate_files
  from .utils.logging import get_logger
  from .utils.runtime_utils import UnavailableRuntime, tp_and_devices_are_ok
  from .utils.save_utils import maybe_load_preprocessors
@@ -50,11 +51,8 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  model_type = "rbln_model"
  auto_model_class = AutoModel
  config_class = AutoConfig
-
  config_name = "config.json"
  hf_library_name = "transformers"
- _hf_class = None
- _rbln_config_class = None
 
  def __init__(
  self,
@@ -115,7 +113,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  def _load_compiled_model_dir(
  cls,
  model_id: Union[str, Path],
- use_auth_token: Optional[Union[bool, str]] = None,
+ token: Optional[Union[bool, str]] = None,
  revision: Optional[str] = None,
  force_download: bool = False,
  cache_dir: Optional[str] = None,
@@ -134,7 +132,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  model_path = pull_compiled_model_from_hub(
  model_id=model_id,
  subfolder=subfolder,
- use_auth_token=use_auth_token,
+ token=token,
  revision=revision,
  cache_dir=cache_dir,
  force_download=force_download,
@@ -172,7 +170,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  cls,
  model_id: Union[str, Path],
  config: Optional["PretrainedConfig"] = None,
- use_auth_token: Optional[Union[bool, str]] = None,
+ token: Optional[Union[bool, str]] = None,
  revision: Optional[str] = None,
  force_download: bool = False,
  cache_dir: Optional[str] = None,
@@ -189,7 +187,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  if rbln_compiled_models is None:
  model_path_subfolder = cls._load_compiled_model_dir(
  model_id=model_id,
- use_auth_token=use_auth_token,
+ token=token,
  revision=revision,
  force_download=force_download,
  cache_dir=cache_dir,
@@ -232,7 +230,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  cache_dir=cache_dir,
  force_download=force_download,
  revision=revision,
- token=use_auth_token,
+ token=token,
  trust_remote_code=trust_remote_code,
  )
  elif cls.hf_library_name == "diffusers":
@@ -250,7 +248,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  force_download=force_download,
  local_files_only=local_files_only,
  revision=revision,
- token=use_auth_token,
+ token=token,
  subfolder=subfolder,
  )
  config = PretrainedConfig(**config)
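The loading path replaces `use_auth_token` with `token`, matching the current Hugging Face Hub convention. A hedged usage sketch (the class is assumed to be exported from `optimum.rbln` as in previous releases; the model id is a placeholder, not a value from this diff):

    from optimum.rbln import RBLNModel  # assumed export

    model = RBLNModel.from_pretrained(
        "org/some-compiled-rbln-model",  # hypothetical pre-compiled repo
        token=True,                      # was `use_auth_token` in 0.8.2a2; a string token also works
    )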
@@ -421,7 +419,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
 
  # Returns:
  # type: The original HuggingFace model class
- if cls._hf_class is None:
+ if "_hf_class" not in cls.__dict__ or cls._hf_class is None:
  hf_cls_name = cls.__name__[4:]
  library = importlib.import_module(cls.hf_library_name)
  cls._hf_class = getattr(library, hf_cls_name, None)
@@ -430,7 +428,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  @classmethod
  def get_rbln_config_class(cls) -> Type[RBLNModelConfig]:
  # Lazily loads and caches the corresponding RBLN model config class.
- if cls._rbln_config_class is None:
+ if "_rbln_config_class" not in cls.__dict__ or cls._rbln_config_class is None:
  rbln_config_class_name = cls.__name__ + "Config"
  cls._rbln_config_class = get_rbln_config_class(rbln_config_class_name)
  return cls._rbln_config_class
@@ -507,6 +505,9 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  f"Please ensure the model directory exists and you have the necessary permissions to access it."
  )
 
+ if isinstance(self.config, PretrainedConfig):
+ self.config.save_pretrained(real_save_dir)
+
  if save_directory_path == real_save_dir:
  raise FileExistsError(
  f"Cannot save model to '{save_directory}'. This directory already exists and contains the model files."
@@ -534,7 +535,10 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  raise e # Re-raise the exception after cleanup
 
  if push_to_hub:
- return super().push_to_hub(str(save_directory_path), **kwargs)
+ repo_id = kwargs.pop("repo_id", None)
+ if repo_id is None:
+ raise ValueError("`repo_id` must be provided to push the model to the HuggingFace model hub.")
+ return super().push_to_hub(repo_id=repo_id, **kwargs)
 
  @staticmethod
  def _raise_missing_compiled_file_error(missing_files: List[str]):
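With the base class now inheriting `transformers.utils.hub.PushToHubMixin`, pushing goes through the standard Transformers flow and requires an explicit `repo_id`. A hedged sketch (the repo name is a placeholder):

    model.save_pretrained(
        "./my_rbln_model",
        push_to_hub=True,
        repo_id="org/some-compiled-rbln-model",  # now mandatory when push_to_hub=True
    )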
optimum/rbln/transformers/models/clip/configuration_clip.py CHANGED
@@ -43,7 +43,15 @@ class RBLNCLIPTextModelWithProjectionConfig(RBLNCLIPTextModelConfig):
 
 
  class RBLNCLIPVisionModelConfig(RBLNModelConfig):
- def __init__(self, batch_size: Optional[int] = None, image_size: Optional[int] = None, **kwargs: Dict[str, Any]):
+ def __init__(
+ self,
+ batch_size: Optional[int] = None,
+ image_size: Optional[int] = None,
+ interpolate_pos_encoding: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ **kwargs: Dict[str, Any],
+ ):
  """
  Args:
  batch_size (Optional[int]): The batch size for image processing. Defaults to 1.
@@ -60,6 +68,9 @@ class RBLNCLIPVisionModelConfig(RBLNModelConfig):
  raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
 
  self.image_size = image_size
+ self.interpolate_pos_encoding = interpolate_pos_encoding or False
+ self.output_hidden_states = output_hidden_states
+ self.output_attentions = output_attentions
 
  @property
  def image_width(self):
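The vision config now carries `interpolate_pos_encoding`, `output_hidden_states`, and `output_attentions`, which are fixed at compile time. A hedged construction sketch (assuming the config class is exported from `optimum.rbln` as in previous releases):

    from optimum.rbln import RBLNCLIPVisionModelConfig

    vision_cfg = RBLNCLIPVisionModelConfig(
        batch_size=1,
        image_size=224,
        interpolate_pos_encoding=False,
        output_hidden_states=True,   # baked into the compiled graph
        output_attentions=False,
    )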
optimum/rbln/transformers/models/clip/modeling_clip.py CHANGED
@@ -16,6 +16,7 @@ from typing import TYPE_CHECKING, Optional, Tuple, Union
 
  import torch
  from transformers import CLIPTextConfig, CLIPTextModel, CLIPVisionConfig, CLIPVisionModel
+ from transformers.modeling_outputs import BaseModelOutputWithPooling
  from transformers.models.clip.modeling_clip import CLIPTextModelOutput, CLIPVisionModelOutput
 
  from ....configuration_utils import RBLNCompileConfig
@@ -111,12 +112,27 @@ class RBLNCLIPTextModelWithProjection(RBLNCLIPTextModel):
 
 
  class _VisionEncoder(torch.nn.Module):
- def __init__(self, enc: CLIPVisionModel):
+ def __init__(
+ self,
+ enc: CLIPVisionModel,
+ interpolate_pos_encoding: bool,
+ output_hidden_states: bool,
+ output_attentions: bool,
+ ):
  super().__init__()
  self.enc = enc
+ self.interpolate_pos_encoding = interpolate_pos_encoding
+ self.output_hidden_states = output_hidden_states
+ self.output_attentions = output_attentions
 
  def forward(self, inp):
- enc_out = self.enc(inp, output_hidden_states=True, return_dict=False)
+ enc_out = self.enc(
+ inp,
+ output_hidden_states=self.output_hidden_states,
+ interpolate_pos_encoding=self.interpolate_pos_encoding,
+ output_attentions=self.output_attentions,
+ return_dict=False,
+ )
  return enc_out
 
 
@@ -130,7 +146,12 @@ class RBLNCLIPVisionModel(RBLNModel):
 
  @classmethod
  def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module:
- return _VisionEncoder(model).eval()
+ wrapper_cfg = {
+ "interpolate_pos_encoding": rbln_config.interpolate_pos_encoding,
+ "output_hidden_states": rbln_config.output_hidden_states,
+ "output_attentions": rbln_config.output_attentions,
+ }
+ return _VisionEncoder(model, **wrapper_cfg).eval()
 
  @classmethod
  def update_rbln_config_using_pipe(
@@ -155,6 +176,12 @@ class RBLNCLIPVisionModel(RBLNModel):
  if rbln_config.image_size is None:
  raise ValueError("`rbln_image_size` should be specified!")
 
+ if rbln_config.output_attentions is None:
+ rbln_config.output_attentions = getattr(model_config, "output_attentions", False)
+
+ if rbln_config.output_hidden_states is None:
+ rbln_config.output_hidden_states = getattr(model_config, "output_hidden_states", False)
+
  rbln_compile_config = RBLNCompileConfig(
  input_info=[
  (
@@ -176,27 +203,76 @@ class RBLNCLIPVisionModel(RBLNModel):
  def forward(
  self,
  pixel_values: Optional[torch.FloatTensor] = None,
- return_dict: bool = None,
+ return_dict: bool = True,
+ output_attentions: bool = None,
+ output_hidden_states: bool = None,
+ interpolate_pos_encoding: bool = False,
  **kwargs,
- ) -> Union[Tuple, CLIPVisionModelOutput]:
+ ) -> Union[Tuple, BaseModelOutputWithPooling]:
  if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
  logger.warning(
  f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}."
  )
+
+ output_attentions = output_attentions if output_attentions is not None else self.rbln_config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else self.rbln_config.output_hidden_states
+ )
+
+ if output_attentions != self.rbln_config.output_attentions:
+ raise ValueError(
+ f"Variable output_attentions {output_attentions} is not equal to rbln_config.output_attentions {self.rbln_config.output_attentions} "
+ f"Please compile again with the correct argument."
+ )
+
+ if output_hidden_states != self.rbln_config.output_hidden_states:
+ raise ValueError(
+ f"Variable output_hidden_states {output_hidden_states} is not equal to rbln_config.output_hidden_states {self.rbln_config.output_hidden_states} "
+ f"Please compile again with the correct argument."
+ )
+
+ if interpolate_pos_encoding != self.rbln_config.interpolate_pos_encoding:
+ raise ValueError(
+ f"Variable interpolate_pos_encoding {interpolate_pos_encoding} is not equal to rbln_config.interpolate_pos_encoding {self.rbln_config.interpolate_pos_encoding} "
+ f"Please compile again with the correct argument."
+ )
+
  output = super().forward(pixel_values, return_dict=return_dict)
  return output
 
  def _prepare_output(self, output, return_dict):
  # Prepare model output based on return_dict flag.
  # This method can be overridden by subclasses to provide task-specific output handling.
+ last_hidden_state = output.pop(0)
+ pooler_output = output.pop(0)
+ vision_config = self.config.vision_config if hasattr(self.config, "vision_config") else self.config
+
+ if self.rbln_config.output_hidden_states:
+ hidden_states = ()
+ num_hidden_layers = vision_config.num_hidden_layers
+ for _ in range(num_hidden_layers + 1):
+ hidden_states += (output.pop(0),)
+ else:
+ hidden_states = None
+
+ if self.rbln_config.output_attentions:
+ attentions = ()
+ num_hidden_layers = vision_config.num_hidden_layers
+ for _ in range(num_hidden_layers):
+ attentions += (output.pop(0),)
+ else:
+ attentions = None
 
  if not return_dict:
- return (output,) if not isinstance(output, (tuple, list)) else output
+ return tuple(
+ item for item in (last_hidden_state, pooler_output, hidden_states, attentions) if item is not None
+ )
  else:
- return CLIPVisionModelOutput(
- image_embeds=output[0],
- last_hidden_state=output[1],
- hidden_states=output[2:],
+ return BaseModelOutputWithPooling(
+ last_hidden_state=last_hidden_state,
+ pooler_output=pooler_output,
+ hidden_states=hidden_states,
+ attentions=attentions,
  )
 
 
@@ -208,21 +284,40 @@ class RBLNCLIPVisionModelWithProjection(RBLNCLIPVisionModel):
  multimodal embedding alignment tasks.
  """
 
- def forward(
- self,
- pixel_values: Optional[torch.FloatTensor] = None,
- **kwargs,
- ) -> Union[Tuple, CLIPVisionModelOutput]:
- if len(kwargs) > 0 and any(kwargs.values()):
- logger.warning(f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__}.")
-
- output = super().forward(pixel_values)
- image_embeds = output[0]
- last_hidden_state = output[1]
- hidden_states = output[2:]
-
- return CLIPVisionModelOutput(
- image_embeds=image_embeds,
- last_hidden_state=last_hidden_state,
- hidden_states=hidden_states,
- )
+ def _prepare_output(self, output, return_dict):
+ # Prepare model output based on return_dict flag.
+ # This method can be overridden by subclasses to provide task-specific output handling.
+
+ image_embeds = output.pop(0) if isinstance(output, (tuple, list)) else output
+ last_hidden_state = output.pop(0)
+
+ vision_config = self.config.vision_config if hasattr(self.config, "vision_config") else self.config
+
+ if self.rbln_config.output_hidden_states:
+ hidden_states = ()
+ num_hidden_layers = vision_config.num_hidden_layers
+ for _ in range(num_hidden_layers + 1):
+ hidden_states += (output.pop(0),)
+ else:
+ hidden_states = None
+
+ if self.rbln_config.output_attentions:
+ attentions = ()
+ num_hidden_layers = vision_config.num_hidden_layers
+ for _ in range(num_hidden_layers):
+ attentions += (output.pop(0),)
+ else:
+ attentions = None
+
+ if not return_dict:
+ return tuple(
+ item for item in (image_embeds, last_hidden_state, hidden_states, attentions) if item is not None
+ )
+
+ else:
+ return CLIPVisionModelOutput(
+ image_embeds=image_embeds,
+ last_hidden_state=last_hidden_state,
+ hidden_states=hidden_states,
+ attentions=attentions,
+ )
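Because the output flags are captured in the compiled graph, the runtime `forward` only accepts values matching the compile-time configuration. A hedged usage sketch (the checkpoint name, `export=True`, and the `rbln_*` kwarg spelling follow the usual optimum-rbln pattern but are not taken from this diff):

    import torch
    from optimum.rbln import RBLNCLIPVisionModel  # assumed export

    model = RBLNCLIPVisionModel.from_pretrained(
        "openai/clip-vit-base-patch32",   # example checkpoint
        export=True,
        rbln_output_hidden_states=True,
    )
    pixel_values = torch.randn(1, 3, 224, 224)
    out = model(pixel_values)             # BaseModelOutputWithPooling with hidden_states populated
    # model(pixel_values, output_hidden_states=False) would raise ValueError: the graph was compiled with True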
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py CHANGED
@@ -352,8 +352,11 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
  if position_embed is not None:
  position_embed = torch.nn.functional.pad(position_embed, (0, 0, 0, padding_size))
 
+ if token_type_ids is not None:
+ token_type_ids = torch.nn.functional.pad(token_type_ids, (0, padding_size), value=-1)
+
  # Overwrite position_ids and padded_cache_lengths
- position_ids = None
+ position_ids = cache_position.clone()
  padded_cache_lengths = 0
 
  return (
@@ -365,6 +368,7 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
  position_embed,
  padded_cache_lengths,
  query_length,
+ token_type_ids,
  )
 
  def prefill_forward(
@@ -393,6 +397,7 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
  position_embed,
  padded_cache_lengths,
  query_length,
+ token_type_ids,
  ) = self._prepare_prefill_inputs(
  inputs, cache_position, attention_mask, position_embed, token_type_ids=token_type_ids
  )
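`_prepare_prefill_inputs` now also returns `token_type_ids`, padded with `-1` so padded positions stay distinguishable from text (`0`) and image (`1`) tokens. A minimal illustration of that convention:

    import torch

    token_type_ids = torch.tensor([[0, 0, 1, 1]])
    padded = torch.nn.functional.pad(token_type_ids, (0, 4), value=-1)
    print(padded)  # tensor([[ 0,  0,  1,  1, -1, -1, -1, -1]])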
optimum/rbln/transformers/models/gemma3/configuration_gemma3.py CHANGED
@@ -23,22 +23,21 @@ from ..siglip.configuration_siglip import RBLNSiglipVisionModelConfig
  class RBLNGemma3ForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
  def __init__(
  self,
- prefill_chunk_size: Optional[int] = None,
  use_position_ids: Optional[bool] = None,
  use_attention_mask: Optional[bool] = None,
+ image_prefill_chunk_size: Optional[int] = None,
  **kwargs: Dict[str, Any],
  ):
  # use_attention_mask and use_position_ids are always True for Gemma3
  use_attention_mask = use_attention_mask or True
  use_position_ids = use_position_ids or True
- prefill_chunk_size = prefill_chunk_size or 256
 
  super().__init__(
- prefill_chunk_size=prefill_chunk_size,
  use_attention_mask=use_attention_mask,
  use_position_ids=use_position_ids,
  **kwargs,
  )
+ self.image_prefill_chunk_size = image_prefill_chunk_size
 
  npu = self.npu or rebel.get_npu_name()
  if npu == "RBLN-CA02":
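Gemma3 no longer forces the generic `prefill_chunk_size` to 256; image token groups get their own `image_prefill_chunk_size`, which is checked against `model.config.mm_tokens_per_image` during export. A hedged construction sketch (class name assumed to be exported from `optimum.rbln`; instantiating it requires the RBLN SDK because the constructor queries the NPU):

    from optimum.rbln import RBLNGemma3ForCausalLMConfig  # assumed export

    cfg = RBLNGemma3ForCausalLMConfig(
        image_prefill_chunk_size=256,  # must equal mm_tokens_per_image, otherwise export raises ValueError
    )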
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py CHANGED
@@ -31,15 +31,11 @@ from transformers.models.gemma3.modeling_gemma3 import Gemma3TextScaledWordEmbed
 
  from ....configuration_utils import RBLNCompileConfig, RBLNModelConfig
  from ....modeling import RBLNModel
- from ....utils.logging import get_logger
  from ..decoderonly.modeling_decoderonly import RBLNDecoderOnlyModelForCausalLM, RBLNDecoderOnlyOutput, RBLNRuntimeModel
  from .configuration_gemma3 import RBLNGemma3ForCausalLMConfig
  from .gemma3_architecture import Gemma3ForCausalLMWrapper
 
 
- logger = get_logger()
-
-
  if TYPE_CHECKING:
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, Gemma3ForConditionalGeneration
 
@@ -320,194 +316,28 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
  self.prefill = self.runtime if self.phase == "prefill" else None # FIXME
  self.decode = self.runtime if self.phase == "decode" else None
 
- def pad_for_chunked_images(
- self,
- inputs: torch.Tensor,
- attention_mask: torch.Tensor,
- position_ids: torch.Tensor,
- token_type_ids: Optional[torch.Tensor] = None,
- ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, int, torch.Tensor]:
- """
- Pads inputs, attention_mask, and position_ids so image token groups (256 tokens with token_type_ids == 1)
- start at multiples of prefill_chunk_size (256). Returns padded tensors and total padded length.
-
- Args:
- inputs: (1, seq_len, hidden_size) tensor.
- attention_mask: (1, seq_len) tensor, 1 for valid, 0 for masked.
- position_ids: (1, seq_len) tensor for RoPE.
- token_type_ids: (1, seq_len) tensor, 0 for text, 1 for image.
-
- Returns:
- (inputs_padded, attention_mask_padded, position_ids_padded, padded_len, token_type_ids_padded).
- """
-
- if token_type_ids is None:
- return inputs, attention_mask, position_ids, 0, torch.zeros(inputs.shape[:2], dtype=torch.long)
-
- seq_len = inputs.shape[1]
-
- # Find image start positions
- image_starts = [
- s
- for s in range(seq_len - self.rbln_config.prefill_chunk_size + 1)
- if torch.all(token_type_ids[:, s : s + self.rbln_config.prefill_chunk_size] == 1)
- ]
-
- # Initialize padded tensors
- padded_input_len = seq_len
- for image_start in image_starts:
- pad_needed = (
- self.rbln_config.prefill_chunk_size
- - (image_start + padded_input_len - seq_len) % self.rbln_config.prefill_chunk_size
- ) % self.rbln_config.prefill_chunk_size
- padded_input_len += pad_needed
- total_padding = padded_input_len - seq_len
-
- if inputs.dim() == 3:
- inputs_padded = torch.zeros(1, padded_input_len, inputs.shape[2], dtype=inputs.dtype)
- else:
- inputs_padded = torch.zeros(1, padded_input_len, dtype=inputs.dtype)
- attention_mask_padded = torch.zeros(1, padded_input_len, dtype=attention_mask.dtype)
- position_ids_padded = torch.zeros(1, padded_input_len, dtype=position_ids.dtype)
- token_type_ids_padded = torch.zeros(1, padded_input_len, dtype=token_type_ids.dtype)
-
- # Fill padded tensors
- dest_pos = 0
- src_pos = 0
- last_pos_id = -1
- for image_start in image_starts + [seq_len]:
- # Text segment
- if src_pos < image_start:
- length = image_start - src_pos
- inputs_padded[:, dest_pos : dest_pos + length] = inputs[:, src_pos:image_start]
- attention_mask_padded[:, dest_pos : dest_pos + length] = attention_mask[:, src_pos:image_start]
- position_ids_padded[:, dest_pos : dest_pos + length] = position_ids[:, src_pos:image_start]
- token_type_ids_padded[:, dest_pos : dest_pos + length] = token_type_ids[:, src_pos:image_start]
- dest_pos += length
- last_pos_id = position_ids[0, image_start - 1].item()
- src_pos = image_start
-
- # Padding
- pad_needed = (
- self.rbln_config.prefill_chunk_size - dest_pos % self.rbln_config.prefill_chunk_size
- ) % self.rbln_config.prefill_chunk_size
- if pad_needed and dest_pos < padded_input_len:
- position_ids_padded[:, dest_pos : dest_pos + pad_needed] = torch.arange(
- last_pos_id + 1, last_pos_id + pad_needed + 1, dtype=position_ids.dtype
- ).unsqueeze(0)
- dest_pos += pad_needed
-
- # Image segment
- if src_pos < seq_len and src_pos == image_start:
- inputs_padded[:, dest_pos : dest_pos + self.rbln_config.prefill_chunk_size] = inputs[
- :, src_pos : src_pos + self.rbln_config.prefill_chunk_size
- ]
- attention_mask_padded[:, dest_pos : dest_pos + self.rbln_config.prefill_chunk_size] = attention_mask[
- :, src_pos : src_pos + self.rbln_config.prefill_chunk_size
- ]
- position_ids_padded[:, dest_pos : dest_pos + self.rbln_config.prefill_chunk_size] = position_ids[
- :, src_pos : src_pos + self.rbln_config.prefill_chunk_size
- ]
- token_type_ids_padded[:, dest_pos : dest_pos + self.rbln_config.prefill_chunk_size] = token_type_ids[
- :, src_pos : src_pos + self.rbln_config.prefill_chunk_size
- ]
- dest_pos += self.rbln_config.prefill_chunk_size
- src_pos += self.rbln_config.prefill_chunk_size
- last_pos_id = position_ids[0, image_start + self.rbln_config.prefill_chunk_size - 1].item()
-
- return inputs_padded, attention_mask_padded, position_ids_padded, total_padding, token_type_ids_padded
-
- def _prepare_prefill_inputs(
- self,
- inputs: torch.Tensor,
- cache_position: torch.Tensor,
- attention_mask: Optional[torch.Tensor] = None,
- position_embed: Optional[torch.Tensor] = None,
- token_type_ids: Optional[torch.Tensor] = None,
- ):
- """
- Prepare inputs for prefill phase.
- """
- # Handle continuous batching in a compiled graph by extracting valid inputs
- # If an attention mask is provided, select only the valid (non-masked) inputs
- inputs = inputs[:, attention_mask.bool()] if attention_mask is not None else inputs
- token_type_ids = (
- token_type_ids[:, attention_mask.bool()]
- if attention_mask is not None and token_type_ids is not None
- else token_type_ids
- )
-
- if position_embed is not None:
- position_embed = (
- position_embed[:, :, :, attention_mask.bool(), :] if attention_mask is not None else position_embed
- )
-
- seq_len = inputs.shape[1]
- # Initialize attention mask for chunked processing
- if self.rbln_config.use_attention_mask:
- chunked_attention_mask = (
- torch.ones(1, seq_len, dtype=torch.float32)
- if self.rbln_config.use_position_ids
- else torch.zeros(
- 1, 1, self.rbln_config.prefill_chunk_size, self.rbln_config.max_seq_len, dtype=torch.float32
- )
- )
- else:
- chunked_attention_mask = None
-
- # Buffer for storing output logits
- out_buffers = [
- torch.empty(
- size=self.output_size,
- dtype=torch.float32,
- device="cpu",
- )
- ]
-
- inputs, chunked_attention_mask, position_ids, padded_cache_lengths, token_type_ids_padded = (
- self.pad_for_chunked_images(inputs, chunked_attention_mask, cache_position, token_type_ids)
- )
-
- query_length = inputs.shape[1]
- if query_length > self.rbln_config.max_seq_len:
- raise ValueError(
- f"Input length ({query_length}) exceeds the maximum allowed sequence length ({self.rbln_config.max_seq_len})."
- )
-
- # Align attention_mask to compiled shape
- if self.rbln_config.use_position_ids:
- chunked_attention_mask = torch.nn.functional.pad(
- chunked_attention_mask, (0, self.rbln_config.max_seq_len - query_length)
- )
-
- # Pad input and cache_position if the last chunk is smaller than `prefill_chunk_size`
- padding_size = 0
- if query_length % self.rbln_config.prefill_chunk_size != 0:
- padding_size = (self.rbln_config.prefill_chunk_size - query_length) % self.rbln_config.prefill_chunk_size
- # inputs_embeds
- if inputs.dim() == 3:
- inputs = torch.nn.functional.pad(inputs, (0, 0, 0, padding_size))
- # inputs_ids
- else:
- inputs = torch.nn.functional.pad(inputs, (0, padding_size))
-
- position_ids = torch.cat(
- [
- position_ids,
- torch.arange(
- query_length,
- query_length + padding_size,
- dtype=torch.int32,
- ).unsqueeze(0),
- ],
- dim=-1,
- )
- token_type_ids_padded = torch.nn.functional.pad(token_type_ids_padded, (0, padding_size))
+ def _prepare_prefill_inputs(self, *args, **kwargs):
+ (
+ inputs,
+ cache_position,
+ chunked_attention_mask,
+ out_buffers,
+ position_ids,
+ position_embed,
+ padded_cache_lengths,
+ query_length,
+ token_type_ids,
+ ) = super()._prepare_prefill_inputs(*args, **kwargs)
 
- if position_embed is not None:
- position_embed = torch.nn.functional.pad(position_embed, (0, 0, 0, padding_size))
+ # chunked_attention_mask shape
+ chunked_attention_mask = torch.zeros(1, chunked_attention_mask.shape[-1], dtype=torch.float32)
 
- cache_position = torch.arange(0, query_length + padding_size, dtype=torch.int32).unsqueeze(0)
+ # as gemma3 has different prefill chunk size for image and text, we need to pad the inputs to the max of the two.
+ padding_size = max(self.rbln_config.prefill_chunk_size, self.rbln_config.image_prefill_chunk_size)
+ inputs = torch.nn.functional.pad(inputs, (0, 0, 0, padding_size))
+ cache_position = torch.nn.functional.pad(cache_position, (0, padding_size))
+ position_ids = torch.nn.functional.pad(position_ids, (0, padding_size))
+ token_type_ids = torch.nn.functional.pad(token_type_ids, (0, padding_size), value=-1)
 
  return (
  inputs,
@@ -518,7 +348,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
  position_embed,
  padded_cache_lengths,
  query_length,
- token_type_ids_padded,
+ token_type_ids,
  )
 
  def prefill_forward(
@@ -541,65 +371,73 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
  (
  inputs,
  cache_position,
- padded_attention_mask,
+ chunked_attention_mask,
  out_buffers,
  position_ids,
  position_embed,
  padded_cache_lengths,
  query_length,
- token_type_ids_padded,
+ token_type_ids,
  ) = self._prepare_prefill_inputs(
  inputs, cache_position, attention_mask, position_embed, token_type_ids=token_type_ids
  )
- if not is_external_block_tables:
- local_block_tables = torch.tensor([batch_idx], dtype=torch.int16)
- self.dec_attn_mask[batch_idx : batch_idx + 1] = padded_attention_mask[:1]
-
- if self.rbln_config.use_attention_mask and self.rbln_config.use_position_ids:
- chunked_attention_mask = torch.zeros(1, self.rbln_config.max_seq_len, dtype=torch.float32)
-
- # Process input in chunks of size `prefill_chunk_size`
- for step in range(0, query_length, self.rbln_config.prefill_chunk_size):
- # Extract the current chunk of inputs and cache positions
- input_chunk = inputs[:, step : step + self.rbln_config.prefill_chunk_size]
- cache_pos_chunk = cache_position[:, step : step + self.rbln_config.prefill_chunk_size]
- position_ids_chunk = (
- position_ids[:, step : step + self.rbln_config.prefill_chunk_size]
- if position_ids is not None
- else None
+
+ step = 0
+ while step < query_length:
+ # Check if the prefill chunk is an image prefill
+ is_image_prefill = torch.all(
+ token_type_ids[:, step : step + self.rbln_config.image_prefill_chunk_size] == 1
+ )
+ prefill_chunk_size = (
+ self.rbln_config.image_prefill_chunk_size if is_image_prefill else self.rbln_config.prefill_chunk_size
  )
 
- if self.rbln_config.use_attention_mask:
- if self.rbln_config.use_position_ids:
- chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] = (
- padded_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size]
- )
-
- # Define query position
- query_position = (
- torch.sum(
- chunked_attention_mask[0][step : step + self.rbln_config.prefill_chunk_size],
- dim=-1,
- dtype=torch.int16,
- ).squeeze(0)
- - 1
+ # Check if the prefill chunk is a text prefill which have image_tokens in it.
+ is_text_prefill_with_image_tokens = not is_image_prefill and torch.any(
+ token_type_ids[:, step : step + prefill_chunk_size] == 1
  )
- if token_type_ids_padded[:, step] == 1:
- if torch.any(token_type_ids_padded[:, step : step + self.rbln_config.prefill_chunk_size] == 0):
- raise ValueError("All tokens of image_prefill should be the same image.")
- else:
- logits = self.image_prefill(
- input_chunk,
- cache_pos_chunk,
- block_tables,
- local_block_tables,
- query_position,
- chunked_attention_mask,
- position_ids_chunk,
- out=out_buffers,
- )
+
+ # Check if the prefill chunk crosses a block boundary, requiring padding to align with block boundaries
+ is_cross_block_boundary = (
+ step // self.rbln_config.kvcache_block_size
+ != (step + prefill_chunk_size) // self.rbln_config.kvcache_block_size
+ )
+
+ # Check if the prefill chunk is the last chunk
+ is_last_chunk = step + prefill_chunk_size >= query_length
+
+ if is_cross_block_boundary:
+ padding_size = prefill_chunk_size - (step + prefill_chunk_size) % self.rbln_config.kvcache_block_size
+ padded_cache_lengths += padding_size
+
+ # if text_prefill end with image_tokens, we only treat the text part.
+ num_processed_tokens = prefill_chunk_size
+ if is_text_prefill_with_image_tokens:
+ first_image_token_idx = torch.where(token_type_ids[:, step : step + prefill_chunk_size] == 1)[1][0]
+ num_processed_tokens = first_image_token_idx
+ if is_last_chunk:
+ num_processed_tokens = query_length - step
+
+ input_chunk = inputs[:, step : step + prefill_chunk_size]
+ cache_pos_chunk = cache_position[:, step : step + prefill_chunk_size].clone() + padded_cache_lengths
+ position_ids_chunk = position_ids[:, step : step + prefill_chunk_size].clone()
+ chunked_attention_mask[
+ :, step + padded_cache_lengths : step + num_processed_tokens + padded_cache_lengths
+ ] = 1
+ query_position = torch.tensor(num_processed_tokens - 1, dtype=torch.int16)
+
+ if is_image_prefill:
+ logits = self.image_prefill(
+ input_chunk,
+ cache_pos_chunk,
+ block_tables,
+ local_block_tables,
+ query_position,
+ chunked_attention_mask,
+ position_ids_chunk,
+ out=out_buffers,
+ )
  else:
- # Forward pass for the current chunk
  logits = self.prefill(
  input_chunk,
  cache_pos_chunk,
@@ -611,6 +449,11 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
  out=out_buffers,
  )
 
+ step += num_processed_tokens
+
+ if not is_external_block_tables:
+ self.dec_attn_mask[batch_idx : batch_idx + 1] = chunked_attention_mask
+
  return RBLNGemma3ForCausalLMOutput(
  logits=logits, padded_cache_lengths=padded_cache_lengths, attention_mask=chunked_attention_mask
  )
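The rewritten prefill loop walks the padded sequence chunk by chunk, dispatching full image-token groups to the separately compiled image_prefill graph and cutting text chunks short when they run into an image group. A simplified, standalone sketch of that planning logic (plain Python, not the runtime code; it assumes image groups are exactly image_chunk tokens long and start on their own chunk, which the padded inputs guarantee):

    def plan_chunks(token_types, text_chunk=128, image_chunk=256):
        plan, step, n = [], 0, len(token_types)
        while step < n:
            window = token_types[step : step + image_chunk]
            if len(window) == image_chunk and all(t == 1 for t in window):
                kind, size = "image", image_chunk          # full image group -> image_prefill graph
            else:
                kind, size = "text", min(text_chunk, n - step)
                text_window = token_types[step : step + size]
                if 1 in text_window:                       # stop before the first image token
                    size = text_window.index(1)
            plan.append((kind, step, size))
            step += size
        return plan

    # 200 text tokens, one 256-token image group, 10 trailing text tokens
    tokens = [0] * 200 + [1] * 256 + [0] * 10
    print(plan_chunks(tokens))
    # [('text', 0, 128), ('text', 128, 72), ('image', 200, 256), ('text', 456, 10)]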
@@ -757,13 +600,14 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
 
  @classmethod
  def _update_submodule_config(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
- if rbln_config.prefill_chunk_size is None:
- rbln_config.prefill_chunk_size = model.config.mm_tokens_per_image
+ if rbln_config.image_prefill_chunk_size is None:
+ rbln_config.image_prefill_chunk_size = model.config.mm_tokens_per_image
 
- if rbln_config.prefill_chunk_size != model.config.mm_tokens_per_image:
- logger.warning(
- f"Prefill chunk size is different from mm_tokens_per_image: {rbln_config.prefill_chunk_size} != {model.config.mm_tokens_per_image}"
+ if rbln_config.image_prefill_chunk_size != model.config.mm_tokens_per_image:
+ raise ValueError(
+ f"Image prefill chunk size is different from mm_tokens_per_image: {rbln_config.image_prefill_chunk_size} != {model.config.mm_tokens_per_image}"
  )
+
  return rbln_config
 
  @classmethod
@@ -777,14 +621,22 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
  # Update rbln_config with super class
  rbln_config = super()._update_rbln_config(preprocessors, model, model_config, rbln_config)
 
- # Assume that prefill compile config is at index 0
- compile_cfgs = rbln_config.compile_cfgs
+ if not (rbln_config.use_attention_mask and rbln_config.use_position_ids):
+ raise ValueError("use_attention_mask and use_position_ids must be True for RBLNGemma3ForCausalLM")
+
+ # Update image prefill compile config
+ img_prefill_input_info = cls.get_input_info(
+ batch_size=1,
+ query_length=rbln_config.image_prefill_chunk_size,
+ rbln_config=rbln_config,
+ model_config=model_config,
+ )
  image_prefill_compile_config = RBLNCompileConfig(
- compiled_model_name="image_prefill", input_info=compile_cfgs[0].input_info
+ compiled_model_name="image_prefill", input_info=img_prefill_input_info
  )
  # Insert image_prefill compile config at index 1
- image_idx = 1
- compile_cfgs.insert(image_idx, image_prefill_compile_config)
+ compile_cfgs = rbln_config.compile_cfgs
+ compile_cfgs.insert(1, image_prefill_compile_config)
  rbln_config.set_compile_cfgs(compile_cfgs)
 
  return rbln_config
@@ -840,11 +692,14 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
  )
 
  image_prefill_compile_config = rbln_compile_configs[1]
+ image_prefill_example_inputs = image_prefill_compile_config.get_dummy_inputs(
+ fill=0, static_tensors=static_tensors
+ )
  wrapped_model.phase = "image_prefill"
  compiled_image_prefill = compile_model(
  wrapped_model,
  image_prefill_compile_config,
- prefill_example_inputs,
+ image_prefill_example_inputs,
  context,
  rbln_config.quantization,
  )
optimum/rbln/transformers/models/llava_next/configuration_llava_next.py CHANGED
@@ -15,6 +15,11 @@
  from typing import Any, Dict, Optional
 
  from ....configuration_utils import RBLNModelConfig
+ from ....utils.logging import get_logger
+ from ...models.clip import RBLNCLIPVisionModelConfig
+
+
+ logger = get_logger(__name__)
 
 
  class RBLNLlavaNextForConditionalGenerationConfig(RBLNModelConfig):
@@ -50,5 +55,17 @@ class RBLNLlavaNextForConditionalGenerationConfig(RBLNModelConfig):
  if not isinstance(self.batch_size, int) or self.batch_size < 0:
  raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
 
- self.vision_tower = vision_tower
+ self.vision_tower = self.init_submodule_config(
+ RBLNCLIPVisionModelConfig,
+ vision_tower,
+ )
+
+ if self.vision_tower.output_hidden_states is False:
+ raise ValueError(
+ f"LlavaNext requires output_hidden_states to be True, but found output_hidden_states={self.vision_tower.output_hidden_states}. "
+ f"Please compile again with the correct argument."
+ )
+ else:
+ self.vision_tower.output_hidden_states = True
+
  self.language_model = language_model
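The vision tower entry is now promoted to an `RBLNCLIPVisionModelConfig` and must keep `output_hidden_states=True`, since LlavaNext builds its image features from intermediate hidden states. A hedged sketch (class name assumed to be exported from `optimum.rbln`):

    from optimum.rbln import RBLNLlavaNextForConditionalGenerationConfig  # assumed export

    cfg = RBLNLlavaNextForConditionalGenerationConfig(
        vision_tower={"output_hidden_states": True},  # a plain dict is promoted to RBLNCLIPVisionModelConfig
    )
    # vision_tower={"output_hidden_states": False} would raise ValueError during construction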
optimum/rbln/utils/hub.py CHANGED
@@ -12,59 +12,23 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import os
  from pathlib import Path
  from typing import List, Optional, Union
 
- from huggingface_hub import HfApi, HfFolder, hf_hub_download
-
-
- class PushToHubMixin:
- def push_to_hub(
- self,
- save_directory: str,
- repository_id: str,
- private: Optional[bool] = None,
- use_auth_token: Union[bool, str] = True,
- ) -> str:
- huggingface_token = _get_huggingface_token(use_auth_token)
- api = HfApi()
-
- api.create_repo(
- token=huggingface_token,
- repo_id=repository_id,
- exist_ok=True,
- private=private,
- )
- for path, subdirs, files in os.walk(save_directory):
- for name in files:
- local_file_path = os.path.join(path, name)
- _, hub_file_path = os.path.split(local_file_path)
- # FIXME: when huggingface_hub fixes the return of upload_file
- try:
- api.upload_file(
- token=huggingface_token,
- repo_id=f"{repository_id}",
- path_or_fileobj=os.path.join(os.getcwd(), local_file_path),
- path_in_repo=hub_file_path,
- )
- except KeyError:
- pass
- except NameError:
- pass
+ from huggingface_hub import HfApi, get_token, hf_hub_download
 
 
  def pull_compiled_model_from_hub(
  model_id: Union[str, Path],
  subfolder: str,
- use_auth_token: Optional[Union[bool, str]],
+ token: Union[bool, str],
  revision: Optional[str],
  cache_dir: Optional[str],
  force_download: bool,
  local_files_only: bool,
  ) -> Path:
  """Pull model files from the HuggingFace Hub."""
- huggingface_token = _get_huggingface_token(use_auth_token)
+ huggingface_token = _get_huggingface_token(token)
  repo_files = list(
  map(
  Path,
@@ -87,7 +51,7 @@ def pull_compiled_model_from_hub(
  repo_id=model_id,
  filename=filename,
  subfolder=subfolder,
- use_auth_token=use_auth_token,
+ token=token,
  revision=revision,
  cache_dir=cache_dir,
  force_download=force_download,
@@ -113,10 +77,7 @@ def validate_files(
  raise FileExistsError(f"Multiple rbln_config.json files found in {location}. This is not expected.")
 
 
- def _get_huggingface_token(use_auth_token: Union[bool, str]) -> str:
- if isinstance(use_auth_token, str):
- return use_auth_token
- elif use_auth_token:
- return HfFolder.get_token()
- else:
- raise ValueError("`use_auth_token` must be provided to interact with the HuggingFace Hub.")
+ def _get_huggingface_token(token: Union[bool, str]) -> str:
+ if isinstance(token, str):
+ return token
+ return get_token()
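The in-house `PushToHubMixin` and the `HfFolder`-based token lookup are gone; an explicit token string still wins, and otherwise `huggingface_hub.get_token()` resolves `HF_TOKEN` or the stored login (returning None for anonymous access instead of raising). A brief sketch of the downstream usage (repo name is a placeholder):

    from huggingface_hub import get_token, hf_hub_download

    token = get_token()  # None when no HF_TOKEN / stored login; that is now allowed
    path = hf_hub_download(
        repo_id="org/some-compiled-rbln-model",  # hypothetical repo
        filename="rbln_config.json",
        token=token,                             # `use_auth_token` is deprecated upstream
    )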
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: optimum-rbln
- Version: 0.8.2a2
+ Version: 0.8.2a3
  Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
  Project-URL: Homepage, https://rebellions.ai
  Project-URL: Documentation, https://docs.rbln.ai
@@ -1,10 +1,10 @@
  optimum/rbln/__init__.py,sha256=Pl_On5dBWYbmJk6776cc0eU49oc0pebdCvgb2l4hQRA,15564
- optimum/rbln/__version__.py,sha256=0WA5HC48CQUocr8qaZZFi2jxgajnt6fwHhbsacDdi4g,519
+ optimum/rbln/__version__.py,sha256=0oZYlGe9YqrAvb-FclaewmYV23Y0qIWNISVrbrIQsrM,519
  optimum/rbln/configuration_utils.py,sha256=tnR9XZfzZ9oHc7nU_kX33oo3qFFyicICSVQSujYPiOM,32911
- optimum/rbln/modeling.py,sha256=wZ_Cw83tgPDDJAB84_iLEB_H7ycL-rrlTsCzrHk-GWo,14293
- optimum/rbln/modeling_base.py,sha256=QpNkU_Do__JKmnHjaPzv47OhQwgGfVohisip1jqXa7A,23871
+ optimum/rbln/modeling.py,sha256=gww-H-q16_mGw2qGnFwOjEj3J9yMjBKnRTKlnCkVlx8,14315
+ optimum/rbln/modeling_base.py,sha256=AShxAt3KIOcCqfyF4U83dIrKwoj4p2Kxtc1ns_9-ltU,24154
  optimum/rbln/diffusers/__init__.py,sha256=_3FoA0uxCdFd6YK9PMaptFmR9XvdB_PcvYR1MkbGpN8,6957
- optimum/rbln/diffusers/modeling_diffusers.py,sha256=0Pwqw2PbLSQ6FXsXzbx44Jfz3Vm9hfSrfVRH5zcN0yk,19885
+ optimum/rbln/diffusers/modeling_diffusers.py,sha256=gnCsDZ6WD6N7yOfcZU8pK5rtwyh4zwSCtmR6Wa4zRG8,19913
  optimum/rbln/diffusers/configurations/__init__.py,sha256=vMRnPY4s-Uju43xP038D2EA18X_mhy2YfsZVpSU-VoA,1322
  optimum/rbln/diffusers/configurations/models/__init__.py,sha256=7q95gtgDzCeIBogGw8SLQoHT4Wch7vpLJVF2UQovuoo,567
  optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py,sha256=ySetuNq6koleFIZ542zZLTzEEyl_CTul9l12ufWlQ_Y,3218
@@ -93,8 +93,8 @@ optimum/rbln/transformers/models/blip_2/__init__.py,sha256=L01gPXcUCa8Vg-bcng20v
  optimum/rbln/transformers/models/blip_2/configuration_blip_2.py,sha256=ke75GqPU139dNOY1nm6QE661LepbD_0V9Bx1QbtHhKA,3210
  optimum/rbln/transformers/models/blip_2/modeling_blip_2.py,sha256=2sIVGrIn1B2nUZ8hw1sgW3VbJ2vxrlBRN37GgDiw0GU,16191
  optimum/rbln/transformers/models/clip/__init__.py,sha256=TLeXDqcFK6M6v9x7Xr64kBbqGu3hFHM7p754dQ8UVQc,938
- optimum/rbln/transformers/models/clip/configuration_clip.py,sha256=D7CIWpbMhXUrGv-CnhxRtSS3vAYb427-w7zSkfuJHEU,3455
- optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=QbYrt7pUWNal-p93fxmuKrHa2CPlCaq8F16qOfMAst0,8090
+ optimum/rbln/transformers/models/clip/configuration_clip.py,sha256=HeKqLtTKrpQbx1Gq5GRSSLvjjgXHYD0rWIUhQOZDQCY,3836
+ optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=knK7gINAluSHcWvg3zaByb3XRLNmSEGw2NcsOGHnIow,12364
  optimum/rbln/transformers/models/colpali/__init__.py,sha256=n3rueXT_oC0N8myoZiic0YkVK24CW5hZBPa-0L8so6Y,119
  optimum/rbln/transformers/models/colpali/colpali_architecture.py,sha256=bWG7TehWRZkTh2y6mGkpd85_onWAyiyKdaQC9TFsy3E,8065
  optimum/rbln/transformers/models/colpali/configuration_colpali.py,sha256=ieY-tuyDPObFUIJ5sfpcfuCsJ_HTAizN7ZGqirqeFRU,2636
@@ -102,7 +102,7 @@ optimum/rbln/transformers/models/colpali/modeling_colpali.py,sha256=jzvJCBrrCXSp
  optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=vQYZDDdoddwA7yKc5zzrq2Zs9sax-0p8rNF_aYfF4bk,1006
  optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=cakn8RGo8gS3nmXdEqOfC2xUBOMGInROgLEbCOoLFR0,13398
  optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=HrI12t9X9wV_-AZuTBSs-W7c5yVUkvd0secWlI72x2A,46325
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=qe0icOCkPY1k1Db9gP5fXu-kHIjZV3M_tOjrs-EeULQ,53701
+ optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=H2Qc8sC3wMmQlkx0YCrQCAy6zKiLm6Db1DbqQvlLXPE,53921
  optimum/rbln/transformers/models/distilbert/__init__.py,sha256=zXL78SOEORTnUN_wrdoaDaYpntG8lcFHvPobM6jC0CI,841
  optimum/rbln/transformers/models/distilbert/configuration_distilbert.py,sha256=O3BW9JjyYk9PLyiofvOKEgTdMZ_jpIuPfot281pSsyg,984
  optimum/rbln/transformers/models/distilbert/modeling_distilbert.py,sha256=LUh6zYGa8AR3Yxaj3gtyJRc-czBN3qnHTc-JTAhuqY0,1099
@@ -118,9 +118,9 @@ optimum/rbln/transformers/models/gemma/configuration_gemma.py,sha256=3hAxl7LL9vF
  optimum/rbln/transformers/models/gemma/gemma_architecture.py,sha256=2Ivay8NTSHmQAqXFh9JvG6Ja5rMThcRAjYPzyipcRI8,956
  optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=Ojvum34EhDHWfMB4D6S1BrwoTNwuBSZuBzwdnAgvq38,3095
  optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_J9VyGiSReuEIvL0Uno0eaI,790
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=eupMGTHJGJNNrAZ3GE6M6GQBAQzBb7KFJvalyDmbM-A,3063
+ optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=LwzlThcIXlpK1PdvgaIobp3uHQgaDCiKngi1XR9hDvU,3028
  optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=fpLDAXCe5paWVsfc0tL59JkRQMRF-WNgIzOIb_QpSLU,6191
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=iVzH7dm4ZO1raw6mznhmTkCY4UYbTzI5lm-8X7I1vN0,38528
+ optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=Hbh_Cfz0j_so8zCLP3ExO2lrCkyJqVnHI2Vqfpglfn4,31895
  optimum/rbln/transformers/models/gpt2/__init__.py,sha256=socBMIBZSiLbrVN12rQ4nL9gFeT0axMgz6SWaCaD4Ac,704
  optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=9sS6-EGapmow3rG9ViejK9qwrqy_X86VBxQ7u9x0Yqk,923
  optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=O7hBiaFJrpLSswGwW83cX9S9Q2wKRBDrpAqOgOS7zQg,2733
@@ -133,7 +133,7 @@ optimum/rbln/transformers/models/llama/configuration_llama.py,sha256=X6SXtRXGBC8
  optimum/rbln/transformers/models/llama/llama_architecture.py,sha256=S7MCPfyjG5eUqgaS-QNBB0ApUD6wnb5fR0RHq7k7-pA,728
  optimum/rbln/transformers/models/llama/modeling_llama.py,sha256=G91Yd8BfhiD8GT5bF46ZIHuc-ajGC-PO-mOQN3BhE1A,3122
  optimum/rbln/transformers/models/llava_next/__init__.py,sha256=kDXKr7wMkp1XqE__DER2B8kQF_NYMxhzsQS5ytGg56I,752
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py,sha256=b3roSXoAApUXk8dI4X__f3mWUnHcZDI1GAY34ckyYc4,2170
+ optimum/rbln/transformers/models/llava_next/configuration_llava_next.py,sha256=U6_DQoaXugN2Bc4ntUb7WkelbNmw1L4VbgqsMRVuuE4,2776
  optimum/rbln/transformers/models/llava_next/modeling_llava_next.py,sha256=paYtCk58--FSZp8xjVrfZAxkJxO02X-jxaVPqL-l7ZU,27421
  optimum/rbln/transformers/models/midm/__init__.py,sha256=IC3FETwgYinbp3wDj7tp4zIHJhbqM-c6GfTRdYcMNj8,913
  optimum/rbln/transformers/models/midm/configuration_midm.py,sha256=DxhcSJlApxfi00XxYmSkKZ6bY9vfLXT0zh-oMKkZot0,1365
@@ -202,14 +202,14 @@ optimum/rbln/transformers/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
  optimum/rbln/transformers/utils/rbln_quantization.py,sha256=um0N2ZruU_lNibo6rlzgwHAI2_8QOrYE7W7pA1qfXKM,9396
  optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
  optimum/rbln/utils/decorator_utils.py,sha256=xu-TrsNi33SRC2a7DBsyoo6-pEQxWKZPZSmM9QlDe2Y,3745
- optimum/rbln/utils/hub.py,sha256=Z_R9Ic9VAew8bUmlaAlxZf5JGMDBivHvvFRI557pILY,4196
+ optimum/rbln/utils/hub.py,sha256=FPBGslHJAMeyfBID3viLmh51xJzcR29xWtYtMN8y2CI,2765
  optimum/rbln/utils/import_utils.py,sha256=fpOERIIxXm-cDYGn1NN6c7aWDPQYVitPQW2MiyZ9NEY,5471
  optimum/rbln/utils/logging.py,sha256=VKKBmlQSdg6iZCGmAXaWYiW67K84jyp1QJhLQSSjPPE,3453
  optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsFvz3k,1748
  optimum/rbln/utils/runtime_utils.py,sha256=nIJioiN16nAyAzoArKjsy5ocLUsrr0UEy4f3LNT82SA,7961
  optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
  optimum/rbln/utils/submodule.py,sha256=w5mgPgncI740gVKMu3S-69DGNdUSI0bTZxegQGcZ98Y,5011
- optimum_rbln-0.8.2a2.dist-info/METADATA,sha256=ncxtO1-Q-UrBAF4eatV3ZKNIpdpj5nwFNmHdxvJ0c3U,5299
- optimum_rbln-0.8.2a2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- optimum_rbln-0.8.2a2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
- optimum_rbln-0.8.2a2.dist-info/RECORD,,
+ optimum_rbln-0.8.2a3.dist-info/METADATA,sha256=T21cDk1oAC5ZWwCMO7fRsBL7BlrhaW_0iKHERjpQr1E,5299
+ optimum_rbln-0.8.2a3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ optimum_rbln-0.8.2a3.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+ optimum_rbln-0.8.2a3.dist-info/RECORD,,