optimum-rbln 0.7.4a5__py3-none-any.whl → 0.7.4a7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
optimum/rbln/__init__.py CHANGED
@@ -74,6 +74,10 @@ _import_structure = {
  "RBLNGemmaForCausalLMConfig",
  "RBLNGPT2LMHeadModel",
  "RBLNGPT2LMHeadModelConfig",
+ "RBLNIdefics3VisionTransformer",
+ "RBLNIdefics3ForConditionalGeneration",
+ "RBLNIdefics3ForConditionalGenerationConfig",
+ "RBLNIdefics3VisionTransformerConfig",
  "RBLNLlamaForCausalLM",
  "RBLNLlamaForCausalLMConfig",
  "RBLNLlavaNextForConditionalGeneration",
@@ -86,6 +90,10 @@ _import_structure = {
  "RBLNPhiForCausalLMConfig",
  "RBLNQwen2ForCausalLM",
  "RBLNQwen2ForCausalLMConfig",
+ "RBLNQwen2_5_VisionTransformerPretrainedModel",
+ "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
+ "RBLNQwen2_5_VLForConditionalGeneration",
+ "RBLNQwen2_5_VLForConditionalGenerationConfig",
  "RBLNResNetForImageClassification",
  "RBLNResNetForImageClassificationConfig",
  "RBLNRobertaForMaskedLM",
@@ -277,6 +285,10 @@ if TYPE_CHECKING:
  RBLNGemmaForCausalLMConfig,
  RBLNGPT2LMHeadModel,
  RBLNGPT2LMHeadModelConfig,
+ RBLNIdefics3ForConditionalGeneration,
+ RBLNIdefics3ForConditionalGenerationConfig,
+ RBLNIdefics3VisionTransformer,
+ RBLNIdefics3VisionTransformerConfig,
  RBLNLlamaForCausalLM,
  RBLNLlamaForCausalLMConfig,
  RBLNLlavaNextForConditionalGeneration,
@@ -287,6 +299,10 @@ if TYPE_CHECKING:
  RBLNMistralForCausalLMConfig,
  RBLNPhiForCausalLM,
  RBLNPhiForCausalLMConfig,
+ RBLNQwen2_5_VisionTransformerPretrainedModel,
+ RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
+ RBLNQwen2_5_VLForConditionalGeneration,
+ RBLNQwen2_5_VLForConditionalGenerationConfig,
  RBLNQwen2ForCausalLM,
  RBLNQwen2ForCausalLMConfig,
  RBLNResNetForImageClassification,
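
The hunks above register the new Idefics3 and Qwen2.5-VL classes in the package's public import structure. As a rough illustration of how one of the new classes would be used, the sketch below follows the usual optimum-rbln from_pretrained pattern; the checkpoint id and the export flag are assumptions for illustration, not taken from this diff.

    # Minimal sketch (assumptions noted above): compile one of the newly
    # exported multimodal classes for an RBLN NPU and save the artifacts.
    from optimum.rbln import RBLNQwen2_5_VLForConditionalGeneration

    model = RBLNQwen2_5_VLForConditionalGeneration.from_pretrained(
        "Qwen/Qwen2.5-VL-7B-Instruct",  # hypothetical checkpoint id
        export=True,                    # assumed flag: compile from a Transformers checkpoint
    )
    model.save_pretrained("qwen2_5_vl_rbln")  # reload later without recompiling
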
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.7.4a5'
- __version_tuple__ = version_tuple = (0, 7, 4)
+ __version__ = version = '0.7.4a7'
+ __version_tuple__ = version_tuple = (0, 7, 4, 'a7')
@@ -314,10 +314,15 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
      )

  except rebel.core.exception.RBLNRuntimeError as e:
-     logger.warning(
-         f"Failed to create the runtime for the model due to a runtime error: {e.__class__.__name__} - {e}"
+     error_msg = (
+         f"\nFailed to create RBLN runtime: {str(e)}\n\n"
+         f"If you only need to compile the model without loading it to NPU, you can use:\n"
+         f" from_pretrained(..., rbln_create_runtimes=False) or\n"
+         f" from_pretrained(..., rbln_config={{..., 'create_runtimes': False}})\n\n"
+         f"To check your NPU status, run the 'rbln-stat' command in your terminal.\n"
+         f"Make sure your NPU is properly installed and operational."
      )
-     models = UnavailableRuntime()
+     raise rebel.core.exception.RBLNRuntimeError(error_msg) from e

  return cls(
      models,
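
The replacement error now points at two ways to compile without allocating NPU runtimes. Below is a short sketch of that path, assuming the usual optimum-rbln loading pattern; the model class, checkpoint id, and export flag are placeholders, while rbln_create_runtimes is quoted from the error message itself.

    # Sketch: compile only, skip runtime creation (useful on machines without an NPU).
    from optimum.rbln import RBLNLlamaForCausalLM

    model = RBLNLlamaForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",    # placeholder checkpoint id
        export=True,                   # assumed flag: compile from a Transformers checkpoint
        rbln_create_runtimes=False,    # option named in the new error message
    )
    model.save_pretrained("llama_rbln")  # compiled artifacts only; load on an NPU host later
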
@@ -423,6 +428,20 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  def to(self, *args, **kwargs):
      return self

+ def parameters(self):
+     """
+     Provides a dummy parameter generator for compatibility.
+
+     This method mimics the interface of torch.nn.Module.parameters()
+     specifically for code that uses `next(model.parameters())` to infer
+     the device or dtype. It yields a single dummy tensor on CPU with float32 dtype.
+
+     Warning:
+         This does NOT yield the actual model parameters used by the RBLN runtime.
+         Code relying on iterating through all model parameters will not work as expected.
+     """
+     yield torch.tensor([1.0], dtype=torch.float32, device=torch.device("cpu"))
+
  def __call__(self, *args, **kwargs):
      return self.forward(*args, **kwargs)

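The docstring above spells out the intent: only code that probes a single parameter for device or dtype is supported. A tiny sketch of the pattern it targets, with model standing in for any loaded RBLN model instance:

    # Supported pattern: infer device/dtype from the single dummy parameter.
    probe = next(model.parameters())
    print(probe.device, probe.dtype)  # cpu, torch.float32

    # Not supported: anything that iterates over *all* parameters. A parameter
    # count, for example, only ever sees the one dummy tensor.
    num_params = sum(p.numel() for p in model.parameters())  # 1, not the real count
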
@@ -68,6 +68,10 @@ _import_structure = {
  "RBLNGemmaForCausalLMConfig",
  "RBLNGPT2LMHeadModel",
  "RBLNGPT2LMHeadModelConfig",
+ "RBLNIdefics3VisionTransformer",
+ "RBLNIdefics3ForConditionalGeneration",
+ "RBLNIdefics3ForConditionalGenerationConfig",
+ "RBLNIdefics3VisionTransformerConfig",
  "RBLNLlamaForCausalLM",
  "RBLNLlamaForCausalLMConfig",
  "RBLNLlavaNextForConditionalGeneration",
@@ -80,6 +84,10 @@ _import_structure = {
  "RBLNPhiForCausalLMConfig",
  "RBLNQwen2ForCausalLM",
  "RBLNQwen2ForCausalLMConfig",
+ "RBLNQwen2_5_VisionTransformerPretrainedModel",
+ "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
+ "RBLNQwen2_5_VLForConditionalGeneration",
+ "RBLNQwen2_5_VLForConditionalGenerationConfig",
  "RBLNT5EncoderModel",
  "RBLNT5EncoderModelConfig",
  "RBLNT5ForConditionalGeneration",
@@ -165,6 +173,10 @@ if TYPE_CHECKING:
  RBLNGemmaForCausalLMConfig,
  RBLNGPT2LMHeadModel,
  RBLNGPT2LMHeadModelConfig,
+ RBLNIdefics3ForConditionalGeneration,
+ RBLNIdefics3ForConditionalGenerationConfig,
+ RBLNIdefics3VisionTransformer,
+ RBLNIdefics3VisionTransformerConfig,
  RBLNLlamaForCausalLM,
  RBLNLlamaForCausalLMConfig,
  RBLNLlavaNextForConditionalGeneration,
@@ -175,6 +187,10 @@ if TYPE_CHECKING:
  RBLNMistralForCausalLMConfig,
  RBLNPhiForCausalLM,
  RBLNPhiForCausalLMConfig,
+ RBLNQwen2_5_VisionTransformerPretrainedModel,
+ RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
+ RBLNQwen2_5_VLForConditionalGeneration,
+ RBLNQwen2_5_VLForConditionalGenerationConfig,
  RBLNQwen2ForCausalLM,
  RBLNQwen2ForCausalLMConfig,
  RBLNT5EncoderModel,
@@ -56,6 +56,12 @@ _import_structure = {
      "RBLNCLIPVisionModelWithProjection",
      "RBLNCLIPVisionModelWithProjectionConfig",
  ],
+ "qwen2_5_vl": [
+     "RBLNQwen2_5_VisionTransformerPretrainedModel",
+     "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
+     "RBLNQwen2_5_VLForConditionalGeneration",
+     "RBLNQwen2_5_VLForConditionalGenerationConfig",
+ ],
  "decoderonly": [
      "RBLNDecoderOnlyModelForCausalLM",
      "RBLNDecoderOnlyModelForCausalLMConfig",
@@ -67,6 +73,12 @@ _import_structure = {
  "exaone": ["RBLNExaoneForCausalLM", "RBLNExaoneForCausalLMConfig"],
  "gemma": ["RBLNGemmaForCausalLM", "RBLNGemmaForCausalLMConfig"],
  "gpt2": ["RBLNGPT2LMHeadModel", "RBLNGPT2LMHeadModelConfig"],
+ "idefics3": [
+     "RBLNIdefics3VisionTransformer",
+     "RBLNIdefics3ForConditionalGeneration",
+     "RBLNIdefics3ForConditionalGenerationConfig",
+     "RBLNIdefics3VisionTransformerConfig",
+ ],
  "llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig"],
  "llava_next": ["RBLNLlavaNextForConditionalGeneration", "RBLNLlavaNextForConditionalGenerationConfig"],
  "midm": ["RBLNMidmLMHeadModel", "RBLNMidmLMHeadModelConfig"],
@@ -138,12 +150,24 @@ if TYPE_CHECKING:
  from .exaone import RBLNExaoneForCausalLM, RBLNExaoneForCausalLMConfig
  from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig
  from .gpt2 import RBLNGPT2LMHeadModel, RBLNGPT2LMHeadModelConfig
+ from .idefics3 import (
+     RBLNIdefics3ForConditionalGeneration,
+     RBLNIdefics3ForConditionalGenerationConfig,
+     RBLNIdefics3VisionTransformer,
+     RBLNIdefics3VisionTransformerConfig,
+ )
  from .llama import RBLNLlamaForCausalLM, RBLNLlamaForCausalLMConfig
  from .llava_next import RBLNLlavaNextForConditionalGeneration, RBLNLlavaNextForConditionalGenerationConfig
  from .midm import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
  from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
  from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
  from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
+ from .qwen2_5_vl import (
+     RBLNQwen2_5_VisionTransformerPretrainedModel,
+     RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
+     RBLNQwen2_5_VLForConditionalGeneration,
+     RBLNQwen2_5_VLForConditionalGenerationConfig,
+ )
  from .t5 import (
      RBLNT5EncoderModel,
      RBLNT5EncoderModelConfig,
@@ -13,7 +13,7 @@
  # limitations under the License.

  import math
- from typing import List, Optional, Tuple
+ from typing import List, Optional, Tuple, Union

  import torch
  from torch import nn
@@ -184,6 +184,7 @@ class DecoderOnlyWrapper(nn.Module):

  def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
      new_layers = []
+
      for layer in causal_lm.model.layers:
          if self.attn_impl == "eager":
              new_self_attn = DecoderOnlyAttention(
@@ -201,6 +202,7 @@ class DecoderOnlyWrapper(nn.Module):

      new_layer = DecoderOnlyLayer(layer, new_self_attn)
      new_layers.append(new_layer)
+
  new_model = DecoderOnlyModel(
      causal_lm.model,
      new_layers,
@@ -220,6 +222,53 @@ class DecoderOnlyWrapper(nn.Module):
  self._phase = phase
  self.causal_lm.phase = phase

+ def forward_common(
+     self,
+     input_ids_or_inputs_embeds: torch.Tensor,
+     cache_position: torch.Tensor,
+     attention_mask: torch.Tensor,
+     query_position: torch.Tensor,
+     block_tables: torch.Tensor,
+     rotary_emb: Union[nn.Module, torch.Tensor],
+     *past_key_values: List[torch.Tensor],
+ ):
+     if input_ids_or_inputs_embeds.ndim == 2:
+         input_ids = input_ids_or_inputs_embeds
+         inputs_embeds = None
+     elif input_ids_or_inputs_embeds.ndim == 3:
+         input_ids = None
+         inputs_embeds = input_ids_or_inputs_embeds
+     else:
+         raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")
+
+     if len(past_key_values) != 2 * self.num_hidden_layers:
+         raise ValueError(
+             f"Different past_key_values to model's config. {len(past_key_values)} != {2 * self.num_hidden_layers}"
+         )
+
+     # [key, value] * n_layer -> ( (key, value) ) * n_layer
+     # cache shape : batch, n_heads, 1, max_seq_len, head_dim
+     _past_key_values = []
+     for i in range(self.config.num_hidden_layers):
+         key_states = past_key_values[i * 2]
+         value_states = past_key_values[i * 2 + 1]
+         past_key_value = [key_states, value_states]
+         _past_key_values.append(past_key_value)
+     past_key_values = _past_key_values
+
+     logit = self.causal_lm(
+         input_ids=input_ids,
+         inputs_embeds=inputs_embeds,
+         attention_mask=attention_mask,
+         cache_position=cache_position,
+         query_position=query_position,
+         past_key_values=past_key_values,
+         rotary_emb=rotary_emb,
+         block_tables=block_tables,
+     )
+
+     return logit
+
  def forward(self, *args):
      if self.phase == "decode":
          if self.use_attention_mask:
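
forward_common also centralizes the cache regrouping that both phases need: the runtime passes the KV cache as a flat sequence of 2 * num_hidden_layers tensors, which is rebuilt into one [key, value] pair per layer. A standalone sketch of that reshaping with toy tensors (names and shapes are illustrative only):

    # Toy illustration of the regrouping above: [k0, v0, k1, v1, ...] ->
    # [[k0, v0], [k1, v1], ...], one pair per decoder layer.
    import torch

    num_hidden_layers = 2
    # cache shape follows the comment above: batch, n_heads, 1, max_seq_len, head_dim
    flat_cache = [torch.zeros(1, 4, 1, 16, 8) for _ in range(2 * num_hidden_layers)]
    assert len(flat_cache) == 2 * num_hidden_layers

    per_layer = [
        [flat_cache[2 * i], flat_cache[2 * i + 1]]  # [key_states, value_states]
        for i in range(num_hidden_layers)
    ]
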
@@ -262,43 +311,16 @@ class DecoderOnlyWrapper(nn.Module):
  else:
      raise ValueError(f"Unknown phase: {self.phase}")

- if input_ids_or_inputs_embeds.ndim == 2:
-     input_ids = input_ids_or_inputs_embeds
-     inputs_embeds = None
- elif input_ids_or_inputs_embeds.ndim == 3:
-     input_ids = None
-     inputs_embeds = input_ids_or_inputs_embeds
- else:
-     raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")
-
- if len(past_key_values) != 2 * self.num_hidden_layers:
-     raise ValueError(
-         f"Different past_key_values to model's config. {len(past_key_values)} != {2 * self.num_hidden_layers}"
-     )
-
- # [key, value] * n_layer -> ( (key, value) ) * n_layer
- # cache shape : batch, n_heads, 1, max_seq_len, head_dim
- _past_key_values = []
- for i in range(self.config.num_hidden_layers):
-     key_states = past_key_values[i * 2]
-     value_states = past_key_values[i * 2 + 1]
-     past_key_value = [key_states, value_states]
-     _past_key_values.append(past_key_value)
- past_key_values = _past_key_values
-
- logit = self.causal_lm(
-     input_ids=input_ids,
-     inputs_embeds=inputs_embeds,
-     attention_mask=attention_mask,
-     cache_position=cache_position,
-     query_position=query_position,
-     past_key_values=past_key_values,
-     rotary_emb=self.rotary_emb,
-     block_tables=block_tables,
+ return self.forward_common(
+     input_ids_or_inputs_embeds,
+     cache_position,
+     attention_mask,
+     query_position,
+     block_tables,
+     self.rotary_emb,
+     *past_key_values,
  )

- return logit
-

  class DecoderOnlyForCausalLM(nn.Module):
      """A specialized wrapper for Causal Language Models optimized for RBLN compilation.
@@ -322,12 +344,13 @@ class DecoderOnlyForCausalLM(nn.Module):
      _phase: Current processing phase ("prefill" or "decode")
  """

- def __init__(self, causal_lm: PreTrainedModel, model):
+ def __init__(self, causal_lm: PreTrainedModel, model: nn.Module):
      super().__init__()
      self.config = causal_lm.config
      self._original_mod = causal_lm
      self.model = model
      self._phase = "prefill"
+     self.lm_head = self._original_mod.lm_head

  @property
  def phase(self):
@@ -363,7 +386,7 @@ class DecoderOnlyForCausalLM(nn.Module):
  if self.phase == "prefill":
      hidden_states = hidden_states[:, query_position.to(torch.int).unsqueeze(0)]

- logits = self._original_mod.lm_head(hidden_states)
+ logits = self.lm_head(hidden_states)
  return logits


@@ -455,8 +478,12 @@ class DecoderOnlyModel(nn.Module):

  # get cos,sin vector if needed
  if rotary_emb is not None:
-     cos, sin = rotary_emb(hidden_states, self.max_seq_len) # dtype carrier, max_seq_len
-     cos, sin = slice_and_unsqueeze_cos_sin(cos, sin, cache_position)
+     if isinstance(rotary_emb, torch.Tensor):
+         cos = rotary_emb[0]
+         sin = rotary_emb[1]
+     else:
+         cos, sin = rotary_emb(hidden_states, self.max_seq_len) # dtype carrier, max_seq_len
+         cos, sin = slice_and_unsqueeze_cos_sin(cos, sin, cache_position)
  else:
      batch_size = inputs_embeds.shape[0]
      if cache_position.shape[0] > 1:
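
With this change, rotary_emb may be either the usual rotary-embedding module or a tensor that already stacks the cos/sin tables, which the isinstance(..., torch.Tensor) branch simply unpacks. A sketch of building such a stacked tensor (shapes are illustrative assumptions, not the exact runtime layout):

    # Precomputed rotary tables passed as one stacked tensor: index 0 is cos, index 1 is sin.
    import torch

    seq_len, head_dim = 32, 64
    cos = torch.ones(1, seq_len, head_dim)   # illustrative shape
    sin = torch.zeros(1, seq_len, head_dim)
    rotary_emb = torch.stack([cos, sin])     # rotary_emb[0] -> cos, rotary_emb[1] -> sin
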
@@ -833,7 +860,6 @@ def rotate_half(x):

  def apply_rotary_pos_emb(q, k, cos, sin):
      """Applies Rotary Position Embedding to the query and key tensors."""
-
      q_embed = (q * cos) + (rotate_half(q) * sin)
      k_embed = (k * cos) + (rotate_half(k) * sin)
      return q_embed, k_embed
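
For reference, the rotate-half formulation used by apply_rotary_pos_emb can be checked on toy tensors: with cos = 1 and sin = 0 the rotation is the identity. A self-contained sketch (the rotate_half here mirrors the standard split-and-flip implementation; shapes are arbitrary):

    import torch

    def rotate_half(x):
        # Split the last dimension in two halves and swap them with a sign flip.
        x1, x2 = x.chunk(2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)

    q = torch.randn(1, 2, 4, 8)        # (batch, heads, seq, head_dim)
    k = torch.randn(1, 2, 4, 8)
    cos = torch.ones(1, 1, 4, 8)       # cos = 1, sin = 0 -> identity rotation
    sin = torch.zeros(1, 1, 4, 8)

    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    assert torch.allclose(q_embed, q) and torch.allclose(k_embed, k)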