optimum-rbln 0.7.4a5__py3-none-any.whl → 0.7.4a6__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
optimum/rbln/__init__.py CHANGED
@@ -86,6 +86,10 @@ _import_structure = {
         "RBLNPhiForCausalLMConfig",
         "RBLNQwen2ForCausalLM",
         "RBLNQwen2ForCausalLMConfig",
+        "RBLNQwen2_5_VisionTransformerPretrainedModel",
+        "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
+        "RBLNQwen2_5_VLForConditionalGeneration",
+        "RBLNQwen2_5_VLForConditionalGenerationConfig",
         "RBLNResNetForImageClassification",
         "RBLNResNetForImageClassificationConfig",
         "RBLNRobertaForMaskedLM",
@@ -287,6 +291,10 @@ if TYPE_CHECKING:
         RBLNMistralForCausalLMConfig,
         RBLNPhiForCausalLM,
         RBLNPhiForCausalLMConfig,
+        RBLNQwen2_5_VisionTransformerPretrainedModel,
+        RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
+        RBLNQwen2_5_VLForConditionalGeneration,
+        RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
         RBLNResNetForImageClassification,
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.7.4a5'
-__version_tuple__ = version_tuple = (0, 7, 4)
+__version__ = version = '0.7.4a6'
+__version_tuple__ = version_tuple = (0, 7, 4, 'a6')
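With this release the generated version tuple carries the pre-release segment ('a6') alongside the numeric components. A minimal sketch of comparing such tuples, assuming a consumer that only cares about the numeric release (the slicing idiom below is illustrative, not package API):

    __version_tuple__ = (0, 7, 4, 'a6')

    # Keep only the integer components before comparing releases;
    # the trailing 'a6' marks a pre-release and is not orderable against ints.
    numeric = tuple(p for p in __version_tuple__ if isinstance(p, int))
    assert numeric >= (0, 7, 4)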
@@ -314,10 +314,15 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
             )
 
         except rebel.core.exception.RBLNRuntimeError as e:
-            logger.warning(
-                f"Failed to create the runtime for the model due to a runtime error: {e.__class__.__name__} - {e}"
+            error_msg = (
+                f"\nFailed to create RBLN runtime: {str(e)}\n\n"
+                f"If you only need to compile the model without loading it to NPU, you can use:\n"
+                f"  from_pretrained(..., rbln_create_runtimes=False) or\n"
+                f"  from_pretrained(..., rbln_config={{..., 'create_runtimes': False}})\n\n"
+                f"To check your NPU status, run the 'rbln-stat' command in your terminal.\n"
+                f"Make sure your NPU is properly installed and operational."
             )
-            models = UnavailableRuntime()
+            raise rebel.core.exception.RBLNRuntimeError(error_msg) from e
 
         return cls(
             models,
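Runtime-creation failure is now a hard error whose message points at the compile-only path. A minimal sketch of that workflow, assuming one of the model classes exported by this package (the checkpoint ID and output directory are placeholders):

    from optimum.rbln import RBLNQwen2ForCausalLM

    # Compile the checkpoint for the RBLN NPU but skip runtime creation,
    # so no NPU needs to be attached to the compiling host.
    model = RBLNQwen2ForCausalLM.from_pretrained(
        "Qwen/Qwen2-0.5B-Instruct",  # placeholder checkpoint
        export=True,
        rbln_create_runtimes=False,
    )
    model.save_pretrained("qwen2-rbln")  # placeholder output directory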
@@ -423,6 +428,20 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
     def to(self, *args, **kwargs):
         return self
 
+    def parameters(self):
+        """
+        Provides a dummy parameter generator for compatibility.
+
+        This method mimics the interface of torch.nn.Module.parameters()
+        specifically for code that uses `next(model.parameters())` to infer
+        the device or dtype. It yields a single dummy tensor on CPU with float32 dtype.
+
+        Warning:
+            This does NOT yield the actual model parameters used by the RBLN runtime.
+            Code relying on iterating through all model parameters will not work as expected.
+        """
+        yield torch.tensor([1.0], dtype=torch.float32, device=torch.device("cpu"))
+
     def __call__(self, *args, **kwargs):
         return self.forward(*args, **kwargs)
 
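The dummy generator exists to satisfy the common device/dtype probe, not parameter iteration. A short sketch of the calling pattern it supports, where `model` stands in for any RBLNBaseModel subclass instance:

    import torch

    # Hugging Face-style probing that previously raised on RBLN models:
    first = next(model.parameters())
    assert first.device == torch.device("cpu")
    assert first.dtype == torch.float32

    # Only the single dummy tensor is yielded; the real weights live in the RBLN runtime.
    assert len(list(model.parameters())) == 1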
@@ -80,6 +80,10 @@ _import_structure = {
         "RBLNPhiForCausalLMConfig",
         "RBLNQwen2ForCausalLM",
         "RBLNQwen2ForCausalLMConfig",
+        "RBLNQwen2_5_VisionTransformerPretrainedModel",
+        "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
+        "RBLNQwen2_5_VLForConditionalGeneration",
+        "RBLNQwen2_5_VLForConditionalGenerationConfig",
         "RBLNT5EncoderModel",
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGeneration",
@@ -175,6 +179,10 @@ if TYPE_CHECKING:
         RBLNMistralForCausalLMConfig,
         RBLNPhiForCausalLM,
         RBLNPhiForCausalLMConfig,
+        RBLNQwen2_5_VisionTransformerPretrainedModel,
+        RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
+        RBLNQwen2_5_VLForConditionalGeneration,
+        RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
         RBLNT5EncoderModel,
@@ -56,6 +56,12 @@ _import_structure = {
         "RBLNCLIPVisionModelWithProjection",
         "RBLNCLIPVisionModelWithProjectionConfig",
     ],
+    "qwen2_5_vl": [
+        "RBLNQwen2_5_VisionTransformerPretrainedModel",
+        "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
+        "RBLNQwen2_5_VLForConditionalGeneration",
+        "RBLNQwen2_5_VLForConditionalGenerationConfig",
+    ],
     "decoderonly": [
         "RBLNDecoderOnlyModelForCausalLM",
         "RBLNDecoderOnlyModelForCausalLMConfig",
@@ -144,6 +150,12 @@ if TYPE_CHECKING:
     from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
     from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
     from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
+    from .qwen2_5_vl import (
+        RBLNQwen2_5_VisionTransformerPretrainedModel,
+        RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
+        RBLNQwen2_5_VLForConditionalGeneration,
+        RBLNQwen2_5_VLForConditionalGenerationConfig,
+    )
    from .t5 import (
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
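With these registrations in place, the Qwen2.5-VL classes become importable from the package root through its lazy-import machinery. A minimal usage sketch (the checkpoint ID is a placeholder, and `export=True` follows the optimum convention for compiling from a PyTorch checkpoint):

    from optimum.rbln import RBLNQwen2_5_VLForConditionalGeneration

    model = RBLNQwen2_5_VLForConditionalGeneration.from_pretrained(
        "Qwen/Qwen2.5-VL-7B-Instruct",  # placeholder checkpoint
        export=True,
    )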
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import math
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, Union
 
 import torch
 from torch import nn
@@ -220,6 +220,53 @@ class DecoderOnlyWrapper(nn.Module):
         self._phase = phase
         self.causal_lm.phase = phase
 
+    def forward_common(
+        self,
+        input_ids_or_inputs_embeds: torch.Tensor,
+        cache_position: torch.Tensor,
+        attention_mask: torch.Tensor,
+        query_position: torch.Tensor,
+        block_tables: torch.Tensor,
+        rotary_emb: Union[nn.Module, torch.Tensor],
+        *past_key_values: List[torch.Tensor],
+    ):
+        if input_ids_or_inputs_embeds.ndim == 2:
+            input_ids = input_ids_or_inputs_embeds
+            inputs_embeds = None
+        elif input_ids_or_inputs_embeds.ndim == 3:
+            input_ids = None
+            inputs_embeds = input_ids_or_inputs_embeds
+        else:
+            raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")
+
+        if len(past_key_values) != 2 * self.num_hidden_layers:
+            raise ValueError(
+                f"Different past_key_values to model's config. {len(past_key_values)} != {2 * self.num_hidden_layers}"
+            )
+
+        # [key, value] * n_layer -> ( (key, value) ) * n_layer
+        # cache shape : batch, n_heads, 1, max_seq_len, head_dim
+        _past_key_values = []
+        for i in range(self.config.num_hidden_layers):
+            key_states = past_key_values[i * 2]
+            value_states = past_key_values[i * 2 + 1]
+            past_key_value = [key_states, value_states]
+            _past_key_values.append(past_key_value)
+        past_key_values = _past_key_values
+
+        logit = self.causal_lm(
+            input_ids=input_ids,
+            inputs_embeds=inputs_embeds,
+            attention_mask=attention_mask,
+            cache_position=cache_position,
+            query_position=query_position,
+            past_key_values=past_key_values,
+            rotary_emb=rotary_emb,
+            block_tables=block_tables,
+        )
+
+        return logit
+
     def forward(self, *args):
         if self.phase == "decode":
             if self.use_attention_mask:
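`forward_common` receives the KV cache as a flat varargs sequence, two tensors per layer, and regroups it into per-layer [key, value] pairs before dispatching to the causal LM. A standalone sketch of that regrouping, with illustrative shapes:

    import torch

    num_layers = 2
    # cache shape per the comment above: (batch, n_heads, 1, max_seq_len, head_dim)
    flat = [torch.zeros(1, 8, 1, 1024, 64) for _ in range(2 * num_layers)]

    # [k0, v0, k1, v1, ...] -> [[k0, v0], [k1, v1], ...]
    paired = [[flat[i * 2], flat[i * 2 + 1]] for i in range(num_layers)]
    assert len(paired) == num_layers and all(len(p) == 2 for p in paired)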
@@ -262,43 +309,16 @@ class DecoderOnlyWrapper(nn.Module):
         else:
             raise ValueError(f"Unknown phase: {self.phase}")
 
-        if input_ids_or_inputs_embeds.ndim == 2:
-            input_ids = input_ids_or_inputs_embeds
-            inputs_embeds = None
-        elif input_ids_or_inputs_embeds.ndim == 3:
-            input_ids = None
-            inputs_embeds = input_ids_or_inputs_embeds
-        else:
-            raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")
-
-        if len(past_key_values) != 2 * self.num_hidden_layers:
-            raise ValueError(
-                f"Different past_key_values to model's config. {len(past_key_values)} != {2 * self.num_hidden_layers}"
-            )
-
-        # [key, value] * n_layer -> ( (key, value) ) * n_layer
-        # cache shape : batch, n_heads, 1, max_seq_len, head_dim
-        _past_key_values = []
-        for i in range(self.config.num_hidden_layers):
-            key_states = past_key_values[i * 2]
-            value_states = past_key_values[i * 2 + 1]
-            past_key_value = [key_states, value_states]
-            _past_key_values.append(past_key_value)
-        past_key_values = _past_key_values
-
-        logit = self.causal_lm(
-            input_ids=input_ids,
-            inputs_embeds=inputs_embeds,
-            attention_mask=attention_mask,
-            cache_position=cache_position,
-            query_position=query_position,
-            past_key_values=past_key_values,
-            rotary_emb=self.rotary_emb,
-            block_tables=block_tables,
+        return self.forward_common(
+            input_ids_or_inputs_embeds,
+            cache_position,
+            attention_mask,
+            query_position,
+            block_tables,
+            self.rotary_emb,
+            *past_key_values,
         )
 
-        return logit
-
 
 class DecoderOnlyForCausalLM(nn.Module):
     """A specialized wrapper for Causal Language Models optimized for RBLN compilation.
@@ -322,12 +342,13 @@ class DecoderOnlyForCausalLM(nn.Module):
         _phase: Current processing phase ("prefill" or "decode")
     """
 
-    def __init__(self, causal_lm: PreTrainedModel, model):
+    def __init__(self, causal_lm: PreTrainedModel, model: nn.Module):
         super().__init__()
         self.config = causal_lm.config
         self._original_mod = causal_lm
         self.model = model
         self._phase = "prefill"
+        self.lm_head = self._original_mod.lm_head
 
     @property
     def phase(self):
@@ -363,7 +384,7 @@ class DecoderOnlyForCausalLM(nn.Module):
         if self.phase == "prefill":
             hidden_states = hidden_states[:, query_position.to(torch.int).unsqueeze(0)]
 
-        logits = self._original_mod.lm_head(hidden_states)
+        logits = self.lm_head(hidden_states)
         return logits
 
 
@@ -455,8 +476,12 @@ class DecoderOnlyModel(nn.Module):
 
         # get cos,sin vector if needed
         if rotary_emb is not None:
-            cos, sin = rotary_emb(hidden_states, self.max_seq_len)  # dtype carrier, max_seq_len
-            cos, sin = slice_and_unsqueeze_cos_sin(cos, sin, cache_position)
+            if isinstance(rotary_emb, torch.Tensor):
+                cos = rotary_emb[0]
+                sin = rotary_emb[1]
+            else:
+                cos, sin = rotary_emb(hidden_states, self.max_seq_len)  # dtype carrier, max_seq_len
+                cos, sin = slice_and_unsqueeze_cos_sin(cos, sin, cache_position)
         else:
             batch_size = inputs_embeds.shape[0]
             if cache_position.shape[0] > 1:
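The new `isinstance` branch lets a caller hand the model precomputed rotary embeddings as a single tensor, with `rotary_emb[0]` read as cos and `rotary_emb[1]` as sin, instead of a module that computes them on the fly. A sketch of constructing such a tensor with the standard rotary formulation (base 10000 and the shapes are illustrative assumptions):

    import torch

    seq_len, head_dim = 1024, 64
    inv_freq = 1.0 / (10000.0 ** (torch.arange(0, head_dim, 2).float() / head_dim))
    freqs = torch.outer(torch.arange(seq_len).float(), inv_freq)
    emb = torch.cat((freqs, freqs), dim=-1)

    # Stack so that index 0 is cos and index 1 is sin, matching the branch above.
    rotary_emb = torch.stack([emb.cos(), emb.sin()])  # (2, seq_len, head_dim)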
@@ -833,7 +858,6 @@ def rotate_half(x):
 
 def apply_rotary_pos_emb(q, k, cos, sin):
     """Applies Rotary Position Embedding to the query and key tensors."""
-
     q_embed = (q * cos) + (rotate_half(q) * sin)
     k_embed = (k * cos) + (rotate_half(k) * sin)
     return q_embed, k_embed
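For reference, `rotate_half` (named in the hunk header but not shown in this diff) is conventionally the half-rotation used by the formula above; a sketch of the standard implementation, assuming this package follows the usual Hugging Face variant:

    import torch

    def rotate_half(x):
        # Swap the two halves of the last dimension, negating the second half.
        x1 = x[..., : x.shape[-1] // 2]
        x2 = x[..., x.shape[-1] // 2 :]
        return torch.cat((-x2, x1), dim=-1)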