optimum-rbln 0.8.3rc0__py3-none-any.whl → 0.8.4a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
     commit_id: COMMIT_ID
     __commit_id__: COMMIT_ID

-__version__ = version = '0.8.3rc0'
-__version_tuple__ = version_tuple = (0, 8, 3, 'rc0')
+__version__ = version = '0.8.4a1'
+__version_tuple__ = version_tuple = (0, 8, 4, 'a1')

 __commit_id__ = commit_id = None
@@ -248,9 +248,6 @@ class RBLNAutoConfig:
             if key[5:] not in RUNTIME_KEYWORDS and key[5:] not in cls.submodules
         }

-        if len(rbln_kwargs) > 0:
-            raise ValueError(f"Cannot set the following arguments: {list(rbln_kwargs.keys())}")
-
         # Process submodule's rbln_config
         for submodule in cls.submodules:
             if submodule not in config_file:
@@ -265,6 +262,16 @@ class RBLNAutoConfig:

         config_file.update(rbln_runtime_kwargs)

+        rbln_config = cls(**config_file)
+
+        if len(rbln_kwargs) > 0:
+            for key, value in rbln_kwargs.items():
+                if getattr(rbln_config, key) != value:
+                    raise ValueError(
+                        f"Cannot set the following arguments: {list(rbln_kwargs.keys())} "
+                        f"Since the value is already set to {getattr(rbln_config, key)}"
+                    )
+
         if return_unused_kwargs:
             return cls(**config_file), kwargs
         else:
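Net effect of the two RBLNAutoConfig hunks above: unconsumed `rbln_*` kwargs are no longer rejected up front; the config is materialized first and an override is rejected only when it disagrees with the stored value. A minimal sketch of this validate-after-construct pattern, with a hypothetical config class (not the package's API):

    class StoredConfig:
        def __init__(self, batch_size: int = 1):
            self.batch_size = batch_size

    def load(config_file: dict, **rbln_kwargs) -> StoredConfig:
        config = StoredConfig(**config_file)
        # Mirror of the diff's check: an override that matches the persisted
        # value passes silently; a conflicting one raises.
        for key, value in rbln_kwargs.items():
            if getattr(config, key) != value:
                raise ValueError(f"Cannot set {key!r}: already set to {getattr(config, key)}")
        return config

    load({"batch_size": 4}, batch_size=4)    # ok: agrees with the stored value
    # load({"batch_size": 4}, batch_size=8)  # would raise ValueError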
@@ -476,6 +483,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
    non_save_attributes = [
        "_frozen",
        "_runtime_options",
+       "torch_dtype",
        "npu",
        "tensor_parallel_size",
        "create_runtimes",
@@ -566,6 +574,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         tensor_parallel_size: Optional[int] = None,
         timeout: Optional[int] = None,
         optimum_rbln_version: Optional[str] = None,
+        _torch_dtype: Optional[str] = None,
         _compile_cfgs: List[RBLNCompileConfig] = [],
         **kwargs: Any,
     ):
@@ -583,6 +592,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
            tensor_parallel_size (Optional[int]): Size for tensor parallelism to distribute the model across devices.
            timeout (Optional[int]): The timeout for the runtime in seconds. If it isn't provided, it will be set to 60 by default.
            optimum_rbln_version (Optional[str]): The optimum-rbln version used for this configuration.
+           _torch_dtype (Optional[str]): The data type to use for the model.
            _compile_cfgs (List[RBLNCompileConfig]): List of compilation configurations for the model.
            **kwargs: Additional keyword arguments.

@@ -610,6 +620,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         self.npu = npu
         self.tensor_parallel_size = tensor_parallel_size

+        self._torch_dtype = _torch_dtype or "float32"
         self.optimum_rbln_version = optimum_rbln_version
         if self.optimum_rbln_version is None:
             self.optimum_rbln_version = __version__
@@ -639,6 +650,17 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):

            raise ValueError(f"Unexpected arguments: {kwargs.keys()}")

+    @property
+    def torch_dtype(self):
+        return getattr(torch, self._torch_dtype)
+
+    @torch_dtype.setter
+    def torch_dtype(self, torch_dtype: Union[str, torch.dtype]):
+        if isinstance(torch_dtype, torch.dtype):
+            torch_dtype = RBLNCompileConfig.normalize_dtype(torch_dtype)
+
+        self._torch_dtype = torch_dtype
+
     @property
     def rbln_model_cls_name(self) -> str:
         return self.__class__.__name__[:-6]
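The new `torch_dtype` accessor keeps the dtype as a plain string in `_torch_dtype` (which is why serialization sees only the string and `torch_dtype` itself joins `non_save_attributes`), materializing a real `torch.dtype` on read. A standalone sketch of the same string-backed property pattern; the class and the `removeprefix` normalization here are illustrative, not the package's `RBLNCompileConfig.normalize_dtype`:

    import torch

    class DtypeBackedConfig:
        # Stores the dtype as a string so the config stays JSON-serializable.
        def __init__(self, _torch_dtype: str = "float32"):
            self._torch_dtype = _torch_dtype

        @property
        def torch_dtype(self) -> torch.dtype:
            # Resolve the stored name (e.g. "float16") to torch.float16.
            return getattr(torch, self._torch_dtype)

        @torch_dtype.setter
        def torch_dtype(self, value):
            if isinstance(value, torch.dtype):
                # str(torch.float16) == "torch.float16"; keep only "float16".
                value = str(value).removeprefix("torch.")
            self._torch_dtype = value

    cfg = DtypeBackedConfig()
    cfg.torch_dtype = torch.float16
    assert cfg.torch_dtype is torch.float16
    assert cfg._torch_dtype == "float16"  # what gets serialized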
@@ -130,7 +130,7 @@ class RBLNDiffusionMixin:
         cls,
         model_id: str,
         *,
-        export: bool = False,
+        export: bool = None,
         model_save_dir: Optional[PathLike] = None,
         rbln_config: Dict[str, Any] = {},
         lora_ids: Optional[Union[str, List[str]]] = None,
@@ -181,6 +181,20 @@ class RBLNDiffusionMixin:
         """
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)

+        if export is None:
+            export = any(
+                not RBLNModel._is_compiled(
+                    model_id,
+                    token=kwargs.get("token"),
+                    revision=kwargs.get("revision"),
+                    force_download=kwargs.get("force_download", False),
+                    cache_dir=kwargs.get("cache_dir"),
+                    subfolder=submodule_name,
+                    local_files_only=kwargs.get("local_files_only", False),
+                )
+                for submodule_name in cls._submodules
+            )
+
         if export:
             # keep submodules if user passed any of them.
             passed_submodules = {
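With this change a diffusers pipeline id compiles only when at least one submodule lacks compiled artifacts; otherwise it loads directly. A hedged usage sketch (the pipeline class follows optimum-rbln's RBLN* naming, the model id is a placeholder, and `save_pretrained` support is assumed to match HF pipelines):

    from optimum.rbln import RBLNStableDiffusionPipeline

    # Plain HF checkpoint: no compiled submodules found, so export resolves
    # to True and every submodule is compiled.
    pipe = RBLNStableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    pipe.save_pretrained("sd15-rbln")

    # Directory with compiled artifacts: export resolves to False, no recompile.
    pipe = RBLNStableDiffusionPipeline.from_pretrained("sd15-rbln")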
@@ -14,7 +14,8 @@


 import importlib
-from typing import Type
+from pathlib import Path
+from typing import Type, Union

 from diffusers.models.controlnets import ControlNetUnionModel
 from diffusers.pipelines.auto_pipeline import (
@@ -42,7 +43,13 @@ class RBLNAutoPipelineBase:
     _model_mapping_names = None

     @classmethod
-    def get_rbln_cls(cls, pretrained_model_name_or_path, export=True, **kwargs):
+    def get_rbln_cls(cls, pretrained_model_name_or_path: Union[str, Path], export: bool = None, **kwargs):
+        if isinstance(pretrained_model_name_or_path, Path):
+            pretrained_model_name_or_path = pretrained_model_name_or_path.as_posix()
+
+        if export is None:
+            export = not cls._is_compiled_pipeline(pretrained_model_name_or_path, **kwargs)
+
         if export:
             hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
             rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
@@ -66,7 +73,7 @@ class RBLNAutoPipelineBase:
         return rbln_cls

     @classmethod
-    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
+    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path: Union[str, Path], **kwargs):
         """
         Retrieve the path to the compiled model directory for a given RBLN model.

@@ -86,10 +93,36 @@ class RBLNAutoPipelineBase:

         return model_index_config["_class_name"]

+    @classmethod
+    def _is_compiled_pipeline(
+        cls,
+        pretrained_model_name_or_path: Union[str, Path],
+        cache_dir=None,
+        force_download=False,
+        proxies=None,
+        token=None,
+        local_files_only=False,
+        revision=None,
+        **kwargs,
+    ):
+        config: dict = cls.load_config(
+            pretrained_model_name_or_path,
+            cache_dir=cache_dir,
+            force_download=force_download,
+            proxies=proxies,
+            token=token,
+            local_files_only=local_files_only,
+            revision=revision,
+        )
+        for value in config.values():
+            if isinstance(value, list) and len(value) > 0 and value[0] == "optimum.rbln":
+                return True
+        return False
+
     @classmethod
     def infer_hf_model_class(
         cls,
-        pretrained_model_or_path,
+        pretrained_model_or_path: Union[str, Path],
         cache_dir=None,
         force_download=False,
         proxies=None,
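`_is_compiled_pipeline` keys off the diffusers `model_index.json` convention, in which each component maps to a `[library, class_name]` pair. A sketch of the shape it scans for (the component entries below are illustrative):

    # model_index.json of a compiled pipeline references "optimum.rbln":
    model_index = {
        "_class_name": "RBLNStableDiffusionPipeline",
        "unet": ["optimum.rbln", "RBLNUNet2DConditionModel"],
        "scheduler": ["diffusers", "PNDMScheduler"],
    }

    def is_compiled_pipeline(config: dict) -> bool:
        # Same test as the diff: any [library, class] pair whose library
        # is "optimum.rbln" marks the pipeline as already compiled.
        return any(
            isinstance(v, list) and len(v) > 0 and v[0] == "optimum.rbln"
            for v in config.values()
        )

    assert is_compiled_pipeline(model_index)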
optimum/rbln/modeling.py CHANGED
@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, get_args, ge
 import rebel
 import torch
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
-from transformers import AutoConfig, PretrainedConfig
+from transformers import PretrainedConfig
 from transformers.modeling_outputs import BaseModelOutput

 from .configuration_utils import DEFAULT_COMPILED_MODEL_NAME, RBLNModelConfig
@@ -119,9 +119,6 @@ class RBLNModel(RBLNBaseModel):
         # Save configs
         if config is None:
             config = model.config
-            # remote_config
-            if hasattr(config, "auto_map") and "AutoConfig" in config.auto_map:
-                config = AutoConfig.from_pretrained(config._name_or_path, **kwargs)

         if hasattr(model, "can_generate") and model.can_generate():
             import json
@@ -34,7 +34,7 @@ from .utils.submodule import SubModulesMixin


 if TYPE_CHECKING:
-    from transformers import PreTrainedModel
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel

 logger = get_logger(__name__)

@@ -53,6 +53,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
     config_class = AutoConfig
     config_name = "config.json"
     hf_library_name = "transformers"
+    _supports_non_fp32 = False

     def __init__(
         self,
@@ -91,7 +92,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

         self.device = torch.device("cpu")
         self.training = False
-        self.dtype = torch.float32
+        self.dtype = rbln_config.torch_dtype

         # FIXME :: model_save_dir is not used after initialized. (This can be used when save/load)
         # This attribute is needed to keep one reference on the temporary directory, since garbage collecting it
@@ -342,11 +343,37 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         rbln_config, kwargs = config_cls.initialize_from_kwargs(rbln_config, **kwargs)
         return rbln_config, kwargs

+    @classmethod
+    def _is_compiled(
+        cls,
+        model_id: Union[str, Path],
+        token: Optional[Union[bool, str]] = None,
+        revision: Optional[str] = None,
+        force_download: bool = False,
+        cache_dir: Optional[str] = None,
+        subfolder: str = "",
+        local_files_only: bool = False,
+    ) -> bool:
+        # Check if the model is already compiled.
+        try:
+            cls._load_compiled_model_dir(
+                model_id=model_id,
+                token=token,
+                revision=revision,
+                force_download=force_download,
+                cache_dir=cache_dir,
+                subfolder=subfolder,
+                local_files_only=local_files_only,
+            )
+            return True
+        except (FileNotFoundError, KeyError):
+            return False
+
     @classmethod
     def from_pretrained(
         cls: Type["RBLNBaseModel"],
         model_id: Union[str, Path],
-        export: bool = False,
+        export: bool = None,
         rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
         **kwargs: Any,
     ) -> "RBLNBaseModel":
@@ -356,7 +383,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

         Args:
             model_id: The model id of the pre-trained model to be loaded. It can be downloaded from the HuggingFace model hub or a local path, or a model id of a compiled model using the RBLN Compiler.
-            export: A boolean flag to indicate whether the model should be compiled.
+            export: A boolean flag to indicate whether the model should be compiled. If None, it will be determined based on the existence of the compiled model files in the model_id.
             rbln_config: Configuration for RBLN model compilation and runtime. This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                 For detailed configuration options, see the specific model's configuration class documentation.

@@ -368,6 +395,18 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

         if isinstance(model_id, Path):
             model_id = model_id.as_posix()
+
+        if export is None:
+            export = not cls._is_compiled(
+                model_id=model_id,
+                token=kwargs.get("token"),
+                revision=kwargs.get("revision"),
+                force_download=kwargs.get("force_download", False),
+                cache_dir=kwargs.get("cache_dir"),
+                subfolder=kwargs.get("subfolder", ""),
+                local_files_only=kwargs.get("local_files_only", False),
+            )
+
         from_pretrained_method = cls._export if export else cls._from_pretrained
         return from_pretrained_method(model_id=model_id, **kwargs, rbln_config=rbln_config)

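With `export=None` as the new default, `from_pretrained` probes for compiled artifacts (via `_is_compiled`, which simply attempts `_load_compiled_model_dir`) and compiles only when none are found. A hedged usage sketch; the class name matches optimum-rbln's naming, the ids are placeholders, and the `rbln_config` fields are assumed from the config class shown in this diff:

    from optimum.rbln import RBLNLlamaForCausalLM

    # First run: plain HF checkpoint, no compiled artifacts -> export resolves
    # to True, the model is compiled, and the result can be saved for reuse.
    model = RBLNLlamaForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        rbln_config={"tensor_parallel_size": 4},  # assumed config field
    )
    model.save_pretrained("llama-2-7b-rbln")

    # Later runs: compiled artifacts are found -> export resolves to False
    # and the saved runtime loads without recompiling.
    model = RBLNLlamaForCausalLM.from_pretrained("llama-2-7b-rbln")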
@@ -400,8 +439,21 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         return compiled_model

     @classmethod
-    def update_rbln_config(cls, **others) -> RBLNModelConfig:
-        rbln_config = cls._update_rbln_config(**others)
+    def update_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model: "PreTrainedModel",
+        model_config: "PretrainedConfig",
+        rbln_config: RBLNModelConfig,
+    ) -> RBLNModelConfig:
+        rbln_config.torch_dtype = model.dtype
+        if not cls._supports_non_fp32 and rbln_config.torch_dtype != torch.float32:
+            raise NotImplementedError(
+                f"Currently, {cls.__name__} does not support non-fp32 dtype. Please use float32 dtype."
+            )
+        rbln_config = cls._update_rbln_config(
+            preprocessors=preprocessors, model=model, model_config=model_config, rbln_config=rbln_config
+        )
         rbln_config.freeze()
         if rbln_config.rbln_model_cls_name != cls.__name__:
             raise NameError(
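The dtype gate added here is opt-in per subclass: the base class pins `_supports_non_fp32 = False`, the decoder-only family flips it to `True` further down this diff, and Gemma3/Qwen2.5-VL explicitly pin it back to `False`. A minimal sketch of the gate pattern with hypothetical class names:

    import torch

    class Base:
        _supports_non_fp32 = False  # conservative default

        @classmethod
        def check_dtype(cls, dtype: torch.dtype) -> None:
            if not cls._supports_non_fp32 and dtype != torch.float32:
                raise NotImplementedError(f"{cls.__name__} supports only float32.")

    class HalfPrecisionModel(Base):
        _supports_non_fp32 = True  # subclass opts in

    Base.check_dtype(torch.float32)                # passes
    HalfPrecisionModel.check_dtype(torch.float16)  # passes
    # Base.check_dtype(torch.float16)              # would raise NotImplementedError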
@@ -444,12 +496,12 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

        # This method mimics the interface of torch.nn.Module.parameters()
        # specifically for code that uses `next(model.parameters())` to infer
-       # the device or dtype. It yields a single dummy tensor on CPU with float32 dtype.
+       # the device or dtype. It yields a single dummy tensor on CPU with model dtype.

        # Warning:
        # This does NOT yield the actual model parameters used by the RBLN runtime.
        # Code relying on iterating through all model parameters will not work as expected.
-       yield torch.tensor([1.0], dtype=torch.float32, device=torch.device("cpu"))
+       yield torch.tensor([1.0], dtype=self.dtype, device=torch.device("cpu"))

     def __call__(self, *args, **kwargs):
         return self.forward(*args, **kwargs)
@@ -14,9 +14,10 @@
 import importlib
 import inspect
 import warnings
-from typing import Type
+from pathlib import Path
+from typing import Any, Type, Union

-from transformers import AutoConfig, PretrainedConfig
+from transformers import AutoConfig, PretrainedConfig, PreTrainedModel
 from transformers.dynamic_module_utils import get_class_from_dynamic_module
 from transformers.models.auto.auto_factory import _get_model_class

@@ -43,10 +44,10 @@ class _BaseAutoModelClass:
     @classmethod
     def get_rbln_cls(
         cls,
-        pretrained_model_name_or_path,
-        *args,
-        export=True,
-        **kwargs,
+        pretrained_model_name_or_path: Union[str, Path],
+        *args: Any,
+        export: bool = None,
+        **kwargs: Any,
     ):
         """
         Determine the appropriate RBLN model class based on the given model ID and configuration.
@@ -59,6 +60,20 @@ class _BaseAutoModelClass:
         Returns:
             RBLNBaseModel: The corresponding RBLN model class.
         """
+        if isinstance(pretrained_model_name_or_path, Path):
+            pretrained_model_name_or_path = pretrained_model_name_or_path.as_posix()
+
+        if export is None:
+            export = not RBLNBaseModel._is_compiled(
+                model_id=pretrained_model_name_or_path,
+                token=kwargs.get("token"),
+                revision=kwargs.get("revision"),
+                force_download=kwargs.get("force_download", False),
+                cache_dir=kwargs.get("cache_dir"),
+                subfolder=kwargs.get("subfolder", ""),
+                local_files_only=kwargs.get("local_files_only", False),
+            )
+
         if export:
             hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
             rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
@@ -85,9 +100,9 @@ class _BaseAutoModelClass:
     @classmethod
     def infer_hf_model_class(
         cls,
-        pretrained_model_name_or_path,
-        *args,
-        **kwargs,
+        pretrained_model_name_or_path: Union[str, Path],
+        *args: Any,
+        **kwargs: Any,
     ):
         """
         Infer the HuggingFace model class based on the configuration or model name.
@@ -140,7 +155,7 @@ class _BaseAutoModelClass:
         return model_class

     @classmethod
-    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
+    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path: Union[str, Path], **kwargs):
         """
         Retrieve the path to the compiled model directory for a given RBLN model.

@@ -163,17 +178,17 @@ class _BaseAutoModelClass:
         return rbln_config.rbln_model_cls_name

     @classmethod
-    def from_pretrained(cls, model_id, *args, **kwargs):
+    def from_pretrained(cls, model_id: Union[str, Path], *args, **kwargs):
         rbln_cls = cls.get_rbln_cls(model_id, *args, **kwargs)
         return rbln_cls.from_pretrained(model_id, *args, **kwargs)

     @classmethod
-    def from_model(cls, model, *args, **kwargs):
+    def from_model(cls, model: PreTrainedModel, *args, **kwargs):
         rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
         return rbln_cls.from_model(model, *args, **kwargs)

     @staticmethod
-    def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
+    def register(rbln_cls: Type[RBLNBaseModel], exist_ok: bool = False):
         """
         Register a new RBLN model class.

@@ -1066,7 +1066,7 @@ class RotaryEmbedding(nn.Module):
             rope_type = "default"

         inv_freq, attention_scaling = ROPE_INIT_FUNCTIONS[rope_type](config, max_seq_len_cached)
-        cache_position = torch.arange(0, max_seq_len_cached, dtype=torch.float32)
+        cache_position = torch.arange(0, max_seq_len_cached)
         cache_position_expanded = cache_position[:, None]

         if rope_type == "dynamic":
@@ -1085,8 +1085,8 @@ class RotaryEmbedding(nn.Module):

     def forward(self, x, seq_len):
         return (
-            self._cos_cached[:seq_len].to(dtype=x.dtype),
-            self._sin_cached[:seq_len].to(dtype=x.dtype),
+            self._cos_cached[:seq_len].to(dtype=torch.float32),
+            self._sin_cached[:seq_len].to(dtype=torch.float32),
         )


@@ -1116,8 +1116,11 @@ def rotate_half(x):

 def apply_rotary_pos_emb(q, k, cos, sin):
     """Applies Rotary Position Embedding to the query and key tensors."""
+    dtype = q.dtype
     q_embed = (q * cos) + (rotate_half(q) * sin)
     k_embed = (k * cos) + (rotate_half(k) * sin)
+    q_embed = q_embed.to(dtype)
+    k_embed = k_embed.to(dtype)
     return q_embed, k_embed


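Together, the three RoPE hunks keep the cos/sin tables in float32 and perform the rotation in full precision even when activations are half precision, casting q/k back afterwards. A self-contained sketch of that mixed-precision pattern (shapes are illustrative):

    import torch

    def rotate_half(x):
        x1, x2 = x.chunk(2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)

    def apply_rotary_fp32(q, k, cos, sin):
        # fp16 * fp32 promotes to fp32, so the rotation runs in full precision.
        dtype = q.dtype
        q_embed = (q * cos) + (rotate_half(q) * sin)
        k_embed = (k * cos) + (rotate_half(k) * sin)
        # Cast back so downstream attention sees the original activation dtype.
        return q_embed.to(dtype), k_embed.to(dtype)

    q = torch.randn(1, 8, 16, 64, dtype=torch.float16)
    k = torch.randn(1, 8, 16, 64, dtype=torch.float16)
    cos = torch.randn(16, 64, dtype=torch.float32)
    sin = torch.randn(16, 64, dtype=torch.float32)
    q_out, k_out = apply_rotary_fp32(q, k, cos, sin)
    assert q_out.dtype == torch.float16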
@@ -317,7 +317,13 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):

         # Initialize attention mask for chunked processing
         chunked_attention_mask = (
-            torch.zeros(1, 1, self.rbln_config.prefill_chunk_size, self.rbln_config.max_seq_len, dtype=torch.float32)
+            torch.zeros(
+                1,
+                1,
+                self.rbln_config.prefill_chunk_size,
+                self.rbln_config.max_seq_len,
+                dtype=self.rbln_config.torch_dtype,
+            )
             if self.rbln_config.use_attention_mask
             else None
         )
@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, Union
 import rebel
 import torch
 from rebel.compile_context import CompileContext
-from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel
+from transformers import AutoModel, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import BaseModelOutputWithPast
 from transformers.modeling_utils import no_init_weights

@@ -33,7 +33,7 @@ from ...modeling_attention_utils import (
     validate_sliding_window,
 )
 from ...modeling_outputs import RBLNDecoderOnlyOutput
-from ...utils.rbln_quantization import prepare_model_for_quantization
+from ...utils.rbln_quantization import get_quantized_model
 from .configuration_decoderonly import RBLNDecoderOnlyModelConfig, RBLNDecoderOnlyModelForCausalLMConfig
 from .decoderonly_architecture import DecoderOnlyWrapper
 from .decoderonly_runtime_utils import RBLNPageTableManager, RBLNRuntimeModel
@@ -72,6 +72,7 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
     auto_model_class = AutoModel
     _decoder_wrapper_cls = DecoderOnlyWrapper
     _use_rotary_emb = True
+    _supports_non_fp32 = True

     def __post_init__(self, **kwargs):
         if self.rbln_config.use_inputs_embeds:
@@ -86,10 +87,8 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
     def setup_runtime(self):
         # Initialize resources to be used across Runtime instances (prefill and decode phases)
         page_table_manager = RBLNPageTableManager(self.rbln_config)
-        dec_attn_mask = torch.zeros(
-            self.rbln_config.batch_size, 1, 1, self.rbln_config.max_seq_len, dtype=torch.float32
-        )
-        out_buffers = [torch.empty(self.prefill_output_size, dtype=torch.float32, device="cpu")]
+        dec_attn_mask = torch.zeros(self.rbln_config.batch_size, 1, 1, self.rbln_config.max_seq_len, dtype=self.dtype)
+        out_buffers = [torch.empty(self.prefill_output_size, dtype=self.dtype)]

         common_kwargs = {
             "main_input_name": "inputs_embeds" if self.rbln_config.use_inputs_embeds else "input_ids",
@@ -143,35 +142,17 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
     ):
         kwargs = cls.update_kwargs(kwargs)

-        if config is None:
-            config = AutoConfig.from_pretrained(
-                model_id,
-                use_auth_token=use_auth_token,
-                revision=revision,
-                force_download=force_download,
-                cache_dir=cache_dir,
-                trust_remote_code=trust_remote_code,
-                **kwargs,
-            )
-        if config.torch_dtype == torch.bfloat16:
-            # FIXME: bfloat16 is not supported by rebel-compiler
-            config.torch_dtype = torch.float32
-
-        with no_init_weights():
-            model = cls.auto_model_class.from_config(config)
-
-        model = prepare_model_for_quantization(
-            model,
+        return get_quantized_model(
+            cls.auto_model_class,
             model_id,
-            kwargs.get("num_hidden_layers"),
             use_auth_token=use_auth_token,
             revision=revision,
             cache_dir=cache_dir,
             force_download=force_download,
             local_files_only=local_files_only,
             rbln_quantization=rbln_config.quantization,
+            **kwargs,
         )
-        return model

     def __getattr__(self, __name: str) -> Any:
         # Special method to delegate attribute access to the original Huggingface LM class.
@@ -365,7 +346,7 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):

         input_info = []
         if rbln_config.use_inputs_embeds:
-            input_info.append(("inputs_embeds", [batch_size, query_length, hidden_size], "float32"))
+            input_info.append(("inputs_embeds", [batch_size, query_length, hidden_size], rbln_config.torch_dtype))
         else:
             input_info.append(("input_ids", [batch_size, query_length], "int64"))

@@ -384,16 +365,16 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):

         if rbln_config.use_attention_mask:
             if rbln_config.use_position_ids:
-                input_info.append(("attention_mask", [batch_size, rbln_config.max_seq_len], "float32"))
+                input_info.append(("attention_mask", [batch_size, rbln_config.max_seq_len], rbln_config.torch_dtype))
             else:
                 input_info.append(
-                    ("attention_mask", [batch_size, 1, query_length, rbln_config.max_seq_len], "float32")
+                    ("attention_mask", [batch_size, 1, query_length, rbln_config.max_seq_len], rbln_config.torch_dtype)
                 )

         if rbln_config.use_position_ids:
             input_info.append(("position_ids", [batch_size, query_length], "int32"))

-        kvcache_dtype = "float32"
+        kvcache_dtype = rbln_config.torch_dtype
         if rbln_config.quantization and rbln_config.quantization.kv_caches == "fp8":
             kvcache_dtype = "float8_e4m3fn"

@@ -345,6 +345,7 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
     """

     _decoder_wrapper_cls = Gemma3ForCausalLMWrapper
+    _supports_non_fp32 = False

     def setup_runtime(self):
         # Initialize shared resources to be used across Runtime instances (prefill and decode phases)
@@ -357,10 +357,16 @@ class _GroundingDinoMultiscaleDeformableAttention(torch.nn.Module):
         batch_size, num_queries, _ = hidden_states.shape
         batch_size, sequence_length, _ = encoder_hidden_states.shape
         # Ignore copy
-        if (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() != sequence_length:
-            raise ValueError(
-                "Make sure to align the spatial shapes with the sequence length of the encoder hidden states"
+        if torch.compiler.is_exporting():
+            torch._check(
+                (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum().item() == sequence_length,
+                "Make sure to align the spatial shapes with the sequence length of the encoder hidden states",
             )
+        else:
+            if (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() != sequence_length:
+                raise ValueError(
+                    "Make sure to align the spatial shapes with the sequence length of the encoder hidden states"
+                )

         value = self.value_proj(encoder_hidden_states)
         if attention_mask is not None:
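`torch.compiler.is_exporting()` (available in recent PyTorch releases) lets the module keep its eager ValueError while giving torch.export a `torch._check` guard instead of a data-dependent branch. A standalone sketch of the pattern, assuming a current PyTorch; note that `torch._check` documents its message as a zero-argument callable:

    import torch

    def validate_lengths(spatial_shapes: torch.Tensor, sequence_length: int) -> None:
        total = (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum()
        if torch.compiler.is_exporting():
            # Under export, .item() yields an unbacked symbolic int and
            # torch._check records the equality as a guard, not a branch.
            torch._check(
                total.item() == sequence_length,
                lambda: "spatial shapes do not match the encoder sequence length",
            )
        else:
            if total != sequence_length:
                raise ValueError("spatial shapes do not match the encoder sequence length")

    validate_lengths(torch.tensor([[2, 3], [4, 5]]), 26)  # 2*3 + 4*5 = 26, passes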
@@ -372,6 +372,8 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
     ```
     """

+    _supports_non_fp32 = False
+
     auto_model_class = AutoModelForVision2Seq
     _rbln_submodules = [
         {"name": "visual"},
@@ -162,7 +162,13 @@ class TimeSeriesTransformersDecoder(nn.Module):
         attention_mask = _prepare_4d_causal_attention_mask(attention_mask, input_shape, inputs_embeds, cache_position)

         hidden_states = self.value_embedding(inputs_embeds)
-        embed_pos = self.embed_positions.weight[cache_position + self.config.context_length]
+        embed_idx = cache_position + self.config.context_length
+        if torch.compiler.is_exporting():
+            embed_idx = embed_idx.item()
+            torch._check_is_size(embed_idx)
+            torch._check(embed_idx >= 0)
+            torch._check(embed_idx < len(self.embed_positions.weight))
+        embed_pos = self.embed_positions.weight[embed_idx]
         hidden_states = self.layernorm_embedding(hidden_states + embed_pos)

         # iterate decoder_layer
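Indexing an embedding table with a data-dependent scalar is the canonical unbacked-SymInt problem for torch.export: without bounds the exporter cannot prove the index is in range. `torch._check_is_size` plus the explicit lower and upper `torch._check` bounds supply that proof. A reduced sketch under the same assumption of a recent PyTorch:

    import torch

    table = torch.nn.Embedding(512, 32)

    def lookup(position: torch.Tensor, offset: int) -> torch.Tensor:
        idx = position + offset
        if torch.compiler.is_exporting():
            idx = idx.item()           # unbacked SymInt during export
            torch._check_is_size(idx)  # mark as a non-negative size-like int
            torch._check(idx >= 0)
            torch._check(idx < table.weight.shape[0])  # bound for the indexing guard
        return table.weight[idx]

    print(lookup(torch.tensor(3), 100).shape)  # torch.Size([32])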
@@ -14,18 +14,23 @@

 import glob
 import os
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Type, Union

 import torch
 from huggingface_hub import hf_hub_download, list_repo_files
 from safetensors.torch import load_file
 from torch.nn import Linear, Parameter
 from torch.nn import functional as F
+from transformers import AutoConfig
+from transformers.modeling_utils import get_state_dict_dtype, no_init_weights

 from ...configuration_utils import RBLNSerializableConfigProtocol
 from ...utils.logging import get_logger


+if TYPE_CHECKING:
+    from transformers.models.auto.modeling_auto import _BaseAutoModelClass
+
 logger = get_logger()

@@ -138,22 +143,31 @@ class QuantizedLayerFactory:
         return create_fp8linear(layer, self.quantization_config)


-def prepare_model_for_quantization(
-    model: torch.nn.Module,
+def get_quantized_model(
+    hf_auto_model_class: Type["_BaseAutoModelClass"],
     model_id: str,
-    n_layer: Optional[int] = None,
     use_auth_token: Optional[Union[bool, str]] = None,
     revision: Optional[str] = None,
     cache_dir: Optional[str] = None,
     force_download: bool = False,
     local_files_only: bool = False,
     rbln_quantization: Optional[RBLNQuantizationConfig] = None,
-) -> torch.nn.Module:
+    **kwargs,
+):
     """
-    Prepare the model for quantization by updating specified linear layers to quantized (qlinear) layers.
+    Get a quantized model from a model class and model id.
     """
+    # torch_dtype should not be passed to AutoConfig.from_pretrained
+    # since it doesn't support 'auto'
+    torch_dtype = kwargs.pop("torch_dtype", None)
+    if torch_dtype is not None:
+        logger.warning(
+            "torch_dtype is not supported for quantized models. "
+            "It will be ignored and the dtype of the model will be determined by the weights."
+        )
+        torch_dtype = None

-    # 1. Load weight files
+    # get paths of safetensors files in the model repo
     safetensor_files = load_weight_files(
         model_id,
         use_auth_token=use_auth_token,
@@ -163,17 +177,31 @@ def prepare_model_for_quantization(
         local_files_only=local_files_only,
     )

-    # 2. Update linear layers based on the quantization config
-    update_layers_to_quantize(model, rbln_quantization)
+    # load safetensors files into memory
+    safetensors = [load_file(safetensor_file) for safetensor_file in safetensor_files]
+
+    # get the dtype of the model from the first safetensor file
+    torch_dtype = get_state_dict_dtype(safetensors[0])

-    # 3. Load weights into model parameters
-    load_weights_from_files(
-        model,
-        safetensor_files,
-        n_layer,
-        rbln_quantization=rbln_quantization,
+    config = AutoConfig.from_pretrained(
+        model_id,
+        use_auth_token=use_auth_token,
+        revision=revision,
+        cache_dir=cache_dir,
+        force_download=force_download,
+        local_files_only=local_files_only,
+        **kwargs,
     )

+    with no_init_weights():
+        model = hf_auto_model_class.from_config(config, torch_dtype=torch_dtype)
+
+    # Quantize the model
+    update_layers_to_quantize(model, rbln_quantization)
+
+    # Load weights into the model
+    load_weights_from_files(model, safetensors, rbln_quantization)
+
     return model

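The rewritten loader derives the model's precision from the checkpoint itself: shards are read into memory, `get_state_dict_dtype` inspects the first shard, the skeleton is built under `no_init_weights`, quantized layers replace the float linears, and only then are weights copied in. A small demonstration of the dtype-inference step, using `get_state_dict_dtype` as imported in this diff from `transformers.modeling_utils`:

    import torch
    from transformers.modeling_utils import get_state_dict_dtype

    # Returns the dtype of the first floating-point tensor in the state dict,
    # which is how the loader infers the checkpoint's precision.
    state_dict = {
        "wte.weight": torch.randn(10, 4, dtype=torch.float16),
        "position_ids": torch.arange(10),  # integer tensors are skipped
    }
    assert get_state_dict_dtype(state_dict) == torch.float16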
@@ -372,32 +400,26 @@ def canonicalize_checkpoint_items(

 def load_weights_from_files(
     model: torch.nn.Module,
-    safetensor_files: list[str],
-    n_layer: Optional[int] = None,
+    safetensors: List[Dict[str, torch.Tensor]],
     rbln_quantization: Optional[RBLNQuantizationConfig] = None,
 ):
     """
-    Load safetensor file data directly into the model from provided safetensor files,
-    filtering by layer if n_layer is provided.
+    Load safetensor file data directly into the model from provided safetensor files.
     """

     model_params = dict(model.named_parameters(recurse=True))
     model_buffers = dict(model.named_buffers(recurse=True))

-    target_layers = list(range(n_layer)) if n_layer is not None else None
-
     unloaded_keys = []
     loaded_input_scale = False
     loaded_kv_scale = False
     loaded_weight_scale = False

-    for safetensor_file in safetensor_files:
-        file_data = load_file(safetensor_file)
-
+    for safetensor in safetensors:
         # Normalize all (key, tensor) pairs to the internal schema
         normalized_items = canonicalize_checkpoint_items(
             model=model,
-            items=file_data.items(),
+            items=safetensor.items(),
             rbln_quantization=rbln_quantization,
         )

@@ -410,12 +432,6 @@ def load_weights_from_files(
            if key.endswith("k_scale") or key.endswith("v_scale"):
                loaded_kv_scale = True

-           # Filter by layer index if requested
-           if target_layers is not None:
-               parts = key.split(".")
-               if len(parts) > 2 and parts[2].isdigit() and (int(parts[2]) not in target_layers):
-                   continue
-
            # Copy into parameters or buffers
            if key in model_params:
                # Ensure dtype compatibility
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.3rc0
+Version: 0.8.4a1
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
@@ -1,10 +1,10 @@
 optimum/rbln/__init__.py,sha256=32ouGKDGus9k5_kD27CxP8jIQOw66zpDTfS0xs1XlfE,18298
-optimum/rbln/__version__.py,sha256=boIaJ8T6HCT9Qh8wBU3n-6ZyjtAKYgztQh0WMaN7BxM,714
-optimum/rbln/configuration_utils.py,sha256=fE3HlZblxukKSdS-4VofjuyCAiqwPMX8bqXpOiTZp4g,33926
-optimum/rbln/modeling.py,sha256=jMiJy9PGjZpXpAmRTFD5fTuj8xEbLCUmncIxGD6XWLk,14338
-optimum/rbln/modeling_base.py,sha256=txBab-zVXcjqnF2gZJBzhrp5ruA3vwt3hjls0Q2S_0w,25492
+optimum/rbln/__version__.py,sha256=Xldcu_i01nl8cPxjp-cO8CxxNYyVzFEpw4QQPEW-cj4,712
+optimum/rbln/configuration_utils.py,sha256=WNubd8EJIrdBkLOGT2UJJorgNL3lzhjg3a4bihAIptY,34761
+optimum/rbln/modeling.py,sha256=cAIPWEw5DGzUWeqjCbocRhU6OO3jyhVGW60AmBLh1Nw,14134
+optimum/rbln/modeling_base.py,sha256=97ju0uHJXB7PaorKaspf-FbLfsaHy0HwRVLJqtVscXA,27574
 optimum/rbln/diffusers/__init__.py,sha256=1tgU_xWA42BmInqu9bBz_5R_E9TGhhK3mI06YlaiTLg,7232
-optimum/rbln/diffusers/modeling_diffusers.py,sha256=TAuMb7PSMjNwK7mh5ItE_CtAEgYeZKI27XkFFmxjHlQ,19902
+optimum/rbln/diffusers/modeling_diffusers.py,sha256=3bzL0ZH7XyS8nGMWRSMIGjl9H3H2fhiZgmPaIF50mwg,20464
 optimum/rbln/diffusers/configurations/__init__.py,sha256=vMRnPY4s-Uju43xP038D2EA18X_mhy2YfsZVpSU-VoA,1322
 optimum/rbln/diffusers/configurations/models/__init__.py,sha256=7q95gtgDzCeIBogGw8SLQoHT4Wch7vpLJVF2UQovuoo,567
 optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py,sha256=ADS4SGZbwY6fy3SVNhgo3Zg4KxzAAGq5_zsJ97Dezh4,3201
@@ -36,7 +36,7 @@ optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=yF7sS0Qvawo
 optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
 optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=v3WS9EGKROE_QClXrxC7rmRko1BspAvAbeIfh83LK88,15832
 optimum/rbln/diffusers/pipelines/__init__.py,sha256=r8mu21102cKXdkG1II9tpfpUS6wuyren2oK9y_MptZY,3703
-optimum/rbln/diffusers/pipelines/auto_pipeline.py,sha256=zFDXbO9Iv0LO7maefV82dmi5Ta6L9oZxY09QFVX6F_Q,9511
+optimum/rbln/diffusers/pipelines/auto_pipeline.py,sha256=DaDWla59LhKGv7h8sdnJrwYaxvcwnO3-qFc47NHvx20,10644
 optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
 optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=3S9dogIHW8Bqg5kIlCudhCQG-4g3FcdOPEWhBOf7CJA,4059
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=G96bh4D9Cu-w4F9gZBQF6wNzhJQv9kvI34ZFsuEDjSw,35714
@@ -83,7 +83,7 @@ optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py,sha25
 optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py,sha256=z7LJiVJPmnlCM3mcyhPJP8AufSrxO_dsPeJ51onq-Nc,833
 optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py,sha256=FIKEVWpIt6-JQX9B_rAfCrAPqdUHtR2i8D_X2k7639E,1498
 optimum/rbln/transformers/models/auto/__init__.py,sha256=tdYqXkg9xBGNr4fZjH7_O3qRVbHvpEVjrJ6wtNUMMJM,1150
-optimum/rbln/transformers/models/auto/auto_factory.py,sha256=1CA52xV2dS1Uzumcgqe4zobdpoi-Xt2oNjP3uLFtm08,8020
+optimum/rbln/transformers/models/auto/auto_factory.py,sha256=9oaynN5f6aL6BTgDu5xF3b-5lz9eFuzLOdfVaZwIwvc,8834
 optimum/rbln/transformers/models/auto/modeling_auto.py,sha256=SMsWnD8f7VhKmh7h_S2voksEWlNccfF4fQ7AmwLYq6U,4790
 optimum/rbln/transformers/models/bart/__init__.py,sha256=fVo-gZEmJ0yxkIxEX6ciuRAGgXNyuvaXE2s88bhbjAE,830
 optimum/rbln/transformers/models/bart/bart_architecture.py,sha256=mAepjL0paPMK180vGTTCxXQ-hVZ1DD6JR-GvVNGJLqY,6268
@@ -105,10 +105,10 @@ optimum/rbln/transformers/models/colpali/configuration_colpali.py,sha256=eDWPVlo
 optimum/rbln/transformers/models/colpali/modeling_colpali.py,sha256=v9rPLmNx-BQZhDFhKnr2kmARElTtKdFZCgFIU4m-HPw,15703
 optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=w3VZOIBYaHXVdnuhK4y0zWAj0IAv7_5LGTJYaz9oYmI,1056
 optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=H2i9Iefy-q5X-0BLWQ-CrxK8ZoT3p9t0lt_3r4TFSCY,15182
-optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=cGkhGc8XlseMWqDCrt13z0Itn9b0emZ2PjHI-1TP0wI,42685
-optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py,sha256=9acEQxGRzd21YkzxRchkhqxqpX7emQHZigFg60BIulc,19902
+optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=L5LArhjN36fTdiwrUABgn3cnS7hh4SVCF4FMHBbiLZU,42760
+optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py,sha256=v3mfIlQImQkYYr-rPn7rQR3GYdVUhALRttEduLI7H9c,20012
 optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py,sha256=4D89IF0yQju_Dp_vLJN_dBkpe2U_LMWaUciYx57D-0M,3379
-optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=hu2eJr0CpLHnRPSLhyBhyyC6DfosKmPu7lPjapcBCkE,33061
+optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=dAHV9NgdpXHyTJGT0lieXOB3Pzi_NPlR4rqmRtmAWzM,32412
 optimum/rbln/transformers/models/depth_anything/__init__.py,sha256=xvPSIriMJWyNeVYoVB1Z7YqB4kkHOIkaHq7loNps-dk,756
 optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py,sha256=JujBVEUa_zZDXNPr1y-B_PhK5SgFFcY8Ib4EoGjjtmE,989
 optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py,sha256=tTmsVaW9Wb2WD3nKRLwp7swn3hbMvgwUEJwwVIfNYEc,1008
@@ -130,14 +130,14 @@ optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_
 optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=rKjKJhyaIM7YoiLR-q8GAZKIQNzDzcb5X7qf_FJE72M,3398
 optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=fpLDAXCe5paWVsfc0tL59JkRQMRF-WNgIzOIb_QpSLU,6191
 optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py,sha256=vYQ9sjRlkfamxZca_hVMQI0ylKeExsV02gOWaYVMjyg,9640
-optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=zraPjowA8ni9Lb0NrmsiUai2XdOjgYOOpVnIU1n2jGA,24208
+optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=TxbgkvW2Nv0VGdXNXnN_Beas6E_1D9NAH8f09Fo8t0E,24239
 optimum/rbln/transformers/models/gpt2/__init__.py,sha256=SsawHMStE3wYRtqkH5EvdTFkCdX0LLmp-QSKFhEBrHo,740
 optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=iGdHfzG7plekZcIz-Z5U8lRE4SB8gbJJNcFQJ9l8Myg,1533
 optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=MyAWReXmyuHnDpW5HI_TI7psyJZxLujZ9KT5XnNm7nA,2802
 optimum/rbln/transformers/models/gpt2/modeling_gpt2.py,sha256=DhF6hU3oCYGbZ7UijKCsRfTx-VCkTqqqNwqqMSrjqRE,2230
 optimum/rbln/transformers/models/grounding_dino/__init__.py,sha256=DE7DipZGvrKC6b1T77k4I4X3G70ss8mlr-PrZCaohto,307
 optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py,sha256=b6aeAlAMf0aOoTKAqe5nnBfontu_H3zvIHgOiCNMJ1I,3127
-optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py,sha256=A_YBgvPVHwwKgsGLL0z4MyTKb6Hb6r3y6sU3oVIrKiU,22779
+optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py,sha256=E6HReXGwvSV7YDeetSBuds1rAVSzEeL0AGHYgBOQW6o,23097
 optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py,sha256=bXAOs2QH4sy2UFoFLUSM6u1_VHouUT5COERLQX20F6Y,46897
 optimum/rbln/transformers/models/idefics3/__init__.py,sha256=ulxE7HEfXsNJhd25J9Fvi6vggo9aZH9sLKJjWB6LlzQ,814
 optimum/rbln/transformers/models/idefics3/configuration_idefics3.py,sha256=8BhPLkfE1_ZU0eSm2iTbWQOnVe1q0g99srYHWZM6VJ4,2373
@@ -182,7 +182,7 @@ optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=VOboPJF1rvvSVWkH
 optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
 optimum/rbln/transformers/models/qwen2_5_vl/__init__.py,sha256=rAW3DKQUzGL6EMwa5r1iLu94yhpiZpk6zfoD7TtYXrc,865
 optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py,sha256=1yyMFxh1SKsKR7rOjuotPvpSneN2_4a89bYfNk42370,4735
-optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=BfpALuavpdCqe5RuHaNZNo2IDlLjE4SwsoPAlaictgc,26607
+optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=hRvA37sPFC9xH1FqnFbtHS9rQOPwAvLYg4zl4oEyK-w,26639
 optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=i_UUWhKoFjJ5CCpgeWicqABM23TxMEKPQ354LoZ6iUU,7445
 optimum/rbln/transformers/models/qwen3/__init__.py,sha256=tI4KwvXpD35dUUaa8aLUXpWoU9gJGcmKXeywOlH14ZE,746
 optimum/rbln/transformers/models/qwen3/configuration_qwen3.py,sha256=BFRPggnH4VlsXlOa19C6KAID-bPgQ8ooQ29dvogh5zk,2102
@@ -211,7 +211,7 @@ optimum/rbln/transformers/models/t5/t5_architecture.py,sha256=DlJNrGk35NTBhcp76P
 optimum/rbln/transformers/models/time_series_transformer/__init__.py,sha256=xJaFWQawlwtv4H5tVFcY1pxLYzjHtMAlLq6nXysdkN8,1243
 optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py,sha256=MO-T4pcsea4EOmYeeg0tosUH6w76azqIPyV8Em8CMqw,1621
 optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py,sha256=8orxM-LbShCt2jC8Uyx43cSxWN1CGxamS58pKPjvzxs,17167
-optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py,sha256=XJDjQGbWXUq4ZimNojlcbm3mTDpxUMCl6tkFSzfYFl4,13769
+optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py,sha256=hAZXyXxzSDJMdkI883eefzpjz2L9KTVTRBeOVU8e92k,14038
 optimum/rbln/transformers/models/vit/__init__.py,sha256=CrrkHehfCe3U-_rUS00aMBY7Tncdeh43sNUgVI9Dt_g,807
 optimum/rbln/transformers/models/vit/configuration_vit.py,sha256=x98CxKR1cpKAG7Eh43uuPeGeGn4gS3HcKLPoDL3SWJo,994
 optimum/rbln/transformers/models/vit/modeling_vit.py,sha256=Q8xvX2oG2dC2RYM4ocaS0H70a2q_vQ9DZK2mCdyvxa0,1058
@@ -227,7 +227,7 @@ optimum/rbln/transformers/models/xlm_roberta/__init__.py,sha256=O3o2KzJ8Li3QhB7G
 optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py,sha256=wHRpGTXL9khYqSkKL1IgA7__6_lt9QpOz9tHumjK7fo,1260
 optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=EZd3flRUEE38DYtdqEnG70LV7fHhkamRZV51xrVyjYI,1093
 optimum/rbln/transformers/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-optimum/rbln/transformers/utils/rbln_quantization.py,sha256=ARngdvRmeVoOphUU3Md9kT6zS5HDrYdEFYljJwaAaio,21020
+optimum/rbln/transformers/utils/rbln_quantization.py,sha256=pORshQUgTInNaibUtd0HL-T8bKW5wuulZs2q0Oshppc,21659
 optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
 optimum/rbln/utils/decorator_utils.py,sha256=xu-TrsNi33SRC2a7DBsyoo6-pEQxWKZPZSmM9QlDe2Y,3745
 optimum/rbln/utils/depreacate_utils.py,sha256=uKxl3ENUCNaZXPnaDQvNxrH8hUIWdBWfZH6BM7ZV__4,385
@@ -238,7 +238,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
 optimum/rbln/utils/runtime_utils.py,sha256=R6uXDbeJP03-FWdd4vthNe2D4aCra5n12E3WB1ifiGM,7933
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=60NGLFvnhjP1DJg1opdb-FVQDsthcLCwWjW_1WQaasU,5280
-optimum_rbln-0.8.3rc0.dist-info/METADATA,sha256=ls15qV7a7bVTpkphb6aHteuBfil7u1xOzkUuysoRPZg,5300
-optimum_rbln-0.8.3rc0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-optimum_rbln-0.8.3rc0.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-optimum_rbln-0.8.3rc0.dist-info/RECORD,,
+optimum_rbln-0.8.4a1.dist-info/METADATA,sha256=cs0rmwPfLMefC6PHPHGw7XYrZIQVGPP3ax09PhmeUB8,5299
+optimum_rbln-0.8.4a1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.8.4a1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.8.4a1.dist-info/RECORD,,