optimum-rbln 0.8.2a2__py3-none-any.whl → 0.8.2a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/diffusers/modeling_diffusers.py +2 -4
- optimum/rbln/modeling.py +1 -3
- optimum/rbln/modeling_base.py +17 -13
- optimum/rbln/transformers/models/bart/bart_architecture.py +1 -4
- optimum/rbln/transformers/models/clip/configuration_clip.py +12 -1
- optimum/rbln/transformers/models/clip/modeling_clip.py +123 -28
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +8 -1
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +2 -3
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +104 -249
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +18 -1
- optimum/rbln/utils/hub.py +8 -47
- {optimum_rbln-0.8.2a2.dist-info → optimum_rbln-0.8.2a4.dist-info}/METADATA +1 -1
- {optimum_rbln-0.8.2a2.dist-info → optimum_rbln-0.8.2a4.dist-info}/RECORD +16 -16
- {optimum_rbln-0.8.2a2.dist-info → optimum_rbln-0.8.2a4.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.2a2.dist-info → optimum_rbln-0.8.2a4.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__version__.py
CHANGED
```diff
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.8.2a2'
-__version_tuple__ = version_tuple = (0, 8, 2, 'a2')
+__version__ = version = '0.8.2a4'
+__version_tuple__ = version_tuple = (0, 8, 2, 'a4')
```
optimum/rbln/diffusers/modeling_diffusers.py
CHANGED
```diff
@@ -70,8 +70,6 @@ class RBLNDiffusionMixin:
     _submodules = []
     _optional_submodules = []
     _prefix = {}
-    _rbln_config_class = None
-    _hf_class = None
 
     @staticmethod
     def _maybe_apply_and_fuse_lora(
@@ -114,14 +112,14 @@ class RBLNDiffusionMixin:
     @classmethod
     def get_rbln_config_class(cls) -> Type[RBLNModelConfig]:
         # Lazily loads and caches the corresponding RBLN model config class.
-        if cls._rbln_config_class is None:
+        if "_rbln_config_class" not in cls.__dict__ or cls._rbln_config_class is None:
             rbln_config_class_name = cls.__name__ + "Config"
             cls._rbln_config_class = get_rbln_config_class(rbln_config_class_name)
         return cls._rbln_config_class
 
     @classmethod
     def get_hf_class(cls):
-        if cls._hf_class is None:
+        if "_hf_class" not in cls.__dict__ or cls._hf_class is None:
             hf_cls_name = cls.__name__[4:]
             library = importlib.import_module("diffusers")
             cls._hf_class = getattr(library, hf_cls_name, None)
```
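The two `cls.__dict__` checks above fix a subtle class-attribute pitfall: with the old `if cls._hf_class is None` test, the first class to populate the cache would leak its value to every subclass that merely inherits the attribute, which is also why the class-level `= None` defaults could be dropped. A minimal sketch of the difference (illustrative classes, not the optimum-rbln code):

```python
class Base:
    @classmethod
    def get_cached(cls):
        # Checking cls.__dict__ makes the cache per-class: an attribute
        # inherited from a parent no longer counts as "already cached".
        if "_cached" not in cls.__dict__ or cls._cached is None:
            cls._cached = cls.__name__ + "Config"  # stand-in for the real lookup
        return cls._cached


class Child(Base):
    pass


print(Base.get_cached())   # BaseConfig
print(Child.get_cached())  # ChildConfig; a plain `cls._cached is None` check
                           # would have returned the inherited "BaseConfig"
```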
optimum/rbln/modeling.py
CHANGED
```diff
@@ -35,8 +35,6 @@ logger = get_logger(__name__)
 
 
 class RBLNModel(RBLNBaseModel):
-    _output_class = None
-
     @classmethod
     def update_kwargs(cls, kwargs):
         # Update user-given kwargs to get proper pytorch model.
@@ -289,7 +287,7 @@ class RBLNModel(RBLNBaseModel):
     @classmethod
     def get_hf_output_class(cls):
         # Dynamically gets the output class from the corresponding HuggingFace model class.
-        if cls._output_class:
+        if "_output_class" in cls.__dict__ and cls._output_class is not None:
             return cls._output_class
 
         hf_class = cls.get_hf_class()
```
optimum/rbln/modeling_base.py
CHANGED
```diff
@@ -23,9 +23,10 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
 import rebel
 import torch
 from transformers import AutoConfig, AutoModel, GenerationConfig, PretrainedConfig
+from transformers.utils.hub import PushToHubMixin
 
 from .configuration_utils import RBLNAutoConfig, RBLNCompileConfig, RBLNModelConfig, get_rbln_config_class
-from .utils.hub import PushToHubMixin, pull_compiled_model_from_hub, validate_files
+from .utils.hub import pull_compiled_model_from_hub, validate_files
 from .utils.logging import get_logger
 from .utils.runtime_utils import UnavailableRuntime, tp_and_devices_are_ok
 from .utils.save_utils import maybe_load_preprocessors
@@ -50,11 +51,8 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
     model_type = "rbln_model"
     auto_model_class = AutoModel
     config_class = AutoConfig
-
     config_name = "config.json"
     hf_library_name = "transformers"
-    _hf_class = None
-    _rbln_config_class = None
 
     def __init__(
         self,
@@ -115,7 +113,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
     def _load_compiled_model_dir(
         cls,
         model_id: Union[str, Path],
-        use_auth_token: Optional[Union[bool, str]] = None,
+        token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
         cache_dir: Optional[str] = None,
@@ -134,7 +132,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         model_path = pull_compiled_model_from_hub(
             model_id=model_id,
             subfolder=subfolder,
-            use_auth_token=use_auth_token,
+            token=token,
             revision=revision,
             cache_dir=cache_dir,
             force_download=force_download,
@@ -172,7 +170,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         cls,
         model_id: Union[str, Path],
         config: Optional["PretrainedConfig"] = None,
-        use_auth_token: Optional[Union[bool, str]] = None,
+        token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
         cache_dir: Optional[str] = None,
@@ -189,7 +187,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         if rbln_compiled_models is None:
             model_path_subfolder = cls._load_compiled_model_dir(
                 model_id=model_id,
-                use_auth_token=use_auth_token,
+                token=token,
                 revision=revision,
                 force_download=force_download,
                 cache_dir=cache_dir,
@@ -232,7 +230,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
                 cache_dir=cache_dir,
                 force_download=force_download,
                 revision=revision,
-                token=use_auth_token,
+                token=token,
                 trust_remote_code=trust_remote_code,
             )
         elif cls.hf_library_name == "diffusers":
@@ -250,7 +248,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
                 force_download=force_download,
                 local_files_only=local_files_only,
                 revision=revision,
-                token=use_auth_token,
+                token=token,
                 subfolder=subfolder,
             )
             config = PretrainedConfig(**config)
@@ -421,7 +419,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
 
         # Returns:
         #     type: The original HuggingFace model class
-        if cls._hf_class is None:
+        if "_hf_class" not in cls.__dict__ or cls._hf_class is None:
             hf_cls_name = cls.__name__[4:]
             library = importlib.import_module(cls.hf_library_name)
             cls._hf_class = getattr(library, hf_cls_name, None)
@@ -430,7 +428,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
     @classmethod
     def get_rbln_config_class(cls) -> Type[RBLNModelConfig]:
         # Lazily loads and caches the corresponding RBLN model config class.
-        if cls._rbln_config_class is None:
+        if "_rbln_config_class" not in cls.__dict__ or cls._rbln_config_class is None:
             rbln_config_class_name = cls.__name__ + "Config"
             cls._rbln_config_class = get_rbln_config_class(rbln_config_class_name)
         return cls._rbln_config_class
@@ -507,6 +505,9 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
                 f"Please ensure the model directory exists and you have the necessary permissions to access it."
             )
 
+        if isinstance(self.config, PretrainedConfig):
+            self.config.save_pretrained(real_save_dir)
+
         if save_directory_path == real_save_dir:
             raise FileExistsError(
                 f"Cannot save model to '{save_directory}'. This directory already exists and contains the model files."
@@ -534,7 +535,10 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
             raise e  # Re-raise the exception after cleanup
 
         if push_to_hub:
-
+            repo_id = kwargs.pop("repo_id", None)
+            if repo_id is None:
+                raise ValueError("`repo_id` must be provided to push the model to the HuggingFace model hub.")
+            return super().push_to_hub(repo_id=repo_id, **kwargs)
 
     @staticmethod
     def _raise_missing_compiled_file_error(missing_files: List[str]):
```
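Two user-visible effects of the changes above: every `use_auth_token` argument becomes `token` (matching current huggingface_hub conventions), and uploads now go through transformers' `PushToHubMixin`, which needs an explicit `repo_id`. A hedged usage sketch (model class, paths, and repo names are placeholders):

```python
from optimum.rbln import RBLNLlamaForCausalLM

# The renamed argument: an explicit string token overrides the cached login.
model = RBLNLlamaForCausalLM.from_pretrained("my-org/compiled-llama", token="hf_xxx")

# As of this change, omitting repo_id with push_to_hub=True raises ValueError.
model.save_pretrained("./compiled-llama", push_to_hub=True, repo_id="my-org/compiled-llama")
```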
optimum/rbln/transformers/models/bart/bart_architecture.py
CHANGED
```diff
@@ -56,10 +56,7 @@ class BartDecoderWrapper(Seq2SeqDecoderWrapper):
 
 
 class BartForConditionalGeneration(Seq2SeqForConditionalGeneration):
-
-
-    def __post_init__(self):
-        self.scaling = self.config.d_model**-0.5
+    pass
 
 
 class BartDecoder(Seq2SeqDecoder):
```
optimum/rbln/transformers/models/clip/configuration_clip.py
CHANGED
```diff
@@ -43,7 +43,15 @@ class RBLNCLIPTextModelWithProjectionConfig(RBLNCLIPTextModelConfig):
 
 
 class RBLNCLIPVisionModelConfig(RBLNModelConfig):
-    def __init__(self, batch_size: Optional[int] = None, image_size: Optional[int] = None, **kwargs: Dict[str, Any]):
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        image_size: Optional[int] = None,
+        interpolate_pos_encoding: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        **kwargs: Dict[str, Any],
+    ):
         """
         Args:
             batch_size (Optional[int]): The batch size for image processing. Defaults to 1.
@@ -60,6 +68,9 @@ class RBLNCLIPVisionModelConfig(RBLNModelConfig):
             raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
 
         self.image_size = image_size
+        self.interpolate_pos_encoding = interpolate_pos_encoding or False
+        self.output_hidden_states = output_hidden_states
+        self.output_attentions = output_attentions
 
     @property
     def image_width(self):
```
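If this reading of the new signature is right, the three added knobs are frozen into the compiled graph and merely validated at inference time. A construction sketch (import path and values are assumptions):

```python
from optimum.rbln import RBLNCLIPVisionModelConfig

cfg = RBLNCLIPVisionModelConfig(
    batch_size=1,
    image_size=224,
    interpolate_pos_encoding=None,  # coerced to False by `or False`
    output_hidden_states=True,      # None would later fall back to the HF model config
    output_attentions=None,
)
print(cfg.interpolate_pos_encoding, cfg.output_hidden_states)  # False True
```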
optimum/rbln/transformers/models/clip/modeling_clip.py
CHANGED
```diff
@@ -16,6 +16,7 @@ from typing import TYPE_CHECKING, Optional, Tuple, Union
 
 import torch
 from transformers import CLIPTextConfig, CLIPTextModel, CLIPVisionConfig, CLIPVisionModel
+from transformers.modeling_outputs import BaseModelOutputWithPooling
 from transformers.models.clip.modeling_clip import CLIPTextModelOutput, CLIPVisionModelOutput
 
 from ....configuration_utils import RBLNCompileConfig
@@ -111,12 +112,27 @@ class RBLNCLIPTextModelWithProjection(RBLNCLIPTextModel):
 
 
 class _VisionEncoder(torch.nn.Module):
-    def __init__(self, enc: CLIPVisionModel):
+    def __init__(
+        self,
+        enc: CLIPVisionModel,
+        interpolate_pos_encoding: bool,
+        output_hidden_states: bool,
+        output_attentions: bool,
+    ):
         super().__init__()
         self.enc = enc
+        self.interpolate_pos_encoding = interpolate_pos_encoding
+        self.output_hidden_states = output_hidden_states
+        self.output_attentions = output_attentions
 
     def forward(self, inp):
-        enc_out = self.enc(
+        enc_out = self.enc(
+            inp,
+            output_hidden_states=self.output_hidden_states,
+            interpolate_pos_encoding=self.interpolate_pos_encoding,
+            output_attentions=self.output_attentions,
+            return_dict=False,
+        )
         return enc_out
 
 
@@ -130,7 +146,12 @@ class RBLNCLIPVisionModel(RBLNModel):
 
     @classmethod
     def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module:
-        return _VisionEncoder(model).eval()
+        wrapper_cfg = {
+            "interpolate_pos_encoding": rbln_config.interpolate_pos_encoding,
+            "output_hidden_states": rbln_config.output_hidden_states,
+            "output_attentions": rbln_config.output_attentions,
+        }
+        return _VisionEncoder(model, **wrapper_cfg).eval()
 
     @classmethod
     def update_rbln_config_using_pipe(
@@ -155,6 +176,12 @@ class RBLNCLIPVisionModel(RBLNModel):
         if rbln_config.image_size is None:
             raise ValueError("`rbln_image_size` should be specified!")
 
+        if rbln_config.output_attentions is None:
+            rbln_config.output_attentions = getattr(model_config, "output_attentions", False)
+
+        if rbln_config.output_hidden_states is None:
+            rbln_config.output_hidden_states = getattr(model_config, "output_hidden_states", False)
+
         rbln_compile_config = RBLNCompileConfig(
             input_info=[
                 (
@@ -176,27 +203,76 @@ class RBLNCLIPVisionModel(RBLNModel):
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
-        return_dict: bool = None,
+        return_dict: bool = True,
+        output_attentions: bool = None,
+        output_hidden_states: bool = None,
+        interpolate_pos_encoding: bool = False,
         **kwargs,
-    ) -> Union[Tuple, CLIPVisionModelOutput]:
+    ) -> Union[Tuple, BaseModelOutputWithPooling]:
         if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
             logger.warning(
                 f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}."
             )
+
+        output_attentions = output_attentions if output_attentions is not None else self.rbln_config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.rbln_config.output_hidden_states
+        )
+
+        if output_attentions != self.rbln_config.output_attentions:
+            raise ValueError(
+                f"Variable output_attentions {output_attentions} is not equal to rbln_config.output_attentions {self.rbln_config.output_attentions} "
+                f"Please compile again with the correct argument."
+            )
+
+        if output_hidden_states != self.rbln_config.output_hidden_states:
+            raise ValueError(
+                f"Variable output_hidden_states {output_hidden_states} is not equal to rbln_config.output_hidden_states {self.rbln_config.output_hidden_states} "
+                f"Please compile again with the correct argument."
+            )
+
+        if interpolate_pos_encoding != self.rbln_config.interpolate_pos_encoding:
+            raise ValueError(
+                f"Variable interpolate_pos_encoding {interpolate_pos_encoding} is not equal to rbln_config.interpolate_pos_encoding {self.rbln_config.interpolate_pos_encoding} "
+                f"Please compile again with the correct argument."
+            )
+
         output = super().forward(pixel_values, return_dict=return_dict)
         return output
 
     def _prepare_output(self, output, return_dict):
         # Prepare model output based on return_dict flag.
         # This method can be overridden by subclasses to provide task-specific output handling.
+        last_hidden_state = output.pop(0)
+        pooler_output = output.pop(0)
+        vision_config = self.config.vision_config if hasattr(self.config, "vision_config") else self.config
+
+        if self.rbln_config.output_hidden_states:
+            hidden_states = ()
+            num_hidden_layers = vision_config.num_hidden_layers
+            for _ in range(num_hidden_layers + 1):
+                hidden_states += (output.pop(0),)
+        else:
+            hidden_states = None
+
+        if self.rbln_config.output_attentions:
+            attentions = ()
+            num_hidden_layers = vision_config.num_hidden_layers
+            for _ in range(num_hidden_layers):
+                attentions += (output.pop(0),)
+        else:
+            attentions = None
 
         if not return_dict:
-            return (
+            return tuple(
+                item for item in (last_hidden_state, pooler_output, hidden_states, attentions) if item is not None
+            )
         else:
-            return
-
-
-                hidden_states=
+            return BaseModelOutputWithPooling(
+                last_hidden_state=last_hidden_state,
+                pooler_output=pooler_output,
+                hidden_states=hidden_states,
+                attentions=attentions,
             )
 
 
@@ -208,21 +284,40 @@ class RBLNCLIPVisionModelWithProjection(RBLNCLIPVisionModel):
     multimodal embedding alignment tasks.
     """
 
-    def _prepare_output(self, output, return_dict):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def _prepare_output(self, output, return_dict):
+        # Prepare model output based on return_dict flag.
+        # This method can be overridden by subclasses to provide task-specific output handling.
+
+        image_embeds = output.pop(0) if isinstance(output, (tuple, list)) else output
+        last_hidden_state = output.pop(0)
+
+        vision_config = self.config.vision_config if hasattr(self.config, "vision_config") else self.config
+
+        if self.rbln_config.output_hidden_states:
+            hidden_states = ()
+            num_hidden_layers = vision_config.num_hidden_layers
+            for _ in range(num_hidden_layers + 1):
+                hidden_states += (output.pop(0),)
+        else:
+            hidden_states = None
+
+        if self.rbln_config.output_attentions:
+            attentions = ()
+            num_hidden_layers = vision_config.num_hidden_layers
+            for _ in range(num_hidden_layers):
+                attentions += (output.pop(0),)
+        else:
+            attentions = None
+
+        if not return_dict:
+            return tuple(
+                item for item in (image_embeds, last_hidden_state, hidden_states, attentions) if item is not None
+            )
+
+        else:
+            return CLIPVisionModelOutput(
+                image_embeds=image_embeds,
+                last_hidden_state=last_hidden_state,
+                hidden_states=hidden_states,
+                attentions=attentions,
+            )
```
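The rewritten `_prepare_output` methods exist because a compiled RBLN graph returns a flat list of tensors rather than a HuggingFace output object: roughly `[last_hidden_state, pooler_output, *hidden_states, *attentions]`, with the optional groups present only when compiled in. A standalone re-packing sketch of that logic (not the library code; shapes are illustrative):

```python
from typing import List

import torch


def repack(flat: List[torch.Tensor], num_layers: int, want_hidden: bool, want_attn: bool):
    out = list(flat)
    last_hidden_state = out.pop(0)
    pooler_output = out.pop(0)
    # Hidden states include the embedding output, hence num_layers + 1 entries.
    hidden = tuple(out.pop(0) for _ in range(num_layers + 1)) if want_hidden else None
    attn = tuple(out.pop(0) for _ in range(num_layers)) if want_attn else None
    return last_hidden_state, pooler_output, hidden, attn


flat = [torch.zeros(1, 50, 768), torch.zeros(1, 768)] + [torch.zeros(1, 50, 768)] * 3
lhs, pooled, hidden, attn = repack(flat, num_layers=2, want_hidden=True, want_attn=False)
print(lhs.shape, pooled.shape, len(hidden), attn)  # 3 hidden states, attn is None
```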
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py
CHANGED
```diff
@@ -303,6 +303,8 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
         position_embed = (
             position_embed[:, :, :, attention_mask.bool(), :] if attention_mask is not None else position_embed
         )
+        if token_type_ids is not None:
+            token_type_ids = token_type_ids[:, attention_mask.bool()] if attention_mask is not None else token_type_ids
 
         query_length = inputs.shape[1]
         if query_length > self.rbln_config.max_seq_len:
@@ -352,8 +354,11 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
         if position_embed is not None:
             position_embed = torch.nn.functional.pad(position_embed, (0, 0, 0, padding_size))
 
+        if token_type_ids is not None:
+            token_type_ids = torch.nn.functional.pad(token_type_ids, (0, padding_size), value=-1)
+
         # Overwrite position_ids and padded_cache_lengths
-        position_ids =
+        position_ids = cache_position.clone()
         padded_cache_lengths = 0
 
         return (
@@ -365,6 +370,7 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
             position_embed,
             padded_cache_lengths,
             query_length,
+            token_type_ids,
         )
 
     def prefill_forward(
@@ -393,6 +399,7 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
             position_embed,
             padded_cache_lengths,
             query_length,
+            token_type_ids,
         ) = self._prepare_prefill_inputs(
             inputs, cache_position, attention_mask, position_embed, token_type_ids=token_type_ids
         )
```
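The `value=-1` in the new padding call is deliberate: `token_type_ids` uses 0 for text and 1 for image, so padded slots get a sentinel that matches neither and cannot be mistaken for an image span downstream. A two-line illustration:

```python
import torch

token_type_ids = torch.tensor([[0, 0, 1, 1]])
print(torch.nn.functional.pad(token_type_ids, (0, 4), value=-1))
# tensor([[ 0,  0,  1,  1, -1, -1, -1, -1]])
```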
optimum/rbln/transformers/models/gemma3/configuration_gemma3.py
CHANGED
```diff
@@ -23,22 +23,21 @@ from ..siglip.configuration_siglip import RBLNSiglipVisionModelConfig
 class RBLNGemma3ForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
     def __init__(
         self,
-        prefill_chunk_size: Optional[int] = None,
         use_position_ids: Optional[bool] = None,
         use_attention_mask: Optional[bool] = None,
+        image_prefill_chunk_size: Optional[int] = None,
         **kwargs: Dict[str, Any],
     ):
         # use_attention_mask and use_position_ids are always True for Gemma3
         use_attention_mask = use_attention_mask or True
         use_position_ids = use_position_ids or True
-        prefill_chunk_size = prefill_chunk_size or 256
 
         super().__init__(
-            prefill_chunk_size=prefill_chunk_size,
             use_attention_mask=use_attention_mask,
             use_position_ids=use_position_ids,
             **kwargs,
         )
+        self.image_prefill_chunk_size = image_prefill_chunk_size
 
         npu = self.npu or rebel.get_npu_name()
         if npu == "RBLN-CA02":
```
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py
CHANGED
```diff
@@ -31,15 +31,11 @@ from transformers.models.gemma3.modeling_gemma3 import Gemma3TextScaledWordEmbedding
 
 from ....configuration_utils import RBLNCompileConfig, RBLNModelConfig
 from ....modeling import RBLNModel
-from ....utils.logging import get_logger
 from ..decoderonly.modeling_decoderonly import RBLNDecoderOnlyModelForCausalLM, RBLNDecoderOnlyOutput, RBLNRuntimeModel
 from .configuration_gemma3 import RBLNGemma3ForCausalLMConfig
 from .gemma3_architecture import Gemma3ForCausalLMWrapper
 
 
-logger = get_logger()
-
-
 if TYPE_CHECKING:
     from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, Gemma3ForConditionalGeneration
 
@@ -320,194 +316,28 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
         self.prefill = self.runtime if self.phase == "prefill" else None  # FIXME
         self.decode = self.runtime if self.phase == "decode" else None
 
-    def pad_for_chunked_images(
-
-
-
-
-
-
-
-
-
-
-
-            inputs: (1, seq_len, hidden_size) tensor.
-            attention_mask: (1, seq_len) tensor, 1 for valid, 0 for masked.
-            position_ids: (1, seq_len) tensor for RoPE.
-            token_type_ids: (1, seq_len) tensor, 0 for text, 1 for image.
-
-        Returns:
-            (inputs_padded, attention_mask_padded, position_ids_padded, padded_len, token_type_ids_padded).
-        """
-
-        if token_type_ids is None:
-            return inputs, attention_mask, position_ids, 0, torch.zeros(inputs.shape[:2], dtype=torch.long)
-
-        seq_len = inputs.shape[1]
-
-        # Find image start positions
-        image_starts = [
-            s
-            for s in range(seq_len - self.rbln_config.prefill_chunk_size + 1)
-            if torch.all(token_type_ids[:, s : s + self.rbln_config.prefill_chunk_size] == 1)
-        ]
-
-        # Initialize padded tensors
-        padded_input_len = seq_len
-        for image_start in image_starts:
-            pad_needed = (
-                self.rbln_config.prefill_chunk_size
-                - (image_start + padded_input_len - seq_len) % self.rbln_config.prefill_chunk_size
-            ) % self.rbln_config.prefill_chunk_size
-            padded_input_len += pad_needed
-        total_padding = padded_input_len - seq_len
-
-        if inputs.dim() == 3:
-            inputs_padded = torch.zeros(1, padded_input_len, inputs.shape[2], dtype=inputs.dtype)
-        else:
-            inputs_padded = torch.zeros(1, padded_input_len, dtype=inputs.dtype)
-        attention_mask_padded = torch.zeros(1, padded_input_len, dtype=attention_mask.dtype)
-        position_ids_padded = torch.zeros(1, padded_input_len, dtype=position_ids.dtype)
-        token_type_ids_padded = torch.zeros(1, padded_input_len, dtype=token_type_ids.dtype)
-
-        # Fill padded tensors
-        dest_pos = 0
-        src_pos = 0
-        last_pos_id = -1
-        for image_start in image_starts + [seq_len]:
-            # Text segment
-            if src_pos < image_start:
-                length = image_start - src_pos
-                inputs_padded[:, dest_pos : dest_pos + length] = inputs[:, src_pos:image_start]
-                attention_mask_padded[:, dest_pos : dest_pos + length] = attention_mask[:, src_pos:image_start]
-                position_ids_padded[:, dest_pos : dest_pos + length] = position_ids[:, src_pos:image_start]
-                token_type_ids_padded[:, dest_pos : dest_pos + length] = token_type_ids[:, src_pos:image_start]
-                dest_pos += length
-                last_pos_id = position_ids[0, image_start - 1].item()
-                src_pos = image_start
-
-            # Padding
-            pad_needed = (
-                self.rbln_config.prefill_chunk_size - dest_pos % self.rbln_config.prefill_chunk_size
-            ) % self.rbln_config.prefill_chunk_size
-            if pad_needed and dest_pos < padded_input_len:
-                position_ids_padded[:, dest_pos : dest_pos + pad_needed] = torch.arange(
-                    last_pos_id + 1, last_pos_id + pad_needed + 1, dtype=position_ids.dtype
-                ).unsqueeze(0)
-                dest_pos += pad_needed
-
-            # Image segment
-            if src_pos < seq_len and src_pos == image_start:
-                inputs_padded[:, dest_pos : dest_pos + self.rbln_config.prefill_chunk_size] = inputs[
-                    :, src_pos : src_pos + self.rbln_config.prefill_chunk_size
-                ]
-                attention_mask_padded[:, dest_pos : dest_pos + self.rbln_config.prefill_chunk_size] = attention_mask[
-                    :, src_pos : src_pos + self.rbln_config.prefill_chunk_size
-                ]
-                position_ids_padded[:, dest_pos : dest_pos + self.rbln_config.prefill_chunk_size] = position_ids[
-                    :, src_pos : src_pos + self.rbln_config.prefill_chunk_size
-                ]
-                token_type_ids_padded[:, dest_pos : dest_pos + self.rbln_config.prefill_chunk_size] = token_type_ids[
-                    :, src_pos : src_pos + self.rbln_config.prefill_chunk_size
-                ]
-                dest_pos += self.rbln_config.prefill_chunk_size
-                src_pos += self.rbln_config.prefill_chunk_size
-                last_pos_id = position_ids[0, image_start + self.rbln_config.prefill_chunk_size - 1].item()
-
-        return inputs_padded, attention_mask_padded, position_ids_padded, total_padding, token_type_ids_padded
-
-    def _prepare_prefill_inputs(
-        self,
-        inputs: torch.Tensor,
-        cache_position: torch.Tensor,
-        attention_mask: Optional[torch.Tensor] = None,
-        position_embed: Optional[torch.Tensor] = None,
-        token_type_ids: Optional[torch.Tensor] = None,
-    ):
-        """
-        Prepare inputs for prefill phase.
-        """
-        # Handle continuous batching in a compiled graph by extracting valid inputs
-        # If an attention mask is provided, select only the valid (non-masked) inputs
-        inputs = inputs[:, attention_mask.bool()] if attention_mask is not None else inputs
-        token_type_ids = (
-            token_type_ids[:, attention_mask.bool()]
-            if attention_mask is not None and token_type_ids is not None
-            else token_type_ids
-        )
-
-        if position_embed is not None:
-            position_embed = (
-                position_embed[:, :, :, attention_mask.bool(), :] if attention_mask is not None else position_embed
-            )
-
-        seq_len = inputs.shape[1]
-        # Initialize attention mask for chunked processing
-        if self.rbln_config.use_attention_mask:
-            chunked_attention_mask = (
-                torch.ones(1, seq_len, dtype=torch.float32)
-                if self.rbln_config.use_position_ids
-                else torch.zeros(
-                    1, 1, self.rbln_config.prefill_chunk_size, self.rbln_config.max_seq_len, dtype=torch.float32
-                )
-            )
-        else:
-            chunked_attention_mask = None
-
-        # Buffer for storing output logits
-        out_buffers = [
-            torch.empty(
-                size=self.output_size,
-                dtype=torch.float32,
-                device="cpu",
-            )
-        ]
-
-        inputs, chunked_attention_mask, position_ids, padded_cache_lengths, token_type_ids_padded = (
-            self.pad_for_chunked_images(inputs, chunked_attention_mask, cache_position, token_type_ids)
-        )
-
-        query_length = inputs.shape[1]
-        if query_length > self.rbln_config.max_seq_len:
-            raise ValueError(
-                f"Input length ({query_length}) exceeds the maximum allowed sequence length ({self.rbln_config.max_seq_len})."
-            )
-
-        # Align attention_mask to compiled shape
-        if self.rbln_config.use_position_ids:
-            chunked_attention_mask = torch.nn.functional.pad(
-                chunked_attention_mask, (0, self.rbln_config.max_seq_len - query_length)
-            )
-
-        # Pad input and cache_position if the last chunk is smaller than `prefill_chunk_size`
-        padding_size = 0
-        if query_length % self.rbln_config.prefill_chunk_size != 0:
-            padding_size = (self.rbln_config.prefill_chunk_size - query_length) % self.rbln_config.prefill_chunk_size
-            # inputs_embeds
-            if inputs.dim() == 3:
-                inputs = torch.nn.functional.pad(inputs, (0, 0, 0, padding_size))
-            # inputs_ids
-            else:
-                inputs = torch.nn.functional.pad(inputs, (0, padding_size))
-
-            position_ids = torch.cat(
-                [
-                    position_ids,
-                    torch.arange(
-                        query_length,
-                        query_length + padding_size,
-                        dtype=torch.int32,
-                    ).unsqueeze(0),
-                ],
-                dim=-1,
-            )
-            token_type_ids_padded = torch.nn.functional.pad(token_type_ids_padded, (0, padding_size))
+    def _prepare_prefill_inputs(self, *args, **kwargs):
+        (
+            inputs,
+            cache_position,
+            chunked_attention_mask,
+            out_buffers,
+            position_ids,
+            position_embed,
+            padded_cache_lengths,
+            query_length,
+            token_type_ids,
+        ) = super()._prepare_prefill_inputs(*args, **kwargs)
 
-
-
+        # chunked_attention_mask shape
+        chunked_attention_mask = torch.zeros(1, chunked_attention_mask.shape[-1], dtype=torch.float32)
 
-
+        # as gemma3 has different prefill chunk size for image and text, we need to pad the inputs to the max of the two.
+        padding_size = max(self.rbln_config.prefill_chunk_size, self.rbln_config.image_prefill_chunk_size)
+        inputs = torch.nn.functional.pad(inputs, (0, 0, 0, padding_size))
+        cache_position = torch.nn.functional.pad(cache_position, (0, padding_size))
+        position_ids = torch.nn.functional.pad(position_ids, (0, padding_size))
+        token_type_ids = torch.nn.functional.pad(token_type_ids, (0, padding_size), value=-1)
 
         return (
             inputs,
@@ -518,7 +348,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
             position_embed,
             padded_cache_lengths,
             query_length,
-            token_type_ids_padded,
+            token_type_ids,
         )
 
     def prefill_forward(
@@ -541,65 +371,73 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
         (
             inputs,
             cache_position,
-
+            chunked_attention_mask,
             out_buffers,
             position_ids,
             position_embed,
             padded_cache_lengths,
             query_length,
-
+            token_type_ids,
         ) = self._prepare_prefill_inputs(
             inputs, cache_position, attention_mask, position_embed, token_type_ids=token_type_ids
         )
-
-
-
-
-
-
-
-
-
-            # Extract the current chunk of inputs and cache positions
-            input_chunk = inputs[:, step : step + self.rbln_config.prefill_chunk_size]
-            cache_pos_chunk = cache_position[:, step : step + self.rbln_config.prefill_chunk_size]
-            position_ids_chunk = (
-                position_ids[:, step : step + self.rbln_config.prefill_chunk_size]
-                if position_ids is not None
-                else None
+
+        step = 0
+        while step < query_length:
+            # Check if the prefill chunk is an image prefill
+            is_image_prefill = torch.all(
+                token_type_ids[:, step : step + self.rbln_config.image_prefill_chunk_size] == 1
+            )
+            prefill_chunk_size = (
+                self.rbln_config.image_prefill_chunk_size if is_image_prefill else self.rbln_config.prefill_chunk_size
             )
 
-            if
-
-
-                padded_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size]
-            )
-
-            # Define query position
-            query_position = (
-                torch.sum(
-                    chunked_attention_mask[0][step : step + self.rbln_config.prefill_chunk_size],
-                    dim=-1,
-                    dtype=torch.int16,
-                ).squeeze(0)
-                - 1
+            # Check if the prefill chunk is a text prefill which have image_tokens in it.
+            is_text_prefill_with_image_tokens = not is_image_prefill and torch.any(
+                token_type_ids[:, step : step + prefill_chunk_size] == 1
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+            # Check if the prefill chunk crosses a block boundary, requiring padding to align with block boundaries
+            is_cross_block_boundary = (
+                step // self.rbln_config.kvcache_block_size
+                != (step + prefill_chunk_size) // self.rbln_config.kvcache_block_size
+            )
+
+            # Check if the prefill chunk is the last chunk
+            is_last_chunk = step + prefill_chunk_size >= query_length
+
+            if is_cross_block_boundary:
+                padding_size = prefill_chunk_size - (step + prefill_chunk_size) % self.rbln_config.kvcache_block_size
+                padded_cache_lengths += padding_size
+
+            # if text_prefill end with image_tokens, we only treat the text part.
+            num_processed_tokens = prefill_chunk_size
+            if is_text_prefill_with_image_tokens:
+                first_image_token_idx = torch.where(token_type_ids[:, step : step + prefill_chunk_size] == 1)[1][0]
+                num_processed_tokens = first_image_token_idx
+            if is_last_chunk:
+                num_processed_tokens = query_length - step
+
+            input_chunk = inputs[:, step : step + prefill_chunk_size]
+            cache_pos_chunk = cache_position[:, step : step + prefill_chunk_size].clone() + padded_cache_lengths
+            position_ids_chunk = position_ids[:, step : step + prefill_chunk_size].clone()
+            chunked_attention_mask[
+                :, step + padded_cache_lengths : step + num_processed_tokens + padded_cache_lengths
+            ] = 1
+            query_position = torch.tensor(num_processed_tokens - 1, dtype=torch.int16)
+
+            if is_image_prefill:
+                logits = self.image_prefill(
+                    input_chunk,
+                    cache_pos_chunk,
+                    block_tables,
+                    local_block_tables,
+                    query_position,
+                    chunked_attention_mask,
+                    position_ids_chunk,
+                    out=out_buffers,
+                )
             else:
-                # Forward pass for the current chunk
                 logits = self.prefill(
                     input_chunk,
                     cache_pos_chunk,
@@ -611,6 +449,11 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
                     out=out_buffers,
                 )
 
+            step += num_processed_tokens
+
+        if not is_external_block_tables:
+            self.dec_attn_mask[batch_idx : batch_idx + 1] = chunked_attention_mask
+
         return RBLNGemma3ForCausalLMOutput(
             logits=logits, padded_cache_lengths=padded_cache_lengths, attention_mask=chunked_attention_mask
         )
@@ -757,13 +600,14 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
 
     @classmethod
     def _update_submodule_config(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
-        if rbln_config.
-            rbln_config.
+        if rbln_config.image_prefill_chunk_size is None:
+            rbln_config.image_prefill_chunk_size = model.config.mm_tokens_per_image
 
-        if rbln_config.
-
-            f"
+        if rbln_config.image_prefill_chunk_size != model.config.mm_tokens_per_image:
+            raise ValueError(
+                f"Image prefill chunk size is different from mm_tokens_per_image: {rbln_config.image_prefill_chunk_size} != {model.config.mm_tokens_per_image}"
             )
+
         return rbln_config
 
     @classmethod
@@ -777,14 +621,22 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
         # Update rbln_config with super class
         rbln_config = super()._update_rbln_config(preprocessors, model, model_config, rbln_config)
 
-
-
+        if not (rbln_config.use_attention_mask and rbln_config.use_position_ids):
+            raise ValueError("use_attention_mask and use_position_ids must be True for RBLNGemma3ForCausalLM")
+
+        # Update image prefill compile config
+        img_prefill_input_info = cls.get_input_info(
+            batch_size=1,
+            query_length=rbln_config.image_prefill_chunk_size,
+            rbln_config=rbln_config,
+            model_config=model_config,
+        )
         image_prefill_compile_config = RBLNCompileConfig(
-            compiled_model_name="image_prefill", input_info=
+            compiled_model_name="image_prefill", input_info=img_prefill_input_info
         )
         # Insert image_prefill compile config at index 1
-
-        compile_cfgs.insert(
+        compile_cfgs = rbln_config.compile_cfgs
+        compile_cfgs.insert(1, image_prefill_compile_config)
         rbln_config.set_compile_cfgs(compile_cfgs)
 
         return rbln_config
@@ -840,11 +692,14 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
         )
 
         image_prefill_compile_config = rbln_compile_configs[1]
+        image_prefill_example_inputs = image_prefill_compile_config.get_dummy_inputs(
+            fill=0, static_tensors=static_tensors
+        )
         wrapped_model.phase = "image_prefill"
         compiled_image_prefill = compile_model(
             wrapped_model,
             image_prefill_compile_config,
-
+            image_prefill_example_inputs,
             context,
             rbln_config.quantization,
         )
```
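The rewritten prefill loop drops the old up-front re-layout (`pad_for_chunked_images`) in favor of on-the-fly chunk selection: image spans run through the dedicated `image_prefill` runtime in chunks of `image_prefill_chunk_size`, text runs through the regular `prefill` runtime, and a text chunk stops early at the first image token so each image span starts on its own chunk. A simplified CPU-only re-creation of just the scheduling decision (names are illustrative; block-boundary padding is omitted, and image spans are assumed to be exactly `image_chunk` tokens long, as Gemma3's `mm_tokens_per_image` guarantees):

```python
import torch


def plan_prefill_chunks(token_type_ids: torch.Tensor, text_chunk: int, image_chunk: int):
    """Return (kind, start, length) tuples covering the whole sequence."""
    query_length = token_type_ids.shape[1]
    step, plan = 0, []
    while step < query_length:
        # An image chunk is a run of image_chunk consecutive image tokens (1s).
        is_image = bool(torch.all(token_type_ids[:, step : step + image_chunk] == 1))
        size = image_chunk if is_image else text_chunk
        n = size
        if not is_image:
            hits = torch.where(token_type_ids[:, step : step + size] == 1)[1]
            if hits.numel() > 0:
                n = int(hits[0])  # stop the text chunk at the first image token
        if step + size >= query_length:
            n = query_length - step  # the final chunk takes whatever remains
        plan.append(("image" if is_image else "text", step, n))
        step += n
    return plan


# 4 text tokens, an 8-token image span, then 2 trailing text tokens:
tti = torch.tensor([[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])
print(plan_prefill_chunks(tti, text_chunk=8, image_chunk=8))
# [('text', 0, 4), ('image', 4, 8), ('text', 12, 2)]
```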
optimum/rbln/transformers/models/llava_next/configuration_llava_next.py
CHANGED
```diff
@@ -15,6 +15,11 @@
 from typing import Any, Dict, Optional
 
 from ....configuration_utils import RBLNModelConfig
+from ....utils.logging import get_logger
+from ...models.clip import RBLNCLIPVisionModelConfig
+
+
+logger = get_logger(__name__)
 
 
 class RBLNLlavaNextForConditionalGenerationConfig(RBLNModelConfig):
@@ -50,5 +55,17 @@ class RBLNLlavaNextForConditionalGenerationConfig(RBLNModelConfig):
         if not isinstance(self.batch_size, int) or self.batch_size < 0:
             raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
 
-        self.vision_tower =
+        self.vision_tower = self.init_submodule_config(
+            RBLNCLIPVisionModelConfig,
+            vision_tower,
+        )
+
+        if self.vision_tower.output_hidden_states is False:
+            raise ValueError(
+                f"LlavaNext requires output_hidden_states to be True, but found output_hidden_states={self.vision_tower.output_hidden_states}. "
+                f"Please compile again with the correct argument."
+            )
+        else:
+            self.vision_tower.output_hidden_states = True
+
         self.language_model = language_model
```
optimum/rbln/utils/hub.py
CHANGED
```diff
@@ -12,59 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
 from pathlib import Path
 from typing import List, Optional, Union
 
-from huggingface_hub import HfApi, HfFolder, hf_hub_download
-
-
-class PushToHubMixin:
-    def push_to_hub(
-        self,
-        save_directory: str,
-        repository_id: str,
-        private: Optional[bool] = None,
-        use_auth_token: Union[bool, str] = True,
-    ) -> str:
-        huggingface_token = _get_huggingface_token(use_auth_token)
-        api = HfApi()
-
-        api.create_repo(
-            token=huggingface_token,
-            repo_id=repository_id,
-            exist_ok=True,
-            private=private,
-        )
-        for path, subdirs, files in os.walk(save_directory):
-            for name in files:
-                local_file_path = os.path.join(path, name)
-                _, hub_file_path = os.path.split(local_file_path)
-                # FIXME: when huggingface_hub fixes the return of upload_file
-                try:
-                    api.upload_file(
-                        token=huggingface_token,
-                        repo_id=f"{repository_id}",
-                        path_or_fileobj=os.path.join(os.getcwd(), local_file_path),
-                        path_in_repo=hub_file_path,
-                    )
-                except KeyError:
-                    pass
-                except NameError:
-                    pass
+from huggingface_hub import HfApi, get_token, hf_hub_download
 
 
 def pull_compiled_model_from_hub(
     model_id: Union[str, Path],
     subfolder: str,
-    use_auth_token: Union[bool, str],
+    token: Union[bool, str],
     revision: Optional[str],
    cache_dir: Optional[str],
     force_download: bool,
     local_files_only: bool,
 ) -> Path:
     """Pull model files from the HuggingFace Hub."""
-    huggingface_token = _get_huggingface_token(use_auth_token)
+    huggingface_token = _get_huggingface_token(token)
     repo_files = list(
         map(
             Path,
@@ -87,7 +51,7 @@ def pull_compiled_model_from_hub(
             repo_id=model_id,
             filename=filename,
             subfolder=subfolder,
-
+            token=token,
             revision=revision,
             cache_dir=cache_dir,
             force_download=force_download,
@@ -113,10 +77,7 @@ def validate_files(
         raise FileExistsError(f"Multiple rbln_config.json files found in {location}. This is not expected.")
 
 
-def _get_huggingface_token(use_auth_token: Union[bool, str]) -> str:
-    if isinstance(use_auth_token, str):
-        return use_auth_token
-    elif use_auth_token:
-        return HfFolder.get_token()
-    else:
-        raise ValueError("`use_auth_token` must be provided to interact with the HuggingFace Hub.")
+def _get_huggingface_token(token: Union[bool, str]) -> str:
+    if isinstance(token, str):
+        return token
+    return get_token()
```
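The hub helper shrinks to a thin wrapper: the hand-rolled upload mixin and the deprecated `HfFolder` are gone, and token resolution defers to `huggingface_hub.get_token()`, which reads the token stored by `huggingface-cli login` or the `HF_TOKEN` environment variable. A sketch of the resulting behavior (note the fallback may be `None` when no login is cached, even though the original annotates `-> str`):

```python
from typing import Optional, Union

from huggingface_hub import get_token


def resolve_token(token: Optional[Union[bool, str]]) -> Optional[str]:
    # An explicit string always wins; True/False/None fall back to the cached login.
    if isinstance(token, str):
        return token
    return get_token()
```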
{optimum_rbln-0.8.2a2.dist-info → optimum_rbln-0.8.2a4.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.2a2
+Version: 0.8.2a4
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
```
{optimum_rbln-0.8.2a2.dist-info → optimum_rbln-0.8.2a4.dist-info}/RECORD
CHANGED
```diff
@@ -1,10 +1,10 @@
 optimum/rbln/__init__.py,sha256=Pl_On5dBWYbmJk6776cc0eU49oc0pebdCvgb2l4hQRA,15564
-optimum/rbln/__version__.py,sha256=
+optimum/rbln/__version__.py,sha256=al82X96cfOX7yNTvVBaPgx2coI4eq0-fqG5LYKf53zE,519
 optimum/rbln/configuration_utils.py,sha256=tnR9XZfzZ9oHc7nU_kX33oo3qFFyicICSVQSujYPiOM,32911
-optimum/rbln/modeling.py,sha256=
-optimum/rbln/modeling_base.py,sha256=
+optimum/rbln/modeling.py,sha256=gww-H-q16_mGw2qGnFwOjEj3J9yMjBKnRTKlnCkVlx8,14315
+optimum/rbln/modeling_base.py,sha256=AShxAt3KIOcCqfyF4U83dIrKwoj4p2Kxtc1ns_9-ltU,24154
 optimum/rbln/diffusers/__init__.py,sha256=_3FoA0uxCdFd6YK9PMaptFmR9XvdB_PcvYR1MkbGpN8,6957
-optimum/rbln/diffusers/modeling_diffusers.py,sha256=
+optimum/rbln/diffusers/modeling_diffusers.py,sha256=gnCsDZ6WD6N7yOfcZU8pK5rtwyh4zwSCtmR6Wa4zRG8,19913
 optimum/rbln/diffusers/configurations/__init__.py,sha256=vMRnPY4s-Uju43xP038D2EA18X_mhy2YfsZVpSU-VoA,1322
 optimum/rbln/diffusers/configurations/models/__init__.py,sha256=7q95gtgDzCeIBogGw8SLQoHT4Wch7vpLJVF2UQovuoo,567
 optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py,sha256=ySetuNq6koleFIZ542zZLTzEEyl_CTul9l12ufWlQ_Y,3218
@@ -83,7 +83,7 @@ optimum/rbln/transformers/models/auto/__init__.py,sha256=34Xghf1ogG4u-jhBMlj134n
 optimum/rbln/transformers/models/auto/auto_factory.py,sha256=1CA52xV2dS1Uzumcgqe4zobdpoi-Xt2oNjP3uLFtm08,8020
 optimum/rbln/transformers/models/auto/modeling_auto.py,sha256=Ex2ARRRIt3LtKhazr0UWy67R6WFAwSKRoi5n6B8TvoI,4213
 optimum/rbln/transformers/models/bart/__init__.py,sha256=fVo-gZEmJ0yxkIxEX6ciuRAGgXNyuvaXE2s88bhbjAE,830
-optimum/rbln/transformers/models/bart/bart_architecture.py,sha256=
+optimum/rbln/transformers/models/bart/bart_architecture.py,sha256=BjVXyk7Ron4ga_Ac_cvQiRvrdJ9LULEHJcKK0LfCqas,6277
 optimum/rbln/transformers/models/bart/configuration_bart.py,sha256=L466cRy4egEbu_lTb_v1sK6t3Lbi4Bra6Jjkj3FjdQw,1315
 optimum/rbln/transformers/models/bart/modeling_bart.py,sha256=H4MmQZbofb9kJq5WKqoFVjmj3HVtgns3t2F3QdSU-QQ,2337
 optimum/rbln/transformers/models/bert/__init__.py,sha256=86FuGRBLw315_Roa9D5OUx6Ku2PM0DqSPZ-YSqbF-io,806
@@ -93,8 +93,8 @@ optimum/rbln/transformers/models/blip_2/__init__.py,sha256=L01gPXcUCa8Vg-bcng20v
 optimum/rbln/transformers/models/blip_2/configuration_blip_2.py,sha256=ke75GqPU139dNOY1nm6QE661LepbD_0V9Bx1QbtHhKA,3210
 optimum/rbln/transformers/models/blip_2/modeling_blip_2.py,sha256=2sIVGrIn1B2nUZ8hw1sgW3VbJ2vxrlBRN37GgDiw0GU,16191
 optimum/rbln/transformers/models/clip/__init__.py,sha256=TLeXDqcFK6M6v9x7Xr64kBbqGu3hFHM7p754dQ8UVQc,938
-optimum/rbln/transformers/models/clip/configuration_clip.py,sha256=
-optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=
+optimum/rbln/transformers/models/clip/configuration_clip.py,sha256=HeKqLtTKrpQbx1Gq5GRSSLvjjgXHYD0rWIUhQOZDQCY,3836
+optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=knK7gINAluSHcWvg3zaByb3XRLNmSEGw2NcsOGHnIow,12364
 optimum/rbln/transformers/models/colpali/__init__.py,sha256=n3rueXT_oC0N8myoZiic0YkVK24CW5hZBPa-0L8so6Y,119
 optimum/rbln/transformers/models/colpali/colpali_architecture.py,sha256=bWG7TehWRZkTh2y6mGkpd85_onWAyiyKdaQC9TFsy3E,8065
 optimum/rbln/transformers/models/colpali/configuration_colpali.py,sha256=ieY-tuyDPObFUIJ5sfpcfuCsJ_HTAizN7ZGqirqeFRU,2636
@@ -102,7 +102,7 @@ optimum/rbln/transformers/models/colpali/modeling_colpali.py,sha256=jzvJCBrrCXSp
 optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=vQYZDDdoddwA7yKc5zzrq2Zs9sax-0p8rNF_aYfF4bk,1006
 optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=cakn8RGo8gS3nmXdEqOfC2xUBOMGInROgLEbCOoLFR0,13398
 optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=HrI12t9X9wV_-AZuTBSs-W7c5yVUkvd0secWlI72x2A,46325
-optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=
+optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=lmQtd50LS6xGfLvSCSv1u7gwPk0ggjb3wSdzqIHRvLM,54080
 optimum/rbln/transformers/models/distilbert/__init__.py,sha256=zXL78SOEORTnUN_wrdoaDaYpntG8lcFHvPobM6jC0CI,841
 optimum/rbln/transformers/models/distilbert/configuration_distilbert.py,sha256=O3BW9JjyYk9PLyiofvOKEgTdMZ_jpIuPfot281pSsyg,984
 optimum/rbln/transformers/models/distilbert/modeling_distilbert.py,sha256=LUh6zYGa8AR3Yxaj3gtyJRc-czBN3qnHTc-JTAhuqY0,1099
@@ -118,9 +118,9 @@ optimum/rbln/transformers/models/gemma/configuration_gemma.py,sha256=3hAxl7LL9vF
 optimum/rbln/transformers/models/gemma/gemma_architecture.py,sha256=2Ivay8NTSHmQAqXFh9JvG6Ja5rMThcRAjYPzyipcRI8,956
 optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=Ojvum34EhDHWfMB4D6S1BrwoTNwuBSZuBzwdnAgvq38,3095
 optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_J9VyGiSReuEIvL0Uno0eaI,790
-optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=
+optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=LwzlThcIXlpK1PdvgaIobp3uHQgaDCiKngi1XR9hDvU,3028
 optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=fpLDAXCe5paWVsfc0tL59JkRQMRF-WNgIzOIb_QpSLU,6191
-optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=
+optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=Hbh_Cfz0j_so8zCLP3ExO2lrCkyJqVnHI2Vqfpglfn4,31895
 optimum/rbln/transformers/models/gpt2/__init__.py,sha256=socBMIBZSiLbrVN12rQ4nL9gFeT0axMgz6SWaCaD4Ac,704
 optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=9sS6-EGapmow3rG9ViejK9qwrqy_X86VBxQ7u9x0Yqk,923
 optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=O7hBiaFJrpLSswGwW83cX9S9Q2wKRBDrpAqOgOS7zQg,2733
@@ -133,7 +133,7 @@ optimum/rbln/transformers/models/llama/configuration_llama.py,sha256=X6SXtRXGBC8
 optimum/rbln/transformers/models/llama/llama_architecture.py,sha256=S7MCPfyjG5eUqgaS-QNBB0ApUD6wnb5fR0RHq7k7-pA,728
 optimum/rbln/transformers/models/llama/modeling_llama.py,sha256=G91Yd8BfhiD8GT5bF46ZIHuc-ajGC-PO-mOQN3BhE1A,3122
 optimum/rbln/transformers/models/llava_next/__init__.py,sha256=kDXKr7wMkp1XqE__DER2B8kQF_NYMxhzsQS5ytGg56I,752
-optimum/rbln/transformers/models/llava_next/configuration_llava_next.py,sha256=
+optimum/rbln/transformers/models/llava_next/configuration_llava_next.py,sha256=U6_DQoaXugN2Bc4ntUb7WkelbNmw1L4VbgqsMRVuuE4,2776
 optimum/rbln/transformers/models/llava_next/modeling_llava_next.py,sha256=paYtCk58--FSZp8xjVrfZAxkJxO02X-jxaVPqL-l7ZU,27421
 optimum/rbln/transformers/models/midm/__init__.py,sha256=IC3FETwgYinbp3wDj7tp4zIHJhbqM-c6GfTRdYcMNj8,913
 optimum/rbln/transformers/models/midm/configuration_midm.py,sha256=DxhcSJlApxfi00XxYmSkKZ6bY9vfLXT0zh-oMKkZot0,1365
@@ -202,14 +202,14 @@ optimum/rbln/transformers/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 optimum/rbln/transformers/utils/rbln_quantization.py,sha256=um0N2ZruU_lNibo6rlzgwHAI2_8QOrYE7W7pA1qfXKM,9396
 optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
 optimum/rbln/utils/decorator_utils.py,sha256=xu-TrsNi33SRC2a7DBsyoo6-pEQxWKZPZSmM9QlDe2Y,3745
-optimum/rbln/utils/hub.py,sha256=
+optimum/rbln/utils/hub.py,sha256=FPBGslHJAMeyfBID3viLmh51xJzcR29xWtYtMN8y2CI,2765
 optimum/rbln/utils/import_utils.py,sha256=fpOERIIxXm-cDYGn1NN6c7aWDPQYVitPQW2MiyZ9NEY,5471
 optimum/rbln/utils/logging.py,sha256=VKKBmlQSdg6iZCGmAXaWYiW67K84jyp1QJhLQSSjPPE,3453
 optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsFvz3k,1748
 optimum/rbln/utils/runtime_utils.py,sha256=nIJioiN16nAyAzoArKjsy5ocLUsrr0UEy4f3LNT82SA,7961
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=w5mgPgncI740gVKMu3S-69DGNdUSI0bTZxegQGcZ98Y,5011
-optimum_rbln-0.8.2a2.dist-info/METADATA,sha256=
-optimum_rbln-0.8.2a2.dist-info/WHEEL,sha256=
-optimum_rbln-0.8.2a2.dist-info/licenses/LICENSE,sha256=
-optimum_rbln-0.8.2a2.dist-info/RECORD,,
+optimum_rbln-0.8.2a4.dist-info/METADATA,sha256=4R2Q_7kuyLzvC65FTrT3xcRXzdP-xIAcaNyM9JFMxQg,5299
+optimum_rbln-0.8.2a4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.8.2a4.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.8.2a4.dist-info/RECORD,,
```
{optimum_rbln-0.8.2a2.dist-info → optimum_rbln-0.8.2a4.dist-info}/WHEEL
File without changes
{optimum_rbln-0.8.2a2.dist-info → optimum_rbln-0.8.2a4.dist-info}/licenses/LICENSE
File without changes