optimum-rbln 0.8.3a2-py3-none-any.whl → 0.8.3a4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

optimum/rbln/__init__.py CHANGED
@@ -81,6 +81,8 @@ _import_structure = {
         "RBLNDistilBertForQuestionAnsweringConfig",
         "RBLNDPTForDepthEstimation",
         "RBLNDPTForDepthEstimationConfig",
+        "RBLNDepthAnythingForDepthEstimationConfig",
+        "RBLNDepthAnythingForDepthEstimation",
         "RBLNExaoneForCausalLM",
         "RBLNExaoneForCausalLMConfig",
         "RBLNGemmaModel",
@@ -147,6 +149,8 @@ _import_structure = {
         "RBLNRobertaForSequenceClassificationConfig",
         "RBLNSiglipVisionModel",
         "RBLNSiglipVisionModelConfig",
+        "RBLNSwinBackbone",
+        "RBLNSwinBackboneConfig",
         "RBLNT5EncoderModel",
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGeneration",
@@ -352,6 +356,8 @@ if TYPE_CHECKING:
         RBLNDecoderOnlyModelConfig,
         RBLNDecoderOnlyModelForCausalLM,
         RBLNDecoderOnlyModelForCausalLMConfig,
+        RBLNDepthAnythingForDepthEstimation,
+        RBLNDepthAnythingForDepthEstimationConfig,
         RBLNDistilBertForQuestionAnswering,
         RBLNDistilBertForQuestionAnsweringConfig,
         RBLNDPTForDepthEstimation,
@@ -422,6 +428,8 @@ if TYPE_CHECKING:
         RBLNRobertaForSequenceClassificationConfig,
         RBLNSiglipVisionModel,
         RBLNSiglipVisionModelConfig,
+        RBLNSwinBackbone,
+        RBLNSwinBackboneConfig,
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
         RBLNT5ForConditionalGeneration,
optimum/rbln/__version__.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.8.3a2'
-__version_tuple__ = version_tuple = (0, 8, 3, 'a2')
+__version__ = version = '0.8.3a4'
+__version_tuple__ = version_tuple = (0, 8, 3, 'a4')
 
 __commit_id__ = commit_id = None
optimum/rbln/configuration_utils.py CHANGED
@@ -21,6 +21,7 @@ from typing import Any, Dict, List, Optional, Protocol, Tuple, Type, Union, runtime_checkable
 
 import numpy as np
 import torch
+from packaging.version import Version
 
 from .__version__ import __version__
 from .utils.depreacate_utils import warn_deprecated_npu
@@ -621,6 +622,21 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         self.set_compile_cfgs([RBLNCompileConfig(**cfg) for cfg in self._compile_cfgs])
 
         if len(kwargs) > 0:
+            if optimum_rbln_version is not None:  # loaded from file
+                if Version(__version__) < Version(optimum_rbln_version):
+                    diff = "newer"
+                elif Version(__version__) > Version(optimum_rbln_version):
+                    diff = "older"
+                else:
+                    diff = None
+                if diff is not None:
+                    raise ValueError(
+                        f"Unexpected arguments: {kwargs.keys()}\n"
+                        f"Maybe you are trying to load a model compiled with a {diff} version of optimum-rbln. "
+                        "It is recommended to use the same version to compile and load the model.\n"
+                        f"Current version: {__version__}, Loaded version: {optimum_rbln_version}"
+                    )
+
             raise ValueError(f"Unexpected arguments: {kwargs.keys()}")
 
     @property
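
The new guard relies on PEP 440 ordering from packaging, under which pre-releases compare as expected (a2 < a4). A standalone sketch of the same comparison; the helper name is hypothetical, not part of the package:

from typing import Optional

from packaging.version import Version

def version_mismatch(current: str, compiled: str) -> Optional[str]:
    # Returns which side the compiled artifact is on, or None if the versions match.
    if Version(current) < Version(compiled):
        return "newer"  # artifact was produced by a newer optimum-rbln
    if Version(current) > Version(compiled):
        return "older"
    return None

assert version_mismatch("0.8.3a2", "0.8.3a4") == "newer"
assert version_mismatch("0.8.3a4", "0.8.3a2") == "older"
assert version_mismatch("0.8.3a4", "0.8.3a4") is None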
optimum/rbln/diffusers/pipelines/auto_pipeline.py CHANGED
@@ -48,7 +48,7 @@ class RBLNAutoPipelineBase:
             rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
         else:
             rbln_class_name = cls.get_rbln_model_cls_name(pretrained_model_name_or_path, **kwargs)
-        if convert_rbln_to_hf_model_name(rbln_class_name) not in cls._model_mapping_names:
+        if convert_rbln_to_hf_model_name(rbln_class_name) not in cls._model_mapping_names.values():
             raise ValueError(
                 f"The architecture '{rbln_class_name}' is not supported by the `{cls.__name__}.from_pretrained()` method. "
                 "Please use the `from_pretrained()` method of the appropriate class to load this model, "
optimum/rbln/transformers/__init__.py CHANGED
@@ -69,6 +69,8 @@ _import_structure = {
         "RBLNDistilBertForQuestionAnsweringConfig",
         "RBLNDPTForDepthEstimation",
         "RBLNDPTForDepthEstimationConfig",
+        "RBLNDepthAnythingForDepthEstimation",
+        "RBLNDepthAnythingForDepthEstimationConfig",
         "RBLNExaoneForCausalLM",
         "RBLNExaoneForCausalLMConfig",
         "RBLNGemmaModel",
@@ -137,6 +139,8 @@ _import_structure = {
         "RBLNRobertaForSequenceClassificationConfig",
         "RBLNSiglipVisionModel",
         "RBLNSiglipVisionModelConfig",
+        "RBLNSwinBackbone",
+        "RBLNSwinBackboneConfig",
         "RBLNT5EncoderModel",
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGeneration",
@@ -204,6 +208,8 @@ if TYPE_CHECKING:
         RBLNDecoderOnlyModelConfig,
         RBLNDecoderOnlyModelForCausalLM,
         RBLNDecoderOnlyModelForCausalLMConfig,
+        RBLNDepthAnythingForDepthEstimation,
+        RBLNDepthAnythingForDepthEstimationConfig,
         RBLNDistilBertForQuestionAnswering,
         RBLNDistilBertForQuestionAnsweringConfig,
         RBLNDPTForDepthEstimation,
@@ -274,6 +280,8 @@ if TYPE_CHECKING:
         RBLNRobertaForSequenceClassificationConfig,
         RBLNSiglipVisionModel,
         RBLNSiglipVisionModelConfig,
+        RBLNSwinBackbone,
+        RBLNSwinBackboneConfig,
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
         RBLNT5ForConditionalGeneration,
optimum/rbln/transformers/models/__init__.py CHANGED
@@ -90,6 +90,7 @@ _import_structure = {
         "RBLNDecoderOnlyModelForCausalLM",
         "RBLNDecoderOnlyModelForCausalLMConfig",
     ],
+    "depth_anything": ["RBLNDepthAnythingForDepthEstimationConfig", "RBLNDepthAnythingForDepthEstimation"],
     "dpt": [
         "RBLNDPTForDepthEstimation",
         "RBLNDPTForDepthEstimationConfig",
@@ -141,6 +142,10 @@ _import_structure = {
         "RBLNSiglipVisionModel",
         "RBLNSiglipVisionModelConfig",
     ],
+    "swin": [
+        "RBLNSwinBackbone",
+        "RBLNSwinBackboneConfig",
+    ],
     "time_series_transformer": [
         "RBLNTimeSeriesTransformerForPrediction",
         "RBLNTimeSeriesTransformerForPredictionConfig",
@@ -219,6 +224,7 @@ if TYPE_CHECKING:
         RBLNDecoderOnlyModelForCausalLM,
         RBLNDecoderOnlyModelForCausalLMConfig,
     )
+    from .depth_anything import RBLNDepthAnythingForDepthEstimation, RBLNDepthAnythingForDepthEstimationConfig
     from .distilbert import RBLNDistilBertForQuestionAnswering, RBLNDistilBertForQuestionAnsweringConfig
     from .dpt import RBLNDPTForDepthEstimation, RBLNDPTForDepthEstimationConfig
     from .exaone import RBLNExaoneForCausalLM, RBLNExaoneForCausalLMConfig
@@ -266,6 +272,7 @@ if TYPE_CHECKING:
         RBLNRobertaForSequenceClassificationConfig,
     )
     from .siglip import RBLNSiglipVisionModel, RBLNSiglipVisionModelConfig
+    from .swin import RBLNSwinBackbone, RBLNSwinBackboneConfig
     from .t5 import (
         RBLNT5EncoderModel,
         RBLNT5EncoderModelConfig,
optimum/rbln/transformers/models/depth_anything/__init__.py ADDED
@@ -0,0 +1,16 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_depth_anything import RBLNDepthAnythingForDepthEstimationConfig
+from .modeling_depth_anything import RBLNDepthAnythingForDepthEstimation
optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py ADDED
@@ -0,0 +1,24 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...configuration_generic import RBLNModelForDepthEstimationConfig
+
+
+class RBLNDepthAnythingForDepthEstimationConfig(RBLNModelForDepthEstimationConfig):
+    """
+    Configuration class for DepthAnythingForDepthEstimation.
+
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized Depth Anything V2 Small models for depth estimation tasks.
+    """
optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py ADDED
@@ -0,0 +1,25 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from ...modeling_generic import RBLNModelForDepthEstimation
+
+
+class RBLNDepthAnythingForDepthEstimation(RBLNModelForDepthEstimation):
+    """
+    RBLN optimized DepthAnythingForDepthEstimation model for depth estimation tasks.
+
+    This class provides hardware-accelerated inference for Depth Anything V2 Small
+    models on RBLN devices, offering the most capable monocular depth estimation (MDE) model.
+    """
optimum/rbln/transformers/models/swin/__init__.py ADDED
@@ -0,0 +1,16 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_swin import RBLNSwinBackboneConfig
+from .modeling_swin import RBLNSwinBackbone
optimum/rbln/transformers/models/swin/configuration_swin.py ADDED
@@ -0,0 +1,42 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Optional, Tuple, Union
+
+from ...configuration_generic import RBLNModelForImageClassificationConfig
+
+
+class RBLNSwinBackboneConfig(RBLNModelForImageClassificationConfig):
+    def __init__(
+        self,
+        image_size: Optional[Union[int, Tuple[int, int]]] = None,
+        batch_size: Optional[int] = None,
+        output_hidden_states: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for image processing. Defaults to 1.
+            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+        self.image_size = image_size
+        self.output_hidden_states = output_hidden_states
+        self.output_attentions = output_attentions
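
A small sketch of the config defined above (names are from this diff; the values are illustrative). The compiled graph has a fixed input shape, so image_size acts as a maximum: smaller inputs are padded at runtime by RBLNSwinBackbone.forward in the file below, and larger ones raise a ValueError:

from optimum.rbln import RBLNSwinBackboneConfig

cfg = RBLNSwinBackboneConfig(
    image_size=(512, 512),       # compile-time (height, width)
    batch_size=2,
    output_hidden_states=True,   # baked into the compiled graph
    output_attentions=False,
)
assert cfg.batch_size == 2 and cfg.image_size == (512, 512)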
optimum/rbln/transformers/models/swin/modeling_swin.py ADDED
@@ -0,0 +1,316 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import types
+from typing import TYPE_CHECKING, Optional, Tuple, Union
+
+import torch
+import torch.nn.functional as F
+from transformers import SwinConfig
+from transformers.models.swin.modeling_swin import BackboneOutput
+
+from ....configuration_utils import RBLNCompileConfig
+from ....modeling import RBLNModel
+from ....utils.logging import get_logger
+from .configuration_swin import RBLNSwinBackboneConfig
+
+
+logger = get_logger(__name__)
+
+if TYPE_CHECKING:
+    from transformers import (
+        AutoFeatureExtractor,
+        AutoProcessor,
+        AutoTokenizer,
+        PreTrainedModel,
+        SwinBackbone,
+        SwinEncoder,
+    )
+
+
+def window_partition(input_feature, window_size):
+    """
+    Partitions the given input into windows.
+    """
+    batch_size, height, width, num_channels = input_feature.shape
+    input_feature = input_feature.view(
+        batch_size, height // window_size, window_size, width // window_size, window_size, num_channels
+    )
+    windows = input_feature.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, num_channels)
+    return windows
+
+
+def get_attn_mask(self, height, width, dtype, device):
+    if self.shift_size > 0:
+        # calculate attention mask for SW-MSA
+        img_mask = torch.zeros((1, height, width, 1), dtype=dtype, device=device)
+        height_slices = (
+            slice(0, -self.window_size),
+            slice(-self.window_size, -self.shift_size),
+            slice(-self.shift_size, None),
+        )
+        width_slices = (
+            slice(0, -self.window_size),
+            slice(-self.window_size, -self.shift_size),
+            slice(-self.shift_size, None),
+        )
+        count = torch.zeros(1)
+        for height_slice in height_slices:
+            for width_slice in width_slices:
+                img_mask[:, height_slice, width_slice, :] = count
+                count += 1
+
+        mask_windows = window_partition(img_mask, self.window_size)
+        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
+        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
+        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
+    else:
+        attn_mask = None
+    return attn_mask
+
+
+class _SwinEncoder(torch.nn.Module):
+    def __init__(self, model: "SwinEncoder"):
+        super().__init__()
+        self.layers = model.layers
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        input_dimensions: Tuple[int, int],
+        head_mask: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = False,
+        output_hidden_states: Optional[bool] = False,
+        output_hidden_states_before_downsampling: Optional[bool] = False,
+        always_partition: Optional[bool] = False,
+        return_dict: Optional[bool] = True,
+    ):
+        all_hidden_states = () if output_hidden_states else None
+        all_reshaped_hidden_states = () if output_hidden_states else None
+        all_self_attentions = () if output_attentions else None
+
+        if output_hidden_states:
+            batch_size, _, hidden_size = hidden_states.shape
+            # rearrange b (h w) c -> b c h w
+            reshaped_hidden_state = hidden_states.view(batch_size, *input_dimensions, hidden_size)
+            reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+            all_hidden_states += (hidden_states,)
+            all_reshaped_hidden_states += (reshaped_hidden_state,)
+
+        for i, layer_module in enumerate(self.layers):
+            layer_head_mask = head_mask[i] if head_mask is not None else None
+
+            layer_outputs = layer_module(
+                hidden_states, input_dimensions, layer_head_mask, output_attentions, always_partition
+            )
+
+            hidden_states = layer_outputs[0]
+            hidden_states_before_downsampling = layer_outputs[1]
+            output_dimensions = layer_outputs[2]
+
+            input_dimensions = (output_dimensions[-2], output_dimensions[-1])
+
+            if output_hidden_states and output_hidden_states_before_downsampling:
+                batch_size, _, hidden_size = hidden_states_before_downsampling.shape
+                # rearrange b (h w) c -> b c h w
+                # here we use the original (not downsampled) height and width
+                reshaped_hidden_state = hidden_states_before_downsampling.view(
+                    batch_size, *(output_dimensions[0], output_dimensions[1]), hidden_size
+                )
+                reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+                all_hidden_states += (hidden_states_before_downsampling,)
+                all_reshaped_hidden_states += (reshaped_hidden_state,)
+            elif output_hidden_states and not output_hidden_states_before_downsampling:
+                batch_size, _, hidden_size = hidden_states.shape
+                # rearrange b (h w) c -> b c h w
+                reshaped_hidden_state = hidden_states.view(batch_size, *input_dimensions, hidden_size)
+                reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+                all_hidden_states += (hidden_states,)
+                all_reshaped_hidden_states += (reshaped_hidden_state,)
+
+            if output_attentions:
+                all_self_attentions += layer_outputs[3:]
+
+        return tuple(
+            v
+            for v in [hidden_states, all_hidden_states, all_self_attentions, all_reshaped_hidden_states]
+            if v is not None
+        )
+
+
+class _SwinBackbone(torch.nn.Module):
+    def __init__(self, model: "SwinBackbone", output_hidden_states: bool, output_attentions: bool):
+        super().__init__()
+        self.model = model
+        self.embeddings = model.embeddings
+        self.encoder = model.encoder
+        self.stage_names = model.stage_names
+        self.out_features = model.out_features
+        self.hidden_states_norms = model.hidden_states_norms
+        self.output_hidden_states = output_hidden_states
+        self.output_attentions = output_attentions
+
+    def forward(
+        self,
+        pixel_values: torch.Tensor,
+    ):
+        embedding_output, input_dimensions = self.embeddings(pixel_values)
+        outputs = _SwinEncoder(self.encoder)(
+            embedding_output,
+            input_dimensions,
+            head_mask=None,
+            output_attentions=self.output_attentions,
+            output_hidden_states=True,
+            output_hidden_states_before_downsampling=True,
+            always_partition=True,
+            return_dict=False,
+        )
+
+        hidden_states = outputs[-1]
+
+        feature_maps = ()
+        for stage, hidden_state in zip(self.stage_names, hidden_states):
+            if stage in self.out_features:
+                batch_size, num_channels, height, width = hidden_state.shape
+                hidden_state = hidden_state.permute(0, 2, 3, 1).contiguous()
+                hidden_state = hidden_state.view(batch_size, height * width, num_channels)
+                hidden_state = self.hidden_states_norms[stage](hidden_state)
+                hidden_state = hidden_state.view(batch_size, height, width, num_channels)
+                hidden_state = hidden_state.permute(0, 3, 1, 2).contiguous()
+                feature_maps += (hidden_state,)
+
+        output = (feature_maps,)
+
+        if self.output_hidden_states:
+            output += (outputs[1],)
+
+        if self.output_attentions:
+            output += (outputs[2],)
+
+        return output
+
+
+class RBLNSwinBackbone(RBLNModel):
+    @classmethod
+    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSwinBackboneConfig) -> torch.nn.Module:
+        for layer in model.encoder.layers:
+            for block in layer.blocks:
+                block.get_attn_mask = types.MethodType(get_attn_mask, block)
+
+        wrapper_cfg = {
+            "output_hidden_states": rbln_config.output_hidden_states,
+            "output_attentions": rbln_config.output_attentions,
+        }
+        return _SwinBackbone(model, **wrapper_cfg).eval()
+
+    @classmethod
+    def _update_rbln_config(
+        cls,
+        preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
+        model: Optional["PreTrainedModel"] = None,
+        model_config: "SwinConfig" = None,
+        rbln_config: Optional[RBLNSwinBackboneConfig] = None,
+    ) -> RBLNSwinBackboneConfig:
+        if rbln_config.image_size is None:
+            for processor in preprocessors:
+                if hasattr(processor, "size"):
+                    if all(required_key in processor.size.keys() for required_key in ["height", "width"]):
+                        rbln_config.image_size = (processor.size["height"], processor.size["width"])
+                        break
+
+        input_info = [
+            (
+                "pixel_values",
+                [
+                    rbln_config.batch_size,
+                    3,
+                    rbln_config.image_size[0],
+                    rbln_config.image_size[1],
+                ],
+                "float32",
+            ),
+        ]
+
+        rbln_config.set_compile_cfgs([RBLNCompileConfig(input_info=input_info)])
+        return rbln_config
+
+    def forward(
+        self,
+        pixel_values: Optional[torch.FloatTensor] = None,
+        return_dict: bool = True,
+        output_attentions: bool = None,
+        output_hidden_states: bool = None,
+        **kwargs,
+    ) -> Union[Tuple, BackboneOutput]:
+        if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
+            logger.warning(
+                f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}."
+            )
+
+        output_attentions = output_attentions if output_attentions is not None else self.rbln_config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.rbln_config.output_hidden_states
+        )
+
+        if output_attentions != self.rbln_config.output_attentions:
+            raise ValueError(
+                f"Variable output_attentions {output_attentions} is not equal to rbln_config.output_attentions {self.rbln_config.output_attentions} "
+                f"Please compile again with the correct argument."
+            )
+
+        if output_hidden_states != self.rbln_config.output_hidden_states:
+            raise ValueError(
+                f"Variable output_hidden_states {output_hidden_states} is not equal to rbln_config.output_hidden_states {self.rbln_config.output_hidden_states} "
+                f"Please compile again with the correct argument."
+            )
+
+        _, _, original_h, original_w = pixel_values.shape
+        if original_h > self.rbln_config.image_size[0] or original_w > self.rbln_config.image_size[1]:
+            raise ValueError(
+                f"Input image size ({original_h}x{original_w}) exceeds the configured maximum size"
+                f" ({self.rbln_config.image_size[0]}x{self.rbln_config.image_size[1]})."
+            )
+
+        pad_h = self.rbln_config.image_size[0] - original_h
+        pad_w = self.rbln_config.image_size[1] - original_w
+        padded_pixel_values = F.pad(pixel_values, (0, pad_w, 0, pad_h))
+
+        output = self.model[0](padded_pixel_values)
+
+        feature_maps = ()
+        for i in range(len(self.config.out_features)):
+            feature_maps += (output.pop(0),)
+
+        if self.rbln_config.output_hidden_states:
+            hidden_states = ()
+            for i in range(len(self.config.stage_names)):
+                hidden_states += (output.pop(0),)
+        else:
+            hidden_states = None
+
+        if self.rbln_config.output_attentions:
+            attentions = ()
+            for i in range(len(self.config.depths)):
+                attentions += (output.pop(0),)
+        else:
+            attentions = None
+
+        if not return_dict:
+            return tuple(item for item in (feature_maps, hidden_states, attentions) if item is not None)
+        else:
+            return BackboneOutput(
+                feature_maps=feature_maps,
+                hidden_states=hidden_states,
+                attentions=attentions,
+            )
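
A quick shape check for window_partition in the file above; the import path matches the new module, and the tensor sizes are illustrative (a Swin stage-1 feature map with window_size=7):

import torch

from optimum.rbln.transformers.models.swin.modeling_swin import window_partition

x = torch.randn(2, 56, 56, 96)      # (batch, height, width, channels)
windows = window_partition(x, 7)    # non-overlapping 7x7 windows
assert windows.shape == (2 * 8 * 8, 7, 7, 96)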
optimum_rbln-0.8.3a2.dist-info/METADATA → optimum_rbln-0.8.3a4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.3a2
+Version: 0.8.3a4
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
optimum_rbln-0.8.3a2.dist-info/RECORD → optimum_rbln-0.8.3a4.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
-optimum/rbln/__init__.py,sha256=YhaBhcyu6BgoJrprUogLGAmiBaHayvg6Tjm6PpfJETw,17382
-optimum/rbln/__version__.py,sha256=LoGi14U0L2os-fSHKgBIGeByegJLodfXKteGMBVsCEc,712
-optimum/rbln/configuration_utils.py,sha256=xneqnRWSUVROqpzbTrBACex42-L9zwo3eSjfHjFuhv4,33072
+optimum/rbln/__init__.py,sha256=1AF2jfrUGYLo4ps_OAPxCUagSnHF60N3W45cTeXlDJE,17698
+optimum/rbln/__version__.py,sha256=vl5bkt2g1rkDYLzC7btKWOmkJwvyed8HxQBkBGIvFzA,712
+optimum/rbln/configuration_utils.py,sha256=fE3HlZblxukKSdS-4VofjuyCAiqwPMX8bqXpOiTZp4g,33926
 optimum/rbln/modeling.py,sha256=0CMQnpVvW9evNrTFHM2XFbNpRY1HkbFzYJ5sRyYFq0o,14293
 optimum/rbln/modeling_base.py,sha256=gHfqIO6lKT8smkUthUuRHnbITpxHpnDeBPT8iTeasCk,24575
 optimum/rbln/diffusers/__init__.py,sha256=1tgU_xWA42BmInqu9bBz_5R_E9TGhhK3mI06YlaiTLg,7232
@@ -36,7 +36,7 @@ optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=yF7sS0Qvawo
 optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
 optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=v3WS9EGKROE_QClXrxC7rmRko1BspAvAbeIfh83LK88,15832
 optimum/rbln/diffusers/pipelines/__init__.py,sha256=r8mu21102cKXdkG1II9tpfpUS6wuyren2oK9y_MptZY,3703
-optimum/rbln/diffusers/pipelines/auto_pipeline.py,sha256=oGZWXfj82w695D2NiYUitgoWiwP2Z4PlgA3q6hoOKww,9502
+optimum/rbln/diffusers/pipelines/auto_pipeline.py,sha256=zFDXbO9Iv0LO7maefV82dmi5Ta6L9oZxY09QFVX6F_Q,9511
 optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
 optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=3S9dogIHW8Bqg5kIlCudhCQG-4g3FcdOPEWhBOf7CJA,4059
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=G96bh4D9Cu-w4F9gZBQF6wNzhJQv9kvI34ZFsuEDjSw,35714
@@ -72,13 +72,13 @@ optimum/rbln/ops/flash_attn.py,sha256=yTCdYQVqm_1rHMHWjrMQaIR8WTuG_xA6t033x1IVvT
 optimum/rbln/ops/kv_cache_update.py,sha256=aIvK2Sp7M3EfJzJgNvIvAJv4emoN6QOhmgaWj-VboLs,1440
 optimum/rbln/ops/linear.py,sha256=5K3pcrrUHu_p8LrMIU-jX2TnafksveFjjZSCsYSp_yw,1328
 optimum/rbln/ops/sliding_window_attn.py,sha256=EQrV_yRGc5z6kvwEsAcLP028bJWkQg2UPI3xubt9skU,3487
-optimum/rbln/transformers/__init__.py,sha256=eBPY5FjB_6jul-s3ixero-7irrWMu1D2smuXr_7zDcI,11127
+optimum/rbln/transformers/__init__.py,sha256=uV2rEhw93alpbZ-fnVrAex_6QF1sFHcVM5tsJh64osk,11443
 optimum/rbln/transformers/configuration_generic.py,sha256=95ks6REJYuzI1zLwGlPSlxVV45saVcYOob6ihn-WAAY,5092
 optimum/rbln/transformers/modeling_attention_utils.py,sha256=aLyOaq4me1m-JMmnKbuyNQageDxNU2jjEhGE_ew2P5o,11465
 optimum/rbln/transformers/modeling_generic.py,sha256=2BtroigKuu7z7C98dpLwI875R0EoHN-ceHEVbyPQuYk,12212
 optimum/rbln/transformers/modeling_outputs.py,sha256=cd8ZlhHAGq7S6i5-QK6TJCxgORvoPMnZpqPBlUc_pMY,1177
 optimum/rbln/transformers/modeling_rope_utils.py,sha256=6Zg3r-TeUk4WQAlr95pqfhuoAD_RQ4njT1rbO9uPL0Q,14379
-optimum/rbln/transformers/models/__init__.py,sha256=PfaXIPx5fGroSkcS1xUx7eQ62aZovO3e-f71Tp69cWI,11907
+optimum/rbln/transformers/models/__init__.py,sha256=A9ThjEgBo6RZzqGzoY3tgQucdchkuXWrpgJjuERxjcE,12272
 optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py,sha256=I2vL4lrzbT5p4eJcH-EKHzEfcPkj_XVsie7jb9q6yic,775
 optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py,sha256=z7LJiVJPmnlCM3mcyhPJP8AufSrxO_dsPeJ51onq-Nc,833
 optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py,sha256=FIKEVWpIt6-JQX9B_rAfCrAPqdUHtR2i8D_X2k7639E,1498
@@ -108,6 +108,9 @@ optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=
 optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py,sha256=9acEQxGRzd21YkzxRchkhqxqpX7emQHZigFg60BIulc,19902
 optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py,sha256=4D89IF0yQju_Dp_vLJN_dBkpe2U_LMWaUciYx57D-0M,3379
 optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=hu2eJr0CpLHnRPSLhyBhyyC6DfosKmPu7lPjapcBCkE,33061
+optimum/rbln/transformers/models/depth_anything/__init__.py,sha256=xvPSIriMJWyNeVYoVB1Z7YqB4kkHOIkaHq7loNps-dk,756
+optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py,sha256=JujBVEUa_zZDXNPr1y-B_PhK5SgFFcY8Ib4EoGjjtmE,989
+optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py,sha256=ganUtyPKcdKe5QCJ2atQhblzfAstyYkThYDm_DIecU8,1014
 optimum/rbln/transformers/models/distilbert/__init__.py,sha256=zXL78SOEORTnUN_wrdoaDaYpntG8lcFHvPobM6jC0CI,841
 optimum/rbln/transformers/models/distilbert/configuration_distilbert.py,sha256=O3BW9JjyYk9PLyiofvOKEgTdMZ_jpIuPfot281pSsyg,984
 optimum/rbln/transformers/models/distilbert/modeling_distilbert.py,sha256=LUh6zYGa8AR3Yxaj3gtyJRc-czBN3qnHTc-JTAhuqY0,1099
@@ -193,6 +196,9 @@ optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py,sha256=4MupGjhe
 optimum/rbln/transformers/models/siglip/__init__.py,sha256=X1Fc1GUnJ2EIxFx45nbeoW-T2t0OyP3W73C0HD8Vowo,712
 optimum/rbln/transformers/models/siglip/configuration_siglip.py,sha256=m1h8iDx_X9VmHdJi0sc1a2KsAO3OnpMb4cd9jW2Ic-U,3031
 optimum/rbln/transformers/models/siglip/modeling_siglip.py,sha256=1TyRaxmhp6mg6UfhQTbZhW26013TE3nVnroYG7EROcU,8033
+optimum/rbln/transformers/models/swin/__init__.py,sha256=gUsLDB8ceNxt53Cf69OT32JuZoRdmmIsRfjRdHTLDd0,698
+optimum/rbln/transformers/models/swin/configuration_swin.py,sha256=iVtpT2jXY5LNkUbbr5J08z97unc43KEhArIZ1tBRzEU,1692
+optimum/rbln/transformers/models/swin/modeling_swin.py,sha256=IvBOyIWq233UGZqUG7XhaoYIHk6gjbisYUEr8HDqRbY,12791
 optimum/rbln/transformers/models/t5/__init__.py,sha256=R1Q8Z1vaIdx4rDjeCmm_ZMSgewWaqaI0l93AHwewtew,818
 optimum/rbln/transformers/models/t5/configuration_t5.py,sha256=nqDbibqykeeWn1TlKk6LmCn-DawTVudMMuBn2c2jds8,1362
 optimum/rbln/transformers/models/t5/modeling_t5.py,sha256=pdAWBLVknTzbma0Ij-VQ2Qve-frPjxL-AwMyU-zouPY,5123
@@ -227,7 +233,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
 optimum/rbln/utils/runtime_utils.py,sha256=R6uXDbeJP03-FWdd4vthNe2D4aCra5n12E3WB1ifiGM,7933
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=w5mgPgncI740gVKMu3S-69DGNdUSI0bTZxegQGcZ98Y,5011
-optimum_rbln-0.8.3a2.dist-info/METADATA,sha256=KAOx0J5beZebrxsAf9AsklRO43eTWaw222WX1iInnpk,5299
-optimum_rbln-0.8.3a2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-optimum_rbln-0.8.3a2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-optimum_rbln-0.8.3a2.dist-info/RECORD,,
+optimum_rbln-0.8.3a4.dist-info/METADATA,sha256=4ikxK2ldRi8PkKZrcSmTH4unPPzl3UZ3JBXlC-fr_MU,5299
+optimum_rbln-0.8.3a4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.8.3a4.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.8.3a4.dist-info/RECORD,,