optimum-rbln 0.2.1a2__py3-none-any.whl → 0.2.1a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. optimum/rbln/__version__.py +1 -1
  2. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +2 -2
  3. optimum/rbln/diffusers/models/autoencoders/vae.py +2 -2
  4. optimum/rbln/diffusers/models/controlnet.py +2 -2
  5. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -2
  6. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +2 -2
  7. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +2 -2
  8. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +3 -2
  9. optimum/rbln/modeling.py +2 -2
  10. optimum/rbln/modeling_base.py +35 -15
  11. optimum/rbln/transformers/models/bert/modeling_bert.py +2 -2
  12. optimum/rbln/transformers/models/clip/modeling_clip.py +2 -2
  13. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +200 -154
  14. optimum/rbln/transformers/models/dpt/modeling_dpt.py +2 -2
  15. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +6 -9
  16. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +61 -39
  17. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +2 -2
  18. optimum/rbln/transformers/models/whisper/generation_whisper.py +19 -17
  19. optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -2
  20. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +2 -2
  21. optimum/rbln/utils/save_utils.py +3 -2
  22. {optimum_rbln-0.2.1a2.dist-info → optimum_rbln-0.2.1a4.dist-info}/METADATA +1 -1
  23. {optimum_rbln-0.2.1a2.dist-info → optimum_rbln-0.2.1a4.dist-info}/RECORD +25 -25
  24. {optimum_rbln-0.2.1a2.dist-info → optimum_rbln-0.2.1a4.dist-info}/WHEEL +0 -0
  25. {optimum_rbln-0.2.1a2.dist-info → optimum_rbln-0.2.1a4.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__version__.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '0.2.1a2'
+ __version__ = version = '0.2.1a4'
  __version_tuple__ = version_tuple = (0, 2, 1)
optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py CHANGED
@@ -12,7 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import logging
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
  import rebel
@@ -23,6 +22,7 @@ from transformers import PretrainedConfig
 
  from ....modeling import RBLNModel
  from ....modeling_config import DEFAULT_COMPILED_MODEL_NAME, RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
  from ...modeling_diffusers import RBLNDiffusionMixin
  from .vae import RBLNRuntimeVAEDecoder, RBLNRuntimeVAEEncoder, _VAEDecoder, _VAEEncoder
 
@@ -31,7 +31,7 @@ if TYPE_CHECKING:
  import torch
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
 
  class RBLNAutoencoderKL(RBLNModel):
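Nearly every file below repeats this same two-line change: the stdlib `logging.getLogger(__name__)` is swapped for the package's own `get_logger` helper. A minimal before/after sketch, assuming `get_logger` wraps the stdlib logger with optimum-rbln's shared handler and verbosity configuration (its implementation lives in optimum/rbln/utils/logging.py, which this diff only references):

import logging
from optimum.rbln.utils.logging import get_logger

old_logger = logging.getLogger(__name__)  # before: bare stdlib logger per module
logger = get_logger(__name__)             # after: routed through the package helper
logger.info("model compiled")             # formatted consistently across optimum-rbln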
optimum/rbln/diffusers/models/autoencoders/vae.py CHANGED
@@ -12,7 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import logging
  from typing import TYPE_CHECKING
 
  import torch # noqa: I001
@@ -20,13 +19,14 @@ from diffusers import AutoencoderKL
  from diffusers.models.autoencoders.vae import DiagonalGaussianDistribution
  from diffusers.models.modeling_outputs import AutoencoderKLOutput
 
+ from ....utils.logging import get_logger
  from ....utils.runtime_utils import RBLNPytorchRuntime
 
 
  if TYPE_CHECKING:
  import torch
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
 
  class RBLNRuntimeVAEEncoder(RBLNPytorchRuntime):
optimum/rbln/diffusers/models/controlnet.py CHANGED
@@ -13,7 +13,6 @@
  # limitations under the License.
 
  import importlib
- import logging
  from typing import TYPE_CHECKING, Any, Dict, Optional, Union
 
  import torch
@@ -22,6 +21,7 @@ from transformers import PretrainedConfig
 
  from ...modeling import RBLNModel
  from ...modeling_config import RBLNCompileConfig, RBLNConfig
+ from ...utils.logging import get_logger
  from ..modeling_diffusers import RBLNDiffusionMixin
 
 
@@ -29,7 +29,7 @@ if TYPE_CHECKING:
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
 
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
 
  class _ControlNetModel(torch.nn.Module):
optimum/rbln/diffusers/models/transformers/transformer_sd3.py CHANGED
@@ -12,7 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import logging
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 
  import torch
@@ -22,13 +21,14 @@ from transformers import PretrainedConfig
 
  from ....modeling import RBLNModel
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
  from ...modeling_diffusers import RBLNDiffusionMixin
 
 
  if TYPE_CHECKING:
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
 
  class SD3Transformer2DModelWrapper(torch.nn.Module):
optimum/rbln/diffusers/models/unets/unet_2d_condition.py CHANGED
@@ -12,7 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import logging
  from dataclasses import dataclass
  from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
 
@@ -22,13 +21,14 @@ from transformers import PretrainedConfig
 
  from ....modeling import RBLNModel
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
  from ...modeling_diffusers import RBLNDiffusionMixin
 
 
  if TYPE_CHECKING:
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
 
  class _UNet_SD(torch.nn.Module):
optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py CHANGED
@@ -12,7 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import logging
  import os
  from pathlib import Path
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
@@ -21,13 +20,14 @@ import torch
  from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
 
  from ....modeling import RBLNModel
+ from ....utils.logging import get_logger
  from ...models.controlnet import RBLNControlNetModel
 
 
  if TYPE_CHECKING:
  pass
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
 
  class RBLNMultiControlNetModel(RBLNModel):
optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py CHANGED
@@ -34,16 +34,17 @@ from diffusers import StableDiffusionControlNetPipeline
  from diffusers.image_processor import PipelineImageInput
  from diffusers.pipelines.controlnet.pipeline_controlnet import retrieve_timesteps
  from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
- from diffusers.utils import deprecate, logging
+ from diffusers.utils import deprecate
  from diffusers.utils.torch_utils import is_compiled_module, is_torch_version
 
  from ....utils.decorator_utils import remove_compile_time_kwargs
+ from ....utils.logging import get_logger
  from ...modeling_diffusers import RBLNDiffusionMixin
  from ...models import RBLNControlNetModel
  from ...pipelines.controlnet.multicontrolnet import RBLNMultiControlNetModel
 
 
- logger = logging.get_logger(__name__)
+ logger = get_logger(__name__)
 
 
  class RBLNStableDiffusionControlNetPipeline(RBLNDiffusionMixin, StableDiffusionControlNetPipeline):
optimum/rbln/modeling.py CHANGED
@@ -12,7 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import logging
  from pathlib import Path
  from tempfile import TemporaryDirectory
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
@@ -24,13 +23,14 @@ from transformers import AutoConfig, PretrainedConfig
 
  from .modeling_base import RBLNBaseModel
  from .modeling_config import DEFAULT_COMPILED_MODEL_NAME, RBLNConfig, use_rbln_config
+ from .utils.logging import get_logger
 
 
  if TYPE_CHECKING:
  from transformers import PreTrainedModel
 
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
 
  class RBLNModel(RBLNBaseModel):
optimum/rbln/modeling_base.py CHANGED
@@ -13,7 +13,6 @@
  # limitations under the License.
 
  import importlib
- import logging
  import os
  import shutil
  from abc import ABC, abstractmethod
@@ -32,6 +31,7 @@ from transformers import (
 
  from .modeling_config import RBLNCompileConfig, RBLNConfig, use_rbln_config
  from .utils.hub import PushToHubMixin, pull_compiled_model_from_hub, validate_files
+ from .utils.logging import get_logger
  from .utils.runtime_utils import UnavailableRuntime
  from .utils.save_utils import maybe_load_preprocessors
  from .utils.submodule import SubModulesMixin
@@ -40,7 +40,7 @@ from .utils.submodule import SubModulesMixin
  if TYPE_CHECKING:
  from transformers import PreTrainedModel
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
 
  class PreTrainedModel(ABC): # noqa: F811
@@ -442,27 +442,47 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
  logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
  return
 
- os.makedirs(save_directory, exist_ok=True)
-
  real_save_dir = self.model_save_dir / self.subfolder
  save_directory_path = Path(save_directory)
- if os.path.exists(real_save_dir) and os.path.isdir(real_save_dir):
- if save_directory_path.absolute() == real_save_dir.absolute():
- raise FileExistsError(
- f"Cannot save model to '{save_directory}'. "
- f"This directory already exists and contains the model files."
- )
- shutil.copytree(real_save_dir, save_directory, dirs_exist_ok=True)
- self.config.save_pretrained(save_directory)
- if self.generation_config is not None:
- self.generation_config.save_pretrained(save_directory)
- else:
+
+ if not os.path.exists(real_save_dir) or not os.path.isdir(real_save_dir):
  raise FileNotFoundError(
  f"Unable to save the model. The model directory '{real_save_dir}' does not exist or is not accessible. "
  f"Cannot save to the specified destination '{save_directory}'. "
  f"Please ensure the model directory exists and you have the necessary permissions to access it."
  )
 
+ if save_directory_path.absolute() == real_save_dir.absolute():
+ raise FileExistsError(
+ f"Cannot save model to '{save_directory}'. This directory already exists and contains the model files."
+ )
+
+ # Create a temporary directory next to the target directory
+ tmp_dir = save_directory + ".tmp"
+ try:
+ # Remove temporary directory if it exists from a previous failed attempt
+ if os.path.exists(tmp_dir):
+ shutil.rmtree(tmp_dir)
+
+ # First copy everything to a temporary directory
+ shutil.copytree(real_save_dir, tmp_dir)
+
+ # Save configs to the temporary directory
+ self.config.save_pretrained(tmp_dir)
+ if self.generation_config is not None:
+ self.generation_config.save_pretrained(tmp_dir)
+
+ # If everything succeeded, atomically replace the target directory
+ if os.path.exists(save_directory):
+ shutil.rmtree(save_directory)
+ os.rename(tmp_dir, save_directory)
+
+ except Exception as e:
+ # Clean up the temporary directory if anything fails
+ if os.path.exists(tmp_dir):
+ shutil.rmtree(tmp_dir)
+ raise e # Re-raise the exception after cleanup
+
  if push_to_hub:
  return super().push_to_hub(save_directory, **kwargs)
 
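The rewritten save_pretrained above replaces copy-in-place with a stage-and-swap sequence. A standalone sketch of the same pattern, with hypothetical `src`/`dst` paths (not part of the diff):

import os
import shutil

def replace_dir_via_staging(src: str, dst: str) -> None:
    # Stage into a sibling ".tmp" directory, then swap it in only after
    # every write has succeeded, mirroring save_pretrained above.
    tmp_dir = dst + ".tmp"
    try:
        if os.path.exists(tmp_dir):    # leftover from a failed attempt
            shutil.rmtree(tmp_dir)
        shutil.copytree(src, tmp_dir)  # all writes target the staging dir
        if os.path.exists(dst):
            shutil.rmtree(dst)
        os.rename(tmp_dir, dst)        # swap in the finished directory
    except Exception:
        if os.path.exists(tmp_dir):    # never leave the staging dir behind
            shutil.rmtree(tmp_dir)
        raise

Note that the rmtree-then-rename pair narrows, but does not fully close, the window in which `dst` is absent; os.rename itself is atomic only within a single filesystem.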
optimum/rbln/transformers/models/bert/modeling_bert.py CHANGED
@@ -13,17 +13,17 @@
  # limitations under the License.
 
  import inspect
- import logging
  from typing import TYPE_CHECKING, Any, Dict, Optional, Union
 
  from transformers import PretrainedConfig
 
  from ....modeling import RBLNModel
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
  from ...modeling_generic import RBLNModelForMaskedLM, RBLNModelForQuestionAnswering
 
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
  if TYPE_CHECKING:
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
optimum/rbln/transformers/models/clip/modeling_clip.py CHANGED
@@ -12,7 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import logging
  from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
 
  import torch
@@ -28,9 +27,10 @@ from transformers.models.clip.modeling_clip import CLIPTextModelOutput
  from ....diffusers.modeling_diffusers import RBLNDiffusionMixin
  from ....modeling import RBLNModel
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
 
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
  if TYPE_CHECKING:
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, CLIPTextModel
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py CHANGED
@@ -38,34 +38,188 @@ from .decoderonly_architecture import (
  logger = get_logger()
 
  if TYPE_CHECKING:
- from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig
+ from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
 
 
  class RBLNRuntimeModel(RBLNPytorchRuntime):
  mandatory_members = ["main_input_name", "embed_tokens"]
 
+ def __init__(
+ self,
+ runtime: rebel.Runtime,
+ phase: str,
+ batch_size: int,
+ dec_attn_mask: torch.Tensor,
+ **kwargs: Any,
+ ) -> None:
+ super().__init__(runtime, **kwargs)
+ self.phase = phase
+ self.batch_size = batch_size
+
+ # shared tensor between prefill and decode phase
+ self.dec_attn_mask = dec_attn_mask
+
+ if self.phase == "prefill":
+ vocab_size = kwargs.pop("vocab_size")
+ self.max_seq_len = kwargs.pop("max_seq_len")
+ self.prefill_chunk_size = kwargs.pop("prefill_chunk_size")
+ self.output_size = [1, 1, vocab_size]
+ self.causal_mask = 1 - torch.triu(
+ torch.ones(1, 1, self.prefill_chunk_size, self.prefill_chunk_size), diagonal=1
+ )
+
  def forward(
  self,
- input_ids: torch.LongTensor,
- inputs_embeds: torch.Tensor,
- attention_mask: torch.Tensor,
- cache_position: torch.Tensor,
- **kwargs,
+ input_ids: Optional[torch.LongTensor] = None,
+ inputs_embeds: Optional[torch.Tensor] = None,
+ cache_position: torch.Tensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ batch_idx: Optional[int] = None,
  ):
+ if input_ids is None and inputs_embeds is None:
+ raise ValueError("Either `input_ids` or `inputs_embeds` must be provided.")
+
  if inputs_embeds is None:
- inp = input_ids
+ inputs = input_ids
  if self.embed_tokens is not None:
- inp = self.embed_tokens(inp)
+ inputs = self.embed_tokens(inputs)
  else:
- inp = inputs_embeds
+ inputs = inputs_embeds
 
- return super().forward(
- inp,
- attention_mask,
+ if self.phase == "decode":
+ return self.decode_forward(
+ inputs,
+ cache_position,
+ attention_mask=attention_mask,
+ )
+ else:
+ return self.prefill_forward(inputs, cache_position, attention_mask, batch_idx)
+
+ def decode_forward(
+ self,
+ inputs: torch.Tensor,
+ cache_position: torch.Tensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ ) -> torch.FloatTensor:
+ batch_size = inputs.shape[0]
+ if batch_size != self.batch_size:
+ raise RuntimeError(
+ f"Batch size mismatch: got {batch_size}, expected {self.batch_size} (compiled batch size)."
+ )
+
+ if batch_size != cache_position.shape[0]:
+ raise RuntimeError(f"Cache position size mismatch: got {cache_position.shape[0]}, expected {batch_size}.")
+
+ if attention_mask is None:
+ for b_idx in range(batch_size):
+ decoding_step = cache_position[b_idx].item()
+ if not (0 <= decoding_step < self.dec_attn_mask.shape[-1]):
+ raise ValueError(
+ f"Decoding step {decoding_step} out of bounds for attention mask with shape {self.dec_attn_mask.shape}."
+ )
+ self.dec_attn_mask[b_idx, :, :, decoding_step] = 1
+
+ logits = super().forward(
+ inputs,
+ self.dec_attn_mask if attention_mask is None else attention_mask,
  cache_position,
- **kwargs,
  )
 
+ return logits
+
+ def prefill_forward(
+ self,
+ inputs: torch.Tensor,
+ cache_position: torch.Tensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ batch_idx: int = None,
+ ) -> torch.FloatTensor:
+ """
+ Performs chunked prefill for efficient KV-cache updates and memory optimization.
+ Instead of processing the entire sequence at once, the input is divided into chunks of size `prefill_chunk_size`,
+ and each chunk is processed sequentially. This allows for better memory utilization and compatibility with continuous batching.
+ """
+
+ if batch_idx is None or batch_idx >= self.batch_size:
+ raise RuntimeError(
+ f"Invalid batch_idx ({batch_idx}). It must be a non-null value less than the batch size ({self.batch_size})."
+ )
+
+ # Handle continuous batching in a compiled graph by extracting valid inputs
+ # If an attention mask is provided, select only the valid (non-masked) inputs
+ inputs = inputs[:, attention_mask.bool()] if attention_mask is not None else inputs
+
+ query_length = inputs.shape[1]
+ if query_length > self.max_seq_len:
+ raise ValueError(
+ f"Input length ({query_length}) exceeds the maximum allowed sequence length ({self.max_seq_len})."
+ )
+
+ # Initialize attention mask for chunked processing
+ chunked_attention_mask = torch.zeros(1, 1, self.prefill_chunk_size, self.max_seq_len, dtype=torch.float32)
+
+ # Buffer for storing output logits
+ out_buffers = [
+ torch.empty(
+ size=self.output_size,
+ dtype=torch.float32,
+ device="cpu",
+ )
+ ]
+
+ # Process input in chunks of size `prefill_chunk_size`
+ for step in range(0, query_length, self.prefill_chunk_size):
+ # Pad input and cache_position if the last chunk is smaller than `prefill_chunk_size`
+ if (step + self.prefill_chunk_size) > query_length:
+ padding_size = step + self.prefill_chunk_size - query_length
+ # inputs_embeds
+ if inputs.dim() == 3:
+ inputs = torch.nn.functional.pad(inputs, (0, 0, 0, padding_size))
+ # input_ids
+ else:
+ inputs = torch.nn.functional.pad(inputs, (0, padding_size))
+
+ cache_position = torch.cat(
+ [
+ cache_position,
+ torch.arange(
+ query_length,
+ step + self.prefill_chunk_size,
+ dtype=torch.int32,
+ ).unsqueeze(0),
+ ],
+ dim=-1,
+ )
+
+ # Extract the current chunk of inputs and cache positions
+ input_chunk = inputs[:, step : step + self.prefill_chunk_size]
+ cache_pos_chunk = cache_position[:, step : step + self.prefill_chunk_size]
+
+ # Update attention mask to ensure proper causal behavior
+ if step >= self.prefill_chunk_size:
+ chunked_attention_mask[:, :, :, step - self.prefill_chunk_size : step] = 1
+ chunked_attention_mask[:, :, :, step : step + self.prefill_chunk_size] = self.causal_mask
+
+ # Define batch position and query position
+ batch_position = torch.tensor(batch_idx, dtype=torch.int16)
+ query_position = torch.tensor((query_length - 1) % self.prefill_chunk_size, dtype=torch.int16)
+
+ # Forward pass for the current chunk
+ logits = super().forward(
+ input_chunk,
+ chunked_attention_mask,
+ cache_pos_chunk,
+ batch_position,
+ query_position,
+ out=out_buffers,
+ )
+
+ # Update decoder attention mask with processed KV-cache length from prefill phase
+ self.dec_attn_mask[batch_idx].fill_(0)
+ self.dec_attn_mask[batch_idx, :, :, :query_length] = 1
+
+ return logits
+
 
  @dataclass
  class RBLNDecoderOnlyOutput(ModelOutput):
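The chunked prefill introduced above slides over the prompt in `prefill_chunk_size` steps: mask columns for fully processed chunks are opened to 1, while the current chunk gets the lower-triangular `causal_mask` block. A toy reproduction of that mask evolution, using illustrative sizes (chunk size 4, max sequence length 12) rather than the compiled model's configuration:

import torch

prefill_chunk_size, max_seq_len = 4, 12
causal_mask = 1 - torch.triu(
    torch.ones(1, 1, prefill_chunk_size, prefill_chunk_size), diagonal=1
)
chunked_attention_mask = torch.zeros(1, 1, prefill_chunk_size, max_seq_len)

query_length = 10  # the last chunk would be padded from 2 up to 4 tokens
for step in range(0, query_length, prefill_chunk_size):
    if step >= prefill_chunk_size:
        # the previous chunk is now fully visible to later chunks
        chunked_attention_mask[:, :, :, step - prefill_chunk_size : step] = 1
    # the current chunk attends to itself causally
    chunked_attention_mask[:, :, :, step : step + prefill_chunk_size] = causal_mask
    # (the compiled runtime consumes the mask together with the padded chunk here)

print(chunked_attention_mask[0, 0].int())  # after the loop: columns 0-7 open, 8-11 causal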
@@ -103,13 +257,6 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
  self.max_seq_len = self.rbln_config.model_cfg["max_seq_len"]
  self.prefill_chunk_size = self.rbln_config.model_cfg["prefill_chunk_size"]
 
- self.prefill_attention_mask = torch.zeros(1, 1, self.prefill_chunk_size, self.max_seq_len, dtype=torch.float32)
- self.causal_mask = 1 - torch.triu(
- torch.ones(1, 1, self.prefill_chunk_size, self.prefill_chunk_size), diagonal=1
- )
- self.dec_attn_mask_init = torch.zeros(1, 1, 1, self.max_seq_len, dtype=torch.float32)
- self.dec_attn_mask = torch.zeros(self.batch_size, 1, 1, self.max_seq_len, dtype=torch.float32)
-
  main_input_name = self.main_input_name
  if self.rbln_config.model_cfg["use_inputs_embeds"]:
  main_input_name = "inputs_embeds"
@@ -124,11 +271,25 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
  else:
  self.embed_tokens = None
 
+ dec_attn_mask = torch.zeros(self.batch_size, 1, 1, self.max_seq_len, dtype=torch.float32)
  self.prefill_decoder = RBLNRuntimeModel(
- runtime=self.model[0], main_input_name=main_input_name, embed_tokens=self.embed_tokens
+ runtime=self.model[0],
+ main_input_name=main_input_name,
+ embed_tokens=self.embed_tokens,
+ phase="prefill",
+ batch_size=self.batch_size,
+ dec_attn_mask=dec_attn_mask,
+ vocab_size=self.config.vocab_size,
+ max_seq_len=self.max_seq_len,
+ prefill_chunk_size=self.prefill_chunk_size,
  )
  self.decoder = RBLNRuntimeModel(
- runtime=self.model[1], main_input_name=main_input_name, embed_tokens=self.embed_tokens
+ runtime=self.model[1],
+ main_input_name=main_input_name,
+ embed_tokens=self.embed_tokens,
+ phase="decode",
+ batch_size=self.batch_size,
+ dec_attn_mask=dec_attn_mask,
  )
 
  @classmethod
@@ -155,7 +316,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
  def get_quantized_model(
  cls,
  model_id: str,
- config: Optional[PretrainedConfig] = None,
+ config: Optional["PretrainedConfig"] = None,
  use_auth_token: Optional[Union[bool, str]] = None,
  revision: Optional[str] = None,
  force_download: bool = False,
@@ -496,32 +657,33 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
  generate_idx: Optional[torch.Tensor] = None,
  **kwargs,
  ) -> Tuple[torch.FloatTensor]:
- # prefll
+ """
+ Forward method for the RBLN-optimized model, designed for integration with the HuggingFace generate API.
+ For continuous batching, the prefill stage processes one batch at a time and updates the KV cache using batch_idx.
+ A for-loop ensures synchronization with the HuggingFace generate API.
+ The decoder stage operates as usual, processing inputs in batch mode.
+ """
+ # Prefill
  if cache_position is None:
  logits = []
- input_tensors = inputs_embeds if inputs_embeds is not None else input_ids
- batch_size = input_tensors.shape[0]
+ inputs = inputs_embeds if inputs_embeds is not None else input_ids
+ batch_size = inputs.shape[0]
 
  for b_idx in range(batch_size):
- # Transform inputs as vllm format
- if attention_mask is not None:
- input_tensor = input_tensors[b_idx : b_idx + 1, attention_mask[b_idx].bool()]
- else:
- input_tensor = input_tensors[b_idx : b_idx + 1]
-
  cache_position = torch.arange(0, generate_idx[b_idx].item(), dtype=torch.int32).unsqueeze(0)
-
- logit = self._forward_prefill(
- input_ids=input_tensor if inputs_embeds is None else None,
- inputs_embeds=input_tensor if inputs_embeds is not None else None,
+ logit = self.prefill_decoder(
+ input_ids=inputs[b_idx : b_idx + 1] if inputs_embeds is None else None,
+ inputs_embeds=inputs[b_idx : b_idx + 1] if inputs_embeds is not None else None,
+ attention_mask=attention_mask[b_idx] if attention_mask is not None else None,
  cache_position=cache_position,
  batch_idx=b_idx,
  )
  logits.append(logit)
+
  logits = torch.cat(logits, dim=0)
- # decoder
+ # Decoder
  else:
- logits = self._forward_decoder(
+ logits = self.decoder(
  input_ids=input_ids,
  inputs_embeds=inputs_embeds,
  cache_position=cache_position,
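After this refactor the two phases are driven directly through the runtime objects: one `prefill_decoder` call per sequence (each writing its own KV-cache slot via `batch_idx`), then batched `decoder` calls. A hedged sketch of a caller, assuming `model` is an RBLNDecoderOnlyModelForCausalLM with compiled batch size 2 and using toy tensors (none of this appears in the diff itself):

import torch

input_ids = torch.randint(0, 1000, (2, 16))  # (batch, prompt_len), toy data
generate_idx = torch.tensor([[16], [12]])    # true prompt length per sequence

# Prefill: one runtime call per sequence, filling its KV-cache slot.
logits = []
for b_idx in range(input_ids.shape[0]):
    cache_position = torch.arange(0, generate_idx[b_idx].item(), dtype=torch.int32).unsqueeze(0)
    logits.append(
        model.prefill_decoder(
            input_ids=input_ids[b_idx : b_idx + 1],
            cache_position=cache_position,
            batch_idx=b_idx,
        )
    )
logits = torch.cat(logits, dim=0)              # (batch, 1, vocab_size)

# Decode: all sequences step together in one batched call.
next_tokens = logits.argmax(dim=-1)            # greedy step, for illustration only
cache_position = generate_idx.to(torch.int32)  # (batch, 1): next KV-cache write position
logits = model.decoder(input_ids=next_tokens, cache_position=cache_position)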
@@ -531,119 +693,3 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
  logits=logits,
  generate_idx=generate_idx,
  )
-
- def _forward_prefill(
- self,
- input_ids: torch.LongTensor = None,
- inputs_embeds: torch.Tensor = None,
- cache_position: torch.Tensor = None,
- batch_idx: int = None,
- ) -> torch.FloatTensor:
- if batch_idx is None or batch_idx >= self.batch_size:
- raise RuntimeError(
- f"Invalid batch_idx ({batch_idx}). It must be a non-null value less than the batch size ({self.batch_size})."
- )
-
- out_buffers = [
- torch.empty(
- size=[
- 1,
- 1,
- self.config.vocab_size,
- ],
- dtype=torch.float32,
- device="cpu",
- )
- ]
-
- input_tensors = inputs_embeds if inputs_embeds is not None else input_ids
- query_length = input_tensors.shape[1]
- if query_length > self.max_seq_len:
- raise ValueError(
- f"Input length ({query_length}) exceeds the maximum allowed sequence length ({self.max_seq_len})."
- )
-
- _attention_mask = self.prefill_attention_mask.clone()
-
- for step in range(0, query_length, self.prefill_chunk_size):
- # pad input_tensors & cache_position for prefill_chunk
- if (step + self.prefill_chunk_size) > query_length:
- pad_to_chunk = step + self.prefill_chunk_size - query_length
- if inputs_embeds is not None:
- input_tensors = torch.nn.functional.pad(input_tensors, (0, 0, 0, pad_to_chunk))
- else:
- input_tensors = torch.nn.functional.pad(input_tensors, (0, pad_to_chunk))
-
- cache_position = torch.cat(
- [
- cache_position,
- torch.arange(
- query_length,
- step + self.prefill_chunk_size,
- dtype=torch.int32,
- ).unsqueeze(0),
- ],
- dim=-1,
- )
-
- # slice input_tensor & cache_position with prefill_chunk_size
- _input_tensors = input_tensors[:, step : step + self.prefill_chunk_size]
- _cache_position = cache_position[:, step : step + self.prefill_chunk_size]
-
- # update attention_mask
- if step >= self.prefill_chunk_size:
- _attention_mask[:, :, :, step - self.prefill_chunk_size : step] = 1
- _attention_mask[:, :, :, step : step + self.prefill_chunk_size] = self.causal_mask
-
- query_position = (query_length - 1) % self.prefill_chunk_size
-
- logits = self.prefill_decoder(
- input_ids=_input_tensors.contiguous() if inputs_embeds is None else None,
- inputs_embeds=_input_tensors.contiguous() if inputs_embeds is not None else None,
- attention_mask=_attention_mask.contiguous(),
- cache_position=_cache_position.contiguous(),
- batch_position=torch.tensor(batch_idx, dtype=torch.int16),
- query_position=torch.tensor(query_position, dtype=torch.int16),
- out=out_buffers,
- )
-
- # update decoder_attn_mask with preprocessed kv-cache length in prefill phase
- self.dec_attn_mask[batch_idx] = self.dec_attn_mask_init.clone()
- self.dec_attn_mask[batch_idx, :, :, :query_length] = 1
-
- return logits
-
- def _forward_decoder(
- self,
- input_ids: torch.LongTensor = None,
- inputs_embeds: torch.Tensor = None,
- cache_position: torch.Tensor = None,
- ) -> torch.FloatTensor:
- input_tensors = inputs_embeds if inputs_embeds is not None else input_ids
- if input_tensors is None:
- raise ValueError("Either `input_ids` or `inputs_embeds` must be provided.")
-
- batch_size = input_tensors.shape[0]
- if batch_size != self.batch_size:
- raise RuntimeError(
- f"Batch size mismatch: got {batch_size}, expected {self.batch_size} (compiled batch size)."
- )
-
- if batch_size != cache_position.shape[0]:
- raise RuntimeError(f"Cache position size mismatch: got {cache_position.shape[0]}, expected {batch_size}.")
-
- for b_idx in range(batch_size):
- decoding_step = cache_position[b_idx].item()
- if not (0 <= decoding_step < self.dec_attn_mask.shape[-1]):
- raise ValueError(
- f"Decoding step {decoding_step} out of bounds for attention mask with shape {self.dec_attn_mask.shape}."
- )
- self.dec_attn_mask[b_idx, :, :, decoding_step] = 1
- logits = self.decoder(
- input_ids=input_tensors.contiguous() if inputs_embeds is None else None,
- inputs_embeds=input_tensors.contiguous() if inputs_embeds is not None else None,
- attention_mask=self.dec_attn_mask.contiguous(),
- cache_position=cache_position.contiguous(),
- )
-
- return logits
optimum/rbln/transformers/models/dpt/modeling_dpt.py CHANGED
@@ -12,7 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import logging
  from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Union
 
  from transformers import AutoModelForDepthEstimation
@@ -20,9 +19,10 @@ from transformers.modeling_outputs import DepthEstimatorOutput
 
  from ....modeling import RBLNModel
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
 
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
  if TYPE_CHECKING:
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig
optimum/rbln/transformers/models/llava_next/modeling_llava_next.py CHANGED
@@ -13,7 +13,6 @@
  # limitations under the License.
 
  import inspect
- import logging
  from pathlib import Path
  from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union
 
@@ -26,14 +25,14 @@ from transformers import (
  PreTrainedModel,
  )
  from transformers.modeling_outputs import BaseModelOutputWithPooling
- from transformers.models.llava_next.modeling_llava_next import LlavaNextCausalLMOutputWithPast
 
  from ....modeling import RBLNModel
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
  from ..decoderonly.modeling_decoderonly import RBLNDecoderOnlyOutput
 
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
  if TYPE_CHECKING:
  from transformers import (
@@ -337,7 +336,7 @@ class RBLNLlavaNextForConditionalGeneration(RBLNModel):
  generate_idx: Optional[torch.Tensor] = None,
  batch_idx: Optional[int] = None,
  **kwargs,
- ) -> Union[Tuple, LlavaNextCausalLMOutputWithPast]:
+ ) -> Union[Tuple, RBLNDecoderOnlyOutput]:
  vision_feature_layer = (
  vision_feature_layer if vision_feature_layer is not None else self.config.vision_feature_layer
  )
@@ -378,7 +377,7 @@ class RBLNLlavaNextForConditionalGeneration(RBLNModel):
  inputs_embeds = [inputs_embeds[i : i + 1, attention_mask[i].bool()] for i in range(batch_size)]
  for batch_idx in range(batch_size):
  generate_idx[batch_idx] = inputs_embeds[batch_idx].shape[-2]
- logit = self.language_model._forward_prefill(
+ logit = self.language_model.prefill_decoder(
  inputs_embeds=inputs_embeds[batch_idx],
  batch_idx=batch_idx,
  cache_position=torch.arange(
@@ -390,15 +389,13 @@ class RBLNLlavaNextForConditionalGeneration(RBLNModel):
 
  logits.append(logit)
  logits = torch.cat(logits, dim=0)
- outputs = RBLNDecoderOnlyOutput(logits=logits, generate_idx=generate_idx)
  else:
- outputs: RBLNDecoderOnlyOutput = self.language_model(
+ logits = self.language_model.decoder(
  inputs_embeds=inputs_embeds,
  cache_position=cache_position,
- generate_idx=generate_idx,
  )
 
- return outputs
+ return RBLNDecoderOnlyOutput(logits=logits, generate_idx=generate_idx)
 
  # Almost copied from : https://github.com/huggingface/transformers/blob/6b550462139655d488d4c663086a63e98713c6b9/src/transformers/models/llava_next/modeling_llava_next.py
  def pack_image_features(self, image_features, image_sizes, vision_feature_select_strategy, image_newline=None):
optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py CHANGED
@@ -13,30 +13,25 @@
  # limitations under the License.
 
  import inspect
- import logging
  from abc import ABC
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
  import rebel
  import torch
  from rebel.compile_context import CompileContext
- from transformers import AutoModelForSeq2SeqLM, GenerationConfig, PretrainedConfig, PreTrainedModel
+ from transformers import AutoModelForSeq2SeqLM, PretrainedConfig, PreTrainedModel
  from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput
 
  from ....modeling import RBLNModel
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
  from ....utils.runtime_utils import RBLNPytorchRuntime
 
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
  if TYPE_CHECKING:
- from transformers import (
- AutoFeatureExtractor,
- AutoProcessor,
- AutoTokenizer,
- PretrainedConfig,
- )
+ from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, GenerationConfig, PretrainedConfig
 
 
  class RBLNRuntimeEncoder(RBLNPytorchRuntime):
@@ -50,9 +45,50 @@ class RBLNRuntimeEncoder(RBLNPytorchRuntime):
  class RBLNRuntimeDecoder(RBLNPytorchRuntime):
  mandatory_members = ["main_input_name"]
 
- def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
- outputs = super().forward(*args, **kwargs)
- return Seq2SeqLMOutput(logits=outputs)
+ def __init__(
+ self,
+ runtime: rebel.Runtime,
+ batch_size: int,
+ dec_max_seq_len: int,
+ **kwargs: Any,
+ ) -> None:
+ super().__init__(runtime, **kwargs)
+ self.batch_size = batch_size
+ self.dec_max_seq_len = dec_max_seq_len
+
+ def forward(
+ self,
+ decoder_input_ids: Optional[torch.LongTensor] = None,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ decoder_attention_mask: Optional[torch.BoolTensor] = None,
+ cache_position: Optional[torch.Tensor] = None,
+ **kwargs,
+ ) -> Tuple[torch.FloatTensor]:
+ batch_size = decoder_input_ids.shape[0]
+ if batch_size != self.batch_size:
+ raise RuntimeError(
+ f"Batch size mismatch: got {batch_size}, expected {self.batch_size} (compiled batch size)."
+ )
+
+ if batch_size != cache_position.shape[0]:
+ raise RuntimeError(f"Cache position size mismatch: got {cache_position.shape[0]}, expected {batch_size}.")
+
+ for b_idx in range(self.batch_size):
+ decoding_step = cache_position[b_idx].item()
+ if not (0 <= decoding_step < self.dec_max_seq_len):
+ raise ValueError(
+ f"Decoding step {decoding_step} out of bounds for decoder with max sequence length {self.dec_max_seq_len}."
+ )
+ decoder_attention_mask[b_idx, : decoding_step + 1] = 1
+
+ lm_logits = super().forward(
+ decoder_input_ids,
+ decoder_attention_mask,
+ attention_mask,
+ cache_position,
+ )
+
+ return Seq2SeqLMOutput(logits=lm_logits)
 
 
  class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
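In the new RBLNRuntimeDecoder, the decoder self-attention mask is rebuilt from `cache_position` on every step: row `b` is opened through its current decoding step. The update in isolation, with toy sizes:

import torch

batch_size, dec_max_seq_len = 2, 8
decoder_attention_mask = torch.zeros(batch_size, dec_max_seq_len)
cache_position = torch.tensor([[3], [5]])  # per-sequence decoding step

for b_idx in range(batch_size):
    decoding_step = cache_position[b_idx].item()
    decoder_attention_mask[b_idx, : decoding_step + 1] = 1  # positions 0..step visible

print(decoder_attention_mask)
# tensor([[1., 1., 1., 1., 0., 0., 0., 0.],
#         [1., 1., 1., 1., 1., 1., 0., 0.]])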
@@ -72,8 +108,15 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
  auto_model_class = AutoModelForSeq2SeqLM
 
  def __post_init__(self, **kwargs):
- self.encoder = RBLNRuntimeEncoder(runtime=self.model[0], main_input_name="input_ids")
- self.decoder = RBLNRuntimeDecoder(runtime=self.model[1], main_input_name="input_ids")
+ batch_size = self.rbln_config.model_cfg["batch_size"]
+ dec_max_seq_len = self.rbln_config.model_cfg["dec_max_seq_len"]
+ self.encoder = RBLNRuntimeEncoder(
+ runtime=self.model[0],
+ main_input_name="input_ids",
+ )
+ self.decoder = RBLNRuntimeDecoder(
+ runtime=self.model[1], main_input_name="input_ids", batch_size=batch_size, dec_max_seq_len=dec_max_seq_len
+ )
 
  @classmethod
  @torch.inference_mode()
@@ -304,46 +347,24 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
 
  def forward(
  self,
- input_ids: torch.LongTensor = None,
+ decoder_input_ids: torch.LongTensor = None,
  cache_position: Union[List[torch.Tensor], torch.Tensor] = None,
  **kwargs,
  ) -> Tuple[torch.FloatTensor]:
  # common decoder
  cache_position = torch.full((self.rbln_config.model_cfg["batch_size"], 1), cache_position, dtype=torch.int32)
- logits = self._forward_decoder(input_ids=input_ids, cache_position=cache_position, **kwargs).logits
+ logits = self.decoder(decoder_input_ids=decoder_input_ids, cache_position=cache_position, **kwargs).logits
 
  return Seq2SeqLMOutput(
  logits=logits,
  )
 
- def _forward_decoder(
- self,
- attention_mask: Optional[torch.FloatTensor] = None,
- decoder_input_ids: Optional[torch.LongTensor] = None,
- decoder_attention_mask: Optional[torch.BoolTensor] = None,
- cache_position: Optional[torch.Tensor] = None,
- **kwargs,
- ) -> Tuple[torch.FloatTensor]:
- dec_attention_mask = decoder_attention_mask.clone()
- for b_idx in range(self.rbln_config.model_cfg["batch_size"]):
- dec_attention_mask[b_idx, : cache_position[b_idx] + 1] = 1
-
- decoder_output = self.decoder(
- input_ids=decoder_input_ids,
- attention_mask=dec_attention_mask,
- encoder_attention_mask=attention_mask,
- cache_position=cache_position,
- )
- lm_logits = decoder_output.logits
-
- return Seq2SeqLMOutput(logits=lm_logits)
-
  def _prepare_encoder_decoder_kwargs_for_generation(
  self,
  inputs_tensor: torch.Tensor,
  model_kwargs,
  model_input_name: Optional[str] = None,
- generation_config: Optional[GenerationConfig] = None,
+ generation_config: Optional["GenerationConfig"] = None,
  ) -> Dict[str, Any]:
  # 1. get encoder
  encoder = self.get_encoder()
@@ -373,6 +394,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
  )
 
  # 3. make sure that encoder returns `ModelOutput`
+ model_input_name = model_input_name if model_input_name is not None else self.main_input_name
  encoder_kwargs["return_dict"] = True
  encoder_kwargs["output_hidden_states"] = False
  encoder_kwargs["output_attentions"] = False
optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py CHANGED
@@ -12,7 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- import logging
  from typing import TYPE_CHECKING, Any, Dict, Union
 
  import torch
@@ -21,9 +20,10 @@ from transformers.modeling_outputs import CausalLMOutput
 
  from ....modeling import RBLNModel
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
 
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
  if TYPE_CHECKING:
  from transformers import (
optimum/rbln/transformers/models/whisper/generation_whisper.py CHANGED
@@ -32,6 +32,8 @@ Modified from `transformers.models.whisper.generation_whisper.py`
  """
 
  import torch
+ import transformers
+ from packaging import version
  from transformers import GenerationMixin
  from transformers.models.whisper.generation_whisper import WhisperGenerationMixin
 
@@ -47,17 +49,12 @@ class RBLNWhisperGenerationMixin(WhisperGenerationMixin, GenerationMixin):
  self, seek_outputs, decoder_input_ids, return_token_timestamps, generation_config, *args, **kwargs
  ):
  # remove all previously passed decoder input ids
-
- ################################## rbln_change for 4.40.2###################################
- # 4.40.2 has no keyword shortform, it has seperate codes from generation_fallback
- is_shortform = kwargs.get("is_shortform", False)
- start_idx = decoder_input_ids.shape[-1] if not is_shortform else torch.tensor(0)
+ # should happen only if it is the first generated segment
+ start_idx = decoder_input_ids.shape[-1]
 
  if isinstance(seek_outputs, torch.Tensor):
- seek_outputs = seek_outputs[:, start_idx:]
- return seek_outputs, seek_outputs
+ return seek_outputs[:, start_idx:], seek_outputs
 
- ############## rbln validation#############
  if return_token_timestamps and not self.rbln_token_timestamps:
  raise RuntimeError(
  "To use .generate() with return_token_timestamps=True, the model must be compiled with rbln_token_timestamps=True. "
@@ -67,11 +64,19 @@ class RBLNWhisperGenerationMixin(WhisperGenerationMixin, GenerationMixin):
 
  if return_token_timestamps and hasattr(generation_config, "alignment_heads"):
  num_frames = getattr(generation_config, "num_frames", None)
- seek_outputs["token_timestamps"] = self._extract_token_timestamps(
- seek_outputs, generation_config.alignment_heads, num_frames=num_frames
- )
- seek_outputs["token_timestamps"] = seek_outputs["token_timestamps"][:, start_idx:]
-
+ if version.parse(transformers.__version__) >= version.parse("4.46.0"):
+ seek_outputs["token_timestamps"] = self._extract_token_timestamps(
+ seek_outputs,
+ generation_config.alignment_heads,
+ num_frames=num_frames,
+ num_input_ids=decoder_input_ids.shape[-1],
+ )
+ else:
+ seek_outputs["token_timestamps"] = self._extract_token_timestamps(
+ seek_outputs,
+ generation_config.alignment_heads,
+ num_frames=num_frames,
+ )
  seek_outputs["sequences"] = seek_outputs["sequences"][:, start_idx:]
 
  def split_by_batch_index(values, key, batch_idx):
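The timestamp hunk above gates on the installed transformers version because `_extract_token_timestamps` accepts `num_input_ids` only from transformers 4.46.0 onward. The same guard in isolation, with a hypothetical `extract_fn` standing in for the real method:

import transformers
from packaging import version

def extract_with_gate(extract_fn, *args, num_input_ids=None, **kwargs):
    # `extract_fn` is a stand-in for _extract_token_timestamps.
    if version.parse(transformers.__version__) >= version.parse("4.46.0"):
        return extract_fn(*args, num_input_ids=num_input_ids, **kwargs)
    # older signatures do not accept the keyword, so drop it
    return extract_fn(*args, **kwargs)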
@@ -87,15 +92,12 @@ class RBLNWhisperGenerationMixin(WhisperGenerationMixin, GenerationMixin):
 
  sequence_tokens = seek_outputs["sequences"]
 
- ##################################### thkim change #############################################
  valid_seekoutputs = []
  for k, v in seek_outputs.items():
  if v is not None and len(v) > 0 and v[0] is not None:
  valid_seekoutputs.append((k, v))
  seek_outputs = [
- {k: split_by_batch_index(v, k, i) for k, v in valid_seekoutputs}
- # {k: split_by_batch_index(v, k, i, is_shortform) for k, v in seek_outputs.items()}
- for i in range(sequence_tokens.shape[0])
+ {k: split_by_batch_index(v, k, i) for k, v in valid_seekoutputs} for i in range(sequence_tokens.shape[0])
  ]
 
  return sequence_tokens, seek_outputs
optimum/rbln/transformers/models/whisper/modeling_whisper.py CHANGED
@@ -13,7 +13,6 @@
  # limitations under the License.
 
  import inspect
- import logging
  from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
 
  import rebel
@@ -30,12 +29,13 @@ from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput
 
  from ....modeling import RBLNModel
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
  from ....utils.runtime_utils import RBLNPytorchRuntime
  from .generation_whisper import RBLNWhisperGenerationMixin
  from .whisper_architecture import WhisperWrapper
 
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
  if TYPE_CHECKING:
  from transformers import AutoFeatureExtractor, AutoProcessor, PretrainedConfig, PreTrainedModel
optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py CHANGED
@@ -13,7 +13,6 @@
  # limitations under the License.
 
  import inspect
- import logging
  from typing import TYPE_CHECKING, Optional, Union
 
  import torch
@@ -21,9 +20,10 @@ from transformers import PretrainedConfig
 
  from ....modeling import RBLNModel
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
 
 
- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)
 
  if TYPE_CHECKING:
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
optimum/rbln/utils/save_utils.py CHANGED
@@ -30,14 +30,15 @@
  Refer to huggingface/optimum/blob/4fdeea77d71e79451ba53e0c1f9d8f37e9704268/optimum/utils/save_utils.py
  """
 
- import logging
  from pathlib import Path
  from typing import List, Union
 
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
 
+ from .logging import get_logger
 
- logger = logging.getLogger(__name__)
+
+ logger = get_logger(__name__)
 
 
  def maybe_load_preprocessors(
{optimum_rbln-0.2.1a2.dist-info → optimum_rbln-0.2.1a4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: optimum-rbln
- Version: 0.2.1a2
+ Version: 0.2.1a4
  Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
  Project-URL: Homepage, https://rebellions.ai
  Project-URL: Documentation, https://docs.rbln.ai
{optimum_rbln-0.2.1a2.dist-info → optimum_rbln-0.2.1a4.dist-info}/RECORD CHANGED
@@ -1,23 +1,23 @@
  optimum/rbln/__init__.py,sha256=sLCjJu_MLZEKDOwHIlJP4u4GzGZx-1kqHTYGw5B4xDg,6096
- optimum/rbln/__version__.py,sha256=RUdGjwBqCynJjA53NyAfXHitjfqAoWaGNCXiO_9cNsw,413
- optimum/rbln/modeling.py,sha256=OQGLkzlE3vD3O-ZeE1Z0jK-QCqWy1V46pSCOdmehFTI,8267
- optimum/rbln/modeling_base.py,sha256=sU5Tr3SmhQZPsbKz5xo-FqMU1gC4Xd4m9xZVIx2NY7I,20359
+ optimum/rbln/__version__.py,sha256=gOdVXc7MBdh3lFQDUicnX79h1S87OceEEbF0RhUUT8g,413
+ optimum/rbln/modeling.py,sha256=REImAAKO82CqSNABR-9E1jJEsWch9amSOwOOQhFEYLY,8283
+ optimum/rbln/modeling_base.py,sha256=_5M8hVySDwCJ6qfeku2_nJAPu_5JLfAUu3HO1bc3ALM,21098
  optimum/rbln/modeling_config.py,sha256=7104bxmrvKW4Q6XTruQayiIGl8GHDFmPkJ3cknMIInE,11335
  optimum/rbln/diffusers/__init__.py,sha256=68FTAMpbbMflm8qiSqfM5J2_gFb3iU3fng6AL0TG47A,2913
  optimum/rbln/diffusers/modeling_diffusers.py,sha256=E1x-iOKEJCUB6ml0RgtFEVPPk6J6pqEF-JTEyOZzOyc,14928
  optimum/rbln/diffusers/models/__init__.py,sha256=aSL5_yd-y8Q6DxNvfQ-yl-BUNyMzI1P6AikjQMKZzpI,1357
- optimum/rbln/diffusers/models/controlnet.py,sha256=AWX_ZFpzyNFEs-B7xHaXnO9grYkNYlLFzFDPdSoKexs,10494
+ optimum/rbln/diffusers/models/controlnet.py,sha256=EM_HlzCdaZdnnK0oGpY2fQeigPqHhlwh4NHCzlmoumI,10512
  optimum/rbln/diffusers/models/autoencoders/__init__.py,sha256=nMfnwEwuOje-qKofAw-uOWUWcYV_YvnaN68IGfDdqHg,645
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py,sha256=M4GyaBlUyXetieaGQvfK3pDP5FIvy6hDTMTfOT2ymz8,9201
- optimum/rbln/diffusers/models/autoencoders/vae.py,sha256=Ejazs1JcSr-l0fyYQAtqQlTufbnCmlR_GZmadMkbNn4,2494
+ optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py,sha256=rCbC32bJnfXtsLdVvNVVHpRAkCYy6jeCSwIZ-JSReWk,9220
+ optimum/rbln/diffusers/models/autoencoders/vae.py,sha256=A-F2TRJ2vL4gNXiMT_hRGeanIFKWxJ1QaKmYVp41rwI,2513
  optimum/rbln/diffusers/models/transformers/__init__.py,sha256=TEhARgQJx_NUZzI6M8gt3aWbdzmLHnM6FMSQd9M9zCk,654
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=pnzKO7EXuEyVWIxvDMl2R0EkT_BfeMI8DwubwkGBvNY,7289
+ optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=n_krmMgiRxWrG--567PNpk58EG_X7x7H4gidIkRvwjo,7308
  optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=tf0ILmn2t-D0omITpF5T1A1UnJJ3VA7sfluVXuQWIks,14002
+ optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=Z0-eAZw1Gah24y6uOO5m9-GRruBppCSdV2NQZLNtBaI,14021
  optimum/rbln/diffusers/pipelines/__init__.py,sha256=i8AQJSoV9clLTill7wP5ECci6E7lC2gBaNuqfhYklZk,2469
  optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=mwQRhKuEsuoFn-49UKobd0a7nNIzPAqRLwZ6ftXCr-s,4094
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=NFlE6-gdK1QgfQy5F7IF5ZAxpwDethOv1AaTOVDxtbU,35163
+ optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=JWKtnZYBIfgmbAo0SLFIvHBQCv2BPSFNvpcdjG4GUOY,4113
+ optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=dGdw5cwJLS4CLv6IHskk5ZCcPgS7UDuHKbfOZ8ojNUs,35187
  optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py,sha256=7xCiXrH4ToCTHohVGFXqO7_f9G8HShYaHgZxoMZARkQ,33664
  optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py,sha256=Gzt2wg4dgFg0TV3Bu0cs8Xru3wVrxWUxxgciwZ-QKLE,44755
  optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py,sha256=RfwxNX_zQWFtvvFQJ5bt3qtHbdYdQV_3XLHm9WYCKOs,46084
@@ -49,14 +49,14 @@ optimum/rbln/transformers/models/bart/__init__.py,sha256=32HPe0_GIO0hp9U464Iv6Jd
  optimum/rbln/transformers/models/bart/bart_architecture.py,sha256=dTkgMpNkyh4vT_mZU5tQ5bvH_lRZfRjaJ1gIHvJkmgs,5479
  optimum/rbln/transformers/models/bart/modeling_bart.py,sha256=ADRbE-5N3xJ60AzzjJ4BZs_THmB71qs4XTr9iFqsEqE,5667
  optimum/rbln/transformers/models/bert/__init__.py,sha256=YVV7k_laU6yJBawZrgjIWjRmIF-Y4oQQHqyf8lsraQs,691
- optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=flzCLWqCaCnQLfWCVSRmQke_CEAXzcG0DOoUM8EAqkg,4649
+ optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=-nv-sgmHkyHQIoQvF8-lXOJiL4eaa1pq8MpdN4uRi9M,4668
  optimum/rbln/transformers/models/clip/__init__.py,sha256=ssJqlEt318ti2QaEakGh_tO3Ap1VSPCVF-ymUuvjAJs,698
- optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=LGytQ33WGV2qqXnj_1dMiPN63ytL1JlNQlc3aXaG1bA,5705
+ optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=E1QfVNq1sTCp7uvuha1ZPfXMwvMTkGV9L4oFdmy1w4g,5724
  optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=pDogsdpJKKB5rqnVFrRjwfhUvOSV-jZ3oARMsqSvOOQ,665
  optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=BjQHwoPZfM-KUQzxm4AU-PdmoMgLxnCG6kfSpGjUvrk,36578
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=mAgRRMGVHvTUjJBDlmUOjNhSNjprKSD7tLeFknrx0Rw,25810
+ optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=2OO8MEgFgcl1VPrQXxqkvmRJJEuFdexwu8XqbHDbR6Y,27609
  optimum/rbln/transformers/models/dpt/__init__.py,sha256=gP1tkR3XMNlHq1GT87ugIVvb2o_1eAUg1JaniXjy1Lw,651
- optimum/rbln/transformers/models/dpt/modeling_dpt.py,sha256=HS_f7bL2CvbWP_7NGMzPYb0GdHRE2xHF0e1DkzlRdRE,3411
+ optimum/rbln/transformers/models/dpt/modeling_dpt.py,sha256=ZsS2SOiqcA4azULB-WFEMQZbgIoOyVUKqVKqrw_tWzA,3430
  optimum/rbln/transformers/models/exaone/__init__.py,sha256=zYH_5tVa8-juEdsOIky7I33WSC3Zuhoq1upI0OHYeVw,859
  optimum/rbln/transformers/models/exaone/exaone_architecture.py,sha256=thzWLVz3eUcst4IPiOavta5QeXZw7JQwwfdIzQ_x6Ns,3029
  optimum/rbln/transformers/models/exaone/modeling_exaone.py,sha256=WjyH8PmsMljSea7kJn_Cq1FJ96OXwXAoU7hv2Q8zUnI,1747
@@ -70,7 +70,7 @@ optimum/rbln/transformers/models/llama/__init__.py,sha256=jo_j_eIrHYGNEhR5lb6g3r
  optimum/rbln/transformers/models/llama/llama_architecture.py,sha256=S7MCPfyjG5eUqgaS-QNBB0ApUD6wnb5fR0RHq7k7-pA,728
  optimum/rbln/transformers/models/llama/modeling_llama.py,sha256=Z3iony7icoFhRQ11MAuFx9UF03uJCsvJQZ6bxHXlrgk,1530
  optimum/rbln/transformers/models/llava_next/__init__.py,sha256=VLieyWm-UgvuNxw9B38wrL1Jsa09NBDX_ebABmdpTbs,670
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py,sha256=shmL1ATB-gfRC-AFW1_GgHSUtbT-ZFQbecMEpVBgn-o,26379
+ optimum/rbln/transformers/models/llava_next/modeling_llava_next.py,sha256=w_plsUOzxnhkQBhQeUqW9aJqGCvCvLtsx0XNKYjOprU,26203
  optimum/rbln/transformers/models/midm/__init__.py,sha256=UJSaErsF-z6dZERIS143WTaygffZyzEGqoQ2ZPDiM-c,855
  optimum/rbln/transformers/models/midm/midm_architecture.py,sha256=mueRmMGX6UplZb0C0RFdUOa9lsNH8YJHV6rYrDLOdlQ,5302
  optimum/rbln/transformers/models/midm/modeling_midm.py,sha256=GG25BozEZriAL-OPFGpzOjyDtSFB-NfeiLJTDAqxe20,1734
@@ -84,19 +84,19 @@ optimum/rbln/transformers/models/qwen2/__init__.py,sha256=RAMWc21W_2I6DH9xBjeNxP
  optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=9-aFDvjMzPNUyGOz0qo33RE18bUFGYZ3Wt_68zb5uJY,1530
  optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
  optimum/rbln/transformers/models/seq2seq/__init__.py,sha256=EmEMV4rOYqKyruX85d0fR73-b8N6BSD6CPcbpYdBuVk,651
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py,sha256=4GHlLf6xm7a8YegYVX-zhIqk4ouwHCzQuj8Z-jXSFJw,15407
+ optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py,sha256=HG_-8ufRWIls67imU1547V0bk9FUWC0haOBL7eyRV6k,16365
  optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py,sha256=15yoF-wyhcLcK-Z2MOUmyPlkOMNTVOJ013uBepqtpxA,18387
  optimum/rbln/transformers/models/t5/__init__.py,sha256=1skR1RmnG62WTAP3-F5P1x-V_ReFhMyirH3u56vWwvc,675
  optimum/rbln/transformers/models/t5/modeling_t5.py,sha256=MFs-3yYviV1QqSpsTB2GarTEs9wGH5AYofksLQLMBXg,8043
  optimum/rbln/transformers/models/t5/t5_architecture.py,sha256=kkjErS42mW2jv5O_xL7BaKobvvqy7BGmYOowKyHakvI,7189
  optimum/rbln/transformers/models/wav2vec2/__init__.py,sha256=YpgA0K-vyg9veh0eL_jxauosbRpb_kpGKHvvQLBspKM,649
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py,sha256=Ws0tw2j9Mp8BREP6nI-Ann_U0rhkqofaQFCKoepDYRA,3837
+ optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py,sha256=JYJmV52j6cBwim4RanVJryfKnV80V96ol0A-oR6o7cg,3856
  optimum/rbln/transformers/models/whisper/__init__.py,sha256=ktnNe5ri3ycCWZ_W_voFB9y9-vgGgxS1X9s8LBRZmWc,665
- optimum/rbln/transformers/models/whisper/generation_whisper.py,sha256=eT4pEmahW4VqDKP6VGuuOjiBnoG1j3xkCNR4U2FKXDI,4707
- optimum/rbln/transformers/models/whisper/modeling_whisper.py,sha256=EIiP3DVovFNDMEuE_EaUpX_uFoAlsFVnd5svawp-VGQ,15821
+ optimum/rbln/transformers/models/whisper/generation_whisper.py,sha256=GIHTca3b1VtW81kp7BzKQ7f77c2t9OsEsbZetripgDo,4582
+ optimum/rbln/transformers/models/whisper/modeling_whisper.py,sha256=0nBADNxE0A1ozBbRutTBvxpo_Y1qkOycT_zronkN-ZU,15840
  optimum/rbln/transformers/models/whisper/whisper_architecture.py,sha256=eP3UgkwCRaaFjc5Jc4ZEiWxr3-L7oJx9KzpJ7eFkwUs,13158
  optimum/rbln/transformers/models/xlm_roberta/__init__.py,sha256=fC7iNcdxBZ_6eOF2snStmf8r2M3c8O_-XcXnQEaHQCE,653
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=jqQXBl0pezmd7eXallgWWvS_a6jXFVHBVA0ily5USPg,4748
+ optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=lKSeL3RUwIyfuca2jZ6SFV4N59EJS4UD59JMUfh3BiA,4767
  optimum/rbln/transformers/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  optimum/rbln/transformers/utils/rbln_quantization.py,sha256=gwBVHf97sQgPNmGa0wq87E8mPyrtXYhMnO4X4sKp3c8,7639
  optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
@@ -106,9 +106,9 @@ optimum/rbln/utils/import_utils.py,sha256=ec-tISKIjUPHIfjzj6p-w78NVejWVBohb59f7J
  optimum/rbln/utils/logging.py,sha256=VKKBmlQSdg6iZCGmAXaWYiW67K84jyp1QJhLQSSjPPE,3453
  optimum/rbln/utils/model_utils.py,sha256=DfD_Z2qvZHqcddXqnzTM1AN8khanj3-DXK2lJvVxDvs,1278
  optimum/rbln/utils/runtime_utils.py,sha256=5-DYniyP59nx-mrrbi7AqA77L85b4Cm5oLpaxidSyss,3699
- optimum/rbln/utils/save_utils.py,sha256=W5ON-90xLcz1suFlZwOhmYB5Mf9XSTu00xrfTfyL88U,3608
+ optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
  optimum/rbln/utils/submodule.py,sha256=oZoGrItB8WqY4i-K9WJPlLlcLohc1YGB9OHB8_XZw3A,4071
- optimum_rbln-0.2.1a2.dist-info/METADATA,sha256=hwr_UclFUgkNbtwCanMy_5lF56FPX8WG0GGz3B0cJCc,5300
- optimum_rbln-0.2.1a2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- optimum_rbln-0.2.1a2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
- optimum_rbln-0.2.1a2.dist-info/RECORD,,
+ optimum_rbln-0.2.1a4.dist-info/METADATA,sha256=r7LWvGMq_IPBPFfe8MXAMmI7zxvArSL-rE-y4C73alo,5300
+ optimum_rbln-0.2.1a4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ optimum_rbln-0.2.1a4.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+ optimum_rbln-0.2.1a4.dist-info/RECORD,,