optimum_rbln-0.7.4a2-py3-none-any.whl → optimum_rbln-0.7.4a4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__version__.py +1 -1
- optimum/rbln/modeling.py +8 -1
- optimum/rbln/modeling_base.py +0 -5
- optimum/rbln/ops/__init__.py +3 -7
- optimum/rbln/ops/attn.py +271 -207
- optimum/rbln/ops/flash_attn.py +161 -67
- optimum/rbln/ops/kv_cache_update.py +4 -40
- optimum/rbln/transformers/models/bart/__init__.py +1 -0
- optimum/rbln/transformers/models/decoderonly/__init__.py +10 -0
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +80 -94
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +17 -13
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +12 -21
- optimum/rbln/transformers/models/t5/__init__.py +1 -0
- optimum/rbln/transformers/models/t5/modeling_t5.py +3 -37
- optimum/rbln/transformers/models/t5/t5_architecture.py +3 -4
- optimum/rbln/transformers/models/time_series_transformers/__init__.py +1 -0
- optimum/rbln/transformers/models/time_series_transformers/time_series_transformers_architecture.py +12 -22
- optimum/rbln/transformers/models/whisper/__init__.py +1 -0
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +0 -1
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +20 -32
- {optimum_rbln-0.7.4a2.dist-info → optimum_rbln-0.7.4a4.dist-info}/METADATA +1 -1
- {optimum_rbln-0.7.4a2.dist-info → optimum_rbln-0.7.4a4.dist-info}/RECORD +24 -24
- {optimum_rbln-0.7.4a2.dist-info → optimum_rbln-0.7.4a4.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.7.4a2.dist-info → optimum_rbln-0.7.4a4.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/ops/flash_attn.py
CHANGED
@@ -12,71 +12,165 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from functools import lru_cache
-
 import torch
-from …
[… old lines 19-82 were also deleted; their content is not rendered in this diff view …]
+from torch import Tensor
+
+
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_flash_attn_decode",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_flash_attn_decode(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+) -> Tensor:
+    """Defines the computation pattern for fused flash attention with KV cache for decoding.
+
+    Returns a tensor with the same shape as q.
+    """
+    return torch.empty_like(q)
+
+
+@paged_flash_attn_decode.register_fake
+def paged_flash_attn_decode_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+) -> Tensor:
+    return torch.empty_like(q)
+
+
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_flash_attn_prefill",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_flash_attn_prefill(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+) -> Tensor:
+    """Defines the computation pattern for fused flash attention with KV cache for prefill.
+
+    Returns a tensor with the same shape as q.
+    """
+    return torch.empty_like(q)
+
+
+@paged_flash_attn_prefill.register_fake
+def paged_flash_attn_prefill_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    mask: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+) -> Tensor:
+    return torch.empty_like(q)
+
+
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_flash_causal_attn_decode",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_flash_causal_attn_decode(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+) -> Tensor:
+    """Defines the computation pattern for fused causal flash attention with KV cache for decoding.
+
+    Returns a tensor with the same shape as q.
+    """
+    return torch.empty_like(q)
+
+
+@paged_flash_causal_attn_decode.register_fake
+def paged_flash_causal_attn_decode_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+) -> Tensor:
+    return torch.empty_like(q)
+
+
+@torch.library.custom_op(
+    "rbln_custom_ops::paged_flash_causal_attn_prefill",
+    mutates_args=(["kcache", "vcache"]),
+)
+def paged_flash_causal_attn_prefill(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+) -> Tensor:
+    """Defines the computation pattern for fused causal flash attention with KV cache for prefill.
+
+    Returns a tensor with the same shape as q.
+    """
+    return torch.empty_like(q)
+
+
+@paged_flash_causal_attn_prefill.register_fake
+def paged_flash_causal_attn_prefill_fake(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    kcache: Tensor,
+    vcache: Tensor,
+    seq: Tensor,
+    scale: Tensor,
+    block_table: Tensor,
+    block_size: int,
+    partition: int,
+) -> Tensor:
+    return torch.empty_like(q)
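The new module defines each kernel as a `torch.library.custom_op` with a matching `register_fake` shape stub, instead of registering ops lazily through helper functions. A minimal sketch of the same pattern in isolation (assumes PyTorch >= 2.4; the `toy_ns::toy_attn` name and the shapes are illustrative, not part of optimum-rbln):

import torch
from torch import Tensor


@torch.library.custom_op("toy_ns::toy_attn", mutates_args=())
def toy_attn(q: Tensor, scale: float) -> Tensor:
    # Eager placeholder body; a backend compiler would swap in the fused kernel.
    return q * scale


@toy_attn.register_fake
def toy_attn_fake(q: Tensor, scale: float) -> Tensor:
    # Shape/dtype-only stub used while tracing (FakeTensor / torch.compile / torch.export).
    return torch.empty_like(q)


q = torch.randn(1, 8, 1, 64)
out = torch.ops.toy_ns.toy_attn(q, 0.125)  # addressable under torch.ops, like the paged_* ops above
assert out.shape == q.shape

Because the eager body is only a placeholder, the op traces cleanly while the optimized implementation is supplied by the RBLN compiler at build time.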
optimum/rbln/ops/kv_cache_update.py
CHANGED
@@ -12,49 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from functools import lru_cache
-
 import torch
-from …
-
+from torch import Tensor

-if is_torch_greater_or_equal_than_2_4:
-    register_fake = torch.library.register_fake
-else:
-    register_fake = torch.library.impl_abstract

-
-
-def register_rbln_custom_cache_update():
+@torch.library.custom_op("rbln_custom_ops::rbln_cache_update", mutates_args=(["cache"]))
+def rbln_cache_update(cache: Tensor, state: Tensor, position: Tensor, axis: Tensor) -> Tensor:
     # Define the RBLN custom operation "rbln_cache_update" which updates a cache tensor with a given state tensor.
     # This operation is designed to perform in-place updates directly on the device without needing to transfer the cache back to the host.
     # The `position` parameter specifies the start index for the update along the specified axis, allowing flexible updates to any part of the cache tensor.
-    torch.…
-
-    # Implementation of the "rbln_cache_update" operation for the CPU.
-    @torch.library.impl("rbln_custom_ops::rbln_cache_update", "cpu")
-    def rbln_cache_update_cpu(cache, state, position, axis):
-        assert position.dim() == 0
-        assert axis.dim() == 0
-
-        # Calculate the start (s) and end (e) indices for the update based on the position and the shape of the state tensor along the specified axis.
-        s = position  # Start index for the update, specified by the position.
-        e = (
-            position + state.shape[axis]
-        )  # End index is determined by adding the size of the state along the given axis.
-
-        # Update the specified portion of the cache tensor with the state tensor, using `slice_scatter`.
-        # This operation modifies the cache tensor in-place directly on the device, avoiding any unnecessary transfers between host and device.
-        cache.slice_scatter(state, dim=axis, start=s, end=e)
-
-        # 'rbln_cache_update' is an in-place operation that isn't tracked in JIT trace, so a dummy output was added to the return value.
-        return torch.empty([256])
-
-    # Register a "fake" implementation of the "rbln_cache_update" operation.
-    # This serves as an abstract definition for the RBLN compiler to recognize the operation and generate an optimized implementation.
-    @register_fake("rbln_custom_ops::rbln_cache_update")
-    def rbln_cache_update_abstract(cache, state, position, axis):
-        # Return a tensor with the same shape as the input cache tensor.
-        # This is a placeholder for the abstract implementation and does not perform any actual computation.
-        # Like the actual implementation, the abstraction assumes in-place device-side updates.
-        return torch.empty([256])
+    return torch.empty_like(cache)
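The cache update is now an ordinary functional custom op, `rbln_cache_update(cache, state, position, axis)`, rather than something registered by calling a helper first. A hedged usage sketch, assuming that importing `optimum.rbln.ops` registers the op; all shapes are illustrative, and in eager mode the op only returns a placeholder with the cache's shape (the real device-side update happens in the compiled graph):

import torch

import optimum.rbln.ops  # noqa: F401  (assumption: importing this module registers the rbln_custom_ops ops)

cache = torch.zeros(4, 8, 512, 64)          # e.g. (batch, heads, seq, head_dim); illustrative only
state = torch.randn(1, 8, 512, 64)          # the slice to write into the cache
position = torch.tensor(2)                  # scalar start index along `axis` (here: batch entry 2)
axis = torch.tensor(0, dtype=torch.int16)   # batch axis, mirroring Seq2SeqEncoderWrapper later in this diff

out = torch.ops.rbln_custom_ops.rbln_cache_update(cache, state, position, axis)
assert out.shape == cache.shape             # eager execution only returns a placeholder of the cache's shape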
optimum/rbln/transformers/models/decoderonly/__init__.py
CHANGED
@@ -12,4 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from ....ops import (
+    paged_attn_decode,
+    paged_attn_prefill,
+    paged_causal_attn_decode,
+    paged_causal_attn_prefill,
+    paged_flash_attn_decode,
+    paged_flash_attn_prefill,
+    paged_flash_causal_attn_decode,
+    paged_flash_causal_attn_prefill,
+)
 from .modeling_decoderonly import RBLNDecoderOnlyModelForCausalLM
optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py
CHANGED
@@ -19,12 +19,6 @@ import torch
 from torch import nn
 from transformers import PretrainedConfig, PreTrainedModel

-from ....ops import (
-    register_rbln_custom_paged_attention,
-    register_rbln_custom_paged_causal_attention,
-    register_rbln_custom_paged_flash_attention,
-    register_rbln_custom_paged_flash_causal_attention,
-)
 from ....utils import logging
 from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS

@@ -162,16 +156,8 @@ class DecoderOnlyWrapper(nn.Module):
         self.use_attention_mask = use_attention_mask
         if self.attn_impl == "flash_attn":
             self.kvcache_partition_len = kvcache_partition_len or DEFAULT_FLASH_ATTN_PARTITION_LENGTH
-            if self.use_attention_mask:
-                register_rbln_custom_paged_flash_attention()
-            else:
-                register_rbln_custom_paged_flash_causal_attention()
         elif self.attn_impl == "eager":
             self.kvcache_partition_len = None
-            if self.use_attention_mask:
-                register_rbln_custom_paged_attention()
-            else:
-                register_rbln_custom_paged_causal_attention()
         else:
             raise ValueError(f"Unknown attn_impl : {self.attn_impl}")

@@ -756,55 +742,55 @@ class AttentionOp(nn.Module):
         if self.phase == "decode":
             if self.use_attention_mask:
                 attn_output = torch.ops.rbln_custom_ops.paged_attn_decode(
-                    query_state,
-                    key_state,
-                    value_state,
-                    attn_mask,
-                    past_key_state.unsqueeze(2),
-                    past_value_state.unsqueeze(2),
-                    seq_position,
-                    scale,
-                    block_tables,
-                    block_size,
+                    q=query_state,
+                    k=key_state,
+                    v=value_state,
+                    mask=attn_mask,
+                    kcache=past_key_state.unsqueeze(2),
+                    vcache=past_value_state.unsqueeze(2),
+                    seq=seq_position,
+                    scale=scale,
+                    block_table=block_tables,
+                    block_size=block_size,
                 )
             else:
                 attn_output = torch.ops.rbln_custom_ops.paged_causal_attn_decode(
-                    query_state,
-                    key_state,
-                    value_state,
-                    past_key_state.unsqueeze(2),
-                    past_value_state.unsqueeze(2),
-                    seq_position,
-                    scale,
-                    block_tables,
-                    block_size,
+                    q=query_state,
+                    k=key_state,
+                    v=value_state,
+                    kcache=past_key_state.unsqueeze(2),
+                    vcache=past_value_state.unsqueeze(2),
+                    seq=seq_position,
+                    scale=scale,
+                    block_table=block_tables,
+                    block_size=block_size,
                 )

         else:
             if self.use_attention_mask:
                 attn_output = torch.ops.rbln_custom_ops.paged_attn_prefill(
-                    query_state,
-                    key_state,
-                    value_state,
-                    attn_mask,
-                    past_key_state.unsqueeze(2),
-                    past_value_state.unsqueeze(2),
-                    seq_position,
-                    scale,
-                    block_tables,
-                    block_size,
+                    q=query_state,
+                    k=key_state,
+                    v=value_state,
+                    mask=attn_mask,
+                    kcache=past_key_state.unsqueeze(2),
+                    vcache=past_value_state.unsqueeze(2),
+                    seq=seq_position,
+                    scale=scale,
+                    block_table=block_tables,
+                    block_size=block_size,
                 )
             else:
                 attn_output = torch.ops.rbln_custom_ops.paged_causal_attn_prefill(
-                    query_state,
-                    key_state,
-                    value_state,
-                    past_key_state.unsqueeze(2),
-                    past_value_state.unsqueeze(2),
-                    seq_position,
-                    scale,
-                    block_tables,
-                    block_size,
+                    q=query_state,
+                    k=key_state,
+                    v=value_state,
+                    kcache=past_key_state.unsqueeze(2),
+                    vcache=past_value_state.unsqueeze(2),
+                    seq=seq_position,
+                    scale=scale,
+                    block_table=block_tables,
+                    block_size=block_size,
                 )

         attn_output = attn_output.view(batch_size, self.num_heads, -1, self.head_dim)
@@ -1015,58 +1001,58 @@ class FlashAttentionOp(AttentionOp):
         if self.phase == "decode":
             if self.use_attention_mask:
                 attn_output = torch.ops.rbln_custom_ops.paged_flash_attn_decode(
-                    query_state,
-                    key_state,
-                    value_state,
-                    attn_mask,
-                    past_key_state.unsqueeze(2),
-                    past_value_state.unsqueeze(2),
-                    seq_position,
-                    scale,
-                    block_tables,
-                    kvcache_block_size,
-                    self.kvcache_partition_size,
+                    q=query_state,
+                    k=key_state,
+                    v=value_state,
+                    mask=attn_mask,
+                    kcache=past_key_state.unsqueeze(2),
+                    vcache=past_value_state.unsqueeze(2),
+                    seq=seq_position,
+                    scale=scale,
+                    block_table=block_tables,
+                    block_size=kvcache_block_size,
+                    partition=self.kvcache_partition_size,
                 )
             else:
                 attn_output = torch.ops.rbln_custom_ops.paged_flash_causal_attn_decode(
-                    query_state,
-                    key_state,
-                    value_state,
-                    past_key_state.unsqueeze(2),
-                    past_value_state.unsqueeze(2),
-                    seq_position,
-                    scale,
-                    block_tables,
-                    kvcache_block_size,
-                    self.kvcache_partition_size,
+                    q=query_state,
+                    k=key_state,
+                    v=value_state,
+                    kcache=past_key_state.unsqueeze(2),
+                    vcache=past_value_state.unsqueeze(2),
+                    seq=seq_position,
+                    scale=scale,
+                    block_table=block_tables,
+                    block_size=kvcache_block_size,
+                    partition=self.kvcache_partition_size,
                 )
         else:
             if self.use_attention_mask:
                 attn_output = torch.ops.rbln_custom_ops.paged_flash_attn_prefill(
-                    query_state,
-                    key_state,
-                    value_state,
-                    attn_mask,
-                    past_key_state.unsqueeze(2),
-                    past_value_state.unsqueeze(2),
-                    seq_position,
-                    scale,
-                    block_tables,
-                    kvcache_block_size,
-                    self.kvcache_partition_size,
+                    q=query_state,
+                    k=key_state,
+                    v=value_state,
+                    mask=attn_mask,
+                    kcache=past_key_state.unsqueeze(2),
+                    vcache=past_value_state.unsqueeze(2),
+                    seq=seq_position,
+                    scale=scale,
+                    block_table=block_tables,
+                    block_size=kvcache_block_size,
+                    partition=self.kvcache_partition_size,
                 )
             else:
                 attn_output = torch.ops.rbln_custom_ops.paged_flash_causal_attn_prefill(
-                    query_state,
-                    key_state,
-                    value_state,
-                    past_key_state.unsqueeze(2),
-                    past_value_state.unsqueeze(2),
-                    seq_position,
-                    scale,
-                    block_tables,
-                    kvcache_block_size,
-                    self.kvcache_partition_size,
+                    q=query_state,
+                    k=key_state,
+                    v=value_state,
+                    kcache=past_key_state.unsqueeze(2),
+                    vcache=past_value_state.unsqueeze(2),
+                    seq=seq_position,
+                    scale=scale,
+                    block_table=block_tables,
+                    block_size=kvcache_block_size,
+                    partition=self.kvcache_partition_size,
                 )

         # reshape for removing repeat_kv
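Because the ops now carry named parameters, the call sites above pass keyword arguments, so each tensor binds to an explicit role (q/k/v, kcache/vcache, block_table, partition) instead of a position. A sketch of the same call outside the model wrapper (assumes `optimum.rbln.ops` is imported; every shape and value below is a placeholder, and eager execution only exercises the placeholder body):

import torch

import optimum.rbln.ops  # noqa: F401  (assumption: importing registers the ops)

q = torch.randn(1, 8, 1, 1, 64)          # placeholder shapes, not the real RBLN layout
k = torch.randn(1, 8, 1, 1, 64)
v = torch.randn(1, 8, 1, 1, 64)
kcache = torch.zeros(1, 8, 1, 1024, 64)
vcache = torch.zeros(1, 8, 1, 1024, 64)

out = torch.ops.rbln_custom_ops.paged_flash_causal_attn_decode(
    q=q,
    k=k,
    v=v,
    kcache=kcache,
    vcache=vcache,
    seq=torch.tensor([10]),
    scale=torch.tensor(0.125),
    block_table=torch.zeros(1, 8, dtype=torch.int16),
    block_size=128,
    partition=512,
)
assert out.shape == q.shape  # the placeholder body returns empty_like(q)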
optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py
CHANGED
@@ -247,19 +247,23 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
         enc_input_info = [
             ("input_ids", [1, rbln_enc_max_seq_len], "int64"),
             ("attention_mask", [1, rbln_enc_max_seq_len], "float32"),
-            (
-                "cross_key_value_states",
-                [
-                    n_layer * 2,
-                    rbln_batch_size,
-                    n_head,
-                    rbln_enc_max_seq_len,
-                    d_kv,
-                ],
-                "float32",
-            ),
             ("block_tables", [1], "int16"),
         ]
+        enc_input_info.extend(
+            [
+                (
+                    f"cross_key_value_states_{i}",
+                    [
+                        rbln_batch_size,
+                        n_head,
+                        rbln_enc_max_seq_len,
+                        d_kv,
+                    ],
+                    "float32",
+                )
+                for i in range(n_layer * 2)
+            ]
+        )

         dec_input_info = [
             ("input_ids", [rbln_batch_size, 1], "int64"),
@@ -274,9 +278,8 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
         dec_input_info.extend(
             [
                 (
-                    "…
+                    f"cross_key_value_states_{i}",
                     [
-                        n_layer * 2,
                         rbln_batch_size,
                         n_head,
                         rbln_enc_max_seq_len,
@@ -284,6 +287,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
                     ],
                     "float32",
                 )
+                for i in range(n_layer * 2)
             ]
         )
         dec_input_info.extend(
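The compile-time input specification no longer declares one stacked `cross_key_value_states` tensor of shape `[n_layer * 2, batch, heads, seq, d_kv]`; each layer's key and value cache becomes its own named input. A small sketch of the resulting encoder spec with toy sizes (the numbers are placeholders, mirroring the list comprehension added above):

n_layer, rbln_batch_size, n_head, rbln_enc_max_seq_len, d_kv = 2, 1, 8, 512, 64

enc_input_info = [
    ("input_ids", [1, rbln_enc_max_seq_len], "int64"),
    ("attention_mask", [1, rbln_enc_max_seq_len], "float32"),
    ("block_tables", [1], "int16"),
]
enc_input_info.extend(
    (f"cross_key_value_states_{i}", [rbln_batch_size, n_head, rbln_enc_max_seq_len, d_kv], "float32")
    for i in range(n_layer * 2)
)

# One entry per layer per K/V, instead of a single stacked "cross_key_value_states" tensor:
print(enc_input_info[3])  # ('cross_key_value_states_0', [1, 8, 512, 64], 'float32')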
optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py
CHANGED
@@ -18,12 +18,6 @@ import torch
 from torch import nn
 from transformers.utils import logging

-from ....ops import (
-    register_rbln_custom_cache_update,
-    register_rbln_custom_paged_attention,
-    register_rbln_custom_paged_causal_attention,
-)
-

 logger = logging.get_logger(__name__)

@@ -59,7 +53,6 @@ class Seq2SeqEncoderWrapper(nn.Module):

     def __init__(self, model: nn.Module, enc_max_seq_len: int):
         super().__init__()
-        register_rbln_custom_cache_update()
         self.config = model.config
         self.encoder = model.get_encoder()
         self.encoder_max_length = enc_max_seq_len
@@ -90,8 +83,8 @@ class Seq2SeqEncoderWrapper(nn.Module):
         self,
         input_ids: torch.Tensor,
         attention_mask: torch.Tensor,
-        cross_key_values: torch.Tensor,
         b_idx: torch.Tensor,
+        *cross_key_values: Tuple[torch.Tensor],
     ) -> Tuple[torch.Tensor]:
         # 1. get encoder last_hidden_states
         encoder_outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
@@ -110,13 +103,15 @@ class Seq2SeqEncoderWrapper(nn.Module):
             cross_kv.append(past_k)
             cross_kv.append(past_v)

-        cross_kv = torch.stack(cross_kv, dim=0)
-
         # 3. update the cross_attention's past_key_value direct to the device-dram for optimization.
-        batch_axis = torch.tensor(…
-        …
+        batch_axis = torch.tensor(0, dtype=torch.int16)
+        cross_key_values = list(cross_key_values)
+        for i in range(self.n_layer * 2):
+            cross_key_values[i] = torch.ops.rbln_custom_ops.rbln_cache_update(
+                cross_key_values[i], cross_kv[i], b_idx[0], batch_axis
+            )

-        return …
+        return cross_key_values


 class Seq2SeqDecoderWrapper(nn.Module):
@@ -146,11 +141,6 @@ class Seq2SeqDecoderWrapper(nn.Module):
         It is inspired by the BART architecture, but it is designed to be flexible and can be overridden
         by subclasses to modify or add custom attributes as necessary.
         """
-        if self.use_attention_mask:
-            register_rbln_custom_paged_attention()
-        else:
-            register_rbln_custom_paged_causal_attention()
-
         self.num_layers = self.config.decoder_layers
         self.conditional_generation = self.convert_to_rbln_conditional_generation(model)

@@ -176,16 +166,17 @@ class Seq2SeqDecoderWrapper(nn.Module):
                 encoder_attention_mask,
                 cache_position,
                 block_tables,
-                …
-                *self_kv_cache,
+                *kv_cache,
             ) = args

         else:
             attention_mask = None
-            (input_ids, encoder_attention_mask, cache_position, block_tables, …
+            (input_ids, encoder_attention_mask, cache_position, block_tables, *kv_cache) = args

         self_past_key_values = ()
         cross_past_key_values = ()
+        self_kv_cache = kv_cache[self.num_layers * 2 :]
+        cross_kv_cache = kv_cache[: self.num_layers * 2]
         for i in range(0, self.num_layers * 2, 2):
             self_past_key_values = self_past_key_values + ((self_kv_cache[i], self_kv_cache[i + 1]),)
             cross_past_key_values = cross_past_key_values + ((cross_kv_cache[i], cross_kv_cache[i + 1]),)
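The decoder wrapper now receives one flat `*kv_cache` argument and slices it by convention: the first `num_layers * 2` tensors are the cross-attention caches, the remainder are the self-attention caches, and each consecutive pair forms a layer's (key, value). A toy sketch of that slicing, with string labels standing in for tensors:

num_layers = 2
# Flat argument list: cross-attention K/V for every layer first, then self-attention K/V.
kv_cache = [f"cross_{i}" for i in range(num_layers * 2)] + [f"self_{i}" for i in range(num_layers * 2)]

cross_kv_cache = kv_cache[: num_layers * 2]
self_kv_cache = kv_cache[num_layers * 2 :]

self_past_key_values = tuple(
    (self_kv_cache[i], self_kv_cache[i + 1]) for i in range(0, num_layers * 2, 2)
)
cross_past_key_values = tuple(
    (cross_kv_cache[i], cross_kv_cache[i + 1]) for i in range(0, num_layers * 2, 2)
)
print(self_past_key_values)   # (('self_0', 'self_1'), ('self_2', 'self_3'))
print(cross_past_key_values)  # (('cross_0', 'cross_1'), ('cross_2', 'cross_3'))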