optimum-rbln 0.7.3a1__py3-none-any.whl → 0.7.3a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/ops/__init__.py +4 -4
- optimum/rbln/ops/attn.py +44 -84
- optimum/rbln/ops/flash_attn.py +25 -25
- optimum/rbln/transformers/models/bart/bart_architecture.py +10 -6
- optimum/rbln/transformers/models/bart/modeling_bart.py +3 -1
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +79 -51
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +157 -34
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +7 -2
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +7 -2
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +3 -1
- optimum/rbln/transformers/models/midm/midm_architecture.py +3 -1
- optimum/rbln/transformers/models/phi/phi_architecture.py +5 -3
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +44 -13
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +50 -19
- optimum/rbln/transformers/models/t5/modeling_t5.py +211 -2
- optimum/rbln/transformers/models/t5/t5_architecture.py +69 -3
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +19 -24
- {optimum_rbln-0.7.3a1.dist-info → optimum_rbln-0.7.3a3.dist-info}/METADATA +1 -1
- {optimum_rbln-0.7.3a1.dist-info → optimum_rbln-0.7.3a3.dist-info}/RECORD +22 -22
- {optimum_rbln-0.7.3a1.dist-info → optimum_rbln-0.7.3a3.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.7.3a1.dist-info → optimum_rbln-0.7.3a3.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__version__.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.7.3a1'
-__version_tuple__ = version_tuple = (0, 7, 3, 'a1')
+__version__ = version = '0.7.3a3'
+__version_tuple__ = version_tuple = (0, 7, 3, 'a3')
optimum/rbln/ops/__init__.py
CHANGED
@@ -13,9 +13,9 @@
 # limitations under the License.
 
 from .attn import (
-    register_rbln_custom_masked_attention,
-    register_rbln_custom_causal_masked_attention,
-    register_rbln_custom_attention_add_softmax,
+    register_rbln_custom_add_softmax_attention,
+    register_rbln_custom_paged_attention,
+    register_rbln_custom_paged_causal_attention,
 )
-from .flash_attn import register_rbln_custom_flash_masked_attention, register_rbln_custom_flash_causal_masked_attention
+from .flash_attn import register_rbln_custom_paged_flash_attention, register_rbln_custom_paged_flash_causal_attention
 from .kv_cache_update import register_rbln_custom_cache_update
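For context, each of the re-exported helpers above registers an op schema, a CPU pattern implementation, and a fake (meta) implementation, and is wrapped in @lru_cache so repeated calls are harmless. A minimal usage sketch, assuming only the names exported in this import block:

import torch

from optimum.rbln.ops import (
    register_rbln_custom_paged_attention,
    register_rbln_custom_paged_causal_attention,
)

# Registration is idempotent (the helpers are @lru_cache-decorated), so calling
# them again before tracing or compiling a model does no harm.
register_rbln_custom_paged_attention()
register_rbln_custom_paged_causal_attention()

# After registration the custom ops are reachable through the torch.ops namespace.
print(torch.ops.rbln_custom_ops.paged_attn_decode)
print(torch.ops.rbln_custom_ops.paged_causal_attn_decode)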
optimum/rbln/ops/attn.py
CHANGED
@@ -25,14 +25,14 @@ else:
 
 
 @lru_cache
-def register_rbln_custom_masked_attention():
+def register_rbln_custom_paged_attention():
     torch.library.define(
-        "rbln_custom_ops::
-        "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d) -> Tensor[]",
+        "rbln_custom_ops::paged_attn_decode",
+        "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f) -> Tensor[]",
     )
 
-    @torch.library.impl("rbln_custom_ops::
-    def attn_decode_cpu(q, k, v, mask, kcache, vcache, seq, scale):
+    @torch.library.impl("rbln_custom_ops::paged_attn_decode", "cpu")
+    def attn_decode_cpu(q, k, v, mask, kcache, vcache, seq, scale, block_table, block_size):
         """Defines the computation pattern for fused attention with KV cache updates.
 
         IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
@@ -51,8 +51,10 @@ def register_rbln_custom_masked_attention():
         - mask: [batch=1, n_heads, 1, 1, max_seq_len] - Attention mask
         - kcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Key cache
         - vcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Value cache
-        - seq: [1] - Current sequence position
+        - seq: [1, 1] - Current sequence position
         - scale: [] - Attention scale factor
+        - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
+        - block_size: [] - Number of tokens per block
 
         Returns:
             Tuple[Tensor, Tensor, Tensor]:
@@ -66,8 +68,8 @@ def register_rbln_custom_masked_attention():
             torch.empty(*vcache.shape, device=vcache.device),
         )
 
-    @register_fake("rbln_custom_ops::
-    def attn_decode_abstract(q, k, v, m, kcache, vcache, seq,
+    @register_fake("rbln_custom_ops::paged_attn_decode")
+    def attn_decode_abstract(q, k, v, m, kcache, vcache, seq, scale, block_table, block_size):
         return (
             q,
             torch.empty(*kcache.shape, device=kcache.device),
@@ -75,12 +77,12 @@ def register_rbln_custom_masked_attention():
         )
 
     torch.library.define(
-        "rbln_custom_ops::
-        "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e) -> Tensor[]",
+        "rbln_custom_ops::paged_attn_prefill",
+        "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f) -> Tensor[]",
     )
 
-    @torch.library.impl("rbln_custom_ops::
-    def attn_prefill_cpu(q, k, v, mask, kcache, vcache,
+    @torch.library.impl("rbln_custom_ops::paged_attn_prefill", "cpu")
+    def attn_prefill_cpu(q, k, v, mask, kcache, vcache, seq, scale, block_table, block_size):
         """Defines the computation pattern for prefill phase attention with KV cache updates.
 
         IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
@@ -97,9 +99,10 @@ def register_rbln_custom_masked_attention():
         - mask: [batch=1, 1, 1, seq_len, max_seq_len] - Attention mask
         - kcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Key cache
         - vcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Value cache
-        -
-        - seq: [1] - Starting sequence position
+        - seq: [1, 1] - Starting sequence position
         - scale: [] - Attention scale factor
+        - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
+        - block_size: [] - Number of tokens per block
 
         Returns:
             Tuple[Tensor, Tensor, Tensor]:
@@ -109,20 +112,20 @@ def register_rbln_custom_masked_attention():
         """
         return q, kcache, vcache
 
-    @register_fake("rbln_custom_ops::
-    def attn_prefill_abstract(q, k, v, m, kcache, vcache,
+    @register_fake("rbln_custom_ops::paged_attn_prefill")
+    def attn_prefill_abstract(q, k, v, m, kcache, vcache, seq, scale, block_table, block_size):
        return q, kcache, vcache
 
 
 @lru_cache
-def register_rbln_custom_causal_masked_attention():
+def register_rbln_custom_paged_causal_attention():
     torch.library.define(
-        "rbln_custom_ops::
-        "(Tensor x, Tensor y, Tensor z, Tensor a, Tensor b, Tensor c, Tensor d) -> Tensor[]",
+        "rbln_custom_ops::paged_causal_attn_decode",
+        "(Tensor x, Tensor y, Tensor z, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f) -> Tensor[]",
     )
 
-    @torch.library.impl("rbln_custom_ops::
-    def attn_decode_cpu(q, k, v, kcache, vcache, seq, scale):
+    @torch.library.impl("rbln_custom_ops::paged_causal_attn_decode", "cpu")
+    def attn_decode_cpu(q, k, v, kcache, vcache, seq, scale, block_table, block_size):
         """Defines the computation pattern for fused attention with KV cache updates.
 
         IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
@@ -140,8 +143,10 @@ def register_rbln_custom_causal_masked_attention():
         - v: [batch=1, n_heads, 1, 1, head_dim] - Value states for current input
         - kcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Key cache
         - vcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Value cache
-        - seq: [1] -
+        - seq: [1, 1] - Starting sequence position
         - scale: [] - Attention scale factor
+        - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
+        - block_size: [] - Number of tokens per block
 
         Returns:
             Tuple[Tensor, Tensor, Tensor]:
@@ -155,8 +160,8 @@ def register_rbln_custom_causal_masked_attention():
             torch.empty(*vcache.shape, device=vcache.device),
         )
 
-    @register_fake("rbln_custom_ops::
-    def attn_decode_abstract(q, k, v, kcache, vcache, seq,
+    @register_fake("rbln_custom_ops::paged_causal_attn_decode")
+    def attn_decode_abstract(q, k, v, kcache, vcache, seq, scale, block_table, block_size):
         return (
             q,
             torch.empty(*kcache.shape, device=kcache.device),
@@ -164,12 +169,12 @@ def register_rbln_custom_causal_masked_attention():
         )
 
     torch.library.define(
-        "rbln_custom_ops::
-        "(Tensor x, Tensor y, Tensor z, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e) -> Tensor[]",
+        "rbln_custom_ops::paged_causal_attn_prefill",
+        "(Tensor x, Tensor y, Tensor z, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f) -> Tensor[]",
    )
 
-    @torch.library.impl("rbln_custom_ops::
-    def attn_prefill_cpu(q, k, v, kcache, vcache,
+    @torch.library.impl("rbln_custom_ops::paged_causal_attn_prefill", "cpu")
+    def attn_prefill_cpu(q, k, v, kcache, vcache, seq, scale, block_table, block_size):
         """Defines the computation pattern for prefill phase attention with KV cache updates.
 
         IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
@@ -186,8 +191,10 @@ def register_rbln_custom_causal_masked_attention():
         - kcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Key cache
         - vcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Value cache
         - batch: [1] - Batch index for cache access
-        - seq: [1] - Starting sequence position
+        - seq: [1, 1] - Starting sequence position
         - scale: [] - Attention scale factor
+        - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
+        - block_size: [] - Number of tokens per block
 
         Returns:
             Tuple[Tensor, Tensor, Tensor]:
@@ -197,20 +204,20 @@ def register_rbln_custom_causal_masked_attention():
         """
         return q, kcache, vcache
 
-    @register_fake("rbln_custom_ops::
-    def attn_prefill_abstract(q, k, v, kcache, vcache,
+    @register_fake("rbln_custom_ops::paged_causal_attn_prefill")
+    def attn_prefill_abstract(q, k, v, kcache, vcache, seq, scale, block_table, block_size):
         return q, kcache, vcache
 
 
 @lru_cache
-def register_rbln_custom_attention_add_softmax():
+def register_rbln_custom_add_softmax_attention():
     torch.library.define(
-        "rbln_custom_ops::
+        "rbln_custom_ops::add_softmax_attn_decode",
         "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d) -> Tensor[]",
     )
 
-    @torch.library.impl("rbln_custom_ops::
-    def
+    @torch.library.impl("rbln_custom_ops::add_softmax_attn_decode", "cpu")
+    def add_softmax_attn_decode_cpu(q, k, v, mask, kcache, vcache, seq, scale):
         """Defines the computation pattern for fused attention with KV cache updates.
 
         IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
@@ -244,57 +251,10 @@ def register_rbln_custom_attention_add_softmax():
             torch.empty(*vcache.shape, device=vcache.device),
         )
 
-    @register_fake("rbln_custom_ops::
-    def
+    @register_fake("rbln_custom_ops::add_softmax_attn_decode")
+    def add_softmax_attn_decode_abstract(q, k, v, m, kcache, vcache, seq, partition):
         return (
             q,
             torch.empty(*kcache.shape, device=kcache.device),
             torch.empty(*vcache.shape, device=vcache.device),
         )
-
-    torch.library.define(
-        "rbln_custom_ops::attn_prefill_add_softmax",
-        "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e) -> Tensor[]",
-    )
-
-    @torch.library.impl("rbln_custom_ops::attn_prefill_add_softmax", "cpu")
-    def attn_prefill_add_softmax_cpu(q, k, v, mask, kcache, vcache, batch, seq, scale):
-        """Defines the computation pattern for prefill phase attention with KV cache updates.
-
-        IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
-        a single optimized NPU operation. It is NOT meant for CPU execution.
-
-        Key differences from decode pattern:
-        - Handles prefill phase with multiple input tokens
-        - Takes explicit batch index for continuous batching
-
-        Expected tensor shapes:
-        - q: [batch=1, n_heads, n_groups, seq_len, head_dim] - Query states for multiple tokens
-        - k: [batch=1, n_heads, 1, seq_len, head_dim] - Key states for current input
-        - v: [batch=1, n_heads, 1, seq_len, head_dim] - Value states for current input
-        - mask: [batch=1, 1, 1, seq_len, max_seq_len] - Attention mask
-        - kcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Key cache
-        - vcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Value cache
-        - batch: [1] - Batch index for cache access
-        - seq: [1] - Starting sequence position
-        - scale: [] - Attention scale factor
-
-        Returns:
-            Tuple[Tensor, Tensor, Tensor]:
-            - attn_output: [batch=1, n_heads, seq_len, 1, head_dim] - Attention output
-            - empty_kcache: Same shape as input kcache - Placeholder for compiler
-            - empty_vcache: Same shape as input vcache - Placeholder for compiler
-        """
-        return (
-            q,
-            torch.empty(1, *kcache.shape[1:], device=kcache.device),
-            torch.empty(1, *vcache.shape[1:], device=vcache.device),
-        )
-
-    @register_fake("rbln_custom_ops::attn_prefill_add_softmax")
-    def attn_prefill_add_softmax_abstract(q, k, v, m, kcache, vcache, batch, seq, partition):
-        return (
-            q,
-            torch.empty(1, *kcache.shape[1:], device=kcache.device),
-            torch.empty(1, *vcache.shape[1:], device=vcache.device),
-        )
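To make the new signature concrete, here is a small sketch that calls the CPU pattern stub of paged_attn_decode with the shapes listed in the docstrings above. The concrete sizes are invented for illustration; on an RBLN NPU the op is a compiler pattern, not something you would normally invoke by hand.

import torch
from optimum.rbln.ops import register_rbln_custom_paged_attention

register_rbln_custom_paged_attention()

n_heads, head_dim = 8, 64
batch_size, max_seq_len, block_size = 1, 128, 64

q = torch.randn(1, n_heads, 1, 1, head_dim)          # decode step: one new token
k = torch.randn(1, n_heads, 1, 1, head_dim)
v = torch.randn(1, n_heads, 1, 1, head_dim)
mask = torch.zeros(1, n_heads, 1, 1, max_seq_len)
kcache = torch.zeros(batch_size, n_heads, 1, max_seq_len, head_dim)
vcache = torch.zeros(batch_size, n_heads, 1, max_seq_len, head_dim)
seq = torch.zeros(1, 1, dtype=torch.int32)           # current sequence position, now shaped [1, 1]
scale = torch.tensor(head_dim ** -0.5)
# block_table maps each logical block of the sequence to a physical KV-cache block.
block_table = torch.arange(max_seq_len // block_size, dtype=torch.int32).unsqueeze(0)

# The CPU stub just returns (attn_output, kcache placeholder, vcache placeholder);
# the real fused kernel is generated by the RBLN compiler from this pattern.
attn_out, kcache_ph, vcache_ph = torch.ops.rbln_custom_ops.paged_attn_decode(
    q, k, v, mask, kcache, vcache, seq, scale, block_table, block_size
)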
optimum/rbln/ops/flash_attn.py
CHANGED
@@ -25,22 +25,22 @@ else:
 
 
 @lru_cache
-def register_rbln_custom_flash_masked_attention():
+def register_rbln_custom_paged_flash_attention():
     torch.library.define(
-        "rbln_custom_ops::
-        "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d, int
+        "rbln_custom_ops::paged_flash_attn_decode",
+        "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f, int g) -> Tensor[]",
     )
 
-    @torch.library.impl("rbln_custom_ops::
-    def flash_attn_decode_cpu(q, k, v, mask, kcache, vcache, seq, scale, partition):
+    @torch.library.impl("rbln_custom_ops::paged_flash_attn_decode", "cpu")
+    def flash_attn_decode_cpu(q, k, v, mask, kcache, vcache, seq, scale, block_table, block_size, partition):
         return (
             q,
             torch.empty(*kcache.shape, device=kcache.device),
             torch.empty(*vcache.shape, device=vcache.device),
         )
 
-    @register_fake("rbln_custom_ops::
-    def flash_attn_decode_abstract(q, k, v, m, kcache, vcache, seq, scale, partition):
+    @register_fake("rbln_custom_ops::paged_flash_attn_decode")
+    def flash_attn_decode_abstract(q, k, v, m, kcache, vcache, seq, scale, block_table, block_size, partition):
         return (
             q,
             torch.empty(*kcache.shape, device=kcache.device),
@@ -48,36 +48,36 @@ def register_rbln_custom_flash_masked_attention():
         )
 
     torch.library.define(
-        "rbln_custom_ops::
-        "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f) -> Tensor[]",
+        "rbln_custom_ops::paged_flash_attn_prefill",
+        "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f, int g) -> Tensor[]",
     )
 
     @torch.library.impl("rbln_custom_ops::flash_attn_prefill", "cpu")
-    def flash_attn_prefill_cpu(q, k, v, mask, kcache, vcache,
+    def flash_attn_prefill_cpu(q, k, v, mask, kcache, vcache, seq, scale, block_table, block_size, partition):
         return q, kcache, vcache
 
-    @register_fake("rbln_custom_ops::
-    def flash_attn_prefill_abstract(q, k, v, m, kcache, vcache,
+    @register_fake("rbln_custom_ops::paged_flash_attn_prefill")
+    def flash_attn_prefill_abstract(q, k, v, m, kcache, vcache, seq, scale, block_table, block_size, partition):
        return q, kcache, vcache
 
 
 @lru_cache
-def register_rbln_custom_flash_causal_masked_attention():
+def register_rbln_custom_paged_flash_causal_attention():
     torch.library.define(
-        "rbln_custom_ops::
-        "(Tensor x, Tensor y, Tensor z, Tensor a, Tensor b, Tensor c, Tensor d, int
+        "rbln_custom_ops::paged_flash_causal_attn_decode",
+        "(Tensor x, Tensor y, Tensor z, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f, int g) -> Tensor[]",
     )
 
-    @torch.library.impl("rbln_custom_ops::
-    def flash_attn_decode_cpu(q, k, v, kcache, vcache, seq, scale, partition):
+    @torch.library.impl("rbln_custom_ops::paged_flash_causal_attn_decode", "cpu")
+    def flash_attn_decode_cpu(q, k, v, kcache, vcache, seq, scale, block_table, block_size, partition):
         return (
             q,
             torch.empty(*kcache.shape, device=kcache.device),
             torch.empty(*vcache.shape, device=vcache.device),
         )
 
-    @register_fake("rbln_custom_ops::
-    def flash_attn_decode_abstract(q, k, v, kcache, vcache, seq, scale, partition):
+    @register_fake("rbln_custom_ops::paged_flash_causal_attn_decode")
+    def flash_attn_decode_abstract(q, k, v, kcache, vcache, seq, scale, block_table, block_size, partition):
         return (
             q,
             torch.empty(*kcache.shape, device=kcache.device),
@@ -85,14 +85,14 @@ def register_rbln_custom_flash_causal_masked_attention():
         )
 
     torch.library.define(
-        "rbln_custom_ops::
-        "(Tensor x, Tensor y, Tensor z, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f) -> Tensor[]",
+        "rbln_custom_ops::paged_flash_causal_attn_prefill",
+        "(Tensor x, Tensor y, Tensor z, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f, int g) -> Tensor[]",
     )
 
-    @torch.library.impl("rbln_custom_ops::
-    def flash_attn_prefill_cpu(q, k, v, kcache, vcache,
+    @torch.library.impl("rbln_custom_ops::paged_flash_causal_attn_prefill", "cpu")
+    def flash_attn_prefill_cpu(q, k, v, kcache, vcache, seq, scale, block_table, block_size, partition):
         return q, kcache, vcache
 
-    @register_fake("rbln_custom_ops::
-    def flash_attn_prefill_abstract(q, k, v, kcache, vcache,
+    @register_fake("rbln_custom_ops::paged_flash_causal_attn_prefill")
+    def flash_attn_prefill_abstract(q, k, v, kcache, vcache, seq, scale, block_table, block_size, partition):
         return q, kcache, vcache
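The flash variants follow the same renaming but keep their trailing partition argument on top of the new block_table/block_size pair, so their schemas now end with "Tensor e, int f, int g". A small introspection sketch (my own, not from the package) that just registers the paged flash ops and prints the resulting schemas:

import torch
from optimum.rbln.ops import (
    register_rbln_custom_paged_flash_attention,
    register_rbln_custom_paged_flash_causal_attention,
)

register_rbln_custom_paged_flash_attention()
register_rbln_custom_paged_flash_causal_attention()

# The printed schemas show block_table (Tensor e), block_size (int f) and
# partition (int g) appended after the q/k/v, cache, seq and scale arguments.
for name in ("paged_flash_attn_decode", "paged_flash_causal_attn_decode"):
    op = getattr(torch.ops.rbln_custom_ops, name)
    print(op.default._schema)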
optimum/rbln/transformers/models/bart/bart_architecture.py
CHANGED
@@ -35,16 +35,16 @@ logger = logging.get_logger(__name__)
 
 
 class BartWrapper:
-    def __init__(self, model: nn.Module, enc_max_seq_len: int):
+    def __init__(self, model: nn.Module, enc_max_seq_len: int, use_attention_mask: bool):
         self.encoder = Seq2SeqEncoderWrapper(model, enc_max_seq_len)
-        self.decoder = BartDecoderWrapper(model)
+        self.decoder = BartDecoderWrapper(model, use_attention_mask=use_attention_mask)
 
 
 class BartDecoderWrapper(Seq2SeqDecoderWrapper):
     def convert_to_rbln_conditional_generation(self, model: nn.Module):
         new_layers = []
         for layer in model.get_decoder().layers:
-            self_attn = BartSelfAttention(layer.self_attn)
+            self_attn = BartSelfAttention(layer.self_attn, use_attention_mask=self.use_attention_mask)
             new_layers.append(BartDecoderLayer(layer, self_attn))
 
         decoder_model = BartDecoder(model.get_decoder(), new_layers)
@@ -69,7 +69,8 @@ class BartDecoder(Seq2SeqDecoder):
         self.embed_scale = getattr(self._original_mod, "embed_scale", None)
 
     def prepare_attn_mask(self, attention_mask, encoder_attention_mask, **kwargs):
-        attention_mask = attention_mask[:, None, None, :]
+        if attention_mask is not None:
+            attention_mask = attention_mask[:, None, None, :]
         encoder_attention_mask = _prepare_4d_attention_mask(encoder_attention_mask, torch.float32, tgt_len=1)
 
         return attention_mask, encoder_attention_mask
@@ -134,7 +135,7 @@ class BartDecoderLayer(Seq2SeqDecoderLayer):
 
 
 class BartSelfAttention(Seq2SeqSelfAttention):
-    def __post_init__(self):
+    def __post_init__(self, use_attention_mask: bool = True):
         self.q_proj = self._original_mod.q_proj
         self.k_proj = self._original_mod.k_proj
         self.v_proj = self._original_mod.v_proj
@@ -142,7 +143,10 @@ class BartSelfAttention(Seq2SeqSelfAttention):
         self.num_heads = self._original_mod.num_heads
         self.head_dim = self._original_mod.embed_dim // self._original_mod.num_heads
         self.scaling = self.head_dim**-0.5
-
+        if use_attention_mask:
+            self.attn_decode = torch.ops.rbln_custom_ops.paged_attn_decode
+        else:
+            self.attn_decode = torch.ops.rbln_custom_ops.paged_causal_attn_decode
 
     def projection(self, hidden_states) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         query_states = self.q_proj(hidden_states) * self.scaling
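The net effect of the wrapper changes is a one-time choice of decode op per compiled model. A paraphrase of that dispatch (illustrative only; the real selection happens inside BartSelfAttention.__post_init__ as shown above, and assumes the paged attention ops have already been registered):

import torch

def select_decode_op(use_attention_mask: bool):
    # Mirrors the new __post_init__ logic: with an explicit attention mask the
    # masked paged op is bound; otherwise the causal variant is used, whose
    # schema takes no mask tensor at all, so none has to be fed to the graph.
    if use_attention_mask:
        return torch.ops.rbln_custom_ops.paged_attn_decode
    return torch.ops.rbln_custom_ops.paged_causal_attn_decode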
optimum/rbln/transformers/models/bart/modeling_bart.py
CHANGED
@@ -113,7 +113,9 @@ class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM):
         enc_max_seq_len = (
             rbln_config.model_cfg["enc_max_seq_len"] if "enc_max_seq_len" in rbln_config.model_cfg else 1024
         )
-        return BartWrapper(model, enc_max_seq_len=enc_max_seq_len)
+        use_attention_mask = rbln_config.model_cfg.get("use_attention_mask", False)
+
+        return BartWrapper(model, enc_max_seq_len=enc_max_seq_len, use_attention_mask=use_attention_mask)
 
     def __getattr__(self, __name: str) -> Any:
         def redirect(func):
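Downstream, the flag is read from rbln_config.model_cfg with a default of False. How it gets into model_cfg is not part of this diff; the sketch below assumes optimum-rbln's usual rbln_-prefixed export kwargs, so treat the exact argument name as an assumption rather than documented API.

from optimum.rbln import RBLNBartForConditionalGeneration

# Hypothetical export call: rbln_use_attention_mask would be the natural way for
# "use_attention_mask" to land in rbln_config.model_cfg; verify against the
# released documentation before relying on it.
model = RBLNBartForConditionalGeneration.from_pretrained(
    "facebook/bart-base",
    export=True,
    rbln_use_attention_mask=True,
)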