optimum-rbln 0.8.2a0__py3-none-any.whl → 0.8.2a1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

This release of optimum-rbln has been flagged as potentially problematic.

@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
  
- __version__ = version = '0.8.2a0'
- __version_tuple__ = version_tuple = (0, 8, 2, 'a0')
+ __version__ = version = '0.8.2a1'
+ __version_tuple__ = version_tuple = (0, 8, 2, 'a1')
@@ -231,15 +231,16 @@ class DecoderOnlyWrapper(nn.Module):
      def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
          new_layers = []
          for layer_idx, layer in enumerate(self.get_decoder_layers(causal_lm)):
+             is_sliding = layer_idx in self.sliding_window_layers
              new_self_attn = self.get_rbln_attn_class()(
                  self.get_attn_layer(layer),
-                 self.use_attention_mask,
+                 self.use_attention_mask if not is_sliding else True,
                  self.use_position_ids,
                  kvcache_block_size=self.sliding_window
                  if layer_idx in self.sliding_window_layers
                  else self.kvcache_block_size,
-                 is_sliding=layer_idx in self.sliding_window_layers,
-                 attn_impl=self.attn_impl,
+                 is_sliding=is_sliding,
+                 attn_impl=self.attn_impl if not is_sliding else "eager",
                  kvcache_partition_len=self.kvcache_partition_len,
              )
              new_layer = self.get_rbln_layer_class()(layer, new_self_attn)
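
For reference, a minimal standalone sketch of the per-layer rule this hunk introduces (function and argument names are illustrative, not part of the optimum-rbln API): sliding-window layers now always receive an attention mask and fall back to eager attention, whatever the wrapper-level defaults are. This normalization is also why the constructor guard removed in the next hunk becomes unnecessary.

    # Hypothetical sketch of the per-layer override above; resolve_layer_attn_config
    # and its arguments are illustrative, not part of optimum-rbln.
    def resolve_layer_attn_config(layer_idx, sliding_window_layers,
                                  use_attention_mask, attn_impl):
        is_sliding = layer_idx in sliding_window_layers
        return {
            "use_attention_mask": True if is_sliding else use_attention_mask,
            "attn_impl": "eager" if is_sliding else attn_impl,
            "is_sliding": is_sliding,
        }

    # A sliding layer is forced to eager-with-mask even when flash attention
    # is configured globally; other layers keep the wrapper-level settings.
    assert resolve_layer_attn_config(2, {2, 5}, False, "flash_attn") == {
        "use_attention_mask": True, "attn_impl": "eager", "is_sliding": True,
    }
    assert resolve_layer_attn_config(0, {2, 5}, False, "flash_attn") == {
        "use_attention_mask": False, "attn_impl": "flash_attn", "is_sliding": False,
    }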
@@ -720,10 +721,6 @@ class DecoderOnlyAttention(nn.Module):
          self.use_position_ids = use_position_ids
          self.is_sliding = is_sliding
          self.attn_impl = attn_impl
- 
-         if self.is_sliding and self.attn_impl != "eager":
-             raise NotImplementedError("Sliding window attention is only supported with eager attention.")
- 
          self.kvcache_partition_len = kvcache_partition_len
  
          setattr(self, self.get_attention_name(), self.create_attention_op())
@@ -873,7 +870,8 @@ class AttentionOp(nn.Module):
  
      def get_attn_op_name(self):
          phase = "decode" if self.phase == "decode" else "prefill"
-         if self.use_attention_mask:
+ 
+         if self.use_attention_mask and not self.use_position_ids:
              attn_op_name = "paged_attn_"
          else:
              attn_op_name = "paged_causal_attn_"
@@ -1074,7 +1072,7 @@ class FlashAttentionOp(AttentionOp):
  
      def get_attn_op_name(self):
          phase = "decode" if self.phase == "decode" else "prefill"
-         if self.use_attention_mask:
+         if self.use_attention_mask and not self.use_position_ids:
              attn_op_name = "paged_flash_attn_"
          else:
              attn_op_name = "paged_flash_causal_attn_"
@@ -1128,7 +1126,7 @@ class FlashAttentionOp(AttentionOp):
              "partition": self.kvcache_partition_size,
          }
  
-         if self.use_attention_mask != self.use_position_ids:
+         if self.use_attention_mask:
              op_args["mask"] = attn_mask
  
          if self.phase == "prefill" or self.phase == "image_prefill":
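
A short sketch of what the simplified guard means for the op arguments (hypothetical helper; the real code builds op_args inline): the mask is now attached whenever use_attention_mask is set, replacing the old XOR-style use_attention_mask != use_position_ids check, which dropped the mask when both flags were true.

    # Hypothetical helper showing the simplified mask handling; attach_mask is
    # illustrative, not part of optimum-rbln.
    def attach_mask(op_args, use_attention_mask, attn_mask):
        # Old condition: use_attention_mask != use_position_ids (an XOR),
        # which skipped the mask when both flags were set.
        # New condition: the mask flag alone.
        if use_attention_mask:
            op_args["mask"] = attn_mask
        return op_args

    # With both flags set, the mask is now passed through instead of dropped.
    args = attach_mask({"partition": 4096}, True, attn_mask="<mask tensor>")
    assert "mask" in args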
@@ -1151,8 +1149,8 @@
  class SlidingWindowAttentionOp(AttentionOp):
      def get_attn_op_name(self):
          phase = "decode" if self.phase == "decode" else "prefill"
-         if self.use_attention_mask:
-             raise NotImplementedError("Attention mask is not supported for sliding window attention.")
+         if not self.use_attention_mask:
+             raise NotImplementedError("Attention mask is needed for sliding window attention.")
  
          attn_op_name = "paged_sliding_window_attn_" + phase
          return attn_op_name
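
Finally, a sketch of the inverted guard for sliding-window attention: it previously rejected a mask and now requires one, consistent with the wrapper change above that forces use_attention_mask=True for sliding layers.

    # Illustrative standalone version of the inverted guard (not the class itself).
    def sliding_window_attn_op_name(phase, use_attention_mask):
        phase = "decode" if phase == "decode" else "prefill"
        if not use_attention_mask:  # previously: `if use_attention_mask: raise ...`
            raise NotImplementedError("Attention mask is needed for sliding window attention.")
        return "paged_sliding_window_attn_" + phase

    assert sliding_window_attn_op_name("decode", True) == "paged_sliding_window_attn_decode"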
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: optimum-rbln
- Version: 0.8.2a0
+ Version: 0.8.2a1
  Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
  Project-URL: Homepage, https://rebellions.ai
  Project-URL: Documentation, https://docs.rbln.ai
@@ -1,5 +1,5 @@
  optimum/rbln/__init__.py,sha256=MZCYmY4Y_Zfk0TGo3xK52osHDLZHz4cSdduXZt6RfSI,15316
- optimum/rbln/__version__.py,sha256=kKne35dFUj-l3bjR0tLZka8O-dDdB-rFDsjhN13A2r4,519
+ optimum/rbln/__version__.py,sha256=rJ63aeAz1FPyKP3vwSrg1ei6yt3oR1pU6Ziq_a74v3I,519
  optimum/rbln/configuration_utils.py,sha256=o5oer7fBdE-MHLGNXoP35FjmuQbMmjEIDv0QE_k3kpo,32336
  optimum/rbln/modeling.py,sha256=bsvK6GQtoH9vx72Ea59kvv61jguOk9XDTzVjsY1ugkk,14248
  optimum/rbln/modeling_base.py,sha256=QpNkU_Do__JKmnHjaPzv47OhQwgGfVohisip1jqXa7A,23871
@@ -101,7 +101,7 @@ optimum/rbln/transformers/models/colpali/configuration_colpali.py,sha256=ieY-tuy
  optimum/rbln/transformers/models/colpali/modeling_colpali.py,sha256=jzvJCBrrCXSpjfmJ3O-VvPNFGWGaNbpOV09JwLPAZWs,15757
  optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=vQYZDDdoddwA7yKc5zzrq2Zs9sax-0p8rNF_aYfF4bk,1006
  optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=cakn8RGo8gS3nmXdEqOfC2xUBOMGInROgLEbCOoLFR0,13398
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=8ovJ5_q_asqVTuVnAuK1m6genW0OSJ30Cd7HS9JXJgc,46363
+ optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=HrI12t9X9wV_-AZuTBSs-W7c5yVUkvd0secWlI72x2A,46325
  optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=NmWdodIcXXChI61-Ej7StTe52iQvalMYRUDuNtcQVEI,53342
  optimum/rbln/transformers/models/distilbert/__init__.py,sha256=zXL78SOEORTnUN_wrdoaDaYpntG8lcFHvPobM6jC0CI,841
  optimum/rbln/transformers/models/distilbert/configuration_distilbert.py,sha256=O3BW9JjyYk9PLyiofvOKEgTdMZ_jpIuPfot281pSsyg,984
@@ -205,7 +205,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
  optimum/rbln/utils/runtime_utils.py,sha256=D9PS8hfH1NBf8yH8cAu-XfdC9fxKzPbt4LFBVpADbbs,7180
  optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
  optimum/rbln/utils/submodule.py,sha256=w5mgPgncI740gVKMu3S-69DGNdUSI0bTZxegQGcZ98Y,5011
- optimum_rbln-0.8.2a0.dist-info/METADATA,sha256=dHMIEdFF_IuTWww99Iypz6HQKVDDk___EVJ8cK77eG0,5299
- optimum_rbln-0.8.2a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- optimum_rbln-0.8.2a0.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
- optimum_rbln-0.8.2a0.dist-info/RECORD,,
+ optimum_rbln-0.8.2a1.dist-info/METADATA,sha256=dQnoYG1lMMBSDdXmd0gFDAK-uHXdSI5mB8jsIULhe0Q,5299
+ optimum_rbln-0.8.2a1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ optimum_rbln-0.8.2a1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+ optimum_rbln-0.8.2a1.dist-info/RECORD,,