optimum-rbln 0.8.2a0__py3-none-any.whl → 0.8.2a1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

This release of optimum-rbln has been flagged as potentially problematic.

@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
  
- __version__ = version = '0.8.2a0'
- __version_tuple__ = version_tuple = (0, 8, 2, 'a0')
+ __version__ = version = '0.8.2a1'
+ __version_tuple__ = version_tuple = (0, 8, 2, 'a1')
@@ -231,15 +231,16 @@ class DecoderOnlyWrapper(nn.Module):
      def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
          new_layers = []
          for layer_idx, layer in enumerate(self.get_decoder_layers(causal_lm)):
+             is_sliding = layer_idx in self.sliding_window_layers
              new_self_attn = self.get_rbln_attn_class()(
                  self.get_attn_layer(layer),
-                 self.use_attention_mask,
+                 self.use_attention_mask if not is_sliding else True,
                  self.use_position_ids,
                  kvcache_block_size=self.sliding_window
                  if layer_idx in self.sliding_window_layers
                  else self.kvcache_block_size,
-                 is_sliding=layer_idx in self.sliding_window_layers,
-                 attn_impl=self.attn_impl,
+                 is_sliding=is_sliding,
+                 attn_impl=self.attn_impl if not is_sliding else "eager",
                  kvcache_partition_len=self.kvcache_partition_len,
              )
              new_layer = self.get_rbln_layer_class()(layer, new_self_attn)
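
For reference, a minimal standalone sketch of the per-layer rule this hunk introduces (function and argument names are illustrative, not part of the optimum-rbln API): sliding-window layers now always receive an attention mask and fall back to eager attention, whatever the wrapper-level defaults are. This normalization is also why the constructor guard removed in the next hunk becomes unnecessary.

    # Hypothetical sketch of the per-layer override above; resolve_layer_attn_config
    # and its arguments are illustrative, not part of optimum-rbln.
    def resolve_layer_attn_config(layer_idx, sliding_window_layers,
                                  use_attention_mask, attn_impl):
        is_sliding = layer_idx in sliding_window_layers
        return {
            "use_attention_mask": True if is_sliding else use_attention_mask,
            "attn_impl": "eager" if is_sliding else attn_impl,
            "is_sliding": is_sliding,
        }

    # A sliding layer is forced to eager-with-mask even when flash attention
    # is configured globally; other layers keep the wrapper-level settings.
    assert resolve_layer_attn_config(2, {2, 5}, False, "flash_attn") == {
        "use_attention_mask": True, "attn_impl": "eager", "is_sliding": True,
    }
    assert resolve_layer_attn_config(0, {2, 5}, False, "flash_attn") == {
        "use_attention_mask": False, "attn_impl": "flash_attn", "is_sliding": False,
    }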
@@ -720,10 +721,6 @@ class DecoderOnlyAttention(nn.Module):
          self.use_position_ids = use_position_ids
          self.is_sliding = is_sliding
          self.attn_impl = attn_impl
- 
-         if self.is_sliding and self.attn_impl != "eager":
-             raise NotImplementedError("Sliding window attention is only supported with eager attention.")
- 
          self.kvcache_partition_len = kvcache_partition_len
  
          setattr(self, self.get_attention_name(), self.create_attention_op())
@@ -873,7 +870,8 @@ class AttentionOp(nn.Module):
  
      def get_attn_op_name(self):
          phase = "decode" if self.phase == "decode" else "prefill"
-         if self.use_attention_mask:
+ 
+         if self.use_attention_mask and not self.use_position_ids:
              attn_op_name = "paged_attn_"
          else:
              attn_op_name = "paged_causal_attn_"
@@ -1074,7 +1072,7 @@ class FlashAttentionOp(AttentionOp):
  
      def get_attn_op_name(self):
          phase = "decode" if self.phase == "decode" else "prefill"
-         if self.use_attention_mask:
+         if self.use_attention_mask and not self.use_position_ids:
              attn_op_name = "paged_flash_attn_"
          else:
              attn_op_name = "paged_flash_causal_attn_"
@@ -1128,7 +1126,7 @@ class FlashAttentionOp(AttentionOp):
              "partition": self.kvcache_partition_size,
          }
  
-         if self.use_attention_mask != self.use_position_ids:
+         if self.use_attention_mask:
              op_args["mask"] = attn_mask
  
          if self.phase == "prefill" or self.phase == "image_prefill":
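
A short sketch of what the simplified guard means for the op arguments (hypothetical helper; the real code builds op_args inline): the mask is now attached whenever use_attention_mask is set, replacing the old XOR-style use_attention_mask != use_position_ids check, which dropped the mask when both flags were true.

    # Hypothetical helper showing the simplified mask handling; attach_mask is
    # illustrative, not part of optimum-rbln.
    def attach_mask(op_args, use_attention_mask, attn_mask):
        # Old condition: use_attention_mask != use_position_ids (an XOR),
        # which skipped the mask when both flags were set.
        # New condition: the mask flag alone.
        if use_attention_mask:
            op_args["mask"] = attn_mask
        return op_args

    # With both flags set, the mask is now passed through instead of dropped.
    args = attach_mask({"partition": 4096}, True, attn_mask="<mask tensor>")
    assert "mask" in args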
@@ -1151,8 +1149,8 @@
  class SlidingWindowAttentionOp(AttentionOp):
      def get_attn_op_name(self):
          phase = "decode" if self.phase == "decode" else "prefill"
-         if self.use_attention_mask:
-             raise NotImplementedError("Attention mask is not supported for sliding window attention.")
+         if not self.use_attention_mask:
+             raise NotImplementedError("Attention mask is needed for sliding window attention.")
  
          attn_op_name = "paged_sliding_window_attn_" + phase
          return attn_op_name
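
Finally, a sketch of the inverted guard for sliding-window attention: it previously rejected a mask and now requires one, consistent with the wrapper change above that forces use_attention_mask=True for sliding layers.

    # Illustrative standalone version of the inverted guard (not the class itself).
    def sliding_window_attn_op_name(phase, use_attention_mask):
        phase = "decode" if phase == "decode" else "prefill"
        if not use_attention_mask:  # previously: `if use_attention_mask: raise ...`
            raise NotImplementedError("Attention mask is needed for sliding window attention.")
        return "paged_sliding_window_attn_" + phase

    assert sliding_window_attn_op_name("decode", True) == "paged_sliding_window_attn_decode"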
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: optimum-rbln
- Version: 0.8.2a0
+ Version: 0.8.2a1
  Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
  Project-URL: Homepage, https://rebellions.ai
  Project-URL: Documentation, https://docs.rbln.ai
@@ -1,5 +1,5 @@
  optimum/rbln/__init__.py,sha256=MZCYmY4Y_Zfk0TGo3xK52osHDLZHz4cSdduXZt6RfSI,15316
- optimum/rbln/__version__.py,sha256=kKne35dFUj-l3bjR0tLZka8O-dDdB-rFDsjhN13A2r4,519
+ optimum/rbln/__version__.py,sha256=rJ63aeAz1FPyKP3vwSrg1ei6yt3oR1pU6Ziq_a74v3I,519
  optimum/rbln/configuration_utils.py,sha256=o5oer7fBdE-MHLGNXoP35FjmuQbMmjEIDv0QE_k3kpo,32336
  optimum/rbln/modeling.py,sha256=bsvK6GQtoH9vx72Ea59kvv61jguOk9XDTzVjsY1ugkk,14248
  optimum/rbln/modeling_base.py,sha256=QpNkU_Do__JKmnHjaPzv47OhQwgGfVohisip1jqXa7A,23871
@@ -101,7 +101,7 @@ optimum/rbln/transformers/models/colpali/configuration_colpali.py,sha256=ieY-tuy
  optimum/rbln/transformers/models/colpali/modeling_colpali.py,sha256=jzvJCBrrCXSpjfmJ3O-VvPNFGWGaNbpOV09JwLPAZWs,15757
  optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=vQYZDDdoddwA7yKc5zzrq2Zs9sax-0p8rNF_aYfF4bk,1006
  optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=cakn8RGo8gS3nmXdEqOfC2xUBOMGInROgLEbCOoLFR0,13398
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=8ovJ5_q_asqVTuVnAuK1m6genW0OSJ30Cd7HS9JXJgc,46363
+ optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=HrI12t9X9wV_-AZuTBSs-W7c5yVUkvd0secWlI72x2A,46325
  optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=NmWdodIcXXChI61-Ej7StTe52iQvalMYRUDuNtcQVEI,53342
  optimum/rbln/transformers/models/distilbert/__init__.py,sha256=zXL78SOEORTnUN_wrdoaDaYpntG8lcFHvPobM6jC0CI,841
  optimum/rbln/transformers/models/distilbert/configuration_distilbert.py,sha256=O3BW9JjyYk9PLyiofvOKEgTdMZ_jpIuPfot281pSsyg,984
@@ -205,7 +205,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
  optimum/rbln/utils/runtime_utils.py,sha256=D9PS8hfH1NBf8yH8cAu-XfdC9fxKzPbt4LFBVpADbbs,7180
  optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
  optimum/rbln/utils/submodule.py,sha256=w5mgPgncI740gVKMu3S-69DGNdUSI0bTZxegQGcZ98Y,5011
- optimum_rbln-0.8.2a0.dist-info/METADATA,sha256=dHMIEdFF_IuTWww99Iypz6HQKVDDk___EVJ8cK77eG0,5299
- optimum_rbln-0.8.2a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- optimum_rbln-0.8.2a0.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
- optimum_rbln-0.8.2a0.dist-info/RECORD,,
+ optimum_rbln-0.8.2a1.dist-info/METADATA,sha256=dQnoYG1lMMBSDdXmd0gFDAK-uHXdSI5mB8jsIULhe0Q,5299
+ optimum_rbln-0.8.2a1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ optimum_rbln-0.8.2a1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+ optimum_rbln-0.8.2a1.dist-info/RECORD,,