optimum-rbln 0.7.3a4__py3-none-any.whl → 0.7.3a5__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.7.3a4'
21
- __version_tuple__ = version_tuple = (0, 7, 3, 'a4')
20
+ __version__ = version = '0.7.3a5'
21
+ __version_tuple__ = version_tuple = (0, 7, 3, 'a5')
@@ -685,27 +685,28 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
685
685
  else:
686
686
  rbln_kvcache_block_size = rbln_kvcache_partition_len
687
687
 
688
- max_num_blocks, nbytes_per_block = cls.get_maximum_num_blocks(
689
- config=model_config,
690
- tensor_parallel_size=rbln_kwargs.get("tensor_parallel_size", 1),
691
- kvcache_block_size=rbln_kvcache_block_size,
692
- nbits_per_param=16 if rbln_quantization is None else 4, # TODO(jongho): FIX Ad-hoc
693
- n_model_params=rbln_kwargs["n_model_params"],
694
- )
695
- model_num_blocks = (rbln_max_seq_len // rbln_kvcache_block_size) * rbln_batch_size
696
- rbln_kvcache_num_blocks = min(model_num_blocks, max_num_blocks)
688
+ rbln_kvcache_num_blocks = (rbln_max_seq_len // rbln_kvcache_block_size) * rbln_batch_size
689
+ if rbln_attn_impl == "flash_attn":
690
+ max_num_blocks, _ = cls.get_maximum_num_blocks(
691
+ config=model_config,
692
+ tensor_parallel_size=rbln_kwargs.get("tensor_parallel_size", 1),
693
+ kvcache_block_size=rbln_kvcache_block_size,
694
+ nbits_per_param=16 if rbln_quantization is None else 4, # TODO(jongho): FIX Ad-hoc
695
+ n_model_params=rbln_kwargs["n_model_params"],
696
+ )
697
+ rbln_kvcache_num_blocks = min(rbln_kvcache_num_blocks, max_num_blocks)
697
698
 
698
- required_blocks = rbln_max_seq_len // rbln_kvcache_block_size + 1
699
- if rbln_kvcache_num_blocks < required_blocks:
700
- rbln_kvcache_num_blocks = required_blocks
699
+ required_blocks = rbln_max_seq_len // rbln_kvcache_block_size + 1
700
+ if rbln_kvcache_num_blocks < required_blocks:
701
+ rbln_kvcache_num_blocks = required_blocks
701
702
 
702
- logger.info(f"[KVCache] Compiling with num_blocks: {rbln_kvcache_num_blocks}")
703
+ logger.info(f"[KVCache] Compiling with num_blocks: {rbln_kvcache_num_blocks}")
703
704
 
704
- if rbln_kvcache_num_blocks < rbln_batch_size:
705
- raise RuntimeError(
706
- f"Batch size ({rbln_batch_size}) exceeds available KV cache blocks ({rbln_kvcache_num_blocks}). "
707
- "Ensure the number of blocks is at least equal to the batch size."
708
- )
705
+ if rbln_kvcache_num_blocks < rbln_batch_size:
706
+ raise RuntimeError(
707
+ f"Batch size ({rbln_batch_size}) exceeds available KV cache blocks ({rbln_kvcache_num_blocks}). "
708
+ "Ensure the number of blocks is at least equal to the batch size."
709
+ )
709
710
 
710
711
  num_attention_heads = getattr(model_config, "n_head", None) or getattr(model_config, "num_attention_heads")
711
712
  num_key_value_heads = getattr(model_config, "num_key_value_heads", None) or num_attention_heads
@@ -805,9 +806,6 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
805
806
  "kvcache_block_size": rbln_kvcache_block_size,
806
807
  "attn_impl": rbln_attn_impl,
807
808
  "kvcache_num_blocks": rbln_kvcache_num_blocks,
808
- "model_num_blocks": model_num_blocks,
809
- "max_num_blocks": max_num_blocks,
810
- "nbytes_per_block": nbytes_per_block,
811
809
  }
812
810
  )
813
811
 
@@ -92,7 +92,7 @@ class PhiLayer(DecoderOnlyLayer):
92
92
 
93
93
  hidden_states = self.get_pre_attention_layernorm()(hidden_states)
94
94
 
95
- attn_outputs, present_key_values = self.self_attn(
95
+ attn_output = self.self_attn(
96
96
  hidden_states=hidden_states,
97
97
  attention_mask=attention_mask,
98
98
  seq_positions=seq_positions,
@@ -104,9 +104,9 @@ class PhiLayer(DecoderOnlyLayer):
104
104
 
105
105
  feed_forward_hidden_states = self._original_mod.mlp(hidden_states)
106
106
 
107
- hidden_states = attn_outputs + feed_forward_hidden_states + residual
107
+ hidden_states = attn_output + feed_forward_hidden_states + residual
108
108
 
109
- return hidden_states, present_key_values
109
+ return hidden_states
110
110
 
111
111
 
112
112
  class PhiModel(DecoderOnlyModel):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: optimum-rbln
3
- Version: 0.7.3a4
3
+ Version: 0.7.3a5
4
4
  Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
5
5
  Project-URL: Homepage, https://rebellions.ai
6
6
  Project-URL: Documentation, https://docs.rbln.ai
@@ -1,5 +1,5 @@
1
1
  optimum/rbln/__init__.py,sha256=eHi15YM3989AcX52jka9rUmgAtlp1PHqMNwBEdOfuu8,6554
2
- optimum/rbln/__version__.py,sha256=MLlg_138GxyhciEP0ZB5dPN8vriXkicRnaZiwqygxOY,519
2
+ optimum/rbln/__version__.py,sha256=MC3yJ2-M633KXIQTqHjv3l9eWLzkJkdlmhuQkRiV278,519
3
3
  optimum/rbln/modeling.py,sha256=nJsAs5zs--VVOYGFjYNpqfxYIemJIK4Lr0WEzlDLdP0,8390
4
4
  optimum/rbln/modeling_base.py,sha256=Ow73GVJF1N5cDFO8_rgirtGj1wC-cXBDyqXHW5PCybA,22270
5
5
  optimum/rbln/modeling_config.py,sha256=7104bxmrvKW4Q6XTruQayiIGl8GHDFmPkJ3cknMIInE,11335
@@ -60,7 +60,7 @@ optimum/rbln/transformers/models/clip/__init__.py,sha256=H9vuBwrmFO0-CqZhXUrKF-u
60
60
  optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=NiSm7bHs4SReHDUr53BBWSX0Y8bkKOeUSpsBDrp8YDw,6628
61
61
  optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=pDogsdpJKKB5rqnVFrRjwfhUvOSV-jZ3oARMsqSvOOQ,665
62
62
  optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=7OIKteJLKNxOLOg0w3lLOM7TxZovQn4jkglI9wRkrtQ,40609
63
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=W9HnxJoTz78Wc4X5Q3sMSHhMTSa7-9uQCFlnqNVozvA,38932
63
+ optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=gcIkpRSsJycLtkFyVxU8PblzNhKlsIy5fDSDUlwhflM,38884
64
64
  optimum/rbln/transformers/models/dpt/__init__.py,sha256=gP1tkR3XMNlHq1GT87ugIVvb2o_1eAUg1JaniXjy1Lw,651
65
65
  optimum/rbln/transformers/models/dpt/modeling_dpt.py,sha256=ZsS2SOiqcA4azULB-WFEMQZbgIoOyVUKqVKqrw_tWzA,3430
66
66
  optimum/rbln/transformers/models/exaone/__init__.py,sha256=zYH_5tVa8-juEdsOIky7I33WSC3Zuhoq1upI0OHYeVw,859
@@ -85,7 +85,7 @@ optimum/rbln/transformers/models/mistral/mistral_architecture.py,sha256=_aU8TE_t
85
85
  optimum/rbln/transformers/models/mistral/modeling_mistral.py,sha256=7nrddoBIHf8S12LZWBUpotnvG3gND11vMQda9yYXJ-s,1560
86
86
  optimum/rbln/transformers/models/phi/__init__.py,sha256=mZLt1M7BbYEvSon5UlkniMUPa15SfjZFdw0kMSAF3VA,644
87
87
  optimum/rbln/transformers/models/phi/modeling_phi.py,sha256=j-6Pqd5rR2JE8I1pnKFlCi4nW5Dv3wZjoPWxohissoo,1516
88
- optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=rBQjr6MOYBo1i5yLekMSR81TzYlHrHAA30kyKDdR7ww,4132
88
+ optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=TueyqmjPXWmOPOxBm4dIFyd0X3iV1jgw0U6c26iCAPk,4090
89
89
  optimum/rbln/transformers/models/qwen2/__init__.py,sha256=RAMWc21W_2I6DH9xBjeNxPECmAcTrbKhSIefq3Lass0,648
90
90
  optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=9-aFDvjMzPNUyGOz0qo33RE18bUFGYZ3Wt_68zb5uJY,1530
91
91
  optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
@@ -114,7 +114,7 @@ optimum/rbln/utils/model_utils.py,sha256=DfD_Z2qvZHqcddXqnzTM1AN8khanj3-DXK2lJvV
114
114
  optimum/rbln/utils/runtime_utils.py,sha256=5-DYniyP59nx-mrrbi7AqA77L85b4Cm5oLpaxidSyss,3699
115
115
  optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
116
116
  optimum/rbln/utils/submodule.py,sha256=oZoGrItB8WqY4i-K9WJPlLlcLohc1YGB9OHB8_XZw3A,4071
117
- optimum_rbln-0.7.3a4.dist-info/METADATA,sha256=8VNTOVgsgFtcFUuZ9VEeRQfC2LEB60OFmW92hlJo8V8,5300
118
- optimum_rbln-0.7.3a4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
119
- optimum_rbln-0.7.3a4.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
120
- optimum_rbln-0.7.3a4.dist-info/RECORD,,
117
+ optimum_rbln-0.7.3a5.dist-info/METADATA,sha256=XgkOm4f_xhli40HbenyjQYm66ZGna1Pv1prBnpF5N5E,5300
118
+ optimum_rbln-0.7.3a5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
119
+ optimum_rbln-0.7.3a5.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
120
+ optimum_rbln-0.7.3a5.dist-info/RECORD,,