optimum-rbln 0.7.3a4__py3-none-any.whl → 0.7.3a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +19 -21
- optimum/rbln/transformers/models/phi/phi_architecture.py +3 -3
- {optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a5.dist-info}/METADATA +1 -1
- {optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a5.dist-info}/RECORD +7 -7
- {optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a5.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.7.3a4.dist-info → optimum_rbln-0.7.3a5.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__version__.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
18
18
|
version_tuple: VERSION_TUPLE
|
19
19
|
|
20
|
-
__version__ = version = '0.7.3a4'
|
21
|
-
__version_tuple__ = version_tuple = (0, 7, 3, 'a4')
|
20
|
+
__version__ = version = '0.7.3a5'
|
21
|
+
__version_tuple__ = version_tuple = (0, 7, 3, 'a5')
|
@@ -685,27 +685,28 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
|
|
685
685
|
else:
|
686
686
|
rbln_kvcache_block_size = rbln_kvcache_partition_len
|
687
687
|
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
688
|
+
rbln_kvcache_num_blocks = (rbln_max_seq_len // rbln_kvcache_block_size) * rbln_batch_size
|
689
|
+
if rbln_attn_impl == "flash_attn":
|
690
|
+
max_num_blocks, _ = cls.get_maximum_num_blocks(
|
691
|
+
config=model_config,
|
692
|
+
tensor_parallel_size=rbln_kwargs.get("tensor_parallel_size", 1),
|
693
|
+
kvcache_block_size=rbln_kvcache_block_size,
|
694
|
+
nbits_per_param=16 if rbln_quantization is None else 4, # TODO(jongho): FIX Ad-hoc
|
695
|
+
n_model_params=rbln_kwargs["n_model_params"],
|
696
|
+
)
|
697
|
+
rbln_kvcache_num_blocks = min(rbln_kvcache_num_blocks, max_num_blocks)
|
697
698
|
|
698
|
-
|
699
|
-
|
700
|
-
|
699
|
+
required_blocks = rbln_max_seq_len // rbln_kvcache_block_size + 1
|
700
|
+
if rbln_kvcache_num_blocks < required_blocks:
|
701
|
+
rbln_kvcache_num_blocks = required_blocks
|
701
702
|
|
702
|
-
|
703
|
+
logger.info(f"[KVCache] Compiling with num_blocks: {rbln_kvcache_num_blocks}")
|
703
704
|
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
705
|
+
if rbln_kvcache_num_blocks < rbln_batch_size:
|
706
|
+
raise RuntimeError(
|
707
|
+
f"Batch size ({rbln_batch_size}) exceeds available KV cache blocks ({rbln_kvcache_num_blocks}). "
|
708
|
+
"Ensure the number of blocks is at least equal to the batch size."
|
709
|
+
)
|
709
710
|
|
710
711
|
num_attention_heads = getattr(model_config, "n_head", None) or getattr(model_config, "num_attention_heads")
|
711
712
|
num_key_value_heads = getattr(model_config, "num_key_value_heads", None) or num_attention_heads
|
@@ -805,9 +806,6 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
|
|
805
806
|
"kvcache_block_size": rbln_kvcache_block_size,
|
806
807
|
"attn_impl": rbln_attn_impl,
|
807
808
|
"kvcache_num_blocks": rbln_kvcache_num_blocks,
|
808
|
-
"model_num_blocks": model_num_blocks,
|
809
|
-
"max_num_blocks": max_num_blocks,
|
810
|
-
"nbytes_per_block": nbytes_per_block,
|
811
809
|
}
|
812
810
|
)
|
813
811
|
|
@@ -92,7 +92,7 @@ class PhiLayer(DecoderOnlyLayer):
|
|
92
92
|
|
93
93
|
hidden_states = self.get_pre_attention_layernorm()(hidden_states)
|
94
94
|
|
95
|
-
|
95
|
+
attn_output = self.self_attn(
|
96
96
|
hidden_states=hidden_states,
|
97
97
|
attention_mask=attention_mask,
|
98
98
|
seq_positions=seq_positions,
|
@@ -104,9 +104,9 @@ class PhiLayer(DecoderOnlyLayer):
|
|
104
104
|
|
105
105
|
feed_forward_hidden_states = self._original_mod.mlp(hidden_states)
|
106
106
|
|
107
|
-
hidden_states =
|
107
|
+
hidden_states = attn_output + feed_forward_hidden_states + residual
|
108
108
|
|
109
|
-
return hidden_states
|
109
|
+
return hidden_states
|
110
110
|
|
111
111
|
|
112
112
|
class PhiModel(DecoderOnlyModel):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: optimum-rbln
|
3
|
-
Version: 0.7.3a4
|
3
|
+
Version: 0.7.3a5
|
4
4
|
Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
|
5
5
|
Project-URL: Homepage, https://rebellions.ai
|
6
6
|
Project-URL: Documentation, https://docs.rbln.ai
|
@@ -1,5 +1,5 @@
|
|
1
1
|
optimum/rbln/__init__.py,sha256=eHi15YM3989AcX52jka9rUmgAtlp1PHqMNwBEdOfuu8,6554
|
2
|
-
optimum/rbln/__version__.py,sha256=
|
2
|
+
optimum/rbln/__version__.py,sha256=MC3yJ2-M633KXIQTqHjv3l9eWLzkJkdlmhuQkRiV278,519
|
3
3
|
optimum/rbln/modeling.py,sha256=nJsAs5zs--VVOYGFjYNpqfxYIemJIK4Lr0WEzlDLdP0,8390
|
4
4
|
optimum/rbln/modeling_base.py,sha256=Ow73GVJF1N5cDFO8_rgirtGj1wC-cXBDyqXHW5PCybA,22270
|
5
5
|
optimum/rbln/modeling_config.py,sha256=7104bxmrvKW4Q6XTruQayiIGl8GHDFmPkJ3cknMIInE,11335
|
@@ -60,7 +60,7 @@ optimum/rbln/transformers/models/clip/__init__.py,sha256=H9vuBwrmFO0-CqZhXUrKF-u
|
|
60
60
|
optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=NiSm7bHs4SReHDUr53BBWSX0Y8bkKOeUSpsBDrp8YDw,6628
|
61
61
|
optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=pDogsdpJKKB5rqnVFrRjwfhUvOSV-jZ3oARMsqSvOOQ,665
|
62
62
|
optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=7OIKteJLKNxOLOg0w3lLOM7TxZovQn4jkglI9wRkrtQ,40609
|
63
|
-
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=
|
63
|
+
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=gcIkpRSsJycLtkFyVxU8PblzNhKlsIy5fDSDUlwhflM,38884
|
64
64
|
optimum/rbln/transformers/models/dpt/__init__.py,sha256=gP1tkR3XMNlHq1GT87ugIVvb2o_1eAUg1JaniXjy1Lw,651
|
65
65
|
optimum/rbln/transformers/models/dpt/modeling_dpt.py,sha256=ZsS2SOiqcA4azULB-WFEMQZbgIoOyVUKqVKqrw_tWzA,3430
|
66
66
|
optimum/rbln/transformers/models/exaone/__init__.py,sha256=zYH_5tVa8-juEdsOIky7I33WSC3Zuhoq1upI0OHYeVw,859
|
@@ -85,7 +85,7 @@ optimum/rbln/transformers/models/mistral/mistral_architecture.py,sha256=_aU8TE_t
|
|
85
85
|
optimum/rbln/transformers/models/mistral/modeling_mistral.py,sha256=7nrddoBIHf8S12LZWBUpotnvG3gND11vMQda9yYXJ-s,1560
|
86
86
|
optimum/rbln/transformers/models/phi/__init__.py,sha256=mZLt1M7BbYEvSon5UlkniMUPa15SfjZFdw0kMSAF3VA,644
|
87
87
|
optimum/rbln/transformers/models/phi/modeling_phi.py,sha256=j-6Pqd5rR2JE8I1pnKFlCi4nW5Dv3wZjoPWxohissoo,1516
|
88
|
-
optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=
|
88
|
+
optimum/rbln/transformers/models/phi/phi_architecture.py,sha256=TueyqmjPXWmOPOxBm4dIFyd0X3iV1jgw0U6c26iCAPk,4090
|
89
89
|
optimum/rbln/transformers/models/qwen2/__init__.py,sha256=RAMWc21W_2I6DH9xBjeNxPECmAcTrbKhSIefq3Lass0,648
|
90
90
|
optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=9-aFDvjMzPNUyGOz0qo33RE18bUFGYZ3Wt_68zb5uJY,1530
|
91
91
|
optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
|
@@ -114,7 +114,7 @@ optimum/rbln/utils/model_utils.py,sha256=DfD_Z2qvZHqcddXqnzTM1AN8khanj3-DXK2lJvV
|
|
114
114
|
optimum/rbln/utils/runtime_utils.py,sha256=5-DYniyP59nx-mrrbi7AqA77L85b4Cm5oLpaxidSyss,3699
|
115
115
|
optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
|
116
116
|
optimum/rbln/utils/submodule.py,sha256=oZoGrItB8WqY4i-K9WJPlLlcLohc1YGB9OHB8_XZw3A,4071
|
117
|
-
optimum_rbln-0.7.
|
118
|
-
optimum_rbln-0.7.
|
119
|
-
optimum_rbln-0.7.
|
120
|
-
optimum_rbln-0.7.
|
117
|
+
optimum_rbln-0.7.3a5.dist-info/METADATA,sha256=XgkOm4f_xhli40HbenyjQYm66ZGna1Pv1prBnpF5N5E,5300
|
118
|
+
optimum_rbln-0.7.3a5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
119
|
+
optimum_rbln-0.7.3a5.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
120
|
+
optimum_rbln-0.7.3a5.dist-info/RECORD,,
|
File without changes
|
File without changes
|