optimum-rbln 0.8.0.post2__py3-none-any.whl → 0.8.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +2 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +45 -33
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +9 -2
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +4 -2
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +9 -2
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +4 -2
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +9 -2
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +9 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +33 -9
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -12
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +22 -6
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +16 -6
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +16 -6
- optimum/rbln/diffusers/modeling_diffusers.py +16 -26
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +11 -0
- optimum/rbln/diffusers/models/autoencoders/vae.py +1 -8
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +11 -0
- optimum/rbln/diffusers/models/controlnet.py +13 -7
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +10 -0
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -0
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +1 -4
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +48 -27
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +7 -0
- optimum/rbln/modeling.py +33 -35
- optimum/rbln/modeling_base.py +45 -107
- optimum/rbln/transformers/__init__.py +39 -47
- optimum/rbln/transformers/configuration_generic.py +16 -13
- optimum/rbln/transformers/modeling_generic.py +18 -19
- optimum/rbln/transformers/modeling_rope_utils.py +1 -1
- optimum/rbln/transformers/models/__init__.py +46 -4
- optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +17 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +21 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +28 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +30 -12
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +35 -4
- optimum/rbln/transformers/models/clip/configuration_clip.py +3 -3
- optimum/rbln/transformers/models/clip/modeling_clip.py +11 -12
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +111 -14
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +102 -35
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +231 -175
- optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
- optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +19 -0
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +19 -0
- optimum/rbln/transformers/models/exaone/configuration_exaone.py +24 -1
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +5 -1
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +51 -5
- optimum/rbln/transformers/models/gemma/configuration_gemma.py +24 -1
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +5 -1
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +49 -0
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +3 -3
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +18 -250
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +87 -236
- optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +4 -1
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +6 -1
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +12 -2
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +41 -4
- optimum/rbln/transformers/models/llama/configuration_llama.py +24 -1
- optimum/rbln/transformers/models/llama/modeling_llama.py +49 -0
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +33 -4
- optimum/rbln/transformers/models/midm/configuration_midm.py +24 -1
- optimum/rbln/transformers/models/midm/midm_architecture.py +6 -1
- optimum/rbln/transformers/models/midm/modeling_midm.py +51 -5
- optimum/rbln/transformers/models/mistral/configuration_mistral.py +24 -1
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +62 -4
- optimum/rbln/transformers/models/opt/configuration_opt.py +4 -1
- optimum/rbln/transformers/models/opt/modeling_opt.py +10 -0
- optimum/rbln/transformers/models/opt/opt_architecture.py +7 -1
- optimum/rbln/transformers/models/phi/configuration_phi.py +24 -1
- optimum/rbln/transformers/models/phi/modeling_phi.py +49 -0
- optimum/rbln/transformers/models/phi/phi_architecture.py +1 -1
- optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +24 -1
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -4
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +15 -3
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +46 -25
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +4 -2
- optimum/rbln/transformers/models/resnet/__init__.py +23 -0
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +20 -0
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +22 -0
- optimum/rbln/transformers/models/roberta/__init__.py +24 -0
- optimum/rbln/transformers/{configuration_alias.py → models/roberta/configuration_roberta.py} +4 -30
- optimum/rbln/transformers/{modeling_alias.py → models/roberta/modeling_roberta.py} +2 -32
- optimum/rbln/transformers/models/seq2seq/__init__.py +1 -1
- optimum/rbln/transformers/models/seq2seq/{configuration_seq2seq2.py → configuration_seq2seq.py} +2 -2
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +1 -1
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +3 -0
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +62 -21
- optimum/rbln/transformers/models/t5/modeling_t5.py +46 -4
- optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/__init__.py +1 -1
- optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/configuration_time_series_transformer.py +2 -2
- optimum/rbln/transformers/models/{time_series_transformers/modeling_time_series_transformers.py → time_series_transformer/modeling_time_series_transformer.py} +14 -9
- optimum/rbln/transformers/models/vit/__init__.py +19 -0
- optimum/rbln/transformers/models/vit/configuration_vit.py +19 -0
- optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
- optimum/rbln/transformers/models/wav2vec2/__init__.py +1 -1
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +3 -1
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +35 -15
- optimum/rbln/transformers/models/xlm_roberta/__init__.py +16 -2
- optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +15 -2
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +12 -3
- optimum/rbln/utils/model_utils.py +20 -0
- optimum/rbln/utils/submodule.py +6 -8
- {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/METADATA +1 -1
- {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/RECORD +127 -114
- /optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/time_series_transformers_architecture.py +0 -0
- /optimum/rbln/transformers/models/wav2vec2/{configuration_wav2vec.py → configuration_wav2vec2.py} +0 -0
- {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/llama/modeling_llama.py

@@ -27,8 +27,57 @@ class RBLNLlamaForCausalLM(RBLNDecoderOnlyModelForCausalLM):
 
     A class to convert and run pre-trained transformers based LlamaForCausalLM model on RBLN devices.
     It implements the methods to convert a pre-trained transformers LlamaForCausalLM model into a RBLN transformer model by:
+
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN compiler.
+
+    **Configuration:**
+    This model uses [`RBLNLlamaForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
+    the `rbln_config` parameter should be an instance of [`RBLNLlamaForCausalLMConfig`] or a dictionary conforming to its structure.
+
+    See the [`RBLNLlamaForCausalLMConfig`] class for all available configuration options.
+
+    Examples:
+    ```python
+    from optimum.rbln import RBLNLlamaForCausalLM
+
+    # Simple usage using rbln_* arguments
+    # `max_seq_len` is automatically inferred from the model config
+    model = RBLNLlamaForCausalLM.from_pretrained(
+        "meta-llama/Llama-2-7b-hf",
+        export=True,
+        rbln_batch_size=1,
+        rbln_tensor_parallel_size=4,
+    )
+
+
+    # Using a config dictionary
+    rbln_config = {
+        "batch_size": 1,
+        "max_seq_len": 4096,
+        "tensor_parallel_size": 4,
+    }
+    model = RBLNLlamaForCausalLM.from_pretrained(
+        "meta-llama/Llama-2-7b-hf",
+        export=True,
+        rbln_config=rbln_config
+    )
+
+
+    # Using a RBLNLlamaForCausalLMConfig instance (recommended for type checking)
+    from optimum.rbln import RBLNLlamaForCausalLMConfig
+
+    config = RBLNLlamaForCausalLMConfig(
+        batch_size=1,
+        max_seq_len=4096,
+        tensor_parallel_size=4
+    )
+    model = RBLNLlamaForCausalLM.from_pretrained(
+        "meta-llama/Llama-2-7b-hf",
+        export=True,
+        rbln_config=config
+    )
+    ```
     """
 
     _decoder_wrapper_cls = LlamaWrapper
optimum/rbln/transformers/models/llava_next/configuration_llava_next.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional
+from typing import Any, Dict, Optional
 
 from ....configuration_utils import RBLNModelConfig
 
@@ -25,7 +25,7 @@ class RBLNLlavaNextForConditionalGenerationConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         vision_tower: Optional[RBLNModelConfig] = None,
         language_model: Optional[RBLNModelConfig] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
optimum/rbln/transformers/models/llava_next/modeling_llava_next.py

@@ -109,6 +109,36 @@ class LoopProjector:
 
 
 class RBLNLlavaNextForConditionalGeneration(RBLNModel):
+    """
+    RBLNLlavaNextForConditionalGeneration is a multi-modal model that combines vision and language processing capabilities,
+    optimized for RBLN NPUs. It is designed for conditional generation tasks that involve both image and text inputs.
+
+    This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
+
+    Important Note:
+        This model includes a Large Language Model (LLM) as a submodule. For optimal performance, it is highly recommended to use
+        tensor parallelism for the language model. This can be achieved by using the `rbln_config` parameter in the
+        `from_pretrained` method. Refer to the `from_pretrained` documentation and the RBLNLlavaNextForConditionalGenerationConfig class for details.
+
+    Examples:
+    ```python
+    from optimum.rbln import RBLNLlavaNextForConditionalGeneration
+
+    model = RBLNLlavaNextForConditionalGeneration.from_pretrained(
+        "llava-hf/llava-v1.6-mistral-7b-hf",
+        export=True,
+        rbln_config={
+            "language_model": {
+                "tensor_parallel_size": 4,
+                "use_inputs_embeds": True,  # In Llava-Next, language model must use inputs_embeds as input.
+            },
+        },
+    )
+
+    model.save_pretrained("compiled-llava-next-mistral-7b-hf")
+    ```
+    """
+
     auto_model_class = AutoModelForVision2Seq
     _rbln_submodules = [
         {"name": "vision_tower"},
@@ -136,10 +166,9 @@ class RBLNLlavaNextForConditionalGeneration(RBLNModel):
         subfolder: str,
         rbln_config: RBLNModelConfig,
     ):
-
-
-
-        """
+        # If you are unavoidably running on a CPU rather than an RBLN device,
+        # store the torch tensor, weight, etc. in this function.
+
         save_dict = {}
         save_dict["image_newline"] = model.image_newline
         torch.save(save_dict, save_dir_path / subfolder / "torch_artifacts.pth")
optimum/rbln/transformers/models/midm/configuration_midm.py

@@ -16,4 +16,27 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
 
 
 class RBLNMidmLMHeadModelConfig(RBLNDecoderOnlyModelForCausalLMConfig):
-
+    """
+    Configuration class for MIDM models.
+
+    This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
+
+    Example usage:
+    ```python
+    from optimum.rbln import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
+
+    # Create a configuration object
+    config = RBLNMidmLMHeadModelConfig(
+        batch_size=1,
+        max_seq_len=4096,
+        tensor_parallel_size=4
+    )
+
+    # Use the configuration with from_pretrained
+    model = RBLNMidmLMHeadModel.from_pretrained(
+        "KT-AI/midm-bitext-S-7B-inst-v1",
+        export=True,
+        rbln_config=config
+    )
+    ```
+    """
optimum/rbln/transformers/models/midm/midm_architecture.py

@@ -68,7 +68,12 @@ class MidmLMHeadModelWrapper(DecoderOnlyWrapper):
             )
             new_layer = MidmLayer(layer, new_self_attn)
             new_layers.append(new_layer)
-        new_model = MidmModel(causal_lm.transformer, new_layers, max_seq_len=max_seq_len)
+        new_model = MidmModel(
+            causal_lm.transformer,
+            new_layers,
+            max_seq_len=max_seq_len,
+            sliding_window_layers=self.sliding_window_layers,
+        )
         new_causal_lm = DecoderOnlyForCausalLM(causal_lm, new_model)
         return new_causal_lm
 
optimum/rbln/transformers/models/midm/modeling_midm.py

@@ -24,16 +24,62 @@ logger = logging.get_logger(__name__)
 
 class RBLNMidmLMHeadModel(RBLNDecoderOnlyModelForCausalLM):
     """
-    The
-
+    The MIDM Model transformer with a language modeling head (linear layer) on top.
+    This model inherits from [`RBLNDecoderOnlyModelForCausalLM`]. Check the superclass documentation for the generic methods the library implements for all its models.
 
-
-
+    A class to convert and run pre-trained transformers based MidmForCausalLM model on RBLN devices.
+    It implements the methods to convert a pre-trained transformers MidmForCausalLM model into a RBLN transformer model by:
 
-    It implements the methods to convert a pre-trained transformers Midm model into a RBLN transformer model by:
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN compiler.
 
+    **Configuration:**
+    This model uses [`RBLNMidmLMHeadModelConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
+    the `rbln_config` parameter should be an instance of [`RBLNMidmLMHeadModelConfig`] or a dictionary conforming to its structure.
+
+    See the [`RBLNMidmLMHeadModelConfig`] class for all available configuration options.
+
+    Examples:
+    ```python
+    from optimum.rbln import RBLNMidmLMHeadModel
+
+    # Simple usage using rbln_* arguments
+    # `max_seq_len` is automatically inferred from the model config
+    model = RBLNMidmLMHeadModel.from_pretrained(
+        "KT-AI/midm-bitext-S-7B-inst-v1",
+        export=True,
+        rbln_batch_size=1,
+        rbln_tensor_parallel_size=4,
+    )
+
+
+    # Using a config dictionary
+    rbln_config = {
+        "batch_size": 1,
+        "max_seq_len": 4096,
+        "tensor_parallel_size": 4,
+    }
+    model = RBLNMidmLMHeadModel.from_pretrained(
+        "KT-AI/midm-bitext-S-7B-inst-v1",
+        export=True,
+        rbln_config=rbln_config
+    )
+
+
+    # Using a RBLNMidmLMHeadModelConfig instance (recommended for type checking)
+    from optimum.rbln import RBLNMidmLMHeadModelConfig
+
+    config = RBLNMidmLMHeadModelConfig(
+        batch_size=1,
+        max_seq_len=4096,
+        tensor_parallel_size=4
+    )
+    model = RBLNMidmLMHeadModel.from_pretrained(
+        "KT-AI/midm-bitext-S-7B-inst-v1",
+        export=True,
+        rbln_config=config
+    )
+    ```
     """
 
     _decoder_wrapper_cls = MidmLMHeadModelWrapper
optimum/rbln/transformers/models/mistral/configuration_mistral.py

@@ -16,4 +16,27 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
 
 
 class RBLNMistralForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
-
+    """
+    Configuration class for RBLN Mistral models.
+
+    This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
+
+    Example usage:
+    ```python
+    from optimum.rbln import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
+
+    # Create a configuration object
+    config = RBLNMistralForCausalLMConfig(
+        batch_size=1,
+        max_seq_len=4096,
+        tensor_parallel_size=4
+    )
+
+    # Use the configuration with from_pretrained
+    model = RBLNMistralForCausalLM.from_pretrained(
+        "mistralai/Mistral-7B-v0.1",
+        export=True,
+        rbln_config=config
+    )
+    ```
+    """
optimum/rbln/transformers/models/mistral/modeling_mistral.py

@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from transformers import PretrainedConfig
+
 from ....utils import logging
-from ...models.decoderonly import RBLNDecoderOnlyModelForCausalLM
+from ...models.decoderonly import RBLNDecoderOnlyModelForCausalLM, RBLNDecoderOnlyModelForCausalLMConfig
 from .mistral_architecture import MistralForCausalLMWrapper
 
 
@@ -22,13 +24,69 @@ logger = logging.get_logger(__name__)
 
 class RBLNMistralForCausalLM(RBLNDecoderOnlyModelForCausalLM):
     """
-    The
+    The Mistral Model transformer with a language modeling head (linear layer) on top.
     This model inherits from [`RBLNDecoderOnlyModelForCausalLM`]. Check the superclass documentation for the generic methods the library implements for all its models.
 
-    A class to convert and run pre-trained transformers based
-    It implements the methods to convert a pre-trained transformers
+    A class to convert and run pre-trained transformers based MistralForCausalLM model on RBLN devices.
+    It implements the methods to convert a pre-trained transformers MistralForCausalLM model into a RBLN transformer model by:
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN compiler.
+
+    **Configuration:**
+    This model uses [`RBLNMistralForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
+    the `rbln_config` parameter should be an instance of [`RBLNMistralForCausalLMConfig`] or a dictionary conforming to its structure.
+
+    See the [`RBLNMistralForCausalLMConfig`] class for all available configuration options.
+
+    Examples:
+    ```python
+    from optimum.rbln import RBLNMistralForCausalLM
+
+    # Simple usage using rbln_* arguments
+    # `max_seq_len` is automatically inferred from the model config
+    model = RBLNMistralForCausalLM.from_pretrained(
+        "mistralai/Mistral-7B-v0.1",
+        export=True,
+        rbln_batch_size=1,
+        rbln_tensor_parallel_size=4,
+    )
+
+    # Using a config dictionary
+    rbln_config = {
+        "batch_size": 1,
+        "max_seq_len": 4096,
+        "tensor_parallel_size": 4,
+    }
+    model = RBLNMistralForCausalLM.from_pretrained(
+        "mistralai/Mistral-7B-v0.1",
+        export=True,
+        rbln_config=rbln_config
+    )
+
+    # Using a RBLNMistralForCausalLMConfig instance (recommended for type checking)
+    from optimum.rbln import RBLNMistralForCausalLMConfig
+
+    config = RBLNMistralForCausalLMConfig(
+        batch_size=1,
+        max_seq_len=4096,
+        tensor_parallel_size=4
+    )
+    model = RBLNMistralForCausalLM.from_pretrained(
+        "mistralai/Mistral-7B-v0.1",
+        export=True,
+        rbln_config=config
+    )
+    ```
     """
 
     _decoder_wrapper_cls = MistralForCausalLMWrapper
+
+    @classmethod
+    def _update_sliding_window_config(
+        cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
+    ):
+        rbln_config.cache_impl = "sliding_window"
+        rbln_config.sliding_window = model_config.sliding_window
+        rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
+
+        return rbln_config
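Taken together, the import change and the new `_update_sliding_window_config` classmethod mean a compiled Mistral model now defaults to a sliding-window KV cache across every decoder layer. A minimal standalone sketch of that effect, assuming `cache_impl`, `sliding_window`, and `sliding_window_layers` behave as plain attributes on the config object (which is what the assignments in the hunk above suggest):

```python
# Sketch only: mirrors the three assignments in _update_sliding_window_config above.
# MistralConfig is the standard Hugging Face transformers config class; the RBLN
# config constructor arguments follow the examples shown elsewhere in this diff.
from transformers import MistralConfig

from optimum.rbln import RBLNMistralForCausalLMConfig

model_config = MistralConfig.from_pretrained("mistralai/Mistral-7B-v0.1")
rbln_config = RBLNMistralForCausalLMConfig(batch_size=1, max_seq_len=4096)

rbln_config.cache_impl = "sliding_window"                 # switch the KV cache implementation
rbln_config.sliding_window = model_config.sliding_window  # 4096 for Mistral-7B-v0.1
rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))  # every layer
```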
optimum/rbln/transformers/models/opt/configuration_opt.py

@@ -16,4 +16,7 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
 
 
 class RBLNOPTForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
-
+    """
+    Configuration class for OPT causal language model.
+    Inherits from RBLNDecoderOnlyModelForCausalLMConfig with no additional parameters.
+    """
optimum/rbln/transformers/models/opt/modeling_opt.py

@@ -45,8 +45,15 @@ class RBLNOPTForCausalLM(RBLNDecoderOnlyModelForCausalLM):
 
     A class to convert and run pre-trained transformers based OPTForCausalLM model on RBLN devices.
     It implements the methods to convert a pre-trained transformers OPTForCausalLM model into a RBLN transformer model by:
+
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN compiler.
+
+    **Configuration:**
+    This model uses [`RBLNOPTForCausalLM`] for configuration. When calling methods like `from_pretrained` or `from_model`,
+    the `rbln_config` parameter should be an instance of [`RBLNOPTForCausalLM`] or a dictionary conforming to its structure.
+
+    See the [`RBLNOPTForCausalLM`] class for all available configuration options.
     """
 
     _decoder_wrapper_cls = OPTWrapper
@@ -72,6 +79,9 @@ class RBLNOPTForCausalLM(RBLNDecoderOnlyModelForCausalLM):
             "use_attention_mask": rbln_config.use_attention_mask,
             "use_position_ids": rbln_config.use_position_ids,
             "use_inputs_embeds": rbln_config.use_inputs_embeds,
+            "cache_impl": rbln_config.cache_impl,
+            "sliding_window": rbln_config.sliding_window,
+            "sliding_window_layers": rbln_config.sliding_window_layers,
         }
 
         for i in range(len(model.model.decoder.layers)):
optimum/rbln/transformers/models/opt/opt_architecture.py

@@ -45,7 +45,13 @@ class OPTWrapper(DecoderOnlyWrapper):
             )
             new_layer = OPTDecoderLayer(layer, new_self_attn)
             new_layers.append(new_layer)
-        new_model = OPTModel(causal_lm.model.decoder, new_layers, max_seq_len=max_seq_len, use_learned_pos_emb=True)
+        new_model = OPTModel(
+            causal_lm.model.decoder,
+            new_layers,
+            max_seq_len=max_seq_len,
+            use_learned_pos_emb=True,
+            sliding_window_layers=self.sliding_window_layers,
+        )
         new_causal_lm = DecoderOnlyForCausalLM(causal_lm, new_model)
         return new_causal_lm
 
optimum/rbln/transformers/models/phi/configuration_phi.py

@@ -16,4 +16,27 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
 
 
 class RBLNPhiForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
-
+    """
+    Configuration class for RBLN Phi models.
+
+    This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
+
+    Example usage:
+    ```python
+    from optimum.rbln import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
+
+    # Create a configuration object
+    config = RBLNPhiForCausalLMConfig(
+        batch_size=1,
+        max_seq_len=4096,
+        tensor_parallel_size=4
+    )
+
+    # Use the configuration with from_pretrained
+    model = RBLNPhiForCausalLM.from_pretrained(
+        "microsoft/phi-2",
+        export=True,
+        rbln_config=config
+    )
+    ```
+    """
optimum/rbln/transformers/models/phi/modeling_phi.py

@@ -27,8 +27,57 @@ class RBLNPhiForCausalLM(RBLNDecoderOnlyModelForCausalLM):
 
     A class to convert and run pre-trained transformers based PhiForCausalLM model on RBLN devices.
     It implements the methods to convert a pre-trained transformers PhiForCausalLM model into a RBLN transformer model by:
+
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN compiler.
+
+    **Configuration:**
+    This model uses [`RBLNPhiForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
+    the `rbln_config` parameter should be an instance of [`RBLNPhiForCausalLMConfig`] or a dictionary conforming to its structure.
+
+    See the [`RBLNPhiForCausalLMConfig`] class for all available configuration options.
+
+    Examples:
+    ```python
+    from optimum.rbln import RBLNPhiForCausalLM
+
+    # Simple usage using rbln_* arguments
+    # `max_seq_len` is automatically inferred from the model config
+    model = RBLNPhiForCausalLM.from_pretrained(
+        "microsoft/phi-2",
+        export=True,
+        rbln_batch_size=1,
+        rbln_tensor_parallel_size=4,
+    )
+
+
+    # Using a config dictionary
+    rbln_config = {
+        "batch_size": 1,
+        "max_seq_len": 4096,
+        "tensor_parallel_size": 4,
+    }
+    model = RBLNPhiForCausalLM.from_pretrained(
+        "microsoft/phi-2",
+        export=True,
+        rbln_config=rbln_config
+    )
+
+
+    # Using a RBLNPhiForCausalLMConfig instance (recommended for type checking)
+    from optimum.rbln import RBLNPhiForCausalLMConfig
+
+    config = RBLNPhiForCausalLMConfig(
+        batch_size=1,
+        max_seq_len=4096,
+        tensor_parallel_size=4
+    )
+    model = RBLNPhiForCausalLM.from_pretrained(
+        "microsoft/phi-2",
+        export=True,
+        rbln_config=config
+    )
+    ```
     """
 
     _decoder_wrapper_cls = PhiWrapper
optimum/rbln/transformers/models/phi/phi_architecture.py

@@ -48,7 +48,7 @@ class PhiWrapper(DecoderOnlyWrapper):
                 raise NotImplementedError(f"Unknwon attn : {self.attn_impl}")
             new_layer = PhiLayer(layer, new_self_attn)
             new_layers.append(new_layer)
-        new_model = PhiModel(causal_lm.model, new_layers)
+        new_model = PhiModel(causal_lm.model, new_layers, sliding_window_layers=self.sliding_window_layers)
         new_causal_lm = DecoderOnlyForCausalLM(causal_lm, new_model)
         return new_causal_lm
 
optimum/rbln/transformers/models/qwen2/configuration_qwen2.py

@@ -16,4 +16,27 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
 
 
 class RBLNQwen2ForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
-
+    """
+    Configuration class for RBLN Qwen2 models.
+
+    This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
+
+    Example usage:
+    ```python
+    from optimum.rbln import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
+
+    # Create a configuration object
+    config = RBLNQwen2ForCausalLMConfig(
+        batch_size=1,
+        max_seq_len=4096,
+        tensor_parallel_size=4
+    )
+
+    # Use the configuration with from_pretrained
+    model = RBLNQwen2ForCausalLM.from_pretrained(
+        "Qwen/Qwen2-7B",
+        export=True,
+        rbln_config=config
+    )
+    ```
+    """
optimum/rbln/transformers/models/qwen2/modeling_qwen2.py

@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from transformers import PretrainedConfig
+
 from ....utils import logging
-from ...models.decoderonly import RBLNDecoderOnlyModelForCausalLM
+from ...models.decoderonly import RBLNDecoderOnlyModelForCausalLM, RBLNDecoderOnlyModelForCausalLMConfig
 from .qwen2_architecture import QWEN2Wrapper
 
 
@@ -22,13 +24,74 @@ logger = logging.get_logger(__name__)
 
 class RBLNQwen2ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
     """
-    The
+    The Qwen2 Model transformer with a language modeling head (linear layer) on top.
     This model inherits from [`RBLNDecoderOnlyModelForCausalLM`]. Check the superclass documentation for the generic methods the library implements for all its models.
 
-    A class to convert and run pre-trained transformers based
-    It implements the methods to convert a pre-trained transformers
+    A class to convert and run pre-trained transformers based Qwen2ForCausalLM model on RBLN devices.
+    It implements the methods to convert a pre-trained transformers Qwen2ForCausalLM model into a RBLN transformer model by:
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN compiler.
+
+    **Configuration:**
+    This model uses [`RBLNQwen2ForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
+    the `rbln_config` parameter should be an instance of [`RBLNQwen2ForCausalLMConfig`] or a dictionary conforming to its structure.
+
+    See the [`RBLNQwen2ForCausalLMConfig`] class for all available configuration options.
+
+    Examples:
+    ```python
+    from optimum.rbln import RBLNQwen2ForCausalLM
+
+    # Simple usage using rbln_* arguments
+    # `max_seq_len` is automatically inferred from the model config
+    model = RBLNQwen2ForCausalLM.from_pretrained(
+        "Qwen/Qwen2-7B-Instruct",
+        export=True,
+        rbln_batch_size=1,
+        rbln_tensor_parallel_size=4,
+    )
+
+
+    # Using a config dictionary
+    rbln_config = {
+        "batch_size": 1,
+        "max_seq_len": 4096,
+        "tensor_parallel_size": 4,
+    }
+    model = RBLNQwen2ForCausalLM.from_pretrained(
+        "Qwen/Qwen2-7B-Instruct",
+        export=True,
+        rbln_config=rbln_config
+    )
+
+
+    # Using a RBLNQwen2ForCausalLMConfig instance (recommended for type checking)
+    from optimum.rbln import RBLNQwen2ForCausalLMConfig
+
+    config = RBLNQwen2ForCausalLMConfig(
+        batch_size=1,
+        max_seq_len=4096,
+        tensor_parallel_size=4
+    )
+    model = RBLNQwen2ForCausalLM.from_pretrained(
+        "Qwen/Qwen2-7B-Instruct",
+        export=True,
+        rbln_config=config
+    )
+    ```
     """
 
     _decoder_wrapper_cls = QWEN2Wrapper
+
+    @classmethod
+    def _update_sliding_window_config(
+        cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
+    ):
+        # https://github.com/huggingface/transformers/issues/35896
+        # There seems to be a bug in transformers(v4.52.4). Therefore, similar to when attn_implementation is eager,
+        # we set all layers to use sliding window in this version. This should be updated once the bug is fixed.
+
+        rbln_config.cache_impl = "sliding_window"
+        rbln_config.sliding_window = model_config.sliding_window
+        rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
+        return rbln_config
optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 from ....configuration_utils import RBLNModelConfig
 from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
@@ -25,7 +25,7 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
         self,
         visual: Optional[RBLNModelConfig] = None,
         use_inputs_embeds: bool = True,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         super().__init__(use_inputs_embeds=use_inputs_embeds, **kwargs)
         if not self.use_inputs_embeds:
@@ -37,7 +37,7 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
 
 
 class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
-    def __init__(self, max_seq_lens: Union[int, List[int]] = None, **kwargs):
+    def __init__(self, max_seq_lens: Union[int, List[int]] = None, **kwargs: Dict[str, Any]):
         """
         Args:
             max_seq_lens (Optional[Union[int, List[int]]]): Maximum sequence lengths for Vision
@@ -54,6 +54,18 @@ class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
 
         Raises:
             ValueError: If batch_size is not a positive integer.
+
+        Max Seq Lens:
+            Since `Qwen2_5_VLForConditionalGeneration` performs inference on a per-image or per-frame basis,
+            `max_seq_lens` should be set based on the maximum expected resolution of the input images or video frames,
+            according to the following guidelines:
+
+            1. **Minimum Value**: `max_seq_lens` must be greater than or equal to the number of patches generated from the input image.
+               For example, a 224x224 image with a patch size of 14 results in (224 / 14) * (224 / 14) = 256 patches.
+               Therefore, `max_seq_lens` must be at least 256.
+            2. **Alignment Requirement**: `max_seq_lens` must be a multiple of `(window_size / patch_size)^2` due to the requirements
+               of the window-based attention mechanism. For instance, if `window_size` is 112 and `patch_size` is 14, then
+               `(112 / 14)^2 = 64`, meaning valid values for `max_seq_lens` include 64, 128, 192, 256, etc.
         """
         super().__init__(**kwargs)
 
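For picking a valid `max_seq_lens`, the two guidelines in that docstring reduce to simple arithmetic. A standalone sketch (not part of optimum-rbln) using the docstring's own numbers:

```python
# 224x224 input, patch_size=14, window_size=112 -- the values from the example above.
image_size, patch_size, window_size = 224, 14, 112

num_patches = (image_size // patch_size) ** 2  # (224/14)^2 = 256: lower bound for max_seq_lens
alignment = (window_size // patch_size) ** 2   # (112/14)^2 = 64: max_seq_lens must be a multiple of this

# Smallest valid value: num_patches rounded up to the next multiple of alignment.
min_valid = -(-num_patches // alignment) * alignment
assert min_valid == 256 and min_valid % alignment == 0 and min_valid >= num_patches
```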