optimum-rbln 0.7.4a5__tar.gz → 0.7.4a7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/PKG-INFO +1 -1
- optimum_rbln-0.7.4a7/examples/image-to-text/run_idefics3.py +67 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/__init__.py +16 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/__version__.py +2 -2
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/modeling_base.py +22 -3
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/__init__.py +16 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/__init__.py +24 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +67 -41
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +198 -99
- optimum_rbln-0.7.4a7/src/optimum/rbln/transformers/models/idefics3/__init__.py +16 -0
- optimum_rbln-0.7.4a7/src/optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +51 -0
- optimum_rbln-0.7.4a7/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +459 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +6 -0
- optimum_rbln-0.7.4a7/src/optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +19 -0
- optimum_rbln-0.7.4a7/src/optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +68 -0
- optimum_rbln-0.7.4a7/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +608 -0
- optimum_rbln-0.7.4a7/src/optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +214 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/utils/runtime_utils.py +33 -2
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/utils/submodule.py +10 -1
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/tests/test_llm.py +72 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/tests/test_transformers.py +9 -1
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/pull_request_template.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/scripts/auto_code_review.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/scripts/validate_pr_checklist.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/version.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/auto_code_review.yml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/check_code_quality.yml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/deploy-on-tag.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/deploy.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/pr-title-check.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/pr_checklist_validator.yml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/rbln_check_compiler.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/rbln_scheduled_test.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/.gitignore +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/CODE_OF_CONDUCT.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/CONTRIBUTING.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/LICENSE +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/README.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/assets/rbln_logo.png +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/audio-classification/run_ast_audio_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/depth-estimation/run_dpt.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/image-classification/run_image_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/image-classification/run_vit_image_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/question-answering/run_question_answering.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/speech-recognition/run_wav2vec2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/speech-recognition/run_whisper.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/text-classification/run_secureBERT.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/text-classification/run_t5_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/text2text-generation/run_llama_peft.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/examples/time-series-forecasting/run_time_series_forecasting.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/pyproject.toml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/scripts/uv-lock.sh +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/scripts/uv-sync.sh +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/configuration_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/models/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/pipelines/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/modeling.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/ops/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/ops/attn.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/ops/flash_attn.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/ops/linear.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/configuration_alias.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/configuration_generic.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/modeling_alias.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/auto/auto_factory.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/bart/configuration_bart.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/bert/configuration_bert.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/clip/configuration_clip.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/dpt/configuration_dpt.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/exaone/configuration_exaone.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/gemma/configuration_gemma.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/llama/configuration_llama.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/midm/configuration_midm.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/mistral/configuration_mistral.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/phi/configuration_phi.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/t5/configuration_t5.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/time_series_transformers/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/time_series_transformers/configuration_time_series_transformer.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/time_series_transformers/modeling_time_series_transformers.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/time_series_transformers/time_series_transformers_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/whisper/configuration_whisper.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/utils/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/utils/decorator_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/utils/hub.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/utils/import_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/utils/logging.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/utils/model_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/utils/save_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/tests/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/tests/psnr.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/tests/requirements_sdxl.txt +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/tests/run_stable_diffusion_xl_base.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/tests/test_base.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/tests/test_config.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/tests/test_diffusers.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/uv.lock +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: optimum-rbln
|
3
|
-
Version: 0.7.4a5
|
3
|
+
Version: 0.7.4a7
|
4
4
|
Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
|
5
5
|
Project-URL: Homepage, https://rebellions.ai
|
6
6
|
Project-URL: Documentation, https://docs.rbln.ai
|
@@ -0,0 +1,67 @@
|
|
1
|
+
import os
|
2
|
+
import typing
|
3
|
+
|
4
|
+
import fire
|
5
|
+
from datasets import load_dataset
|
6
|
+
from transformers import AutoProcessor
|
7
|
+
|
8
|
+
from optimum.rbln import RBLNIdefics3ForConditionalGeneration
|
9
|
+
|
10
|
+
|
11
|
+
def main(
|
12
|
+
model_id: str = "HuggingFaceM4/Idefics3-8B-Llama3",
|
13
|
+
batch_size: int = 1,
|
14
|
+
from_transformers: bool = False,
|
15
|
+
prompt: typing.Optional[str] = None,
|
16
|
+
max_seq_len: typing.Optional[int] = None,
|
17
|
+
tensor_parallel_size: typing.Optional[int] = 4,
|
18
|
+
):
|
19
|
+
processor = AutoProcessor.from_pretrained(model_id)
|
20
|
+
|
21
|
+
if from_transformers:
|
22
|
+
model = RBLNIdefics3ForConditionalGeneration.from_pretrained(
|
23
|
+
model_id,
|
24
|
+
export=True,
|
25
|
+
rbln_config={
|
26
|
+
"text_model": {
|
27
|
+
"attn_impl": "flash_attn",
|
28
|
+
"max_seq_len": max_seq_len,
|
29
|
+
"use_inputs_embeds": True,
|
30
|
+
"tensor_parallel_size": tensor_parallel_size,
|
31
|
+
"batch_size": batch_size,
|
32
|
+
}
|
33
|
+
},
|
34
|
+
)
|
35
|
+
model.save_pretrained(os.path.basename(model_id))
|
36
|
+
else:
|
37
|
+
model = RBLNIdefics3ForConditionalGeneration.from_pretrained(
|
38
|
+
os.path.basename(model_id),
|
39
|
+
export=False,
|
40
|
+
)
|
41
|
+
|
42
|
+
ds = load_dataset("HuggingFaceM4/the_cauldron", "ai2d", split="train")
|
43
|
+
samples = ds.select(range(batch_size))
|
44
|
+
images = []
|
45
|
+
prompts = []
|
46
|
+
|
47
|
+
for sample in samples:
|
48
|
+
img = sample["images"]
|
49
|
+
images.append(img)
|
50
|
+
|
51
|
+
message = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Describe this image."}]}]
|
52
|
+
prompt = processor.apply_chat_template(message, add_generation_prompt=True)
|
53
|
+
prompts.append(prompt)
|
54
|
+
|
55
|
+
inputs = processor(text=prompts, images=images, return_tensors="pt", padding=True)
|
56
|
+
inputs = dict(inputs)
|
57
|
+
# Generate
|
58
|
+
|
59
|
+
generated_ids = model.generate(**inputs, max_new_tokens=500)
|
60
|
+
generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
|
61
|
+
|
62
|
+
for i, text in enumerate(generated_texts):
|
63
|
+
print(f"Sample {i + 1} generate:\n{text}\n")
|
64
|
+
|
65
|
+
|
66
|
+
if __name__ == "__main__":
|
67
|
+
fire.Fire(main)
|
@@ -74,6 +74,10 @@ _import_structure = {
|
|
74
74
|
"RBLNGemmaForCausalLMConfig",
|
75
75
|
"RBLNGPT2LMHeadModel",
|
76
76
|
"RBLNGPT2LMHeadModelConfig",
|
77
|
+
"RBLNIdefics3VisionTransformer",
|
78
|
+
"RBLNIdefics3ForConditionalGeneration",
|
79
|
+
"RBLNIdefics3ForConditionalGenerationConfig",
|
80
|
+
"RBLNIdefics3VisionTransformerConfig",
|
77
81
|
"RBLNLlamaForCausalLM",
|
78
82
|
"RBLNLlamaForCausalLMConfig",
|
79
83
|
"RBLNLlavaNextForConditionalGeneration",
|
@@ -86,6 +90,10 @@ _import_structure = {
|
|
86
90
|
"RBLNPhiForCausalLMConfig",
|
87
91
|
"RBLNQwen2ForCausalLM",
|
88
92
|
"RBLNQwen2ForCausalLMConfig",
|
93
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModel",
|
94
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
|
95
|
+
"RBLNQwen2_5_VLForConditionalGeneration",
|
96
|
+
"RBLNQwen2_5_VLForConditionalGenerationConfig",
|
89
97
|
"RBLNResNetForImageClassification",
|
90
98
|
"RBLNResNetForImageClassificationConfig",
|
91
99
|
"RBLNRobertaForMaskedLM",
|
@@ -277,6 +285,10 @@ if TYPE_CHECKING:
|
|
277
285
|
RBLNGemmaForCausalLMConfig,
|
278
286
|
RBLNGPT2LMHeadModel,
|
279
287
|
RBLNGPT2LMHeadModelConfig,
|
288
|
+
RBLNIdefics3ForConditionalGeneration,
|
289
|
+
RBLNIdefics3ForConditionalGenerationConfig,
|
290
|
+
RBLNIdefics3VisionTransformer,
|
291
|
+
RBLNIdefics3VisionTransformerConfig,
|
280
292
|
RBLNLlamaForCausalLM,
|
281
293
|
RBLNLlamaForCausalLMConfig,
|
282
294
|
RBLNLlavaNextForConditionalGeneration,
|
@@ -287,6 +299,10 @@ if TYPE_CHECKING:
|
|
287
299
|
RBLNMistralForCausalLMConfig,
|
288
300
|
RBLNPhiForCausalLM,
|
289
301
|
RBLNPhiForCausalLMConfig,
|
302
|
+
RBLNQwen2_5_VisionTransformerPretrainedModel,
|
303
|
+
RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
|
304
|
+
RBLNQwen2_5_VLForConditionalGeneration,
|
305
|
+
RBLNQwen2_5_VLForConditionalGenerationConfig,
|
290
306
|
RBLNQwen2ForCausalLM,
|
291
307
|
RBLNQwen2ForCausalLMConfig,
|
292
308
|
RBLNResNetForImageClassification,
|
@@ -17,5 +17,5 @@ __version__: str
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
18
18
|
version_tuple: VERSION_TUPLE
|
19
19
|
|
20
|
-
__version__ = version = '0.7.4a5'
|
21
|
-
__version_tuple__ = version_tuple = (0, 7, 4, 'a5')
|
20
|
+
__version__ = version = '0.7.4a7'
|
21
|
+
__version_tuple__ = version_tuple = (0, 7, 4, 'a7')
|
@@ -314,10 +314,15 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
|
|
314
314
|
)
|
315
315
|
|
316
316
|
except rebel.core.exception.RBLNRuntimeError as e:
|
317
|
-
|
318
|
-
f"
|
317
|
+
error_msg = (
|
318
|
+
f"\nFailed to create RBLN runtime: {str(e)}\n\n"
|
319
|
+
f"If you only need to compile the model without loading it to NPU, you can use:\n"
|
320
|
+
f" from_pretrained(..., rbln_create_runtimes=False) or\n"
|
321
|
+
f" from_pretrained(..., rbln_config={{..., 'create_runtimes': False}})\n\n"
|
322
|
+
f"To check your NPU status, run the 'rbln-stat' command in your terminal.\n"
|
323
|
+
f"Make sure your NPU is properly installed and operational."
|
319
324
|
)
|
320
|
-
|
325
|
+
raise rebel.core.exception.RBLNRuntimeError(error_msg) from e
|
321
326
|
|
322
327
|
return cls(
|
323
328
|
models,
|
@@ -423,6 +428,20 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
|
|
423
428
|
def to(self, *args, **kwargs):
    """Accept and ignore every argument, then return the model itself."""
    return self

def parameters(self, recurse: bool = True):
    """
    Provides a dummy parameter generator for compatibility.

    This method mimics the interface of torch.nn.Module.parameters()
    specifically for code that uses `next(model.parameters())` to infer
    the device or dtype. It yields a single dummy tensor on CPU with float32 dtype.

    Args:
        recurse: Accepted only so the signature matches
            torch.nn.Module.parameters(recurse=True); the value is ignored
            and the same single dummy tensor is yielded either way.

    Yields:
        A one-element float32 tensor located on the CPU.

    Warning:
        This does NOT yield the actual model parameters used by the RBLN runtime.
        Code relying on iterating through all model parameters will not work as expected.
    """
    yield torch.tensor([1.0], dtype=torch.float32, device=torch.device("cpu"))

def __call__(self, *args, **kwargs):
    """Pass all positional and keyword arguments straight to ``self.forward``."""
    return self.forward(*args, **kwargs)
|
428
447
|
|
@@ -68,6 +68,10 @@ _import_structure = {
|
|
68
68
|
"RBLNGemmaForCausalLMConfig",
|
69
69
|
"RBLNGPT2LMHeadModel",
|
70
70
|
"RBLNGPT2LMHeadModelConfig",
|
71
|
+
"RBLNIdefics3VisionTransformer",
|
72
|
+
"RBLNIdefics3ForConditionalGeneration",
|
73
|
+
"RBLNIdefics3ForConditionalGenerationConfig",
|
74
|
+
"RBLNIdefics3VisionTransformerConfig",
|
71
75
|
"RBLNLlamaForCausalLM",
|
72
76
|
"RBLNLlamaForCausalLMConfig",
|
73
77
|
"RBLNLlavaNextForConditionalGeneration",
|
@@ -80,6 +84,10 @@ _import_structure = {
|
|
80
84
|
"RBLNPhiForCausalLMConfig",
|
81
85
|
"RBLNQwen2ForCausalLM",
|
82
86
|
"RBLNQwen2ForCausalLMConfig",
|
87
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModel",
|
88
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
|
89
|
+
"RBLNQwen2_5_VLForConditionalGeneration",
|
90
|
+
"RBLNQwen2_5_VLForConditionalGenerationConfig",
|
83
91
|
"RBLNT5EncoderModel",
|
84
92
|
"RBLNT5EncoderModelConfig",
|
85
93
|
"RBLNT5ForConditionalGeneration",
|
@@ -165,6 +173,10 @@ if TYPE_CHECKING:
|
|
165
173
|
RBLNGemmaForCausalLMConfig,
|
166
174
|
RBLNGPT2LMHeadModel,
|
167
175
|
RBLNGPT2LMHeadModelConfig,
|
176
|
+
RBLNIdefics3ForConditionalGeneration,
|
177
|
+
RBLNIdefics3ForConditionalGenerationConfig,
|
178
|
+
RBLNIdefics3VisionTransformer,
|
179
|
+
RBLNIdefics3VisionTransformerConfig,
|
168
180
|
RBLNLlamaForCausalLM,
|
169
181
|
RBLNLlamaForCausalLMConfig,
|
170
182
|
RBLNLlavaNextForConditionalGeneration,
|
@@ -175,6 +187,10 @@ if TYPE_CHECKING:
|
|
175
187
|
RBLNMistralForCausalLMConfig,
|
176
188
|
RBLNPhiForCausalLM,
|
177
189
|
RBLNPhiForCausalLMConfig,
|
190
|
+
RBLNQwen2_5_VisionTransformerPretrainedModel,
|
191
|
+
RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
|
192
|
+
RBLNQwen2_5_VLForConditionalGeneration,
|
193
|
+
RBLNQwen2_5_VLForConditionalGenerationConfig,
|
178
194
|
RBLNQwen2ForCausalLM,
|
179
195
|
RBLNQwen2ForCausalLMConfig,
|
180
196
|
RBLNT5EncoderModel,
|
{optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a7}/src/optimum/rbln/transformers/models/__init__.py
RENAMED
@@ -56,6 +56,12 @@ _import_structure = {
|
|
56
56
|
"RBLNCLIPVisionModelWithProjection",
|
57
57
|
"RBLNCLIPVisionModelWithProjectionConfig",
|
58
58
|
],
|
59
|
+
"qwen2_5_vl": [
|
60
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModel",
|
61
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
|
62
|
+
"RBLNQwen2_5_VLForConditionalGeneration",
|
63
|
+
"RBLNQwen2_5_VLForConditionalGenerationConfig",
|
64
|
+
],
|
59
65
|
"decoderonly": [
|
60
66
|
"RBLNDecoderOnlyModelForCausalLM",
|
61
67
|
"RBLNDecoderOnlyModelForCausalLMConfig",
|
@@ -67,6 +73,12 @@ _import_structure = {
|
|
67
73
|
"exaone": ["RBLNExaoneForCausalLM", "RBLNExaoneForCausalLMConfig"],
|
68
74
|
"gemma": ["RBLNGemmaForCausalLM", "RBLNGemmaForCausalLMConfig"],
|
69
75
|
"gpt2": ["RBLNGPT2LMHeadModel", "RBLNGPT2LMHeadModelConfig"],
|
76
|
+
"idefics3": [
|
77
|
+
"RBLNIdefics3VisionTransformer",
|
78
|
+
"RBLNIdefics3ForConditionalGeneration",
|
79
|
+
"RBLNIdefics3ForConditionalGenerationConfig",
|
80
|
+
"RBLNIdefics3VisionTransformerConfig",
|
81
|
+
],
|
70
82
|
"llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig"],
|
71
83
|
"llava_next": ["RBLNLlavaNextForConditionalGeneration", "RBLNLlavaNextForConditionalGenerationConfig"],
|
72
84
|
"midm": ["RBLNMidmLMHeadModel", "RBLNMidmLMHeadModelConfig"],
|
@@ -138,12 +150,24 @@ if TYPE_CHECKING:
|
|
138
150
|
from .exaone import RBLNExaoneForCausalLM, RBLNExaoneForCausalLMConfig
|
139
151
|
from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig
|
140
152
|
from .gpt2 import RBLNGPT2LMHeadModel, RBLNGPT2LMHeadModelConfig
|
153
|
+
from .idefics3 import (
|
154
|
+
RBLNIdefics3ForConditionalGeneration,
|
155
|
+
RBLNIdefics3ForConditionalGenerationConfig,
|
156
|
+
RBLNIdefics3VisionTransformer,
|
157
|
+
RBLNIdefics3VisionTransformerConfig,
|
158
|
+
)
|
141
159
|
from .llama import RBLNLlamaForCausalLM, RBLNLlamaForCausalLMConfig
|
142
160
|
from .llava_next import RBLNLlavaNextForConditionalGeneration, RBLNLlavaNextForConditionalGenerationConfig
|
143
161
|
from .midm import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
|
144
162
|
from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
|
145
163
|
from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
|
146
164
|
from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
|
165
|
+
from .qwen2_5_vl import (
|
166
|
+
RBLNQwen2_5_VisionTransformerPretrainedModel,
|
167
|
+
RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
|
168
|
+
RBLNQwen2_5_VLForConditionalGeneration,
|
169
|
+
RBLNQwen2_5_VLForConditionalGenerationConfig,
|
170
|
+
)
|
147
171
|
from .t5 import (
|
148
172
|
RBLNT5EncoderModel,
|
149
173
|
RBLNT5EncoderModelConfig,
|
@@ -13,7 +13,7 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
import math
|
16
|
-
from typing import List, Optional, Tuple
|
16
|
+
from typing import List, Optional, Tuple, Union
|
17
17
|
|
18
18
|
import torch
|
19
19
|
from torch import nn
|
@@ -184,6 +184,7 @@ class DecoderOnlyWrapper(nn.Module):
|
|
184
184
|
|
185
185
|
def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
|
186
186
|
new_layers = []
|
187
|
+
|
187
188
|
for layer in causal_lm.model.layers:
|
188
189
|
if self.attn_impl == "eager":
|
189
190
|
new_self_attn = DecoderOnlyAttention(
|
@@ -201,6 +202,7 @@ class DecoderOnlyWrapper(nn.Module):
|
|
201
202
|
|
202
203
|
new_layer = DecoderOnlyLayer(layer, new_self_attn)
|
203
204
|
new_layers.append(new_layer)
|
205
|
+
|
204
206
|
new_model = DecoderOnlyModel(
|
205
207
|
causal_lm.model,
|
206
208
|
new_layers,
|
@@ -220,6 +222,53 @@ class DecoderOnlyWrapper(nn.Module):
|
|
220
222
|
self._phase = phase
|
221
223
|
self.causal_lm.phase = phase
|
222
224
|
|
225
|
+
def forward_common(
    self,
    input_ids_or_inputs_embeds: torch.Tensor,
    cache_position: torch.Tensor,
    attention_mask: torch.Tensor,
    query_position: torch.Tensor,
    block_tables: torch.Tensor,
    rotary_emb: Union[nn.Module, torch.Tensor],
    *past_key_values: torch.Tensor,
):
    """Regroup the flat inputs and run ``self.causal_lm`` on them.

    The first tensor is dispatched on its rank: a 2-D tensor is forwarded as
    ``input_ids`` and a 3-D tensor as ``inputs_embeds``. The flat cache
    arguments, laid out as ``key_0, value_0, key_1, value_1, ...``, are
    regrouped into one ``[key, value]`` list per layer.

    Args:
        input_ids_or_inputs_embeds: 2-D token ids or 3-D input embeddings.
        cache_position: Forwarded unchanged to ``self.causal_lm``.
        attention_mask: Forwarded unchanged to ``self.causal_lm``.
        query_position: Forwarded unchanged to ``self.causal_lm``.
        block_tables: Forwarded unchanged to ``self.causal_lm``.
        rotary_emb: Rotary embedding module or tensor, forwarded unchanged.
        *past_key_values: Exactly ``2 * self.num_hidden_layers`` cache tensors.

    Returns:
        Whatever ``self.causal_lm`` returns.

    Raises:
        NotImplementedError: If the first tensor is neither 2-D nor 3-D.
        ValueError: If the number of cache tensors is not
            ``2 * self.num_hidden_layers``.
    """
    input_ndim = input_ids_or_inputs_embeds.ndim
    if input_ndim == 2:
        input_ids, inputs_embeds = input_ids_or_inputs_embeds, None
    elif input_ndim == 3:
        input_ids, inputs_embeds = None, input_ids_or_inputs_embeds
    else:
        raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")

    if len(past_key_values) != 2 * self.num_hidden_layers:
        raise ValueError(
            f"Different past_key_values to model's config. {len(past_key_values)} != {2 * self.num_hidden_layers}"
        )

    # [key, value] * n_layer -> ( (key, value) ) * n_layer
    # cache shape : batch, n_heads, 1, max_seq_len, head_dim
    # Pairing the validated flat sequence directly keeps the number of layers
    # tied to the length check above instead of a second, separately stored
    # layer count.
    layer_caches = [
        [key_states, value_states]
        for key_states, value_states in zip(past_key_values[0::2], past_key_values[1::2])
    ]

    return self.causal_lm(
        input_ids=input_ids,
        inputs_embeds=inputs_embeds,
        attention_mask=attention_mask,
        cache_position=cache_position,
        query_position=query_position,
        past_key_values=layer_caches,
        rotary_emb=rotary_emb,
        block_tables=block_tables,
    )
|
271
|
+
|
223
272
|
def forward(self, *args):
|
224
273
|
if self.phase == "decode":
|
225
274
|
if self.use_attention_mask:
|
@@ -262,43 +311,16 @@ class DecoderOnlyWrapper(nn.Module):
|
|
262
311
|
else:
|
263
312
|
raise ValueError(f"Unknown phase: {self.phase}")
|
264
313
|
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
if len(past_key_values) != 2 * self.num_hidden_layers:
|
275
|
-
raise ValueError(
|
276
|
-
f"Different past_key_values to model's config. {len(past_key_values)} != {2 * self.num_hidden_layers}"
|
277
|
-
)
|
278
|
-
|
279
|
-
# [key, value] * n_layer -> ( (key, value) ) * n_layer
|
280
|
-
# cache shape : batch, n_heads, 1, max_seq_len, head_dim
|
281
|
-
_past_key_values = []
|
282
|
-
for i in range(self.config.num_hidden_layers):
|
283
|
-
key_states = past_key_values[i * 2]
|
284
|
-
value_states = past_key_values[i * 2 + 1]
|
285
|
-
past_key_value = [key_states, value_states]
|
286
|
-
_past_key_values.append(past_key_value)
|
287
|
-
past_key_values = _past_key_values
|
288
|
-
|
289
|
-
logit = self.causal_lm(
|
290
|
-
input_ids=input_ids,
|
291
|
-
inputs_embeds=inputs_embeds,
|
292
|
-
attention_mask=attention_mask,
|
293
|
-
cache_position=cache_position,
|
294
|
-
query_position=query_position,
|
295
|
-
past_key_values=past_key_values,
|
296
|
-
rotary_emb=self.rotary_emb,
|
297
|
-
block_tables=block_tables,
|
314
|
+
return self.forward_common(
|
315
|
+
input_ids_or_inputs_embeds,
|
316
|
+
cache_position,
|
317
|
+
attention_mask,
|
318
|
+
query_position,
|
319
|
+
block_tables,
|
320
|
+
self.rotary_emb,
|
321
|
+
*past_key_values,
|
298
322
|
)
|
299
323
|
|
300
|
-
return logit
|
301
|
-
|
302
324
|
|
303
325
|
class DecoderOnlyForCausalLM(nn.Module):
|
304
326
|
"""A specialized wrapper for Causal Language Models optimized for RBLN compilation.
|
@@ -322,12 +344,13 @@ class DecoderOnlyForCausalLM(nn.Module):
|
|
322
344
|
_phase: Current processing phase ("prefill" or "decode")
|
323
345
|
"""
|
324
346
|
|
325
|
-
def __init__(self, causal_lm: PreTrainedModel, model):
|
347
|
+
def __init__(self, causal_lm: PreTrainedModel, model: nn.Module):
    """Store the source causal LM together with the given inner model.

    Args:
        causal_lm: Source model; its ``config`` and ``lm_head`` are reused.
        model: Module kept as ``self.model``.
    """
    super().__init__()
    # Plain attributes first; the phase starts out as "prefill".
    self._phase = "prefill"
    self.config = causal_lm.config
    # Module attributes are assigned in the same relative order as before:
    # _original_mod, model, lm_head.
    self._original_mod = causal_lm
    self.model = model
    self.lm_head = causal_lm.lm_head
|
331
354
|
|
332
355
|
@property
|
333
356
|
def phase(self):
|
@@ -363,7 +386,7 @@ class DecoderOnlyForCausalLM(nn.Module):
|
|
363
386
|
if self.phase == "prefill":
|
364
387
|
hidden_states = hidden_states[:, query_position.to(torch.int).unsqueeze(0)]
|
365
388
|
|
366
|
-
logits = self.
|
389
|
+
logits = self.lm_head(hidden_states)
|
367
390
|
return logits
|
368
391
|
|
369
392
|
|
@@ -455,8 +478,12 @@ class DecoderOnlyModel(nn.Module):
|
|
455
478
|
|
456
479
|
# get cos,sin vector if needed
|
457
480
|
if rotary_emb is not None:
|
458
|
-
|
459
|
-
|
481
|
+
if isinstance(rotary_emb, torch.Tensor):
|
482
|
+
cos = rotary_emb[0]
|
483
|
+
sin = rotary_emb[1]
|
484
|
+
else:
|
485
|
+
cos, sin = rotary_emb(hidden_states, self.max_seq_len) # dtype carrier, max_seq_len
|
486
|
+
cos, sin = slice_and_unsqueeze_cos_sin(cos, sin, cache_position)
|
460
487
|
else:
|
461
488
|
batch_size = inputs_embeds.shape[0]
|
462
489
|
if cache_position.shape[0] > 1:
|
@@ -833,7 +860,6 @@ def rotate_half(x):
|
|
833
860
|
|
834
861
|
def apply_rotary_pos_emb(q, k, cos, sin):
    """Apply rotary position embedding to the query and key tensors.

    Each of ``q`` and ``k`` is combined as ``x * cos + rotate_half(x) * sin``.

    Returns:
        The tuple ``(q_embed, k_embed)``.
    """
    q_rotated, k_rotated = rotate_half(q), rotate_half(k)
    q_embed = (q * cos) + (q_rotated * sin)
    k_embed = (k * cos) + (k_rotated * sin)
    return q_embed, k_embed
|