optimum-rbln 0.7.4a5__tar.gz → 0.7.4a6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/PKG-INFO +1 -1
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/__init__.py +8 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/__version__.py +2 -2
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/modeling_base.py +22 -3
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/__init__.py +8 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/__init__.py +12 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +65 -41
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +192 -99
- optimum_rbln-0.7.4a6/src/optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +19 -0
- optimum_rbln-0.7.4a6/src/optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +68 -0
- optimum_rbln-0.7.4a6/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +608 -0
- optimum_rbln-0.7.4a6/src/optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +214 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/utils/runtime_utils.py +33 -2
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/tests/test_llm.py +36 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/tests/test_transformers.py +9 -1
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/pull_request_template.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/scripts/auto_code_review.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/scripts/validate_pr_checklist.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/version.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/auto_code_review.yml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/check_code_quality.yml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/deploy-on-tag.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/deploy.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/pr-title-check.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/pr_checklist_validator.yml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/rbln_check_compiler.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/rbln_scheduled_test.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/.gitignore +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/CODE_OF_CONDUCT.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/CONTRIBUTING.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/LICENSE +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/README.md +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/assets/rbln_logo.png +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/audio-classification/run_ast_audio_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/depth-estimation/run_dpt.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/image-classification/run_image_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/image-classification/run_vit_image_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/question-answering/run_question_answering.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/speech-recognition/run_wav2vec2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/speech-recognition/run_whisper.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/text-classification/run_secureBERT.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/text-classification/run_t5_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/text2text-generation/run_llama_peft.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/examples/time-series-forecasting/run_time_series_forecasting.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/pyproject.toml +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/scripts/uv-lock.sh +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/scripts/uv-sync.sh +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/configuration_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/models/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/pipelines/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/modeling.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/ops/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/ops/attn.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/ops/flash_attn.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/ops/linear.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/configuration_alias.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/configuration_generic.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/modeling_alias.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/auto/auto_factory.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/bart/configuration_bart.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/bert/configuration_bert.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/clip/configuration_clip.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/dpt/configuration_dpt.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/exaone/configuration_exaone.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/gemma/configuration_gemma.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/llama/configuration_llama.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/midm/configuration_midm.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/mistral/configuration_mistral.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/phi/configuration_phi.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/t5/configuration_t5.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/time_series_transformers/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/time_series_transformers/configuration_time_series_transformer.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/time_series_transformers/modeling_time_series_transformers.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/time_series_transformers/time_series_transformers_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/whisper/configuration_whisper.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/utils/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/utils/decorator_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/utils/hub.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/utils/import_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/utils/logging.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/utils/model_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/utils/save_utils.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/utils/submodule.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/tests/__init__.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/tests/psnr.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/tests/requirements_sdxl.txt +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/tests/run_stable_diffusion_xl_base.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/tests/test_base.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/tests/test_config.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/tests/test_diffusers.py +0 -0
- {optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/uv.lock +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: optimum-rbln
|
3
|
-
Version: 0.7.4a5
|
3
|
+
Version: 0.7.4a6
|
4
4
|
Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
|
5
5
|
Project-URL: Homepage, https://rebellions.ai
|
6
6
|
Project-URL: Documentation, https://docs.rbln.ai
|
@@ -86,6 +86,10 @@ _import_structure = {
|
|
86
86
|
"RBLNPhiForCausalLMConfig",
|
87
87
|
"RBLNQwen2ForCausalLM",
|
88
88
|
"RBLNQwen2ForCausalLMConfig",
|
89
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModel",
|
90
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
|
91
|
+
"RBLNQwen2_5_VLForConditionalGeneration",
|
92
|
+
"RBLNQwen2_5_VLForConditionalGenerationConfig",
|
89
93
|
"RBLNResNetForImageClassification",
|
90
94
|
"RBLNResNetForImageClassificationConfig",
|
91
95
|
"RBLNRobertaForMaskedLM",
|
@@ -287,6 +291,10 @@ if TYPE_CHECKING:
|
|
287
291
|
RBLNMistralForCausalLMConfig,
|
288
292
|
RBLNPhiForCausalLM,
|
289
293
|
RBLNPhiForCausalLMConfig,
|
294
|
+
RBLNQwen2_5_VisionTransformerPretrainedModel,
|
295
|
+
RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
|
296
|
+
RBLNQwen2_5_VLForConditionalGeneration,
|
297
|
+
RBLNQwen2_5_VLForConditionalGenerationConfig,
|
290
298
|
RBLNQwen2ForCausalLM,
|
291
299
|
RBLNQwen2ForCausalLMConfig,
|
292
300
|
RBLNResNetForImageClassification,
|
@@ -17,5 +17,5 @@ __version__: str
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
18
18
|
version_tuple: VERSION_TUPLE
|
19
19
|
|
20
|
-
__version__ = version = '0.7.4a5'
|
21
|
-
__version_tuple__ = version_tuple = (0, 7, 4, 'a5')
|
20
|
+
__version__ = version = '0.7.4a6'
|
21
|
+
__version_tuple__ = version_tuple = (0, 7, 4, 'a6')
|
@@ -314,10 +314,15 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
|
|
314
314
|
)
|
315
315
|
|
316
316
|
except rebel.core.exception.RBLNRuntimeError as e:
|
317
|
-
|
318
|
-
f"
|
317
|
+
error_msg = (
|
318
|
+
f"\nFailed to create RBLN runtime: {str(e)}\n\n"
|
319
|
+
f"If you only need to compile the model without loading it to NPU, you can use:\n"
|
320
|
+
f" from_pretrained(..., rbln_create_runtimes=False) or\n"
|
321
|
+
f" from_pretrained(..., rbln_config={{..., 'create_runtimes': False}})\n\n"
|
322
|
+
f"To check your NPU status, run the 'rbln-stat' command in your terminal.\n"
|
323
|
+
f"Make sure your NPU is properly installed and operational."
|
319
324
|
)
|
320
|
-
|
325
|
+
raise rebel.core.exception.RBLNRuntimeError(error_msg) from e
|
321
326
|
|
322
327
|
return cls(
|
323
328
|
models,
|
@@ -423,6 +428,20 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
|
|
423
428
|
def to(self, *args, **kwargs):
|
424
429
|
return self
|
425
430
|
|
431
|
+
def parameters(self):
|
432
|
+
"""
|
433
|
+
Provides a dummy parameter generator for compatibility.
|
434
|
+
|
435
|
+
This method mimics the interface of torch.nn.Module.parameters()
|
436
|
+
specifically for code that uses `next(model.parameters())` to infer
|
437
|
+
the device or dtype. It yields a single dummy tensor on CPU with float32 dtype.
|
438
|
+
|
439
|
+
Warning:
|
440
|
+
This does NOT yield the actual model parameters used by the RBLN runtime.
|
441
|
+
Code relying on iterating through all model parameters will not work as expected.
|
442
|
+
"""
|
443
|
+
yield torch.tensor([1.0], dtype=torch.float32, device=torch.device("cpu"))
|
444
|
+
|
426
445
|
def __call__(self, *args, **kwargs):
|
427
446
|
return self.forward(*args, **kwargs)
|
428
447
|
|
@@ -80,6 +80,10 @@ _import_structure = {
|
|
80
80
|
"RBLNPhiForCausalLMConfig",
|
81
81
|
"RBLNQwen2ForCausalLM",
|
82
82
|
"RBLNQwen2ForCausalLMConfig",
|
83
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModel",
|
84
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
|
85
|
+
"RBLNQwen2_5_VLForConditionalGeneration",
|
86
|
+
"RBLNQwen2_5_VLForConditionalGenerationConfig",
|
83
87
|
"RBLNT5EncoderModel",
|
84
88
|
"RBLNT5EncoderModelConfig",
|
85
89
|
"RBLNT5ForConditionalGeneration",
|
@@ -175,6 +179,10 @@ if TYPE_CHECKING:
|
|
175
179
|
RBLNMistralForCausalLMConfig,
|
176
180
|
RBLNPhiForCausalLM,
|
177
181
|
RBLNPhiForCausalLMConfig,
|
182
|
+
RBLNQwen2_5_VisionTransformerPretrainedModel,
|
183
|
+
RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
|
184
|
+
RBLNQwen2_5_VLForConditionalGeneration,
|
185
|
+
RBLNQwen2_5_VLForConditionalGenerationConfig,
|
178
186
|
RBLNQwen2ForCausalLM,
|
179
187
|
RBLNQwen2ForCausalLMConfig,
|
180
188
|
RBLNT5EncoderModel,
|
{optimum_rbln-0.7.4a5 → optimum_rbln-0.7.4a6}/src/optimum/rbln/transformers/models/__init__.py
RENAMED
@@ -56,6 +56,12 @@ _import_structure = {
|
|
56
56
|
"RBLNCLIPVisionModelWithProjection",
|
57
57
|
"RBLNCLIPVisionModelWithProjectionConfig",
|
58
58
|
],
|
59
|
+
"qwen2_5_vl": [
|
60
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModel",
|
61
|
+
"RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
|
62
|
+
"RBLNQwen2_5_VLForConditionalGeneration",
|
63
|
+
"RBLNQwen2_5_VLForConditionalGenerationConfig",
|
64
|
+
],
|
59
65
|
"decoderonly": [
|
60
66
|
"RBLNDecoderOnlyModelForCausalLM",
|
61
67
|
"RBLNDecoderOnlyModelForCausalLMConfig",
|
@@ -144,6 +150,12 @@ if TYPE_CHECKING:
|
|
144
150
|
from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
|
145
151
|
from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
|
146
152
|
from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
|
153
|
+
from .qwen2_5_vl import (
|
154
|
+
RBLNQwen2_5_VisionTransformerPretrainedModel,
|
155
|
+
RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
|
156
|
+
RBLNQwen2_5_VLForConditionalGeneration,
|
157
|
+
RBLNQwen2_5_VLForConditionalGenerationConfig,
|
158
|
+
)
|
147
159
|
from .t5 import (
|
148
160
|
RBLNT5EncoderModel,
|
149
161
|
RBLNT5EncoderModelConfig,
|
@@ -13,7 +13,7 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
import math
|
16
|
-
from typing import List, Optional, Tuple
|
16
|
+
from typing import List, Optional, Tuple, Union
|
17
17
|
|
18
18
|
import torch
|
19
19
|
from torch import nn
|
@@ -220,6 +220,53 @@ class DecoderOnlyWrapper(nn.Module):
|
|
220
220
|
self._phase = phase
|
221
221
|
self.causal_lm.phase = phase
|
222
222
|
|
223
|
+
def forward_common(
|
224
|
+
self,
|
225
|
+
input_ids_or_inputs_embeds: torch.Tensor,
|
226
|
+
cache_position: torch.Tensor,
|
227
|
+
attention_mask: torch.Tensor,
|
228
|
+
query_position: torch.Tensor,
|
229
|
+
block_tables: torch.Tensor,
|
230
|
+
rotary_emb: Union[nn.Module, torch.Tensor],
|
231
|
+
*past_key_values: List[torch.Tensor],
|
232
|
+
):
|
233
|
+
if input_ids_or_inputs_embeds.ndim == 2:
|
234
|
+
input_ids = input_ids_or_inputs_embeds
|
235
|
+
inputs_embeds = None
|
236
|
+
elif input_ids_or_inputs_embeds.ndim == 3:
|
237
|
+
input_ids = None
|
238
|
+
inputs_embeds = input_ids_or_inputs_embeds
|
239
|
+
else:
|
240
|
+
raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")
|
241
|
+
|
242
|
+
if len(past_key_values) != 2 * self.num_hidden_layers:
|
243
|
+
raise ValueError(
|
244
|
+
f"Different past_key_values to model's config. {len(past_key_values)} != {2 * self.num_hidden_layers}"
|
245
|
+
)
|
246
|
+
|
247
|
+
# [key, value] * n_layer -> ( (key, value) ) * n_layer
|
248
|
+
# cache shape : batch, n_heads, 1, max_seq_len, head_dim
|
249
|
+
_past_key_values = []
|
250
|
+
for i in range(self.config.num_hidden_layers):
|
251
|
+
key_states = past_key_values[i * 2]
|
252
|
+
value_states = past_key_values[i * 2 + 1]
|
253
|
+
past_key_value = [key_states, value_states]
|
254
|
+
_past_key_values.append(past_key_value)
|
255
|
+
past_key_values = _past_key_values
|
256
|
+
|
257
|
+
logit = self.causal_lm(
|
258
|
+
input_ids=input_ids,
|
259
|
+
inputs_embeds=inputs_embeds,
|
260
|
+
attention_mask=attention_mask,
|
261
|
+
cache_position=cache_position,
|
262
|
+
query_position=query_position,
|
263
|
+
past_key_values=past_key_values,
|
264
|
+
rotary_emb=rotary_emb,
|
265
|
+
block_tables=block_tables,
|
266
|
+
)
|
267
|
+
|
268
|
+
return logit
|
269
|
+
|
223
270
|
def forward(self, *args):
|
224
271
|
if self.phase == "decode":
|
225
272
|
if self.use_attention_mask:
|
@@ -262,43 +309,16 @@ class DecoderOnlyWrapper(nn.Module):
|
|
262
309
|
else:
|
263
310
|
raise ValueError(f"Unknown phase: {self.phase}")
|
264
311
|
|
265
|
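-
if input_ids_or_inputs_embeds.ndim == 2:
|
266
|
-
input_ids = input_ids_or_inputs_embeds
|
267
|
-
inputs_embeds = None
|
268
|
-
elif input_ids_or_inputs_embeds.ndim == 3:
|
269
|
-
input_ids = None
|
270
|
-
inputs_embeds = input_ids_or_inputs_embeds
|
271
|
-
else:
|
272
|
-
raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")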
|
273
|
-
|
274
|
-
if len(past_key_values) != 2 * self.num_hidden_layers:
|
275
|
-
raise ValueError(
|
276
|
-
f"Different past_key_values to model's config. {len(past_key_values)} != {2 * self.num_hidden_layers}"
|
277
|
-
)
|
278
|
-
|
279
|
-
# [key, value] * n_layer -> ( (key, value) ) * n_layer
|
280
|
-
# cache shape : batch, n_heads, 1, max_seq_len, head_dim
|
281
|
-
_past_key_values = []
|
282
|
-
for i in range(self.config.num_hidden_layers):
|
283
|
-
key_states = past_key_values[i * 2]
|
284
|
-
value_states = past_key_values[i * 2 + 1]
|
285
|
-
past_key_value = [key_states, value_states]
|
286
|
-
_past_key_values.append(past_key_value)
|
287
|
-
past_key_values = _past_key_values
|
288
|
-
|
289
|
-
logit = self.causal_lm(
|
290
|
-
input_ids=input_ids,
|
291
|
-
inputs_embeds=inputs_embeds,
|
292
|
-
attention_mask=attention_mask,
|
293
|
-
cache_position=cache_position,
|
294
|
-
query_position=query_position,
|
295
|
-
past_key_values=past_key_values,
|
296
|
-
rotary_emb=self.rotary_emb,
|
297
|
-
block_tables=block_tables,
|
312
|
+
return self.forward_common(
|
313
|
+
input_ids_or_inputs_embeds,
|
314
|
+
cache_position,
|
315
|
+
attention_mask,
|
316
|
+
query_position,
|
317
|
+
block_tables,
|
318
|
+
self.rotary_emb,
|
319
|
+
*past_key_values,
|
298
320
|
)
|
299
321
|
|
300
|
-
return logit
|
301
|
-
|
302
322
|
|
303
323
|
class DecoderOnlyForCausalLM(nn.Module):
|
304
324
|
"""A specialized wrapper for Causal Language Models optimized for RBLN compilation.
|
@@ -322,12 +342,13 @@ class DecoderOnlyForCausalLM(nn.Module):
|
|
322
342
|
_phase: Current processing phase ("prefill" or "decode")
|
323
343
|
"""
|
324
344
|
|
325
|
-
def __init__(self, causal_lm: PreTrainedModel, model):
|
345
|
+
def __init__(self, causal_lm: PreTrainedModel, model: nn.Module):
|
326
346
|
super().__init__()
|
327
347
|
self.config = causal_lm.config
|
328
348
|
self._original_mod = causal_lm
|
329
349
|
self.model = model
|
330
350
|
self._phase = "prefill"
|
351
|
+
self.lm_head = self._original_mod.lm_head
|
331
352
|
|
332
353
|
@property
|
333
354
|
def phase(self):
|
@@ -363,7 +384,7 @@ class DecoderOnlyForCausalLM(nn.Module):
|
|
363
384
|
if self.phase == "prefill":
|
364
385
|
hidden_states = hidden_states[:, query_position.to(torch.int).unsqueeze(0)]
|
365
386
|
|
366
|
-
logits = self._original_mod.lm_head(hidden_states)
|
387
|
+
logits = self.lm_head(hidden_states)
|
367
388
|
return logits
|
368
389
|
|
369
390
|
|
@@ -455,8 +476,12 @@ class DecoderOnlyModel(nn.Module):
|
|
455
476
|
|
456
477
|
# get cos,sin vector if needed
|
457
478
|
if rotary_emb is not None:
|
458
|
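-
cos, sin = rotary_emb(hidden_states, self.max_seq_len) # dtype carrier, max_seq_len
|
459
|
-
cos, sin = slice_and_unsqueeze_cos_sin(cos, sin, cache_position)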
|
479
|
+
if isinstance(rotary_emb, torch.Tensor):
|
480
|
+
cos = rotary_emb[0]
|
481
|
+
sin = rotary_emb[1]
|
482
|
+
else:
|
483
|
+
cos, sin = rotary_emb(hidden_states, self.max_seq_len) # dtype carrier, max_seq_len
|
484
|
+
cos, sin = slice_and_unsqueeze_cos_sin(cos, sin, cache_position)
|
460
485
|
else:
|
461
486
|
batch_size = inputs_embeds.shape[0]
|
462
487
|
if cache_position.shape[0] > 1:
|
@@ -833,7 +858,6 @@ def rotate_half(x):
|
|
833
858
|
|
834
859
|
def apply_rotary_pos_emb(q, k, cos, sin):
|
835
860
|
"""Applies Rotary Position Embedding to the query and key tensors."""
|
836
|
-
|
837
861
|
q_embed = (q * cos) + (rotate_half(q) * sin)
|
838
862
|
k_embed = (k * cos) + (rotate_half(k) * sin)
|
839
863
|
return q_embed, k_embed
|