optimum-rbln 0.8.1a1__tar.gz → 0.8.1a2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum_rbln-0.8.1a2/.github/version.yaml +1 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/PKG-INFO +2 -2
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/pyproject.toml +1 -1
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/__version__.py +2 -2
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/modeling_rope_utils.py +5 -2
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/auto/auto_factory.py +5 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +14 -1
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +15 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +18 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +0 -1
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +15 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +11 -1
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +43 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +41 -3
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +5 -1
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/tests/test_transformers.py +1 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/uv.lock +973 -960
- optimum_rbln-0.8.1a1/.github/version.yaml +0 -1
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/pull_request_template.md +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/scripts/auto_code_review.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/scripts/validate_pr_checklist.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/auto_code_review.yml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/check_code_quality.yml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/deploy-on-tag.yaml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/deploy.yaml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/pr-title-check.yaml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/pr_checklist_validator.yml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/rbln_check_compiler.yaml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/rbln_scheduled_test.yaml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/.gitignore +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/CODE_OF_CONDUCT.md +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/CONTRIBUTING.md +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/LICENSE +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/README.md +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/assets/rbln_logo.png +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/advanced/custom_class.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/audio-classification/run_ast_audio_classification.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/depth-estimation/run_dpt.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/image-classification/run_image_classification.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/image-classification/run_vit_image_classification.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/image-to-text/run_idefics3.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/question-answering/run_question_answering.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/speech-recognition/run_wav2vec2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/speech-recognition/run_whisper.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/text-classification/run_secureBERT.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/text-classification/run_t5_classification.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/text2text-generation/run_llama_peft.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/examples/time-series-forecasting/run_time_series_forecasting.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/scripts/uv-lock.sh +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/scripts/uv-sync.sh +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/configuration_utils.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/models/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/pipelines/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/modeling.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/modeling_base.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/ops/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/ops/attn.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/ops/flash_attn.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/ops/linear.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/ops/sliding_window_attn.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/configuration_generic.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/bart/configuration_bart.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/bert/configuration_bert.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/blip_2/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/clip/configuration_clip.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/distilbert/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/dpt/configuration_dpt.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/exaone/configuration_exaone.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gemma/configuration_gemma.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gemma3/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/idefics3/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/llama/configuration_llama.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/midm/configuration_midm.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/mistral/configuration_mistral.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/opt/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/opt/configuration_opt.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/opt/modeling_opt.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/opt/opt_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/phi/configuration_phi.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/resnet/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/resnet/configuration_resnet.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/resnet/modeling_resnet.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/roberta/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/roberta/configuration_roberta.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/roberta/modeling_roberta.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/siglip/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/siglip/configuration_siglip.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/t5/configuration_t5.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/time_series_transformer/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/vit/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/vit/configuration_vit.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/vit/modeling_vit.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/whisper/configuration_whisper.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/utils/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/utils/decorator_utils.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/utils/hub.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/utils/import_utils.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/utils/logging.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/utils/model_utils.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/utils/runtime_utils.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/utils/save_utils.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/utils/submodule.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/tests/__init__.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/tests/psnr.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/tests/requirements_sdxl.txt +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/tests/run_stable_diffusion_xl_base.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/tests/test_base.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/tests/test_config.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/tests/test_diffusers.py +0 -0
- {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/tests/test_llm.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
rebel_compiler_version: 0.8.1.dev142+gab6ad3c7
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: optimum-rbln
|
3
|
-
Version: 0.8.1a1
|
3
|
+
Version: 0.8.1a2
|
4
4
|
Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
|
5
5
|
Project-URL: Homepage, https://rebellions.ai
|
6
6
|
Project-URL: Documentation, https://docs.rbln.ai
|
@@ -28,7 +28,7 @@ Requires-Dist: packaging>=24.1
|
|
28
28
|
Requires-Dist: torch==2.6.0
|
29
29
|
Requires-Dist: torchaudio<=2.6.0
|
30
30
|
Requires-Dist: torchvision<=0.21.0
|
31
|
-
Requires-Dist: transformers==4.
|
31
|
+
Requires-Dist: transformers==4.51.3
|
32
32
|
Description-Content-Type: text/markdown
|
33
33
|
|
34
34
|
|
@@ -17,5 +17,5 @@ __version__: str
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
18
18
|
version_tuple: VERSION_TUPLE
|
19
19
|
|
20
|
-
__version__ = version = '0.8.1a1'
|
21
|
-
__version_tuple__ = version_tuple = (0, 8, 1, 'a1')
|
20
|
+
__version__ = version = '0.8.1a2'
|
21
|
+
__version_tuple__ = version_tuple = (0, 8, 1, 'a2')
|
{optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a2}/src/optimum/rbln/transformers/modeling_rope_utils.py
RENAMED
@@ -48,10 +48,13 @@ def _compute_default_rope_parameters(
|
|
48
48
|
Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
|
49
49
|
post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
|
50
50
|
"""
|
51
|
-
|
52
51
|
base = config.rope_theta
|
53
52
|
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
|
54
|
-
head_dim =
|
53
|
+
head_dim = (
|
54
|
+
config.head_dim
|
55
|
+
if hasattr(config, "head_dim") and config.head_dim is not None
|
56
|
+
else config.hidden_size // config.num_attention_heads
|
57
|
+
)
|
55
58
|
dim = int(head_dim * partial_rotary_factor)
|
56
59
|
|
57
60
|
attention_factor = 1.0 # Unused in this type of RoPE
|
@@ -167,6 +167,11 @@ class _BaseAutoModelClass:
|
|
167
167
|
rbln_cls = cls.get_rbln_cls(model_id, *args, **kwargs)
|
168
168
|
return rbln_cls.from_pretrained(model_id, *args, **kwargs)
|
169
169
|
|
170
|
+
@classmethod
|
171
|
+
def from_model(cls, model, *args, **kwargs):
|
172
|
+
rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
|
173
|
+
return rbln_cls.from_model(model, *args, **kwargs)
|
174
|
+
|
170
175
|
@staticmethod
|
171
176
|
def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
|
172
177
|
"""
|
@@ -22,6 +22,7 @@ from transformers.modeling_attn_mask_utils import (
|
|
22
22
|
from transformers.utils import logging
|
23
23
|
|
24
24
|
from ..seq2seq.seq2seq_architecture import (
|
25
|
+
Seq2SeqCrossAttention,
|
25
26
|
Seq2SeqDecoder,
|
26
27
|
Seq2SeqDecoderLayer,
|
27
28
|
Seq2SeqDecoderWrapper,
|
@@ -45,7 +46,8 @@ class BartDecoderWrapper(Seq2SeqDecoderWrapper):
|
|
45
46
|
new_layers = []
|
46
47
|
for layer in model.get_decoder().layers:
|
47
48
|
self_attn = BartSelfAttention(layer.self_attn, use_attention_mask=self.use_attention_mask)
|
48
|
-
|
49
|
+
cross_attn = BartCrossAttention(layer.encoder_attn)
|
50
|
+
new_layers.append(BartDecoderLayer(layer, self_attn, cross_attn))
|
49
51
|
|
50
52
|
decoder_model = BartDecoder(model.get_decoder(), new_layers)
|
51
53
|
new_model = BartForConditionalGeneration(model, decoder_model)
|
@@ -153,3 +155,14 @@ class BartSelfAttention(Seq2SeqSelfAttention):
|
|
153
155
|
key_states = self.k_proj(hidden_states)
|
154
156
|
value_states = self.v_proj(hidden_states)
|
155
157
|
return query_states, key_states, value_states
|
158
|
+
|
159
|
+
|
160
|
+
class BartCrossAttention(Seq2SeqCrossAttention):
|
161
|
+
def __post_init__(self):
|
162
|
+
self.q_proj = self._original_mod.q_proj
|
163
|
+
self.k_proj = self._original_mod.k_proj
|
164
|
+
self.v_proj = self._original_mod.v_proj
|
165
|
+
self.out_proj = self._original_mod.out_proj
|
166
|
+
self.num_heads = self._original_mod.num_heads
|
167
|
+
self.head_dim = self._original_mod.embed_dim // self._original_mod.num_heads
|
168
|
+
self.embed_dim = self._original_mod.embed_dim
|
@@ -13,7 +13,11 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
|
16
|
+
import inspect
|
17
|
+
from typing import Any, Callable
|
18
|
+
|
16
19
|
from transformers import AutoModelForCausalLM
|
20
|
+
from transformers.generation.utils import GenerationMixin
|
17
21
|
|
18
22
|
from ....utils import logging
|
19
23
|
from ..decoderonly import RBLNDecoderOnlyModelForCausalLM
|
@@ -85,8 +89,19 @@ class RBLNExaoneForCausalLM(RBLNDecoderOnlyModelForCausalLM):
|
|
85
89
|
|
86
90
|
_decoder_wrapper_cls = ExaoneForCausalLMWrapper
|
87
91
|
_hf_class = AutoModelForCausalLM
|
92
|
+
_supports_cache_class = True
|
88
93
|
|
89
94
|
@classmethod
|
90
95
|
def from_pretrained(cls, *args, **kwargs):
|
91
96
|
kwargs.setdefault("trust_remote_code", True)
|
92
97
|
return super().from_pretrained(*args, **kwargs)
|
98
|
+
|
99
|
+
def __getattr__(self, __name: str) -> Any:
|
100
|
+
def redirect(func):
|
101
|
+
return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)
|
102
|
+
|
103
|
+
val = getattr(GenerationMixin, __name)
|
104
|
+
|
105
|
+
if isinstance(val, Callable) and "self" in set(inspect.signature(val).parameters):
|
106
|
+
return redirect(val)
|
107
|
+
return val
|
@@ -11,6 +11,7 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
|
+
import importlib
|
14
15
|
import inspect
|
15
16
|
from collections import deque
|
16
17
|
from dataclasses import dataclass
|
@@ -123,6 +124,23 @@ class RBLNGemma3ForConditionalGeneration(RBLNModel):
|
|
123
124
|
def can_generate(self):
|
124
125
|
return True
|
125
126
|
|
127
|
+
@classmethod
|
128
|
+
def get_pytorch_model(cls, *args, **kwargs):
|
129
|
+
model = super().get_pytorch_model(*args, **kwargs)
|
130
|
+
|
131
|
+
with no_init_weights():
|
132
|
+
model_cls_name = model.model.language_model.__class__.__name__
|
133
|
+
causal_model_cls_name = model_cls_name.replace("TextModel", "ForCausalLM")
|
134
|
+
causal_model_cls = getattr(importlib.import_module("transformers"), causal_model_cls_name)
|
135
|
+
new_language_model = causal_model_cls(model.model.language_model.config)
|
136
|
+
|
137
|
+
new_language_model.lm_head = model.lm_head
|
138
|
+
new_language_model.model = model.model.language_model
|
139
|
+
model.model.language_model = new_language_model
|
140
|
+
model.lm_head = None
|
141
|
+
del model.lm_head
|
142
|
+
return model
|
143
|
+
|
126
144
|
def __post_init__(self, **kwargs):
|
127
145
|
self.vision_tower = LoopVisionTower(self.rbln_submodules[0])
|
128
146
|
self.language_model = self.rbln_submodules[1]
|
@@ -168,7 +168,6 @@ class RBLNLlavaNextForConditionalGeneration(RBLNModel):
|
|
168
168
|
):
|
169
169
|
# If you are unavoidably running on a CPU rather than an RBLN device,
|
170
170
|
# store the torch tensor, weight, etc. in this function.
|
171
|
-
|
172
171
|
save_dict = {}
|
173
172
|
save_dict["image_newline"] = model.image_newline
|
174
173
|
torch.save(save_dict, save_dir_path / subfolder / "torch_artifacts.pth")
|
@@ -12,7 +12,11 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
+
import inspect
|
16
|
+
from typing import Any, Callable
|
17
|
+
|
15
18
|
from transformers import AutoModelForCausalLM
|
19
|
+
from transformers.generation.utils import GenerationMixin
|
16
20
|
|
17
21
|
from ....utils import logging
|
18
22
|
from ..decoderonly import RBLNDecoderOnlyModelForCausalLM
|
@@ -84,8 +88,19 @@ class RBLNMidmLMHeadModel(RBLNDecoderOnlyModelForCausalLM):
|
|
84
88
|
|
85
89
|
_decoder_wrapper_cls = MidmLMHeadModelWrapper
|
86
90
|
_hf_class = AutoModelForCausalLM
|
91
|
+
_supports_cache_class = True
|
87
92
|
|
88
93
|
@classmethod
|
89
94
|
def from_pretrained(cls, *args, **kwargs):
|
90
95
|
kwargs.setdefault("trust_remote_code", True)
|
91
96
|
return super().from_pretrained(*args, **kwargs)
|
97
|
+
|
98
|
+
def __getattr__(self, __name: str) -> Any:
|
99
|
+
def redirect(func):
|
100
|
+
return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)
|
101
|
+
|
102
|
+
val = getattr(GenerationMixin, __name)
|
103
|
+
|
104
|
+
if isinstance(val, Callable) and "self" in set(inspect.signature(val).parameters):
|
105
|
+
return redirect(val)
|
106
|
+
return val
|
@@ -28,6 +28,7 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
|
|
28
28
|
Qwen2_5_VisionPatchEmbed,
|
29
29
|
Qwen2_5_VisionRotaryEmbedding,
|
30
30
|
Qwen2_5_VisionTransformerPretrainedModel,
|
31
|
+
Qwen2_5_VLModel,
|
31
32
|
Qwen2_5_VLRotaryEmbedding,
|
32
33
|
)
|
33
34
|
|
@@ -390,6 +391,14 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
|
|
390
391
|
def can_generate(self):
|
391
392
|
return True
|
392
393
|
|
394
|
+
@classmethod
|
395
|
+
def get_pytorch_model(cls, *args, **kwargs):
|
396
|
+
model = super().get_pytorch_model(*args, **kwargs)
|
397
|
+
model.model.lm_head = model.lm_head
|
398
|
+
model.lm_head = None
|
399
|
+
del model.lm_head
|
400
|
+
return model
|
401
|
+
|
393
402
|
@classmethod
|
394
403
|
def update_kwargs(cls, kwargs):
|
395
404
|
kwargs.update(
|
@@ -531,7 +540,8 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
|
|
531
540
|
vision_tokens = input_id[0][vision_start_indices + 1]
|
532
541
|
image_nums = (vision_tokens == image_token_id).sum()
|
533
542
|
video_nums = (vision_tokens == video_token_id).sum()
|
534
|
-
position_ids, rope_deltas =
|
543
|
+
position_ids, rope_deltas = Qwen2_5_VLModel.get_rope_index(
|
544
|
+
self,
|
535
545
|
input_id,
|
536
546
|
image_grid_thw[image_idx : image_idx + image_nums] if image_grid_thw is not None else None,
|
537
547
|
video_grid_thw[video_idx : video_idx + video_nums] if video_grid_thw is not None else None,
|
@@ -3,8 +3,14 @@ from typing import Tuple
|
|
3
3
|
|
4
4
|
import torch
|
5
5
|
import torch.nn as nn
|
6
|
+
from transformers import PreTrainedModel
|
6
7
|
|
7
8
|
from ..decoderonly.decoderonly_architecture import (
|
9
|
+
DecoderOnlyAttention,
|
10
|
+
DecoderOnlyFlashAttention,
|
11
|
+
DecoderOnlyForCausalLM,
|
12
|
+
DecoderOnlyLayer,
|
13
|
+
DecoderOnlyModel,
|
8
14
|
DecoderOnlyWrapper,
|
9
15
|
apply_rotary_pos_emb,
|
10
16
|
)
|
@@ -197,3 +203,40 @@ class Qwen2_5_VL_LanguageModelWrapper(DecoderOnlyWrapper):
|
|
197
203
|
past_key_values,
|
198
204
|
position_embeds,
|
199
205
|
)
|
206
|
+
|
207
|
+
def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
|
208
|
+
new_layers = []
|
209
|
+
|
210
|
+
for layer in causal_lm.model.language_model.layers:
|
211
|
+
if self.attn_impl == "eager":
|
212
|
+
new_self_attn = DecoderOnlyAttention(
|
213
|
+
layer.self_attn,
|
214
|
+
self.use_attention_mask,
|
215
|
+
self.use_position_ids,
|
216
|
+
kvcache_block_size=self.kvcache_block_size,
|
217
|
+
)
|
218
|
+
elif self.attn_impl == "flash_attn":
|
219
|
+
new_self_attn = DecoderOnlyFlashAttention(
|
220
|
+
layer.self_attn,
|
221
|
+
kvcache_partition_len=self.kvcache_partition_len,
|
222
|
+
kvcache_block_size=self.kvcache_block_size,
|
223
|
+
use_attention_mask=self.use_attention_mask,
|
224
|
+
use_position_ids=self.use_position_ids,
|
225
|
+
)
|
226
|
+
else:
|
227
|
+
raise NotImplementedError(f"Unknwon attn : {self.attn_impl}")
|
228
|
+
|
229
|
+
new_layer = DecoderOnlyLayer(layer, new_self_attn)
|
230
|
+
new_layers.append(new_layer)
|
231
|
+
|
232
|
+
new_model = DecoderOnlyModel(
|
233
|
+
causal_lm.model.language_model,
|
234
|
+
new_layers,
|
235
|
+
partition_len=self.kvcache_partition_len,
|
236
|
+
max_seq_len=max_seq_len,
|
237
|
+
kvcache_block_size=self.kvcache_block_size,
|
238
|
+
use_learned_pos_emb=self.use_learned_pos_emb,
|
239
|
+
sliding_window_layers=self.sliding_window_layers,
|
240
|
+
)
|
241
|
+
new_causal_lm = DecoderOnlyForCausalLM(causal_lm.model, new_model)
|
242
|
+
return new_causal_lm
|
@@ -148,7 +148,8 @@ class Seq2SeqDecoderWrapper(nn.Module):
|
|
148
148
|
new_layers = []
|
149
149
|
for layer in model.get_decoder().layers:
|
150
150
|
self_attn = Seq2SeqSelfAttention(layer.self_attn)
|
151
|
-
|
151
|
+
cross_attn = Seq2SeqCrossAttention(layer.encoder_attn)
|
152
|
+
new_layers.append(Seq2SeqDecoderLayer(layer, self_attn, cross_attn))
|
152
153
|
|
153
154
|
decoder_model = Seq2SeqDecoder(model.get_decoder(), new_layers)
|
154
155
|
new_model = Seq2SeqForConditionalGeneration(model, decoder_model)
|
@@ -341,10 +342,11 @@ class Seq2SeqDecoderLayer(torch.nn.Module):
|
|
341
342
|
self_attn (Seq2SeqSelfAttention): Modified self-attention layer optimized for RBLN
|
342
343
|
"""
|
343
344
|
|
344
|
-
def __init__(self, decoder_layer, self_attn):
|
345
|
+
def __init__(self, decoder_layer, self_attn, cross_attn):
|
345
346
|
super().__init__()
|
346
347
|
self._original_mod = decoder_layer
|
347
348
|
self.self_attn = self_attn
|
349
|
+
self.cross_attn = cross_attn
|
348
350
|
self.__post_init__()
|
349
351
|
|
350
352
|
def __post_init__(self, **kwargs):
|
@@ -402,7 +404,8 @@ class Seq2SeqDecoderLayer(torch.nn.Module):
|
|
402
404
|
# Cross-Attention Block
|
403
405
|
residual = hidden_states
|
404
406
|
hidden_states = self.pre_cross_attn_layer_norm(hidden_states)
|
405
|
-
|
407
|
+
|
408
|
+
cross_attn_output = self.cross_attn(
|
406
409
|
hidden_states=hidden_states,
|
407
410
|
past_key_value=cross_past_key_value,
|
408
411
|
attention_mask=encoder_attention_mask,
|
@@ -487,3 +490,38 @@ class Seq2SeqSelfAttention(nn.Module):
|
|
487
490
|
attn_output = self.out_proj(attn_output)
|
488
491
|
|
489
492
|
return attn_output
|
493
|
+
|
494
|
+
|
495
|
+
class Seq2SeqCrossAttention(nn.Module):
|
496
|
+
def __init__(self, attn, **kwargs):
|
497
|
+
super().__init__()
|
498
|
+
self._original_mod = attn
|
499
|
+
self.__post_init__(**kwargs)
|
500
|
+
|
501
|
+
def forward(
|
502
|
+
self,
|
503
|
+
hidden_states: torch.Tensor,
|
504
|
+
key_value_states: torch.Tensor = None,
|
505
|
+
past_key_value: Optional[object] = None,
|
506
|
+
attention_mask: Optional[torch.Tensor] = None,
|
507
|
+
):
|
508
|
+
bsz, tgt_len, _ = hidden_states.size()
|
509
|
+
query_states = self.q_proj(hidden_states).view(bsz, -1, self.num_heads, self.head_dim).transpose(1, 2)
|
510
|
+
|
511
|
+
is_cross_attention = key_value_states is not None
|
512
|
+
if is_cross_attention:
|
513
|
+
key_states = past_key_value[0]
|
514
|
+
value_states = past_key_value[1]
|
515
|
+
|
516
|
+
attn_output = torch.nn.functional.scaled_dot_product_attention(
|
517
|
+
query_states,
|
518
|
+
key_states,
|
519
|
+
value_states,
|
520
|
+
attn_mask=attention_mask,
|
521
|
+
)
|
522
|
+
|
523
|
+
attn_output = attn_output.transpose(1, 2).contiguous()
|
524
|
+
attn_output = attn_output.view(bsz, tgt_len, self.embed_dim)
|
525
|
+
attn_output = self.out_proj(attn_output)
|
526
|
+
|
527
|
+
return attn_output, None, past_key_value
|
@@ -136,10 +136,14 @@ class T5Decoder(Seq2SeqDecoder):
|
|
136
136
|
|
137
137
|
|
138
138
|
class T5Block(Seq2SeqDecoderLayer):
|
139
|
+
def __init__(self, decoder_layer, self_attn):
|
140
|
+
super().__init__(decoder_layer, self_attn, cross_attn=None)
|
141
|
+
self.__post_init__()
|
142
|
+
|
139
143
|
def __post_init__(self):
|
140
144
|
self.self_attn_layer_norm = self._original_mod.layer[0].layer_norm
|
141
145
|
self.encoder_attn_layer_norm = self._original_mod.layer[1].layer_norm
|
142
|
-
self.
|
146
|
+
self.cross_attn = T5CrossAttention(self._original_mod.layer[1].EncDecAttention)
|
143
147
|
self.ff_layer = self._original_mod.layer[2]
|
144
148
|
|
145
149
|
def pre_self_attn_layer_norm(self, hidden_states):
|