optimum-rbln 0.8.1a1__tar.gz → 0.8.1a3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. optimum_rbln-0.8.1a3/.github/version.yaml +1 -0
  2. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/PKG-INFO +2 -2
  3. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/pyproject.toml +1 -1
  4. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/__version__.py +2 -2
  5. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/modeling_rope_utils.py +5 -2
  6. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/auto/auto_factory.py +5 -0
  7. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +14 -1
  8. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +2 -2
  9. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +15 -0
  10. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +22 -12
  11. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +0 -1
  12. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +15 -0
  13. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +41 -3
  14. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +5 -1
  15. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/tests/test_llm.py +2 -2
  16. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/tests/test_transformers.py +1 -0
  17. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/uv.lock +973 -960
  18. optimum_rbln-0.8.1a1/.github/version.yaml +0 -1
  19. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  20. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  21. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  22. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
  23. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/pull_request_template.md +0 -0
  24. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/scripts/auto_code_review.py +0 -0
  25. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/scripts/validate_pr_checklist.py +0 -0
  26. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/auto_code_review.yml +0 -0
  27. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/check_code_quality.yml +0 -0
  28. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/deploy-on-tag.yaml +0 -0
  29. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/deploy.yaml +0 -0
  30. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/pr-title-check.yaml +0 -0
  31. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/pr_checklist_validator.yml +0 -0
  32. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/rbln_check_compiler.yaml +0 -0
  33. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
  34. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
  35. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
  36. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/rbln_scheduled_test.yaml +0 -0
  37. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
  38. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/.gitignore +0 -0
  39. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/CODE_OF_CONDUCT.md +0 -0
  40. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/CONTRIBUTING.md +0 -0
  41. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/LICENSE +0 -0
  42. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/README.md +0 -0
  43. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/assets/rbln_logo.png +0 -0
  44. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/advanced/custom_class.py +0 -0
  45. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/audio-classification/run_ast_audio_classification.py +0 -0
  46. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/depth-estimation/run_dpt.py +0 -0
  47. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/image-classification/run_image_classification.py +0 -0
  48. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/image-classification/run_vit_image_classification.py +0 -0
  49. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/image-to-text/run_idefics3.py +0 -0
  50. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
  51. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
  52. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
  53. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
  54. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
  55. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
  56. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
  57. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
  58. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/question-answering/run_question_answering.py +0 -0
  59. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/speech-recognition/run_wav2vec2.py +0 -0
  60. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/speech-recognition/run_whisper.py +0 -0
  61. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
  62. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
  63. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
  64. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
  65. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
  66. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
  67. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
  68. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
  69. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
  70. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/text-classification/run_secureBERT.py +0 -0
  71. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/text-classification/run_t5_classification.py +0 -0
  72. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
  73. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
  74. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/text2text-generation/run_llama_peft.py +0 -0
  75. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
  76. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/examples/time-series-forecasting/run_time_series_forecasting.py +0 -0
  77. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/scripts/uv-lock.sh +0 -0
  78. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/scripts/uv-sync.sh +0 -0
  79. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/__init__.py +0 -0
  80. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/configuration_utils.py +0 -0
  81. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/__init__.py +0 -0
  82. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/__init__.py +0 -0
  83. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/models/__init__.py +0 -0
  84. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +0 -0
  85. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +0 -0
  86. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +0 -0
  87. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +0 -0
  88. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +0 -0
  89. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +0 -0
  90. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/pipelines/__init__.py +0 -0
  91. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +0 -0
  92. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +0 -0
  93. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +0 -0
  94. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +0 -0
  95. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +0 -0
  96. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
  97. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
  98. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
  99. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
  100. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
  101. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
  102. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
  103. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
  104. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
  105. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
  106. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
  107. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
  108. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
  109. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
  110. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
  111. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
  112. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
  113. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
  114. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
  115. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
  116. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
  117. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
  118. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
  119. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
  120. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
  121. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
  122. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
  123. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
  124. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
  125. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
  126. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
  127. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
  128. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
  129. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
  130. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
  131. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
  132. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
  133. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/modeling.py +0 -0
  134. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/modeling_base.py +0 -0
  135. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/ops/__init__.py +0 -0
  136. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/ops/attn.py +0 -0
  137. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/ops/flash_attn.py +0 -0
  138. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
  139. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/ops/linear.py +0 -0
  140. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/ops/sliding_window_attn.py +0 -0
  141. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/__init__.py +0 -0
  142. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/configuration_generic.py +0 -0
  143. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
  144. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/__init__.py +0 -0
  145. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +0 -0
  146. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -0
  147. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +0 -0
  148. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
  149. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
  150. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
  151. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/bart/configuration_bart.py +0 -0
  152. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -0
  153. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
  154. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/bert/configuration_bert.py +0 -0
  155. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
  156. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/blip_2/__init__.py +0 -0
  157. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +0 -0
  158. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +0 -0
  159. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
  160. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/clip/configuration_clip.py +0 -0
  161. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
  162. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
  163. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +0 -0
  164. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +0 -0
  165. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/distilbert/__init__.py +0 -0
  166. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +0 -0
  167. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +0 -0
  168. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
  169. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/dpt/configuration_dpt.py +0 -0
  170. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
  171. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
  172. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/exaone/configuration_exaone.py +0 -0
  173. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
  174. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
  175. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gemma/configuration_gemma.py +0 -0
  176. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
  177. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
  178. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gemma3/__init__.py +0 -0
  179. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +0 -0
  180. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +0 -0
  181. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
  182. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +0 -0
  183. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
  184. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
  185. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/idefics3/__init__.py +0 -0
  186. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +0 -0
  187. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +0 -0
  188. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
  189. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/llama/configuration_llama.py +0 -0
  190. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
  191. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
  192. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
  193. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +0 -0
  194. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
  195. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/midm/configuration_midm.py +0 -0
  196. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
  197. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
  198. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/mistral/configuration_mistral.py +0 -0
  199. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
  200. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
  201. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/opt/__init__.py +0 -0
  202. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/opt/configuration_opt.py +0 -0
  203. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/opt/modeling_opt.py +0 -0
  204. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/opt/opt_architecture.py +0 -0
  205. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
  206. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/phi/configuration_phi.py +0 -0
  207. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
  208. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
  209. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
  210. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +0 -0
  211. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
  212. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
  213. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +0 -0
  214. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +0 -0
  215. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +0 -0
  216. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +0 -0
  217. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/resnet/__init__.py +0 -0
  218. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/resnet/configuration_resnet.py +0 -0
  219. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/resnet/modeling_resnet.py +0 -0
  220. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/roberta/__init__.py +0 -0
  221. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/roberta/configuration_roberta.py +0 -0
  222. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/roberta/modeling_roberta.py +0 -0
  223. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
  224. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +0 -0
  225. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +0 -0
  226. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/siglip/__init__.py +0 -0
  227. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/siglip/configuration_siglip.py +0 -0
  228. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py +0 -0
  229. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
  230. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/t5/configuration_t5.py +0 -0
  231. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -0
  232. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/time_series_transformer/__init__.py +0 -0
  233. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +0 -0
  234. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +0 -0
  235. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -0
  236. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/vit/__init__.py +0 -0
  237. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/vit/configuration_vit.py +0 -0
  238. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/vit/modeling_vit.py +0 -0
  239. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
  240. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +0 -0
  241. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
  242. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
  243. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/whisper/configuration_whisper.py +0 -0
  244. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
  245. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +0 -0
  246. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
  247. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
  248. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -0
  249. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
  250. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
  251. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
  252. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/utils/__init__.py +0 -0
  253. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/utils/decorator_utils.py +0 -0
  254. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/utils/hub.py +0 -0
  255. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/utils/import_utils.py +0 -0
  256. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/utils/logging.py +0 -0
  257. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/utils/model_utils.py +0 -0
  258. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/utils/runtime_utils.py +0 -0
  259. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/utils/save_utils.py +0 -0
  260. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/src/optimum/rbln/utils/submodule.py +0 -0
  261. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/tests/__init__.py +0 -0
  262. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/tests/psnr.py +0 -0
  263. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/tests/requirements_sdxl.txt +0 -0
  264. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/tests/run_stable_diffusion_xl_base.py +0 -0
  265. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/tests/test_base.py +0 -0
  266. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/tests/test_config.py +0 -0
  267. {optimum_rbln-0.8.1a1 → optimum_rbln-0.8.1a3}/tests/test_diffusers.py +0 -0
@@ -0,0 +1 @@
1
+ rebel_compiler_version: 0.8.1.dev142+gab6ad3c7
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: optimum-rbln
3
- Version: 0.8.1a1
3
+ Version: 0.8.1a3
4
4
  Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
5
5
  Project-URL: Homepage, https://rebellions.ai
6
6
  Project-URL: Documentation, https://docs.rbln.ai
@@ -28,7 +28,7 @@ Requires-Dist: packaging>=24.1
28
28
  Requires-Dist: torch==2.6.0
29
29
  Requires-Dist: torchaudio<=2.6.0
30
30
  Requires-Dist: torchvision<=0.21.0
31
- Requires-Dist: transformers==4.50.3
31
+ Requires-Dist: transformers==4.51.3
32
32
  Description-Content-Type: text/markdown
33
33
 
34
34
 
@@ -32,7 +32,7 @@ dependencies = [
32
32
  "torchaudio<=2.6.0",
33
33
  "torchvision<=0.21.0",
34
34
  "accelerate>=1.0.1",
35
- "transformers==4.50.3",
35
+ "transformers==4.51.3",
36
36
  "diffusers<=0.31.0",
37
37
  "packaging>=24.1",
38
38
  ]
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.8.1a1'
21
- __version_tuple__ = version_tuple = (0, 8, 1, 'a1')
20
+ __version__ = version = '0.8.1a3'
21
+ __version_tuple__ = version_tuple = (0, 8, 1, 'a3')
@@ -48,10 +48,13 @@ def _compute_default_rope_parameters(
48
48
  Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
49
49
  post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
50
50
  """
51
-
52
51
  base = config.rope_theta
53
52
  partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
54
- head_dim = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads
53
+ head_dim = (
54
+ config.head_dim
55
+ if hasattr(config, "head_dim") and config.head_dim is not None
56
+ else config.hidden_size // config.num_attention_heads
57
+ )
55
58
  dim = int(head_dim * partial_rotary_factor)
56
59
 
57
60
  attention_factor = 1.0 # Unused in this type of RoPE
@@ -167,6 +167,11 @@ class _BaseAutoModelClass:
167
167
  rbln_cls = cls.get_rbln_cls(model_id, *args, **kwargs)
168
168
  return rbln_cls.from_pretrained(model_id, *args, **kwargs)
169
169
 
170
+ @classmethod
171
+ def from_model(cls, model, *args, **kwargs):
172
+ rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
173
+ return rbln_cls.from_model(model, *args, **kwargs)
174
+
170
175
  @staticmethod
171
176
  def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
172
177
  """
@@ -22,6 +22,7 @@ from transformers.modeling_attn_mask_utils import (
22
22
  from transformers.utils import logging
23
23
 
24
24
  from ..seq2seq.seq2seq_architecture import (
25
+ Seq2SeqCrossAttention,
25
26
  Seq2SeqDecoder,
26
27
  Seq2SeqDecoderLayer,
27
28
  Seq2SeqDecoderWrapper,
@@ -45,7 +46,8 @@ class BartDecoderWrapper(Seq2SeqDecoderWrapper):
45
46
  new_layers = []
46
47
  for layer in model.get_decoder().layers:
47
48
  self_attn = BartSelfAttention(layer.self_attn, use_attention_mask=self.use_attention_mask)
48
- new_layers.append(BartDecoderLayer(layer, self_attn))
49
+ cross_attn = BartCrossAttention(layer.encoder_attn)
50
+ new_layers.append(BartDecoderLayer(layer, self_attn, cross_attn))
49
51
 
50
52
  decoder_model = BartDecoder(model.get_decoder(), new_layers)
51
53
  new_model = BartForConditionalGeneration(model, decoder_model)
@@ -153,3 +155,14 @@ class BartSelfAttention(Seq2SeqSelfAttention):
153
155
  key_states = self.k_proj(hidden_states)
154
156
  value_states = self.v_proj(hidden_states)
155
157
  return query_states, key_states, value_states
158
+
159
+
160
+ class BartCrossAttention(Seq2SeqCrossAttention):
161
+ def __post_init__(self):
162
+ self.q_proj = self._original_mod.q_proj
163
+ self.k_proj = self._original_mod.k_proj
164
+ self.v_proj = self._original_mod.v_proj
165
+ self.out_proj = self._original_mod.out_proj
166
+ self.num_heads = self._original_mod.num_heads
167
+ self.head_dim = self._original_mod.embed_dim // self._original_mod.num_heads
168
+ self.embed_dim = self._original_mod.embed_dim
@@ -177,8 +177,8 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
177
177
  )
178
178
  elif block_tables is None and local_block_tables is None:
179
179
  return False
180
- else:
181
- return True
180
+
181
+ return True
182
182
 
183
183
  def forward(
184
184
  self,
@@ -13,7 +13,11 @@
13
13
  # limitations under the License.
14
14
 
15
15
 
16
+ import inspect
17
+ from typing import Any, Callable
18
+
16
19
  from transformers import AutoModelForCausalLM
20
+ from transformers.generation.utils import GenerationMixin
17
21
 
18
22
  from ....utils import logging
19
23
  from ..decoderonly import RBLNDecoderOnlyModelForCausalLM
@@ -85,8 +89,19 @@ class RBLNExaoneForCausalLM(RBLNDecoderOnlyModelForCausalLM):
85
89
 
86
90
  _decoder_wrapper_cls = ExaoneForCausalLMWrapper
87
91
  _hf_class = AutoModelForCausalLM
92
+ _supports_cache_class = True
88
93
 
89
94
  @classmethod
90
95
  def from_pretrained(cls, *args, **kwargs):
91
96
  kwargs.setdefault("trust_remote_code", True)
92
97
  return super().from_pretrained(*args, **kwargs)
98
+
99
+ def __getattr__(self, __name: str) -> Any:
100
+ def redirect(func):
101
+ return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)
102
+
103
+ val = getattr(GenerationMixin, __name)
104
+
105
+ if isinstance(val, Callable) and "self" in set(inspect.signature(val).parameters):
106
+ return redirect(val)
107
+ return val
@@ -11,6 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+ import importlib
14
15
  import inspect
15
16
  from collections import deque
16
17
  from dataclasses import dataclass
@@ -123,6 +124,23 @@ class RBLNGemma3ForConditionalGeneration(RBLNModel):
123
124
  def can_generate(self):
124
125
  return True
125
126
 
127
+ @classmethod
128
+ def get_pytorch_model(cls, *args, **kwargs):
129
+ model = super().get_pytorch_model(*args, **kwargs)
130
+
131
+ with no_init_weights():
132
+ model_cls_name = model.model.language_model.__class__.__name__
133
+ causal_model_cls_name = model_cls_name.replace("TextModel", "ForCausalLM")
134
+ causal_model_cls = getattr(importlib.import_module("transformers"), causal_model_cls_name)
135
+ new_language_model = causal_model_cls(model.model.language_model.config)
136
+
137
+ new_language_model.lm_head = model.lm_head
138
+ new_language_model.model = model.model.language_model
139
+ model.model.language_model = new_language_model
140
+ model.lm_head = None
141
+ del model.lm_head
142
+ return model
143
+
126
144
  def __post_init__(self, **kwargs):
127
145
  self.vision_tower = LoopVisionTower(self.rbln_submodules[0])
128
146
  self.language_model = self.rbln_submodules[1]
@@ -541,7 +559,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
541
559
  (
542
560
  inputs,
543
561
  cache_position,
544
- chunked_attention_mask,
562
+ padded_attention_mask,
545
563
  out_buffers,
546
564
  position_ids,
547
565
  position_embed,
@@ -553,7 +571,7 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
553
571
  )
554
572
  if not is_external_block_tables:
555
573
  local_block_tables = torch.tensor([batch_idx], dtype=torch.int16)
556
- self.dec_attn_mask[batch_idx : batch_idx + 1] = chunked_attention_mask[:1]
574
+ self.dec_attn_mask[batch_idx : batch_idx + 1] = padded_attention_mask[:1]
557
575
 
558
576
  if self.rbln_config.use_attention_mask and self.rbln_config.use_position_ids:
559
577
  chunked_attention_mask = torch.zeros(1, self.rbln_config.max_seq_len, dtype=torch.float32)
@@ -569,18 +587,10 @@ class RBLNGemma3RuntimeModel(RBLNRuntimeModel):
569
587
  else None
570
588
  )
571
589
 
572
- # Not used in Gemma3 yet.
573
590
  if self.rbln_config.use_attention_mask:
574
591
  if self.rbln_config.use_position_ids:
575
- chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] = self.dec_attn_mask[
576
- batch_idx, step : step + self.rbln_config.prefill_chunk_size
577
- ]
578
- else:
579
- # Update attention mask to ensure proper causal behavior
580
- if step >= self.rbln_config.prefill_chunk_size:
581
- chunked_attention_mask[:, :, :, step - self.rbln_config.prefill_chunk_size : step] = 1
582
- chunked_attention_mask[:, :, :, step : step + self.rbln_config.prefill_chunk_size] = (
583
- self.causal_mask
592
+ chunked_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size] = (
593
+ padded_attention_mask[0, step : step + self.rbln_config.prefill_chunk_size]
584
594
  )
585
595
 
586
596
  # Define query position
@@ -168,7 +168,6 @@ class RBLNLlavaNextForConditionalGeneration(RBLNModel):
168
168
  ):
169
169
  # If you are unavoidably running on a CPU rather than an RBLN device,
170
170
  # store the torch tensor, weight, etc. in this function.
171
-
172
171
  save_dict = {}
173
172
  save_dict["image_newline"] = model.image_newline
174
173
  torch.save(save_dict, save_dir_path / subfolder / "torch_artifacts.pth")
@@ -12,7 +12,11 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import inspect
16
+ from typing import Any, Callable
17
+
15
18
  from transformers import AutoModelForCausalLM
19
+ from transformers.generation.utils import GenerationMixin
16
20
 
17
21
  from ....utils import logging
18
22
  from ..decoderonly import RBLNDecoderOnlyModelForCausalLM
@@ -84,8 +88,19 @@ class RBLNMidmLMHeadModel(RBLNDecoderOnlyModelForCausalLM):
84
88
 
85
89
  _decoder_wrapper_cls = MidmLMHeadModelWrapper
86
90
  _hf_class = AutoModelForCausalLM
91
+ _supports_cache_class = True
87
92
 
88
93
  @classmethod
89
94
  def from_pretrained(cls, *args, **kwargs):
90
95
  kwargs.setdefault("trust_remote_code", True)
91
96
  return super().from_pretrained(*args, **kwargs)
97
+
98
+ def __getattr__(self, __name: str) -> Any:
99
+ def redirect(func):
100
+ return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)
101
+
102
+ val = getattr(GenerationMixin, __name)
103
+
104
+ if isinstance(val, Callable) and "self" in set(inspect.signature(val).parameters):
105
+ return redirect(val)
106
+ return val
@@ -148,7 +148,8 @@ class Seq2SeqDecoderWrapper(nn.Module):
148
148
  new_layers = []
149
149
  for layer in model.get_decoder().layers:
150
150
  self_attn = Seq2SeqSelfAttention(layer.self_attn)
151
- new_layers.append(Seq2SeqDecoderLayer(layer, self_attn))
151
+ cross_attn = Seq2SeqCrossAttention(layer.encoder_attn)
152
+ new_layers.append(Seq2SeqDecoderLayer(layer, self_attn, cross_attn))
152
153
 
153
154
  decoder_model = Seq2SeqDecoder(model.get_decoder(), new_layers)
154
155
  new_model = Seq2SeqForConditionalGeneration(model, decoder_model)
@@ -341,10 +342,11 @@ class Seq2SeqDecoderLayer(torch.nn.Module):
341
342
  self_attn (Seq2SeqSelfAttention): Modified self-attention layer optimized for RBLN
342
343
  """
343
344
 
344
- def __init__(self, decoder_layer, self_attn):
345
+ def __init__(self, decoder_layer, self_attn, cross_attn):
345
346
  super().__init__()
346
347
  self._original_mod = decoder_layer
347
348
  self.self_attn = self_attn
349
+ self.cross_attn = cross_attn
348
350
  self.__post_init__()
349
351
 
350
352
  def __post_init__(self, **kwargs):
@@ -402,7 +404,8 @@ class Seq2SeqDecoderLayer(torch.nn.Module):
402
404
  # Cross-Attention Block
403
405
  residual = hidden_states
404
406
  hidden_states = self.pre_cross_attn_layer_norm(hidden_states)
405
- cross_attn_output = self.encoder_attn(
407
+
408
+ cross_attn_output = self.cross_attn(
406
409
  hidden_states=hidden_states,
407
410
  past_key_value=cross_past_key_value,
408
411
  attention_mask=encoder_attention_mask,
@@ -487,3 +490,38 @@ class Seq2SeqSelfAttention(nn.Module):
487
490
  attn_output = self.out_proj(attn_output)
488
491
 
489
492
  return attn_output
493
+
494
+
495
+ class Seq2SeqCrossAttention(nn.Module):
496
+ def __init__(self, attn, **kwargs):
497
+ super().__init__()
498
+ self._original_mod = attn
499
+ self.__post_init__(**kwargs)
500
+
501
+ def forward(
502
+ self,
503
+ hidden_states: torch.Tensor,
504
+ key_value_states: torch.Tensor = None,
505
+ past_key_value: Optional[object] = None,
506
+ attention_mask: Optional[torch.Tensor] = None,
507
+ ):
508
+ bsz, tgt_len, _ = hidden_states.size()
509
+ query_states = self.q_proj(hidden_states).view(bsz, -1, self.num_heads, self.head_dim).transpose(1, 2)
510
+
511
+ is_cross_attention = key_value_states is not None
512
+ if is_cross_attention:
513
+ key_states = past_key_value[0]
514
+ value_states = past_key_value[1]
515
+
516
+ attn_output = torch.nn.functional.scaled_dot_product_attention(
517
+ query_states,
518
+ key_states,
519
+ value_states,
520
+ attn_mask=attention_mask,
521
+ )
522
+
523
+ attn_output = attn_output.transpose(1, 2).contiguous()
524
+ attn_output = attn_output.view(bsz, tgt_len, self.embed_dim)
525
+ attn_output = self.out_proj(attn_output)
526
+
527
+ return attn_output, None, past_key_value
@@ -136,10 +136,14 @@ class T5Decoder(Seq2SeqDecoder):
136
136
 
137
137
 
138
138
  class T5Block(Seq2SeqDecoderLayer):
139
+ def __init__(self, decoder_layer, self_attn):
140
+ super().__init__(decoder_layer, self_attn, cross_attn=None)
141
+ self.__post_init__()
142
+
139
143
  def __post_init__(self):
140
144
  self.self_attn_layer_norm = self._original_mod.layer[0].layer_norm
141
145
  self.encoder_attn_layer_norm = self._original_mod.layer[1].layer_norm
142
- self.encoder_attn = T5CrossAttention(self._original_mod.layer[1].EncDecAttention)
146
+ self.cross_attn = T5CrossAttention(self._original_mod.layer[1].EncDecAttention)
143
147
  self.ff_layer = self._original_mod.layer[2]
144
148
 
145
149
  def pre_self_attn_layer_norm(self, hidden_states):
@@ -67,7 +67,7 @@ class LLMTest:
67
67
  class TestQwen2Model(LLMTest.TestLLM):
68
68
  RBLN_CLASS = RBLNQwen2ForCausalLM
69
69
  HF_MODEL_ID = "Qwen/Qwen2-0.5B-Instruct"
70
- EXPECTED_OUTPUT = " I am a 30-year-old woman who has been living with lupus for over 1"
70
+ EXPECTED_OUTPUT = " I am a 20 year old girl from the United States. I have been studying English for"
71
71
  HF_CONFIG_KWARGS = {"max_position_embeddings": 1024}
72
72
 
73
73
 
@@ -108,7 +108,7 @@ class TestLlamaForCausalLM_Flash(LLMTest.TestLLM):
108
108
  class TestLlamaForCausalLM_Multibatch(TestLlamaForCausalLM):
109
109
  PROMPT = ["Who are you?", "What is the capital of France?", "What is the capital of Germany?"]
110
110
  EXPECTED_OUTPUT = [
111
- "reress makefable R���� noethetsshss rechoolso�",
111
+ "reress makefable R���� noethetss0oss invetetet",
112
112
  "resget makeget makeichget makeichualichual#choolchool accngngngng",
113
113
  "resget makeget makeichget makeichualichual#choolchool accngngngng",
114
114
  ]
@@ -247,6 +247,7 @@ class TestWhisperModel(BaseTest.TestModel):
247
247
  data,
248
248
  generate_kwargs={
249
249
  "repetition_penalty": 1.3,
250
+ "num_beams": 1,
250
251
  },
251
252
  batch_size=2,
252
253
  )