optimum-rbln 0.2.1a1__tar.gz → 0.2.1a3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/workflows/rbln_trigger_on_pr.yaml +1 -1
  2. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/PKG-INFO +2 -2
  3. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/speech-recognition/run_whisper.py +1 -1
  4. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/pyproject.toml +1 -1
  5. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/__init__.py +2 -0
  6. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/__version__.py +1 -1
  7. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +2 -2
  8. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +2 -2
  9. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/models/controlnet.py +2 -2
  10. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -2
  11. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +2 -2
  12. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +2 -2
  13. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +3 -2
  14. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/modeling.py +2 -2
  15. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/modeling_base.py +35 -15
  16. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/ops/attn.py +4 -4
  17. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/__init__.py +2 -0
  18. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/__init__.py +2 -2
  19. optimum_rbln-0.2.1a3/src/optimum/rbln/transformers/models/bert/__init__.py +15 -0
  20. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +7 -3
  21. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +2 -2
  22. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +6 -2
  23. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +2 -2
  24. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +2 -2
  25. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +2 -2
  26. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +15 -32
  27. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +29 -6
  28. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +2 -2
  29. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +19 -17
  30. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -2
  31. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +2 -2
  32. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/utils/save_utils.py +3 -2
  33. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/tests/test_base.py +0 -1
  34. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/tests/test_llm.py +12 -12
  35. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/tests/test_transformers.py +116 -5
  36. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/uv.lock +17 -49
  37. optimum_rbln-0.2.1a1/src/optimum/rbln/transformers/models/bert/__init__.py +0 -15
  38. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  39. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  40. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  41. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
  42. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/pull_request_template.md +0 -0
  43. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/scripts/auto_code_review.py +0 -0
  44. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/scripts/validate_pr_checklist.py +0 -0
  45. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/workflows/auto_code_review.yml +0 -0
  46. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/workflows/check_code_quality.yml +0 -0
  47. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/workflows/deploy-on-tag.yaml +0 -0
  48. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/workflows/deploy.yaml +0 -0
  49. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/workflows/pr-title-check.yaml +0 -0
  50. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/workflows/pr_checklist_validator.yml +0 -0
  51. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
  52. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
  53. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
  54. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/.gitignore +0 -0
  55. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/CODE_OF_CONDUCT.md +0 -0
  56. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/CONTRIBUTING.md +0 -0
  57. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/LICENSE +0 -0
  58. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/README.md +0 -0
  59. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/assets/rbln_logo.png +0 -0
  60. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/audio-classification/run_ast_audio_classification.py +0 -0
  61. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/depth-estimation/run_dpt.py +0 -0
  62. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/image-classification/run_image_classification.py +0 -0
  63. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/image-classification/run_vit_image_classification.py +0 -0
  64. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
  65. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/question-answering/run_question_answering.py +0 -0
  66. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/speech-recognition/run_wav2vec2.py +0 -0
  67. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
  68. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
  69. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
  70. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
  71. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
  72. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
  73. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
  74. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
  75. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
  76. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/text-classification/run_secureBERT.py +0 -0
  77. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/text-classification/run_t5_classification.py +0 -0
  78. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
  79. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
  80. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/text2text-generation/run_llama_peft.py +0 -0
  81. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
  82. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/scripts/uv-lock.sh +0 -0
  83. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/scripts/uv-sync.sh +0 -0
  84. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/__init__.py +0 -0
  85. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
  86. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
  87. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
  88. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
  89. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
  90. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
  91. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
  92. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
  93. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
  94. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
  95. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
  96. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
  97. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
  98. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
  99. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
  100. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
  101. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
  102. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
  103. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
  104. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
  105. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
  106. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
  107. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/modeling_config.py +0 -0
  108. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/ops/__init__.py +0 -0
  109. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/ops/flash_attn.py +0 -0
  110. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
  111. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/modeling_alias.py +0 -0
  112. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
  113. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
  114. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
  115. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/auto/auto_factory.py +0 -0
  116. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
  117. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
  118. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
  119. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -0
  120. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
  121. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
  122. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +0 -0
  123. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
  124. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
  125. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
  126. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
  127. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
  128. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
  129. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
  130. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
  131. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
  132. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
  133. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
  134. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
  135. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
  136. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
  137. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
  138. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
  139. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
  140. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
  141. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
  142. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
  143. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
  144. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
  145. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
  146. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
  147. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
  148. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
  149. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
  150. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
  151. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -0
  152. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
  153. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
  154. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
  155. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
  156. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
  157. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
  158. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/utils/__init__.py +0 -0
  159. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/utils/decorator_utils.py +0 -0
  160. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/utils/hub.py +0 -0
  161. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/utils/import_utils.py +0 -0
  162. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/utils/logging.py +0 -0
  163. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/utils/model_utils.py +0 -0
  164. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/utils/runtime_utils.py +0 -0
  165. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/src/optimum/rbln/utils/submodule.py +0 -0
  166. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/tests/__init__.py +0 -0
  167. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/tests/psnr.py +0 -0
  168. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/tests/requirements_sdxl.txt +0 -0
  169. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/tests/run_stable_diffusion_xl_base.py +0 -0
  170. {optimum_rbln-0.2.1a1 → optimum_rbln-0.2.1a3}/tests/test_diffusers.py +0 -0
@@ -9,7 +9,7 @@ env:
9
9
  REBEL_PYPI_ENDPOINT: ${{ vars.REBEL_PYPI_INTERNAL_ENDPOINT }}
10
10
  REBEL_PYPI_USERNAME: ${{ secrets.REBEL_PYPI_USERNAME }}
11
11
  REBEL_PYPI_PASSWORD: ${{ secrets.REBEL_PYPI_PASSWORD }}
12
- REBEL_COMPILER_VERSION: 0.7.1.dev63+ge6c4fb68
12
+ REBEL_COMPILER_VERSION: 0.7.2.dev151+g19f099fd
13
13
 
14
14
  jobs:
15
15
  check-rebel-compiler-version:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: optimum-rbln
3
- Version: 0.2.1a1
3
+ Version: 0.2.1a3
4
4
  Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
5
5
  Project-URL: Homepage, https://rebellions.ai
6
6
  Project-URL: Documentation, https://docs.rbln.ai
@@ -28,7 +28,7 @@ Requires-Dist: packaging>=24.1
28
28
  Requires-Dist: torch<=2.5.1
29
29
  Requires-Dist: torchaudio<=2.5.1
30
30
  Requires-Dist: torchvision<=0.20.1
31
- Requires-Dist: transformers==4.45.2
31
+ Requires-Dist: transformers==4.48.3
32
32
  Description-Content-Type: text/markdown
33
33
 
34
34
 
@@ -104,7 +104,7 @@ def main(
104
104
  **gen_kwargs,
105
105
  )
106
106
 
107
- generated_ids = outputs
107
+ generated_ids = outputs.get("sequences") if isinstance(outputs, dict) else outputs
108
108
  transcriptions = processor.batch_decode(generated_ids, skip_special_tokens=True, decode_with_timestamps=True)
109
109
 
110
110
  print("---RBLN Longform Generate Result ---")
@@ -32,7 +32,7 @@ dependencies = [
32
32
  "torchaudio<=2.5.1",
33
33
  "torchvision<=0.20.1",
34
34
  "accelerate>=1.0.1",
35
- "transformers==4.45.2",
35
+ "transformers==4.48.3",
36
36
  "diffusers<=0.31.0",
37
37
  "packaging>=24.1",
38
38
  ]
@@ -71,6 +71,7 @@ _import_structure = {
71
71
  "RBLNRobertaForSequenceClassification",
72
72
  "RBLNRobertaForMaskedLM",
73
73
  "RBLNViTForImageClassification",
74
+ "RBLNBertForMaskedLM",
74
75
  ],
75
76
  "diffusers": [
76
77
  "RBLNStableDiffusionPipeline",
@@ -141,6 +142,7 @@ if TYPE_CHECKING:
141
142
  RBLNAutoModelForVision2Seq,
142
143
  RBLNBartForConditionalGeneration,
143
144
  RBLNBartModel,
145
+ RBLNBertForMaskedLM,
144
146
  RBLNBertForQuestionAnswering,
145
147
  RBLNBertModel,
146
148
  RBLNCLIPTextModel,
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.2.1a1'
15
+ __version__ = version = '0.2.1a3'
16
16
  __version_tuple__ = version_tuple = (0, 2, 1)
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import logging
16
15
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
17
16
 
18
17
  import rebel
@@ -23,6 +22,7 @@ from transformers import PretrainedConfig
23
22
 
24
23
  from ....modeling import RBLNModel
25
24
  from ....modeling_config import DEFAULT_COMPILED_MODEL_NAME, RBLNCompileConfig, RBLNConfig
25
+ from ....utils.logging import get_logger
26
26
  from ...modeling_diffusers import RBLNDiffusionMixin
27
27
  from .vae import RBLNRuntimeVAEDecoder, RBLNRuntimeVAEEncoder, _VAEDecoder, _VAEEncoder
28
28
 
@@ -31,7 +31,7 @@ if TYPE_CHECKING:
31
31
  import torch
32
32
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig
33
33
 
34
- logger = logging.getLogger(__name__)
34
+ logger = get_logger(__name__)
35
35
 
36
36
 
37
37
  class RBLNAutoencoderKL(RBLNModel):
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import logging
16
15
  from typing import TYPE_CHECKING
17
16
 
18
17
  import torch # noqa: I001
@@ -20,13 +19,14 @@ from diffusers import AutoencoderKL
20
19
  from diffusers.models.autoencoders.vae import DiagonalGaussianDistribution
21
20
  from diffusers.models.modeling_outputs import AutoencoderKLOutput
22
21
 
22
+ from ....utils.logging import get_logger
23
23
  from ....utils.runtime_utils import RBLNPytorchRuntime
24
24
 
25
25
 
26
26
  if TYPE_CHECKING:
27
27
  import torch
28
28
 
29
- logger = logging.getLogger(__name__)
29
+ logger = get_logger(__name__)
30
30
 
31
31
 
32
32
  class RBLNRuntimeVAEEncoder(RBLNPytorchRuntime):
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import importlib
16
- import logging
17
16
  from typing import TYPE_CHECKING, Any, Dict, Optional, Union
18
17
 
19
18
  import torch
@@ -22,6 +21,7 @@ from transformers import PretrainedConfig
22
21
 
23
22
  from ...modeling import RBLNModel
24
23
  from ...modeling_config import RBLNCompileConfig, RBLNConfig
24
+ from ...utils.logging import get_logger
25
25
  from ..modeling_diffusers import RBLNDiffusionMixin
26
26
 
27
27
 
@@ -29,7 +29,7 @@ if TYPE_CHECKING:
29
29
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
30
30
 
31
31
 
32
- logger = logging.getLogger(__name__)
32
+ logger = get_logger(__name__)
33
33
 
34
34
 
35
35
  class _ControlNetModel(torch.nn.Module):
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import logging
16
15
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
17
16
 
18
17
  import torch
@@ -22,13 +21,14 @@ from transformers import PretrainedConfig
22
21
 
23
22
  from ....modeling import RBLNModel
24
23
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
24
+ from ....utils.logging import get_logger
25
25
  from ...modeling_diffusers import RBLNDiffusionMixin
26
26
 
27
27
 
28
28
  if TYPE_CHECKING:
29
29
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
30
30
 
31
- logger = logging.getLogger(__name__)
31
+ logger = get_logger(__name__)
32
32
 
33
33
 
34
34
  class SD3Transformer2DModelWrapper(torch.nn.Module):
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import logging
16
15
  from dataclasses import dataclass
17
16
  from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
18
17
 
@@ -22,13 +21,14 @@ from transformers import PretrainedConfig
22
21
 
23
22
  from ....modeling import RBLNModel
24
23
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
24
+ from ....utils.logging import get_logger
25
25
  from ...modeling_diffusers import RBLNDiffusionMixin
26
26
 
27
27
 
28
28
  if TYPE_CHECKING:
29
29
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
30
30
 
31
- logger = logging.getLogger(__name__)
31
+ logger = get_logger(__name__)
32
32
 
33
33
 
34
34
  class _UNet_SD(torch.nn.Module):
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import logging
16
15
  import os
17
16
  from pathlib import Path
18
17
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
@@ -21,13 +20,14 @@ import torch
21
20
  from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
22
21
 
23
22
  from ....modeling import RBLNModel
23
+ from ....utils.logging import get_logger
24
24
  from ...models.controlnet import RBLNControlNetModel
25
25
 
26
26
 
27
27
  if TYPE_CHECKING:
28
28
  pass
29
29
 
30
- logger = logging.getLogger(__name__)
30
+ logger = get_logger(__name__)
31
31
 
32
32
 
33
33
  class RBLNMultiControlNetModel(RBLNModel):
@@ -34,16 +34,17 @@ from diffusers import StableDiffusionControlNetPipeline
34
34
  from diffusers.image_processor import PipelineImageInput
35
35
  from diffusers.pipelines.controlnet.pipeline_controlnet import retrieve_timesteps
36
36
  from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
37
- from diffusers.utils import deprecate, logging
37
+ from diffusers.utils import deprecate
38
38
  from diffusers.utils.torch_utils import is_compiled_module, is_torch_version
39
39
 
40
40
  from ....utils.decorator_utils import remove_compile_time_kwargs
41
+ from ....utils.logging import get_logger
41
42
  from ...modeling_diffusers import RBLNDiffusionMixin
42
43
  from ...models import RBLNControlNetModel
43
44
  from ...pipelines.controlnet.multicontrolnet import RBLNMultiControlNetModel
44
45
 
45
46
 
46
- logger = logging.get_logger(__name__)
47
+ logger = get_logger(__name__)
47
48
 
48
49
 
49
50
  class RBLNStableDiffusionControlNetPipeline(RBLNDiffusionMixin, StableDiffusionControlNetPipeline):
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import logging
16
15
  from pathlib import Path
17
16
  from tempfile import TemporaryDirectory
18
17
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
@@ -24,13 +23,14 @@ from transformers import AutoConfig, PretrainedConfig
24
23
 
25
24
  from .modeling_base import RBLNBaseModel
26
25
  from .modeling_config import DEFAULT_COMPILED_MODEL_NAME, RBLNConfig, use_rbln_config
26
+ from .utils.logging import get_logger
27
27
 
28
28
 
29
29
  if TYPE_CHECKING:
30
30
  from transformers import PreTrainedModel
31
31
 
32
32
 
33
- logger = logging.getLogger(__name__)
33
+ logger = get_logger(__name__)
34
34
 
35
35
 
36
36
  class RBLNModel(RBLNBaseModel):
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import importlib
16
- import logging
17
16
  import os
18
17
  import shutil
19
18
  from abc import ABC, abstractmethod
@@ -32,6 +31,7 @@ from transformers import (
32
31
 
33
32
  from .modeling_config import RBLNCompileConfig, RBLNConfig, use_rbln_config
34
33
  from .utils.hub import PushToHubMixin, pull_compiled_model_from_hub, validate_files
34
+ from .utils.logging import get_logger
35
35
  from .utils.runtime_utils import UnavailableRuntime
36
36
  from .utils.save_utils import maybe_load_preprocessors
37
37
  from .utils.submodule import SubModulesMixin
@@ -40,7 +40,7 @@ from .utils.submodule import SubModulesMixin
40
40
  if TYPE_CHECKING:
41
41
  from transformers import PreTrainedModel
42
42
 
43
- logger = logging.getLogger(__name__)
43
+ logger = get_logger(__name__)
44
44
 
45
45
 
46
46
  class PreTrainedModel(ABC): # noqa: F811
@@ -442,27 +442,47 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
442
442
  logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
443
443
  return
444
444
 
445
- os.makedirs(save_directory, exist_ok=True)
446
-
447
445
  real_save_dir = self.model_save_dir / self.subfolder
448
446
  save_directory_path = Path(save_directory)
449
- if os.path.exists(real_save_dir) and os.path.isdir(real_save_dir):
450
- if save_directory_path.absolute() == real_save_dir.absolute():
451
- raise FileExistsError(
452
- f"Cannot save model to '{save_directory}'. "
453
- f"This directory already exists and contains the model files."
454
- )
455
- shutil.copytree(real_save_dir, save_directory, dirs_exist_ok=True)
456
- self.config.save_pretrained(save_directory)
457
- if self.generation_config is not None:
458
- self.generation_config.save_pretrained(save_directory)
459
- else:
447
+
448
+ if not os.path.exists(real_save_dir) or not os.path.isdir(real_save_dir):
460
449
  raise FileNotFoundError(
461
450
  f"Unable to save the model. The model directory '{real_save_dir}' does not exist or is not accessible. "
462
451
  f"Cannot save to the specified destination '{save_directory}'. "
463
452
  f"Please ensure the model directory exists and you have the necessary permissions to access it."
464
453
  )
465
454
 
455
+ if save_directory_path.absolute() == real_save_dir.absolute():
456
+ raise FileExistsError(
457
+ f"Cannot save model to '{save_directory}'. This directory already exists and contains the model files."
458
+ )
459
+
460
+ # Create a temporary directory next to the target directory
461
+ tmp_dir = save_directory + ".tmp"
462
+ try:
463
+ # Remove temporary directory if it exists from a previous failed attempt
464
+ if os.path.exists(tmp_dir):
465
+ shutil.rmtree(tmp_dir)
466
+
467
+ # First copy everything to a temporary directory
468
+ shutil.copytree(real_save_dir, tmp_dir)
469
+
470
+ # Save configs to the temporary directory
471
+ self.config.save_pretrained(tmp_dir)
472
+ if self.generation_config is not None:
473
+ self.generation_config.save_pretrained(tmp_dir)
474
+
475
+ # If everything succeeded, atomically replace the target directory
476
+ if os.path.exists(save_directory):
477
+ shutil.rmtree(save_directory)
478
+ os.rename(tmp_dir, save_directory)
479
+
480
+ except Exception as e:
481
+ # Clean up the temporary directory if anything fails
482
+ if os.path.exists(tmp_dir):
483
+ shutil.rmtree(tmp_dir)
484
+ raise e # Re-raise the exception after cleanup
485
+
466
486
  if push_to_hub:
467
487
  return super().push_to_hub(save_directory, **kwargs)
468
488
 
@@ -152,16 +152,16 @@ def register_rbln_custom_attention_add_softmax():
152
152
  """
153
153
  return (
154
154
  q,
155
- torch.empty(1, *kcache.shape[1:], device=kcache.device),
156
- torch.empty(1, *vcache.shape[1:], device=vcache.device),
155
+ torch.empty(*kcache.shape, device=kcache.device),
156
+ torch.empty(*vcache.shape, device=vcache.device),
157
157
  )
158
158
 
159
159
  @register_fake("rbln_custom_ops::attn_decode_add_softmax")
160
160
  def attn_decode_add_softmax_abstract(q, k, v, m, kcache, vcache, seq, partition):
161
161
  return (
162
162
  q,
163
- torch.empty(1, *kcache.shape[1:], device=kcache.device),
164
- torch.empty(1, *vcache.shape[1:], device=vcache.device),
163
+ torch.empty(*kcache.shape, device=kcache.device),
164
+ torch.empty(*vcache.shape, device=vcache.device),
165
165
  )
166
166
 
167
167
  torch.library.define(
@@ -35,6 +35,7 @@ _import_structure = {
35
35
  "RBLNBartForConditionalGeneration",
36
36
  "RBLNBartModel",
37
37
  "RBLNBertModel",
38
+ "RBLNBertForMaskedLM",
38
39
  "RBLNBertForQuestionAnswering",
39
40
  "RBLNCLIPTextModel",
40
41
  "RBLNCLIPTextModelWithProjection",
@@ -92,6 +93,7 @@ if TYPE_CHECKING:
92
93
  RBLNAutoModelForVision2Seq,
93
94
  RBLNBartForConditionalGeneration,
94
95
  RBLNBartModel,
96
+ RBLNBertForMaskedLM,
95
97
  RBLNBertForQuestionAnswering,
96
98
  RBLNBertModel,
97
99
  RBLNCLIPTextModel,
@@ -33,7 +33,7 @@ _import_structure = {
33
33
  "RBLNAutoModelForVision2Seq",
34
34
  ],
35
35
  "bart": ["RBLNBartForConditionalGeneration", "RBLNBartModel"],
36
- "bert": ["RBLNBertModel", "RBLNBertForQuestionAnswering"],
36
+ "bert": ["RBLNBertModel", "RBLNBertForQuestionAnswering", "RBLNBertForMaskedLM"],
37
37
  "clip": ["RBLNCLIPTextModel", "RBLNCLIPTextModelWithProjection", "RBLNCLIPVisionModel"],
38
38
  "dpt": ["RBLNDPTForDepthEstimation"],
39
39
  "exaone": ["RBLNExaoneForCausalLM"],
@@ -67,7 +67,7 @@ if TYPE_CHECKING:
67
67
  RBLNAutoModelForVision2Seq,
68
68
  )
69
69
  from .bart import RBLNBartForConditionalGeneration, RBLNBartModel
70
- from .bert import RBLNBertForQuestionAnswering, RBLNBertModel
70
+ from .bert import RBLNBertForMaskedLM, RBLNBertForQuestionAnswering, RBLNBertModel
71
71
  from .clip import RBLNCLIPTextModel, RBLNCLIPTextModelWithProjection, RBLNCLIPVisionModel
72
72
  from .dpt import RBLNDPTForDepthEstimation
73
73
  from .exaone import RBLNExaoneForCausalLM
@@ -0,0 +1,15 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .modeling_bert import RBLNBertForMaskedLM, RBLNBertForQuestionAnswering, RBLNBertModel
@@ -13,17 +13,17 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import inspect
16
- import logging
17
16
  from typing import TYPE_CHECKING, Any, Dict, Optional, Union
18
17
 
19
18
  from transformers import PretrainedConfig
20
19
 
21
20
  from ....modeling import RBLNModel
22
21
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
23
- from ...modeling_generic import RBLNModelForQuestionAnswering
22
+ from ....utils.logging import get_logger
23
+ from ...modeling_generic import RBLNModelForMaskedLM, RBLNModelForQuestionAnswering
24
24
 
25
25
 
26
- logger = logging.getLogger(__name__)
26
+ logger = get_logger(__name__)
27
27
 
28
28
  if TYPE_CHECKING:
29
29
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
@@ -100,5 +100,9 @@ class RBLNBertModel(RBLNModel):
100
100
  return rbln_config
101
101
 
102
102
 
103
+ class RBLNBertForMaskedLM(RBLNModelForMaskedLM):
104
+ rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
105
+
106
+
103
107
  class RBLNBertForQuestionAnswering(RBLNModelForQuestionAnswering):
104
108
  rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import logging
16
15
  from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
17
16
 
18
17
  import torch
@@ -28,9 +27,10 @@ from transformers.models.clip.modeling_clip import CLIPTextModelOutput
28
27
  from ....diffusers.modeling_diffusers import RBLNDiffusionMixin
29
28
  from ....modeling import RBLNModel
30
29
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
30
+ from ....utils.logging import get_logger
31
31
 
32
32
 
33
- logger = logging.getLogger(__name__)
33
+ logger = get_logger(__name__)
34
34
 
35
35
  if TYPE_CHECKING:
36
36
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, CLIPTextModel
@@ -544,15 +544,19 @@ class DecoderOnlyAttention(nn.Module):
544
544
  super().__init__()
545
545
  self._original_mod = self_attn
546
546
  self.layer_idx = self_attn.layer_idx
547
- self.num_heads = self._original_mod.num_heads
547
+ self.num_heads = getattr(self._original_mod, "num_heads", None) or getattr(
548
+ self._original_mod.config, "num_attention_heads"
549
+ )
548
550
  self.head_dim = self._original_mod.head_dim
549
551
  self._phase = "prefill"
550
552
  self.scale = torch.tensor(self.get_attn_scale())
551
553
 
552
554
  if hasattr(self._original_mod, "num_key_value_heads"):
553
555
  self.num_key_value_heads = self._original_mod.num_key_value_heads
556
+ elif hasattr(self._original_mod, "config") and hasattr(self._original_mod.config, "num_key_value_heads"):
557
+ self.num_key_value_heads = self._original_mod.config.num_key_value_heads
554
558
  else:
555
- self.num_key_value_heads = self._original_mod.num_heads
559
+ self.num_key_value_heads = self.num_heads
556
560
 
557
561
  self.attention = self.get_attention()
558
562
  self.__post_init__()
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import logging
16
15
  from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Union
17
16
 
18
17
  from transformers import AutoModelForDepthEstimation
@@ -20,9 +19,10 @@ from transformers.modeling_outputs import DepthEstimatorOutput
20
19
 
21
20
  from ....modeling import RBLNModel
22
21
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
22
+ from ....utils.logging import get_logger
23
23
 
24
24
 
25
- logger = logging.getLogger(__name__)
25
+ logger = get_logger(__name__)
26
26
 
27
27
  if TYPE_CHECKING:
28
28
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import inspect
16
- import logging
17
16
  from pathlib import Path
18
17
  from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union
19
18
 
@@ -30,10 +29,11 @@ from transformers.models.llava_next.modeling_llava_next import LlavaNextCausalLM
30
29
 
31
30
  from ....modeling import RBLNModel
32
31
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
32
+ from ....utils.logging import get_logger
33
33
  from ..decoderonly.modeling_decoderonly import RBLNDecoderOnlyOutput
34
34
 
35
35
 
36
- logger = logging.getLogger(__name__)
36
+ logger = get_logger(__name__)
37
37
 
38
38
  if TYPE_CHECKING:
39
39
  from transformers import (
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import inspect
16
- import logging
17
16
  from abc import ABC
18
17
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
19
18
 
@@ -25,10 +24,11 @@ from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput
25
24
 
26
25
  from ....modeling import RBLNModel
27
26
  from ....modeling_config import RBLNCompileConfig, RBLNConfig
27
+ from ....utils.logging import get_logger
28
28
  from ....utils.runtime_utils import RBLNPytorchRuntime
29
29
 
30
30
 
31
- logger = logging.getLogger(__name__)
31
+ logger = get_logger(__name__)
32
32
 
33
33
  if TYPE_CHECKING:
34
34
  from transformers import (
@@ -420,7 +420,7 @@ class Seq2SeqSelfAttention(nn.Module):
420
420
  pass
421
421
 
422
422
  def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int) -> torch.Tensor:
423
- return tensor.view(bsz, 1, seq_len, 1, self.num_heads, self.head_dim).transpose(2, 4)
423
+ return tensor.view(bsz, seq_len, 1, self.num_heads, self.head_dim).transpose(1, 3)
424
424
 
425
425
  def projection(self, hidden_states) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
426
426
  """Projects input hidden states into query, key, and value representations.
@@ -450,38 +450,21 @@ class Seq2SeqSelfAttention(nn.Module):
450
450
  key_states = self._shape(key_states, -1, bsz)
451
451
  value_states = self._shape(value_states, -1, bsz)
452
452
 
453
- all_key_states = []
454
- all_value_states = []
455
- all_attn_output = []
456
- for b_idx in range(bsz):
457
- query_state = query_states[b_idx]
458
- key_state = key_states[b_idx]
459
- value_state = value_states[b_idx]
460
- attn_mask = attention_mask[b_idx].unsqueeze(0).unsqueeze(2)
461
- past_key_state = past_key_value[0].view(bsz, self.num_heads, 1, -1, self.head_dim)
462
- past_value_state = past_key_value[1].view(bsz, self.num_heads, 1, -1, self.head_dim)
463
-
464
- attn_output, key_state, value_state = self.attn_decode(
465
- query_state,
466
- key_state,
467
- value_state,
468
- attn_mask,
469
- past_key_state,
470
- past_value_state,
471
- cache_position[b_idx][0],
472
- torch.tensor(1.0, dtype=torch.float32), # scale
473
- )
474
-
475
- attn_output = attn_output.view(1, self.num_heads, -1, self.head_dim).transpose(1, 2)
476
- attn_output = attn_output.reshape(1, -1, self.num_heads * self.head_dim)
477
-
478
- all_key_states.append(key_state.squeeze(2))
479
- all_value_states.append(value_state.squeeze(2))
480
- all_attn_output.append(attn_output)
453
+ attn_output, key_states, value_states = self.attn_decode(
454
+ query_states,
455
+ key_states,
456
+ value_states,
457
+ attention_mask.unsqueeze(
458
+ 2
459
+ ), # Unsqueeze group axis since CustomKernel expects it for group query attention
460
+ past_key_value[0].view(bsz, self.num_heads, 1, -1, self.head_dim),
461
+ past_key_value[1].view(bsz, self.num_heads, 1, -1, self.head_dim),
462
+ cache_position.squeeze(1),
463
+ torch.tensor(1.0, dtype=torch.float32), # scale
464
+ )
481
465
 
482
- key_states = torch.cat(all_key_states, dim=0)
483
- value_states = torch.cat(all_value_states, dim=0)
484
- attn_output = torch.cat(all_attn_output, dim=0)
466
+ attn_output = attn_output.view(bsz, self.num_heads, -1, self.head_dim).transpose(1, 2)
467
+ attn_output = attn_output.reshape(bsz, -1, self.num_heads * self.head_dim)
485
468
 
486
469
  attn_output = self.out_proj(attn_output)
487
470
  present_key_value = (key_states, value_states)