optimum-rbln 0.7.4a1__tar.gz → 0.7.4a2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/scripts/auto_code_review.py +4 -22
  2. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/auto_code_review.yml +1 -1
  3. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/check_code_quality.yml +1 -1
  4. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/pr-title-check.yaml +1 -1
  5. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/pr_checklist_validator.yml +1 -1
  6. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/PKG-INFO +1 -1
  7. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/__version__.py +1 -1
  8. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +39 -20
  9. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/t5/modeling_t5.py +37 -3
  10. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +2 -2
  11. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  12. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  13. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  14. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
  15. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/pull_request_template.md +0 -0
  16. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/scripts/validate_pr_checklist.py +0 -0
  17. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/version.yaml +0 -0
  18. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/deploy-on-tag.yaml +0 -0
  19. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/deploy.yaml +0 -0
  20. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/rbln_check_compiler.yaml +0 -0
  21. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
  22. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
  23. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
  24. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/rbln_scheduled_test.yaml +0 -0
  25. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
  26. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/.gitignore +0 -0
  27. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/CODE_OF_CONDUCT.md +0 -0
  28. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/CONTRIBUTING.md +0 -0
  29. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/LICENSE +0 -0
  30. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/README.md +0 -0
  31. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/assets/rbln_logo.png +0 -0
  32. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/audio-classification/run_ast_audio_classification.py +0 -0
  33. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/depth-estimation/run_dpt.py +0 -0
  34. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/image-classification/run_image_classification.py +0 -0
  35. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/image-classification/run_vit_image_classification.py +0 -0
  36. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
  37. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
  38. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
  39. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
  40. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
  41. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
  42. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
  43. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
  44. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/question-answering/run_question_answering.py +0 -0
  45. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/speech-recognition/run_wav2vec2.py +0 -0
  46. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/speech-recognition/run_whisper.py +0 -0
  47. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
  48. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
  49. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
  50. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
  51. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
  52. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
  53. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
  54. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
  55. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
  56. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/text-classification/run_secureBERT.py +0 -0
  57. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/text-classification/run_t5_classification.py +0 -0
  58. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
  59. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
  60. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/text2text-generation/run_llama_peft.py +0 -0
  61. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
  62. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/examples/time-series-forecasting/run_time_series_forecasting.py +0 -0
  63. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/pyproject.toml +0 -0
  64. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/scripts/uv-lock.sh +0 -0
  65. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/scripts/uv-sync.sh +0 -0
  66. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/__init__.py +0 -0
  67. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/__init__.py +0 -0
  68. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
  69. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
  70. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
  71. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
  72. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
  73. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
  74. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
  75. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
  76. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
  77. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
  78. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
  79. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
  80. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
  81. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
  82. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
  83. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
  84. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
  85. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
  86. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
  87. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
  88. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
  89. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
  90. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
  91. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
  92. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
  93. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
  94. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
  95. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
  96. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
  97. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
  98. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
  99. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
  100. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
  101. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
  102. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
  103. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
  104. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
  105. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/modeling.py +0 -0
  106. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/modeling_base.py +0 -0
  107. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/modeling_config.py +0 -0
  108. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/ops/__init__.py +0 -0
  109. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/ops/attn.py +0 -0
  110. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/ops/flash_attn.py +0 -0
  111. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
  112. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/ops/linear.py +0 -0
  113. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/__init__.py +0 -0
  114. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/modeling_alias.py +0 -0
  115. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
  116. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
  117. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/__init__.py +0 -0
  118. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
  119. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/auto/auto_factory.py +0 -0
  120. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
  121. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
  122. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
  123. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -0
  124. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
  125. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
  126. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
  127. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
  128. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
  129. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +0 -0
  130. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
  131. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
  132. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
  133. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
  134. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
  135. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
  136. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
  137. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
  138. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
  139. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
  140. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
  141. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
  142. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
  143. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
  144. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
  145. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +0 -0
  146. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
  147. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
  148. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
  149. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
  150. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
  151. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
  152. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
  153. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
  154. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
  155. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
  156. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
  157. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
  158. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
  159. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +0 -0
  160. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +0 -0
  161. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
  162. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +0 -0
  163. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/time_series_transformers/__init__.py +0 -0
  164. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/time_series_transformers/modeling_time_series_transformers.py +0 -0
  165. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/time_series_transformers/time_series_transformers_architecture.py +0 -0
  166. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
  167. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
  168. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
  169. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
  170. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +0 -0
  171. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
  172. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
  173. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
  174. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
  175. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/utils/__init__.py +0 -0
  176. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/utils/decorator_utils.py +0 -0
  177. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/utils/hub.py +0 -0
  178. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/utils/import_utils.py +0 -0
  179. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/utils/logging.py +0 -0
  180. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/utils/model_utils.py +0 -0
  181. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/utils/runtime_utils.py +0 -0
  182. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/utils/save_utils.py +0 -0
  183. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/src/optimum/rbln/utils/submodule.py +0 -0
  184. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/tests/__init__.py +0 -0
  185. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/tests/psnr.py +0 -0
  186. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/tests/requirements_sdxl.txt +0 -0
  187. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/tests/run_stable_diffusion_xl_base.py +0 -0
  188. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/tests/test_base.py +0 -0
  189. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/tests/test_diffusers.py +0 -0
  190. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/tests/test_llm.py +0 -0
  191. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/tests/test_transformers.py +0 -0
  192. {optimum_rbln-0.7.4a1 → optimum_rbln-0.7.4a2}/uv.lock +0 -0
@@ -22,8 +22,7 @@ from github import Github
22
22
 
23
23
  model_name = os.environ["GOOGLE_MODEL_ID"]
24
24
  genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
25
- max_context_token = 100000
26
- force_review = False
25
+ max_context_token = 500000
27
26
 
28
27
 
29
28
  def get_pr_diff():
@@ -58,19 +57,6 @@ Review the following code changes(GIT DIFF) along with the pull request (PR) det
58
57
  return system_prompt, prompt
59
58
 
60
59
 
61
- def translate_review(review):
62
- model = genai.GenerativeModel(
63
- model_name,
64
- system_instruction="You are a professional translator specializing in technical and software-related content. Keep the technical words in English, but understand the whole sentence and rephrase it in Korean.",
65
- )
66
- prompt = f"""Translate the following English text into Korean, maintaining technical accuracy and clarity. Include ONLY the translation, NO OTHER EXPLANATIONS or RESPONSES as a chatbot. :
67
-
68
- {review}"""
69
- response = model.generate_content(prompt)
70
-
71
- return response.text
72
-
73
-
74
60
  def review_code(system_prompt, prompt):
75
61
  model = genai.GenerativeModel(model_name, system_instruction=system_prompt)
76
62
  response = model.generate_content(prompt)
@@ -125,7 +111,7 @@ def main():
125
111
  system_prompt, prompt = get_prompt(diff, pr)
126
112
  model = genai.GenerativeModel(model_name=model_name, system_instruction=system_prompt)
127
113
  num_tokens = model.count_tokens(prompt).total_tokens
128
- if num_tokens > max_context_token and not force_review:
114
+ if num_tokens > max_context_token:
129
115
  msg = f"Diff ({len(diff)}) exceeds maximum allowed tokens ({max_context_token}) > ({num_tokens}). Skipping review."
130
116
  print(msg)
131
117
  pr.create_issue_comment(msg)
@@ -133,14 +119,10 @@ def main():
133
119
 
134
120
  # Get Auto review
135
121
  review = review_code(system_prompt, prompt)
136
- translation = translate_review(review)
137
122
 
138
123
  # Post comment on PR
139
- pr.create_issue_comment(f"""# Auto Code Review
140
-
141
- - [참고] Auto Code Review를 invoke하려면, commit message의 시작을 [autoreview]로 시작하거나, "/autoreview" 를 comment로 작성한 후,
142
- 해당 commit의 github action에서 code review를 re-run 하시면 됩니다.
143
- \n\n{review}\n\n{translation}""")
124
+ pr.create_issue_comment(f"""# Auto Code Review by {model_name}
125
+ \n\n{review}""")
144
126
 
145
127
 
146
128
  if __name__ == "__main__":
@@ -14,7 +14,7 @@ env:
14
14
 
15
15
  jobs:
16
16
  auto-review:
17
- runs-on: ubuntu-latest
17
+ runs-on: ubuntu-latest-rbln
18
18
  steps:
19
19
  - name: Checkout repository
20
20
  uses: actions/checkout@v3
@@ -25,7 +25,7 @@ jobs:
25
25
  fail-fast: false
26
26
  matrix:
27
27
  python-version: [3.9]
28
- os: [ubuntu-latest]
28
+ os: [ubuntu-latest-rbln]
29
29
 
30
30
  runs-on: ${{ matrix.os }}
31
31
  steps:
@@ -20,7 +20,7 @@ permissions:
20
20
  jobs:
21
21
  title-checker:
22
22
  name: Check PR title
23
- runs-on: ubuntu-latest
23
+ runs-on: ubuntu-latest-rbln
24
24
  steps:
25
25
  - uses: amannn/action-semantic-pull-request@v5
26
26
  id: lint_pr_title
@@ -10,7 +10,7 @@ on:
10
10
 
11
11
  jobs:
12
12
  validate-pr-checklist:
13
- runs-on: ubuntu-latest
13
+ runs-on: ubuntu-latest-rbln
14
14
  steps:
15
15
  - name: Checkout repository
16
16
  uses: actions/checkout@v2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: optimum-rbln
3
- Version: 0.7.4a1
3
+ Version: 0.7.4a2
4
4
  Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
5
5
  Project-URL: Homepage, https://rebellions.ai
6
6
  Project-URL: Documentation, https://docs.rbln.ai
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.7.4a1'
20
+ __version__ = version = '0.7.4a2'
21
21
  __version_tuple__ = version_tuple = (0, 7, 4)
@@ -578,11 +578,41 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
578
578
  nbits_per_param: int,
579
579
  n_model_params: int,
580
580
  ) -> int:
581
+ """
582
+ We are finding max_n_blocks(x) that satisfies the following equation:
583
+
584
+ available_dram - kernel_size - buffer
585
+ - num_layers * 2 * tensor_parallel_size
586
+ * align_2MB(
587
+ x
588
+ * block_size
589
+ * align_64(head_dim)
590
+ * math.ceil(num_key_value_heads / tensor_parallel_size)
591
+ * 2
592
+ ) > 0
593
+
594
+ This inequality can be rewritten as follows:
595
+
596
+ a - c * align_2MB(b * x) > 0
597
+ where
598
+ a = available_dram - kernel_size - buffer
599
+ b = block_size * align_64(head_dim) * math.ceil(num_key_value_heads / tensor_parallel_size) * 2
600
+ c = num_layers * 2 * tensor_parallel_size
601
+
602
+ We can rewrite the inequality as follows:
603
+ k > align_2MB(b*x)
604
+ where
605
+ k = a / c
606
+
607
+ After that, we can derive the following equation:
608
+ x = floor(2**21 / b * floor((k - 1) / 2**21))
609
+ """
610
+
581
611
  def align(x: int, nbytes: int) -> int:
582
612
  return int(math.ceil(x / nbytes) * nbytes)
583
613
 
584
614
  def align_2MB(x: int) -> int:
585
- return align(x, 2 * 1024 * 1024)
615
+ return align(x, 2**21)
586
616
 
587
617
  num_attention_heads = getattr(config, "n_head", None) or getattr(config, "num_attention_heads")
588
618
  num_layers = getattr(config, "n_layer", None) or getattr(config, "num_hidden_layers")
@@ -612,27 +642,16 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
612
642
  available_dram -= kernel_size
613
643
 
614
644
  # TODO: Accurate buffer estimation
615
- buffer = 2**30 # 1GB Buffer
616
- if tensor_parallel_size <= 4:
617
- buffer /= 4
618
-
645
+ buffer_per_core = 2**29 # 500MB per npu
646
+ buffer = buffer_per_core * tensor_parallel_size
619
647
  available_dram -= buffer
620
648
 
621
- # Estimate nbytes per a single kvcache block
622
- nbytes_per_block = (
623
- align_2MB(
624
- kvcache_block_size
625
- * head_dim
626
- * math.ceil(num_key_value_heads / tensor_parallel_size) # Shard
627
- * 2 # (fp16)
628
- )
629
- * num_layers
630
- * 2 # (k, v)
631
- * tensor_parallel_size
632
- )
633
- n_blocks = available_dram // nbytes_per_block
649
+ b = kvcache_block_size * align(head_dim, 64) * math.ceil(num_key_value_heads / tensor_parallel_size) * 2
650
+ c = num_layers * 2 * tensor_parallel_size
651
+ k = available_dram / c
652
+ max_n_blocks = math.floor(2**21 / b * math.floor((k - 1) / 2**21))
634
653
 
635
- return n_blocks, nbytes_per_block
654
+ return max_n_blocks
636
655
 
637
656
  @classmethod
638
657
  def _get_rbln_config(
@@ -689,7 +708,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
689
708
 
690
709
  rbln_kvcache_num_blocks = (rbln_max_seq_len // rbln_kvcache_block_size) * rbln_batch_size
691
710
  if rbln_attn_impl == "flash_attn":
692
- max_num_blocks, _ = cls.get_maximum_num_blocks(
711
+ max_num_blocks = cls.get_maximum_num_blocks(
693
712
  config=model_config,
694
713
  tensor_parallel_size=rbln_kwargs.get("tensor_parallel_size", 1),
695
714
  kvcache_block_size=rbln_kvcache_block_size,
@@ -36,19 +36,50 @@ from .t5_architecture import T5Wrapper
36
36
  logger = get_logger()
37
37
 
38
38
  if TYPE_CHECKING:
39
+ from rebel import Runtime
39
40
  from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel
40
41
 
41
42
 
42
43
  class RBLNRuntimeModel(RBLNPytorchRuntime):
44
+ def __init__(
45
+ self,
46
+ runtime: "Runtime",
47
+ max_seq_len: int,
48
+ **kwargs: Any,
49
+ ) -> None:
50
+ super().__init__(runtime, **kwargs)
51
+ self.max_seq_len = max_seq_len
52
+
53
+ def _prepare_inputs(
54
+ self,
55
+ input_ids: torch.LongTensor,
56
+ attention_mask: torch.LongTensor,
57
+ ):
58
+ input_len = input_ids.shape[-1]
59
+ pad_len = None
60
+ if input_len > self.max_seq_len:
61
+ raise ValueError(f"Error input_len({input_len}) exceed max_seq_len({self.max_seq_len}).")
62
+ elif input_len < self.max_seq_len and input_len > 0:
63
+ pad_len = self.max_seq_len - input_len
64
+ logger.warning(
65
+ f"Warning: The input was padded with {pad_len} tokens to meet the compiled model's requirements. "
66
+ "For optimal performance, consider recompiling with a shorter 'rbln_max_seq_len'."
67
+ )
68
+ input_ids = torch.nn.functional.pad(input_ids, (0, pad_len))
69
+ attention_mask = torch.nn.functional.pad(attention_mask, (0, pad_len), value=0)
70
+
71
+ return input_ids, attention_mask, pad_len
72
+
43
73
  def forward(
44
74
  self,
45
75
  input_ids: torch.LongTensor,
46
- attention_mask: torch.FloatTensor,
76
+ attention_mask: torch.LongTensor,
47
77
  head_mask: torch.FloatTensor,
48
78
  inputs_embeds: torch.FloatTensor,
49
79
  **kwargs,
50
80
  ):
51
- return super().forward(
81
+ input_ids, attention_mask, pad_len = self._prepare_inputs(input_ids, attention_mask)
82
+ logits = super().forward(
52
83
  input_ids,
53
84
  attention_mask,
54
85
  head_mask,
@@ -56,6 +87,8 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
56
87
  **kwargs,
57
88
  )
58
89
 
90
+ return logits[:, :-pad_len, :] if pad_len is not None else logits
91
+
59
92
 
60
93
  class T5EncoderWrapper(torch.nn.Module):
61
94
  def __init__(self, model: "T5EncoderModel") -> None:
@@ -72,7 +105,8 @@ class RBLNT5EncoderModel(RBLNModel):
72
105
  rbln_model_input_names = ["input_ids", "attention_mask"]
73
106
 
74
107
  def __post_init__(self, **kwargs):
75
- self.model = RBLNRuntimeModel(runtime=self.model[0])
108
+ max_seq_len = self.rbln_config.model_cfg["max_seq_len"]
109
+ self.model = RBLNRuntimeModel(runtime=self.model[0], max_seq_len=max_seq_len)
76
110
 
77
111
  @classmethod
78
112
  def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
@@ -190,11 +190,11 @@ class WhisperDecoder(nn.Module):
190
190
  all_hiddens = []
191
191
  for i in range(inputs_embeds.shape[0]):
192
192
  position_id = cache_position[i]
193
- position = self.embed_positions(input_ids, position_ids=position_id)
193
+ position = self.embed_positions.weight[position_id]
194
194
  batch_hidden = position + inputs_embeds[i]
195
195
  all_hiddens.append(batch_hidden)
196
196
 
197
- hidden_states = torch.stack(all_hiddens, dim=0)
197
+ hidden_states = torch.cat(all_hiddens, dim=0).unsqueeze(1)
198
198
 
199
199
  # prepare attn mask (normal attention - masked)
200
200
  if attention_mask is not None:
File without changes
File without changes
File without changes