optimum-rbln 0.7.3a6__tar.gz → 0.7.3.post1__tar.gz

This diff covers the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (189)
  1. optimum_rbln-0.7.3.post1/.github/version.yaml +1 -0
  2. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/rbln_optimum_pytest.yaml +24 -4
  3. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/PKG-INFO +1 -1
  4. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/__version__.py +2 -2
  5. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/modeling_diffusers.py +99 -111
  6. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +3 -3
  7. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +2 -0
  8. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +12 -0
  9. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +19 -13
  10. optimum_rbln-0.7.3.post1/src/optimum/rbln/transformers/models/t5/modeling_t5.py +210 -0
  11. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/utils/import_utils.py +7 -0
  12. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/tests/test_diffusers.py +31 -0
  13. optimum_rbln-0.7.3a6/.github/version.yaml +0 -1
  14. optimum_rbln-0.7.3a6/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -417
  15. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  16. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  17. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  18. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
  19. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/pull_request_template.md +0 -0
  20. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/scripts/auto_code_review.py +0 -0
  21. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/scripts/validate_pr_checklist.py +0 -0
  22. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/auto_code_review.yml +0 -0
  23. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/check_code_quality.yml +0 -0
  24. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/deploy-on-tag.yaml +0 -0
  25. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/deploy.yaml +0 -0
  26. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/pr-title-check.yaml +0 -0
  27. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/pr_checklist_validator.yml +0 -0
  28. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/rbln_check_compiler.yaml +0 -0
  29. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
  30. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
  31. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/rbln_scheduled_test.yaml +0 -0
  32. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
  33. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/.gitignore +0 -0
  34. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/CODE_OF_CONDUCT.md +0 -0
  35. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/CONTRIBUTING.md +0 -0
  36. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/LICENSE +0 -0
  37. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/README.md +0 -0
  38. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/assets/rbln_logo.png +0 -0
  39. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/audio-classification/run_ast_audio_classification.py +0 -0
  40. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/depth-estimation/run_dpt.py +0 -0
  41. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/image-classification/run_image_classification.py +0 -0
  42. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/image-classification/run_vit_image_classification.py +0 -0
  43. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
  44. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
  45. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
  46. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
  47. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
  48. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
  49. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
  50. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
  51. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/question-answering/run_question_answering.py +0 -0
  52. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/speech-recognition/run_wav2vec2.py +0 -0
  53. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/speech-recognition/run_whisper.py +0 -0
  54. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
  55. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
  56. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
  57. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
  58. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
  59. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
  60. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
  61. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
  62. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
  63. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/text-classification/run_secureBERT.py +0 -0
  64. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/text-classification/run_t5_classification.py +0 -0
  65. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
  66. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
  67. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/text2text-generation/run_llama_peft.py +0 -0
  68. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
  69. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/pyproject.toml +0 -0
  70. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/scripts/uv-lock.sh +0 -0
  71. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/scripts/uv-sync.sh +0 -0
  72. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/__init__.py +0 -0
  73. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/__init__.py +0 -0
  74. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
  75. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
  76. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
  77. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
  78. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
  79. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
  80. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
  81. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
  82. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
  83. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
  84. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
  85. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
  86. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
  87. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
  88. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
  89. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
  90. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
  91. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
  92. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
  93. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
  94. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
  95. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
  96. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
  97. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
  98. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
  99. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
  100. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
  101. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
  102. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
  103. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
  104. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
  105. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
  106. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
  107. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
  108. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
  109. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/modeling.py +0 -0
  110. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/modeling_base.py +0 -0
  111. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/modeling_config.py +0 -0
  112. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/ops/__init__.py +0 -0
  113. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/ops/attn.py +0 -0
  114. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/ops/flash_attn.py +0 -0
  115. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
  116. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/__init__.py +0 -0
  117. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/modeling_alias.py +0 -0
  118. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
  119. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
  120. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/__init__.py +0 -0
  121. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
  122. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/auto/auto_factory.py +0 -0
  123. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
  124. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
  125. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
  126. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
  127. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
  128. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
  129. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
  130. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
  131. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +0 -0
  132. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
  133. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
  134. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
  135. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
  136. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
  137. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
  138. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
  139. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
  140. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
  141. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
  142. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
  143. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
  144. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
  145. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
  146. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
  147. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +0 -0
  148. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
  149. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
  150. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
  151. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
  152. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
  153. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
  154. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
  155. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
  156. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
  157. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
  158. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
  159. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
  160. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
  161. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +0 -0
  162. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
  163. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +0 -0
  164. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
  165. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
  166. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
  167. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
  168. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +0 -0
  169. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
  170. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
  171. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
  172. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
  173. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
  174. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/utils/__init__.py +0 -0
  175. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/utils/decorator_utils.py +0 -0
  176. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/utils/hub.py +0 -0
  177. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/utils/logging.py +0 -0
  178. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/utils/model_utils.py +0 -0
  179. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/utils/runtime_utils.py +0 -0
  180. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/utils/save_utils.py +0 -0
  181. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/src/optimum/rbln/utils/submodule.py +0 -0
  182. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/tests/__init__.py +0 -0
  183. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/tests/psnr.py +0 -0
  184. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/tests/requirements_sdxl.txt +0 -0
  185. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/tests/run_stable_diffusion_xl_base.py +0 -0
  186. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/tests/test_base.py +0 -0
  187. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/tests/test_llm.py +0 -0
  188. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/tests/test_transformers.py +0 -0
  189. {optimum_rbln-0.7.3a6 → optimum_rbln-0.7.3.post1}/uv.lock +0 -0
optimum_rbln-0.7.3.post1/.github/version.yaml
@@ -0,0 +1 @@
+ rebel_compiler_version: 0.7.3
.github/workflows/rbln_optimum_pytest.yaml
@@ -38,6 +38,13 @@ jobs:
  submodules: recursive
  fetch-depth: 0
 
+ - name: Get commit message if not provided
+ id: get_commit_message
+ if: ${{ inputs.commit_message == '' }}
+ run: |
+ COMMIT_MESSAGE=$(git log -1 --pretty=%B)
+ echo "message=$COMMIT_MESSAGE" >> $GITHUB_OUTPUT
+
  - name: Setup uv
  uses: astral-sh/setup-uv@v3
  with:
@@ -55,21 +62,34 @@ jobs:
  run: |
  PYPI_URL=$(echo ${{ env.REBEL_PYPI_ENDPOINT }} | sed "s/\/\//\0${{ env.REBEL_PYPI_USERNAME}}:${{ env.REBEL_PYPI_PASSWORD}}@/")
  uv pip install --extra-index-url $PYPI_URL rebel-compiler==${{ inputs.rebel_compiler_version }}
-
+
  - name: Run pytest (transformers)
  env:
  OPTIMUM_RBLN_TEST_LEVEL: ${{ inputs.test_level }}
  run: |
- uv run --no-sync pytest tests/test_transformers.py
+ echo
+ if ${{ !contains( steps.get_commit_message.outputs.message , '[skip-transformers]') }}; then
+ uv run --no-sync pytest tests/test_transformers.py
+ else
+ echo "Found [skip-transformers] in commit message, skipping CI"
+ fi
 
  - name: Run pytest (diffusers)
  env:
  OPTIMUM_RBLN_TEST_LEVEL: ${{ inputs.test_level }}
  run: |
- uv run --no-sync pytest tests/test_diffusers.py
+ if ${{ !contains( steps.get_commit_message.outputs.message , '[skip-diffusers]') }}; then
+ uv run --no-sync pytest tests/test_diffusers.py
+ else
+ echo "Found [skip-diffusers] in commit message, skipping CI"
+ fi
 
  - name: Run pytest (llm)
  env:
  OPTIMUM_RBLN_TEST_LEVEL: ${{ inputs.test_level }}
  run: |
- uv run --no-sync pytest tests/test_llm.py
+ if ${{ !contains( steps.get_commit_message.outputs.message , '[skip-llms]') }}; then
+ uv run --no-sync pytest tests/test_llm.py
+ else
+ echo "Found [skip-llms] in commit message, skipping CI"
+ fi
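
The three new run steps gate each pytest suite on a commit-message marker. Below is a minimal Python sketch of the same check, handy for predicting locally which suites a given commit message would trigger; the marker strings and test paths are taken from the workflow above, while the helper itself is hypothetical and not part of the repository.

SKIP_MARKERS = {
    "[skip-transformers]": "tests/test_transformers.py",
    "[skip-diffusers]": "tests/test_diffusers.py",
    "[skip-llms]": "tests/test_llm.py",
}

def suites_to_run(commit_message: str) -> list:
    # Mirrors the !contains(...) checks in the workflow: a suite runs unless
    # its marker appears somewhere in the commit message.
    return [path for marker, path in SKIP_MARKERS.items() if marker not in commit_message]

print(suites_to_run("fix kandinsky combined pipeline [skip-llms]"))
# ['tests/test_transformers.py', 'tests/test_diffusers.py']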
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: optimum-rbln
- Version: 0.7.3a6
+ Version: 0.7.3.post1
  Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
  Project-URL: Homepage, https://rebellions.ai
  Project-URL: Documentation, https://docs.rbln.ai
src/optimum/rbln/__version__.py
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '0.7.3a6'
- __version_tuple__ = version_tuple = (0, 7, 3, 'a6')
+ __version__ = version = '0.7.3.post1'
+ __version_tuple__ = version_tuple = (0, 7, 3)
src/optimum/rbln/diffusers/modeling_diffusers.py
@@ -23,7 +23,6 @@ from ..modeling import RBLNModel
  from ..modeling_config import RUNTIME_KEYWORDS, ContextRblnConfig, use_rbln_config
  from ..utils.decorator_utils import remove_compile_time_kwargs
  from ..utils.logging import get_logger
- from . import pipelines
 
 
  logger = get_logger(__name__)
@@ -67,6 +66,7 @@ class RBLNDiffusionMixin:
  as keys in rbln_config
  """
 
+ _connected_classes = {}
  _submodules = []
  _prefix = {}
 
@@ -103,37 +103,6 @@ class RBLNDiffusionMixin:
  }
  )
  submodule_config = submodule_cls.update_rbln_config_using_pipe(model, submodule_config)
- elif hasattr(pipelines, submodule_class_name):
- submodule_config = rbln_config.get(submodule_name, {})
- submodule_config = copy.deepcopy(submodule_config)
-
- submodule_cls: RBLNModel = getattr(importlib.import_module("optimum.rbln"), f"{submodule_class_name}")
- prefix = cls._prefix.get(submodule_name, "")
- connected_submodules = cls._connected_classes.get(submodule_name)._submodules
- pipe_global_config = {k: v for k, v in submodule_config.items() if k not in connected_submodules}
- submodule_config = {k: v for k, v in submodule_config.items() if k in connected_submodules}
- for key in submodule_config.keys():
- submodule_config[key].update(pipe_global_config)
-
- for connected_submodule_name in connected_submodules:
- connected_submodule_config = rbln_config.pop(prefix + connected_submodule_name, {})
- if connected_submodule_name in submodule_config:
- submodule_config[connected_submodule_name].update(connected_submodule_config)
- else:
- submodule_config[connected_submodule_name] = connected_submodule_config
-
- pipe_global_config = {
- k: v for k, v in rbln_config.items() if k != submodule_class_name and not isinstance(v, dict)
- }
-
- for connected_submodule_name in connected_submodules:
- for k, v in pipe_global_config.items():
- if "guidance_scale" in k:
- if prefix + "guidance_scale" == k:
- submodule_config[connected_submodule_name]["guidance_scale"] = v
- else:
- submodule_config[connected_submodule_name][k] = v
- rbln_config[submodule_name] = submodule_config
  else:
  raise ValueError(f"submodule {submodule_name} isn't supported")
  return submodule_config
@@ -199,25 +168,8 @@ class RBLNDiffusionMixin:
  else:
  # raise error if any of submodules are torch module.
  model_index_config = cls.load_config(pretrained_model_name_or_path=model_id)
- if cls._load_connected_pipes:
- submodules = []
- for submodule in cls._submodules:
- submodule_config = rbln_config.pop(submodule, {})
- prefix = cls._prefix.get(submodule, "")
- connected_submodules = cls._connected_classes.get(submodule)._submodules
- for connected_submodule_name in connected_submodules:
- connected_submodule_config = submodule_config.pop(connected_submodule_name, {})
- if connected_submodule_config:
- rbln_config[prefix + connected_submodule_name] = connected_submodule_config
- submodules.append(prefix + connected_submodule_name)
- pipe_global_config = {k: v for k, v in rbln_config.items() if k not in submodules}
- for submodule in submodules:
- if submodule in rbln_config:
- rbln_config[submodule].update(pipe_global_config)
- else:
- submodules = cls._submodules
-
- for submodule_name in submodules:
+ rbln_config = cls._flatten_rbln_config(rbln_config)
+ for submodule_name in cls._submodules:
  if isinstance(kwargs.get(submodule_name), torch.nn.Module):
  raise AssertionError(
  f"{submodule_name} is not compiled torch module. If you want to compile, set `export=True`."
@@ -266,9 +218,89 @@ class RBLNDiffusionMixin:
  lora_scales=lora_scales,
  )
 
- compiled_submodules = cls._compile_submodules(model, passed_submodules, model_save_dir, rbln_config)
+ if cls._load_connected_pipes:
+ compiled_submodules = cls._compile_pipelines(model, passed_submodules, model_save_dir, rbln_config)
+ else:
+ compiled_submodules = cls._compile_submodules(model, passed_submodules, model_save_dir, rbln_config)
  return cls._construct_pipe(model, compiled_submodules, model_save_dir, rbln_config)
 
+ @classmethod
+ def _prepare_rbln_config(
+ cls,
+ rbln_config,
+ ) -> Dict[str, Any]:
+ prepared_config = {}
+ for connected_pipe_name, connected_pipe_cls in cls._connected_classes.items():
+ connected_pipe_config = rbln_config.pop(connected_pipe_name, {})
+ prefix = cls._prefix.get(connected_pipe_name, "")
+ guidance_scale = rbln_config.pop(f"{prefix}guidance_scale", None)
+ if "guidance_scale" not in connected_pipe_config and guidance_scale is not None:
+ connected_pipe_config["guidance_scale"] = guidance_scale
+ for submodule_name in connected_pipe_cls._submodules:
+ submodule_config = rbln_config.pop(prefix + submodule_name, {})
+ if submodule_name not in connected_pipe_config:
+ connected_pipe_config[submodule_name] = {}
+ connected_pipe_config[submodule_name].update(
+ {k: v for k, v in submodule_config.items() if k not in connected_pipe_config[submodule_name]}
+ )
+ prepared_config[connected_pipe_name] = connected_pipe_config
+ prepared_config.update(rbln_config)
+ return prepared_config
+
+ @classmethod
+ def _flatten_rbln_config(
+ cls,
+ rbln_config,
+ ) -> Dict[str, Any]:
+ prepared_config = cls._prepare_rbln_config(rbln_config)
+ flattened_config = {}
+ pipe_global_config = {k: v for k, v in prepared_config.items() if k not in cls._connected_classes.keys()}
+ for connected_pipe_name, connected_pipe_cls in cls._connected_classes.items():
+ connected_pipe_config = prepared_config.pop(connected_pipe_name)
+ prefix = cls._prefix.get(connected_pipe_name, "")
+ connected_pipe_global_config = {
+ k: v for k, v in connected_pipe_config.items() if k not in connected_pipe_cls._submodules
+ }
+ for submodule_name in connected_pipe_cls._submodules:
+ flattened_config[prefix + submodule_name] = connected_pipe_config[submodule_name]
+ flattened_config[prefix + submodule_name].update(
+ {
+ k: v
+ for k, v in connected_pipe_global_config.items()
+ if k not in flattened_config[prefix + submodule_name]
+ }
+ )
+ flattened_config.update(pipe_global_config)
+ return flattened_config
+
+ @classmethod
+ def _compile_pipelines(
+ cls,
+ model: torch.nn.Module,
+ passed_submodules: Dict[str, RBLNModel],
+ model_save_dir: Optional[PathLike],
+ rbln_config: Dict[str, Any],
+ ) -> Dict[str, RBLNModel]:
+ compiled_submodules = {}
+
+ rbln_config = cls._prepare_rbln_config(rbln_config)
+ pipe_global_config = {k: v for k, v in rbln_config.items() if k not in cls._connected_classes.keys()}
+ for connected_pipe_name, connected_pipe_cls in cls._connected_classes.items():
+ connected_pipe_submodules = {}
+ prefix = cls._prefix.get(connected_pipe_name, "")
+ for submodule_name in connected_pipe_cls._submodules:
+ connected_pipe_submodules[submodule_name] = passed_submodules.get(prefix + submodule_name, None)
+ connected_pipe = getattr(model, connected_pipe_name)
+ connected_pipe_config = {}
+ connected_pipe_config.update(pipe_global_config)
+ connected_pipe_config.update(rbln_config[connected_pipe_name])
+ connected_pipe_compiled_submodules = connected_pipe_cls._compile_submodules(
+ connected_pipe, connected_pipe_submodules, model_save_dir, connected_pipe_config, prefix
+ )
+ for submodule_name, compiled_submodule in connected_pipe_compiled_submodules.items():
+ compiled_submodules[prefix + submodule_name] = compiled_submodule
+ return compiled_submodules
+
  @classmethod
  def _compile_submodules(
  cls,
@@ -307,41 +339,6 @@ class RBLNDiffusionMixin:
  model_save_dir=model_save_dir,
  rbln_config=submodule_rbln_config,
  )
- elif hasattr(pipelines, submodule.__class__.__name__):
- connected_pipe = submodule
- connected_pipe_model_save_dir = model_save_dir
- connected_pipe_rbln_config = submodule_rbln_config
- connected_pipe_cls: RBLNDiffusionMixin = getattr(
- importlib.import_module("optimum.rbln"), connected_pipe.__class__.__name__
- )
- submodule_dict = {}
- for name in connected_pipe.config.keys():
- if hasattr(connected_pipe, name):
- submodule_dict[name] = getattr(connected_pipe, name)
- connected_pipe = connected_pipe_cls(**submodule_dict)
- connected_pipe_submodules = {}
- prefix = cls._prefix.get(submodule_name, "")
- for name in connected_pipe_cls._submodules:
- if prefix + name in passed_submodules:
- connected_pipe_submodules[name] = passed_submodules.get(prefix + name)
-
- connected_pipe_compiled_submodules = connected_pipe_cls._compile_submodules(
- model=connected_pipe,
- passed_submodules=connected_pipe_submodules,
- model_save_dir=model_save_dir,
- rbln_config=connected_pipe_rbln_config,
- prefix=prefix,
- )
- connected_pipe = connected_pipe_cls._construct_pipe(
- connected_pipe,
- connected_pipe_compiled_submodules,
- connected_pipe_model_save_dir,
- connected_pipe_rbln_config,
- )
-
- for name in connected_pipe_cls._submodules:
- compiled_submodules[prefix + name] = getattr(connected_pipe, name)
- submodule = connected_pipe
  else:
  raise ValueError(f"Unknown class of submodule({submodule_name}) : {submodule.__class__.__name__} ")
 
@@ -374,23 +371,16 @@ class RBLNDiffusionMixin:
  @classmethod
  def _construct_pipe(cls, model, submodules, model_save_dir, rbln_config):
  # Construct finalize pipe setup with compiled submodules and configurations
- submodule_names = []
- for submodule_name in cls._submodules:
- submodule = getattr(model, submodule_name)
- if hasattr(pipelines, submodule.__class__.__name__):
- prefix = cls._prefix.get(submodule_name, "")
- connected_pipe_submodules = submodules[submodule_name].__class__._submodules
- connected_pipe_submodules = [prefix + name for name in connected_pipe_submodules]
- submodule_names += connected_pipe_submodules
- setattr(model, submodule_name, submodules[submodule_name])
- else:
- submodule_names.append(submodule_name)
-
  if model_save_dir is not None:
  # To skip saving original pytorch modules
- for submodule_name in submodule_names:
+ for submodule_name in cls._submodules:
  delattr(model, submodule_name)
 
+ if cls._load_connected_pipes:
+ for connected_pipe_name, connected_pipe_cls in cls._connected_classes.items():
+ for submodule_name in connected_pipe_cls._submodules:
+ delattr(getattr(model, connected_pipe_name), submodule_name)
+
  # Direct calling of `save_pretrained` causes config.unet = (None, None).
  # So config must be saved again, later.
  model.save_pretrained(model_save_dir)
@@ -398,10 +388,15 @@ class RBLNDiffusionMixin:
  # Causing warning messeages.
 
  update_dict = {}
- for submodule_name in submodule_names:
+ for submodule_name in cls._submodules:
  # replace submodule
  setattr(model, submodule_name, submodules[submodule_name])
  update_dict[submodule_name] = ("optimum.rbln", submodules[submodule_name].__class__.__name__)
+ if cls._load_connected_pipes:
+ for connected_pipe_name, connected_pipe_cls in cls._connected_classes.items():
+ prefix = cls._prefix.get(connected_pipe_name, "")
+ for submodule_name in connected_pipe_cls._submodules:
+ setattr(getattr(model, connected_pipe_name), submodule_name, submodules[prefix + submodule_name])
 
  # Update config to be able to load from model directory.
  #
@@ -420,16 +415,9 @@ class RBLNDiffusionMixin:
  if rbln_config.get("optimize_host_memory") is False:
  # Keep compiled_model objs to further analysis. -> TODO: remove soon...
  model.compiled_models = []
- if model._load_connected_pipes:
- for name in cls._submodules:
- connected_pipe = getattr(model, name)
- for submodule_name in connected_pipe.__class__._submodules:
- submodule = getattr(connected_pipe, submodule_name)
- model.compiled_models.extend(submodule.compiled_models)
- else:
- for name in cls._submodules:
- submodule = getattr(model, name)
- model.compiled_models.extend(submodule.compiled_models)
+ for name in cls._submodules:
+ submodule = getattr(model, name)
+ model.compiled_models.extend(submodule.compiled_models)
 
  return model
 
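
The refactor above replaces the ad-hoc connected-pipe handling with `_prepare_rbln_config`, `_flatten_rbln_config`, and `_compile_pipelines`. As an illustration only (not library API; the exact merge rules are defined by the methods above), a nested per-pipe `rbln_config` for a Kandinsky-style combined pipeline is rewritten into flat, prefixed submodule keys that `_compile_submodules` can consume directly:

# Nested form accepted by the combined pipeline (illustrative values):
nested_rbln_config = {
    "prior_pipe": {
        "guidance_scale": 4.0,
        "image_encoder": {"batch_size": 1},
        "text_encoder": {"batch_size": 1},
        "prior": {"batch_size": 1},
    },
    "decoder_pipe": {
        "unet": {"batch_size": 2},
        "movq": {"batch_size": 2},
    },
}

# Roughly the flattened form produced by _flatten_rbln_config, keyed by the new
# _submodules entries ("prior_image_encoder", ..., "unet", "movq"); pipe-level keys
# such as guidance_scale are copied into each prior_* submodule config:
flattened_rbln_config = {
    "prior_image_encoder": {"batch_size": 1, "guidance_scale": 4.0},
    "prior_text_encoder": {"batch_size": 1, "guidance_scale": 4.0},
    "prior_prior": {"batch_size": 1, "guidance_scale": 4.0},
    "unet": {"batch_size": 2},
    "movq": {"batch_size": 2},
}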
src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py
@@ -39,7 +39,7 @@ from .pipeline_kandinsky2_2_prior import RBLNKandinskyV22PriorPipeline
  class RBLNKandinskyV22CombinedPipeline(RBLNDiffusionMixin, KandinskyV22CombinedPipeline):
  original_class = KandinskyV22CombinedPipeline
  _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22Pipeline}
- _submodules = ["prior_pipe", "decoder_pipe"]
+ _submodules = ["prior_image_encoder", "prior_text_encoder", "prior_prior", "unet", "movq"]
  _prefix = {"prior_pipe": "prior_"}
 
  def __init__(
@@ -90,7 +90,7 @@ class RBLNKandinskyV22CombinedPipeline(RBLNDiffusionMixin, KandinskyV22CombinedP
  class RBLNKandinskyV22Img2ImgCombinedPipeline(RBLNDiffusionMixin, KandinskyV22Img2ImgCombinedPipeline):
  original_class = KandinskyV22Img2ImgCombinedPipeline
  _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22Img2ImgPipeline}
- _submodules = ["prior_pipe", "decoder_pipe"]
+ _submodules = ["prior_image_encoder", "prior_text_encoder", "prior_prior", "unet", "movq"]
  _prefix = {"prior_pipe": "prior_"}
 
  def __init__(
@@ -141,7 +141,7 @@ class RBLNKandinskyV22Img2ImgCombinedPipeline(RBLNDiffusionMixin, KandinskyV22Im
  class RBLNKandinskyV22InpaintCombinedPipeline(RBLNDiffusionMixin, KandinskyV22InpaintCombinedPipeline):
  original_class = KandinskyV22InpaintCombinedPipeline
  _connected_classes = {"prior_pipe": RBLNKandinskyV22PriorPipeline, "decoder_pipe": RBLNKandinskyV22InpaintPipeline}
- _submodules = ["prior_pipe", "decoder_pipe"]
+ _submodules = ["prior_image_encoder", "prior_text_encoder", "prior_prior", "unet", "movq"]
  _prefix = {"prior_pipe": "prior_"}
 
  def __init__(
src/optimum/rbln/transformers/models/bart/modeling_bart.py
@@ -108,6 +108,8 @@ class RBLNBartModel(RBLNModel):
 
 
  class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM):
+ support_paged_causal_attn = True
+
  @classmethod
  def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
  enc_max_seq_len = (
src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py
@@ -98,6 +98,18 @@ def validate_attention_method(
  "this requirement, or consider switching `rbln_attn_impl` to 'eager' for shorter lengths."
  )
 
+ if rbln_kvcache_block_size is not None:
+ if rbln_attn_impl == "flash_attn" and rbln_kvcache_partition_len != rbln_kvcache_block_size:
+ raise ValueError(
+ f" When using 'flash attention', the `rbln_kvcache_block_size` ({rbln_kvcache_block_size}) "
+ f"must always be set equal to the `rbln_kvcache_partition_len` {rbln_kvcache_partition_len}."
+ )
+ elif rbln_attn_impl == "eager" and rbln_kvcache_block_size != rbln_max_seq_len:
+ raise ValueError(
+ f" When using 'eager attention', the `rbln_kvcache_block_size` ({rbln_kvcache_block_size}) "
+ f"must always be set equal to the `rbln_max_seq_len` {rbln_max_seq_len}."
+ )
+
  return rbln_attn_impl, rbln_kvcache_partition_len, rbln_kvcache_block_size
 
 
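
The new checks in `validate_attention_method` tie `rbln_kvcache_block_size` to the chosen attention implementation. A small sketch of the constraint (names follow the diff; the helper below is illustrative, not the library function itself):

def check_kvcache_block_size(attn_impl, max_seq_len, partition_len, block_size):
    # No constraint when the block size is left unset.
    if block_size is None:
        return
    if attn_impl == "flash_attn" and block_size != partition_len:
        raise ValueError("flash_attn: rbln_kvcache_block_size must equal rbln_kvcache_partition_len")
    if attn_impl == "eager" and block_size != max_seq_len:
        raise ValueError("eager: rbln_kvcache_block_size must equal rbln_max_seq_len")

check_kvcache_block_size("flash_attn", max_seq_len=8192, partition_len=4096, block_size=4096)  # passes
check_kvcache_block_size("eager", max_seq_len=4096, partition_len=None, block_size=4096)       # passes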
src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py
@@ -50,6 +50,7 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
  runtime: rebel.Runtime,
  batch_size: int,
  dec_max_seq_len: int,
+ support_paged_causal_attn: Optional[bool] = None,
  use_attention_mask: Optional[bool] = None,
  **kwargs: Any,
  ) -> None:
@@ -57,7 +58,10 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
  self.batch_size = batch_size
  self.dec_max_seq_len = dec_max_seq_len
  self.use_attention_mask = use_attention_mask
- self.default_block_tables = torch.arange(0, self.batch_size, dtype=torch.int16).view(self.batch_size, 1)
+ if support_paged_causal_attn:
+ self.default_block_tables = torch.arange(0, self.batch_size, dtype=torch.int16).view(self.batch_size, 1)
+ else:
+ self.default_block_tables = None
 
  def forward(
  self,
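
With `support_paged_causal_attn` plumbed into `RBLNRuntimeDecoder`, default block tables are only materialized for models that opt in (for example `RBLNBartForConditionalGeneration` in this diff). A minimal sketch of that branch, assuming only a torch dependency; the standalone helper is illustrative, not library API:

import torch

def make_default_block_tables(batch_size, support_paged_causal_attn):
    # One single-block table row per batch entry when paged causal attention is
    # supported; otherwise no default block table is kept.
    if support_paged_causal_attn:
        return torch.arange(0, batch_size, dtype=torch.int16).view(batch_size, 1)
    return None

print(make_default_block_tables(3, True))   # tensor([[0], [1], [2]], dtype=torch.int16)
print(make_default_block_tables(3, False))  # None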
@@ -94,7 +98,7 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
  decoder_attention_mask if self.use_attention_mask else None,
  attention_mask,
  cache_position,
- block_tables,
+ block_tables=block_tables,
  )
 
  return Seq2SeqLMOutput(logits=lm_logits)
@@ -115,6 +119,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
 
  main_input_name = "input_ids"
  auto_model_class = AutoModelForSeq2SeqLM
+ support_paged_causal_attn = None
 
  def __post_init__(self, **kwargs):
  batch_size = self.rbln_config.model_cfg["batch_size"]
@@ -130,6 +135,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
  main_input_name="input_ids",
  batch_size=batch_size,
  dec_max_seq_len=dec_max_seq_len,
+ support_paged_causal_attn=self.support_paged_causal_attn,
  use_attention_mask=self.use_attention_mask,
  )
 
@@ -186,13 +192,16 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
  rbln_dec_max_seq_len = rbln_kwargs.get("dec_max_seq_len", None)
  rbln_batch_size = rbln_kwargs.get("batch_size", None)
  rbln_batch_size = 1 if rbln_batch_size is None else rbln_batch_size
- rbln_use_attention_mask = rbln_kwargs.get("use_attention_mask", None)
 
- if rbln_use_attention_mask is None:
- rbln_use_attention_mask = False
- rbln_npu = rbln_kwargs.get("npu", None) or rebel.get_npu_name()
- if rbln_npu == "RBLN-CA02":
- rbln_use_attention_mask = True
+ if cls.support_paged_causal_attn:
+ rbln_use_attention_mask = rbln_kwargs.get("use_attention_mask", None)
+ if rbln_use_attention_mask is None:
+ rbln_use_attention_mask = False
+ rbln_npu = rbln_kwargs.get("npu", None) or rebel.get_npu_name()
+ if rbln_npu == "RBLN-CA02":
+ rbln_use_attention_mask = True
+ else:
+ rbln_use_attention_mask = True
 
  n_layer = getattr(model_config, "decoder_layers", None) or getattr(model_config, "num_layers")
  n_head = getattr(model_config, "decoder_attention_heads", None) or getattr(model_config, "num_heads")
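
The reworked default for `rbln_use_attention_mask` only consults the user-supplied value and the NPU name when the class supports paged causal attention; otherwise the attention mask is always used. A sketch of that decision (illustrative helper, not library API; `rebel.get_npu_name()` is the lookup used in the diff, and the non-CA02 name below is a placeholder):

def resolve_use_attention_mask(support_paged_causal_attn, user_value, npu_name):
    if not support_paged_causal_attn:
        return True
    if user_value is not None:
        return user_value
    # Default off, except on RBLN-CA02 where the attention mask is required.
    return npu_name == "RBLN-CA02"

print(resolve_use_attention_mask(True, None, "RBLN-CA02"))   # True
print(resolve_use_attention_mask(True, None, "other-npu"))   # False
print(resolve_use_attention_mask(False, None, "other-npu"))  # True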
@@ -265,11 +274,6 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
  [rbln_batch_size, 1],
  "int32",
  ),
- (
- "block_tables",
- [rbln_batch_size, 1],
- "int16",
- ),
  ]
  dec_input_info.extend(
  [
@@ -302,6 +306,8 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
  ]
  )
 
+ if cls.support_paged_causal_attn:
+ dec_input_info.insert(3, ("block_tables", [rbln_batch_size, 1], "int16"))
  if rbln_use_attention_mask:
  dec_input_info.insert(1, ("attention_mask", [rbln_batch_size, rbln_dec_max_seq_len], "float32"))