optimum-rbln 0.8.2a4__tar.gz → 0.8.2a6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of optimum-rbln might be problematic; consult the registry's advisory page for details.

Files changed (308)
  1. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/PKG-INFO +1 -1
  2. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/pyproject.toml +1 -0
  3. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/__init__.py +44 -0
  4. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/__version__.py +2 -2
  5. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/configuration_utils.py +4 -0
  6. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/ops/kv_cache_update.py +5 -0
  7. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/ops/linear.py +7 -0
  8. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/__init__.py +48 -0
  9. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/modeling_attention_utils.py +252 -0
  10. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/__init__.py +35 -14
  11. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +2 -2
  12. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +392 -0
  13. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +122 -205
  14. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +569 -366
  15. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/gemma/__init__.py +16 -0
  16. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/gemma/configuration_gemma.py +9 -1
  17. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +13 -1
  18. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +7 -5
  19. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +82 -59
  20. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/gpt2/__init__.py +16 -0
  21. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +50 -0
  22. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +6 -7
  23. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +16 -1
  24. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +2 -2
  25. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/llama/__init__.py +16 -0
  26. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/llama/configuration_llama.py +9 -1
  27. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +13 -1
  28. {optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/gpt2 → optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/llava}/__init__.py +2 -2
  29. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/llava/configuration_llava.py +54 -0
  30. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/llava/modeling_llava.py +379 -0
  31. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +4 -4
  32. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/mistral/__init__.py +16 -0
  33. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/mistral/configuration_mistral.py +9 -1
  34. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +1 -1
  35. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +26 -3
  36. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/opt/__init__.py +16 -0
  37. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/opt/configuration_opt.py +8 -1
  38. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/opt/modeling_opt.py +41 -1
  39. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/opt/opt_architecture.py +4 -4
  40. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
  41. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +34 -0
  42. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +69 -0
  43. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +163 -0
  44. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/phi/__init__.py +16 -0
  45. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/phi/configuration_phi.py +9 -1
  46. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +10 -1
  47. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +6 -6
  48. {optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/gemma → optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/pixtral}/__init__.py +2 -2
  49. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
  50. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +318 -0
  51. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
  52. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/qwen2/__init__.py +16 -0
  53. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +9 -1
  54. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +27 -1
  55. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +3 -3
  56. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +2 -2
  57. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +59 -0
  58. optimum_rbln-0.8.2a6/src/optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +34 -0
  59. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +0 -10
  60. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/whisper/configuration_whisper.py +1 -10
  61. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +5 -1
  62. optimum_rbln-0.8.2a6/src/optimum/rbln/utils/depreacate_utils.py +16 -0
  63. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/tests/test_base.py +4 -1
  64. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/tests/test_llm.py +217 -3
  65. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/uv.lock +984 -962
  66. optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +0 -223
  67. optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +0 -22
  68. optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/llama/__init__.py +0 -16
  69. optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -16
  70. optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/opt/__init__.py +0 -16
  71. optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/phi/__init__.py +0 -16
  72. optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -16
  73. optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +0 -377
  74. optimum_rbln-0.8.2a4/src/optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +0 -275
  75. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  76. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  77. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  78. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
  79. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/pull_request_template.md +0 -0
  80. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/scripts/auto_code_review.py +0 -0
  81. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/scripts/validate_docstrings.py +0 -0
  82. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/scripts/validate_pr_checklist.py +0 -0
  83. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/version.yaml +0 -0
  84. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/auto_code_review.yml +0 -0
  85. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/check_code_quality.yml +0 -0
  86. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/deploy-on-tag.yaml +0 -0
  87. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/deploy.yaml +0 -0
  88. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/pr-title-check.yaml +0 -0
  89. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/pr_checklist_validator.yml +0 -0
  90. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/rbln_check_compiler.yaml +0 -0
  91. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
  92. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
  93. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
  94. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/rbln_scheduled_test.yaml +0 -0
  95. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
  96. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.github/workflows/test-docstrings.yml +0 -0
  97. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/.gitignore +0 -0
  98. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/CODE_OF_CONDUCT.md +0 -0
  99. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/CONTRIBUTING.md +0 -0
  100. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/LICENSE +0 -0
  101. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/README.md +0 -0
  102. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/assets/rbln_logo.png +0 -0
  103. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/advanced/custom_class.py +0 -0
  104. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/audio-classification/run_ast_audio_classification.py +0 -0
  105. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/depth-estimation/run_dpt.py +0 -0
  106. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/image-classification/run_image_classification.py +0 -0
  107. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/image-classification/run_vit_image_classification.py +0 -0
  108. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/image-to-text/run_idefics3.py +0 -0
  109. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
  110. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
  111. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
  112. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
  113. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
  114. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
  115. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
  116. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
  117. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/question-answering/run_question_answering.py +0 -0
  118. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/speech-recognition/run_wav2vec2.py +0 -0
  119. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/speech-recognition/run_whisper.py +0 -0
  120. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
  121. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
  122. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
  123. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
  124. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
  125. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
  126. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
  127. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
  128. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
  129. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/text-classification/run_secureBERT.py +0 -0
  130. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/text-classification/run_t5_classification.py +0 -0
  131. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
  132. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
  133. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/text2text-generation/run_llama_peft.py +0 -0
  134. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
  135. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/examples/time-series-forecasting/run_time_series_forecasting.py +0 -0
  136. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/scripts/uv-lock.sh +0 -0
  137. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/scripts/uv-sync.sh +0 -0
  138. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/__init__.py +0 -0
  139. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/__init__.py +0 -0
  140. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/models/__init__.py +0 -0
  141. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +0 -0
  142. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +0 -0
  143. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +0 -0
  144. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +0 -0
  145. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +0 -0
  146. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +0 -0
  147. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +0 -0
  148. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +0 -0
  149. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/pipelines/__init__.py +0 -0
  150. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +0 -0
  151. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +0 -0
  152. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +0 -0
  153. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +0 -0
  154. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +0 -0
  155. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +0 -0
  156. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
  157. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
  158. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
  159. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
  160. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +0 -0
  161. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
  162. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
  163. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
  164. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
  165. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
  166. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +0 -0
  167. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
  168. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
  169. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
  170. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
  171. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
  172. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
  173. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
  174. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
  175. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
  176. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
  177. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/cosmos/__init__.py +0 -0
  178. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +0 -0
  179. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +0 -0
  180. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +0 -0
  181. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +0 -0
  182. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
  183. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
  184. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
  185. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
  186. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
  187. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
  188. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
  189. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
  190. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
  191. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
  192. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
  193. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
  194. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
  195. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
  196. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
  197. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
  198. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
  199. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
  200. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/modeling.py +0 -0
  201. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/modeling_base.py +0 -0
  202. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/ops/__init__.py +0 -0
  203. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/ops/attn.py +0 -0
  204. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/ops/flash_attn.py +0 -0
  205. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/ops/sliding_window_attn.py +0 -0
  206. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/configuration_generic.py +0 -0
  207. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
  208. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
  209. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +0 -0
  210. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -0
  211. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +0 -0
  212. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
  213. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/auto/auto_factory.py +0 -0
  214. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
  215. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
  216. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
  217. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/bart/configuration_bart.py +0 -0
  218. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -0
  219. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
  220. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/bert/configuration_bert.py +0 -0
  221. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
  222. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/blip_2/__init__.py +0 -0
  223. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +0 -0
  224. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +0 -0
  225. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
  226. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/clip/configuration_clip.py +0 -0
  227. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
  228. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/colpali/__init__.py +0 -0
  229. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/colpali/colpali_architecture.py +0 -0
  230. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/colpali/configuration_colpali.py +0 -0
  231. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py +0 -0
  232. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/distilbert/__init__.py +0 -0
  233. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +0 -0
  234. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +0 -0
  235. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
  236. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/dpt/configuration_dpt.py +0 -0
  237. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
  238. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
  239. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/exaone/configuration_exaone.py +0 -0
  240. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
  241. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
  242. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
  243. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/gemma3/__init__.py +0 -0
  244. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +0 -0
  245. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/idefics3/__init__.py +0 -0
  246. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +0 -0
  247. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
  248. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
  249. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +0 -0
  250. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
  251. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/midm/configuration_midm.py +0 -0
  252. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
  253. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
  254. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
  255. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +0 -0
  256. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +0 -0
  257. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +0 -0
  258. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/qwen3/__init__.py +0 -0
  259. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/resnet/__init__.py +0 -0
  260. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/resnet/configuration_resnet.py +0 -0
  261. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/resnet/modeling_resnet.py +0 -0
  262. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/roberta/__init__.py +0 -0
  263. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/roberta/configuration_roberta.py +0 -0
  264. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/roberta/modeling_roberta.py +0 -0
  265. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
  266. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +0 -0
  267. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +0 -0
  268. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/siglip/__init__.py +0 -0
  269. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/siglip/configuration_siglip.py +0 -0
  270. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py +0 -0
  271. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
  272. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/t5/configuration_t5.py +0 -0
  273. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -0
  274. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +0 -0
  275. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/time_series_transformer/__init__.py +0 -0
  276. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +0 -0
  277. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +0 -0
  278. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -0
  279. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/vit/__init__.py +0 -0
  280. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/vit/configuration_vit.py +0 -0
  281. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/vit/modeling_vit.py +0 -0
  282. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
  283. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +0 -0
  284. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
  285. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
  286. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
  287. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
  288. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
  289. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -0
  290. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
  291. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
  292. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
  293. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/utils/__init__.py +0 -0
  294. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/utils/decorator_utils.py +0 -0
  295. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/utils/hub.py +0 -0
  296. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/utils/import_utils.py +0 -0
  297. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/utils/logging.py +0 -0
  298. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/utils/model_utils.py +0 -0
  299. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/utils/runtime_utils.py +0 -0
  300. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/utils/save_utils.py +0 -0
  301. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/src/optimum/rbln/utils/submodule.py +0 -0
  302. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/tests/__init__.py +0 -0
  303. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/tests/psnr.py +0 -0
  304. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/tests/requirements_sdxl.txt +0 -0
  305. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/tests/run_stable_diffusion_xl_base.py +0 -0
  306. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/tests/test_config.py +0 -0
  307. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/tests/test_diffusers.py +0 -0
  308. {optimum_rbln-0.8.2a4 → optimum_rbln-0.8.2a6}/tests/test_transformers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: optimum-rbln
3
- Version: 0.8.2a4
3
+ Version: 0.8.2a6
4
4
  Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
5
5
  Project-URL: Homepage, https://rebellions.ai
6
6
  Project-URL: Documentation, https://docs.rbln.ai
@@ -55,6 +55,7 @@ tests = [
55
55
  "protobuf>=5.27.2",
56
56
  "soundfile>=0.13.1",
57
57
  "librosa>=0.11.0",
58
+ "simphile>=1.0.2",
58
59
  ]
59
60
  quality = [
60
61
  "ruff>=0.3.3",
@@ -80,12 +80,16 @@ _import_structure = {
80
80
  "RBLNDPTForDepthEstimationConfig",
81
81
  "RBLNExaoneForCausalLM",
82
82
  "RBLNExaoneForCausalLMConfig",
83
+ "RBLNGemmaModel",
84
+ "RBLNGemmaModelConfig",
83
85
  "RBLNGemmaForCausalLM",
84
86
  "RBLNGemmaForCausalLMConfig",
85
87
  "RBLNGemma3ForCausalLM",
86
88
  "RBLNGemma3ForCausalLMConfig",
87
89
  "RBLNGemma3ForConditionalGeneration",
88
90
  "RBLNGemma3ForConditionalGenerationConfig",
91
+ "RBLNGPT2Model",
92
+ "RBLNGPT2ModelConfig",
89
93
  "RBLNGPT2LMHeadModel",
90
94
  "RBLNGPT2LMHeadModelConfig",
91
95
  "RBLNIdefics3VisionTransformer",
@@ -94,22 +98,40 @@ _import_structure = {
94
98
  "RBLNIdefics3VisionTransformerConfig",
95
99
  "RBLNLlamaForCausalLM",
96
100
  "RBLNLlamaForCausalLMConfig",
101
+ "RBLNLlamaModel",
102
+ "RBLNLlamaModelConfig",
97
103
  "RBLNOPTForCausalLM",
98
104
  "RBLNOPTForCausalLMConfig",
105
+ "RBLNLlavaForConditionalGeneration",
106
+ "RBLNLlavaForConditionalGenerationConfig",
99
107
  "RBLNLlavaNextForConditionalGeneration",
100
108
  "RBLNLlavaNextForConditionalGenerationConfig",
101
109
  "RBLNMidmLMHeadModel",
102
110
  "RBLNMidmLMHeadModelConfig",
111
+ "RBLNMistralModel",
112
+ "RBLNMistralModelConfig",
103
113
  "RBLNMistralForCausalLM",
104
114
  "RBLNMistralForCausalLMConfig",
115
+ "RBLNOPTModel",
116
+ "RBLNOPTModelConfig",
117
+ "RBLNPegasusForConditionalGeneration",
118
+ "RBLNPegasusForConditionalGenerationConfig",
119
+ "RBLNPegasusModel",
120
+ "RBLNPegasusModelConfig",
105
121
  "RBLNPhiForCausalLM",
106
122
  "RBLNPhiForCausalLMConfig",
123
+ "RBLNPixtralVisionModel",
124
+ "RBLNPixtralVisionModelConfig",
125
+ "RBLNPhiModel",
126
+ "RBLNPhiModelConfig",
107
127
  "RBLNQwen2ForCausalLM",
108
128
  "RBLNQwen2ForCausalLMConfig",
109
129
  "RBLNQwen2_5_VisionTransformerPretrainedModel",
110
130
  "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
111
131
  "RBLNQwen2_5_VLForConditionalGeneration",
112
132
  "RBLNQwen2_5_VLForConditionalGenerationConfig",
133
+ "RBLNQwen2Model",
134
+ "RBLNQwen2ModelConfig",
113
135
  "RBLNQwen3ForCausalLM",
114
136
  "RBLNQwen3ForCausalLMConfig",
115
137
  "RBLNQwen3Model",
@@ -337,30 +359,52 @@ if TYPE_CHECKING:
337
359
  RBLNGemma3ForConditionalGenerationConfig,
338
360
  RBLNGemmaForCausalLM,
339
361
  RBLNGemmaForCausalLMConfig,
362
+ RBLNGemmaModel,
363
+ RBLNGemmaModelConfig,
340
364
  RBLNGPT2LMHeadModel,
341
365
  RBLNGPT2LMHeadModelConfig,
366
+ RBLNGPT2Model,
367
+ RBLNGPT2ModelConfig,
342
368
  RBLNIdefics3ForConditionalGeneration,
343
369
  RBLNIdefics3ForConditionalGenerationConfig,
344
370
  RBLNIdefics3VisionTransformer,
345
371
  RBLNIdefics3VisionTransformerConfig,
346
372
  RBLNLlamaForCausalLM,
347
373
  RBLNLlamaForCausalLMConfig,
374
+ RBLNLlamaModel,
375
+ RBLNLlamaModelConfig,
376
+ RBLNLlavaForConditionalGeneration,
377
+ RBLNLlavaForConditionalGenerationConfig,
348
378
  RBLNLlavaNextForConditionalGeneration,
349
379
  RBLNLlavaNextForConditionalGenerationConfig,
350
380
  RBLNMidmLMHeadModel,
351
381
  RBLNMidmLMHeadModelConfig,
352
382
  RBLNMistralForCausalLM,
353
383
  RBLNMistralForCausalLMConfig,
384
+ RBLNMistralModel,
385
+ RBLNMistralModelConfig,
354
386
  RBLNOPTForCausalLM,
355
387
  RBLNOPTForCausalLMConfig,
388
+ RBLNOPTModel,
389
+ RBLNOPTModelConfig,
390
+ RBLNPegasusForConditionalGeneration,
391
+ RBLNPegasusForConditionalGenerationConfig,
392
+ RBLNPegasusModel,
393
+ RBLNPegasusModelConfig,
356
394
  RBLNPhiForCausalLM,
357
395
  RBLNPhiForCausalLMConfig,
396
+ RBLNPhiModel,
397
+ RBLNPhiModelConfig,
398
+ RBLNPixtralVisionModel,
399
+ RBLNPixtralVisionModelConfig,
358
400
  RBLNQwen2_5_VisionTransformerPretrainedModel,
359
401
  RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
360
402
  RBLNQwen2_5_VLForConditionalGeneration,
361
403
  RBLNQwen2_5_VLForConditionalGenerationConfig,
362
404
  RBLNQwen2ForCausalLM,
363
405
  RBLNQwen2ForCausalLMConfig,
406
+ RBLNQwen2Model,
407
+ RBLNQwen2ModelConfig,
364
408
  RBLNQwen3ForCausalLM,
365
409
  RBLNQwen3ForCausalLMConfig,
366
410
  RBLNQwen3Model,
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.8.2a4'
21
- __version_tuple__ = version_tuple = (0, 8, 2, 'a4')
20
+ __version__ = version = '0.8.2a6'
21
+ __version_tuple__ = version_tuple = (0, 8, 2, 'a6')
@@ -23,6 +23,7 @@ import numpy as np
23
23
  import torch
24
24
 
25
25
  from .__version__ import __version__
26
+ from .utils.depreacate_utils import warn_deprecated_npu
26
27
  from .utils.logging import get_logger
27
28
  from .utils.runtime_utils import ContextRblnConfig
28
29
 
@@ -675,6 +676,9 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
675
676
  compile_cfg.npu = self.npu
676
677
  compile_cfg.tensor_parallel_size = self.tensor_parallel_size
677
678
 
679
+ target_npu = self.npu or next((cfg.npu for cfg in self._compile_cfgs if cfg.npu is not None), None)
680
+ warn_deprecated_npu(target_npu)
681
+
678
682
  def freeze(self):
679
683
  if self._frozen:
680
684
  raise RuntimeError(f"`{self.__class__.__name__}` is already frozen.")
@@ -22,3 +22,8 @@ def rbln_cache_update(cache: Tensor, state: Tensor, position: Tensor, axis: Tens
22
22
  # This operation is designed to perform in-place updates directly on the device without needing to transfer the cache back to the host.
23
23
  # The `position` parameter specifies the start index for the update along the specified axis, allowing flexible updates to any part of the cache tensor.
24
24
  return torch.empty_like(cache)
25
+
26
+
27
+ @rbln_cache_update.register_fake
28
+ def rbln_cache_update_fake(cache: Tensor, state: Tensor, position: Tensor, axis: Tensor) -> Tensor:
29
+ return torch.empty_like(cache)
@@ -23,3 +23,10 @@ def linear(input: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tens
23
23
  output_shape = list(input.shape[:-1])
24
24
  output_shape += [weight.shape[0]]
25
25
  return torch.empty(size=output_shape, dtype=input.dtype, device=input.device, requires_grad=input.requires_grad)
26
+
27
+
28
+ @linear.register_fake
29
+ def linear_fake(input: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tensor:
30
+ output_shape = list(input.shape[:-1])
31
+ output_shape += [weight.shape[0]]
32
+ return torch.empty(size=output_shape, dtype=input.dtype, device=input.device, requires_grad=input.requires_grad)
@@ -68,6 +68,8 @@ _import_structure = {
68
68
  "RBLNDPTForDepthEstimationConfig",
69
69
  "RBLNExaoneForCausalLM",
70
70
  "RBLNExaoneForCausalLMConfig",
71
+ "RBLNGemmaModel",
72
+ "RBLNGemmaModelConfig",
71
73
  "RBLNGemma3ForCausalLM",
72
74
  "RBLNGemma3ForCausalLMConfig",
73
75
  "RBLNGemma3ForConditionalGeneration",
@@ -76,26 +78,48 @@ _import_structure = {
76
78
  "RBLNGemmaForCausalLMConfig",
77
79
  "RBLNGPT2LMHeadModel",
78
80
  "RBLNGPT2LMHeadModelConfig",
81
+ "RBLNGPT2Model",
82
+ "RBLNGPT2ModelConfig",
79
83
  "RBLNIdefics3ForConditionalGeneration",
80
84
  "RBLNIdefics3ForConditionalGenerationConfig",
81
85
  "RBLNIdefics3VisionTransformer",
82
86
  "RBLNIdefics3VisionTransformerConfig",
83
87
  "RBLNLlamaForCausalLM",
84
88
  "RBLNLlamaForCausalLMConfig",
89
+ "RBLNLlavaForConditionalGeneration",
90
+ "RBLNLlavaForConditionalGenerationConfig",
91
+ "RBLNLlamaModel",
92
+ "RBLNLlamaModelConfig",
93
+ "RBLNOPTForCausalLM",
94
+ "RBLNOPTForCausalLMConfig",
95
+ "RBLNPegasusForConditionalGeneration",
96
+ "RBLNPegasusForConditionalGenerationConfig",
97
+ "RBLNPegasusModel",
98
+ "RBLNPegasusModelConfig",
85
99
  "RBLNLlavaNextForConditionalGeneration",
86
100
  "RBLNLlavaNextForConditionalGenerationConfig",
87
101
  "RBLNMidmLMHeadModel",
88
102
  "RBLNMidmLMHeadModelConfig",
89
103
  "RBLNMistralForCausalLM",
90
104
  "RBLNMistralForCausalLMConfig",
105
+ "RBLNMistralModel",
106
+ "RBLNMistralModelConfig",
91
107
  "RBLNOPTForCausalLM",
92
108
  "RBLNOPTForCausalLMConfig",
109
+ "RBLNOPTModel",
110
+ "RBLNOPTModelConfig",
93
111
  "RBLNPhiForCausalLM",
94
112
  "RBLNPhiForCausalLMConfig",
113
+ "RBLNPixtralVisionModelConfig",
114
+ "RBLNPixtralVisionModel",
115
+ "RBLNPhiModel",
116
+ "RBLNPhiModelConfig",
95
117
  "RBLNQwen2_5_VisionTransformerPretrainedModel",
96
118
  "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
97
119
  "RBLNQwen2_5_VLForConditionalGeneration",
98
120
  "RBLNQwen2_5_VLForConditionalGenerationConfig",
121
+ "RBLNQwen2Model",
122
+ "RBLNQwen2ModelConfig",
99
123
  "RBLNQwen2ForCausalLM",
100
124
  "RBLNQwen2ForCausalLMConfig",
101
125
  "RBLNQwen3ForCausalLM",
@@ -170,6 +194,8 @@ if TYPE_CHECKING:
170
194
  RBLNCLIPVisionModelConfig,
171
195
  RBLNCLIPVisionModelWithProjection,
172
196
  RBLNCLIPVisionModelWithProjectionConfig,
197
+ RBLNColPaliForRetrieval,
198
+ RBLNColPaliForRetrievalConfig,
173
199
  RBLNDecoderOnlyModelForCausalLM,
174
200
  RBLNDecoderOnlyModelForCausalLMConfig,
175
201
  RBLNDistilBertForQuestionAnswering,
@@ -184,30 +210,52 @@ if TYPE_CHECKING:
184
210
  RBLNGemma3ForConditionalGenerationConfig,
185
211
  RBLNGemmaForCausalLM,
186
212
  RBLNGemmaForCausalLMConfig,
213
+ RBLNGemmaModel,
214
+ RBLNGemmaModelConfig,
187
215
  RBLNGPT2LMHeadModel,
188
216
  RBLNGPT2LMHeadModelConfig,
217
+ RBLNGPT2Model,
218
+ RBLNGPT2ModelConfig,
189
219
  RBLNIdefics3ForConditionalGeneration,
190
220
  RBLNIdefics3ForConditionalGenerationConfig,
191
221
  RBLNIdefics3VisionTransformer,
192
222
  RBLNIdefics3VisionTransformerConfig,
193
223
  RBLNLlamaForCausalLM,
194
224
  RBLNLlamaForCausalLMConfig,
225
+ RBLNLlamaModel,
226
+ RBLNLlamaModelConfig,
227
+ RBLNLlavaForConditionalGeneration,
228
+ RBLNLlavaForConditionalGenerationConfig,
195
229
  RBLNLlavaNextForConditionalGeneration,
196
230
  RBLNLlavaNextForConditionalGenerationConfig,
197
231
  RBLNMidmLMHeadModel,
198
232
  RBLNMidmLMHeadModelConfig,
199
233
  RBLNMistralForCausalLM,
200
234
  RBLNMistralForCausalLMConfig,
235
+ RBLNMistralModel,
236
+ RBLNMistralModelConfig,
201
237
  RBLNOPTForCausalLM,
202
238
  RBLNOPTForCausalLMConfig,
239
+ RBLNOPTModel,
240
+ RBLNOPTModelConfig,
241
+ RBLNPegasusForConditionalGeneration,
242
+ RBLNPegasusForConditionalGenerationConfig,
243
+ RBLNPegasusModel,
244
+ RBLNPegasusModelConfig,
203
245
  RBLNPhiForCausalLM,
204
246
  RBLNPhiForCausalLMConfig,
247
+ RBLNPhiModel,
248
+ RBLNPhiModelConfig,
249
+ RBLNPixtralVisionModel,
250
+ RBLNPixtralVisionModelConfig,
205
251
  RBLNQwen2_5_VisionTransformerPretrainedModel,
206
252
  RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
207
253
  RBLNQwen2_5_VLForConditionalGeneration,
208
254
  RBLNQwen2_5_VLForConditionalGenerationConfig,
209
255
  RBLNQwen2ForCausalLM,
210
256
  RBLNQwen2ForCausalLMConfig,
257
+ RBLNQwen2Model,
258
+ RBLNQwen2ModelConfig,
211
259
  RBLNQwen3ForCausalLM,
212
260
  RBLNQwen3ForCausalLMConfig,
213
261
  RBLNQwen3Model,
@@ -0,0 +1,252 @@
1
+ import math
2
+ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
3
+
4
+ from optimum.rbln.transformers.models.decoderonly.configuration_decoderonly import (
5
+ RBLNDecoderOnlyModelForCausalLMConfig,
6
+ )
7
+
8
+ from ..utils.logging import get_logger
9
+
10
+
11
+ logger = get_logger()
12
+
13
+ if TYPE_CHECKING:
14
+ from rebel import RBLNCompiledModel
15
+ from transformers import PretrainedConfig
16
+
17
+
18
+ DEFAULT_FLASH_ATTN_PARTITION_LENGTH = 16_384
19
+ DEFAULT_MAX_EAGER_ATTN_SEQUENCE_LENGTH = 32_768
20
+ MIN_FLASH_ATTN_MAX_SEQ_LEN = 8_192
21
+ MIN_FLASH_ATTN_PARTITION_LENGTH = 4_096
22
+ MAX_FLASH_ATTN_PARTITION_LENGTH = 32_768
23
+ MAX_SLIDING_WINDOW_SIZE = 32_768
24
+
25
+
26
+ def set_default_values(
27
+ attn_impl: Optional[str] = None,
28
+ kvcache_partition_len: Optional[int] = None,
29
+ kvcache_block_size: Optional[int] = None,
30
+ max_seq_len: Optional[int] = None,
31
+ ) -> Tuple[str, int, int]:
32
+ if attn_impl is None:
33
+ attn_impl = "eager"
34
+
35
+ if kvcache_partition_len is not None:
36
+ if attn_impl == "eager":
37
+ attn_impl = "flash_attn"
38
+ logger.warning(
39
+ "A non-null `kvcache_partition_len` was provided, but `attn_impl` was not explicitly set or "
40
+ "set to 'eager'. Since KV cache partitioning is only supported with flash attention, "
41
+ "`attn_impl` has been automatically switched to 'flash_attn'."
42
+ )
43
+
44
+ if kvcache_partition_len is None and attn_impl == "flash_attn":
45
+ kvcache_partition_len = DEFAULT_FLASH_ATTN_PARTITION_LENGTH
46
+
47
+ if kvcache_block_size is None:
48
+ if attn_impl == "eager":
49
+ kvcache_block_size = max_seq_len
50
+ else:
51
+ kvcache_block_size = kvcache_partition_len
52
+
53
+ return attn_impl, kvcache_partition_len, kvcache_block_size
54
+
55
+
56
+ def validate_attention_method(attn_impl: str, kvcache_partition_len: int, kvcache_block_size: int, max_seq_len: int):
57
+ if attn_impl not in ["eager", "flash_attn"]:
58
+ raise ValueError(f"Unknown `attn_impl` : {attn_impl}. (Available : 'eager', 'flash_attn`)")
59
+
60
+ ## Checking Constraints...
61
+ # Constraint of eager attention:
62
+ # - `max_seq_len` <= 32k
63
+
64
+ # Constraints of flash attention:
65
+ # 1. `max_seq_len` should be multiple of `partition_len`.
66
+ # 2. 4k <= `partition_len` <= 32k.
67
+ # 3. `max_seq_len` should be larger then 8k.
68
+ if attn_impl == "eager" and max_seq_len > DEFAULT_MAX_EAGER_ATTN_SEQUENCE_LENGTH:
69
+ raise ValueError(
70
+ f"`max_seq_len` is set to {max_seq_len}, "
71
+ f"which exceeds the limit of {DEFAULT_MAX_EAGER_ATTN_SEQUENCE_LENGTH} for 'eager' attention. "
72
+ f"Please reduce the `max_seq_len` to {DEFAULT_MAX_EAGER_ATTN_SEQUENCE_LENGTH} or lower,"
73
+ " or consider switching `attn_impl` to 'flash_attn' for larger sequence lengths."
74
+ )
75
+
76
+ if attn_impl == "flash_attn":
77
+ if max_seq_len // kvcache_partition_len < 2 or max_seq_len % kvcache_partition_len != 0:
78
+ raise ValueError(
79
+ f"`max_seq_len` ({max_seq_len}) must be a multiple of `kvcache_partition_len` ({kvcache_partition_len}) "
80
+ f"when using 'flash_attn'. Please adjust either value to meet this requirement."
81
+ )
82
+ elif not (MIN_FLASH_ATTN_PARTITION_LENGTH <= kvcache_partition_len <= MAX_FLASH_ATTN_PARTITION_LENGTH):
83
+ raise ValueError(
84
+ f"`kvcache_partition_len` ({kvcache_partition_len}) is out of the supported range for 'flash_attn' "
85
+ f"({MIN_FLASH_ATTN_PARTITION_LENGTH} <= `kvcache_partition_len` <= {MAX_FLASH_ATTN_PARTITION_LENGTH}). "
86
+ f"Please provide a valid value within this range."
87
+ )
88
+ elif max_seq_len < MIN_FLASH_ATTN_MAX_SEQ_LEN:
89
+ raise ValueError(
90
+ f"`max_seq_len` ({max_seq_len}) is too small for 'flash_attn'. The minimum "
91
+ f"supported value is {MIN_FLASH_ATTN_MAX_SEQ_LEN}. Please increase `max_seq_len` to meet "
92
+ "this requirement, or consider switching `attn_impl` to 'eager' for shorter lengths."
93
+ )
94
+
95
+ if kvcache_block_size is not None:
96
+ if attn_impl == "flash_attn" and kvcache_partition_len != kvcache_block_size:
97
+ raise ValueError(
98
+ f" When using 'flash attention', the `kvcache_block_size` ({kvcache_block_size}) "
99
+ f"must always be set equal to the `kvcache_partition_len` {kvcache_partition_len}."
100
+ )
101
+ elif attn_impl == "eager" and kvcache_block_size != max_seq_len:
102
+ raise ValueError(
103
+ f" When using 'eager attention', the `kvcache_block_size` ({kvcache_block_size}) "
104
+ f"must always be set equal to the `max_seq_len` {max_seq_len}."
105
+ )
106
+
107
+
108
+ def validate_sliding_window(rbln_config: RBLNDecoderOnlyModelForCausalLMConfig):
109
+ if rbln_config.sliding_window > MAX_SLIDING_WINDOW_SIZE - rbln_config.prefill_chunk_size:
110
+ raise ValueError(
111
+ f"Sliding window size ({rbln_config.sliding_window}) must be less than 32768 - prefill_chunk_size ({32768 - rbln_config.prefill_chunk_size})"
112
+ )
113
+
114
+ if rbln_config.cache_impl == "sliding_window" and rbln_config.use_attention_mask:
115
+ raise ValueError("`use_attention_mask` must be set to False when `cache_impl` is set to 'sliding_window'.")
116
+
117
+
118
+ class RBLNDecoderOnlyFlashAttentionMixin:
119
+ @classmethod
120
+ def get_maximum_num_blocks(
121
+ cls,
122
+ config: "PretrainedConfig",
123
+ tensor_parallel_size: int,
124
+ kvcache_block_size: int,
125
+ nbits_per_param: Optional[int] = None,
126
+ n_model_params: Optional[int] = None,
127
+ kernel_size: Optional[int] = None,
128
+ buffer: Optional[int] = None,
129
+ num_runtimes: int = 2,
130
+ ) -> int:
131
+ # We are finding max_n_blocks(x) that satisfies the following equation:
132
+
133
+ # available_dram - kernel_size - buffer
134
+ # - num_layers * 2 * tensor_parallel_size
135
+ # * align_2MB(
136
+ # x
137
+ # * block_size
138
+ # * align_64(head_dim)
139
+ # * math.ceil(num_key_value_heads / tensor_parallel_size)
140
+ # * 2
141
+ # ) > 0
142
+
143
+ # This inequality can be rewritten as follows:
144
+
145
+ # a - c * align_2MB(b * x) > 0
146
+ # where
147
+ # a = available_dram - kernel_size - buffer
148
+ # b = block_size * align_64(head_dim) * math.ceil(num_key_value_heads / tensor_parallel_size) * 2
149
+ # c = num_layers * 2 * tensor_parallel_size
150
+
151
+ # We can rewrite the inequality as follows:
152
+ # k > align_2MB(b*x)
153
+ # where
154
+ # k = a / c
155
+
156
+ # After that, we can derive the following equation:
157
+ # x = floor(2**21 / b * floor((k - 1) / 2**21))
158
+
159
+ def align(x: int, nbytes: int) -> int:
160
+ return int(math.ceil(x / nbytes) * nbytes)
161
+
162
+ def align_2MB(x: int) -> int:
163
+ return align(x, 2**21)
164
+
165
+ num_attention_heads = getattr(config, "n_head", None) or getattr(config, "num_attention_heads")
166
+ num_layers = getattr(config, "n_layer", None) or getattr(config, "num_hidden_layers")
167
+ head_dim = getattr(config, "head_dim", None) or config.hidden_size // num_attention_heads
168
+ vocab_size = config.vocab_size
169
+ hidden_size = getattr(config, "n_embd", None) or getattr(config, "hidden_size")
170
+ num_key_value_heads = getattr(config, "num_key_value_heads", None) or num_attention_heads
171
+
172
+ # TODO(jongho): Update if target npu is REBEL.
173
+ ATOM_DRAM_NBYTES = 16 * 2**30
174
+ ATOM_SYS_DRAM_NBYTES = 288 * 2**20
175
+ available_dram = tensor_parallel_size * (ATOM_DRAM_NBYTES - ATOM_SYS_DRAM_NBYTES)
176
+
177
+ if kernel_size is None:
178
+ if n_model_params is None:
179
+ raise ValueError("`n_model_params` should be specified to estimate the kernel memory.")
180
+ # Get estimated kernel size (approximated)
181
+ lm_heads_params = align(vocab_size, 64) * hidden_size
182
+ lm_heads_nbytes = (
183
+ align_2MB(lm_heads_params * nbits_per_param // 8 / tensor_parallel_size) * tensor_parallel_size
184
+ )
185
+ params = n_model_params - lm_heads_params
186
+ layer_nbytes = (
187
+ align_2MB(params * nbits_per_param // 8 / num_layers / tensor_parallel_size)
188
+ * num_layers
189
+ * tensor_parallel_size
190
+ )
191
+ kernel_size = layer_nbytes + lm_heads_nbytes
192
+ elif n_model_params is not None:
193
+ raise ValueError("Both `n_model_params` and `kernel_size` cannot be specified.")
194
+
195
+ available_dram -= kernel_size
196
+
197
+ if buffer is None:
198
+ # TODO: Accurate buffer estimation
199
+ buffer_per_runtime_per_core = 2**28 # 256MB per runtime
200
+ buffer_per_core = buffer_per_runtime_per_core * num_runtimes # 1 for prefill, 1 for decoder
201
+ buffer = buffer_per_core * tensor_parallel_size
202
+ available_dram -= buffer
203
+
204
+ b = kvcache_block_size * align(head_dim, 64) * math.ceil(num_key_value_heads / tensor_parallel_size) * 2
205
+ c = num_layers * 2 * tensor_parallel_size
206
+ k = available_dram / c
207
+ max_n_blocks = math.floor(2**21 / b * math.floor((k - 1) / 2**21))
208
+
209
+ return max_n_blocks
210
+
211
+ @classmethod
212
+ def maybe_suggest_kvcache_num_blocks(
213
+ cls,
214
+ compiled_models: Dict[str, "RBLNCompiledModel"],
215
+ model_config: "PretrainedConfig",
216
+ rbln_config: RBLNDecoderOnlyModelForCausalLMConfig,
217
+ ) -> None:
218
+ # Get the actual memory allocation of each node by key
219
+ alloc_memory_per_node_by_key: Dict[str, List[int]] = compiled_models["prefill"].get_alloc_per_node_by_key()
220
+ alloc_memory_by_key: Dict[str, int] = {
221
+ key: sum(memory_per_node) for key, memory_per_node in alloc_memory_per_node_by_key.items()
222
+ }
223
+ for batch_size in rbln_config.decoder_batch_sizes:
224
+ for key, memory_per_node in (
225
+ compiled_models[f"decoder_batch_{batch_size}"].get_alloc_per_node_by_key().items()
226
+ ):
227
+ alloc_memory_by_key[key] += sum(memory_per_node)
228
+ alloc_memory_by_key.pop("PortRecur", None) # Old compiler's kv-cache Key
229
+ alloc_memory_by_key.pop("DramTensor", None) # kv-cache
230
+ kernel_size = alloc_memory_by_key.pop("Kernel") # model weight
231
+
232
+ # Get the maximum number of blocks that can be allocated
233
+ buffer = sum(alloc_memory_by_key.values())
234
+ max_num_blocks = cls.get_maximum_num_blocks(
235
+ config=model_config,
236
+ tensor_parallel_size=rbln_config.tensor_parallel_size,
237
+ kvcache_block_size=rbln_config.kvcache_block_size,
238
+ kernel_size=kernel_size,
239
+ buffer=buffer,
240
+ )
241
+
242
+ # Since our estimation logic is not always accurate,
243
+ # users can set `kvcache_num_blocks` to `max_num_blocks`.
244
+ # If the memory is not enough, the model will fail to compile.
245
+ if rbln_config.kvcache_num_blocks < max_num_blocks:
246
+ logger.warning(
247
+ f"Current `kvcache_num_blocks` setting is {rbln_config.kvcache_num_blocks}. "
248
+ "Our analysis indicates that additional memory is available for more blocks. "
249
+ f"Consider increasing `kvcache_num_blocks` to {max_num_blocks} for potentially improved performance. "
250
+ "Please be advised that our memory estimation algorithm has limitations, "
251
+ "and increasing this value may not guarantee successful model compilation."
252
+ )
@@ -92,27 +92,40 @@ _import_structure = {
92
92
  "RBLNDPTForDepthEstimationConfig",
93
93
  ],
94
94
  "exaone": ["RBLNExaoneForCausalLM", "RBLNExaoneForCausalLMConfig"],
95
- "gemma": ["RBLNGemmaForCausalLM", "RBLNGemmaForCausalLMConfig"],
95
+ "gemma": ["RBLNGemmaForCausalLM", "RBLNGemmaForCausalLMConfig", "RBLNGemmaModel", "RBLNGemmaModelConfig"],
96
96
  "gemma3": [
97
97
  "RBLNGemma3ForCausalLM",
98
98
  "RBLNGemma3ForCausalLMConfig",
99
99
  "RBLNGemma3ForConditionalGeneration",
100
100
  "RBLNGemma3ForConditionalGenerationConfig",
101
101
  ],
102
- "gpt2": ["RBLNGPT2LMHeadModel", "RBLNGPT2LMHeadModelConfig"],
102
+ "gpt2": ["RBLNGPT2LMHeadModel", "RBLNGPT2LMHeadModelConfig", "RBLNGPT2Model", "RBLNGPT2ModelConfig"],
103
103
  "idefics3": [
104
104
  "RBLNIdefics3VisionTransformer",
105
105
  "RBLNIdefics3ForConditionalGeneration",
106
106
  "RBLNIdefics3ForConditionalGenerationConfig",
107
107
  "RBLNIdefics3VisionTransformerConfig",
108
108
  ],
109
- "llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig"],
110
- "opt": ["RBLNOPTForCausalLM", "RBLNOPTForCausalLMConfig"],
109
+ "llava": ["RBLNLlavaForConditionalGeneration", "RBLNLlavaForConditionalGenerationConfig"],
110
+ "llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig", "RBLNLlamaModel", "RBLNLlamaModelConfig"],
111
+ "opt": ["RBLNOPTForCausalLM", "RBLNOPTForCausalLMConfig", "RBLNOPTModel", "RBLNOPTModelConfig"],
112
+ "pegasus": [
113
+ "RBLNPegasusForConditionalGeneration",
114
+ "RBLNPegasusModel",
115
+ "RBLNPegasusForConditionalGenerationConfig",
116
+ "RBLNPegasusModelConfig",
117
+ ],
111
118
  "llava_next": ["RBLNLlavaNextForConditionalGeneration", "RBLNLlavaNextForConditionalGenerationConfig"],
112
119
  "midm": ["RBLNMidmLMHeadModel", "RBLNMidmLMHeadModelConfig"],
113
- "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
114
- "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
115
- "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
120
+ "pixtral": ["RBLNPixtralVisionModel", "RBLNPixtralVisionModelConfig"],
121
+ "mistral": [
122
+ "RBLNMistralForCausalLM",
123
+ "RBLNMistralForCausalLMConfig",
124
+ "RBLNMistralModel",
125
+ "RBLNMistralModelConfig",
126
+ ],
127
+ "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig", "RBLNPhiModel", "RBLNPhiModelConfig"],
128
+ "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig", "RBLNQwen2Model", "RBLNQwen2ModelConfig"],
116
129
  "qwen3": ["RBLNQwen3ForCausalLM", "RBLNQwen3ForCausalLMConfig", "RBLNQwen3Model", "RBLNQwen3ModelConfig"],
117
130
  "resnet": ["RBLNResNetForImageClassification", "RBLNResNetForImageClassificationConfig"],
118
131
  "roberta": [
@@ -215,27 +228,35 @@ if TYPE_CHECKING:
215
228
  RBLNDPTForDepthEstimationConfig,
216
229
  )
217
230
  from .exaone import RBLNExaoneForCausalLM, RBLNExaoneForCausalLMConfig
218
- from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig
231
+ from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig, RBLNGemmaModel, RBLNGemmaModelConfig
219
232
  from .gemma3 import (
220
233
  RBLNGemma3ForCausalLM,
221
234
  RBLNGemma3ForCausalLMConfig,
222
235
  RBLNGemma3ForConditionalGeneration,
223
236
  RBLNGemma3ForConditionalGenerationConfig,
224
237
  )
225
- from .gpt2 import RBLNGPT2LMHeadModel, RBLNGPT2LMHeadModelConfig
238
+ from .gpt2 import RBLNGPT2LMHeadModel, RBLNGPT2LMHeadModelConfig, RBLNGPT2Model, RBLNGPT2ModelConfig
226
239
  from .idefics3 import (
227
240
  RBLNIdefics3ForConditionalGeneration,
228
241
  RBLNIdefics3ForConditionalGenerationConfig,
229
242
  RBLNIdefics3VisionTransformer,
230
243
  RBLNIdefics3VisionTransformerConfig,
231
244
  )
232
- from .llama import RBLNLlamaForCausalLM, RBLNLlamaForCausalLMConfig
245
+ from .llama import RBLNLlamaForCausalLM, RBLNLlamaForCausalLMConfig, RBLNLlamaModel, RBLNLlamaModelConfig
246
+ from .llava import RBLNLlavaForConditionalGeneration, RBLNLlavaForConditionalGenerationConfig
233
247
  from .llava_next import RBLNLlavaNextForConditionalGeneration, RBLNLlavaNextForConditionalGenerationConfig
234
248
  from .midm import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
235
- from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
236
- from .opt import RBLNOPTForCausalLM, RBLNOPTForCausalLMConfig
237
- from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
238
- from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
249
+ from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig, RBLNMistralModel, RBLNMistralModelConfig
250
+ from .opt import RBLNOPTForCausalLM, RBLNOPTForCausalLMConfig, RBLNOPTModel, RBLNOPTModelConfig
251
+ from .pegasus import (
252
+ RBLNPegasusForConditionalGeneration,
253
+ RBLNPegasusForConditionalGenerationConfig,
254
+ RBLNPegasusModel,
255
+ RBLNPegasusModelConfig,
256
+ )
257
+ from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig, RBLNPhiModel, RBLNPhiModelConfig
258
+ from .pixtral import RBLNPixtralVisionModel, RBLNPixtralVisionModelConfig
259
+ from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig, RBLNQwen2Model, RBLNQwen2ModelConfig
239
260
  from .qwen2_5_vl import (
240
261
  RBLNQwen2_5_VisionTransformerPretrainedModel,
241
262
  RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
@@ -22,5 +22,5 @@ from ....ops import (
22
22
  paged_flash_causal_attn_decode,
23
23
  paged_flash_causal_attn_prefill,
24
24
  )
25
- from .configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
26
- from .modeling_decoderonly import RBLNDecoderOnlyModelForCausalLM
25
+ from .configuration_decoderonly import RBLNDecoderOnlyModelConfig, RBLNDecoderOnlyModelForCausalLMConfig
26
+ from .modeling_decoderonly import RBLNDecoderOnlyModel, RBLNDecoderOnlyModelForCausalLM