optimum-rbln 0.7.3.post2__tar.gz → 0.7.4a1__tar.gz

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as published in their respective public registries.
Files changed (194)
  1. optimum_rbln-0.7.4a1/.github/version.yaml +1 -0
  2. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/PKG-INFO +5 -5
  3. optimum_rbln-0.7.4a1/examples/time-series-forecasting/run_time_series_forecasting.py +43 -0
  4. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/pyproject.toml +6 -4
  5. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/__init__.py +2 -0
  6. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/__version__.py +2 -2
  7. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/ops/__init__.py +2 -1
  8. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/ops/attn.py +9 -7
  9. optimum_rbln-0.7.4a1/src/optimum/rbln/ops/linear.py +25 -0
  10. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/__init__.py +2 -0
  11. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/__init__.py +2 -0
  12. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +4 -3
  13. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +20 -17
  14. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +14 -14
  15. optimum_rbln-0.7.4a1/src/optimum/rbln/transformers/models/t5/modeling_t5.py +210 -0
  16. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +9 -3
  17. optimum_rbln-0.7.4a1/src/optimum/rbln/transformers/models/time_series_transformers/__init__.py +24 -0
  18. optimum_rbln-0.7.4a1/src/optimum/rbln/transformers/models/time_series_transformers/modeling_time_series_transformers.py +422 -0
  19. optimum_rbln-0.7.4a1/src/optimum/rbln/transformers/models/time_series_transformers/time_series_transformers_architecture.py +341 -0
  20. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +98 -47
  21. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +71 -26
  22. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/tests/test_transformers.py +75 -17
  23. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/uv.lock +385 -133
  24. optimum_rbln-0.7.3.post2/.github/version.yaml +0 -1
  25. optimum_rbln-0.7.3.post2/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -417
  26. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  27. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  28. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  29. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
  30. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/pull_request_template.md +0 -0
  31. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/scripts/auto_code_review.py +0 -0
  32. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/scripts/validate_pr_checklist.py +0 -0
  33. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/auto_code_review.yml +0 -0
  34. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/check_code_quality.yml +0 -0
  35. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/deploy-on-tag.yaml +0 -0
  36. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/deploy.yaml +0 -0
  37. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/pr-title-check.yaml +0 -0
  38. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/pr_checklist_validator.yml +0 -0
  39. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/rbln_check_compiler.yaml +0 -0
  40. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
  41. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
  42. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
  43. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/rbln_scheduled_test.yaml +0 -0
  44. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
  45. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/.gitignore +0 -0
  46. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/CODE_OF_CONDUCT.md +0 -0
  47. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/CONTRIBUTING.md +0 -0
  48. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/LICENSE +0 -0
  49. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/README.md +0 -0
  50. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/assets/rbln_logo.png +0 -0
  51. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/audio-classification/run_ast_audio_classification.py +0 -0
  52. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/depth-estimation/run_dpt.py +0 -0
  53. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/image-classification/run_image_classification.py +0 -0
  54. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/image-classification/run_vit_image_classification.py +0 -0
  55. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
  56. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
  57. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
  58. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
  59. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
  60. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
  61. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
  62. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
  63. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/question-answering/run_question_answering.py +0 -0
  64. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/speech-recognition/run_wav2vec2.py +0 -0
  65. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/speech-recognition/run_whisper.py +0 -0
  66. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
  67. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
  68. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
  69. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
  70. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
  71. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
  72. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
  73. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
  74. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
  75. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/text-classification/run_secureBERT.py +0 -0
  76. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/text-classification/run_t5_classification.py +0 -0
  77. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
  78. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
  79. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/text2text-generation/run_llama_peft.py +0 -0
  80. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
  81. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/scripts/uv-lock.sh +0 -0
  82. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/scripts/uv-sync.sh +0 -0
  83. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/__init__.py +0 -0
  84. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
  85. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
  86. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
  87. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
  88. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
  89. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
  90. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
  91. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
  92. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
  93. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
  94. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
  95. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
  96. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
  97. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
  98. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
  99. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
  100. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
  101. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
  102. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
  103. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
  104. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
  105. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
  106. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
  107. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
  108. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
  109. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
  110. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
  111. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
  112. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
  113. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
  114. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
  115. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
  116. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
  117. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
  118. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
  119. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
  120. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
  121. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/modeling.py +0 -0
  122. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/modeling_base.py +0 -0
  123. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/modeling_config.py +0 -0
  124. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/ops/flash_attn.py +0 -0
  125. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
  126. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/modeling_alias.py +0 -0
  127. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
  128. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
  129. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
  130. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/auto/auto_factory.py +0 -0
  131. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
  132. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
  133. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
  134. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
  135. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
  136. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
  137. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
  138. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
  139. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +0 -0
  140. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
  141. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
  142. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
  143. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
  144. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
  145. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
  146. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
  147. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
  148. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
  149. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
  150. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
  151. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
  152. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
  153. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
  154. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
  155. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +0 -0
  156. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
  157. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
  158. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
  159. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
  160. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
  161. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
  162. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
  163. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
  164. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
  165. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
  166. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
  167. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
  168. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
  169. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +0 -0
  170. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
  171. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
  172. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
  173. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
  174. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
  175. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
  176. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
  177. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
  178. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
  179. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/utils/__init__.py +0 -0
  180. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/utils/decorator_utils.py +0 -0
  181. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/utils/hub.py +0 -0
  182. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/utils/import_utils.py +0 -0
  183. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/utils/logging.py +0 -0
  184. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/utils/model_utils.py +0 -0
  185. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/utils/runtime_utils.py +0 -0
  186. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/utils/save_utils.py +0 -0
  187. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/src/optimum/rbln/utils/submodule.py +0 -0
  188. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/tests/__init__.py +0 -0
  189. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/tests/psnr.py +0 -0
  190. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/tests/requirements_sdxl.txt +0 -0
  191. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/tests/run_stable_diffusion_xl_base.py +0 -0
  192. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/tests/test_base.py +0 -0
  193. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/tests/test_diffusers.py +0 -0
  194. {optimum_rbln-0.7.3.post2 → optimum_rbln-0.7.4a1}/tests/test_llm.py +0 -0
@@ -0,0 +1 @@
+ rebel_compiler_version: 0.7.4.dev61+gb562a7f0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: optimum-rbln
- Version: 0.7.3.post2
+ Version: 0.7.4a1
  Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
  Project-URL: Homepage, https://rebellions.ai
  Project-URL: Documentation, https://docs.rbln.ai
@@ -25,10 +25,10 @@ Requires-Python: <3.13,>=3.9
  Requires-Dist: accelerate>=1.0.1
  Requires-Dist: diffusers<=0.31.0
  Requires-Dist: packaging>=24.1
- Requires-Dist: torch<=2.5.1
- Requires-Dist: torchaudio<=2.5.1
- Requires-Dist: torchvision<=0.20.1
- Requires-Dist: transformers==4.48.3
+ Requires-Dist: torch==2.6.0
+ Requires-Dist: torchaudio<=2.6.0
+ Requires-Dist: torchvision<=0.21.0
+ Requires-Dist: transformers==4.50.3
  Description-Content-Type: text/markdown
@@ -0,0 +1,43 @@
+ import os
+
+ import fire
+ import torch
+ from huggingface_hub import hf_hub_download
+
+ from optimum.rbln import RBLNTimeSeriesTransformerForPrediction
+
+
+ def main(
+     model_id: str = "huggingface/time-series-transformer-tourism-monthly",
+     batch_size: int = 1,
+     num_parallel_samples: int = 100,
+     from_transformers: bool = False,
+ ):
+     if from_transformers:
+         model = RBLNTimeSeriesTransformerForPrediction.from_pretrained(
+             model_id, export=True, rbln_batch_size=batch_size, num_parallel_samples=num_parallel_samples
+         )
+         model.save_pretrained(os.path.basename(model_id))
+     else:
+         model = RBLNTimeSeriesTransformerForPrediction.from_pretrained(
+             os.path.basename(model_id),
+             export=False,
+         )
+
+     dataset = hf_hub_download(
+         repo_id="hf-internal-testing/tourism-monthly-batch", filename="val-batch.pt", repo_type="dataset"
+     )
+     data = torch.load(dataset, weights_only=True)
+
+     batched_data = {}
+     for k, v in data.items():
+         batched_data[k] = v[:batch_size]
+
+     rbln_outputs = model.generate(**batched_data)
+     mean_prediction = rbln_outputs.sequences.mean(dim=1)
+
+     print(mean_prediction)
+
+
+ if __name__ == "__main__":
+     fire.Fire(main)
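Because the script hands `main` to `fire.Fire`, each keyword argument doubles as a CLI flag: a first run with `--from_transformers True` compiles and saves the model, and a second run without the flag reloads the compiled artifacts. The final lines average the sampled trajectories into a point forecast; assuming the RBLN output mirrors the Hugging Face `SampleTSPredictionOutput` layout, `sequences` has shape `[batch_size, num_parallel_samples, prediction_length]`. A minimal shape sketch with stand-in tensors:

```python
import torch

# Stand-in for rbln_outputs.sequences with batch_size=1, num_parallel_samples=100,
# prediction_length=24 (hypothetical values, for illustration only).
sequences = torch.randn(1, 100, 24)

mean_prediction = sequences.mean(dim=1)  # average over the sample dimension -> point forecast
assert mean_prediction.shape == (1, 24)
```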
@@ -28,11 +28,11 @@ classifiers = [
  ]
  keywords = ["transformers", "diffusers", "inference", "rbln", "atom", "rebel"]
  dependencies = [
-     "torch<=2.5.1",
-     "torchaudio<=2.5.1",
-     "torchvision<=0.20.1",
+     "torch==2.6.0",
+     "torchaudio<=2.6.0",
+     "torchvision<=0.21.0",
      "accelerate>=1.0.1",
-     "transformers==4.48.3",
+     "transformers==4.50.3",
      "diffusers<=0.31.0",
      "packaging>=24.1",
  ]
@@ -53,6 +53,8 @@ tests = [
      "sacremoses>=0.1.1",
      "safetensors>=0.4.2",
      "protobuf>=5.27.2",
+     "soundfile>=0.13.1",
+     "librosa>=0.11.0",
  ]
  quality = [
      "ruff>=0.3.3",
@@ -73,6 +73,7 @@ _import_structure = {
          "RBLNRobertaForMaskedLM",
          "RBLNViTForImageClassification",
          "RBLNBertForMaskedLM",
+         "RBLNTimeSeriesTransformerForPrediction",
      ],
      "diffusers": [
          "RBLNAutoencoderKL",
@@ -184,6 +185,7 @@ if TYPE_CHECKING:
          RBLNRobertaForSequenceClassification,
          RBLNT5EncoderModel,
          RBLNT5ForConditionalGeneration,
+         RBLNTimeSeriesTransformerForPrediction,
          RBLNViTForImageClassification,
          RBLNWav2Vec2ForCTC,
          RBLNWhisperForConditionalGeneration,
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.7.3.post2'
- __version_tuple__ = version_tuple = (0, 7, 3)
+ __version__ = version = '0.7.4a1'
+ __version_tuple__ = version_tuple = (0, 7, 4)
@@ -13,9 +13,10 @@
  # limitations under the License.

  from .attn import (
-     register_rbln_custom_add_softmax_attention,
+     register_rbln_custom_paged_add_softmax_attention,
      register_rbln_custom_paged_attention,
      register_rbln_custom_paged_causal_attention,
  )
  from .flash_attn import register_rbln_custom_paged_flash_attention, register_rbln_custom_paged_flash_causal_attention
  from .kv_cache_update import register_rbln_custom_cache_update
+ from .linear import linear
@@ -182,14 +182,14 @@ def register_rbln_custom_paged_causal_attention():


  @lru_cache
- def register_rbln_custom_add_softmax_attention():
+ def register_rbln_custom_paged_add_softmax_attention():
      torch.library.define(
-         "rbln_custom_ops::add_softmax_attn_decode",
-         "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d) -> Tensor",
+         "rbln_custom_ops::paged_add_softmax_attn_decode",
+         "(Tensor x, Tensor y, Tensor z, Tensor w, Tensor a, Tensor b, Tensor c, Tensor d, Tensor e, int f) -> Tensor",
      )

-     @torch.library.impl("rbln_custom_ops::add_softmax_attn_decode", "cpu")
-     def add_softmax_attn_decode_cpu(q, k, v, mask, kcache, vcache, seq, scale):
+     @torch.library.impl("rbln_custom_ops::paged_add_softmax_attn_decode", "cpu")
+     def paged_add_softmax_attn_decode_cpu(q, k, v, mask, kcache, vcache, seq, scale, block_table, block_size):
          """Defines the computation pattern for fused attention with KV cache updates.

          IMPORTANT: This op serves as a pattern definition for the RBLN compiler to generate
@@ -210,12 +210,14 @@ def register_rbln_custom_add_softmax_attention():
          - vcache: [batch_size, n_heads, 1, max_seq_len, head_dim] - Value cache
          - seq: [1] - Current sequence position
          - scale: [] - Attention scale factor
+         - block_table: [batch_size, max_seq_len // block_size] - Block indices for KV cache management
+         - block_size: [] - Number of tokens per block

          Returns:
              Tensor: attn_output: [batch=1, n_heads, 1, 1, head_dim] - Attention output
          """
          return q

-     @register_fake("rbln_custom_ops::add_softmax_attn_decode")
-     def add_softmax_attn_decode_abstract(q, k, v, m, kcache, vcache, seq, partition):
+     @register_fake("rbln_custom_ops::paged_add_softmax_attn_decode")
+     def paged_add_softmax_attn_decode_abstract(q, k, v, m, kcache, vcache, seq, partition, block_table, block_size):
          return q
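Beyond the rename, the op signature grows two paged-KV-cache inputs: `block_table` (the new `Tensor e`) and `block_size` (the new `int f`). As a rough illustration of the bookkeeping a block table implies (a sketch, not RBLN's kernel), a logical cache position splits into a block index that is looked up in the table and an offset within the physical block:

```python
import torch

block_size = 4
# One row per sequence: logical block i lives in physical block block_table[b, i].
block_table = torch.tensor([[2, 0, 3, 1]])

cache_position = 10                                            # logical token index
physical_block = block_table[0, cache_position // block_size]  # -> 3
offset = cache_position % block_size                           # -> 2, slot inside the block
print(int(physical_block), offset)
```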
@@ -0,0 +1,25 @@
+ # Copyright 2025 Rebellions Inc. All rights reserved.
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at:
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Optional
+
+ import torch
+ from torch import Tensor
+
+
+ @torch.library.custom_op("rbln_custom_ops::linear", mutates_args=())
+ def linear(input: Tensor, weight: Tensor, bias: Optional[Tensor] = None) -> Tensor:
+     output_shape = list(input.shape[:-1])
+     output_shape += [weight.shape[0]]
+     return torch.empty(size=output_shape, dtype=input.dtype, device=input.device, requires_grad=input.requires_grad)
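This op is a shape-only stub: it returns an uninitialized tensor with the shape `torch.nn.functional.linear` would produce (`input.shape[:-1] + [weight.shape[0]]`), giving the compiler a traceable pattern without computing values. A quick shape check (a sketch; importing the module is what registers the op under `torch.ops.rbln_custom_ops`):

```python
import torch
from optimum.rbln.ops import linear  # noqa: F401 -- registers rbln_custom_ops::linear

x = torch.randn(2, 8, 16)  # [..., in_features]
w = torch.randn(32, 16)    # [out_features, in_features]

y = torch.ops.rbln_custom_ops.linear(x, w)
assert y.shape == (2, 8, 32)  # same shape as F.linear(x, w); values are uninitialized
```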
@@ -52,6 +52,7 @@ _import_structure = {
          "RBLNPhiForCausalLM",
          "RBLNT5EncoderModel",
          "RBLNT5ForConditionalGeneration",
+         "RBLNTimeSeriesTransformerForPrediction",
          "RBLNLlavaNextForConditionalGeneration",
          "RBLNMidmLMHeadModel",
          "RBLNXLMRobertaModel",
@@ -113,6 +114,7 @@ if TYPE_CHECKING:
          RBLNQwen2ForCausalLM,
          RBLNT5EncoderModel,
          RBLNT5ForConditionalGeneration,
+         RBLNTimeSeriesTransformerForPrediction,
          RBLNWav2Vec2ForCTC,
          RBLNWhisperForConditionalGeneration,
          RBLNXLMRobertaModel,
@@ -50,6 +50,7 @@ _import_structure = {
      "mistral": ["RBLNMistralForCausalLM"],
      "phi": ["RBLNPhiForCausalLM"],
      "qwen2": ["RBLNQwen2ForCausalLM"],
+     "time_series_transformers": ["RBLNTimeSeriesTransformerForPrediction"],
      "t5": ["RBLNT5EncoderModel", "RBLNT5ForConditionalGeneration"],
      "wav2vec2": ["RBLNWav2Vec2ForCTC"],
      "whisper": ["RBLNWhisperForConditionalGeneration"],
@@ -90,6 +91,7 @@ if TYPE_CHECKING:
      from .phi import RBLNPhiForCausalLM
      from .qwen2 import RBLNQwen2ForCausalLM
      from .t5 import RBLNT5EncoderModel, RBLNT5ForConditionalGeneration
+     from .time_series_transformers import RBLNTimeSeriesTransformerForPrediction
      from .wav2vec2 import RBLNWav2Vec2ForCTC
      from .whisper import RBLNWhisperForConditionalGeneration
      from .xlm_roberta import RBLNXLMRobertaModel
@@ -94,12 +94,11 @@ class RBLNBartModel(RBLNModel):
              for model_input_name in rbln_model_input_names
          ]

-         enc_compile_config = RBLNCompileConfig(input_info=input_info, compiled_model_name="encoder")
-         dec_compile_config = RBLNCompileConfig(input_info=input_info, compiled_model_name="decoder")
+         rbln_compile_config = RBLNCompileConfig(input_info=input_info)

          rbln_config = RBLNConfig(
              rbln_cls=cls.__name__,
-             compile_cfgs=[enc_compile_config, dec_compile_config],
+             compile_cfgs=[rbln_compile_config],
              rbln_kwargs=rbln_kwargs,
          )

@@ -108,6 +107,8 @@ class RBLNBartModel(RBLNModel):


  class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM):
+     support_causal_attn = True
+
      @classmethod
      def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
          enc_max_seq_len = (
@@ -222,8 +222,6 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):

          attention_mask = self.dec_attn_mask

-         attention_mask = self.dec_attn_mask
-
          logits = super().forward(
              inputs,
              cache_position,
@@ -547,22 +545,27 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):

        @QuantizationManager.with_quantization_env
        def compile_model(*args, **kwargs):
-           wrapped_model.phase = "prefill"
-           compiled_prefill = RBLNModel.compile(
-               wrapped_model,
-               prefill_compile_config,
-               example_inputs=prefill_example_inputs,
-               compile_context=context,
-           )
+           try:
+               original_linear = torch.nn.functional.linear
+               torch.nn.functional.linear = torch.ops.rbln_custom_ops.linear
+               wrapped_model.phase = "prefill"
+               compiled_prefill = RBLNModel.compile(
+                   wrapped_model,
+                   prefill_compile_config,
+                   example_inputs=prefill_example_inputs,
+                   compile_context=context,
+               )

-           wrapped_model.phase = "decode"
-           compiled_decoder = RBLNModel.compile(
-               wrapped_model,
-               dec_compile_config,
-               example_inputs=dec_example_inputs,
-               compile_context=context,
-           )
-           return {"prefill": compiled_prefill, "decoder": compiled_decoder}
+               wrapped_model.phase = "decode"
+               compiled_decoder = RBLNModel.compile(
+                   wrapped_model,
+                   dec_compile_config,
+                   example_inputs=dec_example_inputs,
+                   compile_context=context,
+               )
+               return {"prefill": compiled_prefill, "decoder": compiled_decoder}
+           finally:
+               torch.nn.functional.linear = original_linear

        return compile_model(quantize_config=quantize_config)
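The compile path now reroutes `torch.nn.functional.linear` through the shape-only custom op while both phases are traced, and the `finally` block guarantees the global is restored even if compilation raises. The pattern in isolation (a sketch, assuming the custom op has been registered; it hoists the capture above the `try`, a slightly more defensive ordering than the diff's first-statement-inside-`try`):

```python
import torch

original_linear = torch.nn.functional.linear
try:
    # Route every F.linear call through the traceable stub during compilation.
    torch.nn.functional.linear = torch.ops.rbln_custom_ops.linear
    # ... trace / compile the wrapped model here ...
finally:
    # Always restore the real implementation, whether or not compilation succeeded.
    torch.nn.functional.linear = original_linear
```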
@@ -38,8 +38,8 @@ class RBLNRuntimeEncoder(RBLNPytorchRuntime):
      mandatory_members = ["main_input_name"]

      def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
-         _ = super().forward(*args, **kwargs)
-         return BaseModelOutput(last_hidden_state=torch.tensor([1.0]))
+         output = super().forward(*args, **kwargs)
+         return BaseModelOutput(last_hidden_state=output)


  class RBLNRuntimeDecoder(RBLNPytorchRuntime):
@@ -94,7 +94,7 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
              decoder_attention_mask if self.use_attention_mask else None,
              attention_mask,
              cache_position,
-             block_tables,
+             block_tables=block_tables,
          )

          return Seq2SeqLMOutput(logits=lm_logits)
@@ -115,6 +115,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):

      main_input_name = "input_ids"
      auto_model_class = AutoModelForSeq2SeqLM
+     support_causal_attn = None

      def __post_init__(self, **kwargs):
          batch_size = self.rbln_config.model_cfg["batch_size"]
@@ -186,13 +187,16 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
          rbln_dec_max_seq_len = rbln_kwargs.get("dec_max_seq_len", None)
          rbln_batch_size = rbln_kwargs.get("batch_size", None)
          rbln_batch_size = 1 if rbln_batch_size is None else rbln_batch_size
-         rbln_use_attention_mask = rbln_kwargs.get("use_attention_mask", None)

-         if rbln_use_attention_mask is None:
-             rbln_use_attention_mask = False
-             rbln_npu = rbln_kwargs.get("npu", None) or rebel.get_npu_name()
-             if rbln_npu == "RBLN-CA02":
-                 rbln_use_attention_mask = True
+         if cls.support_causal_attn:
+             rbln_use_attention_mask = rbln_kwargs.get("use_attention_mask", None)
+             if rbln_use_attention_mask is None:
+                 rbln_use_attention_mask = False
+                 rbln_npu = rbln_kwargs.get("npu", None) or rebel.get_npu_name()
+                 if rbln_npu == "RBLN-CA02":
+                     rbln_use_attention_mask = True
+         else:
+             rbln_use_attention_mask = True

          n_layer = getattr(model_config, "decoder_layers", None) or getattr(model_config, "num_layers")
          n_head = getattr(model_config, "decoder_attention_heads", None) or getattr(model_config, "num_heads")
@@ -265,11 +269,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
                  [rbln_batch_size, 1],
                  "int32",
              ),
-             (
-                 "block_tables",
-                 [rbln_batch_size, 1],
-                 "int16",
-             ),
+             ("block_tables", [rbln_batch_size, 1], "int16"),
          ]
          dec_input_info.extend(
              [
@@ -0,0 +1,210 @@
+ # Copyright 2025 Rebellions Inc. All rights reserved.
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at:
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import inspect
+ from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union
+
+ import torch
+ from transformers import (
+     AutoModelForTextEncoding,
+     PretrainedConfig,
+     T5EncoderModel,
+     T5ForConditionalGeneration,
+ )
+ from transformers.modeling_outputs import BaseModelOutput
+
+ from ....diffusers.modeling_diffusers import RBLNDiffusionMixin
+ from ....modeling import RBLNModel
+ from ....modeling_config import RBLNCompileConfig, RBLNConfig
+ from ....utils.logging import get_logger
+ from ....utils.runtime_utils import RBLNPytorchRuntime
+ from ...models.seq2seq import RBLNModelForSeq2SeqLM
+ from .t5_architecture import T5Wrapper
+
+
+ logger = get_logger()
+
+ if TYPE_CHECKING:
+     from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel
+
+
+ class RBLNRuntimeModel(RBLNPytorchRuntime):
+     def forward(
+         self,
+         input_ids: torch.LongTensor,
+         attention_mask: torch.FloatTensor,
+         head_mask: torch.FloatTensor,
+         inputs_embeds: torch.FloatTensor,
+         **kwargs,
+     ):
+         return super().forward(
+             input_ids,
+             attention_mask,
+             head_mask,
+             inputs_embeds,
+             **kwargs,
+         )
+
+
+ class T5EncoderWrapper(torch.nn.Module):
+     def __init__(self, model: "T5EncoderModel") -> None:
+         super().__init__()
+         self.model = model
+
+     def forward(self, *args, **kwargs):
+         kwargs.pop("return_dict", None)
+         return self.model(*args, **kwargs, return_dict=False)
+
+
+ class RBLNT5EncoderModel(RBLNModel):
+     auto_model_class = AutoModelForTextEncoding
+     rbln_model_input_names = ["input_ids", "attention_mask"]
+
+     def __post_init__(self, **kwargs):
+         self.model = RBLNRuntimeModel(runtime=self.model[0])
+
+     @classmethod
+     def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
+         return T5EncoderWrapper(model)
+
+     @classmethod
+     def update_rbln_config_using_pipe(cls, pipe: RBLNDiffusionMixin, rbln_config: Dict[str, Any]) -> Dict[str, Any]:
+         batch_size = rbln_config.get("batch_size", 1)
+         max_sequence_length = rbln_config.get("max_sequence_length", 256)
+         model_input_names = ["input_ids"]
+
+         rbln_config.update(
+             {
+                 "batch_size": batch_size,
+                 "max_seq_len": max_sequence_length,
+                 "model_input_names": model_input_names,
+             }
+         )
+
+         return rbln_config
+
+     @classmethod
+     def _get_rbln_config(
+         cls,
+         preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+         model_config: Optional["PretrainedConfig"] = None,
+         rbln_kwargs: Dict[str, Any] = {},
+     ) -> RBLNConfig:
+         rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
+         rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
+         rbln_batch_size = rbln_kwargs.get("batch_size", None)
+
+         max_position_embeddings = getattr(model_config, "n_positions", None)
+
+         if rbln_max_seq_len is None:
+             rbln_max_seq_len = max_position_embeddings
+             if rbln_max_seq_len is None:
+                 for tokenizer in preprocessors:
+                     if hasattr(tokenizer, "model_max_length"):
+                         rbln_max_seq_len = tokenizer.model_max_length
+                         break
+                 if rbln_max_seq_len is None:
+                     raise ValueError("`rbln_max_seq_len` should be specified!")
+
+         if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
+             raise ValueError("`rbln_max_seq_len` should be less or equal than max_position_embeddings!")
+
+         signature_params = inspect.signature(cls.get_hf_class().forward).parameters.keys()
+
+         if rbln_model_input_names is None:
+             for tokenizer in preprocessors:
+                 if hasattr(tokenizer, "model_input_names"):
+                     rbln_model_input_names = [name for name in signature_params if name in tokenizer.model_input_names]
+
+                     invalid_params = set(rbln_model_input_names) - set(signature_params)
+                     if invalid_params:
+                         raise ValueError(f"Invalid model input names: {invalid_params}")
+                     break
+             if rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names"):
+                 rbln_model_input_names = cls.rbln_model_input_names
+             elif rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names") is False:
+                 raise ValueError(
+                     "Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
+                     f"and be sure to make the order of the inputs same as T5EncoderModel forward() arguments like ({list(signature_params)})"
+                 )
+         else:
+             invalid_params = set(rbln_model_input_names) - set(signature_params)
+             if invalid_params:
+                 raise ValueError(f"Invalid model input names: {invalid_params}")
+             rbln_model_input_names = [name for name in signature_params if name in rbln_model_input_names]
+
+         if rbln_batch_size is None:
+             rbln_batch_size = 1
+
+         input_info = [
+             (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
+             for model_input_name in rbln_model_input_names
+         ]
+
+         rbln_compile_config = RBLNCompileConfig(input_info=input_info)
+
+         rbln_config = RBLNConfig(
+             rbln_cls=cls.__name__,
+             compile_cfgs=[rbln_compile_config],
+             rbln_kwargs=rbln_kwargs,
+         )
+
+         rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
+         return rbln_config
+
+     def forward(
+         self,
+         input_ids: Optional[torch.LongTensor] = None,
+         attention_mask: Optional[torch.FloatTensor] = None,
+         head_mask: Optional[torch.FloatTensor] = None,
+         inputs_embeds: Optional[torch.FloatTensor] = None,
+         output_attentions: Optional[bool] = None,
+         output_hidden_states: Optional[bool] = None,
+         return_dict: Optional[bool] = None,
+     ) -> Union[Tuple[torch.FloatTensor], BaseModelOutput]:
+         encoder_outputs = self.model(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
+             inputs_embeds=inputs_embeds,
+             head_mask=head_mask,
+             output_attentions=output_attentions,
+             output_hidden_states=output_hidden_states,
+             return_dict=return_dict,
+         )
+         if not return_dict:
+             return (encoder_outputs,)
+         else:
+             return BaseModelOutput(last_hidden_state=encoder_outputs)
+
+
+ class RBLNT5ForConditionalGeneration(RBLNModelForSeq2SeqLM):
+     support_causal_attn = False
+
+     @classmethod
+     def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
+         enc_max_seq_len = rbln_config.model_cfg["enc_max_seq_len"]
+         dec_max_seq_len = rbln_config.model_cfg["dec_max_seq_len"]
+
+         return T5Wrapper(model, enc_max_seq_len=enc_max_seq_len, dec_max_seq_len=dec_max_seq_len)
+
+     def __getattr__(self, __name: str) -> Any:
+         def redirect(func):
+             return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)
+
+         val = getattr(T5ForConditionalGeneration, __name)
+
+         if isinstance(val, Callable) and "self" in set(inspect.signature(val).parameters):
+             return redirect(val)
+
+         return val
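A note on the `__getattr__` at the end of this new module: unresolved attribute lookups fall through to `T5ForConditionalGeneration`, and any plain function taking `self` is re-bound to the RBLN instance at call time, so generation helpers are borrowed without subclassing the Hugging Face model. The idiom in isolation, with hypothetical classes:

```python
import inspect
from typing import Callable

class Donor:
    def greet(self, name: str) -> str:
        return f"hello {name} from {type(self).__name__}"

class Borrower:
    def __getattr__(self, name):
        val = getattr(Donor, name)  # an unbound function, not a bound method
        if isinstance(val, Callable) and "self" in inspect.signature(val).parameters:
            return lambda *args, **kwargs: val(self, *args, **kwargs)
        return val

print(Borrower().greet("rbln"))  # -> hello rbln from Borrower
```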
@@ -18,7 +18,7 @@ import torch
  from torch import nn
  from transformers.utils import logging

- from ....ops import register_rbln_custom_add_softmax_attention
+ from ....ops import register_rbln_custom_paged_add_softmax_attention
  from ..seq2seq.seq2seq_architecture import (
      Seq2SeqDecoder,
      Seq2SeqDecoderLayer,
@@ -55,7 +55,7 @@ class T5EncoderWrapper(Seq2SeqEncoderWrapper):

  class T5DecoderWrapper(Seq2SeqDecoderWrapper):
      def __post_init__(self, model, dec_max_seq_len: int = None):
-         register_rbln_custom_add_softmax_attention()
+         register_rbln_custom_paged_add_softmax_attention()
          self.num_layers = self.config.num_layers
          self.conditional_generation = self.convert_to_rbln_conditional_generation(model, dec_max_seq_len)

@@ -77,6 +77,7 @@ class T5DecoderWrapper(Seq2SeqDecoderWrapper):
          attention_mask,
          encoder_attention_mask,
          cache_position,
+         block_tables,
          cross_kv_cache,
          *self_kv_cache,
      ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor]]:
@@ -95,6 +96,7 @@ class T5DecoderWrapper(Seq2SeqDecoderWrapper):
              self_past_key_values=self_past_key_values,
              cross_past_key_values=cross_past_key_values,
              cache_position=cache_position,
+             block_tables=block_tables,
          )

          return lm_logits
@@ -162,7 +164,7 @@ class T5LayerSelfAttention(Seq2SeqSelfAttention):
          self.out_proj = self._original_mod.o
          self.num_heads = self._original_mod.n_heads
          self.head_dim = self._original_mod.key_value_proj_dim
-         self.attn_decode = torch.ops.rbln_custom_ops.add_softmax_attn_decode
+         self.attn_decode = torch.ops.rbln_custom_ops.paged_add_softmax_attn_decode

      def projection(self, hidden_states) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
          query_states = self.q_proj(hidden_states)
@@ -176,6 +178,7 @@ class T5LayerSelfAttention(Seq2SeqSelfAttention):
          past_key_value: Tuple[torch.Tensor],
          attention_mask: torch.Tensor,
          cache_position: torch.Tensor,
+         block_tables: torch.Tensor,
          **kwargs,
      ) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
          bsz, tgt_len, _ = hidden_states.size()
@@ -185,6 +188,7 @@ class T5LayerSelfAttention(Seq2SeqSelfAttention):
          key_states = self._shape(key_states, -1, bsz)
          value_states = self._shape(value_states, -1, bsz)

+         block_size = past_key_value[0].shape[-2]
          attn_output = self.attn_decode(
              query_states,
              key_states,
@@ -196,6 +200,8 @@ class T5LayerSelfAttention(Seq2SeqSelfAttention):
              past_key_value[1].view(bsz, self.num_heads, 1, -1, self.head_dim),
              cache_position,
              torch.tensor(1.0, dtype=torch.float32),  # scale
+             block_tables,
+             block_size,
          )

          attn_output = attn_output.view(bsz, self.num_heads, -1, self.head_dim).transpose(1, 2)
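One reading of this last hunk (an inference from the diff, not documented behavior): `block_size` is taken from the cache tensor itself, so a single block spans the entire cached sequence, and the seq2seq config above accordingly allocates `block_tables` of shape `[batch_size, 1]`. The T5 decoder thus adopts the paged interface with exactly one block per sequence, keeping the old non-paged behavior behind the new signature:

```python
import torch

max_seq_len = 128
block_size = max_seq_len                 # one block covers the whole cache
num_blocks = max_seq_len // block_size   # -> 1

# Matches the dec_input_info entry ("block_tables", [batch_size, 1], "int16"):
batch_size = 2
block_tables = torch.arange(batch_size, dtype=torch.int16).unsqueeze(1)
print(num_blocks, block_tables.shape)    # 1 torch.Size([2, 1])
```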