optimum-rbln 0.7.3.post1.tar.gz → 0.7.3.post2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/PKG-INFO +1 -1
  2. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/__version__.py +1 -1
  3. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/bart/modeling_bart.py +0 -2
  4. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +13 -19
  5. optimum_rbln-0.7.3.post2/src/optimum/rbln/transformers/models/t5/modeling_t5.py +417 -0
  6. optimum_rbln-0.7.3.post1/src/optimum/rbln/transformers/models/t5/modeling_t5.py +0 -210
  7. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  8. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  9. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  10. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/ISSUE_TEMPLATE/model_request.md +0 -0
  11. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/pull_request_template.md +0 -0
  12. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/scripts/auto_code_review.py +0 -0
  13. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/scripts/validate_pr_checklist.py +0 -0
  14. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/version.yaml +0 -0
  15. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/auto_code_review.yml +0 -0
  16. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/check_code_quality.yml +0 -0
  17. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/deploy-on-tag.yaml +0 -0
  18. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/deploy.yaml +0 -0
  19. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/pr-title-check.yaml +0 -0
  20. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/pr_checklist_validator.yml +0 -0
  21. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/rbln_check_compiler.yaml +0 -0
  22. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/rbln_dispatch_pytest.yaml +0 -0
  23. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/rbln_optimum_inference_test.yaml +0 -0
  24. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/rbln_optimum_pytest.yaml +0 -0
  25. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/rbln_scheduled_test.yaml +0 -0
  26. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.github/workflows/rbln_trigger_on_pr.yaml +0 -0
  27. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/.gitignore +0 -0
  28. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/CODE_OF_CONDUCT.md +0 -0
  29. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/CONTRIBUTING.md +0 -0
  30. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/LICENSE +0 -0
  31. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/README.md +0 -0
  32. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/assets/rbln_logo.png +0 -0
  33. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/audio-classification/run_ast_audio_classification.py +0 -0
  34. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/depth-estimation/run_dpt.py +0 -0
  35. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/image-classification/run_image_classification.py +0 -0
  36. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/image-classification/run_vit_image_classification.py +0 -0
  37. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/image-to-text/run_llava_next_image_to_text.py +0 -0
  38. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/kandinsky2_2/run_kandinsky2_2.py +0 -0
  39. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/kandinsky2_2/run_kandinsky2_2_combined.py +0 -0
  40. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/kandinsky2_2/run_kandinsky2_2_img2img.py +0 -0
  41. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/kandinsky2_2/run_kandinsky2_2_img2img_combined.py +0 -0
  42. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/kandinsky2_2/run_kandinsky2_2_inpaint.py +0 -0
  43. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/kandinsky2_2/run_kandinsky2_2_inpaint_combined.py +0 -0
  44. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/kandinsky2_2/run_kandinsky2_2_prior_interpolate.py +0 -0
  45. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/question-answering/run_question_answering.py +0 -0
  46. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/speech-recognition/run_wav2vec2.py +0 -0
  47. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/speech-recognition/run_whisper.py +0 -0
  48. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/stable-diffusion/run_stable_diffusion.py +0 -0
  49. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/stable-diffusion/run_stable_diffusion_controlnet.py +0 -0
  50. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/stable-diffusion/run_stable_diffusion_img2img.py +0 -0
  51. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/stable-diffusion/run_stable_diffusion_img2img_controlnet.py +0 -0
  52. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/stable-diffusion/run_stable_diffusion_inpaint.py +0 -0
  53. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/stable-diffusion/run_stable_diffusion_lora.py +0 -0
  54. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/stable-diffusion/run_stable_diffusion_multicontrolnet.py +0 -0
  55. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/text-classification/run_bge_m3_text_classification.py +0 -0
  56. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/text-classification/run_bge_reranker_v2_m3_text_classification.py +0 -0
  57. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/text-classification/run_secureBERT.py +0 -0
  58. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/text-classification/run_t5_classification.py +0 -0
  59. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/text-classification/run_twitter_roberta_text_classification.py +0 -0
  60. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/text2text-generation/run_bart_text2text_generation.py +0 -0
  61. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/text2text-generation/run_llama_peft.py +0 -0
  62. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/examples/text2text-generation/run_llama_text2text_generation.py +0 -0
  63. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/pyproject.toml +0 -0
  64. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/scripts/uv-lock.sh +0 -0
  65. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/scripts/uv-sync.sh +0 -0
  66. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/__init__.py +0 -0
  67. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/__init__.py +0 -0
  68. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/modeling_diffusers.py +0 -0
  69. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/__init__.py +0 -0
  70. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/autoencoders/__init__.py +0 -0
  71. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +0 -0
  72. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/autoencoders/vae.py +0 -0
  73. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/autoencoders/vq_model.py +0 -0
  74. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/controlnet.py +0 -0
  75. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/transformers/__init__.py +0 -0
  76. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +0 -0
  77. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +0 -0
  78. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/unets/__init__.py +0 -0
  79. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +0 -0
  80. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/__init__.py +0 -0
  81. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/controlnet/__init__.py +0 -0
  82. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +0 -0
  83. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +0 -0
  84. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +0 -0
  85. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +0 -0
  86. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +0 -0
  87. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +0 -0
  88. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +0 -0
  89. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +0 -0
  90. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +0 -0
  91. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +0 -0
  92. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +0 -0
  93. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +0 -0
  94. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +0 -0
  95. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +0 -0
  96. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +0 -0
  97. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +0 -0
  98. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +0 -0
  99. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +0 -0
  100. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +0 -0
  101. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +0 -0
  102. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -0
  103. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -0
  104. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -0
  105. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/modeling.py +0 -0
  106. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/modeling_base.py +0 -0
  107. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/modeling_config.py +0 -0
  108. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/ops/__init__.py +0 -0
  109. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/ops/attn.py +0 -0
  110. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/ops/flash_attn.py +0 -0
  111. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/ops/kv_cache_update.py +0 -0
  112. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/__init__.py +0 -0
  113. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/modeling_alias.py +0 -0
  114. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/modeling_generic.py +0 -0
  115. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/modeling_rope_utils.py +0 -0
  116. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/__init__.py +0 -0
  117. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/auto/__init__.py +0 -0
  118. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/auto/auto_factory.py +0 -0
  119. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/auto/modeling_auto.py +0 -0
  120. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/bart/__init__.py +0 -0
  121. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/bart/bart_architecture.py +0 -0
  122. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/bert/__init__.py +0 -0
  123. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/bert/modeling_bert.py +0 -0
  124. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/clip/__init__.py +0 -0
  125. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/clip/modeling_clip.py +0 -0
  126. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/decoderonly/__init__.py +0 -0
  127. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +0 -0
  128. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +0 -0
  129. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/dpt/__init__.py +0 -0
  130. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/dpt/modeling_dpt.py +0 -0
  131. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/exaone/__init__.py +0 -0
  132. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -0
  133. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/exaone/modeling_exaone.py +0 -0
  134. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/gemma/__init__.py +0 -0
  135. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/gemma/gemma_architecture.py +0 -0
  136. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/gemma/modeling_gemma.py +0 -0
  137. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/gpt2/__init__.py +0 -0
  138. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +0 -0
  139. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +0 -0
  140. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/llama/__init__.py +0 -0
  141. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/llama/llama_architecture.py +0 -0
  142. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/llama/modeling_llama.py +0 -0
  143. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/llava_next/__init__.py +0 -0
  144. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +0 -0
  145. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/midm/__init__.py +0 -0
  146. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/midm/midm_architecture.py +0 -0
  147. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/midm/modeling_midm.py +0 -0
  148. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/mistral/__init__.py +0 -0
  149. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -0
  150. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -0
  151. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/phi/__init__.py +0 -0
  152. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/phi/modeling_phi.py +0 -0
  153. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/phi/phi_architecture.py +0 -0
  154. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/qwen2/__init__.py +0 -0
  155. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -0
  156. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -0
  157. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/seq2seq/__init__.py +0 -0
  158. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +0 -0
  159. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/t5/__init__.py +0 -0
  160. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/t5/t5_architecture.py +0 -0
  161. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/wav2vec2/__init__.py +0 -0
  162. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +0 -0
  163. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/whisper/__init__.py +0 -0
  164. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/whisper/generation_whisper.py +0 -0
  165. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +0 -0
  166. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -0
  167. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/xlm_roberta/__init__.py +0 -0
  168. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +0 -0
  169. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/utils/__init__.py +0 -0
  170. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/transformers/utils/rbln_quantization.py +0 -0
  171. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/utils/__init__.py +0 -0
  172. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/utils/decorator_utils.py +0 -0
  173. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/utils/hub.py +0 -0
  174. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/utils/import_utils.py +0 -0
  175. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/utils/logging.py +0 -0
  176. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/utils/model_utils.py +0 -0
  177. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/utils/runtime_utils.py +0 -0
  178. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/utils/save_utils.py +0 -0
  179. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/src/optimum/rbln/utils/submodule.py +0 -0
  180. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/tests/__init__.py +0 -0
  181. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/tests/psnr.py +0 -0
  182. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/tests/requirements_sdxl.txt +0 -0
  183. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/tests/run_stable_diffusion_xl_base.py +0 -0
  184. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/tests/test_base.py +0 -0
  185. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/tests/test_diffusers.py +0 -0
  186. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/tests/test_llm.py +0 -0
  187. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/tests/test_transformers.py +0 -0
  188. {optimum_rbln-0.7.3.post1 → optimum_rbln-0.7.3.post2}/uv.lock +0 -0
--- optimum_rbln-0.7.3.post1/PKG-INFO
+++ optimum_rbln-0.7.3.post2/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.7.3.post1
+Version: 0.7.3.post2
 Summary: Optimum RBLN is the interface between the Hugging Face Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
--- optimum_rbln-0.7.3.post1/src/optimum/rbln/__version__.py
+++ optimum_rbln-0.7.3.post2/src/optimum/rbln/__version__.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.7.3.post1'
+__version__ = version = '0.7.3.post2'
 __version_tuple__ = version_tuple = (0, 7, 3)
--- optimum_rbln-0.7.3.post1/src/optimum/rbln/transformers/models/bart/modeling_bart.py
+++ optimum_rbln-0.7.3.post2/src/optimum/rbln/transformers/models/bart/modeling_bart.py
@@ -108,8 +108,6 @@ class RBLNBartModel(RBLNModel):
 
 
 class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM):
-    support_paged_causal_attn = True
-
     @classmethod
     def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
         enc_max_seq_len = (
--- optimum_rbln-0.7.3.post1/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py
+++ optimum_rbln-0.7.3.post2/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py
@@ -50,7 +50,6 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
         runtime: rebel.Runtime,
         batch_size: int,
         dec_max_seq_len: int,
-        support_paged_causal_attn: Optional[bool] = None,
         use_attention_mask: Optional[bool] = None,
         **kwargs: Any,
     ) -> None:
@@ -58,10 +57,7 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
         self.batch_size = batch_size
         self.dec_max_seq_len = dec_max_seq_len
         self.use_attention_mask = use_attention_mask
-        if support_paged_causal_attn:
-            self.default_block_tables = torch.arange(0, self.batch_size, dtype=torch.int16).view(self.batch_size, 1)
-        else:
-            self.default_block_tables = None
+        self.default_block_tables = torch.arange(0, self.batch_size, dtype=torch.int16).view(self.batch_size, 1)
 
     def forward(
         self,
@@ -98,7 +94,7 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
             decoder_attention_mask if self.use_attention_mask else None,
             attention_mask,
             cache_position,
-            block_tables=block_tables,
+            block_tables,
         )
 
         return Seq2SeqLMOutput(logits=lm_logits)
@@ -119,7 +115,6 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
 
     main_input_name = "input_ids"
     auto_model_class = AutoModelForSeq2SeqLM
-    support_paged_causal_attn = None
 
     def __post_init__(self, **kwargs):
         batch_size = self.rbln_config.model_cfg["batch_size"]
@@ -135,7 +130,6 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
             main_input_name="input_ids",
             batch_size=batch_size,
             dec_max_seq_len=dec_max_seq_len,
-            support_paged_causal_attn=self.support_paged_causal_attn,
             use_attention_mask=self.use_attention_mask,
         )
 
@@ -192,16 +186,13 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
         rbln_dec_max_seq_len = rbln_kwargs.get("dec_max_seq_len", None)
         rbln_batch_size = rbln_kwargs.get("batch_size", None)
         rbln_batch_size = 1 if rbln_batch_size is None else rbln_batch_size
+        rbln_use_attention_mask = rbln_kwargs.get("use_attention_mask", None)
 
-        if cls.support_paged_causal_attn:
-            rbln_use_attention_mask = rbln_kwargs.get("use_attention_mask", None)
-            if rbln_use_attention_mask is None:
-                rbln_use_attention_mask = False
-                rbln_npu = rbln_kwargs.get("npu", None) or rebel.get_npu_name()
-                if rbln_npu == "RBLN-CA02":
-                    rbln_use_attention_mask = True
-        else:
-            rbln_use_attention_mask = True
+        if rbln_use_attention_mask is None:
+            rbln_use_attention_mask = False
+            rbln_npu = rbln_kwargs.get("npu", None) or rebel.get_npu_name()
+            if rbln_npu == "RBLN-CA02":
+                rbln_use_attention_mask = True
 
         n_layer = getattr(model_config, "decoder_layers", None) or getattr(model_config, "num_layers")
         n_head = getattr(model_config, "decoder_attention_heads", None) or getattr(model_config, "num_heads")
@@ -274,6 +265,11 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
                 [rbln_batch_size, 1],
                 "int32",
             ),
+            (
+                "block_tables",
+                [rbln_batch_size, 1],
+                "int16",
+            ),
         ]
         dec_input_info.extend(
             [
@@ -306,8 +302,6 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
             ]
         )
 
-        if cls.support_paged_causal_attn:
-            dec_input_info.insert(3, ("block_tables", [rbln_batch_size, 1], "int16"))
         if rbln_use_attention_mask:
             dec_input_info.insert(1, ("attention_mask", [rbln_batch_size, rbln_dec_max_seq_len], "float32"))
 
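Note: taken together, the seq2seq changes above remove the per-class `support_paged_causal_attn` opt-in. The runtime decoder now always builds a default block table, `block_tables` becomes a fixed entry in the decoder's compile-time input info, and the `use_attention_mask` default is derived from the target NPU for every model (forced on for RBLN-CA02). A minimal sketch of the default block table, reusing the exact expression from the diff (the batch size value here is illustrative):

    import torch

    batch_size = 4  # illustrative; the runtime uses the compiled batch size

    # One KV-cache block per batch element: shape [batch_size, 1], dtype int16,
    # matching the ("block_tables", [rbln_batch_size, 1], "int16") input above.
    default_block_tables = torch.arange(0, batch_size, dtype=torch.int16).view(batch_size, 1)
    print(default_block_tables.tolist())  # [[0], [1], [2], [3]]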
--- /dev/null
+++ optimum_rbln-0.7.3.post2/src/optimum/rbln/transformers/models/t5/modeling_t5.py
@@ -0,0 +1,417 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
+
+import rebel
+import torch
+from transformers import (
+    AutoModelForTextEncoding,
+    PretrainedConfig,
+    T5EncoderModel,
+    T5ForConditionalGeneration,
+)
+from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput
+
+from ....diffusers.modeling_diffusers import RBLNDiffusionMixin
+from ....modeling import RBLNModel
+from ....modeling_config import RBLNCompileConfig, RBLNConfig
+from ....utils.logging import get_logger
+from ....utils.runtime_utils import RBLNPytorchRuntime
+from ...models.seq2seq import RBLNModelForSeq2SeqLM
+from .t5_architecture import T5Wrapper
+
+
+logger = get_logger()
+
+if TYPE_CHECKING:
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel
+
+
+class RBLNRuntimeModel(RBLNPytorchRuntime):
+    def forward(
+        self,
+        input_ids: torch.LongTensor,
+        attention_mask: torch.FloatTensor,
+        head_mask: torch.FloatTensor,
+        inputs_embeds: torch.FloatTensor,
+        **kwargs,
+    ):
+        return super().forward(
+            input_ids,
+            attention_mask,
+            head_mask,
+            inputs_embeds,
+            **kwargs,
+        )
+
+
+class RBLNRuntimeEncoder(RBLNPytorchRuntime):
+    mandatory_members = ["main_input_name"]
+
+    def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
+        _ = super().forward(*args, **kwargs)
+        return BaseModelOutput(last_hidden_state=torch.tensor([1.0]))
+
+
+class RBLNRuntimeDecoder(RBLNPytorchRuntime):
+    mandatory_members = ["main_input_name"]
+
+    def __init__(
+        self,
+        runtime: rebel.Runtime,
+        batch_size: int,
+        dec_max_seq_len: int,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(runtime, **kwargs)
+        self.batch_size = batch_size
+        self.dec_max_seq_len = dec_max_seq_len
+
+    def forward(
+        self,
+        decoder_input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        decoder_attention_mask: Optional[torch.BoolTensor] = None,
+        cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Tuple[torch.FloatTensor]:
+        batch_size = decoder_input_ids.shape[0]
+        if batch_size != self.batch_size:
+            raise RuntimeError(
+                f"Batch size mismatch: got {batch_size}, expected {self.batch_size} (compiled batch size)."
+            )
+
+        if batch_size != cache_position.shape[0]:
+            raise RuntimeError(f"Cache position size mismatch: got {cache_position.shape[0]}, expected {batch_size}.")
+
+        for b_idx in range(self.batch_size):
+            decoding_step = cache_position[b_idx].item()
+            if not (0 <= decoding_step < self.dec_max_seq_len):
+                raise ValueError(
+                    f"Decoding step {decoding_step} out of bounds for attention mask with shape {self.dec_attn_mask.shape}."
+                )
+            decoder_attention_mask[b_idx, : decoding_step + 1] = 1
+
+        lm_logits = super().forward(
+            decoder_input_ids,
+            decoder_attention_mask,
+            attention_mask,
+            cache_position,
+        )
+
+        return Seq2SeqLMOutput(logits=lm_logits)
+
+
+class T5EncoderWrapper(torch.nn.Module):
+    def __init__(self, model: "T5EncoderModel") -> None:
+        super().__init__()
+        self.model = model
+
+    def forward(self, *args, **kwargs):
+        kwargs.pop("return_dict", None)
+        return self.model(*args, **kwargs, return_dict=False)
+
+
+class RBLNT5EncoderModel(RBLNModel):
+    auto_model_class = AutoModelForTextEncoding
+    rbln_model_input_names = ["input_ids", "attention_mask"]
+
+    def __post_init__(self, **kwargs):
+        self.model = RBLNRuntimeModel(runtime=self.model[0])
+
+    @classmethod
+    def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
+        return T5EncoderWrapper(model)
+
+    @classmethod
+    def update_rbln_config_using_pipe(cls, pipe: RBLNDiffusionMixin, rbln_config: Dict[str, Any]) -> Dict[str, Any]:
+        batch_size = rbln_config.get("batch_size", 1)
+        max_sequence_length = rbln_config.get("max_sequence_length", 256)
+        model_input_names = ["input_ids"]
+
+        rbln_config.update(
+            {
+                "batch_size": batch_size,
+                "max_seq_len": max_sequence_length,
+                "model_input_names": model_input_names,
+            }
+        )
+
+        return rbln_config
+
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model_config: Optional["PretrainedConfig"] = None,
+        rbln_kwargs: Dict[str, Any] = {},
+    ) -> RBLNConfig:
+        rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
+        rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
+        rbln_batch_size = rbln_kwargs.get("batch_size", None)
+
+        max_position_embeddings = getattr(model_config, "n_positions", None)
+
+        if rbln_max_seq_len is None:
+            rbln_max_seq_len = max_position_embeddings
+            if rbln_max_seq_len is None:
+                for tokenizer in preprocessors:
+                    if hasattr(tokenizer, "model_max_length"):
+                        rbln_max_seq_len = tokenizer.model_max_length
+                        break
+                if rbln_max_seq_len is None:
+                    raise ValueError("`rbln_max_seq_len` should be specified!")
+
+        if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
+            raise ValueError("`rbln_max_seq_len` should be less or equal than max_position_embeddings!")
+
+        signature_params = inspect.signature(cls.get_hf_class().forward).parameters.keys()
+
+        if rbln_model_input_names is None:
+            for tokenizer in preprocessors:
+                if hasattr(tokenizer, "model_input_names"):
+                    rbln_model_input_names = [name for name in signature_params if name in tokenizer.model_input_names]
+
+                    invalid_params = set(rbln_model_input_names) - set(signature_params)
+                    if invalid_params:
+                        raise ValueError(f"Invalid model input names: {invalid_params}")
+                    break
+            if rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names"):
+                rbln_model_input_names = cls.rbln_model_input_names
+            elif rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names") is False:
+                raise ValueError(
+                    "Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
+                    f"and be sure to make the order of the inputs same as T5EncoderModel forward() arguments like ({list(signature_params)})"
+                )
+        else:
+            invalid_params = set(rbln_model_input_names) - set(signature_params)
+            if invalid_params:
+                raise ValueError(f"Invalid model input names: {invalid_params}")
+            rbln_model_input_names = [name for name in signature_params if name in rbln_model_input_names]
+
+        if rbln_batch_size is None:
+            rbln_batch_size = 1
+
+        input_info = [
+            (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
+            for model_input_name in rbln_model_input_names
+        ]
+
+        rbln_compile_config = RBLNCompileConfig(input_info=input_info)
+
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
+
+        rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
+        return rbln_config
+
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple[torch.FloatTensor], BaseModelOutput]:
+        encoder_outputs = self.model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            inputs_embeds=inputs_embeds,
+            head_mask=head_mask,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        if not return_dict:
+            return (encoder_outputs,)
+        else:
+            return BaseModelOutput(last_hidden_state=encoder_outputs)
+
+
+class RBLNT5ForConditionalGeneration(RBLNModelForSeq2SeqLM):
+    def __post_init__(self, **kwargs):
+        batch_size = self.rbln_config.model_cfg["batch_size"]
+        dec_max_seq_len = self.rbln_config.model_cfg["dec_max_seq_len"]
+
+        self.encoder = RBLNRuntimeEncoder(
+            runtime=self.model[0],
+            main_input_name="input_ids",
+        )
+        self.decoder = RBLNRuntimeDecoder(
+            runtime=self.model[1],
+            main_input_name="input_ids",
+            batch_size=batch_size,
+            dec_max_seq_len=dec_max_seq_len,
+        )
+
+    @classmethod
+    def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
+        enc_max_seq_len = rbln_config.model_cfg["enc_max_seq_len"]
+        dec_max_seq_len = rbln_config.model_cfg["dec_max_seq_len"]
+
+        return T5Wrapper(model, enc_max_seq_len=enc_max_seq_len, dec_max_seq_len=dec_max_seq_len)
+
+    def __getattr__(self, __name: str) -> Any:
+        def redirect(func):
+            return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)
+
+        val = getattr(T5ForConditionalGeneration, __name)
+
+        if isinstance(val, Callable) and "self" in set(inspect.signature(val).parameters):
+            return redirect(val)
+
+        return val
+
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
+        model_config: "PretrainedConfig",
+        rbln_kwargs: Dict[str, Any] = {},
+    ) -> RBLNConfig:
+        rbln_enc_max_seq_len = rbln_kwargs.get("enc_max_seq_len", None)
+        rbln_dec_max_seq_len = rbln_kwargs.get("dec_max_seq_len", None)
+        rbln_batch_size = rbln_kwargs.get("batch_size", None)
+        rbln_batch_size = 1 if rbln_batch_size is None else rbln_batch_size
+
+        n_layer = getattr(model_config, "decoder_layers", None) or getattr(model_config, "num_layers")
+        n_head = getattr(model_config, "decoder_attention_heads", None) or getattr(model_config, "num_heads")
+        d_kv = (
+            model_config.d_kv
+            if hasattr(model_config, "d_kv")
+            else model_config.d_model // model_config.encoder_attention_heads
+        )
+
+        max_position_embeddings = getattr(model_config, "n_positions", None) or getattr(
+            model_config, "max_position_embeddings", None
+        )
+
+        rbln_pad_token_id = getattr(model_config, "pad_token_id", None)
+        if rbln_pad_token_id is None:
+            rbln_pad_token_id = getattr(model_config, "bos_token_id", None)
+            if rbln_pad_token_id is None:
+                rbln_pad_token_id = getattr(model_config, "eos_token_id", None)
+                if rbln_pad_token_id is None:
+                    rbln_pad_token_id = -1
+
+        if rbln_enc_max_seq_len is None:
+            rbln_enc_max_seq_len = max_position_embeddings
+            if rbln_enc_max_seq_len is None:
+                for tokenizer in preprocessors:
+                    if hasattr(tokenizer, "model_max_length"):
+                        rbln_enc_max_seq_len = tokenizer.model_max_length
+                        break
+                if rbln_enc_max_seq_len is None:
+                    raise ValueError("`rbln_enc_max_seq_len` should be specified!")
+        if max_position_embeddings is not None and rbln_enc_max_seq_len > max_position_embeddings:
+            raise ValueError("`rbln_enc_max_seq_len` should be less or equal than max_position_embeddings!")
+
+        if rbln_dec_max_seq_len is None:
+            rbln_dec_max_seq_len = max_position_embeddings
+            if rbln_dec_max_seq_len is None:
+                for tokenizer in preprocessors:
+                    if hasattr(tokenizer, "model_max_length"):
+                        rbln_dec_max_seq_len = tokenizer.model_max_length
+                        break
+                if rbln_dec_max_seq_len is None:
+                    raise ValueError("`rbln_dec_max_seq_len` should be specified!")
+
+        if max_position_embeddings is not None and rbln_dec_max_seq_len > max_position_embeddings:
+            raise ValueError("`rbln_dec_max_seq_len` should be less or equal than max_position_embeddings!")
+
+        # model input info
+        enc_input_info = [
+            ("input_ids", [1, rbln_enc_max_seq_len], "int64"),
+            ("attention_mask", [1, rbln_enc_max_seq_len], "float32"),
+            (
+                "cross_key_value_states",
+                [
+                    n_layer * 2,
+                    rbln_batch_size,
+                    n_head,
+                    rbln_enc_max_seq_len,
+                    d_kv,
+                ],
+                "float32",
+            ),
+            ("block_tables", [1], "int16"),
+        ]
+
+        dec_input_info = [
+            ("input_ids", [rbln_batch_size, 1], "int64"),
+            ("attention_mask", [rbln_batch_size, rbln_dec_max_seq_len], "float32"),
+            ("encoder_attention_mask", [rbln_batch_size, rbln_enc_max_seq_len], "float32"),
+            (
+                "cache_position",
+                [rbln_batch_size, 1],
+                "int32",
+            ),
+        ]
+        dec_input_info.extend(
+            [
+                (
+                    "cross_key_value_states",
+                    [
+                        n_layer * 2,
+                        rbln_batch_size,
+                        n_head,
+                        rbln_enc_max_seq_len,
+                        d_kv,
+                    ],
+                    "float32",
+                )
+            ]
+        )
+        dec_input_info.extend(
+            [
+                (
+                    f"self_key_value_states_{i}",
+                    [
+                        rbln_batch_size,
+                        n_head,
+                        rbln_dec_max_seq_len,
+                        d_kv,
+                    ],
+                    "float32",
+                )
+                for i in range(n_layer * 2)
+            ]
+        )
+
+        enc_compile_config = RBLNCompileConfig(compiled_model_name="encoder", input_info=enc_input_info)
+        dec_compile_config = RBLNCompileConfig(compiled_model_name="decoder", input_info=dec_input_info)
+
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[enc_compile_config, dec_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
+
+        rbln_config.model_cfg.update(
+            {
+                "enc_max_seq_len": rbln_enc_max_seq_len,
+                "dec_max_seq_len": rbln_dec_max_seq_len,
+                "batch_size": rbln_batch_size,
+                "pad_token_id": rbln_pad_token_id,
+            }
+        )
+
+        return rbln_config
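Note: for context, below is a minimal end-to-end sketch of the new T5 path. The model id, sequence lengths, and generation settings are illustrative, and the `export=True` / `rbln_*` keyword pattern follows optimum-rbln's usual `from_pretrained` API; the kwarg names map onto the `rbln_kwargs` read in `_get_rbln_config` above:

    from transformers import AutoTokenizer

    from optimum.rbln import RBLNT5ForConditionalGeneration

    model_id = "google-t5/t5-small"  # illustrative model id
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Compile the encoder/decoder pair for the RBLN NPU. rbln_enc_max_seq_len,
    # rbln_dec_max_seq_len, and rbln_batch_size feed the static input shapes
    # assembled in RBLNT5ForConditionalGeneration._get_rbln_config.
    model = RBLNT5ForConditionalGeneration.from_pretrained(
        model_id,
        export=True,
        rbln_batch_size=1,
        rbln_enc_max_seq_len=512,
        rbln_dec_max_seq_len=512,
    )

    # Inputs are padded to the compiled encoder length, since input shapes are static.
    inputs = tokenizer(
        "translate English to German: The house is wonderful.",
        return_tensors="pt",
        padding="max_length",
        max_length=512,
    )
    outputs = model.generate(**inputs, max_length=64)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))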