optimum-rbln 0.9.3.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of optimum-rbln might be problematic. Click here for more details.

Files changed (264) hide show
  1. optimum/rbln/__init__.py +505 -0
  2. optimum/rbln/__version__.py +34 -0
  3. optimum/rbln/cli.py +660 -0
  4. optimum/rbln/configuration_utils.py +968 -0
  5. optimum/rbln/diffusers/__init__.py +198 -0
  6. optimum/rbln/diffusers/configurations/__init__.py +37 -0
  7. optimum/rbln/diffusers/configurations/models/__init__.py +10 -0
  8. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +73 -0
  9. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +84 -0
  10. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
  11. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +64 -0
  12. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +59 -0
  13. optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +78 -0
  14. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +63 -0
  15. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +81 -0
  16. optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
  17. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +74 -0
  18. optimum/rbln/diffusers/configurations/pipelines/__init__.py +34 -0
  19. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +316 -0
  20. optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +117 -0
  21. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +363 -0
  22. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +156 -0
  23. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +176 -0
  24. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +159 -0
  25. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
  26. optimum/rbln/diffusers/modeling_diffusers.py +451 -0
  27. optimum/rbln/diffusers/models/__init__.py +64 -0
  28. optimum/rbln/diffusers/models/autoencoders/__init__.py +18 -0
  29. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +255 -0
  30. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +245 -0
  31. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
  32. optimum/rbln/diffusers/models/autoencoders/vae.py +178 -0
  33. optimum/rbln/diffusers/models/autoencoders/vq_model.py +211 -0
  34. optimum/rbln/diffusers/models/controlnet.py +281 -0
  35. optimum/rbln/diffusers/models/transformers/__init__.py +17 -0
  36. optimum/rbln/diffusers/models/transformers/prior_transformer.py +160 -0
  37. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +344 -0
  38. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +191 -0
  39. optimum/rbln/diffusers/models/unets/__init__.py +16 -0
  40. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +408 -0
  41. optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
  42. optimum/rbln/diffusers/pipelines/__init__.py +113 -0
  43. optimum/rbln/diffusers/pipelines/auto_pipeline.py +307 -0
  44. optimum/rbln/diffusers/pipelines/controlnet/__init__.py +19 -0
  45. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +139 -0
  46. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +669 -0
  47. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +640 -0
  48. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +825 -0
  49. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +837 -0
  50. optimum/rbln/diffusers/pipelines/cosmos/__init__.py +17 -0
  51. optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +113 -0
  52. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +425 -0
  53. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +128 -0
  54. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +128 -0
  55. optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +23 -0
  56. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +34 -0
  57. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +207 -0
  58. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +34 -0
  59. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +34 -0
  60. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +31 -0
  61. optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +17 -0
  62. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +32 -0
  63. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +31 -0
  64. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +31 -0
  65. optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +17 -0
  66. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +31 -0
  67. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +31 -0
  68. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +31 -0
  69. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +17 -0
  70. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +31 -0
  71. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +31 -0
  72. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +31 -0
  73. optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
  74. optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
  75. optimum/rbln/modeling.py +364 -0
  76. optimum/rbln/modeling_base.py +637 -0
  77. optimum/rbln/ops/__init__.py +19 -0
  78. optimum/rbln/ops/attn.py +455 -0
  79. optimum/rbln/ops/flash_attn.py +350 -0
  80. optimum/rbln/ops/kv_cache_update.py +29 -0
  81. optimum/rbln/ops/linear.py +32 -0
  82. optimum/rbln/ops/sliding_window_attn.py +111 -0
  83. optimum/rbln/transformers/__init__.py +340 -0
  84. optimum/rbln/transformers/configuration_generic.py +120 -0
  85. optimum/rbln/transformers/modeling_attention_utils.py +385 -0
  86. optimum/rbln/transformers/modeling_generic.py +280 -0
  87. optimum/rbln/transformers/modeling_outputs.py +37 -0
  88. optimum/rbln/transformers/modeling_rope_utils.py +314 -0
  89. optimum/rbln/transformers/models/__init__.py +343 -0
  90. optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +17 -0
  91. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +47 -0
  92. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +91 -0
  93. optimum/rbln/transformers/models/auto/__init__.py +31 -0
  94. optimum/rbln/transformers/models/auto/auto_factory.py +267 -0
  95. optimum/rbln/transformers/models/auto/modeling_auto.py +162 -0
  96. optimum/rbln/transformers/models/bart/__init__.py +17 -0
  97. optimum/rbln/transformers/models/bart/bart_architecture.py +163 -0
  98. optimum/rbln/transformers/models/bart/configuration_bart.py +36 -0
  99. optimum/rbln/transformers/models/bart/modeling_bart.py +86 -0
  100. optimum/rbln/transformers/models/bert/__init__.py +16 -0
  101. optimum/rbln/transformers/models/bert/bert_architecture.py +16 -0
  102. optimum/rbln/transformers/models/bert/configuration_bert.py +46 -0
  103. optimum/rbln/transformers/models/bert/modeling_bert.py +148 -0
  104. optimum/rbln/transformers/models/blip_2/__init__.py +20 -0
  105. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +115 -0
  106. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +526 -0
  107. optimum/rbln/transformers/models/clip/__init__.py +26 -0
  108. optimum/rbln/transformers/models/clip/configuration_clip.py +103 -0
  109. optimum/rbln/transformers/models/clip/modeling_clip.py +384 -0
  110. optimum/rbln/transformers/models/colpali/__init__.py +2 -0
  111. optimum/rbln/transformers/models/colpali/colpali_architecture.py +218 -0
  112. optimum/rbln/transformers/models/colpali/configuration_colpali.py +84 -0
  113. optimum/rbln/transformers/models/colpali/modeling_colpali.py +361 -0
  114. optimum/rbln/transformers/models/colqwen2/__init__.py +2 -0
  115. optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +233 -0
  116. optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +74 -0
  117. optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +446 -0
  118. optimum/rbln/transformers/models/decoderonly/__init__.py +27 -0
  119. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +300 -0
  120. optimum/rbln/transformers/models/decoderonly/configuration_lora.py +411 -0
  121. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +1224 -0
  122. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +508 -0
  123. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +119 -0
  124. optimum/rbln/transformers/models/decoderonly/lora_architecture.py +204 -0
  125. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +823 -0
  126. optimum/rbln/transformers/models/depth_anything/__init__.py +16 -0
  127. optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +24 -0
  128. optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +42 -0
  129. optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
  130. optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +24 -0
  131. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +51 -0
  132. optimum/rbln/transformers/models/dpt/__init__.py +16 -0
  133. optimum/rbln/transformers/models/dpt/configuration_dpt.py +24 -0
  134. optimum/rbln/transformers/models/dpt/modeling_dpt.py +42 -0
  135. optimum/rbln/transformers/models/exaone/__init__.py +24 -0
  136. optimum/rbln/transformers/models/exaone/configuration_exaone.py +42 -0
  137. optimum/rbln/transformers/models/exaone/exaone_architecture.py +77 -0
  138. optimum/rbln/transformers/models/exaone/modeling_exaone.py +145 -0
  139. optimum/rbln/transformers/models/gemma/__init__.py +16 -0
  140. optimum/rbln/transformers/models/gemma/configuration_gemma.py +50 -0
  141. optimum/rbln/transformers/models/gemma/gemma_architecture.py +27 -0
  142. optimum/rbln/transformers/models/gemma/modeling_gemma.py +104 -0
  143. optimum/rbln/transformers/models/gemma3/__init__.py +16 -0
  144. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +109 -0
  145. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +170 -0
  146. optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +245 -0
  147. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +611 -0
  148. optimum/rbln/transformers/models/gpt2/__init__.py +16 -0
  149. optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +50 -0
  150. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +93 -0
  151. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +55 -0
  152. optimum/rbln/transformers/models/grounding_dino/__init__.py +10 -0
  153. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +92 -0
  154. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +599 -0
  155. optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +1048 -0
  156. optimum/rbln/transformers/models/idefics3/__init__.py +16 -0
  157. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +89 -0
  158. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +497 -0
  159. optimum/rbln/transformers/models/llama/__init__.py +16 -0
  160. optimum/rbln/transformers/models/llama/configuration_llama.py +50 -0
  161. optimum/rbln/transformers/models/llama/llama_architecture.py +19 -0
  162. optimum/rbln/transformers/models/llama/modeling_llama.py +104 -0
  163. optimum/rbln/transformers/models/llava/__init__.py +16 -0
  164. optimum/rbln/transformers/models/llava/configuration_llava.py +72 -0
  165. optimum/rbln/transformers/models/llava/modeling_llava.py +490 -0
  166. optimum/rbln/transformers/models/llava_next/__init__.py +16 -0
  167. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +69 -0
  168. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +493 -0
  169. optimum/rbln/transformers/models/midm/__init__.py +24 -0
  170. optimum/rbln/transformers/models/midm/configuration_midm.py +42 -0
  171. optimum/rbln/transformers/models/midm/midm_architecture.py +144 -0
  172. optimum/rbln/transformers/models/midm/modeling_midm.py +144 -0
  173. optimum/rbln/transformers/models/mistral/__init__.py +16 -0
  174. optimum/rbln/transformers/models/mistral/configuration_mistral.py +50 -0
  175. optimum/rbln/transformers/models/mistral/mistral_architecture.py +19 -0
  176. optimum/rbln/transformers/models/mistral/modeling_mistral.py +115 -0
  177. optimum/rbln/transformers/models/opt/__init__.py +16 -0
  178. optimum/rbln/transformers/models/opt/configuration_opt.py +29 -0
  179. optimum/rbln/transformers/models/opt/modeling_opt.py +102 -0
  180. optimum/rbln/transformers/models/opt/opt_architecture.py +74 -0
  181. optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
  182. optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +38 -0
  183. optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +71 -0
  184. optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +161 -0
  185. optimum/rbln/transformers/models/phi/__init__.py +16 -0
  186. optimum/rbln/transformers/models/phi/configuration_phi.py +50 -0
  187. optimum/rbln/transformers/models/phi/modeling_phi.py +92 -0
  188. optimum/rbln/transformers/models/phi/phi_architecture.py +115 -0
  189. optimum/rbln/transformers/models/pixtral/__init__.py +16 -0
  190. optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
  191. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +322 -0
  192. optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
  193. optimum/rbln/transformers/models/qwen2/__init__.py +16 -0
  194. optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +50 -0
  195. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +123 -0
  196. optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +19 -0
  197. optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +19 -0
  198. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +111 -0
  199. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +636 -0
  200. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +220 -0
  201. optimum/rbln/transformers/models/qwen2_vl/__init__.py +19 -0
  202. optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +88 -0
  203. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +513 -0
  204. optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +165 -0
  205. optimum/rbln/transformers/models/qwen3/__init__.py +16 -0
  206. optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +71 -0
  207. optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +133 -0
  208. optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +31 -0
  209. optimum/rbln/transformers/models/resnet/__init__.py +23 -0
  210. optimum/rbln/transformers/models/resnet/configuration_resnet.py +42 -0
  211. optimum/rbln/transformers/models/resnet/modeling_resnet.py +99 -0
  212. optimum/rbln/transformers/models/roberta/__init__.py +24 -0
  213. optimum/rbln/transformers/models/roberta/configuration_roberta.py +33 -0
  214. optimum/rbln/transformers/models/roberta/modeling_roberta.py +72 -0
  215. optimum/rbln/transformers/models/seq2seq/__init__.py +16 -0
  216. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +71 -0
  217. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +477 -0
  218. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +527 -0
  219. optimum/rbln/transformers/models/siglip/__init__.py +16 -0
  220. optimum/rbln/transformers/models/siglip/configuration_siglip.py +76 -0
  221. optimum/rbln/transformers/models/siglip/modeling_siglip.py +199 -0
  222. optimum/rbln/transformers/models/swin/__init__.py +16 -0
  223. optimum/rbln/transformers/models/swin/configuration_swin.py +42 -0
  224. optimum/rbln/transformers/models/swin/modeling_swin.py +354 -0
  225. optimum/rbln/transformers/models/t5/__init__.py +17 -0
  226. optimum/rbln/transformers/models/t5/configuration_t5.py +36 -0
  227. optimum/rbln/transformers/models/t5/modeling_t5.py +130 -0
  228. optimum/rbln/transformers/models/t5/t5_architecture.py +264 -0
  229. optimum/rbln/transformers/models/time_series_transformer/__init__.py +26 -0
  230. optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +41 -0
  231. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +435 -0
  232. optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +337 -0
  233. optimum/rbln/transformers/models/vit/__init__.py +19 -0
  234. optimum/rbln/transformers/models/vit/configuration_vit.py +24 -0
  235. optimum/rbln/transformers/models/vit/modeling_vit.py +44 -0
  236. optimum/rbln/transformers/models/wav2vec2/__init__.py +16 -0
  237. optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +38 -0
  238. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +104 -0
  239. optimum/rbln/transformers/models/whisper/__init__.py +17 -0
  240. optimum/rbln/transformers/models/whisper/configuration_whisper.py +72 -0
  241. optimum/rbln/transformers/models/whisper/generation_whisper.py +159 -0
  242. optimum/rbln/transformers/models/whisper/modeling_whisper.py +475 -0
  243. optimum/rbln/transformers/models/whisper/whisper_architecture.py +349 -0
  244. optimum/rbln/transformers/models/xlm_roberta/__init__.py +24 -0
  245. optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +32 -0
  246. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +82 -0
  247. optimum/rbln/transformers/utils/__init__.py +0 -0
  248. optimum/rbln/transformers/utils/rbln_quantization.py +589 -0
  249. optimum/rbln/transformers/utils/rbln_runtime_wrapper.py +79 -0
  250. optimum/rbln/utils/__init__.py +16 -0
  251. optimum/rbln/utils/decorator_utils.py +86 -0
  252. optimum/rbln/utils/deprecation.py +213 -0
  253. optimum/rbln/utils/hub.py +94 -0
  254. optimum/rbln/utils/import_utils.py +170 -0
  255. optimum/rbln/utils/logging.py +110 -0
  256. optimum/rbln/utils/model_utils.py +63 -0
  257. optimum/rbln/utils/runtime_utils.py +249 -0
  258. optimum/rbln/utils/save_utils.py +102 -0
  259. optimum/rbln/utils/submodule.py +152 -0
  260. optimum_rbln-0.9.3.post1.dist-info/METADATA +124 -0
  261. optimum_rbln-0.9.3.post1.dist-info/RECORD +264 -0
  262. optimum_rbln-0.9.3.post1.dist-info/WHEEL +4 -0
  263. optimum_rbln-0.9.3.post1.dist-info/entry_points.txt +2 -0
  264. optimum_rbln-0.9.3.post1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,71 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelConfig, RBLNDecoderOnlyModelForCausalLMConfig
16
+
17
+
18
class RBLNQwen3ForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
    """
    Configuration for RBLN-compiled Qwen3 causal language models.

    Functionally identical to `RBLNDecoderOnlyModelForCausalLMConfig`; it exists
    so that Qwen3 models have a dedicated, discoverable config type.

    Example usage:
    ```python
    from optimum.rbln import RBLNQwen3ForCausalLM, RBLNQwen3ForCausalLMConfig

    # Build the configuration
    config = RBLNQwen3ForCausalLMConfig(
        batch_size=1,
        max_seq_len=40960,
        tensor_parallel_size=4,
        kvcache_partition_len=16384
    )

    # Pass it to from_pretrained
    model = RBLNQwen3ForCausalLM.from_pretrained(
        "Qwen/Qwen3-4B",
        export=True,
        rbln_config=config
    )
    ```
    """
44
+
45
+
46
class RBLNQwen3ModelConfig(RBLNDecoderOnlyModelConfig):
    """
    Configuration class for RBLN Qwen3 base (headless) models.

    This class is an alias of RBLNDecoderOnlyModelConfig.

    Example usage:
    ```python
    from optimum.rbln import RBLNQwen3Model, RBLNQwen3ModelConfig

    # Create a configuration object
    config = RBLNQwen3ModelConfig(
        batch_size=1,
        max_seq_len=40960,
        tensor_parallel_size=4,
        kvcache_partition_len=16384
    )

    # Use the configuration with from_pretrained
    model = RBLNQwen3Model.from_pretrained(
        "Qwen/Qwen3-Embedding-4B",
        export=True,
        rbln_config=config
    )
    ```
    """
@@ -0,0 +1,133 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import TYPE_CHECKING
16
+
17
+ from transformers import PretrainedConfig
18
+
19
+ from ....utils import logging
20
+ from ...models.decoderonly import (
21
+ RBLNDecoderOnlyModel,
22
+ RBLNDecoderOnlyModelForCausalLM,
23
+ RBLNDecoderOnlyModelForCausalLMConfig,
24
+ )
25
+ from .qwen3_architecture import Qwen3Wrapper
26
+
27
+
28
+ logger = logging.get_logger(__name__)
29
+
30
+ if TYPE_CHECKING:
31
+ from transformers import PretrainedConfig
32
+
33
+
34
class RBLNQwen3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
    """
    Qwen3 transformer with a language-modeling head (linear layer) on top,
    compiled for RBLN devices.

    Inherits from [`RBLNDecoderOnlyModelForCausalLM`]; see the superclass for
    the generic methods shared by all models. Converting a pre-trained
    transformers Qwen3ForCausalLM involves:
    - transferring the original checkpoint weights into an optimized RBLN graph,
    - compiling that graph with the RBLN compiler.

    **Configuration:**
    Configured via [`RBLNQwen3ForCausalLMConfig`]. In `from_pretrained` /
    `from_model`, pass `rbln_config` either as an instance of that class or as
    a dictionary matching its structure. See [`RBLNQwen3ForCausalLMConfig`]
    for every available option.

    Examples:
    ```python
    from optimum.rbln import RBLNQwen3ForCausalLM
    # Simple usage using rbln_* arguments
    # `max_seq_len` is automatically inferred from the model config
    model = RBLNQwen3ForCausalLM.from_pretrained(
        "Qwen/Qwen3-4B",
        export=True,
        rbln_batch_size=1,
        rbln_tensor_parallel_size=4,
    )
    # Using a config dictionary
    rbln_config = {
        "batch_size": 1,
        "max_seq_len": 40_960,
        "tensor_parallel_size": 4,
        "kvcache_partition_len": 8192,
    }
    model = RBLNQwen3ForCausalLM.from_pretrained(
        "Qwen/Qwen3-4B",
        export=True,
        rbln_config=rbln_config
    )
    # Using a RBLNQwen3ForCausalLMConfig instance (recommended for type checking)
    from optimum.rbln import RBLNQwen3ForCausalLMConfig
    config = RBLNQwen3ForCausalLMConfig(
        batch_size=1,
        max_seq_len=40_960,
        tensor_parallel_size=4,
        kvcache_partition_len=8192,
    )
    model = RBLNQwen3ForCausalLM.from_pretrained(
        "Qwen/Qwen3-4B",
        export=True,
        rbln_config=config
    )
    ```
    """

    # Wrapper that adapts HF Qwen3 modules to the RBLN decoder-only graph.
    _decoder_wrapper_cls = Qwen3Wrapper

    @classmethod
    def _update_sliding_window_config(
        cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
    ):
        # https://github.com/huggingface/transformers/issues/35896
        # Workaround for a transformers (v4.52.4) bug: mirroring the eager
        # attn_implementation path, every layer is forced onto sliding-window
        # attention here. Revisit once the upstream fix lands.
        rbln_config.cache_impl = "sliding_window"
        rbln_config.sliding_window = model_config.sliding_window
        rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
        return rbln_config

    def forward(self, *args, **kwargs):
        # Always request dict-style outputs, overriding any caller preference.
        kwargs["return_dict"] = True
        return super().forward(*args, **kwargs)
103
+
104
+
105
class RBLNQwen3Model(RBLNDecoderOnlyModel):
    """
    Bare Qwen3 model emitting raw hidden states, with no task-specific head,
    compiled for RBLN devices.

    Inherits from [`RBLNDecoderOnlyModel`]; see the superclass for the generic
    methods shared by all models. Converting a pre-trained transformers
    Qwen3Model involves:
    - transferring the original checkpoint weights into an optimized RBLN graph,
    - compiling that graph with the RBLN compiler.

    **Configuration:**
    Configured via [`RBLNQwen3ModelConfig`]. In `from_pretrained` /
    `from_model`, pass `rbln_config` either as an instance of that class or as
    a dictionary matching its structure. See [`RBLNQwen3ModelConfig`] for every
    available option.

    Examples:
    ```python
    from optimum.rbln import RBLNQwen3Model
    # Simple usage using rbln_* arguments
    # `max_seq_len` is automatically inferred from the model config
    model = RBLNQwen3Model.from_pretrained(
        "Qwen/Qwen3-Embedding-4B",
        export=True,
        rbln_batch_size=1,
        rbln_max_seq_len=40_960,
        rbln_tensor_parallel_size=4,
        rbln_kvcache_partition_len=8192,
    )
    ```
    """

    # Same graph wrapper as the causal-LM variant; only the head differs.
    _decoder_wrapper_cls = Qwen3Wrapper
    _use_rotary_emb = True
@@ -0,0 +1,31 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from ..decoderonly.decoderonly_architecture import DecoderOnlyAttention, DecoderOnlyWrapper
17
+
18
+
19
class Qwen3Wrapper(DecoderOnlyWrapper):
    """Decoder-only wrapper that selects the Qwen3-specific attention class."""

    def get_rbln_attn_class(self):
        # Qwen3Attention additionally wires up the q/k normalization modules.
        return Qwen3Attention
22
+
23
+
24
class Qwen3Attention(DecoderOnlyAttention):
    """Attention variant for Qwen3 that also exposes q/k normalization layers."""

    def __post_init__(self):
        # Borrow the projection and normalization modules directly from the
        # wrapped Hugging Face attention module rather than re-creating them.
        source = self._original_mod
        self.q_proj = source.q_proj
        self.k_proj = source.k_proj
        self.v_proj = source.v_proj
        self.o_proj = source.o_proj
        self.q_norm = source.q_norm
        self.k_norm = source.k_norm
@@ -0,0 +1,23 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from .configuration_resnet import RBLNResNetForImageClassificationConfig
17
+ from .modeling_resnet import RBLNResNetForImageClassification
18
+
19
+
20
+ __all__ = [
21
+ "RBLNResNetForImageClassificationConfig",
22
+ "RBLNResNetForImageClassification",
23
+ ]
@@ -0,0 +1,42 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from typing import Optional
17
+
18
+ from ...configuration_generic import RBLNModelForImageClassificationConfig
19
+
20
+
21
class RBLNResNetForImageClassificationConfig(RBLNModelForImageClassificationConfig):
    """
    Configuration class for RBLNResNetForImageClassification.

    This configuration class stores the configuration parameters specific to
    RBLN-optimized ResNet models for image classification tasks.
    """

    def __init__(self, output_hidden_states: Optional[bool] = None, **kwargs):
        """
        Args:
            output_hidden_states (Optional[bool]): Whether or not to return the
                hidden states of all layers. When None, the effective value is
                resolved later (e.g. from the model config). Defaults to None.
            kwargs: Additional arguments passed to the parent
                RBLNModelForImageClassificationConfig (e.g. image_size,
                batch_size).

        Raises:
            ValueError: Propagated from the parent config if an inherited
                argument fails validation (e.g. a non-positive batch_size).
        """
        super().__init__(**kwargs)
        self.output_hidden_states = output_hidden_states
@@ -0,0 +1,99 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from typing import TYPE_CHECKING, Optional, Tuple, Union
17
+
18
+ import torch
19
+ from transformers.modeling_outputs import ImageClassifierOutputWithNoAttention
20
+
21
+ from ...modeling_generic import RBLNModelForImageClassification
22
+ from .configuration_resnet import RBLNResNetForImageClassificationConfig
23
+
24
+
25
+ if TYPE_CHECKING:
26
+ from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig, PreTrainedModel
27
+
28
+
29
class RBLNResNetForImageClassification(RBLNModelForImageClassification):
    """
    RBLN optimized ResNet model for image classification tasks.

    This class provides hardware-accelerated inference for ResNet models
    on RBLN devices, supporting image classification with convolutional neural networks
    designed for computer vision tasks.
    """

    @classmethod
    def _update_rbln_config(
        cls,
        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
        model: Optional["PreTrainedModel"] = None,
        model_config: Optional["PretrainedConfig"] = None,
        rbln_config: Optional["RBLNResNetForImageClassificationConfig"] = None,
    ) -> "RBLNResNetForImageClassificationConfig":
        """Resolve unset RBLN config fields, then delegate to the parent implementation."""
        # If the user did not choose, inherit output_hidden_states from the HF model config.
        if rbln_config.output_hidden_states is None:
            rbln_config.output_hidden_states = getattr(model_config, "output_hidden_states", False)

        rbln_config = super()._update_rbln_config(
            preprocessors=preprocessors,
            model=model,
            model_config=model_config,
            rbln_config=rbln_config,
        )

        return rbln_config

    @classmethod
    def _wrap_model_if_needed(
        cls, model: torch.nn.Module, rbln_config: "RBLNResNetForImageClassificationConfig"
    ) -> torch.nn.Module:
        """Wrap the model so output_hidden_states is baked in at compile time."""

        class _ResNetForImageClassification(torch.nn.Module):
            # Thin wrapper: the compiled graph cannot take output_hidden_states as a
            # runtime flag, so it is fixed here from the RBLN config.
            def __init__(self, model: torch.nn.Module, output_hidden_states: bool):
                super().__init__()
                self.model = model
                self.output_hidden_states = output_hidden_states

            def forward(self, *args, **kwargs):
                output = self.model(*args, output_hidden_states=self.output_hidden_states, **kwargs)
                return output

        return _ResNetForImageClassification(model, rbln_config.output_hidden_states)

    def forward(
        self,
        pixel_values: torch.Tensor,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        **kwargs,
    ) -> Union[Tuple, ImageClassifierOutputWithNoAttention]:
        """
        Forward pass for the RBLN-optimized ResNet model for image classification.

        Args:
            pixel_values (torch.FloatTensor of shape (batch_size, channels, height, width)): The tensors corresponding to the input images.
            output_hidden_states (bool, *optional*, defaults to False): Whether or not to return the hidden states of all layers.
                See hidden_states under returned tensors for more details.
            return_dict (bool, *optional*, defaults to True): Whether to return a dictionary of outputs.

        Returns:
            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a ImageClassifierOutputWithNoAttention object.

        Raises:
            ValueError: If output_hidden_states differs from the value the model was compiled with.
        """
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.rbln_config.output_hidden_states
        )

        # The flag was frozen into the compiled graph; a mismatch requires recompilation.
        if output_hidden_states != self.rbln_config.output_hidden_states:
            raise ValueError(
                f"Variable output_hidden_states {output_hidden_states} is not equal to rbln_config.output_hidden_states {self.rbln_config.output_hidden_states} "
                f"Please compile again with the correct argument."
            )

        return super().forward(pixel_values=pixel_values, return_dict=return_dict, **kwargs)
@@ -0,0 +1,24 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .configuration_roberta import RBLNRobertaForMaskedLMConfig, RBLNRobertaForSequenceClassificationConfig
16
+ from .modeling_roberta import RBLNRobertaForMaskedLM, RBLNRobertaForSequenceClassification
17
+
18
+
19
+ __all__ = [
20
+ "RBLNRobertaForMaskedLMConfig",
21
+ "RBLNRobertaForSequenceClassificationConfig",
22
+ "RBLNRobertaForMaskedLM",
23
+ "RBLNRobertaForSequenceClassification",
24
+ ]
@@ -0,0 +1,33 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ...configuration_generic import RBLNModelForMaskedLMConfig, RBLNModelForSequenceClassificationConfig
16
+
17
+
18
class RBLNRobertaForMaskedLMConfig(RBLNModelForMaskedLMConfig):
    """
    Configuration class for RBLNRobertaForMaskedLM.

    Holds the RBLN-specific compilation and runtime parameters used by
    RBLN-optimized RoBERTa models on masked language modeling tasks.
    """
25
+
26
+
27
class RBLNRobertaForSequenceClassificationConfig(RBLNModelForSequenceClassificationConfig):
    """
    Configuration class for RBLNRobertaForSequenceClassification.

    Holds the RBLN-specific compilation and runtime parameters used by
    RBLN-optimized RoBERTa models on sequence classification tasks.
    """
@@ -0,0 +1,72 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Tuple, Union
16
+
17
+ import torch
18
+ from transformers.modeling_outputs import MaskedLMOutput, SequenceClassifierOutput
19
+
20
+ from ...modeling_generic import RBLNModelForMaskedLM, RBLNModelForSequenceClassification
21
+
22
+
23
class RBLNRobertaForMaskedLM(RBLNModelForMaskedLM):
    """
    RBLN optimized RoBERTa model for masked language modeling tasks.

    Runs RoBERTa masked-LM inference on RBLN hardware, covering tasks such as
    token prediction and text completion.
    """

    # Inputs the compiled RBLN runtime expects, in order.
    rbln_model_input_names = ["input_ids", "attention_mask"]

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, **kwargs) -> Union[Tuple, MaskedLMOutput]:
        """
        Forward pass for the RBLN-optimized RoBERTa masked language model.

        Args:
            input_ids (torch.LongTensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
            attention_mask (torch.FloatTensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.

        Returns:
            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a MaskedLMOutput object.
        """
        outputs = super().forward(input_ids, attention_mask, **kwargs)
        return outputs
46
+
47
+
48
class RBLNRobertaForSequenceClassification(RBLNModelForSequenceClassification):
    """
    RBLN optimized RoBERTa model for sequence classification tasks.

    Runs RoBERTa sequence-level inference on RBLN hardware, covering tasks such
    as sentiment analysis and topic classification.
    """

    # Inputs the compiled RBLN runtime expects, in order.
    rbln_model_input_names = ["input_ids", "attention_mask"]

    def forward(
        self, input_ids: torch.Tensor, attention_mask: torch.Tensor, **kwargs
    ) -> Union[Tuple, SequenceClassifierOutput]:
        """
        Forward pass for the RBLN-optimized RoBERTa sequence classification model.

        Args:
            input_ids (torch.LongTensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
            attention_mask (torch.FloatTensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.

        Returns:
            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a SequenceClassifierOutput object.
        """
        outputs = super().forward(input_ids, attention_mask, **kwargs)
        return outputs
@@ -0,0 +1,16 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .configuration_seq2seq import RBLNModelForSeq2SeqLMConfig
16
+ from .modeling_seq2seq import RBLNModelForSeq2SeqLM
@@ -0,0 +1,71 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any, Optional
16
+
17
+ from ....configuration_utils import RBLNModelConfig
18
+ from ....utils.deprecation import deprecate_kwarg
19
+ from ....utils.logging import get_logger
20
+
21
+
22
+ logger = get_logger()
23
+
24
+
25
class RBLNModelForSeq2SeqLMConfig(RBLNModelConfig):
    """
    Configuration class for RBLN-optimized sequence-to-sequence language models.

    Stores batch size, encoder/decoder sequence-length limits, and (when the
    subclass supports it) PagedAttention KV-cache settings.
    """

    # Subclasses set this truthy to enable the PagedAttention KV-cache options.
    support_paged_attention = None

    @deprecate_kwarg(old_name="pad_token_id", version="0.10.0")
    def __init__(
        self,
        batch_size: Optional[int] = None,
        enc_max_seq_len: Optional[int] = None,
        dec_max_seq_len: Optional[int] = None,
        use_attention_mask: Optional[bool] = None,
        kvcache_num_blocks: Optional[int] = None,
        kvcache_block_size: Optional[int] = None,
        **kwargs: Any,
    ):
        """
        Args:
            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
            enc_max_seq_len (Optional[int]): Maximum sequence length for the encoder.
            dec_max_seq_len (Optional[int]): Maximum sequence length for the decoder.
            use_attention_mask (Optional[bool]): Whether to use attention masks during inference.
            kvcache_num_blocks (Optional[int]): The total number of blocks to allocate for the
                PagedAttention KV cache for the SelfAttention. Defaults to batch_size.
            kvcache_block_size (Optional[int]): Sets the size (in number of tokens) of each block
                in the PagedAttention KV cache for the SelfAttention. Defaults to dec_max_seq_len.
            kwargs: Additional arguments passed to the parent RBLNModelConfig.

        Raises:
            ValueError: If batch_size is not a positive integer, or if PagedAttention
                options are supplied to a model that does not support paged attention.
        """
        super().__init__(**kwargs)
        # A falsy batch_size (None or 0) falls back to the default of 1 before validation.
        self.batch_size = batch_size or 1
        # `<= 0` matches the "positive integer" contract stated in the error message
        # (the previous `< 0` could never see 0 anyway thanks to the `or 1` fallback).
        if not isinstance(self.batch_size, int) or self.batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")

        self.enc_max_seq_len = enc_max_seq_len
        self.dec_max_seq_len = dec_max_seq_len

        self.use_attention_mask = use_attention_mask

        if self.support_paged_attention:
            self.kvcache_num_blocks = kvcache_num_blocks
            self.kvcache_block_size = kvcache_block_size
        else:
            # Paged-attention knobs are meaningless here; fail loudly instead of ignoring them.
            if kvcache_num_blocks is not None or kvcache_block_size is not None:
                raise ValueError(
                    "You cannot set kvcache_num_blocks or kvcache_block_size as paged attention is not supported for the model."
                )