optimum-rbln 0.8.2a4__py3-none-any.whl → 0.9.3rc0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (167)
  1. optimum/rbln/__init__.py +96 -9
  2. optimum/rbln/__version__.py +16 -3
  3. optimum/rbln/cli.py +660 -0
  4. optimum/rbln/configuration_utils.py +153 -42
  5. optimum/rbln/diffusers/__init__.py +7 -0
  6. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +3 -3
  7. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +1 -1
  8. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +3 -3
  9. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +4 -4
  10. optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +9 -4
  11. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +9 -4
  12. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +3 -3
  13. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +3 -3
  14. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +35 -19
  15. optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +14 -11
  16. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -20
  17. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +13 -9
  18. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +17 -13
  19. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +17 -10
  20. optimum/rbln/diffusers/modeling_diffusers.py +30 -14
  21. optimum/rbln/diffusers/models/__init__.py +3 -13
  22. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +31 -3
  23. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +28 -3
  24. optimum/rbln/diffusers/models/autoencoders/vq_model.py +31 -3
  25. optimum/rbln/diffusers/models/transformers/prior_transformer.py +1 -1
  26. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +9 -1
  27. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +9 -1
  28. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +6 -3
  29. optimum/rbln/diffusers/pipelines/__init__.py +11 -5
  30. optimum/rbln/diffusers/pipelines/auto_pipeline.py +307 -0
  31. optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +19 -16
  32. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +14 -18
  33. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +31 -1
  34. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +31 -1
  35. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -6
  36. optimum/rbln/modeling.py +71 -19
  37. optimum/rbln/modeling_base.py +99 -21
  38. optimum/rbln/ops/attn.py +158 -0
  39. optimum/rbln/ops/flash_attn.py +166 -0
  40. optimum/rbln/ops/kv_cache_update.py +5 -0
  41. optimum/rbln/ops/linear.py +7 -0
  42. optimum/rbln/transformers/__init__.py +92 -0
  43. optimum/rbln/transformers/configuration_generic.py +9 -7
  44. optimum/rbln/transformers/modeling_attention_utils.py +252 -0
  45. optimum/rbln/transformers/modeling_generic.py +51 -9
  46. optimum/rbln/transformers/modeling_outputs.py +37 -0
  47. optimum/rbln/transformers/models/__init__.py +91 -30
  48. optimum/rbln/transformers/models/auto/__init__.py +2 -0
  49. optimum/rbln/transformers/models/auto/auto_factory.py +92 -17
  50. optimum/rbln/transformers/models/auto/modeling_auto.py +45 -0
  51. optimum/rbln/transformers/models/bart/bart_architecture.py +1 -3
  52. optimum/rbln/transformers/models/bart/configuration_bart.py +2 -0
  53. optimum/rbln/transformers/models/bert/bert_architecture.py +16 -0
  54. optimum/rbln/transformers/models/bert/modeling_bert.py +8 -4
  55. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +42 -11
  56. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +94 -30
  57. optimum/rbln/transformers/models/clip/configuration_clip.py +10 -7
  58. optimum/rbln/transformers/models/clip/modeling_clip.py +27 -4
  59. optimum/rbln/transformers/models/colpali/colpali_architecture.py +3 -6
  60. optimum/rbln/transformers/models/colpali/configuration_colpali.py +37 -21
  61. optimum/rbln/transformers/models/colpali/modeling_colpali.py +113 -96
  62. optimum/rbln/transformers/models/colqwen2/__init__.py +2 -0
  63. optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +233 -0
  64. optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +74 -0
  65. optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +446 -0
  66. optimum/rbln/transformers/models/decoderonly/__init__.py +3 -2
  67. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +109 -37
  68. optimum/rbln/transformers/models/decoderonly/configuration_lora.py +411 -0
  69. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +318 -309
  70. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +504 -0
  71. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +111 -0
  72. optimum/rbln/transformers/models/decoderonly/lora_architecture.py +204 -0
  73. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +453 -897
  74. optimum/rbln/transformers/models/depth_anything/__init__.py +16 -0
  75. optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +24 -0
  76. optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +25 -0
  77. optimum/rbln/transformers/models/exaone/modeling_exaone.py +42 -4
  78. optimum/rbln/transformers/models/gemma/__init__.py +2 -2
  79. optimum/rbln/transformers/models/gemma/configuration_gemma.py +9 -1
  80. optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -4
  81. optimum/rbln/transformers/models/gemma/modeling_gemma.py +22 -1
  82. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +49 -13
  83. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +12 -2
  84. optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +245 -0
  85. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +201 -349
  86. optimum/rbln/transformers/models/gpt2/__init__.py +2 -2
  87. optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +31 -3
  88. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +10 -8
  89. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +18 -1
  90. optimum/rbln/transformers/models/grounding_dino/__init__.py +10 -0
  91. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +92 -0
  92. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +599 -0
  93. optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +1032 -0
  94. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +35 -7
  95. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +26 -27
  96. optimum/rbln/transformers/models/llama/__init__.py +2 -2
  97. optimum/rbln/transformers/models/llama/configuration_llama.py +9 -1
  98. optimum/rbln/transformers/models/llama/modeling_llama.py +22 -1
  99. optimum/rbln/transformers/models/llava/__init__.py +16 -0
  100. optimum/rbln/transformers/models/llava/configuration_llava.py +72 -0
  101. optimum/rbln/transformers/models/llava/modeling_llava.py +478 -0
  102. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +15 -17
  103. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +235 -375
  104. optimum/rbln/transformers/models/midm/midm_architecture.py +4 -1
  105. optimum/rbln/transformers/models/midm/modeling_midm.py +42 -4
  106. optimum/rbln/transformers/models/mistral/__init__.py +2 -2
  107. optimum/rbln/transformers/models/mistral/configuration_mistral.py +9 -1
  108. optimum/rbln/transformers/models/mistral/mistral_architecture.py +1 -1
  109. optimum/rbln/transformers/models/mistral/modeling_mistral.py +26 -3
  110. optimum/rbln/transformers/models/opt/__init__.py +2 -2
  111. optimum/rbln/transformers/models/opt/configuration_opt.py +8 -1
  112. optimum/rbln/transformers/models/opt/modeling_opt.py +28 -16
  113. optimum/rbln/transformers/models/opt/opt_architecture.py +4 -4
  114. optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
  115. optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +38 -0
  116. optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +71 -0
  117. optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +161 -0
  118. optimum/rbln/transformers/models/phi/__init__.py +2 -2
  119. optimum/rbln/transformers/models/phi/configuration_phi.py +9 -1
  120. optimum/rbln/transformers/models/phi/modeling_phi.py +10 -1
  121. optimum/rbln/transformers/models/phi/phi_architecture.py +11 -7
  122. optimum/rbln/transformers/models/pixtral/__init__.py +16 -0
  123. optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
  124. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +310 -0
  125. optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
  126. optimum/rbln/transformers/models/qwen2/__init__.py +2 -2
  127. optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +9 -1
  128. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +27 -1
  129. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +21 -6
  130. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +15 -21
  131. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +28 -7
  132. optimum/rbln/transformers/models/qwen2_vl/__init__.py +19 -0
  133. optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +88 -0
  134. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +514 -0
  135. optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +165 -0
  136. optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +2 -2
  137. optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +86 -330
  138. optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +1 -245
  139. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +20 -13
  140. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +24 -3
  141. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -2
  142. optimum/rbln/transformers/models/siglip/__init__.py +2 -6
  143. optimum/rbln/transformers/models/siglip/configuration_siglip.py +1 -1
  144. optimum/rbln/transformers/models/siglip/modeling_siglip.py +5 -16
  145. optimum/rbln/transformers/models/swin/__init__.py +16 -0
  146. optimum/rbln/transformers/models/swin/configuration_swin.py +42 -0
  147. optimum/rbln/transformers/models/swin/modeling_swin.py +341 -0
  148. optimum/rbln/transformers/models/t5/configuration_t5.py +2 -0
  149. optimum/rbln/transformers/models/t5/t5_architecture.py +8 -1
  150. optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +3 -3
  151. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -14
  152. optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +7 -1
  153. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -0
  154. optimum/rbln/transformers/models/whisper/configuration_whisper.py +12 -13
  155. optimum/rbln/transformers/models/whisper/generation_whisper.py +28 -6
  156. optimum/rbln/transformers/models/whisper/modeling_whisper.py +28 -3
  157. optimum/rbln/transformers/models/xlm_roberta/__init__.py +2 -8
  158. optimum/rbln/transformers/utils/rbln_quantization.py +391 -75
  159. optimum/rbln/transformers/utils/rbln_runtime_wrapper.py +79 -0
  160. optimum/rbln/utils/depreacate_utils.py +16 -0
  161. optimum/rbln/utils/runtime_utils.py +28 -18
  162. optimum/rbln/utils/submodule.py +31 -9
  163. {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3rc0.dist-info}/METADATA +8 -7
  164. {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3rc0.dist-info}/RECORD +167 -125
  165. optimum_rbln-0.9.3rc0.dist-info/entry_points.txt +2 -0
  166. {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3rc0.dist-info}/WHEEL +0 -0
  167. {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3rc0.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/modeling_attention_utils.py (new file)
@@ -0,0 +1,252 @@
+ import math
+ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
+
+ from optimum.rbln.transformers.models.decoderonly.configuration_decoderonly import (
+     RBLNDecoderOnlyModelForCausalLMConfig,
+ )
+
+ from ..utils.logging import get_logger
+
+
+ logger = get_logger()
+
+ if TYPE_CHECKING:
+     from rebel import RBLNCompiledModel
+     from transformers import PretrainedConfig
+
+
+ DEFAULT_FLASH_ATTN_PARTITION_LENGTH = 16_384
+ DEFAULT_MAX_EAGER_ATTN_SEQUENCE_LENGTH = 32_768
+ MIN_FLASH_ATTN_MAX_SEQ_LEN = 8_192
+ MIN_FLASH_ATTN_PARTITION_LENGTH = 4_096
+ MAX_FLASH_ATTN_PARTITION_LENGTH = 32_768
+ MAX_SLIDING_WINDOW_SIZE = 32_768
+
+
+ def set_default_values(
+     attn_impl: Optional[str] = None,
+     kvcache_partition_len: Optional[int] = None,
+     kvcache_block_size: Optional[int] = None,
+     max_seq_len: Optional[int] = None,
+ ) -> Tuple[str, int, int]:
+     if attn_impl is None:
+         attn_impl = "eager"
+
+     if kvcache_partition_len is not None:
+         if attn_impl == "eager":
+             attn_impl = "flash_attn"
+             logger.warning(
+                 "A non-null `kvcache_partition_len` was provided, but `attn_impl` was not explicitly set "
+                 "or was set to 'eager'. Since KV cache partitioning is only supported with flash attention, "
+                 "`attn_impl` has been automatically switched to 'flash_attn'."
+             )
+
+     if kvcache_partition_len is None and attn_impl == "flash_attn":
+         kvcache_partition_len = DEFAULT_FLASH_ATTN_PARTITION_LENGTH
+
+     if kvcache_block_size is None:
+         if attn_impl == "eager":
+             kvcache_block_size = max_seq_len
+         else:
+             kvcache_block_size = kvcache_partition_len
+
+     return attn_impl, kvcache_partition_len, kvcache_block_size
+
+
+ def validate_attention_method(attn_impl: str, kvcache_partition_len: int, kvcache_block_size: int, max_seq_len: int):
+     if attn_impl not in ["eager", "flash_attn"]:
+         raise ValueError(f"Unknown `attn_impl`: {attn_impl}. (Available: 'eager', 'flash_attn')")
+
+     ## Checking constraints...
+     # Constraint of eager attention:
+     # - `max_seq_len` <= 32k
+
+     # Constraints of flash attention:
+     # 1. `max_seq_len` should be a multiple of `partition_len`.
+     # 2. 4k <= `partition_len` <= 32k.
+     # 3. `max_seq_len` should be at least 8k.
+     if attn_impl == "eager" and max_seq_len > DEFAULT_MAX_EAGER_ATTN_SEQUENCE_LENGTH:
+         raise ValueError(
+             f"`max_seq_len` is set to {max_seq_len}, "
+             f"which exceeds the limit of {DEFAULT_MAX_EAGER_ATTN_SEQUENCE_LENGTH} for 'eager' attention. "
+             f"Please reduce the `max_seq_len` to {DEFAULT_MAX_EAGER_ATTN_SEQUENCE_LENGTH} or lower, "
+             "or consider switching `attn_impl` to 'flash_attn' for larger sequence lengths."
+         )
+
+     if attn_impl == "flash_attn":
+         if max_seq_len // kvcache_partition_len < 2 or max_seq_len % kvcache_partition_len != 0:
+             raise ValueError(
+                 f"`max_seq_len` ({max_seq_len}) must be a multiple of `kvcache_partition_len` "
+                 f"({kvcache_partition_len}), and at least twice as large, when using 'flash_attn'. "
+                 "Please adjust either value to meet these requirements."
+             )
+         elif not (MIN_FLASH_ATTN_PARTITION_LENGTH <= kvcache_partition_len <= MAX_FLASH_ATTN_PARTITION_LENGTH):
+             raise ValueError(
+                 f"`kvcache_partition_len` ({kvcache_partition_len}) is out of the supported range for 'flash_attn' "
+                 f"({MIN_FLASH_ATTN_PARTITION_LENGTH} <= `kvcache_partition_len` <= {MAX_FLASH_ATTN_PARTITION_LENGTH}). "
+                 "Please provide a valid value within this range."
+             )
+         elif max_seq_len < MIN_FLASH_ATTN_MAX_SEQ_LEN:
+             raise ValueError(
+                 f"`max_seq_len` ({max_seq_len}) is too small for 'flash_attn'. The minimum "
+                 f"supported value is {MIN_FLASH_ATTN_MAX_SEQ_LEN}. Please increase `max_seq_len` to meet "
+                 "this requirement, or consider switching `attn_impl` to 'eager' for shorter lengths."
+             )
+
+     if kvcache_block_size is not None:
+         if attn_impl == "flash_attn" and kvcache_partition_len != kvcache_block_size:
+             raise ValueError(
+                 f"When using 'flash_attn', the `kvcache_block_size` ({kvcache_block_size}) "
+                 f"must always be set equal to the `kvcache_partition_len` ({kvcache_partition_len})."
+             )
+         elif attn_impl == "eager" and kvcache_block_size != max_seq_len:
+             raise ValueError(
+                 f"When using 'eager' attention, the `kvcache_block_size` ({kvcache_block_size}) "
+                 f"must always be set equal to the `max_seq_len` ({max_seq_len})."
+             )
+
+
+ def validate_sliding_window(rbln_config: RBLNDecoderOnlyModelForCausalLMConfig):
+     if rbln_config.sliding_window > MAX_SLIDING_WINDOW_SIZE - rbln_config.prefill_chunk_size:
+         raise ValueError(
+             f"Sliding window size ({rbln_config.sliding_window}) must be less than "
+             f"{MAX_SLIDING_WINDOW_SIZE} - `prefill_chunk_size` "
+             f"({MAX_SLIDING_WINDOW_SIZE - rbln_config.prefill_chunk_size})."
+         )
+
+     if rbln_config.cache_impl == "sliding_window" and rbln_config.use_attention_mask:
+         raise ValueError("`use_attention_mask` must be set to False when `cache_impl` is set to 'sliding_window'.")
+
+
+ class RBLNDecoderOnlyFlashAttentionMixin:
+     @classmethod
+     def get_maximum_num_blocks(
+         cls,
+         config: "PretrainedConfig",
+         tensor_parallel_size: int,
+         kvcache_block_size: int,
+         nbits_per_param: Optional[int] = None,
+         n_model_params: Optional[int] = None,
+         kernel_size: Optional[int] = None,
+         buffer: Optional[int] = None,
+         num_runtimes: int = 2,
+     ) -> int:
+         # We are finding the largest x (max_n_blocks) that satisfies the following inequality:
+         #
+         #   available_dram - kernel_size - buffer
+         #   - num_layers * 2 * tensor_parallel_size
+         #     * align_2MB(
+         #         x
+         #         * block_size
+         #         * align_64(head_dim)
+         #         * math.ceil(num_key_value_heads / tensor_parallel_size)
+         #         * 2
+         #     ) > 0
+         #
+         # This inequality can be rewritten as:
+         #
+         #   a - c * align_2MB(b * x) > 0
+         # where
+         #   a = available_dram - kernel_size - buffer
+         #   b = block_size * align_64(head_dim) * math.ceil(num_key_value_heads / tensor_parallel_size) * 2
+         #   c = num_layers * 2 * tensor_parallel_size
+         #
+         # Dividing both sides by c:
+         #   k > align_2MB(b * x)
+         # where
+         #   k = a / c
+         #
+         # From that, we can derive the closed form:
+         #   x = floor(2**21 / b * floor((k - 1) / 2**21))
+
+         def align(x: int, nbytes: int) -> int:
+             return int(math.ceil(x / nbytes) * nbytes)
+
+         def align_2MB(x: int) -> int:
+             return align(x, 2**21)
+
+         num_attention_heads = getattr(config, "n_head", None) or getattr(config, "num_attention_heads")
+         num_layers = getattr(config, "n_layer", None) or getattr(config, "num_hidden_layers")
+         head_dim = getattr(config, "head_dim", None) or config.hidden_size // num_attention_heads
+         vocab_size = config.vocab_size
+         hidden_size = getattr(config, "n_embd", None) or getattr(config, "hidden_size")
+         num_key_value_heads = getattr(config, "num_key_value_heads", None) or num_attention_heads
+
+         # TODO(jongho): Update if target npu is REBEL.
+         ATOM_DRAM_NBYTES = 16 * 2**30
+         ATOM_SYS_DRAM_NBYTES = 288 * 2**20
+         available_dram = tensor_parallel_size * (ATOM_DRAM_NBYTES - ATOM_SYS_DRAM_NBYTES)
+
+         if kernel_size is None:
+             if n_model_params is None:
+                 raise ValueError("`n_model_params` should be specified to estimate the kernel memory.")
+             # Get estimated kernel size (approximated)
+             lm_heads_params = align(vocab_size, 64) * hidden_size
+             lm_heads_nbytes = (
+                 align_2MB(lm_heads_params * nbits_per_param // 8 / tensor_parallel_size) * tensor_parallel_size
+             )
+             params = n_model_params - lm_heads_params
+             layer_nbytes = (
+                 align_2MB(params * nbits_per_param // 8 / num_layers / tensor_parallel_size)
+                 * num_layers
+                 * tensor_parallel_size
+             )
+             kernel_size = layer_nbytes + lm_heads_nbytes
+         elif n_model_params is not None:
+             raise ValueError("Both `n_model_params` and `kernel_size` cannot be specified.")
+
+         available_dram -= kernel_size
+
+         if buffer is None:
+             # TODO: Accurate buffer estimation
+             buffer_per_runtime_per_core = 2**28  # 256MB per runtime
+             buffer_per_core = buffer_per_runtime_per_core * num_runtimes  # 1 for prefill, 1 for decoder
+             buffer = buffer_per_core * tensor_parallel_size
+         available_dram -= buffer
+
+         b = kvcache_block_size * align(head_dim, 64) * math.ceil(num_key_value_heads / tensor_parallel_size) * 2
+         c = num_layers * 2 * tensor_parallel_size
+         k = available_dram / c
+         max_n_blocks = math.floor(2**21 / b * math.floor((k - 1) / 2**21))
+
+         return max_n_blocks
+
+     @classmethod
+     def maybe_suggest_kvcache_num_blocks(
+         cls,
+         compiled_models: Dict[str, "RBLNCompiledModel"],
+         model_config: "PretrainedConfig",
+         rbln_config: RBLNDecoderOnlyModelForCausalLMConfig,
+     ) -> None:
+         # Get the actual memory allocation of each node by key
+         alloc_memory_per_node_by_key: Dict[str, List[int]] = compiled_models["prefill"].get_alloc_per_node_by_key()
+         alloc_memory_by_key: Dict[str, int] = {
+             key: sum(memory_per_node) for key, memory_per_node in alloc_memory_per_node_by_key.items()
+         }
+         for batch_size in rbln_config.decoder_batch_sizes:
+             for key, memory_per_node in (
+                 compiled_models[f"decoder_batch_{batch_size}"].get_alloc_per_node_by_key().items()
+             ):
+                 alloc_memory_by_key[key] += sum(memory_per_node)
+         alloc_memory_by_key.pop("PortRecur", None)  # Old compiler's kv-cache key
+         alloc_memory_by_key.pop("DramTensor", None)  # kv-cache
+         kernel_size = alloc_memory_by_key.pop("Kernel")  # model weight
+
+         # Get the maximum number of blocks that can be allocated
+         buffer = sum(alloc_memory_by_key.values())
+         max_num_blocks = cls.get_maximum_num_blocks(
+             config=model_config,
+             tensor_parallel_size=rbln_config.tensor_parallel_size,
+             kvcache_block_size=rbln_config.kvcache_block_size,
+             kernel_size=kernel_size,
+             buffer=buffer,
+         )
+
+         # Since our estimation logic is not always accurate,
+         # users can set `kvcache_num_blocks` up to `max_num_blocks`.
+         # If the memory is not enough, the model will fail to compile.
+         if rbln_config.kvcache_num_blocks < max_num_blocks:
+             logger.warning(
+                 f"Current `kvcache_num_blocks` setting is {rbln_config.kvcache_num_blocks}. "
+                 "Our analysis indicates that additional memory is available for more blocks. "
+                 f"Consider increasing `kvcache_num_blocks` to {max_num_blocks} for potentially improved performance. "
+                 "Please be advised that our memory estimation algorithm has limitations, "
+                 "and increasing this value may not guarantee successful model compilation."
+             )
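
For orientation, the helpers above compose as follows. This is a minimal sketch, assuming an installed 0.9.x wheel exposes the module at the path shown; the Llama-7B-style config figures are illustrative assumptions, not measured values.

    from types import SimpleNamespace

    from optimum.rbln.transformers.modeling_attention_utils import (
        RBLNDecoderOnlyFlashAttentionMixin,
        set_default_values,
        validate_attention_method,
    )

    # Requesting flash attention for a 64k context: the partition length
    # defaults to 16_384 and the block size mirrors it.
    attn_impl, partition_len, block_size = set_default_values(
        attn_impl="flash_attn", max_seq_len=65_536
    )
    validate_attention_method(attn_impl, partition_len, block_size, max_seq_len=65_536)

    # Estimate how many KV-cache blocks fit in device DRAM for a
    # hypothetical Llama-7B-like config sharded over 4 devices.
    config = SimpleNamespace(
        num_attention_heads=32,
        num_hidden_layers=32,
        hidden_size=4096,
        num_key_value_heads=32,
        vocab_size=32_000,
    )
    max_blocks = RBLNDecoderOnlyFlashAttentionMixin.get_maximum_num_blocks(
        config=config,
        tensor_parallel_size=4,
        kvcache_block_size=block_size,
        nbits_per_param=16,
        n_model_params=7_000_000_000,
    )
    print(attn_impl, partition_len, block_size, max_blocks)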
optimum/rbln/transformers/modeling_generic.py
@@ -23,6 +23,7 @@ different model architectures.
  import inspect
  from typing import TYPE_CHECKING, Optional, Union

+ from torch import nn
  from transformers import (
      AutoModel,
      AutoModelForAudioClassification,
@@ -34,10 +35,7 @@ from transformers import (
      AutoModelForTextEncoding,
      PretrainedConfig,
  )
- from transformers.modeling_outputs import (
-     BaseModelOutput,
-     QuestionAnsweringModelOutput,
- )
+ from transformers.modeling_outputs import BaseModelOutput, QuestionAnsweringModelOutput

  from ..configuration_utils import RBLNCompileConfig
  from ..modeling import RBLNModel
@@ -60,6 +58,28 @@ class RBLNTransformerEncoder(RBLNModel):
      rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
      rbln_dtype = "int64"

+     @classmethod
+     def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNTransformerEncoderConfig) -> nn.Module:
+         class TransformerEncoderWrapper(nn.Module):
+             # Parameters to disable for RBLN compilation
+             DISABLED_PARAMS = {"return_dict", "use_cache"}
+
+             def __init__(self, model: "PreTrainedModel", rbln_config: RBLNTransformerEncoderConfig):
+                 super().__init__()
+                 self.model = model
+                 self.rbln_config = rbln_config
+                 self._forward_signature = inspect.signature(model.forward)
+
+             def forward(self, *args, **kwargs):
+                 # Disable parameters that are not compatible with RBLN compilation
+                 for param_name in self.DISABLED_PARAMS:
+                     if param_name in self._forward_signature.parameters:
+                         kwargs[param_name] = False
+
+                 return self.model(*args, **kwargs)
+
+         return TransformerEncoderWrapper(model, rbln_config).eval()
+
      @classmethod
      def _update_rbln_config(
          cls,
@@ -130,10 +150,18 @@ class RBLNTransformerEncoder(RBLNModel):
                  "This is an internal error. Please report it to the developers."
              )

-         input_info = [
-             (model_input_name, [rbln_config.batch_size, rbln_config.max_seq_len], cls.rbln_dtype)
-             for model_input_name in rbln_config.model_input_names
-         ]
+         if rbln_config.model_input_shapes is None:
+             input_info = [
+                 (model_input_name, [rbln_config.batch_size, rbln_config.max_seq_len], cls.rbln_dtype)
+                 for model_input_name in rbln_config.model_input_names
+             ]
+         else:
+             input_info = [
+                 (model_input_name, model_input_shape, cls.rbln_dtype)
+                 for model_input_name, model_input_shape in zip(
+                     rbln_config.model_input_names, rbln_config.model_input_shapes
+                 )
+             ]

          rbln_config.set_compile_cfgs([RBLNCompileConfig(input_info=input_info)])
          return rbln_config
@@ -203,7 +231,6 @@ class RBLNModelForQuestionAnswering(RBLNTransformerEncoder):

      def _prepare_output(self, output, return_dict):
          # Prepare QuestionAnswering specific output format.
-
          start_logits, end_logits = output

          if not return_dict:
@@ -240,6 +267,20 @@ class RBLNModelForImageClassification(RBLNImageModel):
  class RBLNModelForDepthEstimation(RBLNImageModel):
      auto_model_class = AutoModelForDepthEstimation

+     @classmethod
+     def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNImageModelConfig):
+         class ImageModelWrapper(nn.Module):
+             def __init__(self, model: "PreTrainedModel", rbln_config: RBLNImageModelConfig):
+                 super().__init__()
+                 self.model = model
+                 self.rbln_config = rbln_config
+
+             def forward(self, *args, **kwargs):
+                 output = self.model(*args, return_dict=True, **kwargs)
+                 return output.predicted_depth
+
+         return ImageModelWrapper(model, rbln_config).eval()
+

  class RBLNModelForAudioClassification(RBLNModel):
      """
@@ -248,6 +289,7 @@ class RBLNModelForAudioClassification(RBLNModel):

      A class to convert and run pre-trained transformers based AudioClassification models on RBLN devices.
      It implements the methods to convert a pre-trained transformers AudioClassification model into a RBLN transformer model by:
+
      - transferring the checkpoint weights of the original into an optimized RBLN graph,
      - compiling the resulting graph using the RBLN compiler.

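Both wrap_model_if_needed hunks above follow the same pattern: wrap the Hugging Face module so that arguments incompatible with tracing are pinned before compilation. A self-contained sketch of that behavior, using a hypothetical TinyEncoder in place of a real transformers model:

    import inspect

    import torch
    from torch import nn


    class TinyEncoder(nn.Module):
        def forward(self, input_ids, return_dict=True, use_cache=True):
            # A real encoder would return a ModelOutput; a dict keeps the sketch short.
            return {"return_dict": return_dict, "use_cache": use_cache}


    class Wrapper(nn.Module):
        DISABLED_PARAMS = {"return_dict", "use_cache"}

        def __init__(self, model):
            super().__init__()
            self.model = model
            self._sig = inspect.signature(model.forward)

        def forward(self, *args, **kwargs):
            for name in self.DISABLED_PARAMS:
                if name in self._sig.parameters:
                    kwargs[name] = False  # force tuple-style outputs for tracing
            return self.model(*args, **kwargs)


    out = Wrapper(TinyEncoder()).eval()(torch.zeros(1, 8, dtype=torch.int64))
    assert out == {"return_dict": False, "use_cache": False}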
optimum/rbln/transformers/modeling_outputs.py (new file)
@@ -0,0 +1,37 @@
+ # Copyright 2025 Rebellions Inc. All rights reserved.
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at:
+
+ #     http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from dataclasses import dataclass
+ from typing import Optional, Tuple
+
+ import torch
+ from transformers.modeling_outputs import ModelOutput
+
+
+ @dataclass
+ class RBLNDecoderOnlyOutput(ModelOutput):
+     logits: torch.FloatTensor = None
+     generate_idx: torch.Tensor = None
+     padded_cache_lengths: int = None
+
+
+ @dataclass
+ class RBLNGemma3ForCausalLMOutput(RBLNDecoderOnlyOutput):
+     attention_mask: Optional[torch.Tensor] = None
+
+
+ @dataclass
+ class RBLNSeq2SeqTSDecoderOutput(ModelOutput):
+     last_hidden_states: torch.FloatTensor = None
+     params: Tuple[torch.FloatTensor] = None
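
Because these classes derive from transformers' ModelOutput, they get dict-like and tuple-like access for free. A quick illustration (the import path follows this diff; the shapes are made up):

    import torch

    from optimum.rbln.transformers.modeling_outputs import RBLNDecoderOnlyOutput

    out = RBLNDecoderOnlyOutput(
        logits=torch.zeros(1, 1, 32_000),
        generate_idx=torch.tensor([[7]]),
        padded_cache_lengths=0,
    )
    assert out["logits"] is out.logits  # key access
    assert out[0] is out.logits  # positional access follows field order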
optimum/rbln/transformers/models/__init__.py
@@ -36,6 +36,8 @@ _import_structure = {
          "RBLNAutoModelForSpeechSeq2Seq",
          "RBLNAutoModelForVision2Seq",
          "RBLNAutoModelForImageTextToText",
+         "RBLNAutoModelForTextEncoding",
+         "RBLNAutoModelForZeroShotObjectDetection",
      ],
      "bart": [
          "RBLNBartForConditionalGeneration",
@@ -73,6 +75,10 @@ _import_structure = {
          "RBLNColPaliForRetrieval",
          "RBLNColPaliForRetrievalConfig",
      ],
+     "colqwen2": [
+         "RBLNColQwen2ForRetrieval",
+         "RBLNColQwen2ForRetrievalConfig",
+     ],
      "distilbert": [
          "RBLNDistilBertForQuestionAnswering",
          "RBLNDistilBertForQuestionAnsweringConfig",
@@ -83,36 +89,60 @@ _import_structure = {
          "RBLNQwen2_5_VLForConditionalGeneration",
          "RBLNQwen2_5_VLForConditionalGenerationConfig",
      ],
+     "qwen2_vl": [
+         "RBLNQwen2VisionTransformerPretrainedModel",
+         "RBLNQwen2VisionTransformerPretrainedModelConfig",
+         "RBLNQwen2VLForConditionalGeneration",
+         "RBLNQwen2VLForConditionalGenerationConfig",
+     ],
      "decoderonly": [
+         "RBLNDecoderOnlyModelConfig",
+         "RBLNDecoderOnlyModel",
          "RBLNDecoderOnlyModelForCausalLM",
          "RBLNDecoderOnlyModelForCausalLMConfig",
+         "RBLNLoRAAdapterConfig",
+         "RBLNLoRAConfig",
      ],
+     "depth_anything": ["RBLNDepthAnythingForDepthEstimationConfig", "RBLNDepthAnythingForDepthEstimation"],
      "dpt": [
          "RBLNDPTForDepthEstimation",
          "RBLNDPTForDepthEstimationConfig",
      ],
      "exaone": ["RBLNExaoneForCausalLM", "RBLNExaoneForCausalLMConfig"],
-     "gemma": ["RBLNGemmaForCausalLM", "RBLNGemmaForCausalLMConfig"],
+     "gemma": ["RBLNGemmaForCausalLM", "RBLNGemmaForCausalLMConfig", "RBLNGemmaModel", "RBLNGemmaModelConfig"],
      "gemma3": [
          "RBLNGemma3ForCausalLM",
          "RBLNGemma3ForCausalLMConfig",
          "RBLNGemma3ForConditionalGeneration",
          "RBLNGemma3ForConditionalGenerationConfig",
      ],
-     "gpt2": ["RBLNGPT2LMHeadModel", "RBLNGPT2LMHeadModelConfig"],
+     "gpt2": ["RBLNGPT2LMHeadModel", "RBLNGPT2LMHeadModelConfig", "RBLNGPT2Model", "RBLNGPT2ModelConfig"],
      "idefics3": [
          "RBLNIdefics3VisionTransformer",
          "RBLNIdefics3ForConditionalGeneration",
          "RBLNIdefics3ForConditionalGenerationConfig",
          "RBLNIdefics3VisionTransformerConfig",
      ],
-     "llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig"],
-     "opt": ["RBLNOPTForCausalLM", "RBLNOPTForCausalLMConfig"],
+     "llava": ["RBLNLlavaForConditionalGeneration", "RBLNLlavaForConditionalGenerationConfig"],
+     "llama": ["RBLNLlamaForCausalLM", "RBLNLlamaForCausalLMConfig", "RBLNLlamaModel", "RBLNLlamaModelConfig"],
+     "opt": ["RBLNOPTForCausalLM", "RBLNOPTForCausalLMConfig", "RBLNOPTModel", "RBLNOPTModelConfig"],
+     "pegasus": [
+         "RBLNPegasusForConditionalGeneration",
+         "RBLNPegasusModel",
+         "RBLNPegasusForConditionalGenerationConfig",
+         "RBLNPegasusModelConfig",
+     ],
      "llava_next": ["RBLNLlavaNextForConditionalGeneration", "RBLNLlavaNextForConditionalGenerationConfig"],
      "midm": ["RBLNMidmLMHeadModel", "RBLNMidmLMHeadModelConfig"],
-     "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
-     "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
-     "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
+     "pixtral": ["RBLNPixtralVisionModel", "RBLNPixtralVisionModelConfig"],
+     "mistral": [
+         "RBLNMistralForCausalLM",
+         "RBLNMistralForCausalLMConfig",
+         "RBLNMistralModel",
+         "RBLNMistralModelConfig",
+     ],
+     "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig", "RBLNPhiModel", "RBLNPhiModelConfig"],
+     "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig", "RBLNQwen2Model", "RBLNQwen2ModelConfig"],
      "qwen3": ["RBLNQwen3ForCausalLM", "RBLNQwen3ForCausalLMConfig", "RBLNQwen3Model", "RBLNQwen3ModelConfig"],
      "resnet": ["RBLNResNetForImageClassification", "RBLNResNetForImageClassificationConfig"],
      "roberta": [
@@ -125,6 +155,10 @@ _import_structure = {
          "RBLNSiglipVisionModel",
          "RBLNSiglipVisionModelConfig",
      ],
+     "swin": [
+         "RBLNSwinBackbone",
+         "RBLNSwinBackboneConfig",
+     ],
      "time_series_transformer": [
          "RBLNTimeSeriesTransformerForPrediction",
          "RBLNTimeSeriesTransformerForPredictionConfig",
@@ -144,13 +178,18 @@ _import_structure = {
          "RBLNXLMRobertaForSequenceClassification",
          "RBLNXLMRobertaForSequenceClassificationConfig",
      ],
+     "grounding_dino": [
+         "RBLNGroundingDinoForObjectDetection",
+         "RBLNGroundingDinoForObjectDetectionConfig",
+         "RBLNGroundingDinoEncoder",
+         "RBLNGroundingDinoEncoderConfig",
+         "RBLNGroundingDinoDecoder",
+         "RBLNGroundingDinoDecoderConfig",
+     ],
  }

  if TYPE_CHECKING:
-     from .audio_spectrogram_transformer import (
-         RBLNASTForAudioClassification,
-         RBLNASTForAudioClassificationConfig,
-     )
+     from .audio_spectrogram_transformer import RBLNASTForAudioClassification, RBLNASTForAudioClassificationConfig
      from .auto import (
          RBLNAutoModel,
          RBLNAutoModelForAudioClassification,
@@ -164,7 +203,9 @@ if TYPE_CHECKING:
          RBLNAutoModelForSeq2SeqLM,
          RBLNAutoModelForSequenceClassification,
          RBLNAutoModelForSpeechSeq2Seq,
+         RBLNAutoModelForTextEncoding,
          RBLNAutoModelForVision2Seq,
+         RBLNAutoModelForZeroShotObjectDetection,
      )
      from .bart import (
          RBLNBartForConditionalGeneration,
@@ -198,50 +239,69 @@ if TYPE_CHECKING:
          RBLNCLIPVisionModelWithProjection,
          RBLNCLIPVisionModelWithProjectionConfig,
      )
-     from .colpali import (
-         RBLNColPaliForRetrieval,
-         RBLNColPaliForRetrievalConfig,
-     )
+     from .colpali import RBLNColPaliForRetrieval, RBLNColPaliForRetrievalConfig
+     from .colqwen2 import RBLNColQwen2ForRetrieval, RBLNColQwen2ForRetrievalConfig
      from .decoderonly import (
+         RBLNDecoderOnlyModel,
+         RBLNDecoderOnlyModelConfig,
          RBLNDecoderOnlyModelForCausalLM,
          RBLNDecoderOnlyModelForCausalLMConfig,
+         RBLNLoRAAdapterConfig,
+         RBLNLoRAConfig,
      )
-     from .distilbert import (
-         RBLNDistilBertForQuestionAnswering,
-         RBLNDistilBertForQuestionAnsweringConfig,
-     )
-     from .dpt import (
-         RBLNDPTForDepthEstimation,
-         RBLNDPTForDepthEstimationConfig,
-     )
+     from .depth_anything import RBLNDepthAnythingForDepthEstimation, RBLNDepthAnythingForDepthEstimationConfig
+     from .distilbert import RBLNDistilBertForQuestionAnswering, RBLNDistilBertForQuestionAnsweringConfig
+     from .dpt import RBLNDPTForDepthEstimation, RBLNDPTForDepthEstimationConfig
      from .exaone import RBLNExaoneForCausalLM, RBLNExaoneForCausalLMConfig
-     from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig
+     from .gemma import RBLNGemmaForCausalLM, RBLNGemmaForCausalLMConfig, RBLNGemmaModel, RBLNGemmaModelConfig
      from .gemma3 import (
          RBLNGemma3ForCausalLM,
          RBLNGemma3ForCausalLMConfig,
          RBLNGemma3ForConditionalGeneration,
          RBLNGemma3ForConditionalGenerationConfig,
      )
-     from .gpt2 import RBLNGPT2LMHeadModel, RBLNGPT2LMHeadModelConfig
+     from .gpt2 import RBLNGPT2LMHeadModel, RBLNGPT2LMHeadModelConfig, RBLNGPT2Model, RBLNGPT2ModelConfig
+     from .grounding_dino import (
+         RBLNGroundingDinoDecoder,
+         RBLNGroundingDinoDecoderConfig,
+         RBLNGroundingDinoEncoder,
+         RBLNGroundingDinoEncoderConfig,
+         RBLNGroundingDinoForObjectDetection,
+         RBLNGroundingDinoForObjectDetectionConfig,
+     )
      from .idefics3 import (
          RBLNIdefics3ForConditionalGeneration,
          RBLNIdefics3ForConditionalGenerationConfig,
          RBLNIdefics3VisionTransformer,
          RBLNIdefics3VisionTransformerConfig,
      )
-     from .llama import RBLNLlamaForCausalLM, RBLNLlamaForCausalLMConfig
+     from .llama import RBLNLlamaForCausalLM, RBLNLlamaForCausalLMConfig, RBLNLlamaModel, RBLNLlamaModelConfig
+     from .llava import RBLNLlavaForConditionalGeneration, RBLNLlavaForConditionalGenerationConfig
      from .llava_next import RBLNLlavaNextForConditionalGeneration, RBLNLlavaNextForConditionalGenerationConfig
      from .midm import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
-     from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
-     from .opt import RBLNOPTForCausalLM, RBLNOPTForCausalLMConfig
-     from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
-     from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
+     from .mistral import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig, RBLNMistralModel, RBLNMistralModelConfig
+     from .opt import RBLNOPTForCausalLM, RBLNOPTForCausalLMConfig, RBLNOPTModel, RBLNOPTModelConfig
+     from .pegasus import (
+         RBLNPegasusForConditionalGeneration,
+         RBLNPegasusForConditionalGenerationConfig,
+         RBLNPegasusModel,
+         RBLNPegasusModelConfig,
+     )
+     from .phi import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig, RBLNPhiModel, RBLNPhiModelConfig
+     from .pixtral import RBLNPixtralVisionModel, RBLNPixtralVisionModelConfig
+     from .qwen2 import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig, RBLNQwen2Model, RBLNQwen2ModelConfig
      from .qwen2_5_vl import (
          RBLNQwen2_5_VisionTransformerPretrainedModel,
          RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
          RBLNQwen2_5_VLForConditionalGeneration,
          RBLNQwen2_5_VLForConditionalGenerationConfig,
      )
+     from .qwen2_vl import (
+         RBLNQwen2VisionTransformerPretrainedModel,
+         RBLNQwen2VisionTransformerPretrainedModelConfig,
+         RBLNQwen2VLForConditionalGeneration,
+         RBLNQwen2VLForConditionalGenerationConfig,
+     )
      from .qwen3 import RBLNQwen3ForCausalLM, RBLNQwen3ForCausalLMConfig, RBLNQwen3Model, RBLNQwen3ModelConfig
      from .resnet import RBLNResNetForImageClassification, RBLNResNetForImageClassificationConfig
      from .roberta import (
@@ -251,6 +311,7 @@ if TYPE_CHECKING:
          RBLNRobertaForSequenceClassificationConfig,
      )
      from .siglip import RBLNSiglipVisionModel, RBLNSiglipVisionModelConfig
+     from .swin import RBLNSwinBackbone, RBLNSwinBackboneConfig
      from .t5 import (
          RBLNT5EncoderModel,
          RBLNT5EncoderModelConfig,
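
The _import_structure entries above drive transformers-style lazy imports: each new name becomes an attribute of the package that only triggers its submodule import on first access. A sketch, assuming this module keeps the usual _LazyModule wiring:

    import optimum.rbln.transformers.models as rbln_models

    # Attribute access imports .colqwen2 on first use only.
    retriever_cls = rbln_models.RBLNColQwen2ForRetrieval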
optimum/rbln/transformers/models/auto/__init__.py
@@ -25,5 +25,7 @@ from .modeling_auto import (
      RBLNAutoModelForSeq2SeqLM,
      RBLNAutoModelForSequenceClassification,
      RBLNAutoModelForSpeechSeq2Seq,
+     RBLNAutoModelForTextEncoding,
      RBLNAutoModelForVision2Seq,
+     RBLNAutoModelForZeroShotObjectDetection,
  )
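
A hypothetical end-to-end use of one of the newly exported auto classes, mirroring the established RBLNAutoModel* API (the checkpoint id and kwargs are illustrative, not tested against this release):

    from optimum.rbln import RBLNAutoModelForZeroShotObjectDetection

    model = RBLNAutoModelForZeroShotObjectDetection.from_pretrained(
        "IDEA-Research/grounding-dino-tiny",  # a Grounding DINO checkpoint
        export=True,  # compile from PyTorch weights at load time
    )
    model.save_pretrained("grounding-dino-rbln")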