optimum-rbln 0.8.2a1__py3-none-any.whl → 0.8.2a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of optimum-rbln might be problematic.
- optimum/rbln/__init__.py +8 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +16 -1
- optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +3 -0
- optimum/rbln/diffusers/modeling_diffusers.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +1 -0
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +1 -1
- optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +10 -2
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +4 -30
- optimum/rbln/modeling.py +1 -0
- optimum/rbln/transformers/__init__.py +8 -0
- optimum/rbln/transformers/models/__init__.py +2 -0
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +7 -0
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +3 -0
- optimum/rbln/transformers/models/qwen3/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +71 -0
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +377 -0
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +275 -0
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +2 -0
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +2 -0
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -0
- optimum/rbln/utils/runtime_utils.py +28 -2
- {optimum_rbln-0.8.2a1.dist-info → optimum_rbln-0.8.2a2.dist-info}/METADATA +1 -1
- {optimum_rbln-0.8.2a1.dist-info → optimum_rbln-0.8.2a2.dist-info}/RECORD +28 -24
- {optimum_rbln-0.8.2a1.dist-info → optimum_rbln-0.8.2a2.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.2a1.dist-info → optimum_rbln-0.8.2a2.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__init__.py
CHANGED
@@ -110,6 +110,10 @@ _import_structure = {
         "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
         "RBLNQwen2_5_VLForConditionalGeneration",
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
+        "RBLNQwen3ForCausalLM",
+        "RBLNQwen3ForCausalLMConfig",
+        "RBLNQwen3Model",
+        "RBLNQwen3ModelConfig",
         "RBLNResNetForImageClassification",
         "RBLNResNetForImageClassificationConfig",
         "RBLNRobertaForMaskedLM",
@@ -357,6 +361,10 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
+        RBLNQwen3ForCausalLM,
+        RBLNQwen3ForCausalLMConfig,
+        RBLNQwen3Model,
+        RBLNQwen3ModelConfig,
         RBLNResNetForImageClassification,
         RBLNResNetForImageClassificationConfig,
         RBLNRobertaForMaskedLM,
optimum/rbln/__version__.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.8.2a1'
-__version_tuple__ = version_tuple = (0, 8, 2, 'a1')
+__version__ = version = '0.8.2a2'
+__version_tuple__ = version_tuple = (0, 8, 2, 'a2')
optimum/rbln/configuration_utils.py
CHANGED
@@ -147,7 +147,7 @@ class RBLNCompileConfig:
         return asdict(self)
 
 
-RUNTIME_KEYWORDS = ["create_runtimes", "optimize_host_memory", "device", "device_map", "activate_profiler"]
+RUNTIME_KEYWORDS = ["create_runtimes", "optimize_host_memory", "device", "device_map", "activate_profiler", "timeout"]
 CONFIG_MAPPING: Dict[str, Type["RBLNModelConfig"]] = {}
 
 
@@ -481,6 +481,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         "device",
         "device_map",
         "activate_profiler",
+        "timeout",
     ]
     submodules: List[str] = []
     subclass_non_save_attributes = []
@@ -561,6 +562,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         activate_profiler: Optional[bool] = None,
         npu: Optional[str] = None,
         tensor_parallel_size: Optional[int] = None,
+        timeout: Optional[int] = None,
         optimum_rbln_version: Optional[str] = None,
         _compile_cfgs: List[RBLNCompileConfig] = [],
         **kwargs: Dict[str, Any],
@@ -577,6 +579,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
             activate_profiler (Optional[bool]): Whether to activate the profiler for performance analysis.
             npu (Optional[str]): The NPU device name to use for compilation.
             tensor_parallel_size (Optional[int]): Size for tensor parallelism to distribute the model across devices.
+            timeout (Optional[int]): The timeout for the runtime in seconds. If it isn't provided, it will be set to 60 by default.
             optimum_rbln_version (Optional[str]): The optimum-rbln version used for this configuration.
             _compile_cfgs (List[RBLNCompileConfig]): List of compilation configurations for the model.
             **kwargs: Additional keyword arguments.
@@ -599,6 +602,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         self._runtime_options["device"] = device
         self._runtime_options["device_map"] = device_map
         self._runtime_options["activate_profiler"] = activate_profiler
+        self._runtime_options["timeout"] = timeout
 
         # Automatically pass npu, tensor_parallel_size to compile_cfgs
         self.npu = npu
@@ -838,3 +842,14 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
     @activate_profiler.setter
     def activate_profiler(self, activate_profiler: bool):
         self._runtime_options["activate_profiler"] = activate_profiler
+
+    @property
+    def timeout(self):
+        context = ContextRblnConfig.get_current_context()["timeout"]
+        if context is not None:
+            return context
+        return self._runtime_options["timeout"]
+
+    @timeout.setter
+    def timeout(self, timeout: int):
+        self._runtime_options["timeout"] = timeout
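Taken together, these configuration_utils.py changes make `timeout` a regular runtime option alongside `device` and `activate_profiler`, exposed through the new property/setter pair and forwarded to `rebel.Runtime` by the modeling code below. A minimal usage sketch, assuming the config classes exported in this release (the 60-second fallback is quoted from the docstring above, not verified independently):

```python
from optimum.rbln import RBLNQwen3ForCausalLMConfig

# Hypothetical illustration: pass the new runtime option when building a config.
# Per the docstring above, runtimes fall back to 60 seconds when timeout is None.
config = RBLNQwen3ForCausalLMConfig(batch_size=1, timeout=120)

# The new property setter updates _runtime_options["timeout"], which the
# modeling code forwards as rebel.Runtime(..., timeout=...).
config.timeout = 90
```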
optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py
CHANGED
@@ -52,6 +52,9 @@ class RBLNCosmosTransformer3DModelConfig(RBLNModelConfig):
         Raises:
             ValueError: If batch_size is not a positive integer.
         """
+        if kwargs.get("timeout") is None:
+            kwargs["timeout"] = 80
+
         super().__init__(**kwargs)
         self.batch_size = batch_size or 1
         self.num_frames = num_frames or 121
optimum/rbln/diffusers/modeling_diffusers.py
CHANGED
@@ -230,6 +230,7 @@ class RBLNDiffusionMixin:
             create_runtimes=rbln_config.create_runtimes,
             optimize_host_mem=rbln_config.optimize_host_memory,
             activate_profiler=rbln_config.activate_profiler,
+            timeout=rbln_config.timeout,
         ):
             model = super().from_pretrained(pretrained_model_name_or_path=model_id, **kwargs)
 
optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py
CHANGED
@@ -200,6 +200,7 @@ class RBLNAutoencoderKLCosmos(RBLNModel):
                 tensor_type="pt",
                 device=device_val,
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             )
             for compiled_model, device_val in zip(compiled_models, device_vals)
         ]
optimum/rbln/diffusers/models/transformers/transformer_cosmos.py
CHANGED
@@ -279,7 +279,7 @@ class RBLNCosmosTransformer3DModel(RBLNModel):
                 tensor_type="pt",
                 device=rbln_config.device_map[DEFAULT_COMPILED_MODEL_NAME],
                 activate_profiler=rbln_config.activate_profiler,
-                timeout=
+                timeout=rbln_config.timeout,
             )
             for compiled_model in compiled_models
         ]
optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py
CHANGED
@@ -15,7 +15,7 @@
 from typing import Any, Dict, Optional, Tuple
 
 from ....configuration_utils import RBLNAutoConfig, RBLNModelConfig
-from ....transformers import RBLNSiglipVisionModelConfig
+from ....transformers import RBLNLlamaForCausalLMConfig, RBLNSiglipVisionModelConfig
 
 
 class RBLNVideoSafetyModelConfig(RBLNModelConfig):
@@ -75,7 +75,15 @@ class RBLNCosmosSafetyCheckerConfig(RBLNModelConfig):
         if height is not None and width is not None:
             image_size = (height, width)
 
-
+        tensor_parallel_size = kwargs.get("tensor_parallel_size")
+
+        self.aegis = self.init_submodule_config(
+            RBLNLlamaForCausalLMConfig,
+            aegis,
+            batch_size=batch_size,
+            tensor_parallel_size=tensor_parallel_size,
+        )
+
         self.siglip_encoder = self.init_submodule_config(
             RBLNSiglipVisionModelConfig,
             siglip_encoder,
optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py
CHANGED
@@ -127,25 +127,13 @@ class RBLNSigLIPEncoder(SigLIPEncoder):
 
             # We don't use RBLNSiglipModel, but we need to override get_image_features to return pooler_output
             self.model = RBLNSiglipVisionModel.from_pretrained(
-                self.checkpoint_dir,
-                rbln_device=rbln_config.siglip_encoder.device,
-                rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
-                rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
+                self.checkpoint_dir, rbln_config=rbln_config.siglip_encoder
             )
         else:
             super().__init__(model_name, checkpoint_id)
             model = self.model
             del self.model
-            self.model = RBLNSiglipVisionModel.from_model(
-                model,
-                rbln_device=rbln_config.siglip_encoder.device,
-                rbln_image_size=rbln_config.siglip_encoder.image_size,
-                rbln_npu=rbln_config.siglip_encoder.npu,
-                rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
-                rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
-            )
+            self.model = RBLNSiglipVisionModel.from_model(model, rbln_config=rbln_config.siglip_encoder)
         self.rbln_config = rbln_config
 
         # Override get_image_features to return pooler_output
@@ -336,28 +324,14 @@ class RBLNAegis(Aegis):
             torch.nn.Module.__init__(self)
             cache_dir = pathlib.Path(checkpoint_id) / "aegis"
             self.tokenizer = AutoTokenizer.from_pretrained(cache_dir)
-            self.model = RBLNAutoModelForCausalLM.from_pretrained(
-                cache_dir,
-                rbln_device=rbln_config.aegis.device,
-                rbln_create_runtimes=rbln_config.aegis.create_runtimes,
-                rbln_activate_profiler=rbln_config.aegis.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
-            )
+            self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.aegis)
 
         else:
             super().__init__(checkpoint_id, base_model_id, aegis_adapter)
             model = self.model.merge_and_unload() # peft merge
             del self.model
 
-            self.model = RBLNAutoModelForCausalLM.from_model(
-                model,
-                rbln_tensor_parallel_size=4,
-                rbln_device=rbln_config.aegis.device,
-                rbln_create_runtimes=rbln_config.aegis.create_runtimes,
-                rbln_npu=rbln_config.aegis.npu,
-                rbln_activate_profiler=rbln_config.aegis.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
-            )
+            self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.aegis)
 
         self.rbln_config = rbln_config
         self.dtype = torch.bfloat16
optimum/rbln/modeling.py
CHANGED
@@ -238,6 +238,7 @@ class RBLNModel(RBLNBaseModel):
                 tensor_type="pt",
                 device=rbln_config.device_map[DEFAULT_COMPILED_MODEL_NAME],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             )
             for compiled_model in compiled_models
         ]
optimum/rbln/transformers/__init__.py
CHANGED
@@ -98,6 +98,10 @@ _import_structure = {
         "RBLNQwen2_5_VLForConditionalGenerationConfig",
         "RBLNQwen2ForCausalLM",
         "RBLNQwen2ForCausalLMConfig",
+        "RBLNQwen3ForCausalLM",
+        "RBLNQwen3ForCausalLMConfig",
+        "RBLNQwen3Model",
+        "RBLNQwen3ModelConfig",
         "RBLNResNetForImageClassification",
         "RBLNResNetForImageClassificationConfig",
         "RBLNRobertaForMaskedLM",
@@ -204,6 +208,10 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGenerationConfig,
         RBLNQwen2ForCausalLM,
         RBLNQwen2ForCausalLMConfig,
+        RBLNQwen3ForCausalLM,
+        RBLNQwen3ForCausalLMConfig,
+        RBLNQwen3Model,
+        RBLNQwen3ModelConfig,
         RBLNResNetForImageClassification,
         RBLNResNetForImageClassificationConfig,
         RBLNRobertaForMaskedLM,
optimum/rbln/transformers/models/__init__.py
CHANGED
@@ -113,6 +113,7 @@ _import_structure = {
     "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
     "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
     "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
+    "qwen3": ["RBLNQwen3ForCausalLM", "RBLNQwen3ForCausalLMConfig", "RBLNQwen3Model", "RBLNQwen3ModelConfig"],
     "resnet": ["RBLNResNetForImageClassification", "RBLNResNetForImageClassificationConfig"],
     "roberta": [
         "RBLNRobertaForMaskedLM",
@@ -241,6 +242,7 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
     )
+    from .qwen3 import RBLNQwen3ForCausalLM, RBLNQwen3ForCausalLMConfig, RBLNQwen3Model, RBLNQwen3ModelConfig
     from .resnet import RBLNResNetForImageClassification, RBLNResNetForImageClassificationConfig
     from .roberta import (
         RBLNRobertaForMaskedLM,
optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py
CHANGED
@@ -1085,6 +1085,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                 tensor_type="pt",
                 device=rbln_config.device_map["prefill"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             *[
                 rebel.Runtime(
@@ -1092,6 +1093,7 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
                     tensor_type="pt",
                     device=rbln_config.device_map[f"decoder_batch_{batch_size}"],
                     activate_profiler=rbln_config.activate_profiler,
+                    timeout=rbln_config.timeout,
                 )
                 for i, batch_size in enumerate(rbln_config.decoder_batch_sizes)
            ],
@@ -1190,6 +1192,11 @@ class RBLNDecoderOnlyModelForCausalLM(RBLNModel):
         if cache_position is None:
             logits = []
             inputs = inputs_embeds if inputs_embeds is not None else input_ids
+            # for only use forward
+            if generate_idx is None:
+                generate_idx = attention_mask.sum(dim=-1, keepdim=True).int()
+            if padded_cache_lengths is None:
+                padded_cache_lengths = torch.zeros_like(generate_idx)
             batch_size = inputs.shape[0]
             for b_idx in range(batch_size):
                 cache_position = torch.arange(0, generate_idx[b_idx].item(), dtype=torch.int32).unsqueeze(0)
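The `# for only use forward` block above lets the model be called through `forward` directly, without `generate`, by deriving per-sample prompt lengths from the attention mask when `generate_idx` is not supplied. A standalone PyTorch illustration of that default (made-up values, not taken from the release):

```python
import torch

# Two sequences padded to length 5; ones mark real tokens.
attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                               [1, 1, 1, 1, 1]])

# Default generate_idx: number of attended tokens per sample, shape (batch, 1).
generate_idx = attention_mask.sum(dim=-1, keepdim=True).int()   # tensor([[3], [5]])

# Default padded_cache_lengths: zeros of the same shape.
padded_cache_lengths = torch.zeros_like(generate_idx)           # tensor([[0], [0]])
```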
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py
CHANGED
@@ -884,12 +884,14 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
                 tensor_type="pt",
                 device=rbln_config.device_map["prefill"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             rebel.Runtime(
                 compiled_models[1],
                 tensor_type="pt",
                 device=rbln_config.device_map["image_prefill"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             *[
                 rebel.Runtime(
@@ -897,6 +899,7 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
                     tensor_type="pt",
                     device=rbln_config.device_map[f"decoder_batch_{batch_size}"],
                     activate_profiler=rbln_config.activate_profiler,
+                    timeout=rbln_config.timeout,
                 )
                 for i, batch_size in enumerate(rbln_config.decoder_batch_sizes)
             ],
optimum/rbln/transformers/models/qwen3/__init__.py
ADDED
@@ -0,0 +1,16 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_qwen3 import RBLNQwen3ForCausalLMConfig, RBLNQwen3ModelConfig
+from .modeling_qwen3 import RBLNQwen3ForCausalLM, RBLNQwen3Model
optimum/rbln/transformers/models/qwen3/configuration_qwen3.py
ADDED
@@ -0,0 +1,71 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
+
+
+class RBLNQwen3ForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
+    """
+    Configuration class for RBLN Qwen3 models.
+
+    This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
+
+    Example usage:
+    ```python
+    from optimum.rbln import RBLNQwen3ForCausalLM, RBLNQwen3ForCausalLMConfig
+
+    # Create a configuration object
+    config = RBLNQwen3ForCausalLMConfig(
+        batch_size=1,
+        max_seq_len=40960,
+        tensor_parallel_size=4,
+        kvcache_partition_len=16384
+    )
+
+    # Use the configuration with from_pretrained
+    model = RBLNQwen3ForCausalLM.from_pretrained(
+        "Qwen/Qwen3-4B",
+        export=True,
+        rbln_config=config
+    )
+    ```
+    """
+
+
+class RBLNQwen3ModelConfig(RBLNDecoderOnlyModelForCausalLMConfig):
+    """
+    Configuration class for RBLN Qwen3 models.
+
+    This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
+
+    Example usage:
+    ```python
+    from optimum.rbln import RBLNQwen3Model, RBLNQwen3ModelConfig
+
+    # Create a configuration object
+    config = RBLNQwen3ModelConfig(
+        batch_size=1,
+        max_seq_len=40960,
+        tensor_parallel_size=4,
+        kvcache_partition_len=16384
+    )
+
+    # Use the configuration with from_pretrained
+    model = RBLNQwen3Model.from_pretrained(
+        "Qwen/Qwen3-Embedding-4B",
+        export=True,
+        rbln_config=config
+    )
+    ```
+    """