optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +116 -9
- optimum/rbln/__version__.py +16 -3
- optimum/rbln/cli.py +660 -0
- optimum/rbln/configuration_utils.py +171 -43
- optimum/rbln/diffusers/__init__.py +19 -0
- optimum/rbln/diffusers/configurations/__init__.py +3 -0
- optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +3 -3
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +1 -1
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +3 -3
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +4 -4
- optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +12 -4
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +9 -4
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +3 -3
- optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +3 -3
- optimum/rbln/diffusers/configurations/pipelines/__init__.py +3 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +35 -19
- optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +14 -11
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -20
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +13 -9
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +17 -13
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +17 -10
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
- optimum/rbln/diffusers/modeling_diffusers.py +33 -18
- optimum/rbln/diffusers/models/__init__.py +4 -0
- optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +32 -3
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +32 -6
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
- optimum/rbln/diffusers/models/autoencoders/vae.py +27 -8
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +32 -3
- optimum/rbln/diffusers/models/controlnet.py +16 -1
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +17 -3
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +26 -3
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +23 -2
- optimum/rbln/diffusers/models/unets/__init__.py +1 -0
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +23 -4
- optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
- optimum/rbln/diffusers/pipelines/__init__.py +15 -5
- optimum/rbln/diffusers/pipelines/auto_pipeline.py +307 -0
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +20 -0
- optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +23 -12
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +16 -46
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +31 -1
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +31 -1
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -6
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
- optimum/rbln/modeling.py +50 -24
- optimum/rbln/modeling_base.py +116 -35
- optimum/rbln/ops/attn.py +158 -0
- optimum/rbln/ops/flash_attn.py +166 -0
- optimum/rbln/ops/kv_cache_update.py +5 -0
- optimum/rbln/ops/linear.py +7 -0
- optimum/rbln/transformers/__init__.py +100 -0
- optimum/rbln/transformers/configuration_generic.py +7 -32
- optimum/rbln/transformers/modeling_attention_utils.py +385 -0
- optimum/rbln/transformers/modeling_generic.py +48 -65
- optimum/rbln/transformers/modeling_outputs.py +37 -0
- optimum/rbln/transformers/models/__init__.py +93 -30
- optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +28 -2
- optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +68 -5
- optimum/rbln/transformers/models/auto/__init__.py +2 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +92 -17
- optimum/rbln/transformers/models/auto/modeling_auto.py +45 -0
- optimum/rbln/transformers/models/bart/bart_architecture.py +2 -7
- optimum/rbln/transformers/models/bart/configuration_bart.py +2 -0
- optimum/rbln/transformers/models/bart/modeling_bart.py +23 -2
- optimum/rbln/transformers/models/bert/bert_architecture.py +16 -0
- optimum/rbln/transformers/models/bert/modeling_bert.py +93 -4
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +42 -11
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +135 -44
- optimum/rbln/transformers/models/clip/configuration_clip.py +21 -7
- optimum/rbln/transformers/models/clip/modeling_clip.py +183 -27
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +3 -6
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +37 -21
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +82 -104
- optimum/rbln/transformers/models/colqwen2/__init__.py +2 -0
- optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +233 -0
- optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +74 -0
- optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +446 -0
- optimum/rbln/transformers/models/decoderonly/__init__.py +3 -2
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +114 -37
- optimum/rbln/transformers/models/decoderonly/configuration_lora.py +411 -0
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +323 -316
- optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +508 -0
- optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +119 -0
- optimum/rbln/transformers/models/decoderonly/lora_architecture.py +204 -0
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +486 -892
- optimum/rbln/transformers/models/depth_anything/__init__.py +16 -0
- optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +24 -0
- optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +42 -0
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +24 -0
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +17 -0
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +42 -4
- optimum/rbln/transformers/models/gemma/__init__.py +2 -2
- optimum/rbln/transformers/models/gemma/configuration_gemma.py +9 -1
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -4
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +22 -1
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +49 -14
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +12 -2
- optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +245 -0
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +212 -504
- optimum/rbln/transformers/models/gpt2/__init__.py +2 -2
- optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +31 -3
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +10 -8
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +18 -1
- optimum/rbln/transformers/models/grounding_dino/__init__.py +10 -0
- optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +92 -0
- optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +599 -0
- optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +1048 -0
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +35 -7
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +29 -32
- optimum/rbln/transformers/models/llama/__init__.py +2 -2
- optimum/rbln/transformers/models/llama/configuration_llama.py +9 -1
- optimum/rbln/transformers/models/llama/modeling_llama.py +22 -1
- optimum/rbln/transformers/models/llava/__init__.py +16 -0
- optimum/rbln/transformers/models/llava/configuration_llava.py +72 -0
- optimum/rbln/transformers/models/llava/modeling_llava.py +490 -0
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +21 -6
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +234 -376
- optimum/rbln/transformers/models/midm/midm_architecture.py +4 -1
- optimum/rbln/transformers/models/midm/modeling_midm.py +42 -4
- optimum/rbln/transformers/models/mistral/__init__.py +2 -2
- optimum/rbln/transformers/models/mistral/configuration_mistral.py +9 -1
- optimum/rbln/transformers/models/mistral/mistral_architecture.py +1 -1
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +26 -3
- optimum/rbln/transformers/models/opt/__init__.py +2 -2
- optimum/rbln/transformers/models/opt/configuration_opt.py +8 -1
- optimum/rbln/transformers/models/opt/modeling_opt.py +29 -17
- optimum/rbln/transformers/models/opt/opt_architecture.py +4 -4
- optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
- optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +38 -0
- optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +71 -0
- optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +161 -0
- optimum/rbln/transformers/models/phi/__init__.py +2 -2
- optimum/rbln/transformers/models/phi/configuration_phi.py +9 -1
- optimum/rbln/transformers/models/phi/modeling_phi.py +10 -1
- optimum/rbln/transformers/models/phi/phi_architecture.py +11 -7
- optimum/rbln/transformers/models/pixtral/__init__.py +16 -0
- optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
- optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +322 -0
- optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
- optimum/rbln/transformers/models/qwen2/__init__.py +2 -2
- optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +9 -1
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +27 -1
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +21 -6
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +15 -22
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +28 -7
- optimum/rbln/transformers/models/qwen2_vl/__init__.py +19 -0
- optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +88 -0
- optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +513 -0
- optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +165 -0
- optimum/rbln/transformers/models/qwen3/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +71 -0
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +133 -0
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +31 -0
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +17 -0
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +73 -0
- optimum/rbln/transformers/models/roberta/modeling_roberta.py +33 -0
- optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +21 -16
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +60 -13
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -2
- optimum/rbln/transformers/models/siglip/__init__.py +2 -6
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +1 -1
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +21 -16
- optimum/rbln/transformers/models/swin/__init__.py +16 -0
- optimum/rbln/transformers/models/swin/configuration_swin.py +42 -0
- optimum/rbln/transformers/models/swin/modeling_swin.py +354 -0
- optimum/rbln/transformers/models/t5/configuration_t5.py +2 -0
- optimum/rbln/transformers/models/t5/modeling_t5.py +2 -2
- optimum/rbln/transformers/models/t5/t5_architecture.py +8 -1
- optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +3 -3
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +22 -16
- optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +7 -1
- optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
- optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +15 -3
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +61 -8
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +12 -13
- optimum/rbln/transformers/models/whisper/generation_whisper.py +62 -6
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +32 -5
- optimum/rbln/transformers/models/xlm_roberta/__init__.py +2 -8
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +43 -0
- optimum/rbln/transformers/utils/rbln_quantization.py +400 -75
- optimum/rbln/transformers/utils/rbln_runtime_wrapper.py +79 -0
- optimum/rbln/utils/deprecation.py +213 -0
- optimum/rbln/utils/hub.py +22 -50
- optimum/rbln/utils/runtime_utils.py +85 -17
- optimum/rbln/utils/submodule.py +31 -9
- {optimum_rbln-0.8.2a0.dist-info → optimum_rbln-0.9.3.dist-info}/METADATA +8 -7
- optimum_rbln-0.9.3.dist-info/RECORD +264 -0
- {optimum_rbln-0.8.2a0.dist-info → optimum_rbln-0.9.3.dist-info}/WHEEL +1 -1
- optimum_rbln-0.9.3.dist-info/entry_points.txt +2 -0
- optimum_rbln-0.8.2a0.dist-info/RECORD +0 -211
- {optimum_rbln-0.8.2a0.dist-info → optimum_rbln-0.9.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -33,9 +33,9 @@ if is_cosmos_guardrail_available():
|
|
|
33
33
|
from cosmos_guardrail import CosmosSafetyChecker
|
|
34
34
|
from cosmos_guardrail.cosmos_guardrail import (
|
|
35
35
|
COSMOS_GUARDRAIL_CHECKPOINT,
|
|
36
|
-
Aegis,
|
|
37
36
|
Blocklist,
|
|
38
37
|
GuardrailRunner,
|
|
38
|
+
LlamaGuard3,
|
|
39
39
|
ModelConfig,
|
|
40
40
|
RetinaFaceFilter,
|
|
41
41
|
SafetyClassifier,
|
|
@@ -55,7 +55,7 @@ else:
|
|
|
55
55
|
|
|
56
56
|
COSMOS_GUARDRAIL_CHECKPOINT = None
|
|
57
57
|
|
|
58
|
-
class
|
|
58
|
+
class LlamaGuard3(FailToImportCosmosGuardrail): ...
|
|
59
59
|
|
|
60
60
|
class Blocklist(FailToImportCosmosGuardrail): ...
|
|
61
61
|
|
|
@@ -127,25 +127,13 @@ class RBLNSigLIPEncoder(SigLIPEncoder):
|
|
|
127
127
|
|
|
128
128
|
# We don't use RBLNSiglipModel, but we need to override get_image_features to return pooler_output
|
|
129
129
|
self.model = RBLNSiglipVisionModel.from_pretrained(
|
|
130
|
-
self.checkpoint_dir,
|
|
131
|
-
rbln_device=rbln_config.siglip_encoder.device,
|
|
132
|
-
rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
|
|
133
|
-
rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
|
|
134
|
-
rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
|
|
130
|
+
self.checkpoint_dir, rbln_config=rbln_config.siglip_encoder
|
|
135
131
|
)
|
|
136
132
|
else:
|
|
137
133
|
super().__init__(model_name, checkpoint_id)
|
|
138
134
|
model = self.model
|
|
139
135
|
del self.model
|
|
140
|
-
self.model = RBLNSiglipVisionModel.from_model(
|
|
141
|
-
model,
|
|
142
|
-
rbln_device=rbln_config.siglip_encoder.device,
|
|
143
|
-
rbln_image_size=rbln_config.siglip_encoder.image_size,
|
|
144
|
-
rbln_npu=rbln_config.siglip_encoder.npu,
|
|
145
|
-
rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
|
|
146
|
-
rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
|
|
147
|
-
rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
|
|
148
|
-
)
|
|
136
|
+
self.model = RBLNSiglipVisionModel.from_model(model, rbln_config=rbln_config.siglip_encoder)
|
|
149
137
|
self.rbln_config = rbln_config
|
|
150
138
|
|
|
151
139
|
# Override get_image_features to return pooler_output
|
|
@@ -324,47 +312,31 @@ class RBLNVideoContentSafetyFilter(VideoContentSafetyFilter):
|
|
|
324
312
|
self.encoder.save_pretrained(checkpoint_id)
|
|
325
313
|
|
|
326
314
|
|
|
327
|
-
class
|
|
315
|
+
class RBLNLlamaGuard3(LlamaGuard3):
|
|
328
316
|
def __init__(
|
|
329
317
|
self,
|
|
330
318
|
checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
|
|
331
|
-
base_model_id: str = "meta-llama/
|
|
332
|
-
aegis_adapter: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
|
|
319
|
+
base_model_id: str = "meta-llama/Llama-Guard-3-8B",
|
|
333
320
|
rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
|
|
334
321
|
) -> None:
|
|
335
322
|
if is_compiled_dir(checkpoint_id):
|
|
336
323
|
torch.nn.Module.__init__(self)
|
|
337
|
-
cache_dir = pathlib.Path(checkpoint_id) / "
|
|
324
|
+
cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
|
|
338
325
|
self.tokenizer = AutoTokenizer.from_pretrained(cache_dir)
|
|
339
|
-
self.model = RBLNAutoModelForCausalLM.from_pretrained(
|
|
340
|
-
cache_dir,
|
|
341
|
-
rbln_device=rbln_config.aegis.device,
|
|
342
|
-
rbln_create_runtimes=rbln_config.aegis.create_runtimes,
|
|
343
|
-
rbln_activate_profiler=rbln_config.aegis.activate_profiler,
|
|
344
|
-
rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
|
|
345
|
-
)
|
|
326
|
+
self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.llamaguard3)
|
|
346
327
|
|
|
347
328
|
else:
|
|
348
|
-
super().__init__(checkpoint_id, base_model_id
|
|
349
|
-
model = self.model
|
|
329
|
+
super().__init__(checkpoint_id, base_model_id)
|
|
330
|
+
model = self.model
|
|
350
331
|
del self.model
|
|
351
|
-
|
|
352
|
-
self.model = RBLNAutoModelForCausalLM.from_model(
|
|
353
|
-
model,
|
|
354
|
-
rbln_tensor_parallel_size=4,
|
|
355
|
-
rbln_device=rbln_config.aegis.device,
|
|
356
|
-
rbln_create_runtimes=rbln_config.aegis.create_runtimes,
|
|
357
|
-
rbln_npu=rbln_config.aegis.npu,
|
|
358
|
-
rbln_activate_profiler=rbln_config.aegis.activate_profiler,
|
|
359
|
-
rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
|
|
360
|
-
)
|
|
332
|
+
self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.llamaguard3)
|
|
361
333
|
|
|
362
334
|
self.rbln_config = rbln_config
|
|
363
335
|
self.dtype = torch.bfloat16
|
|
364
336
|
self.device = torch.device("cpu")
|
|
365
337
|
|
|
366
338
|
def save_pretrained(self, checkpoint_id: str):
|
|
367
|
-
cache_dir = pathlib.Path(checkpoint_id) / "
|
|
339
|
+
cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
|
|
368
340
|
self.model.save_pretrained(cache_dir)
|
|
369
341
|
self.tokenizer.save_pretrained(cache_dir)
|
|
370
342
|
|
|
@@ -377,8 +349,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
|
|
|
377
349
|
def __init__(
|
|
378
350
|
self,
|
|
379
351
|
checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
|
|
380
|
-
|
|
381
|
-
aegis_adapter_id: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
|
|
352
|
+
llamaguard_model_id: str = "meta-llama/Llama-Guard-3-8B",
|
|
382
353
|
rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
|
|
383
354
|
) -> None:
|
|
384
355
|
torch.nn.Module.__init__(self)
|
|
@@ -395,10 +366,9 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
|
|
|
395
366
|
self.text_guardrail = GuardrailRunner(
|
|
396
367
|
safety_models=[
|
|
397
368
|
Blocklist(COSMOS_GUARDRAIL_CHECKPOINT), # Changed since it cannot be saved
|
|
398
|
-
|
|
369
|
+
RBLNLlamaGuard3(
|
|
399
370
|
checkpoint_id=checkpoint_id,
|
|
400
|
-
base_model_id=
|
|
401
|
-
aegis_adapter=aegis_adapter_id,
|
|
371
|
+
base_model_id=llamaguard_model_id,
|
|
402
372
|
rbln_config=rbln_config,
|
|
403
373
|
),
|
|
404
374
|
]
|
|
@@ -413,7 +383,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
|
|
|
413
383
|
|
|
414
384
|
def save_pretrained(self, save_dir: str):
|
|
415
385
|
for text_safety_models in self.text_guardrail.safety_models:
|
|
416
|
-
if isinstance(text_safety_models,
|
|
386
|
+
if isinstance(text_safety_models, RBLNLlamaGuard3):
|
|
417
387
|
text_safety_models.save_pretrained(save_dir)
|
|
418
388
|
|
|
419
389
|
for video_safety_models in self.video_guardrail.safety_models:
|
|
@@ -87,8 +87,38 @@ class RBLNCosmosTextToWorldPipeline(RBLNDiffusionMixin, CosmosTextToWorldPipelin
|
|
|
87
87
|
export: bool = False,
|
|
88
88
|
safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
|
|
89
89
|
rbln_config: Dict[str, Any] = {},
|
|
90
|
-
**kwargs:
|
|
90
|
+
**kwargs: Any,
|
|
91
91
|
):
|
|
92
|
+
"""
|
|
93
|
+
Load a pretrained diffusion pipeline from a model checkpoint, with optional compilation for RBLN NPUs.
|
|
94
|
+
|
|
95
|
+
This method has two distinct operating modes:
|
|
96
|
+
- When `export=True`: Takes a PyTorch-based diffusion model, compiles it for RBLN NPUs, and loads the compiled model
|
|
97
|
+
- When `export=False`: Loads an already compiled RBLN model from `model_id` without recompilation
|
|
98
|
+
|
|
99
|
+
It supports various diffusion pipelines including Stable Diffusion, Kandinsky, ControlNet, and other diffusers-based models.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
model_id (`str`):
|
|
103
|
+
The model ID or path to the pretrained model to load. Can be either:
|
|
104
|
+
|
|
105
|
+
- A model ID from the HuggingFace Hub
|
|
106
|
+
- A local path to a saved model directory
|
|
107
|
+
export:
|
|
108
|
+
If True, takes a PyTorch model from `model_id` and compiles it for RBLN NPU execution.
|
|
109
|
+
If False, loads an already compiled RBLN model from `model_id` without recompilation.
|
|
110
|
+
safety_checker:
|
|
111
|
+
Optional custom safety checker to use instead of the default one. Only used when `export=True`.
|
|
112
|
+
rbln_config:
|
|
113
|
+
Configuration options for RBLN compilation. Can include settings for specific submodules
|
|
114
|
+
such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
|
|
115
|
+
pipeline being compiled.
|
|
116
|
+
kwargs:
|
|
117
|
+
Additional arguments to pass to the underlying diffusion pipeline constructor or the
|
|
118
|
+
RBLN compilation process. These may include parameters specific to individual submodules
|
|
119
|
+
or the particular diffusion pipeline being used.
|
|
120
|
+
"""
|
|
121
|
+
|
|
92
122
|
rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
|
|
93
123
|
if safety_checker is None and export:
|
|
94
124
|
safety_checker = RBLNCosmosSafetyChecker(rbln_config=rbln_config.safety_checker)
|
|
@@ -87,8 +87,38 @@ class RBLNCosmosVideoToWorldPipeline(RBLNDiffusionMixin, CosmosVideoToWorldPipel
|
|
|
87
87
|
export: bool = False,
|
|
88
88
|
safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
|
|
89
89
|
rbln_config: Dict[str, Any] = {},
|
|
90
|
-
**kwargs:
|
|
90
|
+
**kwargs: Any,
|
|
91
91
|
):
|
|
92
|
+
"""
|
|
93
|
+
Load a pretrained diffusion pipeline from a model checkpoint, with optional compilation for RBLN NPUs.
|
|
94
|
+
|
|
95
|
+
This method has two distinct operating modes:
|
|
96
|
+
- When `export=True`: Takes a PyTorch-based diffusion model, compiles it for RBLN NPUs, and loads the compiled model
|
|
97
|
+
- When `export=False`: Loads an already compiled RBLN model from `model_id` without recompilation
|
|
98
|
+
|
|
99
|
+
It supports various diffusion pipelines including Stable Diffusion, Kandinsky, ControlNet, and other diffusers-based models.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
model_id (`str`):
|
|
103
|
+
The model ID or path to the pretrained model to load. Can be either:
|
|
104
|
+
|
|
105
|
+
- A model ID from the HuggingFace Hub
|
|
106
|
+
- A local path to a saved model directory
|
|
107
|
+
export:
|
|
108
|
+
If True, takes a PyTorch model from `model_id` and compiles it for RBLN NPU execution.
|
|
109
|
+
If False, loads an already compiled RBLN model from `model_id` without recompilation.
|
|
110
|
+
safety_checker:
|
|
111
|
+
Optional custom safety checker to use instead of the default one. Only used when `export=True`.
|
|
112
|
+
rbln_config:
|
|
113
|
+
Configuration options for RBLN compilation. Can include settings for specific submodules
|
|
114
|
+
such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
|
|
115
|
+
pipeline being compiled.
|
|
116
|
+
kwargs:
|
|
117
|
+
Additional arguments to pass to the underlying diffusion pipeline constructor or the
|
|
118
|
+
RBLN compilation process. These may include parameters specific to individual submodules
|
|
119
|
+
or the particular diffusion pipeline being used.
|
|
120
|
+
"""
|
|
121
|
+
|
|
92
122
|
rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
|
|
93
123
|
if safety_checker is None and export:
|
|
94
124
|
safety_checker = RBLNCosmosSafetyChecker(rbln_config=rbln_config.safety_checker)
|
|
@@ -22,12 +22,7 @@ from diffusers import (
|
|
|
22
22
|
UNet2DConditionModel,
|
|
23
23
|
VQModel,
|
|
24
24
|
)
|
|
25
|
-
from transformers import
|
|
26
|
-
CLIPImageProcessor,
|
|
27
|
-
CLIPTextModelWithProjection,
|
|
28
|
-
CLIPTokenizer,
|
|
29
|
-
CLIPVisionModelWithProjection,
|
|
30
|
-
)
|
|
25
|
+
from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection
|
|
31
26
|
|
|
32
27
|
from ...configurations import RBLNKandinskyV22CombinedPipelineConfig
|
|
33
28
|
from ...modeling_diffusers import RBLNDiffusionMixin
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .pipeline_stable_video_diffusion import RBLNStableVideoDiffusionPipeline
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from diffusers import StableVideoDiffusionPipeline
|
|
17
|
+
|
|
18
|
+
from ....utils.logging import get_logger
|
|
19
|
+
from ...configurations import RBLNStableVideoDiffusionPipelineConfig
|
|
20
|
+
from ...modeling_diffusers import RBLNDiffusionMixin
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
logger = get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class RBLNStableVideoDiffusionPipeline(RBLNDiffusionMixin, StableVideoDiffusionPipeline):
|
|
27
|
+
"""
|
|
28
|
+
RBLN-accelerated implementation of Stable Video Diffusion pipeline for image-to-video generation.
|
|
29
|
+
|
|
30
|
+
This pipeline compiles Stable Video Diffusion models to run efficiently on RBLN NPUs, enabling high-performance
|
|
31
|
+
inference for generating videos from images with optimized memory usage and throughput.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
original_class = StableVideoDiffusionPipeline
|
|
35
|
+
_rbln_config_class = RBLNStableVideoDiffusionPipelineConfig
|
|
36
|
+
_submodules = ["image_encoder", "unet", "vae"]
|
|
37
|
+
|
|
38
|
+
def handle_additional_kwargs(self, **kwargs):
|
|
39
|
+
compiled_num_frames = self.unet.rbln_config.num_frames
|
|
40
|
+
if compiled_num_frames is not None:
|
|
41
|
+
kwargs["num_frames"] = compiled_num_frames
|
|
42
|
+
|
|
43
|
+
compiled_decode_chunk_size = self.vae.rbln_config.decode_chunk_size
|
|
44
|
+
if compiled_decode_chunk_size is not None:
|
|
45
|
+
kwargs["decode_chunk_size"] = compiled_decode_chunk_size
|
|
46
|
+
return kwargs
|
optimum/rbln/modeling.py
CHANGED
|
@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, get_args, ge
|
|
|
19
19
|
import rebel
|
|
20
20
|
import torch
|
|
21
21
|
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
|
|
22
|
-
from transformers import
|
|
22
|
+
from transformers import PretrainedConfig
|
|
23
23
|
from transformers.modeling_outputs import BaseModelOutput
|
|
24
24
|
|
|
25
25
|
from .configuration_utils import DEFAULT_COMPILED_MODEL_NAME, RBLNModelConfig
|
|
@@ -35,8 +35,6 @@ logger = get_logger(__name__)
|
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
class RBLNModel(RBLNBaseModel):
|
|
38
|
-
_output_class = None
|
|
39
|
-
|
|
40
38
|
@classmethod
|
|
41
39
|
def update_kwargs(cls, kwargs):
|
|
42
40
|
# Update user-given kwargs to get proper pytorch model.
|
|
@@ -56,13 +54,16 @@ class RBLNModel(RBLNBaseModel):
|
|
|
56
54
|
pass
|
|
57
55
|
|
|
58
56
|
@classmethod
|
|
59
|
-
def
|
|
57
|
+
def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
|
|
60
58
|
# Wrap the model if needed.
|
|
61
59
|
return model
|
|
62
60
|
|
|
63
61
|
@classmethod
|
|
64
62
|
def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
|
|
65
|
-
|
|
63
|
+
if rbln_config._allow_no_compile_cfgs:
|
|
64
|
+
return {}
|
|
65
|
+
|
|
66
|
+
model = cls._wrap_model_if_needed(model, rbln_config)
|
|
66
67
|
rbln_compile_config = rbln_config.compile_cfgs[0]
|
|
67
68
|
compiled_model = cls.compile(
|
|
68
69
|
model,
|
|
@@ -72,6 +73,22 @@ class RBLNModel(RBLNBaseModel):
|
|
|
72
73
|
)
|
|
73
74
|
return compiled_model
|
|
74
75
|
|
|
76
|
+
@classmethod
|
|
77
|
+
def _update_rbln_config(
|
|
78
|
+
cls,
|
|
79
|
+
preprocessors: Optional[Any],
|
|
80
|
+
model: Optional["PreTrainedModel"] = None,
|
|
81
|
+
model_config: Optional["PretrainedConfig"] = None,
|
|
82
|
+
rbln_config: Optional[RBLNModelConfig] = None,
|
|
83
|
+
) -> RBLNModelConfig:
|
|
84
|
+
# Default implementation: return config as-is
|
|
85
|
+
# Subclasses should override to set compile_cfgs if needed
|
|
86
|
+
return rbln_config
|
|
87
|
+
|
|
88
|
+
@classmethod
|
|
89
|
+
def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"):
|
|
90
|
+
return model
|
|
91
|
+
|
|
75
92
|
@classmethod
|
|
76
93
|
def from_model(
|
|
77
94
|
cls,
|
|
@@ -80,18 +97,20 @@ class RBLNModel(RBLNBaseModel):
|
|
|
80
97
|
rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
|
|
81
98
|
model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
|
|
82
99
|
subfolder: str = "",
|
|
83
|
-
**kwargs:
|
|
100
|
+
**kwargs: Any,
|
|
84
101
|
) -> "RBLNModel":
|
|
85
102
|
"""
|
|
86
103
|
Converts and compiles a pre-trained HuggingFace library model into a RBLN model.
|
|
87
104
|
This method performs the actual model conversion and compilation process.
|
|
88
105
|
|
|
89
106
|
Args:
|
|
90
|
-
model: The PyTorch model to be compiled.
|
|
91
|
-
|
|
107
|
+
model (PreTrainedModel): The PyTorch model to be compiled.
|
|
108
|
+
The object must be an instance of the HuggingFace transformers PreTrainedModel class.
|
|
109
|
+
config (Optional[PretrainedConfig]): The configuration object associated with the model.
|
|
110
|
+
rbln_config (Optional[Union[RBLNModelConfig, Dict]]): Configuration for RBLN model compilation and runtime.
|
|
111
|
+
This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
|
|
92
112
|
For detailed configuration options, see the specific model's configuration class documentation.
|
|
93
|
-
|
|
94
|
-
kwargs: Additional keyword arguments. Arguments with the prefix 'rbln_' are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
|
|
113
|
+
kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
|
|
95
114
|
|
|
96
115
|
The method performs the following steps:
|
|
97
116
|
|
|
@@ -101,8 +120,10 @@ class RBLNModel(RBLNBaseModel):
|
|
|
101
120
|
4. Saves the compiled model and configurations
|
|
102
121
|
|
|
103
122
|
Returns:
|
|
104
|
-
A RBLN model instance ready for inference on RBLN NPU devices.
|
|
123
|
+
(RBLNModel): A RBLN model instance ready for inference on RBLN NPU devices.
|
|
105
124
|
"""
|
|
125
|
+
|
|
126
|
+
model = cls._reconstruct_model_if_needed(model)
|
|
106
127
|
preprocessors = kwargs.pop("preprocessors", [])
|
|
107
128
|
rbln_config, kwargs = cls.prepare_rbln_config(rbln_config=rbln_config, **kwargs)
|
|
108
129
|
|
|
@@ -121,9 +142,6 @@ class RBLNModel(RBLNBaseModel):
|
|
|
121
142
|
# Save configs
|
|
122
143
|
if config is None:
|
|
123
144
|
config = model.config
|
|
124
|
-
# remote_config
|
|
125
|
-
if hasattr(config, "auto_map") and "AutoConfig" in config.auto_map:
|
|
126
|
-
config = AutoConfig.from_pretrained(config._name_or_path, **kwargs)
|
|
127
145
|
|
|
128
146
|
if hasattr(model, "can_generate") and model.can_generate():
|
|
129
147
|
import json
|
|
@@ -149,6 +167,7 @@ class RBLNModel(RBLNBaseModel):
|
|
|
149
167
|
model=model,
|
|
150
168
|
model_save_dir=save_dir,
|
|
151
169
|
rbln_config=rbln_config,
|
|
170
|
+
preprocessors=preprocessors,
|
|
152
171
|
**kwargs,
|
|
153
172
|
)
|
|
154
173
|
else:
|
|
@@ -211,6 +230,7 @@ class RBLNModel(RBLNBaseModel):
|
|
|
211
230
|
**kwargs,
|
|
212
231
|
) -> "PreTrainedModel":
|
|
213
232
|
kwargs = cls.update_kwargs(kwargs)
|
|
233
|
+
|
|
214
234
|
return cls.get_hf_class().from_pretrained(
|
|
215
235
|
model_id,
|
|
216
236
|
subfolder=subfolder,
|
|
@@ -229,6 +249,9 @@ class RBLNModel(RBLNBaseModel):
|
|
|
229
249
|
compiled_models: List[rebel.RBLNCompiledModel],
|
|
230
250
|
rbln_config: RBLNModelConfig,
|
|
231
251
|
) -> List[rebel.Runtime]:
|
|
252
|
+
if len(rbln_config.compile_cfgs) == 0:
|
|
253
|
+
return []
|
|
254
|
+
|
|
232
255
|
if DEFAULT_COMPILED_MODEL_NAME not in rbln_config.device_map:
|
|
233
256
|
cls._raise_missing_compiled_file_error([DEFAULT_COMPILED_MODEL_NAME])
|
|
234
257
|
|
|
@@ -238,35 +261,38 @@ class RBLNModel(RBLNBaseModel):
|
|
|
238
261
|
tensor_type="pt",
|
|
239
262
|
device=rbln_config.device_map[DEFAULT_COMPILED_MODEL_NAME],
|
|
240
263
|
activate_profiler=rbln_config.activate_profiler,
|
|
264
|
+
timeout=rbln_config.timeout,
|
|
241
265
|
)
|
|
242
266
|
for compiled_model in compiled_models
|
|
243
267
|
]
|
|
244
268
|
|
|
245
|
-
def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs:
|
|
269
|
+
def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Any) -> Any:
|
|
246
270
|
"""
|
|
247
|
-
Defines the forward pass of
|
|
271
|
+
Defines the forward pass of `RBLNModel`. The interface mirrors HuggingFace conventions so it can act as a drop-in
|
|
272
|
+
replacement in many cases.
|
|
248
273
|
|
|
249
|
-
This method executes the compiled RBLN model on RBLN NPU devices while
|
|
250
|
-
|
|
251
|
-
|
|
274
|
+
This method executes the compiled RBLN model on RBLN NPU devices while remaining fully compatible with Hugging Face
|
|
275
|
+
Transformers and Diffusers APIs. In practice, `RBLNModel` can replace models built on `torch.nn.Module` — including
|
|
276
|
+
`transformers.PreTrainedModel` implementations and Diffusers components based on `diffusers.ModelMixin` — enabling
|
|
277
|
+
seamless integration into existing workflows.
|
|
252
278
|
|
|
253
279
|
Args:
|
|
254
|
-
|
|
280
|
+
args: Variable length argument list containing model inputs. The format matches the original
|
|
255
281
|
HuggingFace model's forward method signature (e.g., input_ids, attention_mask for
|
|
256
282
|
transformers models, or sample, timestep for diffusers models).
|
|
257
283
|
return_dict:
|
|
258
284
|
Whether to return outputs as a dictionary-like object or as a tuple. When `None`:
|
|
259
285
|
- For transformers models: Uses `self.config.use_return_dict` (typically `True`)
|
|
260
286
|
- For diffusers models: Defaults to `True`
|
|
261
|
-
|
|
287
|
+
kwargs: Arbitrary keyword arguments containing additional model inputs and parameters,
|
|
262
288
|
matching the original HuggingFace model's interface.
|
|
263
289
|
|
|
264
290
|
Returns:
|
|
265
291
|
Model outputs in the same format as the original HuggingFace model.
|
|
266
292
|
|
|
267
|
-
|
|
293
|
+
If `return_dict=True`, returns a dictionary-like object (e.g., BaseModelOutput,
|
|
268
294
|
CausalLMOutput) with named fields such as `logits`, `hidden_states`, etc.
|
|
269
|
-
|
|
295
|
+
If `return_dict=False`, returns a tuple containing the raw model outputs.
|
|
270
296
|
|
|
271
297
|
Note:
|
|
272
298
|
- This method maintains the exact same interface as the original HuggingFace model's forward method
|
|
@@ -288,7 +314,7 @@ class RBLNModel(RBLNBaseModel):
|
|
|
288
314
|
@classmethod
|
|
289
315
|
def get_hf_output_class(cls):
|
|
290
316
|
# Dynamically gets the output class from the corresponding HuggingFace model class.
|
|
291
|
-
if cls._output_class:
|
|
317
|
+
if "_output_class" in cls.__dict__ and cls._output_class is not None:
|
|
292
318
|
return cls._output_class
|
|
293
319
|
|
|
294
320
|
hf_class = cls.get_hf_class()
|