optimum-rbln 0.8.2rc0__py3-none-any.whl → 0.8.3a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of optimum-rbln might be problematic.

Files changed (91)
  1. optimum/rbln/__init__.py +4 -9
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +4 -4
  4. optimum/rbln/diffusers/__init__.py +1 -0
  5. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +2 -2
  6. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +2 -2
  7. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +2 -2
  8. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +2 -2
  9. optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +2 -2
  10. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +2 -2
  11. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +2 -2
  12. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +2 -2
  13. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +3 -3
  14. optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +2 -2
  15. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +4 -4
  16. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +2 -2
  17. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +2 -2
  18. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +2 -2
  19. optimum/rbln/diffusers/modeling_diffusers.py +1 -1
  20. optimum/rbln/diffusers/models/__init__.py +3 -13
  21. optimum/rbln/diffusers/pipelines/__init__.py +1 -5
  22. optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +11 -6
  23. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +14 -18
  24. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
  25. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -1
  26. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -6
  27. optimum/rbln/modeling.py +2 -2
  28. optimum/rbln/modeling_base.py +12 -4
  29. optimum/rbln/ops/attn.py +158 -0
  30. optimum/rbln/ops/flash_attn.py +166 -0
  31. optimum/rbln/transformers/__init__.py +2 -0
  32. optimum/rbln/transformers/configuration_generic.py +4 -4
  33. optimum/rbln/transformers/modeling_generic.py +1 -4
  34. optimum/rbln/transformers/modeling_outputs.py +37 -0
  35. optimum/rbln/transformers/models/__init__.py +6 -16
  36. optimum/rbln/transformers/models/auto/__init__.py +1 -0
  37. optimum/rbln/transformers/models/auto/modeling_auto.py +7 -0
  38. optimum/rbln/transformers/models/bart/bart_architecture.py +1 -3
  39. optimum/rbln/transformers/models/bart/configuration_bart.py +2 -0
  40. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +2 -2
  41. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +1 -5
  42. optimum/rbln/transformers/models/clip/configuration_clip.py +3 -3
  43. optimum/rbln/transformers/models/colpali/colpali_architecture.py +1 -4
  44. optimum/rbln/transformers/models/colpali/configuration_colpali.py +2 -2
  45. optimum/rbln/transformers/models/colpali/modeling_colpali.py +2 -10
  46. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +43 -174
  47. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +101 -91
  48. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +450 -0
  49. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +88 -0
  50. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +296 -986
  51. optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -4
  52. optimum/rbln/transformers/models/gemma/modeling_gemma.py +9 -0
  53. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +3 -3
  54. optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +217 -0
  55. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +19 -250
  56. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +2 -0
  57. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +2 -2
  58. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +3 -9
  59. optimum/rbln/transformers/models/llama/modeling_llama.py +12 -3
  60. optimum/rbln/transformers/models/llava/configuration_llava.py +2 -2
  61. optimum/rbln/transformers/models/llava/modeling_llava.py +53 -14
  62. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
  63. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +6 -16
  64. optimum/rbln/transformers/models/opt/modeling_opt.py +2 -30
  65. optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +4 -0
  66. optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +2 -0
  67. optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +1 -3
  68. optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +2 -2
  69. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +1 -4
  70. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +3 -3
  71. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +6 -15
  72. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +4 -7
  73. optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +77 -3
  74. optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +1 -4
  75. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +19 -2
  76. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +20 -1
  77. optimum/rbln/transformers/models/siglip/__init__.py +2 -6
  78. optimum/rbln/transformers/models/siglip/modeling_siglip.py +2 -2
  79. optimum/rbln/transformers/models/t5/configuration_t5.py +2 -0
  80. optimum/rbln/transformers/models/t5/t5_architecture.py +8 -1
  81. optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +2 -2
  82. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -14
  83. optimum/rbln/transformers/models/whisper/configuration_whisper.py +10 -2
  84. optimum/rbln/transformers/models/whisper/modeling_whisper.py +20 -1
  85. optimum/rbln/transformers/models/xlm_roberta/__init__.py +2 -8
  86. optimum/rbln/transformers/utils/rbln_quantization.py +365 -65
  87. optimum/rbln/utils/runtime_utils.py +3 -3
  88. {optimum_rbln-0.8.2rc0.dist-info → optimum_rbln-0.8.3a1.dist-info}/METADATA +1 -1
  89. {optimum_rbln-0.8.2rc0.dist-info → optimum_rbln-0.8.3a1.dist-info}/RECORD +91 -87
  90. {optimum_rbln-0.8.2rc0.dist-info → optimum_rbln-0.8.3a1.dist-info}/WHEEL +0 -0
  91. {optimum_rbln-0.8.2rc0.dist-info → optimum_rbln-0.8.3a1.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__init__.py CHANGED
@@ -46,6 +46,7 @@ _import_structure = {
         "RBLNAutoModelForSequenceClassification",
         "RBLNAutoModelForSpeechSeq2Seq",
         "RBLNAutoModelForVision2Seq",
+        "RBLNAutoModelForTextEncoding",
         "RBLNBartForConditionalGeneration",
         "RBLNBartForConditionalGenerationConfig",
         "RBLNBartModel",
@@ -231,11 +232,7 @@ _import_structure = {
 }
 
 if TYPE_CHECKING:
-    from .configuration_utils import (
-        RBLNAutoConfig,
-        RBLNCompileConfig,
-        RBLNModelConfig,
-    )
+    from .configuration_utils import RBLNAutoConfig, RBLNCompileConfig, RBLNModelConfig
     from .diffusers import (
         RBLNAutoencoderKL,
         RBLNAutoencoderKLConfig,
@@ -301,10 +298,7 @@ if TYPE_CHECKING:
         RBLNVQModel,
         RBLNVQModelConfig,
     )
-    from .modeling import (
-        RBLNBaseModel,
-        RBLNModel,
-    )
+    from .modeling import RBLNBaseModel, RBLNModel
     from .transformers import (
         RBLNASTForAudioClassification,
         RBLNASTForAudioClassificationConfig,
@@ -320,6 +314,7 @@ if TYPE_CHECKING:
         RBLNAutoModelForSeq2SeqLM,
         RBLNAutoModelForSequenceClassification,
         RBLNAutoModelForSpeechSeq2Seq,
+        RBLNAutoModelForTextEncoding,
         RBLNAutoModelForVision2Seq,
         RBLNBartForConditionalGeneration,
         RBLNBartForConditionalGenerationConfig,
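
This release wires RBLNAutoModelForTextEncoding into both the lazy `_import_structure` table and the `TYPE_CHECKING` block, so the new auto class is importable from the package root. A hedged usage sketch, assuming it follows the same `from_pretrained`/`export` convention as the other `RBLNAutoModel*` classes shown later in this diff (the checkpoint id is illustrative only):

    from optimum.rbln import RBLNAutoModelForTextEncoding

    # export=True converts and compiles the Hugging Face checkpoint for the RBLN NPU;
    # without it, an already-compiled model directory is expected.
    model = RBLNAutoModelForTextEncoding.from_pretrained(
        "sentence-transformers/all-MiniLM-L6-v2",  # illustrative checkpoint id
        export=True,
    )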
optimum/rbln/__version__.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.8.2rc0'
-__version_tuple__ = version_tuple = (0, 8, 2, 'rc0')
+__version__ = version = '0.8.3a1'
+__version_tuple__ = version_tuple = (0, 8, 3, 'a1')
optimum/rbln/configuration_utils.py CHANGED
@@ -491,7 +491,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         self,
         submodule_config_cls: Type["RBLNModelConfig"],
         submodule_config: Optional[Union[Dict[str, Any], "RBLNModelConfig"]] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ) -> "RBLNModelConfig":
         # Initialize a submodule config from a dict or a RBLNModelConfig.
         # kwargs is specified from the predecessor config.
@@ -566,7 +566,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         timeout: Optional[int] = None,
         optimum_rbln_version: Optional[str] = None,
         _compile_cfgs: List[RBLNCompileConfig] = [],
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Initialize a RBLN model configuration with runtime options and compile configurations.
@@ -717,7 +717,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         json.dump(serializable_data, jsonf, indent=2)
 
     @classmethod
-    def load(cls, path: str, **kwargs: Dict[str, Any]) -> "RBLNModelConfig":
+    def load(cls, path: str, **kwargs: Any) -> "RBLNModelConfig":
         """
         Load a RBLNModelConfig from a path.
 
@@ -750,7 +750,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
     def initialize_from_kwargs(
         cls: Type["RBLNModelConfig"],
         rbln_config: Optional[Union[Dict[str, Any], "RBLNModelConfig"]] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ) -> Tuple["RBLNModelConfig", Dict[str, Any]]:
         # Initialize RBLNModelConfig from kwargs.
         kwargs_keys = list(kwargs.keys())
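
The recurring `**kwargs: Dict[str, Any]` → `**kwargs: Any` edit above, repeated across most of the configuration and modeling files below, is a typing correction rather than a behavioral change: the annotation on `**kwargs` describes each keyword value, so the old form told type checkers that every extra keyword argument had to itself be a dict. A minimal sketch of the difference (the function names are illustrative, not part of optimum-rbln):

    from typing import Any, Dict

    def old_style(**kwargs: Dict[str, Any]) -> None:
        # Each keyword VALUE is declared to be a dict, so a type checker
        # flags a call like old_style(batch_size=1).
        ...

    def new_style(**kwargs: Any) -> None:
        # kwargs itself is still a dict[str, Any]; individual values may be anything.
        ...

    new_style(batch_size=1, tensor_parallel_size=4)  # accepted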
optimum/rbln/diffusers/__init__.py CHANGED
@@ -135,6 +135,7 @@ if TYPE_CHECKING:
     from .modeling_diffusers import RBLNDiffusionMixin
     from .models import (
         RBLNAutoencoderKL,
+        RBLNAutoencoderKLCosmos,
         RBLNControlNetModel,
         RBLNCosmosTransformer3DModel,
         RBLNPriorTransformer,
optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 
@@ -33,7 +33,7 @@ class RBLNAutoencoderKLConfig(RBLNModelConfig):
         vae_scale_factor: Optional[float] = None,  # TODO: rename to scaling_factor
         in_channels: Optional[int] = None,
         latent_channels: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 
 from ....configuration_utils import RBLNModelConfig
 from ....utils.logging import get_logger
@@ -35,7 +35,7 @@ class RBLNAutoencoderKLCosmosConfig(RBLNModelConfig):
         vae_scale_factor_temporal: Optional[int] = None,
         vae_scale_factor_spatial: Optional[int] = None,
         use_slicing: Optional[bool] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/models/configuration_controlnet.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 
@@ -29,7 +29,7 @@ class RBLNControlNetModelConfig(RBLNModelConfig):
         unet_sample_size: Optional[Tuple[int, int]] = None,
         vae_sample_size: Optional[Tuple[int, int]] = None,
         text_model_hidden_size: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 
 from ....configuration_utils import RBLNModelConfig
 
@@ -32,7 +32,7 @@ class RBLNPriorTransformerConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         embedding_dim: Optional[int] = None,
         num_embeddings: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 
 from ....configuration_utils import RBLNModelConfig
 
@@ -33,7 +33,7 @@ class RBLNCosmosTransformer3DModelConfig(RBLNModelConfig):
         num_latent_frames: Optional[int] = None,
         latent_height: Optional[int] = None,
         latent_width: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Optional, Tuple, Union
 
 from ....configuration_utils import RBLNModelConfig
 
@@ -27,7 +27,7 @@ class RBLNSD3Transformer2DModelConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         sample_size: Optional[Union[int, Tuple[int, int]]] = None,
         prompt_embed_length: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 
@@ -38,7 +38,7 @@ class RBLNUNet2DConditionModelConfig(RBLNModelConfig):
         in_features: Optional[int] = None,
         text_model_hidden_size: Optional[int] = None,
         image_model_hidden_size: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/models/configuration_vq_model.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 
@@ -33,7 +33,7 @@ class RBLNVQModelConfig(RBLNModelConfig):
         vqmodel_scale_factor: Optional[float] = None,  # TODO: rename to scaling_factor
         in_channels: Optional[int] = None,
         latent_channels: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNCLIPTextModelConfig, RBLNCLIPTextModelWithProjectionConfig
@@ -38,7 +38,7 @@ class RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
@@ -178,7 +178,7 @@ class RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNT5EncoderModelConfig
@@ -41,7 +41,7 @@ class RBLNCosmosPipelineBaseConfig(RBLNModelConfig):
         num_frames: Optional[int] = None,
         fps: Optional[int] = None,
         max_seq_len: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNCLIPTextModelWithProjectionConfig, RBLNCLIPVisionModelWithProjectionConfig
@@ -37,7 +37,7 @@ class RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
         img_width: Optional[int] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
@@ -148,7 +148,7 @@ class RBLNKandinskyV22PriorPipelineConfig(RBLNModelConfig):
         *,
         batch_size: Optional[int] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Initialize a configuration for Kandinsky 2.2 prior pipeline optimized for RBLN NPU.
@@ -226,7 +226,7 @@ class RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
         prior_text_encoder: Optional[RBLNCLIPTextModelWithProjectionConfig] = None,
         unet: Optional[RBLNUNet2DConditionModelConfig] = None,
         movq: Optional[RBLNVQModelConfig] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Initialize a configuration for combined Kandinsky 2.2 pipelines optimized for RBLN NPU.
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNCLIPTextModelConfig
@@ -37,7 +37,7 @@ class RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNCLIPTextModelWithProjectionConfig, RBLNT5EncoderModelConfig
@@ -40,7 +40,7 @@ class RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
         height: Optional[int] = None,
         width: Optional[int] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from ....configuration_utils import RBLNModelConfig
 from ....transformers import RBLNCLIPTextModelConfig, RBLNCLIPTextModelWithProjectionConfig
@@ -38,7 +38,7 @@ class RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
         sample_size: Optional[Tuple[int, int]] = None,
         image_size: Optional[Tuple[int, int]] = None,
         guidance_scale: Optional[float] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
    ):
         """
         Args:
optimum/rbln/diffusers/modeling_diffusers.py CHANGED
@@ -136,7 +136,7 @@ class RBLNDiffusionMixin:
         lora_ids: Optional[Union[str, List[str]]] = None,
         lora_weights_names: Optional[Union[str, List[str]]] = None,
         lora_scales: Optional[Union[float, List[float]]] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ) -> "RBLNDiffusionMixin":
         """
         Load a pretrained diffusion pipeline from a model checkpoint, with optional compilation for RBLN NPUs.
optimum/rbln/diffusers/models/__init__.py CHANGED
@@ -35,20 +35,10 @@ _import_structure = {
 }
 
 if TYPE_CHECKING:
-    from .autoencoders import (
-        RBLNAutoencoderKL,
-        RBLNAutoencoderKLCosmos,
-        RBLNVQModel,
-    )
+    from .autoencoders import RBLNAutoencoderKL, RBLNAutoencoderKLCosmos, RBLNVQModel
     from .controlnet import RBLNControlNetModel
-    from .transformers import (
-        RBLNCosmosTransformer3DModel,
-        RBLNPriorTransformer,
-        RBLNSD3Transformer2DModel,
-    )
-    from .unets import (
-        RBLNUNet2DConditionModel,
-    )
+    from .transformers import RBLNCosmosTransformer3DModel, RBLNPriorTransformer, RBLNSD3Transformer2DModel
+    from .unets import RBLNUNet2DConditionModel
 else:
     import sys
 
optimum/rbln/diffusers/pipelines/__init__.py CHANGED
@@ -63,11 +63,7 @@ if TYPE_CHECKING:
         RBLNStableDiffusionXLControlNetImg2ImgPipeline,
         RBLNStableDiffusionXLControlNetPipeline,
     )
-    from .cosmos import (
-        RBLNCosmosSafetyChecker,
-        RBLNCosmosTextToWorldPipeline,
-        RBLNCosmosVideoToWorldPipeline,
-    )
+    from .cosmos import RBLNCosmosSafetyChecker, RBLNCosmosTextToWorldPipeline, RBLNCosmosVideoToWorldPipeline
     from .kandinsky2_2 import (
         RBLNKandinskyV22CombinedPipeline,
         RBLNKandinskyV22Img2ImgCombinedPipeline,
optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from ....configuration_utils import RBLNAutoConfig, RBLNModelConfig
 from ....transformers import RBLNLlamaForCausalLMConfig, RBLNSiglipVisionModelConfig
@@ -56,11 +56,11 @@ class RBLNCosmosSafetyCheckerConfig(RBLNModelConfig):
     Configuration class for RBLN Cosmos Safety Checker.
     """
 
-    submodules = ["aegis", "video_safety_model", "face_blur_filter", "siglip_encoder"]
+    submodules = ["llamaguard3", "video_safety_model", "face_blur_filter", "siglip_encoder"]
 
     def __init__(
         self,
-        aegis: Optional[RBLNModelConfig] = None,
+        llamaguard3: Optional[RBLNModelConfig] = None,
         video_safety_model: Optional[RBLNModelConfig] = None,
         face_blur_filter: Optional[RBLNModelConfig] = None,
         siglip_encoder: Optional[RBLNSiglipVisionModelConfig] = None,
@@ -69,19 +69,24 @@ class RBLNCosmosSafetyCheckerConfig(RBLNModelConfig):
         image_size: Optional[Tuple[int, int]] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        max_seq_len: Optional[int] = None,
+        **kwargs: Any,
     ):
         super().__init__(**kwargs)
         if height is not None and width is not None:
             image_size = (height, width)
 
+        if max_seq_len is None:
+            max_seq_len = 512
+
         tensor_parallel_size = kwargs.get("tensor_parallel_size")
 
-        self.aegis = self.init_submodule_config(
+        self.llamaguard3 = self.init_submodule_config(
             RBLNLlamaForCausalLMConfig,
-            aegis,
+            llamaguard3,
             batch_size=batch_size,
             tensor_parallel_size=tensor_parallel_size,
+            max_seq_len=max_seq_len,
         )
 
         self.siglip_encoder = self.init_submodule_config(
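
Besides the `aegis` → `llamaguard3` rename, the safety-checker config now accepts `max_seq_len`, defaults it to 512, and forwards it to the Llama Guard submodule config. A hedged construction sketch (values are illustrative; the deep import path simply mirrors the file shown in this diff):

    from optimum.rbln.diffusers.pipelines.cosmos.configuration_cosmos_guardrail import (
        RBLNCosmosSafetyCheckerConfig,
    )

    rbln_config = RBLNCosmosSafetyCheckerConfig(
        max_seq_len=1024,        # forwarded to the llamaguard3 submodule; 512 if omitted
        tensor_parallel_size=4,  # read back from kwargs and passed to the submodule config
    )
    # The submodule is now exposed as rbln_config.llamaguard3 (it was rbln_config.aegis in 0.8.2rc0).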
optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py CHANGED
@@ -33,9 +33,9 @@ if is_cosmos_guardrail_available():
    from cosmos_guardrail import CosmosSafetyChecker
    from cosmos_guardrail.cosmos_guardrail import (
        COSMOS_GUARDRAIL_CHECKPOINT,
-        Aegis,
        Blocklist,
        GuardrailRunner,
+        LlamaGuard3,
        ModelConfig,
        RetinaFaceFilter,
        SafetyClassifier,
@@ -55,7 +55,7 @@ else:
 
    COSMOS_GUARDRAIL_CHECKPOINT = None
 
-    class Aegis(FailToImportCosmosGuardrail): ...
+    class LlamaGuard3(FailToImportCosmosGuardrail): ...
 
    class Blocklist(FailToImportCosmosGuardrail): ...
 
@@ -312,33 +312,31 @@ class RBLNVideoContentSafetyFilter(VideoContentSafetyFilter):
        self.encoder.save_pretrained(checkpoint_id)
 
 
-class RBLNAegis(Aegis):
+class RBLNLlamaGuard3(LlamaGuard3):
    def __init__(
        self,
        checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
-        base_model_id: str = "meta-llama/LlamaGuard-7b",
-        aegis_adapter: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
+        base_model_id: str = "meta-llama/Llama-Guard-3-8B",
        rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
    ) -> None:
        if is_compiled_dir(checkpoint_id):
            torch.nn.Module.__init__(self)
-            cache_dir = pathlib.Path(checkpoint_id) / "aegis"
+            cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
            self.tokenizer = AutoTokenizer.from_pretrained(cache_dir)
-            self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.aegis)
+            self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.llamaguard3)
 
        else:
-            super().__init__(checkpoint_id, base_model_id, aegis_adapter)
-            model = self.model.merge_and_unload()  # peft merge
+            super().__init__(checkpoint_id, base_model_id)
+            model = self.model
            del self.model
-
-            self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.aegis)
+            self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.llamaguard3)
 
        self.rbln_config = rbln_config
        self.dtype = torch.bfloat16
        self.device = torch.device("cpu")
 
    def save_pretrained(self, checkpoint_id: str):
-        cache_dir = pathlib.Path(checkpoint_id) / "aegis"
+        cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
        self.model.save_pretrained(cache_dir)
        self.tokenizer.save_pretrained(cache_dir)
 
@@ -351,8 +349,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
    def __init__(
        self,
        checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
-        aegis_model_id: str = "meta-llama/LlamaGuard-7b",
-        aegis_adapter_id: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
+        llamaguard_model_id: str = "meta-llama/Llama-Guard-3-8B",
        rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
    ) -> None:
        torch.nn.Module.__init__(self)
@@ -369,10 +366,9 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
        self.text_guardrail = GuardrailRunner(
            safety_models=[
                Blocklist(COSMOS_GUARDRAIL_CHECKPOINT),  # Changed since it cannot be saved
-                RBLNAegis(
+                RBLNLlamaGuard3(
                    checkpoint_id=checkpoint_id,
-                    base_model_id=aegis_model_id,
-                    aegis_adapter=aegis_adapter_id,
+                    base_model_id=llamaguard_model_id,
                    rbln_config=rbln_config,
                ),
            ]
@@ -387,7 +383,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
 
    def save_pretrained(self, save_dir: str):
        for text_safety_models in self.text_guardrail.safety_models:
-            if isinstance(text_safety_models, RBLNAegis):
+            if isinstance(text_safety_models, RBLNLlamaGuard3):
                text_safety_models.save_pretrained(save_dir)
 
        for video_safety_models in self.video_guardrail.safety_models:
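
Reading the two code paths in RBLNLlamaGuard3 together (compile from the Hugging Face checkpoint vs. reload from a compiled directory), the intended flow appears to be compile once, save, then reload. A hedged sketch of that round trip (directory name illustrative; the import paths mirror the files in this diff, and the first construction downloads and compiles the guardrail models):

    from optimum.rbln.diffusers.pipelines.cosmos.configuration_cosmos_guardrail import (
        RBLNCosmosSafetyCheckerConfig,
    )
    from optimum.rbln.diffusers.pipelines.cosmos.cosmos_guardrail import RBLNCosmosSafetyChecker

    rbln_config = RBLNCosmosSafetyCheckerConfig(tensor_parallel_size=4)  # illustrative

    # First run: compiles meta-llama/Llama-Guard-3-8B and the other guardrail models,
    # then saves the compiled artifacts (the Llama Guard part under <dir>/llamaguard3).
    checker = RBLNCosmosSafetyChecker(rbln_config=rbln_config)
    checker.save_pretrained("./cosmos_guardrail_compiled")

    # Later runs: pass the compiled directory as checkpoint_id so is_compiled_dir()
    # takes the fast path and the pre-compiled models are loaded instead of re-compiled.
    checker = RBLNCosmosSafetyChecker(checkpoint_id="./cosmos_guardrail_compiled", rbln_config=rbln_config)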
optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py CHANGED
@@ -87,7 +87,7 @@ class RBLNCosmosTextToWorldPipeline(RBLNDiffusionMixin, CosmosTextToWorldPipelin
         export: bool = False,
         safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
         rbln_config: Dict[str, Any] = {},
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
         if safety_checker is None and export:
optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py CHANGED
@@ -87,7 +87,7 @@ class RBLNCosmosVideoToWorldPipeline(RBLNDiffusionMixin, CosmosVideoToWorldPipel
         export: bool = False,
         safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
         rbln_config: Dict[str, Any] = {},
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
         if safety_checker is None and export:
optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py CHANGED
@@ -22,12 +22,7 @@ from diffusers import (
     UNet2DConditionModel,
     VQModel,
 )
-from transformers import (
-    CLIPImageProcessor,
-    CLIPTextModelWithProjection,
-    CLIPTokenizer,
-    CLIPVisionModelWithProjection,
-)
+from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection
 
 from ...configurations import RBLNKandinskyV22CombinedPipelineConfig
 from ...modeling_diffusers import RBLNDiffusionMixin
optimum/rbln/modeling.py CHANGED
@@ -78,7 +78,7 @@ class RBLNModel(RBLNBaseModel):
         rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
         subfolder: str = "",
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ) -> "RBLNModel":
         """
         Converts and compiles a pre-trained HuggingFace library model into a RBLN model.
@@ -241,7 +241,7 @@ class RBLNModel(RBLNBaseModel):
             for compiled_model in compiled_models
         ]
 
-    def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Dict[str, Any]) -> Any:
+    def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Any) -> Any:
         """
         Defines the forward pass of the RBLN model, providing a drop-in replacement for HuggingFace PreTrainedModel.
 
optimum/rbln/modeling_base.py CHANGED
@@ -348,7 +348,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         model_id: Union[str, Path],
         export: bool = False,
         rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ) -> "RBLNBaseModel":
         """
         The `from_pretrained()` function is utilized in its standard form as in the HuggingFace transformers library.
@@ -523,10 +523,18 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
             # First copy everything to a temporary directory
             shutil.copytree(real_save_dir, tmp_dir)
 
-            # If everything succeeded, atomically replace the target directory
+            # If everything succeeded, move files to target directory
             if os.path.exists(save_directory_path):
-                shutil.rmtree(save_directory_path)
-            os.rename(tmp_dir, save_directory_path)
+                # Move files from tmp_dir to existing directory (overwrite existing files)
+                for item in os.listdir(tmp_dir):
+                    src_path = os.path.join(tmp_dir, item)
+                    dst_path = os.path.join(save_directory_path, item)
+                    shutil.move(src_path, dst_path)
+                # Clean up empty tmp_dir
+                os.rmdir(tmp_dir)
+            else:
+                # If target doesn't exist, just rename tmp_dir to target
+                os.rename(tmp_dir, save_directory_path)
 
         except Exception as e:
             # Clean up the temporary directory if anything fails
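
The net effect of this last hunk is that save_pretrained no longer deletes an existing target directory before renaming the staging copy into place; instead it moves the staged entries into the target one by one. A standalone sketch of the same move-based pattern (hypothetical helper, not optimum-rbln API):

    import os
    import shutil

    def move_staged_contents(tmp_dir: str, target_dir: str) -> None:
        # Mirror of the replacement strategy above: when the target already exists,
        # move each staged entry into it rather than removing the whole directory,
        # so the target directory itself (and anything not re-staged) survives.
        if os.path.exists(target_dir):
            for item in os.listdir(tmp_dir):
                shutil.move(os.path.join(tmp_dir, item), os.path.join(target_dir, item))
            os.rmdir(tmp_dir)  # tmp_dir is empty once every entry has been moved out
        else:
            os.rename(tmp_dir, target_dir)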