sinapsis-huggingface 0.2.10__py3-none-any.whl → 0.2.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (14)
  1. {sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/METADATA +1 -1
  2. {sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/RECORD +14 -14
  3. sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py +37 -6
  4. sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py +30 -3
  5. sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py +1 -1
  6. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py +29 -10
  7. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py +37 -4
  8. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py +43 -8
  9. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py +41 -6
  10. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py +5 -3
  11. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py +26 -5
  12. {sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/WHEEL +0 -0
  13. {sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/licenses/LICENSE +0 -0
  14. {sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/top_level.txt +0 -0

{sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sinapsis-huggingface
- Version: 0.2.10
+ Version: 0.2.11
  Summary: Package for HuggingFace-based templates
  Author-email: SinapsisAI <dev@sinapsis.tech>
  Project-URL: Homepage, https://sinapsis.tech

{sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/RECORD

@@ -1,12 +1,12 @@
- sinapsis_huggingface-0.2.10.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+ sinapsis_huggingface-0.2.11.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/helpers/tags.py,sha256=TFmVD7r70vKmpNqSweVGme4riZZiRQWIfxySTexyJp8,522
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/__init__.py,sha256=9FHbS4hse9WIE-1a5jJlG-23gB3wahlULANJAWQ464c,947
- sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py,sha256=xQgt6ehlJ5ESNFgqWbMbL31sTCLJwz3zJNNNATooPw4,8745
+ sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py,sha256=OOb7xwHXnPifWWknrdYsk9i_U2dSdnBaeF6qDcaZ6xQ,10242
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_image_diffusers.py,sha256=OYT5fZBzCZoW7WTFi9kpvibGJw8wHTMm_O0eu74CRT4,6595
- sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py,sha256=XVm3HD1WtgteviwrVtvVyzNShKK8G8J5Nb_8iKdf74c,2394
- sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py,sha256=9-y2nslRPvUxcx8A7CpYEkERhOHUI2g-VKuK1Zk_sFI,13807
+ sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py,sha256=tV5Ebv3-Qh6684WZQ7phE1B4QUGq0espICJFponhZ60,3590
+ sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py,sha256=aiGMXphGRjqBWDFnAanp-Iv-P_igmI6vj0Phg5JvZ3E,13800
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/text_to_image_diffusers.py,sha256=LUuI3A4MXcHZrq7mCPvkgmswRNicmIvhLiZ6bdNTpqM,3360
  sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -26,18 +26,18 @@ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/tags.py,sha256=F6zVOBh8mVnl9AG0s_t1ftZ-1Fg1RoO3zgng9PxfLWk,652
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/text_to_sentences.py,sha256=teaJXoTAVzGwar9gxenBabkA9VBJd-VAxsNXlzkKMuU,1676
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/__init__.py,sha256=3BgUm6C_tRgzxh2ADMBcu6OHzR-U5Tl1eFVtU0PwxB0,1095
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py,sha256=hlcI9NhjIyfEsbM2OhkCUvF5KDeBkQwdzrwJ4MnoKoM,7008
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py,sha256=SMk_D1L9mX5UU-GY2NxJz7eqXSpXk2b71NSxz5tgPAY,3088
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py,sha256=d-RfLRVMoY36Reo-fgXhVjRjNRpcSgO_nSbDe-VPCFY,2771
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py,sha256=RodHYOmTwXX1CEnFkVpK2RNczCVoMoHpSLx4CSdGM9g,2362
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py,sha256=VzsyVtNkfGHtX8nYE0uCTwG0qgJLgUQt6CAU4pJZkzA,6337
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py,sha256=Lq0lmUBjTWdLuLfbxINcOaAfj0tD96ew9PeuBUTZaKU,3022
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py,sha256=8vZBGQka2kSPYjbn5PbnT256_1UrJ099ygS6rQOMkmY,7647
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py,sha256=6L4IUGv9l6Z3xQi5qZB70KPIa1BBUmckS1BhkmXbrjs,4272
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py,sha256=4lewS44TXjWoiPtcvNM9Xo1rvm8XkYTJtuduDXgbrXI,4062
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py,sha256=XF7HYBNaEThyFJoqdWxb3mUqBtxEtnHN6mb6jwP_UEk,3732
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py,sha256=epvTmlxrRC1xeFiQzX0BJY9rHg6KtbYJSlKqbsLnE1k,6439
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py,sha256=36ZjRzd8GRKq8s_b3_zkyK8BE7pmvyrtqi1_UNA0Fjg,4014
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_base.py,sha256=rcmnEo2o_xm7wExWfJDD9b8KQy0cgd2VeUyGNG6t4Aw,5037
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_detection.py,sha256=0bQB0rDU5foADo5IOe422UUJ1v6BZ3Q08esklwKOOQA,4210
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_inference.py,sha256=gC6RXqQi423uZr-GFpI_XzrRHjqMgJ7ry5MXC2-Y5N8,10894
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/helpers.py,sha256=IGeYd5U2xpimpwTQW_5xm1pUYB5tqHlpq-fjwBHI4gY,2187
- sinapsis_huggingface-0.2.10.dist-info/METADATA,sha256=0mOVbysZVoFfr0rfMbakeYQIQ_FSK7_RSxyjcd4XMP8,11401
- sinapsis_huggingface-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- sinapsis_huggingface-0.2.10.dist-info/top_level.txt,sha256=ZxHwnMjSWRceQL_6-B7GJBPxQWdlwkba-SYMVufhj5s,133
- sinapsis_huggingface-0.2.10.dist-info/RECORD,,
+ sinapsis_huggingface-0.2.11.dist-info/METADATA,sha256=MmqGcCSRmeJ1AricO6rQIrEjKMm8Q_P0KFGvJiDm2BI,11401
+ sinapsis_huggingface-0.2.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ sinapsis_huggingface-0.2.11.dist-info/top_level.txt,sha256=ZxHwnMjSWRceQL_6-B7GJBPxQWdlwkba-SYMVufhj5s,133
+ sinapsis_huggingface-0.2.11.dist-info/RECORD,,

sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py

@@ -1,12 +1,12 @@
  # -*- coding: utf-8 -*-

  from abc import ABC, abstractmethod
- from typing import Any, Literal
+ from typing import Literal

  import numpy as np
  import torch
  from diffusers import DiffusionPipeline
- from pydantic import BaseModel, ConfigDict
+ from pydantic import BaseModel, ConfigDict, Field
  from sinapsis_core.data_containers.data_packet import DataContainer, ImagePacket
  from sinapsis_core.template_base import Template
  from sinapsis_core.template_base.base_models import (
@@ -20,6 +20,37 @@ from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
  from sinapsis_huggingface_diffusers.helpers.tags import Tags


+ class BaseGenerationParams(BaseModel):
+     """Defines the core parameters for a diffusers generation pipeline.
+
+     This model is flexible and allows any other parameters (e.g., `strength`)
+     to be passed, which will be forwarded to the underlying pipeline.
+
+     Attributes:
+         prompt (str | list[str] | None): The text prompt(s) to guide image generation.
+         height (int | None): The height in pixels of the generated image.
+         width (int | None): The width in pixels of the generated image.
+         negative_prompt (str | list[str] | None): Prompt(s) to guide the model away
+             from generating certain things.
+         num_inference_steps (int | None): The number of denoising steps. More steps
+             typically result in higher quality but are slower. Defaults to 50.
+         guidance_scale (float | None): Controls how much the prompt influences the
+             output. Higher values mean stronger adherence. Defaults to 7.5.
+         num_images_per_prompt (int | None): The number of images to generate per
+             prompt. Defaults to 1.
+     """
+
+     model_config = ConfigDict(extra="allow")
+
+     prompt: str | list[str] | None = None
+     height: int | None = None
+     width: int | None = None
+     negative_prompt: str | list[str] | None = None
+     num_inference_steps: int | None = 50
+     guidance_scale: float | None = 7.5
+     num_images_per_prompt: int | None = 1
+
+
  class BaseDiffusersAttributes(TemplateAttributes):
      """Configuration attributes for setting up a diffusion pipeline and generating images.

@@ -33,7 +64,7 @@ class BaseDiffusersAttributes(TemplateAttributes):
          device (Literal["cuda", "cpu"]): Device for computations, either "cpu" or "cuda".
          torch_dtype (Literal["float16", "float32"]): Data type for PyTorch tensors.
          enable_model_cpu_offload (bool): If True, enables CPU offloading to reduce GPU memory usage.
-         generation_params (dict): Parameters for image generation (e.g., prompt, guidance_scale).
+         generation_params (BaseGenerationParams): Parameters for image generation (e.g., prompt, guidance_scale).
          seed (int | list[int] | None): Random seed(s) for reproducibility.
          overwrite_images (bool): Whether to overwrite the existing images in the container.
              Defaults to False.
@@ -44,7 +75,7 @@ class BaseDiffusersAttributes(TemplateAttributes):
      device: Literal["cuda", "cpu"]
      torch_dtype: Literal["float16", "float32"] = "float16"
      enable_model_cpu_offload: bool = False
-     generation_params: dict[str, Any]
+     generation_params: BaseGenerationParams = Field(default_factory=BaseGenerationParams)
      seed: int | list[int] | None = None
      overwrite_images: bool = False

@@ -79,7 +110,7 @@ class BaseDiffusers(Template, ABC):

          self.pipeline = self._make_pipeline()
          self.pipeline.set_progress_bar_config(disable=True)
-         self.num_images_per_prompt = self.attributes.generation_params.get("num_images_per_prompt", 1)
+         self.num_images_per_prompt = self.attributes.generation_params.num_images_per_prompt
          self.generator = self._make_generator()

          if self.attributes.enable_model_cpu_offload:
@@ -163,7 +194,7 @@ class BaseDiffusers(Template, ABC):
              inputs = {}
          output = self.pipeline(
              **inputs,
-             **self.attributes.generation_params,
+             **self.attributes.generation_params.model_dump(exclude_none=True),
              generator=self.generator,
              output_type="np",
          )
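
The change above replaces the untyped `generation_params: dict[str, Any]` with a Pydantic model: the core fields are declared and validated, `extra="allow"` still accepts arbitrary pipeline-specific keys such as `strength`, and `model_dump(exclude_none=True)` forwards only values that are actually set. A minimal standalone sketch of that pattern (redeclaring the model here for illustration rather than importing the package):

```python
from pydantic import BaseModel, ConfigDict


class BaseGenerationParams(BaseModel):
    """Typed core fields; extra="allow" keeps unknown keys for the pipeline."""

    model_config = ConfigDict(extra="allow")

    prompt: str | list[str] | None = None
    height: int | None = None
    width: int | None = None
    negative_prompt: str | list[str] | None = None
    num_inference_steps: int | None = 50
    guidance_scale: float | None = 7.5
    num_images_per_prompt: int | None = 1


# An undeclared key such as `strength` is preserved as an extra field.
params = BaseGenerationParams(prompt="a red fox", strength=0.8)

# exclude_none=True drops the unset optional fields (height, width,
# negative_prompt), so the pipeline call only receives real values.
print(params.model_dump(exclude_none=True))
# {'prompt': 'a red fox', 'num_inference_steps': 50, 'guidance_scale': 7.5,
#  'num_images_per_prompt': 1, 'strength': 0.8}
```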

sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py

@@ -1,9 +1,10 @@
  # -*- coding: utf-8 -*-
-
  from diffusers import I2VGenXLPipeline
+ from pydantic import Field
  from sinapsis_core.template_base.base_models import TemplateAttributeType

  from sinapsis_huggingface_diffusers.helpers.tags import Tags
+ from sinapsis_huggingface_diffusers.templates.base_diffusers import BaseDiffusersAttributes, BaseGenerationParams
  from sinapsis_huggingface_diffusers.templates.image_to_image_diffusers import (
      ImageToImageDiffusers,
  )
@@ -12,6 +13,32 @@ ImageToVideoGenXLDiffusersUIProperties = ImageToImageDiffusers.UIProperties
  ImageToVideoGenXLDiffusersUIProperties.tags.extend([Tags.VIDEO, Tags.IMAGE_TO_VIDEO])


+ class ImageToVideoGenerationParams(BaseGenerationParams):
+     """Defines the specific parameters for image-to-video generation pipelines.
+
+     Attributes:
+         target_fps (int | None): The target frames per second for the generated video.
+         num_frames (int | None): The total number of frames to generate in the video. Defaults to 16.
+         num_videos_per_prompt (int | None): The number of different videos to generate
+             from the same input image and prompt.
+     """
+
+     target_fps: int | None = None
+     num_frames: int | None = 16
+     num_videos_per_prompt: int | None = None
+
+
+ class ImageToVideoGenXLDiffusersAttributes(BaseDiffusersAttributes):
+     """Defines the complete set of attributes for the ImageToVideoGenXLDiffusers template.
+
+     Attributes:
+         generation_params (ImageToVideoGenerationParams): Task-specific parameters for
+             video generation, such as `num_frames` and `target_fps`.
+     """
+
+     generation_params: ImageToVideoGenerationParams = Field(default_factory=ImageToVideoGenerationParams)
+
+
  class ImageToVideoGenXLDiffusers(ImageToImageDiffusers):
      """This class implements a specific template for image-to-video generation using Hugging Face's
      diffusers. The `ImageToVideoGenXLDiffusers` class inherits from the `ImageToImageDiffusers` template
@@ -42,12 +69,12 @@ class ImageToVideoGenXLDiffusers(ImageToImageDiffusers):

      """

+     AttributesBaseModel = ImageToVideoGenXLDiffusersAttributes
      UIProperties = ImageToVideoGenXLDiffusersUIProperties
-     DEFAULT_NUM_FRAMES = 16

      def __init__(self, attributes: TemplateAttributeType) -> None:
          super().__init__(attributes)
-         self.num_duplicates = self.attributes.generation_params.get("num_frames", self.DEFAULT_NUM_FRAMES)
+         self.num_duplicates = self.attributes.generation_params.num_frames
          self.requires_pil = True
          self.output_attribute = "frames"
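
Here the dict lookup with a class-level fallback (`DEFAULT_NUM_FRAMES`) becomes a typed subclass: the default of 16 moves onto `num_frames` itself, and the attributes class narrows `generation_params` to the task-specific model via `Field(default_factory=...)`. A sketch of that override pattern, using hypothetical stand-ins for the real template classes:

```python
from pydantic import BaseModel, ConfigDict, Field


class BaseGenerationParams(BaseModel):
    model_config = ConfigDict(extra="allow")

    prompt: str | list[str] | None = None


class ImageToVideoGenerationParams(BaseGenerationParams):
    target_fps: int | None = None
    num_frames: int | None = 16  # replaces the old DEFAULT_NUM_FRAMES constant
    num_videos_per_prompt: int | None = None


class VideoAttributes(BaseModel):  # stand-in for the real attributes class
    # Declaring the field with the narrower type swaps in the video params.
    generation_params: ImageToVideoGenerationParams = Field(
        default_factory=ImageToVideoGenerationParams
    )


attrs = VideoAttributes()
assert attrs.generation_params.num_frames == 16  # typed access, no .get() fallback
```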
 

sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py

@@ -327,6 +327,6 @@ class InpaintingDiffusers(ImageToImageDiffusers):
          if old_packet.annotations:
              new_packet.annotations = old_packet.annotations
              for ann in new_packet.annotations:
-                 ann.label_str = str(self.attributes.generation_params.get("prompt"))
+                 ann.label_str = str(self.attributes.generation_params.prompt)

          return new_packets, old_packets

sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py

@@ -6,7 +6,7 @@ from abc import abstractmethod
  from typing import Any, Literal

  import torch
- from pydantic import Field
+ from pydantic import BaseModel, ConfigDict, Field
  from sinapsis_core.data_containers.data_packet import DataContainer
  from sinapsis_core.template_base import Template
  from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType, UIPropertiesMetadata
@@ -17,6 +17,29 @@ from transformers.pipelines import Pipeline
  from sinapsis_huggingface_transformers.helpers.tags import Tags


+ class BaseInferenceKwargs(BaseModel):
+     """A flexible container for keyword arguments passed during inference.
+
+     Attributes:
+         generate_kwargs (dict[str, Any] | None): A dictionary of advanced parameters passed directly to the
+             model's `generate` method for fine-tuning the pipeline generation.
+     """
+
+     generate_kwargs: dict[str, Any] | None = None
+     model_config = ConfigDict(extra="allow")
+
+
+ class PipelineKwargs(BaseModel):
+     """A flexible container for keyword arguments used to create the pipeline.
+
+     This model allows any extra parameters to be passed during pipeline instantiation.
+     """
+
+     device: Literal["cuda", "cpu"]
+     torch_dtype: Literal["float16", "float32", "auto"] = "float16"
+     model_config = ConfigDict(extra="allow")
+
+
  class TransformersBaseAttributes(TemplateAttributes):
      """Attributes for configuring the TransformersPipelineTemplate.

@@ -31,20 +54,18 @@ class TransformersBaseAttributes(TemplateAttributes):
          seed (int | None): Random seed for reproducibility. If provided, this seed will ensure
              consistent results for pipelines that involve randomness. If not provided, a random seed
              will be generated internally.
-         pipeline_kwargs (dict[str, Any]): Keyword arguments passed during the instantiation of the
+         pipeline_kwargs (PipelineKwargs): Keyword arguments passed during the instantiation of the
              Hugging Face pipeline.
-         inference_kwargs (dict[str, Any]): Keyword arguments passed during the task execution or
+         inference_kwargs (BaseInferenceKwargs): Keyword arguments passed during the task execution or
              inference phase. These allow dynamic customization of the task, such as `max_length`
              and `min_length` for summarization, or `max_new_tokens` for image-to-text.
      """

      model_path: str
      model_cache_dir: str = str(SINAPSIS_CACHE_DIR)
-     device: Literal["cuda", "cpu"]
-     torch_dtype: Literal["float16", "float32"] = "float16"
      seed: int | None = None
-     pipeline_kwargs: dict[str, Any] = Field(default_factory=dict)
-     inference_kwargs: dict[str, Any] = Field(default_factory=dict)
+     pipeline_kwargs: PipelineKwargs = Field(default_factory=PipelineKwargs)
+     inference_kwargs: BaseInferenceKwargs = Field(default_factory=BaseInferenceKwargs)


  class TransformersBase(Template):
@@ -123,9 +144,7 @@ class TransformersBase(Template):
          return pipeline(
              task=self.task,
              model=self.attributes.model_path,
-             device=self.attributes.device,
-             torch_dtype=self._TORCH_DTYPE.get(self.attributes.torch_dtype),
-             **self.attributes.pipeline_kwargs,
+             **self.attributes.pipeline_kwargs.model_dump(),
              **kwargs,
          )
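
With this change `device` and `torch_dtype` move off the attributes class into `PipelineKwargs`, and the pipeline factory forwards the dumped model wholesale instead of translating dtypes through the removed `_TORCH_DTYPE` map; this relies on newer `transformers` versions accepting string dtype names such as "float16" or "auto" directly. A sketch of the forwarding, with a hypothetical `make_pipeline` standing in for `transformers.pipeline`:

```python
from typing import Any, Literal

from pydantic import BaseModel, ConfigDict


class PipelineKwargs(BaseModel):
    """Creation-time settings; extra="allow" admits e.g. trust_remote_code."""

    model_config = ConfigDict(extra="allow")

    device: Literal["cuda", "cpu"]
    torch_dtype: Literal["float16", "float32", "auto"] = "float16"


def make_pipeline(task: str, model: str, kwargs: PipelineKwargs) -> dict[str, Any]:
    # Stand-in for transformers.pipeline(...): note the plain model_dump(),
    # so device and torch_dtype are always passed through, defaults included.
    return {"task": task, "model": model, **kwargs.model_dump()}


kwargs = PipelineKwargs(device="cpu", trust_remote_code=True)  # extra key kept
print(make_pipeline("summarization", "t5-small", kwargs))
# {'task': 'summarization', 'model': 't5-small', 'device': 'cpu',
#  'torch_dtype': 'float16', 'trust_remote_code': True}
```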
 
sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py

@@ -2,17 +2,47 @@

  import numpy as np
  from PIL import Image
+ from pydantic import Field
  from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
  from sinapsis_core.template_base.base_models import OutputTypes

  from sinapsis_huggingface_transformers.helpers.tags import Tags
- from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
+ from sinapsis_huggingface_transformers.templates.base_transformers import (
+     BaseInferenceKwargs,
+     TransformersBase,
+     TransformersBaseAttributes,
+ )

  ImageToTextTransformersUIProperties = TransformersBase.UIProperties
  ImageToTextTransformersUIProperties.output_type = OutputTypes.TEXT
  ImageToTextTransformersUIProperties.tags.extend([Tags.IMAGE, Tags.TEXT, Tags.IMAGE_TO_TEXT])


+ class ImageToTextInferenceKwargs(BaseInferenceKwargs):
+     """Specific keyword arguments for the image-to-text pipeline.
+
+     Attributes:
+         max_new_tokens (int | None): The maximum number of tokens to generate in the description.
+         timeout (float | None): The maximum time in seconds to wait for fetching images from the web.
+     """
+
+     max_new_tokens: int | None = None
+     timeout: float | None = None
+
+
+ class ImageToTextTransformersAttributes(TransformersBaseAttributes):
+     """Defines the complete set of attributes for the ImageToTextTransformers template.
+
+     Inherits general transformer settings from TransformersBaseAttributes.
+
+     Attributes:
+         inference_kwargs (ImageToTextInferenceKwargs): Task-specific parameters for the image-to-text pipeline,
+             such as `max_new_tokens`.
+     """
+
+     inference_kwargs: ImageToTextInferenceKwargs = Field(default_factory=ImageToTextInferenceKwargs)
+
+
  class ImageToTextTransformers(TransformersBase):
      """ImageToTextTransformers template to generate text from an image.

@@ -37,6 +67,7 @@ class ImageToTextTransformers(TransformersBase):

      """

+     AttributesBaseModel = ImageToTextTransformersAttributes
      GENERATED_TEXT_KEY = "generated_text"
      UIProperties = ImageToTextTransformersUIProperties

@@ -78,7 +109,9 @@ class ImageToTextTransformers(TransformersBase):
          """
          for image_packet in container.images:
              image = self._convert_to_pil(image_packet.content)
-             text_description = self.pipeline(image, **self.attributes.inference_kwargs)[0][self.GENERATED_TEXT_KEY]
-             text_packet = TextPacket(content=text_description)
-             container.texts.append(text_packet)
+             results = self.pipeline(image, **self.attributes.inference_kwargs.model_dump(exclude_none=True))
+             if results:
+                 text_description = results[0].get(self.GENERATED_TEXT_KEY)
+                 if text_description:
+                     container.texts.append(TextPacket(content=text_description))
          return container
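
The rewritten loop also hardens result handling: the old code indexed `[0][self.GENERATED_TEXT_KEY]` unconditionally and would raise `IndexError` or `KeyError` on an empty or unexpected pipeline result. A small sketch of the guard pattern in isolation:

```python
from typing import Any

GENERATED_TEXT_KEY = "generated_text"


def extract_caption(results: list[dict[str, Any]]) -> str | None:
    """Mirrors the new flow: returns None instead of raising when the
    pipeline yields an empty list or a dict without the expected key."""
    if results:
        return results[0].get(GENERATED_TEXT_KEY)
    return None


assert extract_caption([{"generated_text": "a cat on a mat"}]) == "a cat on a mat"
assert extract_caption([]) is None                # old code: IndexError
assert extract_caption([{"score": 0.9}]) is None  # old code: KeyError
```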

sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py

@@ -1,11 +1,17 @@
  # -*- coding: utf-8 -*-
+ from typing import Literal

  import numpy as np
+ from pydantic import Field
  from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
  from sinapsis_core.template_base.base_models import OutputTypes

  from sinapsis_huggingface_transformers.helpers.tags import Tags
- from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
+ from sinapsis_huggingface_transformers.templates.base_transformers import (
+     BaseInferenceKwargs,
+     TransformersBase,
+     TransformersBaseAttributes,
+ )

  SpeechToTextTransformersUIProperties = TransformersBase.UIProperties
  SpeechToTextTransformersUIProperties.output_type = OutputTypes.TEXT
@@ -14,6 +20,30 @@ SpeechToTextTransformersUIProperties.tags.extend(
  )


+ class SpeechToTextInferenceKwargs(BaseInferenceKwargs):
+     """Specific keyword arguments for the automatic-speech-recognition pipeline.
+
+     Attributes:
+         return_timestamps (Literal["char", "word"] | bool | None): If set, controls the granularity of
+             timestamps returned with the transcribed text. Can be "char", "word", or True for segments.
+     """
+
+     return_timestamps: Literal["char", "word"] | bool | None = None
+
+
+ class SpeechToTextTransformersAttributes(TransformersBaseAttributes):
+     """Defines the set of attributes for the SpeechToTextTransformers template.
+
+     Inherits general transformer settings from TransformersBaseAttributes.
+
+     Attributes:
+         inference_kwargs (SpeechToTextInferenceKwargs): Task-specific parameters for the speech-to-text pipeline,
+             such as `return_timestamps`.
+     """
+
+     inference_kwargs: SpeechToTextInferenceKwargs = Field(default_factory=SpeechToTextInferenceKwargs)
+
+
  class SpeechToTextTransformers(TransformersBase):
      """Template to perform speech-to-text actions
      using the HuggingFace module through the 'transformers' architecture.
@@ -40,6 +70,7 @@ class SpeechToTextTransformers(TransformersBase):

      """

+     AttributesBaseModel = SpeechToTextTransformersAttributes
      TEXT_KEY = "text"
      UIProperties = SpeechToTextTransformersUIProperties

@@ -65,11 +96,15 @@ class SpeechToTextTransformers(TransformersBase):
          for audio_packet in container.audios:
              audio = audio_packet.content
              audio = audio.astype(np.float32)
-             transcribed_text = self.pipeline(audio, **self.attributes.inference_kwargs)[self.TEXT_KEY]
-             transcribed_text_textpacket = TextPacket(
-                 content=transcribed_text,
-                 source=audio_packet.source,
-             )
-             self.logger.info(f"Speech-to-text transcription: {transcribed_text}")
-             container.texts.append(transcribed_text_textpacket)
+             results = self.pipeline(audio, **self.attributes.inference_kwargs.model_dump(exclude_none=True))
+             if results:
+                 transcribed_text = results.get(self.TEXT_KEY)
+                 if transcribed_text:
+                     self.logger.info(f"Speech-to-text transcription: {transcribed_text}")
+                     container.texts.append(
+                         TextPacket(
+                             content=transcribed_text,
+                             source=audio_packet.source,
+                         )
+                     )
          return container
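
Because `return_timestamps` defaults to None and the call site uses `model_dump(exclude_none=True)`, the kwarg now reaches the pipeline only when a user sets it explicitly. A quick illustration (model redeclared locally; the real class extends `BaseInferenceKwargs`):

```python
from typing import Literal

from pydantic import BaseModel, ConfigDict


class SpeechToTextInferenceKwargs(BaseModel):
    model_config = ConfigDict(extra="allow")

    return_timestamps: Literal["char", "word"] | bool | None = None


# Left unset, the field is dropped entirely and the pipeline sees no kwarg.
print(SpeechToTextInferenceKwargs().model_dump(exclude_none=True))
# {}

# Set explicitly, it is forwarded as-is.
print(SpeechToTextInferenceKwargs(return_timestamps="word").model_dump(exclude_none=True))
# {'return_timestamps': 'word'}
```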

sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py

@@ -1,16 +1,48 @@
  # -*- coding: utf-8 -*-

+ from pydantic import Field
  from sinapsis_core.data_containers.data_packet import DataContainer
  from sinapsis_core.template_base.base_models import OutputTypes

  from sinapsis_huggingface_transformers.helpers.tags import Tags
- from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
+ from sinapsis_huggingface_transformers.templates.base_transformers import (
+     BaseInferenceKwargs,
+     TransformersBase,
+     TransformersBaseAttributes,
+ )

  SummarizationTransformersUIProperties = TransformersBase.UIProperties
  SummarizationTransformersUIProperties.output_type = OutputTypes.TEXT
  SummarizationTransformersUIProperties.tags.extend([Tags.SUMMARIZATION, Tags.TEXT])


+ class SummarizationInferenceKwargs(BaseInferenceKwargs):
+     """Specific keyword arguments for the summarization pipeline.
+
+     Attributes:
+         return_text (bool | None): Whether or not to include the decoded texts in the outputs.
+         return_tensors (bool | None): Whether or not to include the tensors of predictions.
+         clean_up_tokenization_spaces (bool | None): Whether or not to clean up the potential extra spaces.
+     """
+
+     return_text: bool | None = True
+     return_tensors: bool | None = False
+     clean_up_tokenization_spaces: bool | None = False
+
+
+ class SummarizationTransformersAttributes(TransformersBaseAttributes):
+     """Defines the complete set of attributes for the SummarizationTransformers template.
+
+     Inherits general transformer settings from TransformersBaseAttributes.
+
+     Attributes:
+         inference_kwargs: Task-specific parameters for the summarization pipeline,
+             such as `clean_up_tokenization_spaces`.
+     """
+
+     inference_kwargs: SummarizationInferenceKwargs = Field(default_factory=SummarizationInferenceKwargs)
+
+
  class SummarizationTransformers(TransformersBase):
      """Template for text summarization using a Hugging Face Transformers pipeline.

@@ -39,6 +71,7 @@ class SummarizationTransformers(TransformersBase):

      """

+     AttributesBaseModel = SummarizationTransformersAttributes
      SUMMARY_TEXT_KEY = "summary_text"
      UIProperties = SummarizationTransformersUIProperties

@@ -63,9 +96,11 @@ class SummarizationTransformers(TransformersBase):
              DataContainer: DataContainer including the summarized text.
          """
          for text_packet in container.texts:
-             summarized_text = self.pipeline(text_packet.content, **self.attributes.inference_kwargs)[0][
-                 self.SUMMARY_TEXT_KEY
-             ]
-
-             text_packet.content = summarized_text
+             results = self.pipeline(
+                 text_packet.content, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+             )
+             if results:
+                 summarized_text = results[0].get(self.SUMMARY_TEXT_KEY)
+                 if summarized_text:
+                     text_packet.content = summarized_text
          return container

sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py

@@ -1,5 +1,4 @@
  # -*- coding: utf-8 -*-
-
  import numpy as np
  import torch
  from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer, TextPacket
@@ -65,6 +64,7 @@ class TextToSpeechTransformers(TransformersBase):
      """

      AttributesBaseModel = TextToSpeechAttributes
+     SAMPLE_RATE_KEY = "sampling_rate"
      UIProperties = TextToSpeechTransformersUIProperties

      def initialize(self) -> None:
@@ -94,7 +94,7 @@ class TextToSpeechTransformers(TransformersBase):
              else {}
          )
          output = self.pipeline("Fetching sampling rate.", forward_params=forward_params)
-         sample_rate = output.get("sampling_rate", 16000)
+         sample_rate = output.get(self.SAMPLE_RATE_KEY, 16000)

          return sample_rate

@@ -132,7 +132,9 @@ class TextToSpeechTransformers(TransformersBase):
              else {}
          )
          for chunk in sentences:
-             output = self.pipeline(chunk, forward_params=forward_params, **self.attributes.inference_kwargs)
+             output = self.pipeline(
+                 chunk, forward_params=forward_params, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+             )
              total_audio.append(output["audio"][0] if output["audio"].ndim == 2 else output["audio"])
          if total_audio:
              total_audio = np.concatenate(total_audio)

sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py

@@ -1,11 +1,12 @@
  # -*- coding: utf-8 -*-

-
+ from pydantic import Field
  from sinapsis_core.data_containers.data_packet import DataContainer
  from sinapsis_core.template_base.base_models import OutputTypes

  from sinapsis_huggingface_transformers.helpers.tags import Tags
  from sinapsis_huggingface_transformers.templates.base_transformers import (
+     BaseInferenceKwargs,
      TransformersBase,
      TransformersBaseAttributes,
  )
@@ -15,14 +16,31 @@ TranslationTransformersUIProperties.output_type = OutputTypes.TEXT
  TranslationTransformersUIProperties.tags.extend([Tags.LANGUAGE, Tags.TRANSLATION])


+ class TranslationInferenceKwargs(BaseInferenceKwargs):
+     """Specific keyword arguments for the translation pipeline.
+
+     Attributes:
+         return_text (bool | None): Whether or not to include the decoded texts in the outputs.
+         return_tensors (bool | None): Whether or not to include the tensors of predictions.
+         clean_up_tokenization_spaces (bool | None): Whether or not to clean up the potential extra spaces.
+     """
+
+     return_text: bool | None = True
+     return_tensors: bool | None = False
+     clean_up_tokenization_spaces: bool | None = False
+
+
  class TranslationTransformersAttributes(TransformersBaseAttributes):
      """Attributes for the transformers pipeline translation task.

      Attributes:
+         inference_kwargs: Task-specific parameters for the translation pipeline,
+             such as `clean_up_tokenization_spaces`.
          source_language (str): The language code of the source language (e.g., "en" for English).
          target_language (str): The language code of the target language (e.g., "fr" for French).
      """

+     inference_kwargs: TranslationInferenceKwargs = Field(default_factory=TranslationInferenceKwargs)
      source_language: str
      target_language: str

@@ -79,8 +97,11 @@ class TranslationTransformers(TransformersBase):
              DataContainer: DataContainer including the translated text.
          """
          for text_packet in container.texts:
-             translated_text = self.pipeline(text_packet.content, **self.attributes.inference_kwargs)[0][
-                 self.TRANSLATION_TEXT_KEY
-             ]
-             text_packet.content = translated_text
+             results = self.pipeline(
+                 text_packet.content, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+             )
+             if results:
+                 translated_text = results[0].get(self.TRANSLATION_TEXT_KEY)
+                 if translated_text:
+                     text_packet.content = translated_text
          return container
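
One subtlety shared by the summarization and translation kwargs above: their fields default to booleans (`return_text=True`, `return_tensors=False`, `clean_up_tokenization_spaces=False`) rather than None, and `exclude_none=True` only drops None values, so these three are always forwarded to the pipeline even when left untouched. A short demonstration (model redeclared locally for illustration):

```python
from pydantic import BaseModel, ConfigDict


class TranslationInferenceKwargs(BaseModel):
    model_config = ConfigDict(extra="allow")

    return_text: bool | None = True
    return_tensors: bool | None = False
    clean_up_tokenization_spaces: bool | None = False


# False is not None, so the boolean defaults survive the dump and are
# always passed along, unlike the None-defaulted fields in other templates.
print(TranslationInferenceKwargs().model_dump(exclude_none=True))
# {'return_text': True, 'return_tensors': False,
#  'clean_up_tokenization_spaces': False}
```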