sinapsis-huggingface 0.2.10__py3-none-any.whl → 0.2.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (14)
  1. {sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/METADATA +1 -1
  2. {sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/RECORD +14 -14
  3. sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py +37 -6
  4. sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py +30 -3
  5. sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py +1 -1
  6. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py +29 -10
  7. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py +37 -4
  8. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py +43 -8
  9. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py +41 -6
  10. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py +5 -3
  11. sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py +26 -5
  12. {sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/WHEEL +0 -0
  13. {sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/licenses/LICENSE +0 -0
  14. {sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/top_level.txt +0 -0

{sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sinapsis-huggingface
- Version: 0.2.10
+ Version: 0.2.11
  Summary: Package for HuggingFace-based templates
  Author-email: SinapsisAI <dev@sinapsis.tech>
  Project-URL: Homepage, https://sinapsis.tech

{sinapsis_huggingface-0.2.10.dist-info → sinapsis_huggingface-0.2.11.dist-info}/RECORD

@@ -1,12 +1,12 @@
- sinapsis_huggingface-0.2.10.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+ sinapsis_huggingface-0.2.11.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/helpers/tags.py,sha256=TFmVD7r70vKmpNqSweVGme4riZZiRQWIfxySTexyJp8,522
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/__init__.py,sha256=9FHbS4hse9WIE-1a5jJlG-23gB3wahlULANJAWQ464c,947
- sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py,sha256=xQgt6ehlJ5ESNFgqWbMbL31sTCLJwz3zJNNNATooPw4,8745
+ sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py,sha256=OOb7xwHXnPifWWknrdYsk9i_U2dSdnBaeF6qDcaZ6xQ,10242
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_image_diffusers.py,sha256=OYT5fZBzCZoW7WTFi9kpvibGJw8wHTMm_O0eu74CRT4,6595
- sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py,sha256=XVm3HD1WtgteviwrVtvVyzNShKK8G8J5Nb_8iKdf74c,2394
- sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py,sha256=9-y2nslRPvUxcx8A7CpYEkERhOHUI2g-VKuK1Zk_sFI,13807
+ sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py,sha256=tV5Ebv3-Qh6684WZQ7phE1B4QUGq0espICJFponhZ60,3590
+ sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py,sha256=aiGMXphGRjqBWDFnAanp-Iv-P_igmI6vj0Phg5JvZ3E,13800
  sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/text_to_image_diffusers.py,sha256=LUuI3A4MXcHZrq7mCPvkgmswRNicmIvhLiZ6bdNTpqM,3360
  sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -26,18 +26,18 @@ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/tags.py,sha256=F6zVOBh8mVnl9AG0s_t1ftZ-1Fg1RoO3zgng9PxfLWk,652
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/text_to_sentences.py,sha256=teaJXoTAVzGwar9gxenBabkA9VBJd-VAxsNXlzkKMuU,1676
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/__init__.py,sha256=3BgUm6C_tRgzxh2ADMBcu6OHzR-U5Tl1eFVtU0PwxB0,1095
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py,sha256=hlcI9NhjIyfEsbM2OhkCUvF5KDeBkQwdzrwJ4MnoKoM,7008
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py,sha256=SMk_D1L9mX5UU-GY2NxJz7eqXSpXk2b71NSxz5tgPAY,3088
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py,sha256=d-RfLRVMoY36Reo-fgXhVjRjNRpcSgO_nSbDe-VPCFY,2771
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py,sha256=RodHYOmTwXX1CEnFkVpK2RNczCVoMoHpSLx4CSdGM9g,2362
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py,sha256=VzsyVtNkfGHtX8nYE0uCTwG0qgJLgUQt6CAU4pJZkzA,6337
- sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py,sha256=Lq0lmUBjTWdLuLfbxINcOaAfj0tD96ew9PeuBUTZaKU,3022
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py,sha256=8vZBGQka2kSPYjbn5PbnT256_1UrJ099ygS6rQOMkmY,7647
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py,sha256=6L4IUGv9l6Z3xQi5qZB70KPIa1BBUmckS1BhkmXbrjs,4272
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py,sha256=4lewS44TXjWoiPtcvNM9Xo1rvm8XkYTJtuduDXgbrXI,4062
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py,sha256=XF7HYBNaEThyFJoqdWxb3mUqBtxEtnHN6mb6jwP_UEk,3732
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py,sha256=epvTmlxrRC1xeFiQzX0BJY9rHg6KtbYJSlKqbsLnE1k,6439
+ sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py,sha256=36ZjRzd8GRKq8s_b3_zkyK8BE7pmvyrtqi1_UNA0Fjg,4014
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_base.py,sha256=rcmnEo2o_xm7wExWfJDD9b8KQy0cgd2VeUyGNG6t4Aw,5037
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_detection.py,sha256=0bQB0rDU5foADo5IOe422UUJ1v6BZ3Q08esklwKOOQA,4210
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_inference.py,sha256=gC6RXqQi423uZr-GFpI_XzrRHjqMgJ7ry5MXC2-Y5N8,10894
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/helpers.py,sha256=IGeYd5U2xpimpwTQW_5xm1pUYB5tqHlpq-fjwBHI4gY,2187
- sinapsis_huggingface-0.2.10.dist-info/METADATA,sha256=0mOVbysZVoFfr0rfMbakeYQIQ_FSK7_RSxyjcd4XMP8,11401
- sinapsis_huggingface-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- sinapsis_huggingface-0.2.10.dist-info/top_level.txt,sha256=ZxHwnMjSWRceQL_6-B7GJBPxQWdlwkba-SYMVufhj5s,133
- sinapsis_huggingface-0.2.10.dist-info/RECORD,,
+ sinapsis_huggingface-0.2.11.dist-info/METADATA,sha256=MmqGcCSRmeJ1AricO6rQIrEjKMm8Q_P0KFGvJiDm2BI,11401
+ sinapsis_huggingface-0.2.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ sinapsis_huggingface-0.2.11.dist-info/top_level.txt,sha256=ZxHwnMjSWRceQL_6-B7GJBPxQWdlwkba-SYMVufhj5s,133
+ sinapsis_huggingface-0.2.11.dist-info/RECORD,,

sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py

@@ -1,12 +1,12 @@
  # -*- coding: utf-8 -*-

  from abc import ABC, abstractmethod
- from typing import Any, Literal
+ from typing import Literal

  import numpy as np
  import torch
  from diffusers import DiffusionPipeline
- from pydantic import BaseModel, ConfigDict
+ from pydantic import BaseModel, ConfigDict, Field
  from sinapsis_core.data_containers.data_packet import DataContainer, ImagePacket
  from sinapsis_core.template_base import Template
  from sinapsis_core.template_base.base_models import (
@@ -20,6 +20,37 @@ from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
  from sinapsis_huggingface_diffusers.helpers.tags import Tags


+ class BaseGenerationParams(BaseModel):
+     """Defines the core parameters for a diffusers generation pipeline.
+
+     This model is flexible and allows any other parameters (e.g., `strength`)
+     to be passed, which will be forwarded to the underlying pipeline.
+
+     Attributes:
+         prompt (str | list[str] | None): The text prompt(s) to guide image generation.
+         height (int | None): The height in pixels of the generated image.
+         width (int | None): The width in pixels of the generated image.
+         negative_prompt (str | list[str] | None): Prompt(s) to guide the model away
+             from generating certain things.
+         num_inference_steps (int | None): The number of denoising steps. More steps
+             typically result in higher quality but are slower. Defaults to 50.
+         guidance_scale (float | None): Controls how much the prompt influences the
+             output. Higher values mean stronger adherence. Defaults to 7.5.
+         num_images_per_prompt (int | None): The number of images to generate per
+             prompt. Defaults to 1.
+     """
+
+     model_config = ConfigDict(extra="allow")
+
+     prompt: str | list[str] | None = None
+     height: int | None = None
+     width: int | None = None
+     negative_prompt: str | list[str] | None = None
+     num_inference_steps: int | None = 50
+     guidance_scale: float | None = 7.5
+     num_images_per_prompt: int | None = 1
+
+
  class BaseDiffusersAttributes(TemplateAttributes):
      """Configuration attributes for setting up a diffusion pipeline and generating images.

@@ -33,7 +64,7 @@ class BaseDiffusersAttributes(TemplateAttributes):
          device (Literal["cuda", "cpu"]): Device for computations, either "cpu" or "cuda".
          torch_dtype (Literal["float16", "float32"]): Data type for PyTorch tensors.
          enable_model_cpu_offload (bool): If True, enables CPU offloading to reduce GPU memory usage.
-         generation_params (dict): Parameters for image generation (e.g., prompt, guidance_scale).
+         generation_params (BaseGenerationParams): Parameters for image generation (e.g., prompt, guidance_scale).
          seed (int | list[int] | None): Random seed(s) for reproducibility.
          overwrite_images (bool): Whether to overwrite the existing images in the container.
              Defaults to False.
@@ -44,7 +75,7 @@ class BaseDiffusersAttributes(TemplateAttributes):
      device: Literal["cuda", "cpu"]
      torch_dtype: Literal["float16", "float32"] = "float16"
      enable_model_cpu_offload: bool = False
-     generation_params: dict[str, Any]
+     generation_params: BaseGenerationParams = Field(default_factory=BaseGenerationParams)
      seed: int | list[int] | None = None
      overwrite_images: bool = False

@@ -79,7 +110,7 @@ class BaseDiffusers(Template, ABC):

          self.pipeline = self._make_pipeline()
          self.pipeline.set_progress_bar_config(disable=True)
-         self.num_images_per_prompt = self.attributes.generation_params.get("num_images_per_prompt", 1)
+         self.num_images_per_prompt = self.attributes.generation_params.num_images_per_prompt
          self.generator = self._make_generator()

          if self.attributes.enable_model_cpu_offload:
@@ -163,7 +194,7 @@ class BaseDiffusers(Template, ABC):
              inputs = {}
          output = self.pipeline(
              **inputs,
-             **self.attributes.generation_params,
+             **self.attributes.generation_params.model_dump(exclude_none=True),
              generator=self.generator,
              output_type="np",
          )
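
The change above replaces the untyped `generation_params: dict[str, Any]` with a Pydantic model: the core fields are declared and validated, `extra="allow"` still accepts arbitrary pipeline-specific keys such as `strength`, and `model_dump(exclude_none=True)` forwards only values that are actually set. A minimal standalone sketch of that pattern (redeclaring the model here for illustration rather than importing the package):

```python
from pydantic import BaseModel, ConfigDict


class BaseGenerationParams(BaseModel):
    """Typed core fields; extra="allow" keeps unknown keys for the pipeline."""

    model_config = ConfigDict(extra="allow")

    prompt: str | list[str] | None = None
    height: int | None = None
    width: int | None = None
    negative_prompt: str | list[str] | None = None
    num_inference_steps: int | None = 50
    guidance_scale: float | None = 7.5
    num_images_per_prompt: int | None = 1


# An undeclared key such as `strength` is preserved as an extra field.
params = BaseGenerationParams(prompt="a red fox", strength=0.8)

# exclude_none=True drops the unset optional fields (height, width,
# negative_prompt), so the pipeline call only receives real values.
print(params.model_dump(exclude_none=True))
# {'prompt': 'a red fox', 'num_inference_steps': 50, 'guidance_scale': 7.5,
#  'num_images_per_prompt': 1, 'strength': 0.8}
```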

sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py

@@ -1,9 +1,10 @@
  # -*- coding: utf-8 -*-
-
  from diffusers import I2VGenXLPipeline
+ from pydantic import Field
  from sinapsis_core.template_base.base_models import TemplateAttributeType

  from sinapsis_huggingface_diffusers.helpers.tags import Tags
+ from sinapsis_huggingface_diffusers.templates.base_diffusers import BaseDiffusersAttributes, BaseGenerationParams
  from sinapsis_huggingface_diffusers.templates.image_to_image_diffusers import (
      ImageToImageDiffusers,
  )
@@ -12,6 +13,32 @@ ImageToVideoGenXLDiffusersUIProperties = ImageToImageDiffusers.UIProperties
  ImageToVideoGenXLDiffusersUIProperties.tags.extend([Tags.VIDEO, Tags.IMAGE_TO_VIDEO])


+ class ImageToVideoGenerationParams(BaseGenerationParams):
+     """Defines the specific parameters for image-to-video generation pipelines.
+
+     Attributes:
+         target_fps (int | None): The target frames per second for the generated video.
+         num_frames (int | None): The total number of frames to generate in the video. Defaults to 16.
+         num_videos_per_prompt (int | None): The number of different videos to generate
+             from the same input image and prompt.
+     """
+
+     target_fps: int | None = None
+     num_frames: int | None = 16
+     num_videos_per_prompt: int | None = None
+
+
+ class ImageToVideoGenXLDiffusersAttributes(BaseDiffusersAttributes):
+     """Defines the complete set of attributes for the ImageToVideoGenXLDiffusers template.
+
+     Attributes:
+         generation_params (ImageToVideoGenerationParams): Task-specific parameters for
+             video generation, such as `num_frames` and `target_fps`.
+     """
+
+     generation_params: ImageToVideoGenerationParams = Field(default_factory=ImageToVideoGenerationParams)
+
+
  class ImageToVideoGenXLDiffusers(ImageToImageDiffusers):
      """This class implements a specific template for image-to-video generation using Hugging Face's
      diffusers. The `ImageToVideoGenXLDiffusers` class inherits from the `ImageToImageDiffusers` template
@@ -42,12 +69,12 @@ class ImageToVideoGenXLDiffusers(ImageToImageDiffusers):

      """

+     AttributesBaseModel = ImageToVideoGenXLDiffusersAttributes
      UIProperties = ImageToVideoGenXLDiffusersUIProperties
-     DEFAULT_NUM_FRAMES = 16

      def __init__(self, attributes: TemplateAttributeType) -> None:
          super().__init__(attributes)
-         self.num_duplicates = self.attributes.generation_params.get("num_frames", self.DEFAULT_NUM_FRAMES)
+         self.num_duplicates = self.attributes.generation_params.num_frames
          self.requires_pil = True
          self.output_attribute = "frames"
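
Here the dict lookup with a class-level fallback (`DEFAULT_NUM_FRAMES`) becomes a typed subclass: the default of 16 moves onto `num_frames` itself, and the attributes class narrows `generation_params` to the task-specific model via `Field(default_factory=...)`. A sketch of that override pattern, using hypothetical stand-ins for the real template classes:

```python
from pydantic import BaseModel, ConfigDict, Field


class BaseGenerationParams(BaseModel):
    model_config = ConfigDict(extra="allow")

    prompt: str | list[str] | None = None


class ImageToVideoGenerationParams(BaseGenerationParams):
    target_fps: int | None = None
    num_frames: int | None = 16  # replaces the old DEFAULT_NUM_FRAMES constant
    num_videos_per_prompt: int | None = None


class VideoAttributes(BaseModel):  # stand-in for the real attributes class
    # Declaring the field with the narrower type swaps in the video params.
    generation_params: ImageToVideoGenerationParams = Field(
        default_factory=ImageToVideoGenerationParams
    )


attrs = VideoAttributes()
assert attrs.generation_params.num_frames == 16  # typed access, no .get() fallback
```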
 

sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py

@@ -327,6 +327,6 @@ class InpaintingDiffusers(ImageToImageDiffusers):
          if old_packet.annotations:
              new_packet.annotations = old_packet.annotations
              for ann in new_packet.annotations:
-                 ann.label_str = str(self.attributes.generation_params.get("prompt"))
+                 ann.label_str = str(self.attributes.generation_params.prompt)

          return new_packets, old_packets

sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py

@@ -6,7 +6,7 @@ from abc import abstractmethod
  from typing import Any, Literal

  import torch
- from pydantic import Field
+ from pydantic import BaseModel, ConfigDict, Field
  from sinapsis_core.data_containers.data_packet import DataContainer
  from sinapsis_core.template_base import Template
  from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType, UIPropertiesMetadata
@@ -17,6 +17,29 @@ from transformers.pipelines import Pipeline
  from sinapsis_huggingface_transformers.helpers.tags import Tags


+ class BaseInferenceKwargs(BaseModel):
+     """A flexible container for keyword arguments passed during inference.
+
+     Attributes:
+         generate_kwargs (dict[str, Any] | None): A dictionary of advanced parameters passed directly to the
+             model's `generate` method for fine-tuning the pipeline generation.
+     """
+
+     generate_kwargs: dict[str, Any] | None = None
+     model_config = ConfigDict(extra="allow")
+
+
+ class PipelineKwargs(BaseModel):
+     """A flexible container for keyword arguments used to create the pipeline.
+
+     This model allows any extra parameters to be passed during pipeline instantiation.
+     """
+
+     device: Literal["cuda", "cpu"]
+     torch_dtype: Literal["float16", "float32", "auto"] = "float16"
+     model_config = ConfigDict(extra="allow")
+
+
  class TransformersBaseAttributes(TemplateAttributes):
      """Attributes for configuring the TransformersPipelineTemplate.

@@ -31,20 +54,18 @@ class TransformersBaseAttributes(TemplateAttributes):
          seed (int | None): Random seed for reproducibility. If provided, this seed will ensure
              consistent results for pipelines that involve randomness. If not provided, a random seed
              will be generated internally.
-         pipeline_kwargs (dict[str, Any]): Keyword arguments passed during the instantiation of the
+         pipeline_kwargs (PipelineKwargs): Keyword arguments passed during the instantiation of the
              Hugging Face pipeline.
-         inference_kwargs (dict[str, Any]): Keyword arguments passed during the task execution or
+         inference_kwargs (BaseInferenceKwargs): Keyword arguments passed during the task execution or
              inference phase. These allow dynamic customization of the task, such as `max_length`
              and `min_length` for summarization, or `max_new_tokens` for image-to-text.
      """

      model_path: str
      model_cache_dir: str = str(SINAPSIS_CACHE_DIR)
-     device: Literal["cuda", "cpu"]
-     torch_dtype: Literal["float16", "float32"] = "float16"
      seed: int | None = None
-     pipeline_kwargs: dict[str, Any] = Field(default_factory=dict)
-     inference_kwargs: dict[str, Any] = Field(default_factory=dict)
+     pipeline_kwargs: PipelineKwargs = Field(default_factory=PipelineKwargs)
+     inference_kwargs: BaseInferenceKwargs = Field(default_factory=BaseInferenceKwargs)


  class TransformersBase(Template):
@@ -123,9 +144,7 @@ class TransformersBase(Template):
          return pipeline(
              task=self.task,
              model=self.attributes.model_path,
-             device=self.attributes.device,
-             torch_dtype=self._TORCH_DTYPE.get(self.attributes.torch_dtype),
-             **self.attributes.pipeline_kwargs,
+             **self.attributes.pipeline_kwargs.model_dump(),
              **kwargs,
          )
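
With this change `device` and `torch_dtype` move off the attributes class into `PipelineKwargs`, and the pipeline factory forwards the dumped model wholesale instead of translating dtypes through the removed `_TORCH_DTYPE` map; this relies on newer `transformers` versions accepting string dtype names such as "float16" or "auto" directly. A sketch of the forwarding, with a hypothetical `make_pipeline` standing in for `transformers.pipeline`:

```python
from typing import Any, Literal

from pydantic import BaseModel, ConfigDict


class PipelineKwargs(BaseModel):
    """Creation-time settings; extra="allow" admits e.g. trust_remote_code."""

    model_config = ConfigDict(extra="allow")

    device: Literal["cuda", "cpu"]
    torch_dtype: Literal["float16", "float32", "auto"] = "float16"


def make_pipeline(task: str, model: str, kwargs: PipelineKwargs) -> dict[str, Any]:
    # Stand-in for transformers.pipeline(...): note the plain model_dump(),
    # so device and torch_dtype are always passed through, defaults included.
    return {"task": task, "model": model, **kwargs.model_dump()}


kwargs = PipelineKwargs(device="cpu", trust_remote_code=True)  # extra key kept
print(make_pipeline("summarization", "t5-small", kwargs))
# {'task': 'summarization', 'model': 't5-small', 'device': 'cpu',
#  'torch_dtype': 'float16', 'trust_remote_code': True}
```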
 
sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py

@@ -2,17 +2,47 @@

  import numpy as np
  from PIL import Image
+ from pydantic import Field
  from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
  from sinapsis_core.template_base.base_models import OutputTypes

  from sinapsis_huggingface_transformers.helpers.tags import Tags
- from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
+ from sinapsis_huggingface_transformers.templates.base_transformers import (
+     BaseInferenceKwargs,
+     TransformersBase,
+     TransformersBaseAttributes,
+ )

  ImageToTextTransformersUIProperties = TransformersBase.UIProperties
  ImageToTextTransformersUIProperties.output_type = OutputTypes.TEXT
  ImageToTextTransformersUIProperties.tags.extend([Tags.IMAGE, Tags.TEXT, Tags.IMAGE_TO_TEXT])


+ class ImageToTextInferenceKwargs(BaseInferenceKwargs):
+     """Specific keyword arguments for the image-to-text pipeline.
+
+     Attributes:
+         max_new_tokens (int | None): The maximum number of tokens to generate in the description.
+         timeout (float | None): The maximum time in seconds to wait for fetching images from the web.
+     """
+
+     max_new_tokens: int | None = None
+     timeout: float | None = None
+
+
+ class ImageToTextTransformersAttributes(TransformersBaseAttributes):
+     """Defines the complete set of attributes for the ImageToTextTransformers template.
+
+     Inherits general transformer settings from TransformersBaseAttributes.
+
+     Attributes:
+         inference_kwargs (ImageToTextInferenceKwargs): Task-specific parameters for the image-to-text pipeline,
+             such as `max_new_tokens`.
+     """
+
+     inference_kwargs: ImageToTextInferenceKwargs = Field(default_factory=ImageToTextInferenceKwargs)
+
+
  class ImageToTextTransformers(TransformersBase):
      """ImageToTextTransformers template to generate text from an image.

@@ -37,6 +67,7 @@ class ImageToTextTransformers(TransformersBase):

      """

+     AttributesBaseModel = ImageToTextTransformersAttributes
      GENERATED_TEXT_KEY = "generated_text"
      UIProperties = ImageToTextTransformersUIProperties

@@ -78,7 +109,9 @@ class ImageToTextTransformers(TransformersBase):
          """
          for image_packet in container.images:
              image = self._convert_to_pil(image_packet.content)
-             text_description = self.pipeline(image, **self.attributes.inference_kwargs)[0][self.GENERATED_TEXT_KEY]
-             text_packet = TextPacket(content=text_description)
-             container.texts.append(text_packet)
+             results = self.pipeline(image, **self.attributes.inference_kwargs.model_dump(exclude_none=True))
+             if results:
+                 text_description = results[0].get(self.GENERATED_TEXT_KEY)
+                 if text_description:
+                     container.texts.append(TextPacket(content=text_description))
          return container
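
The rewritten loop also hardens result handling: the old code indexed `[0][self.GENERATED_TEXT_KEY]` unconditionally and would raise `IndexError` or `KeyError` on an empty or unexpected pipeline result. A small sketch of the guard pattern in isolation:

```python
from typing import Any

GENERATED_TEXT_KEY = "generated_text"


def extract_caption(results: list[dict[str, Any]]) -> str | None:
    """Mirrors the new flow: returns None instead of raising when the
    pipeline yields an empty list or a dict without the expected key."""
    if results:
        return results[0].get(GENERATED_TEXT_KEY)
    return None


assert extract_caption([{"generated_text": "a cat on a mat"}]) == "a cat on a mat"
assert extract_caption([]) is None                # old code: IndexError
assert extract_caption([{"score": 0.9}]) is None  # old code: KeyError
```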

sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py

@@ -1,11 +1,17 @@
  # -*- coding: utf-8 -*-
+ from typing import Literal

  import numpy as np
+ from pydantic import Field
  from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
  from sinapsis_core.template_base.base_models import OutputTypes

  from sinapsis_huggingface_transformers.helpers.tags import Tags
- from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
+ from sinapsis_huggingface_transformers.templates.base_transformers import (
+     BaseInferenceKwargs,
+     TransformersBase,
+     TransformersBaseAttributes,
+ )

  SpeechToTextTransformersUIProperties = TransformersBase.UIProperties
  SpeechToTextTransformersUIProperties.output_type = OutputTypes.TEXT
@@ -14,6 +20,30 @@ SpeechToTextTransformersUIProperties.tags.extend(
  )


+ class SpeechToTextInferenceKwargs(BaseInferenceKwargs):
+     """Specific keyword arguments for the automatic-speech-recognition pipeline.
+
+     Attributes:
+         return_timestamps (Literal["char", "word"] | bool | None): If set, controls the granularity of
+             timestamps returned with the transcribed text. Can be "char", "word", or True for segments.
+     """
+
+     return_timestamps: Literal["char", "word"] | bool | None = None
+
+
+ class SpeechToTextTransformersAttributes(TransformersBaseAttributes):
+     """Defines the set of attributes for the SpeechToTextTransformers template.
+
+     Inherits general transformer settings from TransformersBaseAttributes.
+
+     Attributes:
+         inference_kwargs (SpeechToTextInferenceKwargs): Task-specific parameters for the speech-to-text pipeline,
+             such as `return_timestamps`.
+     """
+
+     inference_kwargs: SpeechToTextInferenceKwargs = Field(default_factory=SpeechToTextInferenceKwargs)
+
+
  class SpeechToTextTransformers(TransformersBase):
      """Template to perform speech-to-text actions
      using the HuggingFace module through the 'transformers' architecture.
@@ -40,6 +70,7 @@ class SpeechToTextTransformers(TransformersBase):

      """

+     AttributesBaseModel = SpeechToTextTransformersAttributes
      TEXT_KEY = "text"
      UIProperties = SpeechToTextTransformersUIProperties

@@ -65,11 +96,15 @@ class SpeechToTextTransformers(TransformersBase):
          for audio_packet in container.audios:
              audio = audio_packet.content
              audio = audio.astype(np.float32)
-             transcribed_text = self.pipeline(audio, **self.attributes.inference_kwargs)[self.TEXT_KEY]
-             transcribed_text_textpacket = TextPacket(
-                 content=transcribed_text,
-                 source=audio_packet.source,
-             )
-             self.logger.info(f"Speech-to-text transcription: {transcribed_text}")
-             container.texts.append(transcribed_text_textpacket)
+             results = self.pipeline(audio, **self.attributes.inference_kwargs.model_dump(exclude_none=True))
+             if results:
+                 transcribed_text = results.get(self.TEXT_KEY)
+                 if transcribed_text:
+                     self.logger.info(f"Speech-to-text transcription: {transcribed_text}")
+                     container.texts.append(
+                         TextPacket(
+                             content=transcribed_text,
+                             source=audio_packet.source,
+                         )
+                     )
          return container
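
Because `return_timestamps` defaults to None and the call site uses `model_dump(exclude_none=True)`, the kwarg now reaches the pipeline only when a user sets it explicitly. A quick illustration (model redeclared locally; the real class extends `BaseInferenceKwargs`):

```python
from typing import Literal

from pydantic import BaseModel, ConfigDict


class SpeechToTextInferenceKwargs(BaseModel):
    model_config = ConfigDict(extra="allow")

    return_timestamps: Literal["char", "word"] | bool | None = None


# Left unset, the field is dropped entirely and the pipeline sees no kwarg.
print(SpeechToTextInferenceKwargs().model_dump(exclude_none=True))
# {}

# Set explicitly, it is forwarded as-is.
print(SpeechToTextInferenceKwargs(return_timestamps="word").model_dump(exclude_none=True))
# {'return_timestamps': 'word'}
```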

sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py

@@ -1,16 +1,48 @@
  # -*- coding: utf-8 -*-

+ from pydantic import Field
  from sinapsis_core.data_containers.data_packet import DataContainer
  from sinapsis_core.template_base.base_models import OutputTypes

  from sinapsis_huggingface_transformers.helpers.tags import Tags
- from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
+ from sinapsis_huggingface_transformers.templates.base_transformers import (
+     BaseInferenceKwargs,
+     TransformersBase,
+     TransformersBaseAttributes,
+ )

  SummarizationTransformersUIProperties = TransformersBase.UIProperties
  SummarizationTransformersUIProperties.output_type = OutputTypes.TEXT
  SummarizationTransformersUIProperties.tags.extend([Tags.SUMMARIZATION, Tags.TEXT])


+ class SummarizationInferenceKwargs(BaseInferenceKwargs):
+     """Specific keyword arguments for the summarization pipeline.
+
+     Attributes:
+         return_text (bool | None): Whether or not to include the decoded texts in the outputs.
+         return_tensors (bool | None): Whether or not to include the tensors of predictions.
+         clean_up_tokenization_spaces (bool | None): Whether or not to clean up the potential extra spaces.
+     """
+
+     return_text: bool | None = True
+     return_tensors: bool | None = False
+     clean_up_tokenization_spaces: bool | None = False
+
+
+ class SummarizationTransformersAttributes(TransformersBaseAttributes):
+     """Defines the complete set of attributes for the SummarizationTransformers template.
+
+     Inherits general transformer settings from TransformersBaseAttributes.
+
+     Attributes:
+         inference_kwargs: Task-specific parameters for the summarization pipeline,
+             such as `clean_up_tokenization_spaces`.
+     """
+
+     inference_kwargs: SummarizationInferenceKwargs = Field(default_factory=SummarizationInferenceKwargs)
+
+
  class SummarizationTransformers(TransformersBase):
      """Template for text summarization using a Hugging Face Transformers pipeline.

@@ -39,6 +71,7 @@ class SummarizationTransformers(TransformersBase):

      """

+     AttributesBaseModel = SummarizationTransformersAttributes
      SUMMARY_TEXT_KEY = "summary_text"
      UIProperties = SummarizationTransformersUIProperties

@@ -63,9 +96,11 @@ class SummarizationTransformers(TransformersBase):
              DataContainer: DataContainer including the summarized text.
          """
          for text_packet in container.texts:
-             summarized_text = self.pipeline(text_packet.content, **self.attributes.inference_kwargs)[0][
-                 self.SUMMARY_TEXT_KEY
-             ]
-
-             text_packet.content = summarized_text
+             results = self.pipeline(
+                 text_packet.content, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+             )
+             if results:
+                 summarized_text = results[0].get(self.SUMMARY_TEXT_KEY)
+                 if summarized_text:
+                     text_packet.content = summarized_text
          return container

sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py

@@ -1,5 +1,4 @@
  # -*- coding: utf-8 -*-
-
  import numpy as np
  import torch
  from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer, TextPacket
@@ -65,6 +64,7 @@ class TextToSpeechTransformers(TransformersBase):
      """

      AttributesBaseModel = TextToSpeechAttributes
+     SAMPLE_RATE_KEY = "sampling_rate"
      UIProperties = TextToSpeechTransformersUIProperties

      def initialize(self) -> None:
@@ -94,7 +94,7 @@ class TextToSpeechTransformers(TransformersBase):
              else {}
          )
          output = self.pipeline("Fetching sampling rate.", forward_params=forward_params)
-         sample_rate = output.get("sampling_rate", 16000)
+         sample_rate = output.get(self.SAMPLE_RATE_KEY, 16000)

          return sample_rate

@@ -132,7 +132,9 @@ class TextToSpeechTransformers(TransformersBase):
              else {}
          )
          for chunk in sentences:
-             output = self.pipeline(chunk, forward_params=forward_params, **self.attributes.inference_kwargs)
+             output = self.pipeline(
+                 chunk, forward_params=forward_params, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+             )
              total_audio.append(output["audio"][0] if output["audio"].ndim == 2 else output["audio"])
          if total_audio:
              total_audio = np.concatenate(total_audio)

sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py

@@ -1,11 +1,12 @@
  # -*- coding: utf-8 -*-

-
+ from pydantic import Field
  from sinapsis_core.data_containers.data_packet import DataContainer
  from sinapsis_core.template_base.base_models import OutputTypes

  from sinapsis_huggingface_transformers.helpers.tags import Tags
  from sinapsis_huggingface_transformers.templates.base_transformers import (
+     BaseInferenceKwargs,
      TransformersBase,
      TransformersBaseAttributes,
  )
@@ -15,14 +16,31 @@ TranslationTransformersUIProperties.output_type = OutputTypes.TEXT
  TranslationTransformersUIProperties.tags.extend([Tags.LANGUAGE, Tags.TRANSLATION])


+ class TranslationInferenceKwargs(BaseInferenceKwargs):
+     """Specific keyword arguments for the translation pipeline.
+
+     Attributes:
+         return_text (bool | None): Whether or not to include the decoded texts in the outputs.
+         return_tensors (bool | None): Whether or not to include the tensors of predictions.
+         clean_up_tokenization_spaces (bool | None): Whether or not to clean up the potential extra spaces.
+     """
+
+     return_text: bool | None = True
+     return_tensors: bool | None = False
+     clean_up_tokenization_spaces: bool | None = False
+
+
  class TranslationTransformersAttributes(TransformersBaseAttributes):
      """Attributes for the transformers pipeline translation task.

      Attributes:
+         inference_kwargs: Task-specific parameters for the translation pipeline,
+             such as `clean_up_tokenization_spaces`.
          source_language (str): The language code of the source language (e.g., "en" for English).
          target_language (str): The language code of the target language (e.g., "fr" for French).
      """

+     inference_kwargs: TranslationInferenceKwargs = Field(default_factory=TranslationInferenceKwargs)
      source_language: str
      target_language: str

@@ -79,8 +97,11 @@ class TranslationTransformers(TransformersBase):
              DataContainer: DataContainer including the translated text.
          """
          for text_packet in container.texts:
-             translated_text = self.pipeline(text_packet.content, **self.attributes.inference_kwargs)[0][
-                 self.TRANSLATION_TEXT_KEY
-             ]
-             text_packet.content = translated_text
+             results = self.pipeline(
+                 text_packet.content, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+             )
+             if results:
+                 translated_text = results[0].get(self.TRANSLATION_TEXT_KEY)
+                 if translated_text:
+                     text_packet.content = translated_text
          return container
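
One subtlety shared by the summarization and translation kwargs above: their fields default to booleans (`return_text=True`, `return_tensors=False`, `clean_up_tokenization_spaces=False`) rather than None, and `exclude_none=True` only drops None values, so these three are always forwarded to the pipeline even when left untouched. A short demonstration (model redeclared locally for illustration):

```python
from pydantic import BaseModel, ConfigDict


class TranslationInferenceKwargs(BaseModel):
    model_config = ConfigDict(extra="allow")

    return_text: bool | None = True
    return_tensors: bool | None = False
    clean_up_tokenization_spaces: bool | None = False


# False is not None, so the boolean defaults survive the dump and are
# always passed along, unlike the None-defaulted fields in other templates.
print(TranslationInferenceKwargs().model_dump(exclude_none=True))
# {'return_text': True, 'return_tensors': False,
#  'clean_up_tokenization_spaces': False}
```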