PyPI - sinapsis-huggingface - Versions diffs - 0.2.10__tar.gz → 0.2.12__tar.gz - Mend

sinapsis-huggingface 0.2.10 (tar.gz) → 0.2.12 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sinapsis-huggingface
-Version: 0.2.10
+Version: 0.2.12
 Summary: Package for HuggingFace-based templates
 Author-email: SinapsisAI <dev@sinapsis.tech>
 Project-URL: Homepage, https://sinapsis.tech

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sinapsis-huggingface
-Version: 0.2.10
+Version: 0.2.12
 Summary: Package for HuggingFace-based templates
 Author-email: SinapsisAI <dev@sinapsis.tech>
 Project-URL: Homepage, https://sinapsis.tech

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py RENAMED Viewed

@@ -1,12 +1,13 @@
 # -*- coding: utf-8 -*-
+import gc
 from abc import ABC, abstractmethod
-from typing import Any, Literal
+from typing import Literal
 import numpy as np
 import torch
 from diffusers import DiffusionPipeline
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 from sinapsis_core.data_containers.data_packet import DataContainer, ImagePacket
 from sinapsis_core.template_base import Template
 from sinapsis_core.template_base.base_models import (
@@ -20,6 +21,37 @@ from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
 from sinapsis_huggingface_diffusers.helpers.tags import Tags
+class BaseGenerationParams(BaseModel):
+    """Defines the core parameters for a diffusers generation pipeline.
+    This model is flexible and allows any other parameters (e.g., `strength`)
+    to be passed, which will be forwarded to the underlying pipeline.
+    Attributes:
+        prompt (str | list[str] | None): The text prompt(s) to guide image generation.
+        height (int | None): The height in pixels of the generated image.
+        width (int | None): The width in pixels of the generated image.
+        negative_prompt (str | list[str] | None): Prompt(s) to guide the model away
+            from generating certain things.
+        num_inference_steps (int | None): The number of denoising steps. More steps
+            typically result in higher quality but are slower. Defaults to 50.
+        guidance_scale (float | None): Controls how much the prompt influences the
+            output. Higher values mean stronger adherence. Defaults to 7.5.
+        num_images_per_prompt (int | None): The number of images to generate per
+            prompt. Defaults to 1.
+    """
+    model_config = ConfigDict(extra="allow")
+    prompt: str | list[str] | None = None
+    height: int | None = None
+    width: int | None = None
+    negative_prompt: str | list[str] | None = None
+    num_inference_steps: int | None = 50
+    guidance_scale: float | None = 7.5
+    num_images_per_prompt: int | None = 1
 class BaseDiffusersAttributes(TemplateAttributes):
     """Configuration attributes for setting up a diffusion pipeline and generating images.
@@ -33,7 +65,7 @@ class BaseDiffusersAttributes(TemplateAttributes):
         device (Literal["cuda", "cpu"]): Device for computations, either "cpu" or "cuda".
         torch_dtype (Literal["float16", "float32"]): Data type for PyTorch tensors.
         enable_model_cpu_offload (bool): If True, enables CPU offloading to reduce GPU memory usage.
-        generation_params (dict): Parameters for image generation (e.g., prompt, guidance_scale).
+        generation_params (BaseGenerationParams): Parameters for image generation (e.g., prompt, guidance_scale).
         seed (int | list[int] | None): Random seed(s) for reproducibility.
         overwrite_images (bool): Whether to overwrite the existing images in the container.
             Defaults to False.
@@ -44,7 +76,7 @@ class BaseDiffusersAttributes(TemplateAttributes):
     device: Literal["cuda", "cpu"]
     torch_dtype: Literal["float16", "float32"] = "float16"
     enable_model_cpu_offload: bool = False
-    generation_params: dict[str, Any]
+    generation_params: BaseGenerationParams = Field(default_factory=BaseGenerationParams)
     seed: int | list[int] | None = None
     overwrite_images: bool = False
@@ -76,10 +108,17 @@ class BaseDiffusers(Template, ABC):
     def __init__(self, attributes: TemplateAttributeType) -> None:
         super().__init__(attributes)
+        self.initialize()
+    def initialize(self) -> None:
+        """Initializes the template's common state for creation or reset.
+        This method is called by both `__init__` and `reset_state` to ensure
+        a consistent state. Can be overriden by subclasses for specific behaviour.
+        """
         self.pipeline = self._make_pipeline()
         self.pipeline.set_progress_bar_config(disable=True)
-        self.num_images_per_prompt = self.attributes.generation_params.get("num_images_per_prompt", 1)
+        self.num_images_per_prompt = self.attributes.generation_params.num_images_per_prompt
         self.generator = self._make_generator()
         if self.attributes.enable_model_cpu_offload:
@@ -124,7 +163,7 @@ class BaseDiffusers(Template, ABC):
         pipeline_class = self._pipeline_class()
         return pipeline_class.from_pretrained(
             self.attributes.model_path,
-            torch_dtype=self.TORCH_DTYPE.get(self.attributes.torch_dtype),
+            dtype=self.TORCH_DTYPE.get(self.attributes.torch_dtype),
             cache_dir=self.attributes.model_cache_dir,
         ).to(self.attributes.device)
@@ -163,13 +202,15 @@ class BaseDiffusers(Template, ABC):
             inputs = {}
         output = self.pipeline(
             **inputs,
-            **self.attributes.generation_params,
+            **self.attributes.generation_params.model_dump(exclude_none=True),
             generator=self.generator,
             output_type="np",
         )
         generated_images = output.images if output_attribute == "images" else output.frames[0]
+        images_as_uint8 = [(image * 255).clip(0, 255).astype(np.uint8) for image in generated_images]
+        del output
-        return [(image * 255).clip(0, 255).astype(np.uint8) for image in generated_images]
+        return images_as_uint8
     def _update_images_in_container(self, container: DataContainer, new_packets: list[ImagePacket]) -> None:
         """Updates the container with new image packets based on the `overwrite_images` attribute.
@@ -187,17 +228,39 @@ class BaseDiffusers(Template, ABC):
         else:
             container.images.extend(new_packets)
-    def _clear_memory(self) -> None:
+    @staticmethod
+    def clear_memory() -> None:
         """Clears memory to free up resources.
         This method performs garbage collection and clears GPU memory (if applicable) to prevent memory leaks
         and ensure efficient resource usage.
         """
-        del self.generator
-        if self.attributes.device == "cuda":
+        gc.collect()
+        if torch.cuda.is_available():
             torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
     def reset_state(self, template_name: str | None = None) -> None:
-        self._clear_memory()
-        super().reset_state(template_name)
+        """Releases the pipeline and processor from memory and re-instantiates the template.
+        Args:
+            template_name (str | None, optional): The name of the template instance being reset. Defaults to None.
+        """
+        _ = template_name
+        if hasattr(self, "pipeline") and self.pipeline is not None:
+            components = self.pipeline.components
+            for component in components.values():
+                if isinstance(component, torch.nn.Module):
+                    component.to("cpu")
+            del self.pipeline
+        if hasattr(self, "processor"):
+            del self.processor
+        if hasattr(self, "generator"):
+            del self.generator
+        self.clear_memory()
+        self.initialize()
+        self.logger.info(f"Reset template instance `{self.instance_name}`")

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_image_diffusers.py RENAMED Viewed

@@ -8,7 +8,6 @@ import numpy as np
 from diffusers import AutoPipelineForImage2Image
 from PIL import Image
 from sinapsis_core.data_containers.data_packet import DataContainer, ImagePacket
-from sinapsis_core.template_base.base_models import TemplateAttributeType
 from sinapsis_huggingface_diffusers.helpers.tags import Tags
 from sinapsis_huggingface_diffusers.templates.base_diffusers import BaseDiffusers
@@ -52,9 +51,13 @@ class ImageToImageDiffusers(BaseDiffusers):
     UIProperties = ImageToImageDiffusersUIProperties
-    def __init__(self, attributes: TemplateAttributeType) -> None:
-        super().__init__(attributes)
-        self.requires_pil = False
+    def initialize(self) -> None:
+        """Initializes the template's common state for creation or reset.
+        This method is called by both `__init__` and `reset_state` to ensure
+        a consistent state. Can be overriden by subclasses for specific behaviour.
+        """
+        super().initialize()
         self.output_attribute: Literal["images", "frames"] = "images"
         self.num_duplicates = self.num_images_per_prompt
@@ -67,7 +70,8 @@ class ImageToImageDiffusers(BaseDiffusers):
         """
         return AutoPipelineForImage2Image
-    def _convert_image_format(self, image_packet: ImagePacket) -> np.ndarray | Image.Image:
+    @staticmethod
+    def _convert_image_format(image_packet: ImagePacket) -> Image.Image:
         """Converts the input image into the appropriate format for the pipeline.
         The format depends on the `requires_pil` attribute:
@@ -78,12 +82,9 @@ class ImageToImageDiffusers(BaseDiffusers):
             image_packet (ImagePacket): The input image packet.
         Returns:
-            np.ndarray | Image.Image: The converted image, either as a normalized NumPy array or a
-                PIL Image.
+            Image.Image: The converted image as a PIL Image.
         """
-        if self.requires_pil:
-            return Image.fromarray(image_packet.content)
-        return image_packet.content / 255.0
+        return Image.fromarray(image_packet.content)
     def preprocess_inputs(self, image_packet: ImagePacket) -> dict[str, np.ndarray | list[np.ndarray]]:
         """Prepares the input image for the image-to-image pipeline.
@@ -157,6 +158,6 @@ class ImageToImageDiffusers(BaseDiffusers):
         new_packets = [ImagePacket(content=image) for image in all_generated_images]
         processed_packets, _ = self.post_processing_packets(new_packets, old_packets)
         self._update_images_in_container(container, processed_packets)
-        self._clear_memory()
+        self.clear_memory()
         return container

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py RENAMED Viewed

@@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
+from typing import Literal
 from diffusers import I2VGenXLPipeline
-from sinapsis_core.template_base.base_models import TemplateAttributeType
+from pydantic import Field
 from sinapsis_huggingface_diffusers.helpers.tags import Tags
+from sinapsis_huggingface_diffusers.templates.base_diffusers import BaseDiffusersAttributes, BaseGenerationParams
 from sinapsis_huggingface_diffusers.templates.image_to_image_diffusers import (
     ImageToImageDiffusers,
 )
@@ -12,6 +14,32 @@ ImageToVideoGenXLDiffusersUIProperties = ImageToImageDiffusers.UIProperties
 ImageToVideoGenXLDiffusersUIProperties.tags.extend([Tags.VIDEO, Tags.IMAGE_TO_VIDEO])
+class ImageToVideoGenerationParams(BaseGenerationParams):
+    """Defines the specific parameters for image-to-video generation pipelines.
+    Attributes:
+        target_fps (int | None): The target frames per second for the generated video.
+        num_frames (int | None): The total number of frames to generate in the video. Defaults to 16.
+        num_videos_per_prompt (int | None): The number of different videos to generate
+            from the same input image and prompt.
+    """
+    target_fps: int | None = None
+    num_frames: int | None = 16
+    num_videos_per_prompt: int | None = None
+class ImageToVideoGenXLDiffusersAttributes(BaseDiffusersAttributes):
+    """Defines the complete set of attributes for the ImageToVideoGenXLDiffusers template.
+    Attributes:
+        generation_params (ImageToVideoGenerationParams): Task-specific parameters for
+            video generation, such as `num_frames` and `target_fps`.
+    """
+    generation_params: ImageToVideoGenerationParams = Field(default_factory=ImageToVideoGenerationParams)
 class ImageToVideoGenXLDiffusers(ImageToImageDiffusers):
     """This class implements a specific template for image-to-video generation using Hugging Face's
     diffusers. The `ImageToVideoGenXLDiffusers` class inherits from the `ImageToImageDiffusers` template
@@ -42,14 +70,18 @@ class ImageToVideoGenXLDiffusers(ImageToImageDiffusers):
     """
+    AttributesBaseModel = ImageToVideoGenXLDiffusersAttributes
     UIProperties = ImageToVideoGenXLDiffusersUIProperties
-    DEFAULT_NUM_FRAMES = 16
-    def __init__(self, attributes: TemplateAttributeType) -> None:
-        super().__init__(attributes)
-        self.num_duplicates = self.attributes.generation_params.get("num_frames", self.DEFAULT_NUM_FRAMES)
-        self.requires_pil = True
-        self.output_attribute = "frames"
+    def initialize(self) -> None:
+        """Initializes the template's common state for creation or reset.
+        This method is called by both `__init__` and `reset_state` to ensure
+        a consistent state. Can be overriden by subclasses for specific behaviour.
+        """
+        super().initialize()
+        self.output_attribute: Literal["images", "frames"] = "frames"
+        self.num_duplicates = self.attributes.generation_params.num_frames
     @staticmethod
     def _pipeline_class() -> I2VGenXLPipeline:

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py RENAMED Viewed

@@ -7,7 +7,6 @@ import cv2
 import numpy as np
 from diffusers import AutoPipelineForInpainting
 from sinapsis_core.data_containers.data_packet import ImageAnnotations, ImagePacket
-from sinapsis_core.template_base.base_models import TemplateAttributeType
 from sinapsis_huggingface_diffusers.helpers.tags import Tags
 from sinapsis_huggingface_diffusers.templates.base_diffusers import (
@@ -75,8 +74,13 @@ class InpaintingDiffusers(ImageToImageDiffusers):
     UIProperties = InpaintingDiffusersUIProperties
     AttributesBaseModel = InpaintingDiffusersAttributes
-    def __init__(self, attributes: TemplateAttributeType) -> None:
-        super().__init__(attributes)
+    def initialize(self) -> None:
+        """Initializes the template's common state for creation or reset.
+        This method is called by both `__init__` and `reset_state` to ensure
+        a consistent state. Can be overriden by subclasses for specific behaviour.
+        """
+        super().initialize()
         if self.attributes.preserve_outside_content and self.attributes.dilation_radius is None:
             raise ValueError("Need to specify a dilation_radius if preserve_outside_content=True")
@@ -327,6 +331,6 @@ class InpaintingDiffusers(ImageToImageDiffusers):
             if old_packet.annotations:
                 new_packet.annotations = old_packet.annotations
                 for ann in new_packet.annotations:
-                    ann.label_str = str(self.attributes.generation_params.get("prompt"))
+                    ann.label_str = str(self.attributes.generation_params.prompt)
         return new_packets, old_packets

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/text_to_image_diffusers.py RENAMED Viewed

@@ -80,6 +80,6 @@ class TextToImageDiffusers(BaseDiffusers):
         image_packets = [ImagePacket(content=image) for image in generated_images]
         self._set_packet_sources(image_packets)
         self._update_images_in_container(container, image_packets)
-        self._clear_memory()
+        self.clear_memory()
         return container

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/grounding_dino.py RENAMED Viewed

@@ -111,11 +111,18 @@ class GroundingDINO(Template):
             attributes (dict[str, Any]): Dictionary containing configuration parameters.
         """
         super().__init__(attributes)
-        self.device = self.attributes.device
+        self.initialize()
+    def initialize(self) -> None:
+        """Initializes the template's common state for creation or reset.
+        This method is called by both `__init__` and `reset_state` to ensure
+        a consistent state. Can be overriden by subclasses for specific behaviour.
+        """
         self.processor = AutoProcessor.from_pretrained(
             self.attributes.model_path, cache_dir=self.attributes.model_cache_dir
         )
-        self.model = self._set_model().to(self.device)
+        self.model = self._set_model().to(self.attributes.device)
         self.max_tokens = self.processor.tokenizer.model_max_length
         self.text_input = self.validate_and_format_text_input(self.attributes.text_input)
@@ -171,7 +178,7 @@ class GroundingDINO(Template):
             images=image_packet.content,
             text=self.text_input,
             return_tensors="pt",
-        ).to(self.device)
+        ).to(self.attributes.device)
         with torch.no_grad():
             outputs = self.model(**inputs)
@@ -350,19 +357,32 @@ class GroundingDINO(Template):
         return container
-    def _clear_memory(self) -> None:
+    @staticmethod
+    def clear_memory() -> None:
         """Clears memory to free up resources.
         This method performs garbage collection and clears GPU memory (if applicable) to prevent memory leaks
         and ensure efficient resource usage.
         """
-        for child in self.model.children():
-            child.cpu()
-            gc.collect()
-        if self.attributes.device == "cuda":
+        gc.collect()
+        if torch.cuda.is_available():
             torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
     def reset_state(self, template_name: str | None = None) -> None:
-        self._clear_memory()
-        super().reset_state(template_name)
+        """Releases the pipeline and processor from memory and re-instantiates the template.
+        Args:
+            template_name (str | None, optional): The name of the template instance being reset. Defaults to None.
+        """
+        _ = template_name
+        if hasattr(self, "model") and self.model is not None:
+            self.model.to("cpu")
+            del self.model
+        if hasattr(self, "processor"):
+            del self.processor
+        self.clear_memory()
+        self.initialize()
+        self.logger.info(f"Reset template instance `{self.instance_name}`")

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py RENAMED Viewed

@@ -6,7 +6,7 @@ from abc import abstractmethod
 from typing import Any, Literal
 import torch
-from pydantic import Field
+from pydantic import BaseModel, ConfigDict, Field
 from sinapsis_core.data_containers.data_packet import DataContainer
 from sinapsis_core.template_base import Template
 from sinapsis_core.template_base.base_models import TemplateAttributes, TemplateAttributeType, UIPropertiesMetadata
@@ -17,6 +17,29 @@ from transformers.pipelines import Pipeline
 from sinapsis_huggingface_transformers.helpers.tags import Tags
+class BaseInferenceKwargs(BaseModel):
+    """A flexible container for keyword arguments passed during inference.
+    Attributes:
+        generate_kwargs (dict[str, Any] | None): A dictionary of advanced parameters passed directly to the
+            model's `generate` method for fine-tuning the pipeline generation.
+    """
+    generate_kwargs: dict[str, Any] | None = None
+    model_config = ConfigDict(extra="allow")
+class PipelineKwargs(BaseModel):
+    """A flexible container for keyword arguments used to create the pipeline.
+    This model allows any extra parameters to be passed during pipeline instantiation.
+    """
+    device: Literal["cuda", "cpu"]
+    torch_dtype: Literal["float16", "float32", "auto"] = "float16"
+    model_config = ConfigDict(extra="allow")
 class TransformersBaseAttributes(TemplateAttributes):
     """Attributes for configuring the TransformersPipelineTemplate.
@@ -31,20 +54,18 @@ class TransformersBaseAttributes(TemplateAttributes):
         seed (int | None): Random seed for reproducibility. If provided, this seed will ensure
             consistent results for pipelines that involve randomness. If not provided, a random seed
             will be generated internally.
-        pipeline_kwargs (dict[str, Any]): Keyword arguments passed during the instantiation of the
+        pipeline_kwargs (PipelineKwargs): Keyword arguments passed during the instantiation of the
             Hugging Face pipeline.
-        inference_kwargs (dict[str, Any]): Keyword arguments passed during the task execution or
+        inference_kwargs (BaseInferenceKwargs): Keyword arguments passed during the task execution or
             inference phase. These allow dynamic customization of the task, such as `max_length`
             and `min_length` for summarization, or `max_new_tokens` for image-to-text.
     """
     model_path: str
     model_cache_dir: str = str(SINAPSIS_CACHE_DIR)
-    device: Literal["cuda", "cpu"]
-    torch_dtype: Literal["float16", "float32"] = "float16"
     seed: int | None = None
-    pipeline_kwargs: dict[str, Any] = Field(default_factory=dict)
-    inference_kwargs: dict[str, Any] = Field(default_factory=dict)
+    pipeline_kwargs: PipelineKwargs = Field(default_factory=PipelineKwargs)
+    inference_kwargs: BaseInferenceKwargs = Field(default_factory=BaseInferenceKwargs)
 class TransformersBase(Template):
@@ -123,9 +144,7 @@ class TransformersBase(Template):
         return pipeline(
             task=self.task,
             model=self.attributes.model_path,
-            device=self.attributes.device,
-            torch_dtype=self._TORCH_DTYPE.get(self.attributes.torch_dtype),
-            **self.attributes.pipeline_kwargs,
+            **self.attributes.pipeline_kwargs.model_dump(),
             **kwargs,
         )

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py RENAMED Viewed

@@ -2,17 +2,47 @@
 import numpy as np
 from PIL import Image
+from pydantic import Field
 from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
 from sinapsis_core.template_base.base_models import OutputTypes
 from sinapsis_huggingface_transformers.helpers.tags import Tags
-from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
+from sinapsis_huggingface_transformers.templates.base_transformers import (
+    BaseInferenceKwargs,
+    TransformersBase,
+    TransformersBaseAttributes,
+)
 ImageToTextTransformersUIProperties = TransformersBase.UIProperties
 ImageToTextTransformersUIProperties.output_type = OutputTypes.TEXT
 ImageToTextTransformersUIProperties.tags.extend([Tags.IMAGE, Tags.TEXT, Tags.IMAGE_TO_TEXT])
+class ImageToTextInferenceKwargs(BaseInferenceKwargs):
+    """Specific keyword arguments for the image-to-text pipeline.
+    Attributes:
+        max_new_tokens (int | None): The maximum number of tokens to generate in the description.
+        timeout (float | None): The maximum time in seconds to wait for fetching images from the web.
+    """
+    max_new_tokens: int | None = None
+    timeout: float | None = None
+class ImageToTextTransformersAttributes(TransformersBaseAttributes):
+    """Defines the complete set of attributes for the ImageToTextTransformers template.
+    Inherits general transformer settings from TransformersBaseAttributes.
+    Attributes:
+        inference_kwargs (ImageToTextInferenceKwargs): Task-specific parameters for the image-to-text pipeline,
+            such as `max_new_tokens`.
+    """
+    inference_kwargs: ImageToTextInferenceKwargs = Field(default_factory=ImageToTextInferenceKwargs)
 class ImageToTextTransformers(TransformersBase):
     """ImageToTextTransformers template to generate text from an image.
@@ -37,6 +67,7 @@ class ImageToTextTransformers(TransformersBase):
     """
+    AttributesBaseModel = ImageToTextTransformersAttributes
     GENERATED_TEXT_KEY = "generated_text"
     UIProperties = ImageToTextTransformersUIProperties
@@ -78,7 +109,9 @@ class ImageToTextTransformers(TransformersBase):
         """
         for image_packet in container.images:
             image = self._convert_to_pil(image_packet.content)
-            text_description = self.pipeline(image, **self.attributes.inference_kwargs)[0][self.GENERATED_TEXT_KEY]
-            text_packet = TextPacket(content=text_description)
-            container.texts.append(text_packet)
+            results = self.pipeline(image, **self.attributes.inference_kwargs.model_dump(exclude_none=True))
+            if results:
+                text_description = results[0].get(self.GENERATED_TEXT_KEY)
+                if text_description:
+                    container.texts.append(TextPacket(content=text_description))
         return container

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py RENAMED Viewed

@@ -1,11 +1,17 @@
 # -*- coding: utf-8 -*-
+from typing import Literal
 import numpy as np
+from pydantic import Field
 from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
 from sinapsis_core.template_base.base_models import OutputTypes
 from sinapsis_huggingface_transformers.helpers.tags import Tags
-from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
+from sinapsis_huggingface_transformers.templates.base_transformers import (
+    BaseInferenceKwargs,
+    TransformersBase,
+    TransformersBaseAttributes,
+)
 SpeechToTextTransformersUIProperties = TransformersBase.UIProperties
 SpeechToTextTransformersUIProperties.output_type = OutputTypes.TEXT
@@ -14,6 +20,30 @@ SpeechToTextTransformersUIProperties.tags.extend(
 )
+class SpeechToTextInferenceKwargs(BaseInferenceKwargs):
+    """Specific keyword arguments for the automatic-speech-recognition pipeline.
+    Attributes:
+        return_timestamps (Literal["char", "word"] | bool | None ): If set, controls the granularity of
+            timestamps returned with the transcribed text. Can be "char", "word", or True for segments.
+    """
+    return_timestamps: Literal["char", "word"] | bool | None = None
+class SpeechToTextTransformersAttributes(TransformersBaseAttributes):
+    """Defines the set of attributes for the SpeechToTextTransformers template.
+    Inherits general transformer settings from TransformersBaseAttributes.
+    Attributes:
+        inference_kwargs (SpeechToTextInferenceKwargs): Task-specific parameters for the speech-to-text pipeline,
+            such as `return_timestamps`.
+    """
+    inference_kwargs: SpeechToTextInferenceKwargs = Field(default_factory=SpeechToTextInferenceKwargs)
 class SpeechToTextTransformers(TransformersBase):
     """Template to perform speech-to-text actions
     using the HuggingFace module through the 'transformers' architecture.
@@ -40,6 +70,7 @@ class SpeechToTextTransformers(TransformersBase):
     """
+    AttributesBaseModel = SpeechToTextTransformersAttributes
     TEXT_KEY = "text"
     UIProperties = SpeechToTextTransformersUIProperties
@@ -65,11 +96,15 @@ class SpeechToTextTransformers(TransformersBase):
         for audio_packet in container.audios:
             audio = audio_packet.content
             audio = audio.astype(np.float32)
-            transcribed_text = self.pipeline(audio, **self.attributes.inference_kwargs)[self.TEXT_KEY]
-            transcribed_text_textpacket = TextPacket(
-                content=transcribed_text,
-                source=audio_packet.source,
-            )
-            self.logger.info(f"Speech-to-text transcription: {transcribed_text}")
-            container.texts.append(transcribed_text_textpacket)
+            results = self.pipeline(audio, **self.attributes.inference_kwargs.model_dump(exclude_none=True))
+            if results:
+                transcribed_text = results.get(self.TEXT_KEY)
+                if transcribed_text:
+                    self.logger.info(f"Speech-to-text transcription: {transcribed_text}")
+                    container.texts.append(
+                        TextPacket(
+                            content=transcribed_text,
+                            source=audio_packet.source,
+                        )
+                    )
         return container

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py RENAMED Viewed

@@ -1,16 +1,48 @@
 # -*- coding: utf-8 -*-
+from pydantic import Field
 from sinapsis_core.data_containers.data_packet import DataContainer
 from sinapsis_core.template_base.base_models import OutputTypes
 from sinapsis_huggingface_transformers.helpers.tags import Tags
-from sinapsis_huggingface_transformers.templates.base_transformers import TransformersBase
+from sinapsis_huggingface_transformers.templates.base_transformers import (
+    BaseInferenceKwargs,
+    TransformersBase,
+    TransformersBaseAttributes,
+)
 SummarizationTransformersUIProperties = TransformersBase.UIProperties
 SummarizationTransformersUIProperties.output_type = OutputTypes.TEXT
 SummarizationTransformersUIProperties.tags.extend([Tags.SUMMARIZATION, Tags.TEXT])
+class SummarizationInferenceKwargs(BaseInferenceKwargs):
+    """Specific keyword arguments for the summarization pipeline.
+    Attributes:
+        return_text (bool | None): Whether or not to include the decoded texts in the outputs.
+        return_tensors (bool | None): Whether or not to include the tensors of predictions.
+        clean_up_tokenization_spaces (bool | None): Whether or not to clean up the potential extra spaces.
+    """
+    return_text: bool | None = True
+    return_tensors: bool | None = False
+    clean_up_tokenization_spaces: bool | None = False
+class SummarizationTransformersAttributes(TransformersBaseAttributes):
+    """Defines the complete set of attributes for the SummarizationTransformers template.
+    Inherits general transformer settings from TransformersBaseAttributes.
+    Attributes:
+        inference_kwargs: Task-specific parameters for the summarization pipeline,
+            such as `clean_up_tokenization_spaces`.
+    """
+    inference_kwargs: SummarizationInferenceKwargs = Field(default_factory=SummarizationInferenceKwargs)
 class SummarizationTransformers(TransformersBase):
     """Template for text summarization using a Hugging Face Transformers pipeline.
@@ -39,6 +71,7 @@ class SummarizationTransformers(TransformersBase):
     """
+    AttributesBaseModel = SummarizationTransformersAttributes
     SUMMARY_TEXT_KEY = "summary_text"
     UIProperties = SummarizationTransformersUIProperties
@@ -63,9 +96,11 @@ class SummarizationTransformers(TransformersBase):
             DataContainer: DataContainer including the summarized text.
         """
         for text_packet in container.texts:
-            summarized_text = self.pipeline(text_packet.content, **self.attributes.inference_kwargs)[0][
-                self.SUMMARY_TEXT_KEY
-            ]
-            text_packet.content = summarized_text
+            results = self.pipeline(
+                text_packet.content, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+            )
+            if results:
+                summarized_text = results[0].get(self.SUMMARY_TEXT_KEY)
+                if summarized_text:
+                    text_packet.content = summarized_text
         return container

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py RENAMED Viewed

@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
 import numpy as np
 import torch
 from sinapsis_core.data_containers.data_packet import AudioPacket, DataContainer, TextPacket
@@ -65,6 +64,7 @@ class TextToSpeechTransformers(TransformersBase):
     """
     AttributesBaseModel = TextToSpeechAttributes
+    SAMPLE_RATE_KEY = "sampling_rate"
     UIProperties = TextToSpeechTransformersUIProperties
     def initialize(self) -> None:
@@ -94,7 +94,7 @@ class TextToSpeechTransformers(TransformersBase):
             else {}
         )
         output = self.pipeline("Fetching sampling rate.", forward_params=forward_params)
-        sample_rate = output.get("sampling_rate", 16000)
+        sample_rate = output.get(self.SAMPLE_RATE_KEY, 16000)
         return sample_rate
@@ -132,7 +132,9 @@ class TextToSpeechTransformers(TransformersBase):
             else {}
         )
         for chunk in sentences:
-            output = self.pipeline(chunk, forward_params=forward_params, **self.attributes.inference_kwargs)
+            output = self.pipeline(
+                chunk, forward_params=forward_params, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+            )
             total_audio.append(output["audio"][0] if output["audio"].ndim == 2 else output["audio"])
         if total_audio:
             total_audio = np.concatenate(total_audio)

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py RENAMED Viewed

@@ -1,11 +1,12 @@
 # -*- coding: utf-8 -*-
+from pydantic import Field
 from sinapsis_core.data_containers.data_packet import DataContainer
 from sinapsis_core.template_base.base_models import OutputTypes
 from sinapsis_huggingface_transformers.helpers.tags import Tags
 from sinapsis_huggingface_transformers.templates.base_transformers import (
+    BaseInferenceKwargs,
     TransformersBase,
     TransformersBaseAttributes,
 )
@@ -15,14 +16,31 @@ TranslationTransformersUIProperties.output_type = OutputTypes.TEXT
 TranslationTransformersUIProperties.tags.extend([Tags.LANGUAGE, Tags.TRANSLATION])
+class TranslationInferenceKwargs(BaseInferenceKwargs):
+    """Specific keyword arguments for the translation pipeline.
+    Attributes:
+        return_text (bool | None): Whether or not to include the decoded texts in the outputs.
+        return_tensors (bool | None): Whether or not to include the tensors of predictions.
+        clean_up_tokenization_spaces (bool | None): Whether or not to clean up the potential extra spaces.
+    """
+    return_text: bool | None = True
+    return_tensors: bool | None = False
+    clean_up_tokenization_spaces: bool | None = False
 class TranslationTransformersAttributes(TransformersBaseAttributes):
     """Attributes for the transformers pipeline translation task.
     Attributes:
+        inference_kwargs: Task-specific parameters for the translation pipeline,
+            such as `clean_up_tokenization_spaces`.
         source_language (str): The language code of the source language (e.g., "en" for English).
         target_language (str): The language code of the target language (e.g., "fr" for French).
     """
+    inference_kwargs: TranslationInferenceKwargs = Field(default_factory=TranslationInferenceKwargs)
     source_language: str
     target_language: str
@@ -79,8 +97,11 @@ class TranslationTransformers(TransformersBase):
             DataContainer: DataContainer including the translated text.
         """
         for text_packet in container.texts:
-            translated_text = self.pipeline(text_packet.content, **self.attributes.inference_kwargs)[0][
-                self.TRANSLATION_TEXT_KEY
-            ]
-            text_packet.content = translated_text
+            results = self.pipeline(
+                text_packet.content, **self.attributes.inference_kwargs.model_dump(exclude_none=True)
+            )
+            if results:
+                translated_text = results[0].get(self.TRANSLATION_TEXT_KEY)
+                if translated_text:
+                    text_packet.content = translated_text
         return container

{sinapsis_huggingface-0.2.10 → sinapsis_huggingface-0.2.12}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "sinapsis-huggingface"
-version = "0.2.10"
+version = "0.2.12"
 description = "Package for HuggingFace-based templates"
 authors = [
     {name = "SinapsisAI", email = "dev@sinapsis.tech"},